/*
The router lookahead provides an estimate of the cost from an intermediate node to the target node
during directed (A*-like) routing.

The VPR 7.0 lookahead (route/route_timing.c ==> get_timing_driven_expected_cost) lower-bounds the remaining delay and
congestion by assuming that a minimum number of wires, of the same type as the current node being expanded, can be used
to complete the route. While this method is efficient, it can run into trouble with architectures that use
multiple interconnected wire types.

The lookahead in this file pre-computes delay/congestion costs up and to the right of a starting tile. This generates
delay/congestion tables for {CHANX, CHANY} channel types, over all wire types defined in the architecture file.
See Section 3.2.4 in Oleg Petelin's MASc thesis (2016) for more discussion.

*/


#include <cmath>
#include <vector>
#include <queue>
#include <assert.h>
#include <ctime>
#include "vpr_types.h"
#include "vpr_error.h"
#include "vpr_utils.h"
#include "globals.h"
#include "vtr_math.h"
#include "vtr_log.h"
#include "vtr_assert.h"
#include "router_lookahead_map.h"

using namespace std;


/* the cost map is computed by running a Dijkstra search from channel segment rr nodes at the specified reference coordinate */
#define REF_X 3
#define REF_Y 3

/* we will profile delay/congestion using this many tracks for each wire type */
#define MAX_TRACK_OFFSET 16

/* we're profiling routing cost over many tracks for each wire type, so we'll have many cost entries at each |dx|,|dy| offset.
   there are many ways to "boil down" the many costs at each offset to a single entry for a given (wire type, chan_type) combination --
   we can take the smallest cost, the average, median, etc. This define selects the method we use.
   See e_representative_entry_method */
#define REPRESENTATIVE_ENTRY_METHOD SMALLEST


/* when a list of delay/congestion entries at a coordinate in Cost_Entry is boiled down to a single
   representative entry, this enum is passed-in to specify how that representative entry should be
   calculated */
enum e_representative_entry_method{
	FIRST = 0,	//the first cost that was recorded
	SMALLEST,	//the smallest-delay cost recorded
	AVERAGE,
	GEOMEAN,
	MEDIAN
};


/* f_cost_map is an array of these cost entries that specifies delay/congestion estimates
   to travel relative x/y distances */
class Cost_Entry{
public:
	float delay;
	float congestion;

	Cost_Entry(){
		delay = -1.0;
		congestion = -1.0;
	}
	Cost_Entry(float set_delay, float set_congestion){
		delay = set_delay;
		congestion = set_congestion;
	}
};



/* a class that stores delay/congestion information for a given relative coordinate during the Dijkstra expansion.
   since it stores multiple cost entries, it is later boiled down to a single representative cost entry to be stored
   in the final lookahead cost map */
class Expansion_Cost_Entry{
private:
	vector<Cost_Entry> cost_vector;

	Cost_Entry get_smallest_entry();
	Cost_Entry get_average_entry();
	Cost_Entry get_geomean_entry();
	Cost_Entry get_median_entry();


public:
	void add_cost_entry(float add_delay, float add_congestion){
		Cost_Entry cost_entry(add_delay, add_congestion);
		if (REPRESENTATIVE_ENTRY_METHOD == SMALLEST){
			/* taking the smallest-delay entry anyway, so no need to push back multple entries */
			if (this->cost_vector.empty()){
				this->cost_vector.push_back(cost_entry);
			} else {
				if (add_delay < this->cost_vector[0].delay){
					this->cost_vector[0] = cost_entry;
				}
			}
		} else {
			this->cost_vector.push_back(cost_entry);
		}
	}
	void clear_cost_entries(){
		this->cost_vector.clear();
	}

	Cost_Entry get_representative_cost_entry(e_representative_entry_method method){
		if (cost_vector.empty()){
			return Cost_Entry();
		} else {
			switch (method){
				case FIRST:
					return cost_vector[0];
					break;
				case SMALLEST:
					return this->get_smallest_entry();
					break;
				case AVERAGE:
					return this->get_average_entry();
					break;
				case GEOMEAN:
					return this->get_geomean_entry();
					break;
				case MEDIAN:
					return this->get_median_entry();
					break;
				default:
					return this->get_smallest_entry();
					break;
			}
		}
	}
};


/* a class that represents an entry in the Dijkstra expansion priority queue */
class PQ_Entry{
public:
	int rr_node_ind;     //index in device_ctx.rr_nodes that this entry represents
	float cost;          //the cost of the path to get to this node

	/* store backward delay, R and congestion info */
	float delay;
	float R_upstream;
	float congestion_upstream;

	PQ_Entry(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, float parent_R_upstream, float parent_congestion_upstream, bool starting_node){
		this->rr_node_ind = set_rr_node_ind;

		auto& device_ctx = g_vpr_ctx.device();
		this->delay = parent_delay;
		this->congestion_upstream = parent_congestion_upstream;
		this->R_upstream = parent_R_upstream;
		if (!starting_node){
			int cost_index = device_ctx.rr_nodes[set_rr_node_ind].cost_index();
			//this->delay += g_rr_nodes[set_rr_node_ind].C() * (g_rr_switch_inf[switch_ind].R + 0.5*g_rr_nodes[set_rr_node_ind].R()) +
			//              g_rr_switch_inf[switch_ind].Tdel;

			//FIXME going to use the delay data that the VPR7 lookahead uses. For some reason the delay calculation above calculates
			//    a value that's just a little smaller compared to what VPR7 lookahead gets. While the above calculation should be more accurate,
			//    I have found that it produces the same CPD results but with worse runtime.
			//
			//    TODO: figure out whether anything's wrong with the calculation above and use that instead. If not, add the other
			//          terms like T_quadratic and R_upstream to the calculation below (they are == 0 or UNDEFINED for buffered archs I think)
			VTR_ASSERT(device_ctx.rr_indexed_data[cost_index].T_quadratic <= 0.);
			VTR_ASSERT(device_ctx.rr_indexed_data[cost_index].C_load <= 0.);
			this->delay += device_ctx.rr_indexed_data[cost_index].T_linear;

			this->congestion_upstream += device_ctx.rr_indexed_data[cost_index].base_cost;
		}

		/* set the cost of this node */
		this->cost = this->delay;
	}

	bool operator < (const PQ_Entry &obj) const{
		/* inserted into max priority queue so want queue entries with a lower cost to be greater */
		return (this->cost > obj.cost);
	}
};


/* provides delay/congestion estimates to travel specified distances
   in the x/y direction */
typedef vector< vector< vector< vector<Cost_Entry> > > > t_cost_map;        //[0..1][[0..num_seg_types-1]0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1]	-- [0..1] entry is to
                                                                            //distinguish between CHANX/CHANY start nodes respectively
/* used during Dijkstra expansion to store delay/congestion info lists for each relative coordinate for a given segment and channel type.
   the list at each coordinate is later boiled down to a single representative cost entry to be stored in the final cost map */
typedef vector< vector<Expansion_Cost_Entry> > t_routing_cost_map;		//[0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1]


/******** File-Scope Variables ********/
/* The cost map */
t_cost_map f_cost_map;

/******** File-Scope Functions ********/
static void alloc_cost_map(int num_segments);
static void free_cost_map();
/* returns index of a node from which to start routing */
static int get_start_node_ind(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset);
/* runs Dijkstra's algorithm from specified node until all nodes have been visited. Each time a pin is visited, the delay/congestion information
   to that pin is stored is added to an entry in the routing_cost_map */
static void run_dijkstra(int start_node_ind, int start_x, int start_y, t_routing_cost_map &routing_cost_map);
/* iterates over the children of the specified node and selectively pushes them onto the priority queue */
static void expand_dijkstra_neighbours(PQ_Entry parent_entry, vector<float> &node_visited_costs, vector<bool> &node_expanded, priority_queue<PQ_Entry> &pq);
/* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */
static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map &routing_cost_map);
/* fills in missing lookahead map entries by copying the cost of the closest valid entry */
static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_type);
/* returns a cost entry in the f_cost_map that is near the specified coordinates (and preferably towards (0,0)) */
static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index);
/* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */
static void get_xy_deltas(int from_node_ind, int to_node_ind, int *delta_x, int *delta_y);


/******** Function Definitions ********/
/* queries the lookahead_map (should have been computed prior to routing) to get the expected cost
   from the specified source to the specified target */
float get_lookahead_map_cost(int from_node_ind, int to_node_ind, float criticality_fac){
	auto& device_ctx = g_vpr_ctx.device();
    auto& from_node = device_ctx.rr_nodes[from_node_ind];

	e_rr_type from_type = from_node.type();
	int from_cost_index = from_node.cost_index();
	int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index;

	assert(from_seg_index >= 0);

	int delta_x, delta_y;
	get_xy_deltas(from_node_ind, to_node_ind, &delta_x, &delta_y);
	delta_x = abs(delta_x);
	delta_y = abs(delta_y);

	/* now get the expected cost from our lookahead map */
	int from_chan_index = 0;
	if (from_type == CHANY){
		from_chan_index = 1;
	}

	Cost_Entry cost_entry = f_cost_map[from_chan_index][from_seg_index][delta_x][delta_y];
	float expected_delay = cost_entry.delay;
	float expected_congestion = cost_entry.congestion;

	float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion;
	return expected_cost;
}


/* Computes the lookahead map to be used by the router. If a map was computed prior to this, a new one will not be computed again.
   The rr graph must have been built before calling this function. */
void compute_router_lookahead(int num_segments){
	if (f_cost_map.size()){
		/* if lookahead map has already been computed, do not compute it again */
		vtr::printf_info("Router lookahead map already allocated. Will not compute router lookahead map again.\n");
		return;
	}

	auto& device_ctx = g_vpr_ctx.device();

	clock_t start_time = clock();

	/* free previous delay map and allocate new one */
	free_cost_map();
	alloc_cost_map(num_segments);

	/* run Dijkstra's algorithm for each segment type & channel type combination */
	for (int iseg = 0; iseg < num_segments; iseg++){
		for (e_rr_type chan_type : {CHANX, CHANY}){
			/* allocate the cost map for this iseg/chan_type */
			t_routing_cost_map routing_cost_map;
			routing_cost_map.assign( device_ctx.grid.width(), vector<Expansion_Cost_Entry>(device_ctx.grid.height(), Expansion_Cost_Entry()) );

			for (int ref_inc=0; ref_inc<3; ref_inc++){
				for (int track_offset = 0; track_offset < MAX_TRACK_OFFSET; track_offset += 2){
					/* get the rr node index from which to start routing */
					int start_node_ind = get_start_node_ind(REF_X+ref_inc, REF_Y+ref_inc,
                                                            device_ctx.grid.width()-2, device_ctx.grid.height()-2,  //non-corner upper right
					                                        chan_type, iseg, track_offset);

					if (start_node_ind == UNDEFINED){
						continue;
					}

					/* run Dijkstra's algorithm */
					run_dijkstra(start_node_ind, REF_X+ref_inc, REF_Y+ref_inc, routing_cost_map);
				}
			}

			/* boil down the cost list in routing_cost_map at each coordinate to a representative cost entry and store it in the lookahead
			   cost map */
			set_lookahead_map_costs(iseg, chan_type, routing_cost_map);

			/* fill in missing entries in the lookahead cost map by copying the closest cost entries (cost map was computed based on
			   a reference coordinate > (0,0) so some entries that represent a cross-chip distance have not been computed) */
			fill_in_missing_lookahead_entries(iseg, chan_type);
		}
	}

	//printing out delay maps
	//for (int iseg = 0; iseg < num_segments; iseg++){
	//	for (int chan_index : {0,1}){
	//		for (int iy = 0; iy < device_ctx.grid.height(); iy++){
	//			for (int ix = 0; ix < device_ctx.grid.width(); ix++){
	//				printf("%.3e\t", f_cost_map[chan_index][iseg][ix][iy].delay);
	//			}
	//			printf("\n");
	//		}
	//		printf("\n\n");
	//	}
	//}

	clock_t end_time = clock();

	vtr::printf_info("Computing router lookahead map took %f seconds.\n", ((float)end_time - start_time)/CLOCKS_PER_SEC);
}


/* returns index of a node from which to start routing */
static int get_start_node_ind(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset){

	int result = UNDEFINED;

	if (rr_type != CHANX && rr_type != CHANY){
		vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__, "Must start lookahead routing from CHANX or CHANY node\n");
	}

	int chan_coord = start_x;
	int seg_coord = start_y;
	if (rr_type == CHANY){
		chan_coord = start_y;
		seg_coord = start_x;
	}

	/* determine which direction the wire should go in based on the start & target coordinates */
	e_direction direction = INC_DIRECTION;
	if ((rr_type == CHANX && target_x < start_x) ||
		    (rr_type == CHANY && target_y < start_y)){
		direction = DEC_DIRECTION;
	}

	auto& device_ctx = g_vpr_ctx.device();

	vector<int> channel_node_list = device_ctx.rr_node_indices[rr_type][chan_coord][0][seg_coord];

	/* find first node in channel that has specified segment index and goes in the desired direction */
	for (unsigned itrack = 0; itrack < channel_node_list.size(); itrack++){
		int node_ind = channel_node_list[itrack];

		e_direction node_direction = device_ctx.rr_nodes[node_ind].direction();
		int node_cost_ind = device_ctx.rr_nodes[node_ind].cost_index();
		int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index;

		if ((node_direction == direction || node_direction == BI_DIRECTION) &&
			    node_seg_ind == seg_index){
			/* found first track that has the specified segment index and goes in the desired direction */
			result = node_ind;
			if (track_offset == 0){
				break;
			}
			track_offset -= 2;
		}
	}

	return result;
}


/* allocates space for cost map entries */
static void alloc_cost_map(int num_segments){
    auto& device_ctx = g_vpr_ctx.device();

	vector<Cost_Entry> ny_entries( device_ctx.grid.height(), Cost_Entry() );
	vector< vector<Cost_Entry> > nx_entries( device_ctx.grid.width(), ny_entries );
	vector< vector< vector<Cost_Entry> > > segment_entries( num_segments, nx_entries );
	f_cost_map.assign( 2, segment_entries );
}


/* frees the cost map. duh. */
static void free_cost_map(){
	f_cost_map.clear();
}

/* runs Dijkstra's algorithm from specified node until all nodes have been visited. Each time a pin is visited, the delay/congestion information
   to that pin is stored is added to an entry in the routing_cost_map */
static void run_dijkstra(int start_node_ind, int start_x, int start_y, t_routing_cost_map &routing_cost_map){
	auto& device_ctx = g_vpr_ctx.device();

	/* a list of boolean flags (one for each rr node) to figure out if a certain node has already been expanded */
	vector<bool> node_expanded( device_ctx.rr_nodes.size(), false );
	/* for each node keep a list of the cost with which that node has been visited (used to determine whether to push
	   a candidate node onto the expansion queue */
	vector<float> node_visited_costs( device_ctx.rr_nodes.size(), -1.0 );
	/* a priority queue for expansion */
	priority_queue<PQ_Entry> pq;

	/* first entry has no upstream delay or congestion */
	PQ_Entry first_entry(start_node_ind, UNDEFINED, 0, 0, 0, true);

	pq.push( first_entry );

	/* now do routing */
	while( !pq.empty() ){
		PQ_Entry current = pq.top();
		pq.pop();

		int node_ind = current.rr_node_ind;

		/* check that we haven't already expanded from this node */
		if (node_expanded[node_ind]){
			continue;
		}

		/* if this node is an ipin record its congestion/delay in the routing_cost_map */
		if (device_ctx.rr_nodes[node_ind].type() == IPIN){
			int ipin_x = device_ctx.rr_nodes[node_ind].xlow();
			int ipin_y = device_ctx.rr_nodes[node_ind].ylow();

			if (ipin_x >= start_x && ipin_y >= start_y){
				int delta_x, delta_y;
				get_xy_deltas(start_node_ind, node_ind, &delta_x, &delta_y);

				routing_cost_map[delta_x][delta_y].add_cost_entry(current.delay, current.congestion_upstream);
			}
		}

		expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq);
		node_expanded[node_ind] = true;
	}
}


/* iterates over the children of the specified node and selectively pushes them onto the priority queue */
static void expand_dijkstra_neighbours(PQ_Entry parent_entry, vector<float> &node_visited_costs, vector<bool> &node_expanded, priority_queue<PQ_Entry> &pq){
	auto& device_ctx = g_vpr_ctx.device();

	int parent_ind = parent_entry.rr_node_ind;

	auto& parent_node = device_ctx.rr_nodes[parent_ind];

	for (int iedge = 0; iedge < parent_node.num_edges(); iedge++){
		int child_node_ind = parent_node.edge_sink_node(iedge);
		int switch_ind = parent_node.edge_switch(iedge);

		/* skip this child if it has already been expanded from */
		if (node_expanded[child_node_ind]){
			continue;
		}

		PQ_Entry child_entry(child_node_ind, switch_ind, parent_entry.delay,
		                             parent_entry.R_upstream, parent_entry.congestion_upstream, false);

		assert(child_entry.cost >= 0);

		/* skip this child if it has been visited with smaller cost */
		if (node_visited_costs[child_node_ind] >= 0 && node_visited_costs[child_node_ind] < child_entry.cost){
			continue;
		}

		/* finally, record the cost with which the child was visited and put the child entry on the queue */
		node_visited_costs[child_node_ind] = child_entry.cost;
		pq.push(child_entry);
	}
}


/* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */
static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map &routing_cost_map){
	int chan_index = 0;
	if (chan_type == CHANY){
		chan_index = 1;
	}

	/* set the lookahead cost map entries with a representative cost entry from routing_cost_map */
	for (unsigned ix = 0; ix < routing_cost_map.size(); ix++){
		for (unsigned iy = 0; iy < routing_cost_map[ix].size(); iy++){

			Expansion_Cost_Entry &expansion_cost_entry = routing_cost_map[ix][iy];

			f_cost_map[chan_index][segment_index][ix][iy] = expansion_cost_entry.get_representative_cost_entry( REPRESENTATIVE_ENTRY_METHOD );
		}
	}
}


/* fills in missing lookahead map entries by copying the cost of the closest valid entry */
static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_type){
	int chan_index = 0;
	if (chan_type == CHANY){
		chan_index = 1;
	}

	/* find missing cost entries and fill them in by copying a nearby cost entry */
	for (unsigned ix = 0; ix < f_cost_map[chan_index][segment_index].size(); ix++){
		for (unsigned iy = 0; iy < f_cost_map[chan_index][segment_index][ix].size(); iy++){
			Cost_Entry cost_entry = f_cost_map[chan_index][segment_index][ix][iy];

			if (cost_entry.delay < 0 && cost_entry.congestion < 0){
				Cost_Entry copied_entry = get_nearby_cost_entry(ix, iy, segment_index, chan_index);
				f_cost_map[chan_index][segment_index][ix][iy] = copied_entry;
			}
		}
	}
}


/* returns a cost entry in the f_cost_map that is near the specified coordinates (and preferably towards (0,0)) */
static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index){
	/* compute the slope from x,y to 0,0 and then move towards 0,0 by one unit to get the coordinates
	   of the cost entry to be copied */

	assert(x > 0 || y > 0);

	float slope;
	if (x == 0){
		slope = 1e12;	//just a really large number
	} else if (y == 0){
		slope = 1e-12;	//just a really small number
	} else {
		slope = (float)y/(float)x;
	}

	int copy_x, copy_y;
	if (slope >= 1.0){
		copy_y = y-1;
		copy_x = vtr::nint( (float)y / slope );
	} else {
		copy_x = x-1;
		copy_y = vtr::nint( (float)x * slope );
	}

	assert(copy_x > 0 || copy_y > 0);

	Cost_Entry copy_entry = f_cost_map[chan_index][segment_index][copy_x][copy_y];

	/* if the entry to be copied is also empty, recurse */
	if (copy_entry.delay < 0 && copy_entry.congestion < 0){
		copy_entry = get_nearby_cost_entry(copy_x, copy_y, segment_index, chan_index);
	}

	return copy_entry;
}


/* returns cost entry with the smallest delay */
Cost_Entry Expansion_Cost_Entry::get_smallest_entry(){

	Cost_Entry smallest_entry;

	for (auto entry : this->cost_vector){
		if (smallest_entry.delay < 0 || entry.delay < smallest_entry.delay){
			smallest_entry = entry;
		}
	}

	return smallest_entry;
}

/* returns a cost entry that represents the average of all the recorded entries */
Cost_Entry Expansion_Cost_Entry::get_average_entry(){
	float avg_delay = 0;
	float avg_congestion = 0;

	for (auto cost_entry : this->cost_vector){
		avg_delay += cost_entry.delay;
		avg_congestion += cost_entry.congestion;
	}

	avg_delay /= (float)this->cost_vector.size();
	avg_congestion /= (float)this->cost_vector.size();

	return Cost_Entry(avg_delay, avg_congestion);
}

/* returns a cost entry that represents the geomean of all the recorded entries */
Cost_Entry Expansion_Cost_Entry::get_geomean_entry(){


	float geomean_delay = 0;
	float geomean_cong = 0;
	for (auto cost_entry : this->cost_vector){
		geomean_delay += log(cost_entry.delay);
		geomean_cong += log(cost_entry.congestion);
	}

	geomean_delay = exp( geomean_delay / (float)this->cost_vector.size() );
	geomean_cong = exp( geomean_cong / (float)this->cost_vector.size() );

	return Cost_Entry(geomean_delay, geomean_cong);
}

/* returns a cost entry that represents the medial of all recorded entries */
Cost_Entry Expansion_Cost_Entry::get_median_entry(){
	/* find median by binning the delays of all entries and then chosing the bin
	   with the largest number of entries */

	int num_bins = 10;

	/* find entries with smallest and largest delays */
	Cost_Entry min_del_entry;
	Cost_Entry max_del_entry;
	for(auto entry : this->cost_vector){
		if (min_del_entry.delay < 0 || entry.delay < min_del_entry.delay){
			min_del_entry = entry;
		}
		if (max_del_entry.delay < 0 || entry.delay > max_del_entry.delay){
			max_del_entry = entry;
		}
	}

	/* get the bin size */
	float delay_diff = max_del_entry.delay - min_del_entry.delay;
	float bin_size = delay_diff / (float)num_bins;

	/* sort the cost entries into bins */
	vector< vector<Cost_Entry> > entry_bins( num_bins, vector<Cost_Entry>() );
	for (auto entry : this->cost_vector){
		float bin_num = floor( (entry.delay - min_del_entry.delay) / bin_size );

		assert(vtr::nint(bin_num) >= 0 && vtr::nint(bin_num) <= num_bins);
		if (vtr::nint(bin_num) == num_bins){
			/* largest entry will otherwise have an out-of-bounds bin number */
			bin_num -= 1;
		}
		entry_bins[ vtr::nint(bin_num) ].push_back(entry);
	}

	/* find the bin with the largest number of elements */
	int largest_bin = 0;
	int largest_size = 0;
	for (int ibin = 0; ibin < num_bins; ibin++){
		if ( entry_bins[ibin].size() > (unsigned)largest_size ){
			largest_bin = ibin;
			largest_size = (unsigned)entry_bins[ibin].size();
		}
	}

	/* get the representative delay of the largest bin */
	Cost_Entry representative_entry = entry_bins[largest_bin][0];

	return representative_entry;
}

/* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */
static void get_xy_deltas(int from_node_ind, int to_node_ind, int *delta_x, int *delta_y){
	auto& device_ctx = g_vpr_ctx.device();

	auto& from = device_ctx.rr_nodes[from_node_ind];
	auto& to = device_ctx.rr_nodes[to_node_ind];

	/* get chan/seg coordinates of the from/to nodes. seg coordinate is along the wire,
           chan coordinate is orthogonal to the wire */
	int from_seg_low = from.xlow();
	int from_seg_high = from.xhigh();
	int from_chan = from.ylow();
	int to_seg = to.xlow();
	int to_chan = to.ylow();
	if (from.type() == CHANY){
		from_seg_low = from.ylow();
		from_seg_high = from.yhigh();
		from_chan = from.xlow();
		to_seg = to.ylow();
		to_chan = to.xlow();
	}

	/* now we want to count the minimum number of *channel segments* between the from and to nodes */
	int delta_seg, delta_chan;

	/* orthogonal to wire */
	int no_need_to_pass_by_clb = 0;	//if we need orthogonal wires then we don't need to pass by the target CLB along the current wire direction
	if (to_chan > from_chan + 1){
		/* above */
		delta_chan = to_chan - from_chan;
		no_need_to_pass_by_clb = 1;
	} else if (to_chan < from_chan){
		/* below */
		delta_chan = from_chan - to_chan + 1;
		no_need_to_pass_by_clb = 1;
	} else {
		/* adjacent to current channel */
		delta_chan = 0;
		no_need_to_pass_by_clb = 0;
	}

	/* along same direction as wire. */
	if (to_seg > from_seg_high){
		/* ahead */
		delta_seg = to_seg - from_seg_high - no_need_to_pass_by_clb;
	} else if (to_seg < from_seg_low){
		/* behind */
		delta_seg = from_seg_low - to_seg - no_need_to_pass_by_clb;
	} else {
		/* along the span of the wire */
		delta_seg = 0;
	}

	/* account for wire direction. lookahead map was computed by looking up and to the right starting at INC wires. for targets
	   that are opposite of the wire direction, let's add 1 to delta_seg */
	if ( (to_seg < from_seg_low && from.direction() == INC_DIRECTION) ||
	     (to_seg > from_seg_high && from.direction() == DEC_DIRECTION) ){
		delta_seg++;
	}

	*delta_x = delta_seg;
	*delta_y = delta_chan;
	if (from.type() == CHANY){
		*delta_x = delta_chan;
		*delta_y = delta_seg;
	}
}


