vpr/src/route/router_lookahead_map.cpp - third_party/vtr-verilog-to-routing - Git at Google

 /*
  * The router lookahead provides an estimate of the cost from an intermediate node to the target node
  * during directed (A*-like) routing.
  *
  * The VPR 7.0 lookahead (route/route_timing.c ==> get_timing_driven_expected_cost) lower-bounds the remaining delay and
  * congestion by assuming that a minimum number of wires, of the same type as the current node being expanded, can be used
  * to complete the route. While this method is efficient, it can run into trouble with architectures that use
  * multiple interconnected wire types.
  *
  * The lookahead in this file pre-computes delay/congestion costs up and to the right of a starting tile. This generates
  * delay/congestion tables for {CHANX, CHANY} channel types, over all wire types defined in the architecture file.
  * See Section 3.2.4 in Oleg Petelin's MASc thesis (2016) for more discussion.
  *
  */

 #include <cmath>
 #include <vector>
 #include <queue>
 #include <ctime>
 #include "vpr_types.h"
 #include "vpr_error.h"
 #include "vpr_utils.h"
 #include "globals.h"
 #include "vtr_math.h"
 #include "vtr_log.h"
 #include "vtr_assert.h"
 #include "vtr_time.h"
 #include "router_lookahead_map.h"

 /* the cost map is computed by running a Dijkstra search from channel segment rr nodes at the specified reference coordinate */
 #define REF_X 3
 #define REF_Y 3

 /* we will profile delay/congestion using this many tracks for each wire type */
 #define MAX_TRACK_OFFSET 16

 /* we're profiling routing cost over many tracks for each wire type, so we'll have many cost entries at each |dx|,|dy| offset.
  * there are many ways to "boil down" the many costs at each offset to a single entry for a given (wire type, chan_type) combination --
  * we can take the smallest cost, the average, median, etc. This define selects the method we use.
  * See e_representative_entry_method */
 #define REPRESENTATIVE_ENTRY_METHOD SMALLEST

 /* when a list of delay/congestion entries at a coordinate in Cost_Entry is boiled down to a single
  * representative entry, this enum is passed-in to specify how that representative entry should be
  * calculated */
 enum e_representative_entry_method {
     FIRST = 0, //the first cost that was recorded
     SMALLEST,  //the smallest-delay cost recorded
     AVERAGE,
     GEOMEAN,
     MEDIAN
 };

 /* f_cost_map is an array of these cost entries that specifies delay/congestion estimates
  * to travel relative x/y distances */
 class Cost_Entry {
   public:
     float delay;
     float congestion;

     Cost_Entry() {
         delay = -1.0;
         congestion = -1.0;
     }
     Cost_Entry(float set_delay, float set_congestion) {
         delay = set_delay;
         congestion = set_congestion;
     }
 };

 /* a class that stores delay/congestion information for a given relative coordinate during the Dijkstra expansion.
  * since it stores multiple cost entries, it is later boiled down to a single representative cost entry to be stored
  * in the final lookahead cost map */
 class Expansion_Cost_Entry {
   private:
     std::vector<Cost_Entry> cost_vector;

     Cost_Entry get_smallest_entry();
     Cost_Entry get_average_entry();
     Cost_Entry get_geomean_entry();
     Cost_Entry get_median_entry();

   public:
     void add_cost_entry(float add_delay, float add_congestion) {
         Cost_Entry cost_entry(add_delay, add_congestion);
         if (REPRESENTATIVE_ENTRY_METHOD == SMALLEST) {
             /* taking the smallest-delay entry anyway, so no need to push back multple entries */
             if (this->cost_vector.empty()) {
                 this->cost_vector.push_back(cost_entry);
             } else {
                 if (add_delay < this->cost_vector[0].delay) {
                     this->cost_vector[0] = cost_entry;
                 }
             }
         } else {
             this->cost_vector.push_back(cost_entry);
         }
     }
     void clear_cost_entries() {
         this->cost_vector.clear();
     }

     Cost_Entry get_representative_cost_entry(e_representative_entry_method method) {
         Cost_Entry entry;

         if (!cost_vector.empty()) {
             switch (method) {
                 case FIRST:
                     entry = cost_vector[0];
                     break;
                 case SMALLEST:
                     entry = this->get_smallest_entry();
                     break;
                 case AVERAGE:
                     entry = this->get_average_entry();
                     break;
                 case GEOMEAN:
                     entry = this->get_geomean_entry();
                     break;
                 case MEDIAN:
                     entry = this->get_median_entry();
                     break;
                 default:
                     break;
             }
         }
         return entry;
     }
 };

 /* a class that represents an entry in the Dijkstra expansion priority queue */
 class PQ_Entry {
   public:
     int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents
     float cost;      //the cost of the path to get to this node

     /* store backward delay, R and congestion info */
     float delay;
     float R_upstream;
     float congestion_upstream;

     PQ_Entry(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, float parent_R_upstream, float parent_congestion_upstream, bool starting_node) {
         this->rr_node_ind = set_rr_node_ind;

         auto& device_ctx = g_vpr_ctx.device();
         this->delay = parent_delay;
         this->congestion_upstream = parent_congestion_upstream;
         this->R_upstream = parent_R_upstream;
         if (!starting_node) {
             int cost_index = device_ctx.rr_nodes[set_rr_node_ind].cost_index();
             //this->delay += g_rr_nodes[set_rr_node_ind].C() * (g_rr_switch_inf[switch_ind].R + 0.5*g_rr_nodes[set_rr_node_ind].R()) +
             //              g_rr_switch_inf[switch_ind].Tdel;

             //FIXME going to use the delay data that the VPR7 lookahead uses. For some reason the delay calculation above calculates
             //    a value that's just a little smaller compared to what VPR7 lookahead gets. While the above calculation should be more accurate,
             //    I have found that it produces the same CPD results but with worse runtime.
             //
             //    TODO: figure out whether anything's wrong with the calculation above and use that instead. If not, add the other
             //          terms like T_quadratic and R_upstream to the calculation below (they are == 0 or UNDEFINED for buffered archs I think)
             VTR_ASSERT(device_ctx.rr_indexed_data[cost_index].T_quadratic <= 0.);
             VTR_ASSERT(device_ctx.rr_indexed_data[cost_index].C_load <= 0.);
             this->delay += device_ctx.rr_indexed_data[cost_index].T_linear;

             this->congestion_upstream += device_ctx.rr_indexed_data[cost_index].base_cost;
         }

         /* set the cost of this node */
         this->cost = this->delay;
     }

     bool operator<(const PQ_Entry& obj) const {
         /* inserted into max priority queue so want queue entries with a lower cost to be greater */
         return (this->cost > obj.cost);
     }
 };

 /* provides delay/congestion estimates to travel specified distances
  * in the x/y direction */
 typedef vtr::NdMatrix<Cost_Entry, 4> t_cost_map; //[0..1][[0..num_seg_types-1]0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1]
                                                  //[0..1] entry distinguish between CHANX/CHANY start nodes respectively

 /* used during Dijkstra expansion to store delay/congestion info lists for each relative coordinate for a given segment and channel type.
  * the list at each coordinate is later boiled down to a single representative cost entry to be stored in the final cost map */
 typedef vtr::Matrix<Expansion_Cost_Entry> t_routing_cost_map; //[0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1]

 /******** File-Scope Variables ********/
 /* The cost map */
 t_cost_map f_cost_map;

 /******** File-Scope Functions ********/
 static void alloc_cost_map(int num_segments);
 static void free_cost_map();
 /* returns index of a node from which to start routing */
 static int get_start_node_ind(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset);
 /* runs Dijkstra's algorithm from specified node until all nodes have been visited. Each time a pin is visited, the delay/congestion information
  * to that pin is stored is added to an entry in the routing_cost_map */
 static void run_dijkstra(int start_node_ind, int start_x, int start_y, t_routing_cost_map& routing_cost_map);
 /* iterates over the children of the specified node and selectively pushes them onto the priority queue */
 static void expand_dijkstra_neighbours(PQ_Entry parent_entry, std::vector<float>& node_visited_costs, std::vector<bool>& node_expanded, std::priority_queue<PQ_Entry>& pq);
 /* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */
 static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map);
 /* fills in missing lookahead map entries by copying the cost of the closest valid entry */
 static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_type);
 /* returns a cost entry in the f_cost_map that is near the specified coordinates (and preferably towards (0,0)) */
 static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index);
 /* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */
 static void get_xy_deltas(int from_node_ind, int to_node_ind, int* delta_x, int* delta_y);

 static void print_cost_map();

 /******** Function Definitions ********/
 /* queries the lookahead_map (should have been computed prior to routing) to get the expected cost
  * from the specified source to the specified target */
 float get_lookahead_map_cost(int from_node_ind, int to_node_ind, float criticality_fac) {
     auto& device_ctx = g_vpr_ctx.device();
     auto& from_node = device_ctx.rr_nodes[from_node_ind];

     e_rr_type from_type = from_node.type();
     int from_cost_index = from_node.cost_index();
     int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index;

     VTR_ASSERT(from_seg_index >= 0);

     int delta_x, delta_y;
     get_xy_deltas(from_node_ind, to_node_ind, &delta_x, &delta_y);
     delta_x = abs(delta_x);
     delta_y = abs(delta_y);

     /* now get the expected cost from our lookahead map */
     int from_chan_index = 0;
     if (from_type == CHANY) {
         from_chan_index = 1;
     }

     Cost_Entry cost_entry = f_cost_map[from_chan_index][from_seg_index][delta_x][delta_y];
     float expected_delay = cost_entry.delay;
     float expected_congestion = cost_entry.congestion;

     float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion;
     return expected_cost;
 }

 /* Computes the lookahead map to be used by the router. If a map was computed prior to this, a new one will not be computed again.
  * The rr graph must have been built before calling this function. */
 void compute_router_lookahead(int num_segments) {
     vtr::ScopedStartFinishTimer timer("Computing router lookahead map");

     f_cost_map.clear();

     auto& device_ctx = g_vpr_ctx.device();

     /* free previous delay map and allocate new one */
     free_cost_map();
     alloc_cost_map(num_segments);

     /* run Dijkstra's algorithm for each segment type & channel type combination */
     for (int iseg = 0; iseg < num_segments; iseg++) {
         for (e_rr_type chan_type : {CHANX, CHANY}) {
             /* allocate the cost map for this iseg/chan_type */
             t_routing_cost_map routing_cost_map({device_ctx.grid.width(), device_ctx.grid.height()});

             for (int ref_inc = 0; ref_inc < 3; ref_inc++) {
                 for (int track_offset = 0; track_offset < MAX_TRACK_OFFSET; track_offset += 2) {
                     /* get the rr node index from which to start routing */
                     int start_node_ind = get_start_node_ind(REF_X + ref_inc, REF_Y + ref_inc,
                                                             device_ctx.grid.width() - 2, device_ctx.grid.height() - 2, //non-corner upper right
                                                             chan_type, iseg, track_offset);

                     if (start_node_ind == UNDEFINED) {
                         continue;
                     }

                     /* run Dijkstra's algorithm */
                     run_dijkstra(start_node_ind, REF_X + ref_inc, REF_Y + ref_inc, routing_cost_map);
                 }
             }

             /* boil down the cost list in routing_cost_map at each coordinate to a representative cost entry and store it in the lookahead
              * cost map */
             set_lookahead_map_costs(iseg, chan_type, routing_cost_map);

             /* fill in missing entries in the lookahead cost map by copying the closest cost entries (cost map was computed based on
              * a reference coordinate > (0,0) so some entries that represent a cross-chip distance have not been computed) */
             fill_in_missing_lookahead_entries(iseg, chan_type);
         }
     }

     if (false) print_cost_map();
 }

 /* returns index of a node from which to start routing */
 static int get_start_node_ind(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset) {
     int result = UNDEFINED;

     if (rr_type != CHANX && rr_type != CHANY) {
         VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Must start lookahead routing from CHANX or CHANY node\n");
     }

     /* determine which direction the wire should go in based on the start & target coordinates */
     e_direction direction = INC_DIRECTION;
     if ((rr_type == CHANX && target_x < start_x) || (rr_type == CHANY && target_y < start_y)) {
         direction = DEC_DIRECTION;
     }

     auto& device_ctx = g_vpr_ctx.device();

     VTR_ASSERT(rr_type == CHANX || rr_type == CHANY);

     const std::vector<int>& channel_node_list = device_ctx.rr_node_indices[rr_type][start_x][start_y][0];

     /* find first node in channel that has specified segment index and goes in the desired direction */
     for (unsigned itrack = 0; itrack < channel_node_list.size(); itrack++) {
         int node_ind = channel_node_list[itrack];

         e_direction node_direction = device_ctx.rr_nodes[node_ind].direction();
         int node_cost_ind = device_ctx.rr_nodes[node_ind].cost_index();
         int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index;

         if ((node_direction == direction || node_direction == BI_DIRECTION) && node_seg_ind == seg_index) {
             /* found first track that has the specified segment index and goes in the desired direction */
             result = node_ind;
             if (track_offset == 0) {
                 break;
             }
             track_offset -= 2;
         }
     }

     return result;
 }

 /* allocates space for cost map entries */
 static void alloc_cost_map(int num_segments) {
     auto& device_ctx = g_vpr_ctx.device();

     f_cost_map = t_cost_map({2, size_t(num_segments), device_ctx.grid.width(), device_ctx.grid.height()});
 }

 /* frees the cost map. duh. */
 static void free_cost_map() {
     f_cost_map.clear();
 }

 /* runs Dijkstra's algorithm from specified node until all nodes have been visited. Each time a pin is visited, the delay/congestion information
  * to that pin is stored is added to an entry in the routing_cost_map */
 static void run_dijkstra(int start_node_ind, int start_x, int start_y, t_routing_cost_map& routing_cost_map) {
     auto& device_ctx = g_vpr_ctx.device();

     /* a list of boolean flags (one for each rr node) to figure out if a certain node has already been expanded */
     std::vector<bool> node_expanded(device_ctx.rr_nodes.size(), false);
     /* for each node keep a list of the cost with which that node has been visited (used to determine whether to push
      * a candidate node onto the expansion queue */
     std::vector<float> node_visited_costs(device_ctx.rr_nodes.size(), -1.0);
     /* a priority queue for expansion */
     std::priority_queue<PQ_Entry> pq;

     /* first entry has no upstream delay or congestion */
     PQ_Entry first_entry(start_node_ind, UNDEFINED, 0, 0, 0, true);

     pq.push(first_entry);

     /* now do routing */
     while (!pq.empty()) {
         PQ_Entry current = pq.top();
         pq.pop();

         int node_ind = current.rr_node_ind;

         /* check that we haven't already expanded from this node */
         if (node_expanded[node_ind]) {
             continue;
         }

         /* if this node is an ipin record its congestion/delay in the routing_cost_map */
         if (device_ctx.rr_nodes[node_ind].type() == IPIN) {
             int ipin_x = device_ctx.rr_nodes[node_ind].xlow();
             int ipin_y = device_ctx.rr_nodes[node_ind].ylow();

             if (ipin_x >= start_x && ipin_y >= start_y) {
                 int delta_x, delta_y;
                 get_xy_deltas(start_node_ind, node_ind, &delta_x, &delta_y);

                 routing_cost_map[delta_x][delta_y].add_cost_entry(current.delay, current.congestion_upstream);
             }
         }

         expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq);
         node_expanded[node_ind] = true;
     }
 }

 /* iterates over the children of the specified node and selectively pushes them onto the priority queue */
 static void expand_dijkstra_neighbours(PQ_Entry parent_entry, std::vector<float>& node_visited_costs, std::vector<bool>& node_expanded, std::priority_queue<PQ_Entry>& pq) {
     auto& device_ctx = g_vpr_ctx.device();

     int parent_ind = parent_entry.rr_node_ind;

     auto& parent_node = device_ctx.rr_nodes[parent_ind];

     for (t_edge_size iedge = 0; iedge < parent_node.num_edges(); iedge++) {
         int child_node_ind = parent_node.edge_sink_node(iedge);
         int switch_ind = parent_node.edge_switch(iedge);

         /* skip this child if it has already been expanded from */
         if (node_expanded[child_node_ind]) {
             continue;
         }

         PQ_Entry child_entry(child_node_ind, switch_ind, parent_entry.delay,
                              parent_entry.R_upstream, parent_entry.congestion_upstream, false);

         //VTR_ASSERT(child_entry.cost >= 0); //Asertion fails in practise. TODO: debug

         /* skip this child if it has been visited with smaller cost */
         if (node_visited_costs[child_node_ind] >= 0 && node_visited_costs[child_node_ind] < child_entry.cost) {
             continue;
         }

         /* finally, record the cost with which the child was visited and put the child entry on the queue */
         node_visited_costs[child_node_ind] = child_entry.cost;
         pq.push(child_entry);
     }
 }

 /* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */
 static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map) {
     int chan_index = 0;
     if (chan_type == CHANY) {
         chan_index = 1;
     }

     /* set the lookahead cost map entries with a representative cost entry from routing_cost_map */
     for (unsigned ix = 0; ix < routing_cost_map.dim_size(0); ix++) {
         for (unsigned iy = 0; iy < routing_cost_map.dim_size(1); iy++) {
             Expansion_Cost_Entry& expansion_cost_entry = routing_cost_map[ix][iy];

             f_cost_map[chan_index][segment_index][ix][iy] = expansion_cost_entry.get_representative_cost_entry(REPRESENTATIVE_ENTRY_METHOD);
         }
     }
 }

 /* fills in missing lookahead map entries by copying the cost of the closest valid entry */
 static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_type) {
     int chan_index = 0;
     if (chan_type == CHANY) {
         chan_index = 1;
     }

     auto& device_ctx = g_vpr_ctx.device();

     /* find missing cost entries and fill them in by copying a nearby cost entry */
     for (unsigned ix = 0; ix < device_ctx.grid.width(); ix++) {
         for (unsigned iy = 0; iy < device_ctx.grid.height(); iy++) {
             Cost_Entry cost_entry = f_cost_map[chan_index][segment_index][ix][iy];

             if (cost_entry.delay < 0 && cost_entry.congestion < 0) {
                 Cost_Entry copied_entry = get_nearby_cost_entry(ix, iy, segment_index, chan_index);
                 f_cost_map[chan_index][segment_index][ix][iy] = copied_entry;
             }
         }
     }
 }

 /* returns a cost entry in the f_cost_map that is near the specified coordinates (and preferably towards (0,0)) */
 static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index) {
     /* compute the slope from x,y to 0,0 and then move towards 0,0 by one unit to get the coordinates
      * of the cost entry to be copied */

     //VTR_ASSERT(x > 0 || y > 0); //Asertion fails in practise. TODO: debug

     float slope;
     if (x == 0) {
         slope = 1e12; //just a really large number
     } else if (y == 0) {
         slope = 1e-12; //just a really small number
     } else {
         slope = (float)y / (float)x;
     }

     int copy_x, copy_y;
     if (slope >= 1.0) {
         copy_y = y - 1;
         copy_x = vtr::nint((float)y / slope);
     } else {
         copy_x = x - 1;
         copy_y = vtr::nint((float)x * slope);
     }

     //VTR_ASSERT(copy_x > 0 || copy_y > 0); //Asertion fails in practise. TODO: debug

     Cost_Entry copy_entry = f_cost_map[chan_index][segment_index][copy_x][copy_y];

     /* if the entry to be copied is also empty, recurse */
     if (copy_entry.delay < 0 && copy_entry.congestion < 0) {
         copy_entry = get_nearby_cost_entry(copy_x, copy_y, segment_index, chan_index);
     }

     return copy_entry;
 }

 /* returns cost entry with the smallest delay */
 Cost_Entry Expansion_Cost_Entry::get_smallest_entry() {
     Cost_Entry smallest_entry;

     for (auto entry : this->cost_vector) {
         if (smallest_entry.delay < 0 || entry.delay < smallest_entry.delay) {
             smallest_entry = entry;
         }
     }

     return smallest_entry;
 }

 /* returns a cost entry that represents the average of all the recorded entries */
 Cost_Entry Expansion_Cost_Entry::get_average_entry() {
     float avg_delay = 0;
     float avg_congestion = 0;

     for (auto cost_entry : this->cost_vector) {
         avg_delay += cost_entry.delay;
         avg_congestion += cost_entry.congestion;
     }

     avg_delay /= (float)this->cost_vector.size();
     avg_congestion /= (float)this->cost_vector.size();

     return Cost_Entry(avg_delay, avg_congestion);
 }

 /* returns a cost entry that represents the geomean of all the recorded entries */
 Cost_Entry Expansion_Cost_Entry::get_geomean_entry() {
     float geomean_delay = 0;
     float geomean_cong = 0;
     for (auto cost_entry : this->cost_vector) {
         geomean_delay += log(cost_entry.delay);
         geomean_cong += log(cost_entry.congestion);
     }

     geomean_delay = exp(geomean_delay / (float)this->cost_vector.size());
     geomean_cong = exp(geomean_cong / (float)this->cost_vector.size());

     return Cost_Entry(geomean_delay, geomean_cong);
 }

 /* returns a cost entry that represents the medial of all recorded entries */
 Cost_Entry Expansion_Cost_Entry::get_median_entry() {
     /* find median by binning the delays of all entries and then chosing the bin
      * with the largest number of entries */

     int num_bins = 10;

     /* find entries with smallest and largest delays */
     Cost_Entry min_del_entry;
     Cost_Entry max_del_entry;
     for (auto entry : this->cost_vector) {
         if (min_del_entry.delay < 0 || entry.delay < min_del_entry.delay) {
             min_del_entry = entry;
         }
         if (max_del_entry.delay < 0 || entry.delay > max_del_entry.delay) {
             max_del_entry = entry;
         }
     }

     /* get the bin size */
     float delay_diff = max_del_entry.delay - min_del_entry.delay;
     float bin_size = delay_diff / (float)num_bins;

     /* sort the cost entries into bins */
     std::vector<std::vector<Cost_Entry> > entry_bins(num_bins, std::vector<Cost_Entry>());
     for (auto entry : this->cost_vector) {
         float bin_num = floor((entry.delay - min_del_entry.delay) / bin_size);

         VTR_ASSERT(vtr::nint(bin_num) >= 0 && vtr::nint(bin_num) <= num_bins);
         if (vtr::nint(bin_num) == num_bins) {
             /* largest entry will otherwise have an out-of-bounds bin number */
             bin_num -= 1;
         }
         entry_bins[vtr::nint(bin_num)].push_back(entry);
     }

     /* find the bin with the largest number of elements */
     int largest_bin = 0;
     int largest_size = 0;
     for (int ibin = 0; ibin < num_bins; ibin++) {
         if (entry_bins[ibin].size() > (unsigned)largest_size) {
             largest_bin = ibin;
             largest_size = (unsigned)entry_bins[ibin].size();
         }
     }

     /* get the representative delay of the largest bin */
     Cost_Entry representative_entry = entry_bins[largest_bin][0];

     return representative_entry;
 }

 /* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */
 static void get_xy_deltas(int from_node_ind, int to_node_ind, int* delta_x, int* delta_y) {
     auto& device_ctx = g_vpr_ctx.device();

     auto& from = device_ctx.rr_nodes[from_node_ind];
     auto& to = device_ctx.rr_nodes[to_node_ind];

     /* get chan/seg coordinates of the from/to nodes. seg coordinate is along the wire,
      * chan coordinate is orthogonal to the wire */
     int from_seg_low = from.xlow();
     int from_seg_high = from.xhigh();
     int from_chan = from.ylow();
     int to_seg = to.xlow();
     int to_chan = to.ylow();
     if (from.type() == CHANY) {
         from_seg_low = from.ylow();
         from_seg_high = from.yhigh();
         from_chan = from.xlow();
         to_seg = to.ylow();
         to_chan = to.xlow();
     }

     /* now we want to count the minimum number of *channel segments* between the from and to nodes */
     int delta_seg, delta_chan;

     /* orthogonal to wire */
     int no_need_to_pass_by_clb = 0; //if we need orthogonal wires then we don't need to pass by the target CLB along the current wire direction
     if (to_chan > from_chan + 1) {
         /* above */
         delta_chan = to_chan - from_chan;
         no_need_to_pass_by_clb = 1;
     } else if (to_chan < from_chan) {
         /* below */
         delta_chan = from_chan - to_chan + 1;
         no_need_to_pass_by_clb = 1;
     } else {
         /* adjacent to current channel */
         delta_chan = 0;
         no_need_to_pass_by_clb = 0;
     }

     /* along same direction as wire. */
     if (to_seg > from_seg_high) {
         /* ahead */
         delta_seg = to_seg - from_seg_high - no_need_to_pass_by_clb;
     } else if (to_seg < from_seg_low) {
         /* behind */
         delta_seg = from_seg_low - to_seg - no_need_to_pass_by_clb;
     } else {
         /* along the span of the wire */
         delta_seg = 0;
     }

     /* account for wire direction. lookahead map was computed by looking up and to the right starting at INC wires. for targets
      * that are opposite of the wire direction, let's add 1 to delta_seg */
     if ((to_seg < from_seg_low && from.direction() == INC_DIRECTION) || (to_seg > from_seg_high && from.direction() == DEC_DIRECTION)) {
         delta_seg++;
     }

     *delta_x = delta_seg;
     *delta_y = delta_chan;
     if (from.type() == CHANY) {
         *delta_x = delta_chan;
         *delta_y = delta_seg;
     }
 }

 static void print_cost_map() {
     auto& device_ctx = g_vpr_ctx.device();

     for (size_t chan_index = 0; chan_index < f_cost_map.dim_size(0); chan_index++) {
         for (size_t iseg = 0; iseg < f_cost_map.dim_size(1); iseg++) {
             vtr::printf("Seg %d CHAN %d\n", iseg, chan_index);
             for (size_t iy = 0; iy < device_ctx.grid.height(); iy++) {
                 for (size_t ix = 0; ix < device_ctx.grid.width(); ix++) {
                     vtr::printf("%d,%d: %.3e\t", ix, iy, f_cost_map[chan_index][iseg][ix][iy].delay);
                 }
                 vtr::printf("\n");
             }
             vtr::printf("\n\n");
         }
     }
 }