| #include <cstdio> |
| #include <ctime> |
| #include <cmath> |
| #include <algorithm> |
| #include <limits> |
| #include <set> |
| #include <unordered_set> |
| #include <utility> |
| #include <vector> |
| #include <iostream> |
| |
| #include "vtr_assert.h" |
| #include "vtr_util.h" |
| #include "vtr_log.h" |
| #include "vtr_digest.h" |
| #include "vtr_memory.h" |
| |
| #include "vpr_types.h" |
| #include "vpr_error.h" |
| #include "vpr_utils.h" |
| |
| #include "stats.h" |
| #include "globals.h" |
| #include "route_export.h" |
| #include "route_common.h" |
| #include "route_tree_timing.h" |
| #include "route_timing.h" |
| #include "route_breadth_first.h" |
| #include "place_and_route.h" |
| #include "rr_graph.h" |
| #include "rr_graph2.h" |
| #include "read_xml_arch_file.h" |
| #include "draw.h" |
| #include "echo_files.h" |
| |
| #include "route_profiling.h" |
| |
| #include "timing_util.h" |
| #include "RoutingDelayCalculator.h" |
| #include "timing_info.h" |
| #include "tatum/echo_writer.hpp" |
| |
| /**************** Types local to route_common.c ******************/ |
| struct t_trace_branch { |
| t_trace* head; |
| t_trace* tail; |
| }; |
| |
| /**************** Static variables local to route_common.c ******************/ |
| |
| static t_heap** heap; /* Indexed from [1..heap_size] */ |
| static int heap_size; /* Number of slots in the heap array */ |
| static int heap_tail; /* Index of first unused slot in the heap array */ |
| |
| /* For managing my own list of currently free heap data structures. */ |
| static t_heap* heap_free_head = nullptr; |
| /* For keeping track of the pseudo-malloc (chunk) memory for the heap */ |
| static vtr::t_chunk heap_ch; |
| |
| /* For managing my own list of currently free trace data structures. */ |
| static t_trace* trace_free_head = nullptr; |
| /* For keeping track of the pseudo-malloc (chunk) memory for the trace */ |
| static vtr::t_chunk trace_ch; |
| |
| static int num_trace_allocated = 0; /* To watch for memory leaks. */ |
| static int num_heap_allocated = 0; |
| static int num_linked_f_pointer_allocated = 0; |
| |
| /* The numbering relation between the channels and clbs is: * |
| * * |
| * | IO | chan_ | CLB | chan_ | CLB | * |
| * |grid[0][2] | y[0][2] |grid[1][2] | y[1][2] | grid[2][2]| * |
| * +-----------+ +-----------+ +-----------+ * |
| * } capacity in * |
| * No channel chan_x[1][1] chan_x[2][1] } chan_width * |
| * } _x[1] * |
| * +-----------+ +-----------+ +-----------+ * |
| * | | chan_ | | chan_ | | * |
| * | IO | y[0][1] | CLB | y[1][1] | CLB | * |
| * |grid[0][1] | |grid[1][1] | |grid[2][1] | * |
| * | | | | | | * |
| * +-----------+ +-----------+ +-----------+ * |
| * } capacity in * |
| * chan_x[1][0] chan_x[2][0] } chan_width * |
| * } _x[0] * |
| * +-----------+ +-----------+ * |
| * No | | No | | * |
| * Channel | IO | Channel | IO | * |
| * |grid[1][0] | |grid[2][0] | * |
| * | | | | * |
| * +-----------+ +-----------+ * |
| * * |
| * {=======} {=======} * |
| * Capacity in Capacity in * |
| * chan_width_y[0] chan_width_y[1] * |
| * */ |
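| |
| /* Reading the diagram: chan_x[i][j] is the horizontal channel directly * |
| * above grid[i][j], and chan_y[i][j] is the vertical channel directly to * |
| * the right of grid[i][j]. */ |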
| |
| /******************** Subroutines local to route_common.c *******************/ |
| static t_trace_branch traceback_branch(int node, std::unordered_set<int>& main_branch_visited); |
| static std::pair<t_trace*, t_trace*> add_trace_non_configurable(t_trace* head, t_trace* tail, int node, std::unordered_set<int>& visited); |
| static std::pair<t_trace*, t_trace*> add_trace_non_configurable_recurr(int node, std::unordered_set<int>& visited, int depth = 0); |
| |
| static vtr::vector<ClusterNetId, std::vector<int>> load_net_rr_terminals(const t_rr_node_indices& L_rr_node_indices); |
| static vtr::vector<ClusterBlockId, std::vector<int>> load_rr_clb_sources(const t_rr_node_indices& L_rr_node_indices); |
| |
| static t_clb_opins_used alloc_and_load_clb_opins_used_locally(); |
| static void adjust_one_rr_occ_and_apcost(int inode, int add_or_sub, float pres_fac, float acc_fac); |
| |
| bool validate_traceback_recurr(t_trace* trace, std::set<int>& seen_rr_nodes); |
| static bool validate_trace_nodes(t_trace* head, const std::unordered_set<int>& trace_nodes); |
| static float get_single_rr_cong_cost(int inode); |
| |
| /************************** Subroutine definitions ***************************/ |
| |
| void save_routing(vtr::vector<ClusterNetId, t_trace*>& best_routing, |
| const t_clb_opins_used& clb_opins_used_locally, |
| t_clb_opins_used& saved_clb_opins_used_locally) { |
| /* This routine frees any routing currently held in best_routing, * |
| * then copies over the current routing (held in route_ctx.trace), and * |
| * finally sets route_ctx.trace[net_id].head and .tail to all NULLs so that the * |
| * connection to the saved routing is broken. This is necessary so * |
| * that the next iteration of the router does not free the saved * |
| * routing elements. Also saves any data about locally used clb_opins, * |
| * since this is also part of the routing. */ |
| |
| t_trace *tptr, *tempptr; |
| |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| for (auto net_id : cluster_ctx.clb_nlist.nets()) { |
| /* Free any previously saved routing. It is no longer best. */ |
| tptr = best_routing[net_id]; |
| while (tptr != nullptr) { |
| tempptr = tptr->next; |
| free_trace_data(tptr); |
| tptr = tempptr; |
| } |
| |
| /* Save a pointer to the current routing in best_routing. */ |
| best_routing[net_id] = route_ctx.trace[net_id].head; |
| |
| /* Set the current (working) routing to NULL so the current trace * |
| * elements won't be reused by the memory allocator. */ |
| |
| route_ctx.trace[net_id].head = nullptr; |
| route_ctx.trace[net_id].tail = nullptr; |
| route_ctx.trace_nodes[net_id].clear(); |
| } |
| |
| /* Save which OPINs are locally used. */ |
| saved_clb_opins_used_locally = clb_opins_used_locally; |
| } |
| |
| /* Deallocates any current routing in route_ctx.trace, and replaces it with * |
| * the routing in best_routing. best_routing is set to NULL to show that * |
| * it no longer points to a valid routing. NOTE: route_ctx.trace[net_id].tail * |
| * is not restored -- it is set to all NULLs since it is only used in * |
| * update_traceback. If you need the tail pointers restored, modify this * |
| * routine. Also restores the locally used opin data. */ |
| void restore_routing(vtr::vector<ClusterNetId, t_trace*>& best_routing, |
| t_clb_opins_used& clb_opins_used_locally, |
| const t_clb_opins_used& saved_clb_opins_used_locally) { |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| for (auto net_id : cluster_ctx.clb_nlist.nets()) { |
| /* Free any current routing. */ |
| pathfinder_update_path_cost(route_ctx.trace[net_id].head, -1, 0.f); |
| free_traceback(net_id); |
| |
| /* Set the current routing to the saved one. */ |
| route_ctx.trace[net_id].head = best_routing[net_id]; |
| pathfinder_update_path_cost(route_ctx.trace[net_id].head, 1, 0.f); |
| best_routing[net_id] = nullptr; /* No stored routing. */ |
| } |
| |
| /* Restore which OPINs are locally used. */ |
| clb_opins_used_locally = saved_clb_opins_used_locally; |
| } |
| |
| /* This routine finds a "magic cookie" for the routing and prints it. * |
| * Use this number as a routing serial number to ensure that programming * |
| * changes do not break the router. */ |
| void get_serial_num() { |
| int serial_num, inode; |
| t_trace* tptr; |
| |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& route_ctx = g_vpr_ctx.routing(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| serial_num = 0; |
| |
| for (auto net_id : cluster_ctx.clb_nlist.nets()) { |
| /* Global nets will have null trace_heads (never routed) so they * |
| * are not included in the serial number calculation. */ |
| |
| tptr = route_ctx.trace[net_id].head; |
| while (tptr != nullptr) { |
| inode = tptr->index; |
| serial_num += (size_t(net_id) + 1) |
| * (device_ctx.rr_nodes[inode].xlow() * (device_ctx.grid.width()) - device_ctx.rr_nodes[inode].yhigh()); |
| |
| serial_num -= device_ctx.rr_nodes[inode].ptc_num() * (size_t(net_id) + 1) * 10; |
| |
| serial_num -= device_ctx.rr_nodes[inode].type() * (size_t(net_id) + 1) * 100; |
| serial_num %= 2000000000; /* Prevent overflow */ |
| tptr = tptr->next; |
| } |
| } |
| VTR_LOG("Serial number (magic cookie) for the routing is: %d\n", serial_num); |
| } |
| |
| void try_graph(int width_fac, const t_router_opts& router_opts, t_det_routing_arch* det_routing_arch, std::vector<t_segment_inf>& segment_inf, t_chan_width_dist chan_width_dist, t_direct_inf* directs, int num_directs) { |
| auto& device_ctx = g_vpr_ctx.mutable_device(); |
| |
| t_graph_type graph_type; |
| if (router_opts.route_type == GLOBAL) { |
| graph_type = GRAPH_GLOBAL; |
| } else { |
| graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); |
| } |
| |
| /* Set the channel widths */ |
| t_chan_width chan_width = init_chan(width_fac, chan_width_dist); |
| |
| /* Free any old routing graph, if one exists. */ |
| free_rr_graph(); |
| |
| /* Set up the routing resource graph defined by this FPGA architecture. */ |
| int warning_count; |
| create_rr_graph(graph_type, |
| device_ctx.physical_tile_types, |
| device_ctx.grid, |
| chan_width, |
| device_ctx.num_arch_switches, |
| det_routing_arch, |
| segment_inf, |
| router_opts.base_cost_type, |
| router_opts.trim_empty_channels, |
| router_opts.trim_obs_channels, |
| router_opts.clock_modeling, |
| directs, num_directs, |
| &warning_count); |
| } |
| |
| bool try_route(int width_fac, |
| const t_router_opts& router_opts, |
| const t_analysis_opts& analysis_opts, |
| t_det_routing_arch* det_routing_arch, |
| std::vector<t_segment_inf>& segment_inf, |
| vtr::vector<ClusterNetId, float*>& net_delay, |
| std::shared_ptr<SetupHoldTimingInfo> timing_info, |
| std::shared_ptr<RoutingDelayCalculator> delay_calc, |
| t_chan_width_dist chan_width_dist, |
| t_direct_inf* directs, |
| int num_directs, |
| ScreenUpdatePriority first_iteration_priority) { |
| /* Attempts a routing via an iterated maze router algorithm. Width_fac * |
| * specifies the relative width of the channels, while the members of * |
| * router_opts determine the value of the costs assigned to routing * |
| * resource node, etc. det_routing_arch describes the detailed routing * |
| * architecture (connection and switch boxes) of the FPGA; it is used * |
| * only if a DETAILED routing has been selected. */ |
| |
| auto& device_ctx = g_vpr_ctx.mutable_device(); |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| |
| t_graph_type graph_type; |
| if (router_opts.route_type == GLOBAL) { |
| graph_type = GRAPH_GLOBAL; |
| } else { |
| graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR); |
| } |
| |
| /* Set the channel widths */ |
| t_chan_width chan_width = init_chan(width_fac, chan_width_dist); |
| |
| /* Set up the routing resource graph defined by this FPGA architecture. */ |
| int warning_count; |
| |
| create_rr_graph(graph_type, |
| device_ctx.physical_tile_types, |
| device_ctx.grid, |
| chan_width, |
| device_ctx.num_arch_switches, |
| det_routing_arch, |
| segment_inf, |
| router_opts.base_cost_type, |
| router_opts.trim_empty_channels, |
| router_opts.trim_obs_channels, |
| router_opts.clock_modeling, |
| directs, num_directs, |
| &warning_count); |
| |
| //Initialize drawing, now that we have an RR graph |
| init_draw_coords(width_fac); |
| |
| bool success = true; |
| |
| /* Allocate and load additional rr_graph information needed only by the router. */ |
| alloc_and_load_rr_node_route_structs(); |
| |
| init_route_structs(router_opts.bb_factor); |
| |
| if (cluster_ctx.clb_nlist.nets().empty()) { |
| VTR_LOG_WARN("No nets to route\n"); |
| } |
| |
| if (router_opts.router_algorithm == BREADTH_FIRST) { |
| VTR_LOG("Confirming router algorithm: BREADTH_FIRST.\n"); |
| success = try_breadth_first_route(router_opts); |
| } else { /* TIMING_DRIVEN route */ |
| VTR_LOG("Confirming router algorithm: TIMING_DRIVEN.\n"); |
| |
| IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types); |
| ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, intra_lb_pb_pin_lookup); |
| |
| success = try_timing_driven_route(router_opts, |
| analysis_opts, |
| segment_inf, |
| net_delay, |
| netlist_pin_lookup, |
| timing_info, |
| delay_calc, |
| first_iteration_priority); |
| |
| profiling::time_on_fanout_analysis(); |
| } |
| |
| return (success); |
| } |
| |
| bool feasible_routing() { |
| /* This routine checks to see if this is a resource-feasible routing. * |
| * That is, are all rr_node capacity limitations respected? It assumes * |
| * that the occupancy arrays are up to date when it is called. */ |
| |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.routing(); |
| |
| for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { |
| if (route_ctx.rr_node_route_inf[inode].occ() > device_ctx.rr_nodes[inode].capacity()) { |
| return (false); |
| } |
| } |
| |
| return (true); |
| } |
| |
| //Returns all RR nodes in the current routing which are congested |
| std::vector<int> collect_congested_rr_nodes() { |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.routing(); |
| |
| std::vector<int> congested_rr_nodes; |
| for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { |
| short occ = route_ctx.rr_node_route_inf[inode].occ(); |
| short capacity = device_ctx.rr_nodes[inode].capacity(); |
| |
| if (occ > capacity) { |
| congested_rr_nodes.push_back(inode); |
| } |
| } |
| return congested_rr_nodes; |
| } |
| |
| /* Returns a vector from [0..device_ctx.rr_nodes.size()-1] containing the set |
| * of nets using each RR node */ |
| std::vector<std::set<ClusterNetId>> collect_rr_node_nets() { |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.routing(); |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| |
| std::vector<std::set<ClusterNetId>> rr_node_nets(device_ctx.rr_nodes.size()); |
| for (ClusterNetId inet : cluster_ctx.clb_nlist.nets()) { |
| t_trace* trace_elem = route_ctx.trace[inet].head; |
| while (trace_elem) { |
| int rr_node = trace_elem->index; |
| |
| rr_node_nets[rr_node].insert(inet); |
| |
| trace_elem = trace_elem->next; |
| } |
| } |
| return rr_node_nets; |
| } |
| |
| void pathfinder_update_path_cost(t_trace* route_segment_start, |
| int add_or_sub, |
| float pres_fac) { |
| /* This routine updates the occupancy and pres_cost of the rr_nodes that are * |
| * affected by the portion of the routing of one net that starts at * |
| * route_segment_start. If route_segment_start is route_ctx.trace[net_id].head, the * |
| * cost of all the nodes in the routing of net net_id are updated. If * |
| * add_or_sub is -1 the net (or net portion) is ripped up, if it is 1 the * |
| * net is added to the routing. The size of pres_fac determines how severely * |
| * oversubscribed rr_nodes are penalized. */ |
| |
| t_trace* tptr; |
| |
| tptr = route_segment_start; |
| if (tptr == nullptr) /* No routing yet. */ |
| return; |
| |
| for (;;) { |
| pathfinder_update_single_node_cost(tptr->index, add_or_sub, pres_fac); |
| |
| if (tptr->iswitch == OPEN) { //End of branch |
| tptr = tptr->next; /* Skip next segment. */ |
| if (tptr == nullptr) |
| break; |
| } |
| |
| tptr = tptr->next; |
| |
| } /* End while loop -- did an entire traceback. */ |
| } |
| |
| void pathfinder_update_single_node_cost(int inode, int add_or_sub, float pres_fac) { |
| /* Updates pathfinder's congestion cost by either adding or removing the |
| * usage of a resource node. pres_cost is Pn in the Pathfinder paper. |
| * pres_cost is set according to the overuse that would result from having |
| * ONE MORE net use this routing node. */ |
| |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| int occ = route_ctx.rr_node_route_inf[inode].occ() + add_or_sub; |
| route_ctx.rr_node_route_inf[inode].set_occ(occ); |
| // can't have negative occupancy |
| VTR_ASSERT(occ >= 0); |
| |
| int capacity = device_ctx.rr_nodes[inode].capacity(); |
| if (occ < capacity) { |
| route_ctx.rr_node_route_inf[inode].pres_cost = 1.0; |
| } else { |
| route_ctx.rr_node_route_inf[inode].pres_cost = 1.0 + (occ + 1 - capacity) * pres_fac; |
| } |
| } |
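| |
| /* Illustrative example (not executed): with pres_fac = 0.5, a node of * |
| * capacity 1 whose occupancy has just risen to 2 is priced at * |
| * pres_cost = 1.0 + (2 + 1 - 1) * 0.5 = 2.0, * |
| * reflecting the overuse that ONE MORE net using the node would cause. */ |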
| |
| void pathfinder_update_cost(float pres_fac, float acc_fac) { |
| /* This routine recomputes the pres_cost and acc_cost of each routing * |
| * resource for the pathfinder algorithm after all nets have been routed. * |
| * It updates the accumulated cost by adding in the number of extra * |
| * signals sharing a resource right now (i.e. after each complete iteration) * |
| * times acc_fac. It also updates pres_cost, since pres_fac may have * |
| * changed. THIS ROUTINE ASSUMES THE OCCUPANCY VALUES IN RR_NODE ARE UP TO * |
| * DATE. */ |
| |
| int occ, capacity; |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { |
| occ = route_ctx.rr_node_route_inf[inode].occ(); |
| capacity = device_ctx.rr_nodes[inode].capacity(); |
| |
| if (occ > capacity) { |
| route_ctx.rr_node_route_inf[inode].acc_cost += (occ - capacity) * acc_fac; |
| route_ctx.rr_node_route_inf[inode].pres_cost = 1.0 + (occ + 1 - capacity) * pres_fac; |
| } |
| |
| /* If occ == capacity, we don't need to increase acc_cost, but a change * |
| * in pres_fac could have made it necessary to recompute the cost anyway. */ |
| |
| else if (occ == capacity) { |
| route_ctx.rr_node_route_inf[inode].pres_cost = 1.0 + pres_fac; |
| } |
| } |
| } |
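| |
| /* Illustrative example (not executed): with acc_fac = 1.0, a node of * |
| * capacity 1 shared by 3 nets at the end of an iteration accumulates * |
| * acc_cost += (3 - 1) * 1.0, a permanent penalty that persists even * |
| * after the congestion is later resolved. */ |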
| |
| void init_heap(const DeviceGrid& grid) { |
| if (heap != nullptr) { |
| vtr::free(heap + 1); |
| heap = nullptr; |
| } |
| heap_size = (grid.width() - 1) * (grid.height() - 1); |
| heap = (t_heap**)vtr::malloc(heap_size * sizeof(t_heap*)); |
| heap--; /* heap stores from [1..heap_size] */ |
| heap_tail = 1; |
| } |
| |
| /* Call this before you route any nets. It frees any old traceback and * |
| * sets the list of rr_nodes touched to empty. */ |
| void init_route_structs(int bb_factor) { |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| //Free any old tracebacks |
| for (auto net_id : cluster_ctx.clb_nlist.nets()) |
| free_traceback(net_id); |
| |
| //Allocate new tracebacks |
| route_ctx.trace.resize(cluster_ctx.clb_nlist.nets().size()); |
| route_ctx.trace_nodes.resize(cluster_ctx.clb_nlist.nets().size()); |
| |
| init_heap(device_ctx.grid); |
| |
| //Various look-ups |
| route_ctx.net_rr_terminals = load_net_rr_terminals(device_ctx.rr_node_indices); |
| route_ctx.route_bb = load_route_bb(bb_factor); |
| route_ctx.rr_blk_source = load_rr_clb_sources(device_ctx.rr_node_indices); |
| route_ctx.clb_opins_used_locally = alloc_and_load_clb_opins_used_locally(); |
| route_ctx.net_status.resize(cluster_ctx.clb_nlist.nets().size()); |
| |
| /* Check that things that should have been emptied after the last routing * |
| * really were. */ |
| |
| if (heap_tail != 1) { |
| VPR_FATAL_ERROR(VPR_ERROR_ROUTE, |
| "in init_route_structs. Heap is not empty.\n"); |
| } |
| } |
| |
| t_trace* |
| update_traceback(t_heap* hptr, ClusterNetId net_id) { |
| /* This routine adds the most recently finished wire segment to the * |
| * traceback linked list. The first connection starts with the net SOURCE * |
| * and begins at the structure pointed to by route_ctx.trace[net_id].head. Each * |
| * connection ends with a SINK. After each SINK, the next connection * |
| * begins (if the net has more than 2 pins). The first element after the * |
| * SINK gives the routing node on a previous piece of the routing, which is * |
| * the link from the existing net to this new piece of the net. * |
| * In each traceback I start at the end of a path and trace back through * |
| * its predecessors to the beginning. I have stored information on the * |
| * predecessor of each node to make traceback easy -- this sacrifices some * |
| * memory for easier code maintenance. This routine returns a pointer to * |
| * the first "new" node in the traceback (node not previously in trace). */ |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| auto& trace_nodes = route_ctx.trace_nodes[net_id]; |
| |
| VTR_ASSERT_SAFE(validate_trace_nodes(route_ctx.trace[net_id].head, trace_nodes)); |
| |
| t_trace_branch branch = traceback_branch(hptr->index, trace_nodes); |
| |
| VTR_ASSERT_SAFE(validate_trace_nodes(branch.head, trace_nodes)); |
| |
| t_trace* ret_ptr = nullptr; |
| if (route_ctx.trace[net_id].tail != nullptr) { |
| route_ctx.trace[net_id].tail->next = branch.head; /* Traceback ends with tptr */ |
| ret_ptr = branch.head->next; /* First new segment. */ |
| } else { /* This was the first "chunk" of the net's routing */ |
| route_ctx.trace[net_id].head = branch.head; |
| ret_ptr = branch.head; /* Whole traceback is new. */ |
| } |
| |
| route_ctx.trace[net_id].tail = branch.tail; |
| return (ret_ptr); |
| } |
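| |
| /* Illustrative traceback layout (sketch) for a 3-pin net: * |
| * SOURCE -> ... -> SINK1 [iswitch == OPEN] -> branch_node -> ... -> SINK2 * |
| * where branch_node repeats a node already in the trace, linking the * |
| * second connection onto the existing routing. */ |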
| |
| //Traces back a new routing branch starting from the specified 'node' and working backwards to any existing routing. |
| //Returns the new branch, and also updates trace_nodes for any new nodes which are included in the branch's traceback. |
| static t_trace_branch traceback_branch(int node, std::unordered_set<int>& trace_nodes) { |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.routing(); |
| |
| auto rr_type = device_ctx.rr_nodes[node].type(); |
| if (rr_type != SINK) { |
| VPR_FATAL_ERROR(VPR_ERROR_ROUTE, |
| "in traceback_branch: Expected type = SINK (%d).\n"); |
| } |
| |
| //We construct the main traceback by walking from the given node back to the source, |
| //according to the previous edges/nodes recorded in rr_node_route_inf by the router. |
| t_trace* branch_head = alloc_trace_data(); |
| t_trace* branch_tail = branch_head; |
| branch_head->index = node; |
| branch_head->iswitch = OPEN; |
| branch_head->next = nullptr; |
| |
| trace_nodes.insert(node); |
| |
| std::vector<int> new_nodes_added_to_traceback = {node}; |
| |
| auto iedge = route_ctx.rr_node_route_inf[node].prev_edge; |
| int inode = route_ctx.rr_node_route_inf[node].prev_node; |
| |
| while (inode != NO_PREVIOUS) { |
| //Add the current node to the head of traceback |
| t_trace* prev_ptr = alloc_trace_data(); |
| prev_ptr->index = inode; |
| prev_ptr->iswitch = device_ctx.rr_nodes[inode].edge_switch(iedge); |
| prev_ptr->next = branch_head; |
| branch_head = prev_ptr; |
| |
| if (trace_nodes.count(inode)) { |
| break; //Connected to existing routing |
| } |
| |
| trace_nodes.insert(inode); //Record this node as visited |
| new_nodes_added_to_traceback.push_back(inode); |
| |
| iedge = route_ctx.rr_node_route_inf[inode].prev_edge; |
| inode = route_ctx.rr_node_route_inf[inode].prev_node; |
| } |
| |
| //We next re-expand all the main-branch nodes to add any non-configurably connected side branches |
| // We are careful to do this *after* the main branch is constructed to ensure nodes which are both |
| // non-configurably connected *and* part of the main branch are only added to the traceback once. |
| for (int new_node : new_nodes_added_to_traceback) { |
| //Expand each main branch node |
| std::tie(branch_head, branch_tail) = add_trace_non_configurable(branch_head, branch_tail, new_node, trace_nodes); |
| } |
| |
| return {branch_head, branch_tail}; |
| } |
| |
| //Traces any non-configurable subtrees from branch_head, returning the new branch_head and updating trace_nodes |
| // |
| //This effectively does a depth-first traversal |
| static std::pair<t_trace*, t_trace*> add_trace_non_configurable(t_trace* head, t_trace* tail, int node, std::unordered_set<int>& trace_nodes) { |
| //Trace any non-configurable subtrees |
| t_trace* subtree_head = nullptr; |
| t_trace* subtree_tail = nullptr; |
| std::tie(subtree_head, subtree_tail) = add_trace_non_configurable_recurr(node, trace_nodes); |
| |
| //Add any non-empty subtree to tail of traceback |
| if (subtree_head && subtree_tail) { |
| if (!head) { //First subtree becomes head |
| head = subtree_head; |
| } else { //Later subtrees added to tail |
| VTR_ASSERT(tail); |
| tail->next = subtree_head; |
| } |
| |
| tail = subtree_tail; |
| } else { |
| VTR_ASSERT(subtree_head == nullptr && subtree_tail == nullptr); |
| } |
| |
| return {head, tail}; |
| } |
| |
| //Recursive helper function for add_trace_non_configurable() |
| static std::pair<t_trace*, t_trace*> add_trace_non_configurable_recurr(int node, std::unordered_set<int>& trace_nodes, int depth) { |
| t_trace* head = nullptr; |
| t_trace* tail = nullptr; |
| |
| //Record the non-configurable out-going edges |
| std::vector<t_edge_size> unvisited_non_configurable_edges; |
| auto& device_ctx = g_vpr_ctx.device(); |
| for (auto iedge : device_ctx.rr_nodes[node].non_configurable_edges()) { |
| VTR_ASSERT_SAFE(!device_ctx.rr_nodes[node].edge_is_configurable(iedge)); |
| |
| int to_node = device_ctx.rr_nodes[node].edge_sink_node(iedge); |
| |
| if (!trace_nodes.count(to_node)) { |
| unvisited_non_configurable_edges.push_back(iedge); |
| } |
| } |
| |
| if (unvisited_non_configurable_edges.size() == 0) { |
| //Base case: leaf node with no non-configurable edges |
| if (depth > 0) { //Arrived via non-configurable edge |
| VTR_ASSERT(!trace_nodes.count(node)); |
| head = alloc_trace_data(); |
| head->index = node; |
| head->iswitch = OPEN; |
| head->next = nullptr; |
| tail = head; |
| |
| trace_nodes.insert(node); |
| } |
| |
| } else { |
| //Recursive case: intermediate node with non-configurable edges |
| for (auto iedge : unvisited_non_configurable_edges) { |
| int to_node = device_ctx.rr_nodes[node].edge_sink_node(iedge); |
| int iswitch = device_ctx.rr_nodes[node].edge_switch(iedge); |
| |
| VTR_ASSERT(!trace_nodes.count(to_node)); |
| trace_nodes.insert(node); |
| |
| //Recurse |
| t_trace* subtree_head = nullptr; |
| t_trace* subtree_tail = nullptr; |
| std::tie(subtree_head, subtree_tail) = add_trace_non_configurable_recurr(to_node, trace_nodes, depth + 1); |
| |
| if (subtree_head && subtree_tail) { |
| //Add the non-empty sub-tree |
| |
| //Duplicate the original head as the new tail (for the new branch) |
| t_trace* intermediate_head = alloc_trace_data(); |
| intermediate_head->index = node; |
| intermediate_head->iswitch = iswitch; |
| intermediate_head->next = nullptr; |
| |
| intermediate_head->next = subtree_head; |
| |
| if (!head) { //First subtree becomes head |
| head = intermediate_head; |
| } else { //Later subtrees added to tail |
| VTR_ASSERT(tail); |
| tail->next = intermediate_head; |
| } |
| |
| tail = subtree_tail; |
| } else { |
| VTR_ASSERT(subtree_head == nullptr && subtree_tail == nullptr); |
| } |
| } |
| } |
| |
| return {head, tail}; |
| } |
| |
| /* The routine resets the path_cost to infinity for * |
| * all channel segments touched by previous routing phases. */ |
| void reset_path_costs(const std::vector<int>& visited_rr_nodes) { |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| for (auto node : visited_rr_nodes) { |
| route_ctx.rr_node_route_inf[node].path_cost = std::numeric_limits<float>::infinity(); |
| route_ctx.rr_node_route_inf[node].backward_path_cost = std::numeric_limits<float>::infinity(); |
| route_ctx.rr_node_route_inf[node].prev_node = NO_PREVIOUS; |
| route_ctx.rr_node_route_inf[node].prev_edge = NO_PREVIOUS; |
| } |
| } |
| |
| /* Returns the *congestion* cost of using this rr_node. */ |
| float get_rr_cong_cost(int inode) { |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| float cost = get_single_rr_cong_cost(inode); |
| |
| auto itr = device_ctx.rr_node_to_non_config_node_set.find(inode); |
| if (itr != device_ctx.rr_node_to_non_config_node_set.end()) { |
| for (int node : device_ctx.rr_non_config_node_sets[itr->second]) { |
| if (node == inode) { |
| continue; //Already included above |
| } |
| |
| cost += get_single_rr_cong_cost(node); |
| } |
| } |
| return (cost); |
| } |
| |
| /* Returns the congestion cost of using this rr_node, *ignoring* |
| * non-configurable edges */ |
| static float get_single_rr_cong_cost(int inode) { |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.routing(); |
| |
| auto cost_index = device_ctx.rr_nodes[inode].cost_index(); |
| float cost = device_ctx.rr_indexed_data[cost_index].base_cost |
| * route_ctx.rr_node_route_inf[inode].acc_cost |
| * route_ctx.rr_node_route_inf[inode].pres_cost; |
| return cost; |
| } |
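| |
| /* Illustrative example (not executed): a node with base_cost = 1.0, * |
| * acc_cost = 3.0 and pres_cost = 2.0 has a single-node congestion cost of * |
| * 1.0 * 3.0 * 2.0 = 6.0; get_rr_cong_cost() additionally folds in the * |
| * cost of any nodes non-configurably connected to it. */ |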
| |
| /* Mark all the SINKs of this net as targets by setting their target flags * |
| * to the number of times the net must connect to each SINK. Note that * |
| * this number can occasionally be greater than 1 -- think of connecting * |
| * the same net to two inputs of an and-gate (and-gate inputs are logically * |
| * equivalent, so both will connect to the same SINK). */ |
| void mark_ends(ClusterNetId net_id) { |
| unsigned int ipin; |
| int inode; |
| |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| for (ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { |
| inode = route_ctx.net_rr_terminals[net_id][ipin]; |
| route_ctx.rr_node_route_inf[inode].target_flag++; |
| } |
| } |
| |
| void mark_remaining_ends(const std::vector<int>& remaining_sinks) { |
| // like mark_ends, but only performs it for the remaining sinks of a net |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| for (int sink_node : remaining_sinks) |
| ++route_ctx.rr_node_route_inf[sink_node].target_flag; |
| } |
| |
| void node_to_heap(int inode, float total_cost, int prev_node, int prev_edge, float backward_path_cost, float R_upstream) { |
| /* Puts an rr_node on the heap, if the new cost given is lower than the * |
| * current path_cost to this channel segment. The index of its predecessor * |
| * is stored to make traceback easy. The index of the edge used to get * |
| * from its predecessor to it is also stored to make timing analysis, etc. * |
| * easy. The backward_path_cost and R_upstream values are used only by the * |
| * timing-driven router -- the breadth-first router ignores them. */ |
| |
| auto& route_ctx = g_vpr_ctx.routing(); |
| |
| if (total_cost >= route_ctx.rr_node_route_inf[inode].path_cost) |
| return; |
| |
| t_heap* hptr = alloc_heap_data(); |
| hptr->index = inode; |
| hptr->cost = total_cost; |
| VTR_ASSERT_SAFE(hptr->u.prev.node == NO_PREVIOUS); |
| VTR_ASSERT_SAFE(hptr->u.prev.edge == NO_PREVIOUS); |
| hptr->u.prev.node = prev_node; |
| hptr->u.prev.edge = prev_edge; |
| hptr->backward_path_cost = backward_path_cost; |
| hptr->R_upstream = R_upstream; |
| add_to_heap(hptr); |
| } |
| |
| void free_traceback(ClusterNetId net_id) { |
| /* Puts the entire traceback (old routing) for this net on the free list * |
| * and sets the route_ctx.trace[net_id].head pointers etc. for the net to NULL. */ |
| |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| if (route_ctx.trace.empty()) { |
| return; |
| } |
| |
| if (route_ctx.trace[net_id].head == nullptr) { |
| return; |
| } |
| |
| free_traceback(route_ctx.trace[net_id].head); |
| |
| route_ctx.trace[net_id].head = nullptr; |
| route_ctx.trace[net_id].tail = nullptr; |
| route_ctx.trace_nodes[net_id].clear(); |
| } |
| |
| void free_traceback(t_trace* tptr) { |
| while (tptr != nullptr) { |
| t_trace* tempptr = tptr->next; |
| free_trace_data(tptr); |
| tptr = tempptr; |
| } |
| } |
| |
| /* Allocates data structures into which the key routing data can be saved, * |
| * allowing the routing to be recovered later (e.g. after another routing * |
| * is attempted). */ |
| vtr::vector<ClusterNetId, t_trace*> alloc_saved_routing() { |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| vtr::vector<ClusterNetId, t_trace*> best_routing(cluster_ctx.clb_nlist.nets().size()); |
| |
| return (best_routing); |
| } |
| |
| //Calculates how many OPINs must be reserved (and allocates space for them) to |
| //respect 'instance' equivalence. |
| // |
| //TODO: At the moment this makes a significant simplifying assumption. It reserves |
| // all OPINs not connected to external nets. This ensures each signal leaving |
| // the logic block uses only a single OPIN. This is a safe, but pessmistic |
| // behaviour, which prevents any logic duplication (e.g. duplicating LUTs/BLEs) |
| // to drive multiple OPINs which could aid routability. |
| // |
| // Note that this only applies for 'instance' equivalence. 'full' equivalence |
| // does allow for multiple outputs to be used for a single signal. |
| static t_clb_opins_used alloc_and_load_clb_opins_used_locally() { |
| /* Allocates and loads the data needed to make the router reserve some CLB * |
| * output pins for connections made locally within a CLB (if the netlist * |
| * specifies that this is necessary). */ |
| |
| t_clb_opins_used clb_opins_used_locally; |
| int clb_pin, iclass, class_low, class_high; |
| |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| |
| clb_opins_used_locally.resize(cluster_ctx.clb_nlist.blocks().size()); |
| |
| for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { |
| auto type = physical_tile_type(blk_id); |
| |
| get_class_range_for_block(blk_id, &class_low, &class_high); |
| clb_opins_used_locally[blk_id].resize(type->num_class); |
| |
| if (is_io_type(type)) continue; |
| |
| int pin_low = 0; |
| int pin_high = 0; |
| get_pin_range_for_block(blk_id, &pin_low, &pin_high); |
| |
| for (clb_pin = pin_low; clb_pin <= pin_high; clb_pin++) { |
| auto net = cluster_ctx.clb_nlist.block_net(blk_id, clb_pin); |
| |
| if (!net || cluster_ctx.clb_nlist.net_sinks(net).size() == 0) { |
| //There is no external net connected to this pin |
| |
| iclass = type->pin_class[clb_pin]; |
| |
| if (type->class_inf[iclass].equivalence == PortEquivalence::INSTANCE) { |
| //The pin is part of an instance equivalent class, hence we need to reserve a pin |
| |
| VTR_ASSERT(type->class_inf[iclass].type == DRIVER); |
| |
| /* Check to make sure class is in same range as that assigned to block */ |
| VTR_ASSERT(iclass >= class_low && iclass <= class_high); |
| |
| //We push back OPEN to reserve space to store the exact pin which |
| //will be reserved (determined later) |
| clb_opins_used_locally[blk_id][iclass].emplace_back(OPEN); |
| } |
| } |
| } |
| } |
| |
| return (clb_opins_used_locally); |
| } |
| |
| /* The trace lists are only freed after use by the timing-driven placer. */ |
| /* Do not free them after use by the router, since the stats and draw */ |
| /* routines use the trace values. */ |
| void free_trace_structs() { |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| if (route_ctx.trace.empty()) { |
| return; |
| } |
| |
| for (auto net_id : cluster_ctx.clb_nlist.nets()) { |
| free_traceback(net_id); |
| |
| if (route_ctx.trace[net_id].head) { |
| free(route_ctx.trace[net_id].head); |
| free(route_ctx.trace[net_id].tail); |
| } |
| route_ctx.trace[net_id].head = nullptr; |
| route_ctx.trace[net_id].tail = nullptr; |
| } |
| } |
| |
| void free_route_structs() { |
| /* Frees the temporary storage needed only during the routing. The * |
| * final routing result is not freed. */ |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| if (heap != nullptr) { |
| //Free the individual heap elements (calls destructors) |
| for (int i = 1; i < num_heap_allocated; i++) { |
| VTR_LOG("Freeing %p\n", heap[i]); |
| vtr::chunk_delete(heap[i], &heap_ch); |
| } |
| |
| // coverity[offset_free : Intentional] |
| free(heap + 1); |
| |
| heap = nullptr; /* Defensive coding: crash hard if I use these. */ |
| } |
| |
| if (heap_free_head != nullptr) { |
| t_heap* curr = heap_free_head; |
| while (curr) { |
| t_heap* tmp = curr; |
| curr = curr->u.next; |
| |
| vtr::chunk_delete(tmp, &heap_ch); |
| } |
| |
| heap_free_head = nullptr; |
| } |
| if (route_ctx.route_bb.size() != 0) { |
| route_ctx.route_bb.clear(); |
| } |
| |
| /*free the memory chunks that were used by heap and linked f pointer */ |
| free_chunk_memory(&heap_ch); |
| } |
| |
| /* Frees the data structures needed to save a routing. */ |
| void free_saved_routing(vtr::vector<ClusterNetId, t_trace*>& best_routing) { |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| for (auto net_id : cluster_ctx.clb_nlist.nets()) { |
| if (best_routing[net_id] != nullptr) { |
| free(best_routing[net_id]); |
| best_routing[net_id] = nullptr; |
| } |
| } |
| } |
| |
| void alloc_and_load_rr_node_route_structs() { |
| /* Allocates some extra information about each rr_node that is used only * |
| * during routing. */ |
| |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| route_ctx.rr_node_route_inf.resize(device_ctx.rr_nodes.size()); |
| reset_rr_node_route_structs(); |
| } |
| |
| void reset_rr_node_route_structs() { |
| /* Resets some extra information about each rr_node that is used only * |
| * during routing. */ |
| |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.rr_nodes.size())); |
| |
| for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { |
| route_ctx.rr_node_route_inf[inode].prev_node = NO_PREVIOUS; |
| route_ctx.rr_node_route_inf[inode].prev_edge = NO_PREVIOUS; |
| route_ctx.rr_node_route_inf[inode].pres_cost = 1.0; |
| route_ctx.rr_node_route_inf[inode].acc_cost = 1.0; |
| route_ctx.rr_node_route_inf[inode].path_cost = std::numeric_limits<float>::infinity(); |
| route_ctx.rr_node_route_inf[inode].backward_path_cost = std::numeric_limits<float>::infinity(); |
| route_ctx.rr_node_route_inf[inode].target_flag = 0; |
| route_ctx.rr_node_route_inf[inode].set_occ(0); |
| } |
| } |
| |
| /* Allocates and loads the route_ctx.net_rr_terminals data structure. For each net it stores the rr_node * |
| * index of the SOURCE of the net and all the SINKs of the net [clb_nlist.nets()][clb_nlist.net_pins()]. * |
| * Entry [inet][pnum] stores the rr index corresponding to the SOURCE (opin) or SINK (ipin) of the pin. */ |
| static vtr::vector<ClusterNetId, std::vector<int>> load_net_rr_terminals(const t_rr_node_indices& L_rr_node_indices) { |
| vtr::vector<ClusterNetId, std::vector<int>> net_rr_terminals; |
| |
| int inode, i, j, node_block_pin, iclass; |
| |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& place_ctx = g_vpr_ctx.placement(); |
| |
| auto nets = cluster_ctx.clb_nlist.nets(); |
| net_rr_terminals.resize(nets.size()); |
| |
| for (auto net_id : cluster_ctx.clb_nlist.nets()) { |
| auto net_pins = cluster_ctx.clb_nlist.net_pins(net_id); |
| net_rr_terminals[net_id].resize(net_pins.size()); |
| |
| int pin_count = 0; |
| for (auto pin_id : cluster_ctx.clb_nlist.net_pins(net_id)) { |
| auto block_id = cluster_ctx.clb_nlist.pin_block(pin_id); |
| i = place_ctx.block_locs[block_id].loc.x; |
| j = place_ctx.block_locs[block_id].loc.y; |
| auto type = physical_tile_type(block_id); |
| |
| /* In the routing graph, each (x, y) location has unique pins on it |
| * so when there is capacity, blocks are packed and their pin numbers |
| * are offset to get their actual rr_node */ |
| node_block_pin = cluster_ctx.clb_nlist.pin_physical_index(pin_id); |
| |
| iclass = type->pin_class[node_block_pin]; |
| |
| inode = get_rr_node_index(L_rr_node_indices, i, j, (pin_count == 0 ? SOURCE : SINK), /* First pin is driver */ |
| iclass); |
| net_rr_terminals[net_id][pin_count] = inode; |
| pin_count++; |
| } |
| } |
| |
| return net_rr_terminals; |
| } |
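| |
| /* Illustrative layout (sketch): for a net with pins {driver, sink_a, sink_b}, * |
| * net_rr_terminals[net] = { SOURCE rr index of driver, SINK rr index of * |
| * sink_a, SINK rr index of sink_b }. */ |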
| |
| /* Saves the rr_node corresponding to each SOURCE and SINK in each CLB * |
| * in the FPGA. Currently only the SOURCE rr_node values are used, and * |
| * they are used only to reserve pins for locally used OPINs in the router. * |
| * [0..cluster_ctx.clb_nlist.blocks().size()-1][0..num_class-1]. * |
| * The values for blocks that are pads are NOT valid. */ |
| static vtr::vector<ClusterBlockId, std::vector<int>> load_rr_clb_sources(const t_rr_node_indices& L_rr_node_indices) { |
| vtr::vector<ClusterBlockId, std::vector<int>> rr_blk_source; |
| |
| int i, j, iclass, inode; |
| int class_low, class_high; |
| t_rr_type rr_type; |
| |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& place_ctx = g_vpr_ctx.placement(); |
| |
| rr_blk_source.resize(cluster_ctx.clb_nlist.blocks().size()); |
| |
| for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { |
| auto type = physical_tile_type(blk_id); |
| get_class_range_for_block(blk_id, &class_low, &class_high); |
| rr_blk_source[blk_id].resize(type->num_class); |
| for (iclass = 0; iclass < type->num_class; iclass++) { |
| if (iclass >= class_low && iclass <= class_high) { |
| i = place_ctx.block_locs[blk_id].loc.x; |
| j = place_ctx.block_locs[blk_id].loc.y; |
| |
| if (type->class_inf[iclass].type == DRIVER) |
| rr_type = SOURCE; |
| else |
| rr_type = SINK; |
| |
| inode = get_rr_node_index(L_rr_node_indices, i, j, rr_type, iclass); |
| rr_blk_source[blk_id][iclass] = inode; |
| } else { |
| rr_blk_source[blk_id][iclass] = OPEN; |
| } |
| } |
| } |
| |
| return rr_blk_source; |
| } |
| |
| vtr::vector<ClusterNetId, t_bb> load_route_bb(int bb_factor) { |
| vtr::vector<ClusterNetId, t_bb> route_bb; |
| |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| |
| auto nets = cluster_ctx.clb_nlist.nets(); |
| route_bb.resize(nets.size()); |
| for (auto net_id : nets) { |
| route_bb[net_id] = load_net_route_bb(net_id, bb_factor); |
| } |
| return route_bb; |
| } |
| |
| t_bb load_net_route_bb(ClusterNetId net_id, int bb_factor) { |
| /* |
| * This routine loads the bounding box used to limit the space |
| * searched by the maze router when routing a specific net. The search is |
| * limited to channels contained with the net bounding box expanded |
| * by bb_factor channels on each side. For example, if bb_factor is |
| * 0, the maze router must route each net within its bounding box. |
| * If bb_factor = max(device_ctx.grid.width()-1, device_ctx.grid.height()-1), |
| * the maze router will search every channel in |
| * the FPGA if necessary. The bounding boxes returned by this routine |
| * differ from the ones used by the placer in that they are |
| * clipped to lie within (0,0) and (device_ctx.grid.width()-1,device_ctx.grid.height()-1) |
| * rather than (1,1) and (device_ctx.grid.width()-1,device_ctx.grid.height()-1). |
| */ |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.routing(); |
| |
| //Ensure bb_factor is bounded by the device size |
| //This ensures that if someone passes in an extremely large bb_factor |
| //(e.g. std::numeric_limits<int>::max()) the later addition/subtraction |
| //of bb_factor will not cause under/overflow |
| int max_dim = std::max<int>(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); |
| bb_factor = std::min(bb_factor, max_dim); |
| |
| int driver_rr = route_ctx.net_rr_terminals[net_id][0]; |
| const t_rr_node& source_node = device_ctx.rr_nodes[driver_rr]; |
| VTR_ASSERT(source_node.type() == SOURCE); |
| |
| VTR_ASSERT(source_node.xlow() <= source_node.xhigh()); |
| VTR_ASSERT(source_node.ylow() <= source_node.yhigh()); |
| |
| int xmin = source_node.xlow(); |
| int ymin = source_node.ylow(); |
| int xmax = source_node.xhigh(); |
| int ymax = source_node.yhigh(); |
| |
| auto net_sinks = cluster_ctx.clb_nlist.net_sinks(net_id); |
| for (size_t ipin = 1; ipin < net_sinks.size() + 1; ++ipin) { //Start at 1 since looping through sinks |
| int sink_rr = route_ctx.net_rr_terminals[net_id][ipin]; |
| const t_rr_node& sink_node = device_ctx.rr_nodes[sink_rr]; |
| VTR_ASSERT(sink_node.type() == SINK); |
| |
| VTR_ASSERT(sink_node.xlow() <= sink_node.xhigh()); |
| VTR_ASSERT(sink_node.ylow() <= sink_node.yhigh()); |
| |
| xmin = std::min<int>(xmin, sink_node.xlow()); |
| xmax = std::max<int>(xmax, sink_node.xhigh()); |
| ymin = std::min<int>(ymin, sink_node.ylow()); |
| ymax = std::max<int>(ymax, sink_node.yhigh()); |
| } |
| |
| /* Want the channels on all 4 sides to be usable, even if bb_factor = 0. */ |
| xmin -= 1; |
| ymin -= 1; |
| |
| /* Expand the net bounding box by bb_factor, then clip to the physical * |
| * chip area. */ |
| |
| t_bb bb; |
| |
| bb.xmin = std::max<int>(xmin - bb_factor, 0); |
| bb.xmax = std::min<int>(xmax + bb_factor, device_ctx.grid.width() - 1); |
| bb.ymin = std::max<int>(ymin - bb_factor, 0); |
| bb.ymax = std::min<int>(ymax + bb_factor, device_ctx.grid.height() - 1); |
| |
| return bb; |
| } |
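| |
| /* Illustrative example (not executed): on a 10 x 10 grid, a net spanning * |
| * (3,4) to (5,6) with bb_factor = 2 yields xmin = max(3-1-2, 0) = 0, * |
| * xmax = min(5+2, 9) = 7, ymin = max(4-1-2, 0) = 1, ymax = min(6+2, 9) = 8. */ |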
| |
| void add_to_mod_list(int inode, std::vector<int>& modified_rr_node_inf) { |
| auto& route_ctx = g_vpr_ctx.routing(); |
| |
| if (std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)) { |
| modified_rr_node_inf.push_back(inode); |
| } |
| } |
| |
| namespace heap_ { |
| size_t parent(size_t i); |
| size_t left(size_t i); |
| size_t right(size_t i); |
| size_t size(); |
| void expand_heap_if_full(); |
| |
| size_t parent(size_t i) { return i >> 1; } |
| // child indices of a heap |
| size_t left(size_t i) { return i << 1; } |
| size_t right(size_t i) { return (i << 1) + 1; } |
| size_t size() { return static_cast<size_t>(heap_tail - 1); } // heap[0] is not a valid element |
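| |
| // Illustrative index math for this 1-based heap (not executed): |
| // parent(5) == 2, left(2) == 4, right(2) == 5; heap[1] is the root and |
| // size() == heap_tail - 1 since slot 0 is unused. |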
| |
| // make a heap rooted at index i by **sifting down** in O(lgn) time |
| void sift_down(size_t hole) { |
| t_heap* head{heap[hole]}; |
| size_t child{left(hole)}; |
| while ((int)child < heap_tail) { |
| if ((int)child + 1 < heap_tail && heap[child + 1]->cost < heap[child]->cost) |
| ++child; |
| if (heap[child]->cost < head->cost) { |
| heap[hole] = heap[child]; |
| hole = child; |
| child = left(child); |
| } else |
| break; |
| } |
| heap[hole] = head; |
| } |
| |
| // runs in O(n) time by sifting down; the least work is done on the most elements: 1 swap for bottom layer, 2 swap for 2nd, ... lgn swap for top |
| // 1*(n/2) + 2*(n/4) + 3*(n/8) + ... + lgn*1 = 2n (sum of i/2^i) |
| void build_heap() { |
| // second half of heap are leaves |
| for (size_t i = heap_tail >> 1; i != 0; --i) |
| sift_down(i); |
| } |
| |
| // O(lgn) sifting up to maintain heap property after insertion (should sift down when building heap) |
| void sift_up(size_t leaf, t_heap* const node) { |
| while ((leaf > 1) && (node->cost < heap[parent(leaf)]->cost)) { |
| // sift hole up |
| heap[leaf] = heap[parent(leaf)]; |
| leaf = parent(leaf); |
| } |
| heap[leaf] = node; |
| } |
| |
| void expand_heap_if_full() { |
| if (heap_tail > heap_size) { /* Heap is full */ |
| heap_size *= 2; |
| heap = (t_heap**)vtr::realloc((void*)(heap + 1), |
| heap_size * sizeof(t_heap*)); |
| heap--; /* heap goes from [1..heap_size] */ |
| } |
| } |
| |
| // adds an element to the back of heap and expand if necessary, but does not maintain heap property |
| void push_back(t_heap* const hptr) { |
| expand_heap_if_full(); |
| heap[heap_tail] = hptr; |
| ++heap_tail; |
| } |
| |
| void push_back_node(int inode, float total_cost, int prev_node, int prev_edge, float backward_path_cost, float R_upstream) { |
| /* Puts an rr_node on the heap with the same condition as node_to_heap, |
| * but does not fix the heap property yet, as that is more efficiently done from |
| * bottom up with build_heap */ |
| |
| auto& route_ctx = g_vpr_ctx.routing(); |
| if (total_cost >= route_ctx.rr_node_route_inf[inode].path_cost) |
| return; |
| |
| t_heap* hptr = alloc_heap_data(); |
| hptr->index = inode; |
| hptr->cost = total_cost; |
| hptr->u.prev.node = prev_node; |
| hptr->u.prev.edge = prev_edge; |
| hptr->backward_path_cost = backward_path_cost; |
| hptr->R_upstream = R_upstream; |
| push_back(hptr); |
| } |
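| |
| /* Typical usage (sketch): bulk-load candidates without maintaining the * |
| * heap property, then heapify once in O(n): * |
| * heap_::push_back_node(inode, cost, prev_node, prev_edge, bpc, R); * |
| * ... // repeat for each candidate node * |
| * heap_::build_heap(); // restore heap property bottom-up * |
| */ |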
| |
| bool is_valid() { |
| for (size_t i = 1; (int)i <= heap_tail >> 1; ++i) { |
| if ((int)left(i) < heap_tail && heap[left(i)]->cost < heap[i]->cost) return false; |
| if ((int)right(i) < heap_tail && heap[right(i)]->cost < heap[i]->cost) return false; |
| } |
| return true; |
| } |
| // extract every element and print it |
| void pop_heap() { |
| while (!is_empty_heap()) |
| VTR_LOG("%e ", get_heap_head()->cost); |
| VTR_LOG("\n"); |
| } |
| // print every element; not necessarily in order for minheap |
| void print_heap() { |
| for (int i = 1; i < (heap_tail >> 1); ++i) |
| VTR_LOG("(%e %e %e) ", heap[i]->cost, heap[left(i)]->cost, heap[right(i)]->cost); |
| VTR_LOG("\n"); |
| } |
| // verify correctness of extract top by making a copy, sorting it, and iterating it at the same time as extraction |
| void verify_extract_top() { |
| constexpr float float_epsilon = 1e-20; |
| std::cout << "copying heap\n"; |
| std::vector<t_heap*> heap_copy{heap + 1, heap + heap_tail}; |
| // sort based on cost with cheapest first |
| VTR_ASSERT(heap_copy.size() == size()); |
| std::sort(begin(heap_copy), end(heap_copy), |
| [](const t_heap* a, const t_heap* b) { |
| return a->cost < b->cost; |
| }); |
| std::cout << "starting to compare top elements\n"; |
| size_t i = 0; |
| while (!is_empty_heap()) { |
| while (heap_copy[i]->index == OPEN) |
| ++i; // skip the ones that won't be extracted |
| auto top = get_heap_head(); |
| if (std::abs(top->cost - heap_copy[i]->cost) > float_epsilon) |
| std::cout << "mismatch with sorted " << top << '(' << top->cost << ") " << heap_copy[i] << '(' << heap_copy[i]->cost << ")\n"; |
| ++i; |
| } |
| if (i != heap_copy.size()) |
| std::cout << "did not finish extracting: " << i << " vs " << heap_copy.size() << std::endl; |
| else |
| std::cout << "extract top working as intended\n"; |
| } |
| } // namespace heap_ |
| // adds to heap and maintains heap quality |
| void add_to_heap(t_heap* hptr) { |
| heap_::expand_heap_if_full(); |
| // start with undefined hole |
| ++heap_tail; |
| heap_::sift_up(heap_tail - 1, hptr); |
| } |
| |
| /*WMF: peeking accessor :) */ |
| bool is_empty_heap() { |
| return (heap_tail == 1); |
| } |
| |
| t_heap* |
| get_heap_head() { |
| /* Returns a pointer to the smallest element on the heap, or NULL if the * |
| * heap is empty. Invalid (index == OPEN) entries on the heap are never * |
| * returned -- they are just skipped over. */ |
| |
| t_heap* cheapest; |
| size_t hole, child; |
| |
| do { |
| if (heap_tail == 1) { /* Empty heap. */ |
| VTR_LOG_WARN("Empty heap occurred in get_heap_head.\n"); |
| return (nullptr); |
| } |
| |
| cheapest = heap[1]; |
| |
| hole = 1; |
| child = 2; |
| --heap_tail; |
| while ((int)child < heap_tail) { |
| if (heap[child + 1]->cost < heap[child]->cost) |
| ++child; // become right child |
| heap[hole] = heap[child]; |
| hole = child; |
| child = heap_::left(child); |
| } |
| heap_::sift_up(hole, heap[heap_tail]); |
| |
| } while (cheapest->index == OPEN); /* Get another one if invalid entry. */ |
| |
| return (cheapest); |
| } |
| |
| void empty_heap() { |
| for (int i = 1; i < heap_tail; i++) |
| free_heap_data(heap[i]); |
| |
| heap_tail = 1; |
| } |
| |
| t_heap* |
| alloc_heap_data() { |
| if (heap_free_head == nullptr) { /* No elements on the free list */ |
| heap_free_head = vtr::chunk_new<t_heap>(&heap_ch); |
| } |
| |
| //Extract the head |
| t_heap* temp_ptr = heap_free_head; |
| heap_free_head = heap_free_head->u.next; |
| |
| num_heap_allocated++; |
| |
| //Reset |
| temp_ptr->u.next = nullptr; |
| temp_ptr->cost = 0.; |
| temp_ptr->backward_path_cost = 0.; |
| temp_ptr->R_upstream = 0.; |
| temp_ptr->index = OPEN; |
| temp_ptr->u.prev.node = NO_PREVIOUS; |
| temp_ptr->u.prev.edge = NO_PREVIOUS; |
| return (temp_ptr); |
| } |
| |
| void free_heap_data(t_heap* hptr) { |
| hptr->u.next = heap_free_head; |
| heap_free_head = hptr; |
| num_heap_allocated--; |
| } |
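| |
| /* Free-list pattern used above (sketch, not executed): * |
| * t_heap* h = alloc_heap_data(); // pops heap_free_head, else chunk-allocates * |
| * free_heap_data(h); // pushes h back onto heap_free_head for reuse, * |
| * // so no memory is returned until free_route_structs(). */ |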
| |
| void invalidate_heap_entries(int sink_node, int ipin_node) { |
| /* Marks all the heap entries consisting of sink_node, where it was reached * |
| * via ipin_node, as invalid (OPEN). Used only by the breadth_first router * |
| * and even then only in rare circumstances. */ |
| |
| for (int i = 1; i < heap_tail; i++) { |
| if (heap[i]->index == sink_node) { |
| if (heap[i]->u.prev.node == ipin_node) { |
| heap[i]->index = OPEN; /* Invalid. */ |
| break; |
| } |
| } |
| } |
| } |
| |
| t_trace* |
| alloc_trace_data() { |
| t_trace* temp_ptr; |
| |
| if (trace_free_head == nullptr) { /* No elements on the free list */ |
| trace_free_head = (t_trace*)vtr::chunk_malloc(sizeof(t_trace), &trace_ch); |
| trace_free_head->next = nullptr; |
| } |
| temp_ptr = trace_free_head; |
| trace_free_head = trace_free_head->next; |
| num_trace_allocated++; |
| return (temp_ptr); |
| } |
| |
| void free_trace_data(t_trace* tptr) { |
| /* Puts the traceback structure pointed to by tptr on the free list. */ |
| |
| tptr->next = trace_free_head; |
| trace_free_head = tptr; |
| num_trace_allocated--; |
| } |
| |
| void print_route(FILE* fp, const vtr::vector<ClusterNetId, t_traceback>& tracebacks) { |
| if (tracebacks.empty()) return; //Only if routing exists |
| |
| auto& place_ctx = g_vpr_ctx.placement(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| for (auto net_id : cluster_ctx.clb_nlist.nets()) { |
| if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { |
| fprintf(fp, "\n\nNet %zu (%s)\n\n", size_t(net_id), cluster_ctx.clb_nlist.net_name(net_id).c_str()); |
| if (cluster_ctx.clb_nlist.net_sinks(net_id).size() == 0) { |
| fprintf(fp, "\n\nUsed in local cluster only, reserved one CLB pin\n\n"); |
| } else { |
| t_trace* tptr = route_ctx.trace[net_id].head; |
| |
| while (tptr != nullptr) { |
| int inode = tptr->index; |
| t_rr_type rr_type = device_ctx.rr_nodes[inode].type(); |
| int ilow = device_ctx.rr_nodes[inode].xlow(); |
| int jlow = device_ctx.rr_nodes[inode].ylow(); |
| |
| fprintf(fp, "Node:\t%d\t%6s (%d,%d) ", inode, |
| device_ctx.rr_nodes[inode].type_string(), ilow, jlow); |
| |
| if ((ilow != device_ctx.rr_nodes[inode].xhigh()) |
| || (jlow != device_ctx.rr_nodes[inode].yhigh())) |
| fprintf(fp, "to (%d,%d) ", device_ctx.rr_nodes[inode].xhigh(), |
| device_ctx.rr_nodes[inode].yhigh()); |
| |
| switch (rr_type) { |
| case IPIN: |
| case OPIN: |
| if (is_io_type(device_ctx.grid[ilow][jlow].type)) { |
| fprintf(fp, " Pad: "); |
| } else { /* Logic block pin. */ |
| fprintf(fp, " Pin: "); |
| } |
| break; |
| |
| case CHANX: |
| case CHANY: |
| fprintf(fp, " Track: "); |
| break; |
| |
| case SOURCE: |
| case SINK: |
| if (is_io_type(device_ctx.grid[ilow][jlow].type)) { |
| fprintf(fp, " Pad: "); |
| } else { /* Logic block class. */ |
| fprintf(fp, " Class: "); |
| } |
| break; |
| |
| default: |
| VPR_FATAL_ERROR(VPR_ERROR_ROUTE, |
| "in print_route: Unexpected traceback element type: %d (%s).\n", |
| rr_type, device_ctx.rr_nodes[inode].type_string()); |
| break; |
| } |
| |
| fprintf(fp, "%d ", device_ctx.rr_nodes[inode].ptc_num()); |
| |
| if (!is_io_type(device_ctx.grid[ilow][jlow].type) && (rr_type == IPIN || rr_type == OPIN)) { |
| int pin_num = device_ctx.rr_nodes[inode].ptc_num(); |
| int xoffset = device_ctx.grid[ilow][jlow].width_offset; |
| int yoffset = device_ctx.grid[ilow][jlow].height_offset; |
| ClusterBlockId iblock = place_ctx.grid_blocks[ilow - xoffset][jlow - yoffset].blocks[0]; |
| VTR_ASSERT(iblock); |
| t_pb_graph_pin* pb_pin = get_pb_graph_node_pin_from_block_pin(iblock, pin_num); |
| t_pb_type* pb_type = pb_pin->parent_node->pb_type; |
| fprintf(fp, " %s.%s[%d] ", pb_type->name, pb_pin->port->name, pb_pin->pin_number); |
| } |
| |
| /* Also print the switch type used to reach this node -- useful * |
| * when debugging the routing. */ |
| fprintf(fp, "Switch: %d", tptr->iswitch); |
| |
| fprintf(fp, "\n"); |
| |
| tptr = tptr->next; |
| } |
| } |
| } else { /* Global net. Never routed. */ |
| fprintf(fp, "\n\nNet %zu (%s): global net connecting:\n\n", size_t(net_id), |
| cluster_ctx.clb_nlist.net_name(net_id).c_str()); |
| |
| for (auto pin_id : cluster_ctx.clb_nlist.net_pins(net_id)) { |
| ClusterBlockId block_id = cluster_ctx.clb_nlist.pin_block(pin_id); |
| int pin_index = cluster_ctx.clb_nlist.pin_physical_index(pin_id); |
| int iclass = physical_tile_type(block_id)->pin_class[pin_index]; |
| |
| fprintf(fp, "Block %s (#%zu) at (%d,%d), Pin class %d.\n", |
| cluster_ctx.clb_nlist.block_name(block_id).c_str(), size_t(block_id), |
| place_ctx.block_locs[block_id].loc.x, |
| place_ctx.block_locs[block_id].loc.y, |
| iclass); |
| } |
| } |
| } |
| } |
| |
| /* Prints out the routing to file route_file. */ |
| void print_route(const char* placement_file, const char* route_file) { |
| FILE* fp; |
| |
| fp = fopen(route_file, "w"); |
| |
| auto& place_ctx = g_vpr_ctx.placement(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| |
| fprintf(fp, "Placement_File: %s Placement_ID: %s\n", placement_file, place_ctx.placement_id.c_str()); |
| |
| fprintf(fp, "Array size: %zu x %zu logic blocks.\n", device_ctx.grid.width(), device_ctx.grid.height()); |
| fprintf(fp, "\nRouting:"); |
| |
| print_route(fp, route_ctx.trace); |
| |
| fclose(fp); |
| |
| if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_MEM)) { |
| fp = vtr::fopen(getEchoFileName(E_ECHO_MEM), "w"); |
| fprintf(fp, "\nNum_heap_allocated: %d Num_trace_allocated: %d\n", |
| num_heap_allocated, num_trace_allocated); |
| fprintf(fp, "Num_linked_f_pointer_allocated: %d\n", |
| num_linked_f_pointer_allocated); |
| fclose(fp); |
| } |
| |
| //Save the digest of the route file |
| route_ctx.routing_id = vtr::secure_digest_file(route_file); |
| } |
| |
//To ensure the router can only swap pins which are actually logically equivalent, some block output pins must be
//reserved in certain cases.
//
// In the RR graph, output pin equivalence is modelled by a single SRC node connected to (multiple) OPINs, modelling
// that each of the OPINs is logically equivalent (i.e. it doesn't matter through which OPIN the router routes a
// signal, so long as it comes from the appropriate SRC).
//
// This correctly models 'full' equivalence (e.g. if there is a full crossbar between the outputs), but is too
// optimistic for 'instance' equivalence (which typically models the pin equivalence achievable by swapping sub-block
// instances like BLEs). In particular, for the 'instance' equivalence case, some of the 'equivalent' block outputs
// may be used by internal signals which are routed entirely *within* the block (i.e. signals which never leave
// the block). These signals effectively 'use up' an output pin, which should then be unavailable to the router.
//
// To model this we 'reserve' these locally used outputs, ensuring that the router will not use them (if it did,
// this would equate to duplicating a BLE into an already in-use BLE instance, which is clearly incorrect).
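//
// For example (a sketch with made-up numbers): suppose a CLB contains four BLEs whose outputs form a single
// 'instance'-equivalent class, and two of those BLE outputs drive only loads *inside* the CLB. Then only two
// of the four OPINs in the class are genuinely available to the router, so reserve_locally_used_opins() below
// marks two OPINs (preferring those with the lowest congestion cost) as occupied before routing.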
| void reserve_locally_used_opins(float pres_fac, float acc_fac, bool rip_up_local_opins) { |
| int num_local_opin, inode, from_node, iconn, num_edges, to_node; |
| int iclass, ipin; |
| float cost; |
| t_heap* heap_head_ptr; |
| t_physical_tile_type_ptr type; |
| |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| if (rip_up_local_opins) { |
| for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { |
| type = physical_tile_type(blk_id); |
| for (iclass = 0; iclass < type->num_class; iclass++) { |
| num_local_opin = route_ctx.clb_opins_used_locally[blk_id][iclass].size(); |
| |
| if (num_local_opin == 0) continue; |
| VTR_ASSERT(type->class_inf[iclass].equivalence == PortEquivalence::INSTANCE); |
| |
| /* Always 0 for pads and for RECEIVER (IPIN) classes */ |
| for (ipin = 0; ipin < num_local_opin; ipin++) { |
| inode = route_ctx.clb_opins_used_locally[blk_id][iclass][ipin]; |
| adjust_one_rr_occ_and_apcost(inode, -1, pres_fac, acc_fac); |
| } |
| } |
| } |
| } |
| |
| for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { |
| type = physical_tile_type(blk_id); |
| for (iclass = 0; iclass < type->num_class; iclass++) { |
| num_local_opin = route_ctx.clb_opins_used_locally[blk_id][iclass].size(); |
| |
| if (num_local_opin == 0) continue; |
| |
| VTR_ASSERT(type->class_inf[iclass].equivalence == PortEquivalence::INSTANCE); |
| |
            //From the SRC node we walk through its outgoing edges to collect the
            //OPIN nodes. We then push them onto a heap so that the OPINs with lower
            //congestion cost are popped-off/reserved first. (Intuitively, we want
            //the reserved OPINs to move out of the way of congestion, by preferring
            //to reserve OPINs with lower congestion costs.)
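            //
            //For example (illustrative costs): if the SRC fans out to OPINs with
            //congestion costs {1.0, 3.5, 1.2} and num_local_opin == 2, the two
            //cheapest OPINs (costs 1.0 and 1.2) are popped and reserved first,
            //leaving the cost-3.5 OPIN available to the router.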
| from_node = route_ctx.rr_blk_source[blk_id][iclass]; |
| num_edges = device_ctx.rr_nodes[from_node].num_edges(); |
| for (iconn = 0; iconn < num_edges; iconn++) { |
| to_node = device_ctx.rr_nodes[from_node].edge_sink_node(iconn); |
| |
| VTR_ASSERT(device_ctx.rr_nodes[to_node].type() == OPIN); |
| |
                //Add the OPIN to the heap according to its congestion cost
| cost = get_rr_cong_cost(to_node); |
| node_to_heap(to_node, cost, OPEN, OPEN, 0., 0.); |
| } |
| |
| for (ipin = 0; ipin < num_local_opin; ipin++) { |
| //Pop the nodes off the heap. We get them from the heap so we |
| //reserve those pins with lowest congestion cost first. |
| heap_head_ptr = get_heap_head(); |
| inode = heap_head_ptr->index; |
| |
| VTR_ASSERT(device_ctx.rr_nodes[inode].type() == OPIN); |
| |
| adjust_one_rr_occ_and_apcost(inode, 1, pres_fac, acc_fac); |
| route_ctx.clb_opins_used_locally[blk_id][iclass][ipin] = inode; |
| free_heap_data(heap_head_ptr); |
| } |
| |
| empty_heap(); |
| } |
| } |
| } |
| |
| static void adjust_one_rr_occ_and_apcost(int inode, int add_or_sub, float pres_fac, float acc_fac) { |
| /* Increments or decrements (depending on add_or_sub) the occupancy of * |
| * one rr_node, and adjusts the present cost of that node appropriately. */ |
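    /* Worked example (illustrative numbers): reserving (add_or_sub == +1) a   *
     * node with capacity 1 and prior occupancy 0 gives new_occ == 1, which is *
     * not less than capacity, so pres_cost = 1.0 + (1 + 1 - 1) * pres_fac     *
     * = 1.0 + pres_fac (a just-full node is already penalized), while         *
     * acc_cost grows by (1 - 1) * acc_fac = 0. A second reservation           *
     * (new_occ == 2) gives pres_cost = 1.0 + 2 * pres_fac and                 *
     * acc_cost += 1 * acc_fac.                                                */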
| |
| auto& route_ctx = g_vpr_ctx.mutable_routing(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| int new_occ = route_ctx.rr_node_route_inf[inode].occ() + add_or_sub; |
| int capacity = device_ctx.rr_nodes[inode].capacity(); |
| route_ctx.rr_node_route_inf[inode].set_occ(new_occ); |
| |
| if (new_occ < capacity) { |
| route_ctx.rr_node_route_inf[inode].pres_cost = 1.0; |
| } else { |
| route_ctx.rr_node_route_inf[inode].pres_cost = 1.0 + (new_occ + 1 - capacity) * pres_fac; |
| if (add_or_sub == 1) { |
| route_ctx.rr_node_route_inf[inode].acc_cost += (new_occ - capacity) * acc_fac; |
| } |
| } |
| } |
| |
| void free_chunk_memory_trace() { |
| if (trace_ch.chunk_ptr_head != nullptr) { |
| free_chunk_memory(&trace_ch); |
| trace_ch.chunk_ptr_head = nullptr; |
| trace_free_head = nullptr; |
| } |
| } |
| |
// Connection-based routing overhaul (finer granularity than whole nets):
// utility and debugging functions -----------------------
| void print_traceback(ClusterNetId net_id) { |
    // Linearly print the net's traceback linked list
| auto& route_ctx = g_vpr_ctx.routing(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| VTR_LOG("traceback %zu: ", size_t(net_id)); |
| t_trace* head = route_ctx.trace[net_id].head; |
| while (head) { |
| int inode{head->index}; |
| if (device_ctx.rr_nodes[inode].type() == SINK) |
| VTR_LOG("%d(sink)(%d)->", inode, route_ctx.rr_node_route_inf[inode].occ()); |
| else |
| VTR_LOG("%d(%d)->", inode, route_ctx.rr_node_route_inf[inode].occ()); |
| head = head->next; |
| } |
| VTR_LOG("\n"); |
| } |
| |
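// Prints one traceback element per line, annotated with ' !' at the end of a
// branch (iswitch == OPEN), '*' when the previous edge is non-configurable,
// and ' x' when the node is overused. Illustrative output (made-up nodes):
//
//   101 (SOURCE)
//   102 (OPIN)
//   103 (CHANX) x
//   104 (IPIN)
//   105 (SINK) !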
| void print_traceback(const t_trace* trace) { |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& route_ctx = g_vpr_ctx.routing(); |
| const t_trace* prev = nullptr; |
| while (trace) { |
| int inode = trace->index; |
| VTR_LOG("%d (%s)", inode, rr_node_typename[device_ctx.rr_nodes[inode].type()]); |
| |
| if (trace->iswitch == OPEN) { |
| VTR_LOG(" !"); //End of branch |
| } |
| |
| if (prev && prev->iswitch != OPEN && !device_ctx.rr_switch_inf[prev->iswitch].configurable()) { |
| VTR_LOG("*"); //Reached non-configurably |
| } |
| |
| if (route_ctx.rr_node_route_inf[inode].occ() > device_ctx.rr_nodes[inode].capacity()) { |
| VTR_LOG(" x"); //Overused |
| } |
| VTR_LOG("\n"); |
| prev = trace; |
| trace = trace->next; |
| } |
| VTR_LOG("\n"); |
| } |
| |
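// Checks that a traceback is self-consistent: within a branch, consecutive
// elements must be connected by an RR edge with a matching switch, and the
// element following a branch end (iswitch == OPEN) must be a previously-seen
// node (the branch point). For example, a route fanning out from node A
//
//   SRC -> A -> SINK1
//           \-> B -> SINK2
//
// is stored linearly as: SRC, A, SINK1 (iswitch == OPEN), A, B, SINK2.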
| bool validate_traceback(t_trace* trace) { |
| std::set<int> seen_rr_nodes; |
| |
| return validate_traceback_recurr(trace, seen_rr_nodes); |
| } |
| |
| bool validate_traceback_recurr(t_trace* trace, std::set<int>& seen_rr_nodes) { |
| if (!trace) { |
| return true; |
| } |
| |
| seen_rr_nodes.insert(trace->index); |
| |
| t_trace* next = trace->next; |
| |
| if (next) { |
| if (trace->iswitch == OPEN) { //End of a branch |
| |
            //Verify that the next element (branch point) has already been seen in the traceback so far
| if (!seen_rr_nodes.count(next->index)) { |
| VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Traceback branch point %d not found", next->index); |
| } else { |
| //Recurse along the new branch |
| return validate_traceback_recurr(next, seen_rr_nodes); |
| } |
| } else { //Midway along branch |
| |
| //Check there is an edge connecting trace and next |
| |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| bool found = false; |
| for (t_edge_size iedge = 0; iedge < device_ctx.rr_nodes[trace->index].num_edges(); ++iedge) { |
| int to_node = device_ctx.rr_nodes[trace->index].edge_sink_node(iedge); |
| |
| if (to_node == next->index) { |
| found = true; |
| |
| //Verify that the switch matches |
| int rr_iswitch = device_ctx.rr_nodes[trace->index].edge_switch(iedge); |
| if (trace->iswitch != rr_iswitch) { |
| VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Traceback mismatched switch type: traceback %d rr_graph %d (RR nodes %d -> %d)\n", |
| trace->iswitch, rr_iswitch, |
| trace->index, to_node); |
| } |
| break; |
| } |
| } |
| |
| if (!found) { |
| VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Traceback no RR edge between RR nodes %d -> %d\n", trace->index, next->index); |
| } |
| |
| //Recurse |
| return validate_traceback_recurr(next, seen_rr_nodes); |
| } |
| } |
| |
| VTR_ASSERT(!next); |
| return true; //End of traceback |
| } |
| |
| //Print information about an invalid routing, caused by overused routing resources |
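//
// Illustrative output (the exact node description comes from describe_rr_node()):
//
//   RR node 423 (CHANX) ... is overused (occ=2 capacity=1)
//     Used by net mult_out (12)
//     Used by net adder_in (17)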
| void print_invalid_routing_info() { |
| auto& device_ctx = g_vpr_ctx.device(); |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& route_ctx = g_vpr_ctx.routing(); |
| |
| //Build a look-up of nets using each RR node |
| std::multimap<int, ClusterNetId> rr_node_nets; |
| |
| for (auto net_id : cluster_ctx.clb_nlist.nets()) { |
| t_trace* tptr = route_ctx.trace[net_id].head; |
| |
| while (tptr != nullptr) { |
| rr_node_nets.emplace(tptr->index, net_id); |
| tptr = tptr->next; |
| } |
| } |
| |
| for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) { |
| int occ = route_ctx.rr_node_route_inf[inode].occ(); |
| int cap = device_ctx.rr_nodes[inode].capacity(); |
| if (occ > cap) { |
| VTR_LOG(" %s is overused (occ=%d capacity=%d)\n", describe_rr_node(inode).c_str(), occ, cap); |
| |
| auto range = rr_node_nets.equal_range(inode); |
| for (auto itr = range.first; itr != range.second; ++itr) { |
| auto net_id = itr->second; |
| VTR_LOG(" Used by net %s (%zu)\n", cluster_ctx.clb_nlist.net_name(net_id).c_str(), size_t(net_id)); |
| } |
| } |
| } |
| } |
| |
| void print_rr_node_route_inf() { |
| auto& route_ctx = g_vpr_ctx.routing(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| for (size_t inode = 0; inode < route_ctx.rr_node_route_inf.size(); ++inode) { |
| if (!std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)) { |
| int prev_node = route_ctx.rr_node_route_inf[inode].prev_node; |
| int prev_edge = route_ctx.rr_node_route_inf[inode].prev_edge; |
| VTR_LOG("rr_node: %d prev_node: %d prev_edge: %d", |
| inode, prev_node, prev_edge); |
| |
| if (prev_node != OPEN && prev_edge != OPEN && !device_ctx.rr_nodes[prev_node].edge_is_configurable(prev_edge)) { |
| VTR_LOG("*"); |
| } |
| |
| VTR_LOG(" pcost: %g back_pcost: %g\n", |
| route_ctx.rr_node_route_inf[inode].path_cost, route_ctx.rr_node_route_inf[inode].backward_path_cost); |
| } |
| } |
| } |
| |
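// Emits the same information as print_rr_node_route_inf(), but as a Graphviz
// digraph written to the log; the 'digraph G { ... }' block can be cut out of
// the log and rendered with, e.g., 'dot -Tpng' (assuming Graphviz is installed).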
| void print_rr_node_route_inf_dot() { |
| auto& route_ctx = g_vpr_ctx.routing(); |
| auto& device_ctx = g_vpr_ctx.device(); |
| |
| VTR_LOG("digraph G {\n"); |
| VTR_LOG("\tnode[shape=record]\n"); |
| for (size_t inode = 0; inode < route_ctx.rr_node_route_inf.size(); ++inode) { |
| if (!std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)) { |
| VTR_LOG("\tnode%zu[label=\"{%zu (%s)", inode, inode, device_ctx.rr_nodes[inode].type_string()); |
| if (route_ctx.rr_node_route_inf[inode].occ() > device_ctx.rr_nodes[inode].capacity()) { |
| VTR_LOG(" x"); |
| } |
| VTR_LOG("}\"]\n"); |
| } |
| } |
| for (size_t inode = 0; inode < route_ctx.rr_node_route_inf.size(); ++inode) { |
| if (!std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)) { |
| int prev_node = route_ctx.rr_node_route_inf[inode].prev_node; |
| int prev_edge = route_ctx.rr_node_route_inf[inode].prev_edge; |
| |
| if (prev_node != OPEN && prev_edge != OPEN) { |
| VTR_LOG("\tnode%d -> node%zu [", prev_node, inode); |
                if (!device_ctx.rr_nodes[prev_node].edge_is_configurable(prev_edge)) {
| VTR_LOG("label=\"*\""); |
| } |
| |
| VTR_LOG("];\n"); |
| } |
| } |
| } |
| |
| VTR_LOG("}\n"); |
| } |
| |
| static bool validate_trace_nodes(t_trace* head, const std::unordered_set<int>& trace_nodes) { |
    //Verifies that all nodes in the traceback 'head' are contained in 'trace_nodes'
| |
| if (!head) { |
| return true; |
| } |
| |
| std::vector<int> missing_from_trace_nodes; |
| for (t_trace* tptr = head; tptr != nullptr; tptr = tptr->next) { |
| if (!trace_nodes.count(tptr->index)) { |
| missing_from_trace_nodes.push_back(tptr->index); |
| } |
| } |
| |
| if (!missing_from_trace_nodes.empty()) { |
| std::string msg = vtr::string_fmt( |
| "The following %zu nodes were found in traceback" |
| " but were missing from trace_nodes: %s\n", |
| missing_from_trace_nodes.size(), |
| vtr::join(missing_from_trace_nodes, ", ").c_str()); |
| |
        VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "%s", msg.c_str());
| return false; |
| } |
| |
| return true; |
| } |
| |
| // True if router will use a lookahead. |
| // |
| // This controls whether the router lookahead cache will be primed outside of |
| // the router ScopedStartFinishTimer. |
| bool router_needs_lookahead(enum e_router_algorithm router_algorithm) { |
| switch (router_algorithm) { |
| case BREADTH_FIRST: |
| return false; |
| case TIMING_DRIVEN: |
| return true; |
| default: |
| VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown routing algorithm %d", |
| router_algorithm); |
| } |
| } |