#include <cstdio>
#include <limits> /* std::numeric_limits, used below to NaN-initialize delays */
using namespace std;

#include "vtr_memory.h"
#include "vtr_log.h"

#include "vpr_types.h"
#include "vpr_error.h"

#include "globals.h"
#include "net_delay.h"
/***************** Types and defines local to this module ********************/
struct t_rc_node;

/* Linked list listing the children of an rc_node.                           *
 * child:   Pointer to an rc_node (child of the current node).               *
 * iswitch: Index of the switch type used to connect to the child node.      *
 * next:    Pointer to the next linked_rc_edge in the linked list (allows    *
 *          you to get the next child of the current rc_node).               */
struct t_linked_rc_edge {
    t_rc_node* child;
    short iswitch;
    t_linked_rc_edge* next;
};

/* Structure describing one node in an RC tree (used to get net delays).     *
 * u.child_list: Pointer to a linked list of linked_rc_edge.  Each one of    *
 *               the linked list entries gives a child of this node.         *
 * u.next:       Used only when this node is on the free list.  Gives the    *
 *               next node on the free list.                                 *
 * inode:        Index (ID) of the rr_node that corresponds to this rc_node. *
 * C_downstream: Total downstream capacitance from this rc_node.  That is,   *
 *               the total C of the subtree rooted at the current node,      *
 *               including the C of the current node.                        *
 * Tdel:         Time delay for the signal to get from the net source to     *
 *               this node.  Includes the time to go through this node.      */
struct t_rc_node {
    union {
        t_linked_rc_edge* child_list;
        t_rc_node* next;
    } u;
    int inode;
    float C_downstream;
    float Tdel;
};

/* Linked list of pointers to rc_nodes.                                      *
 * rc_node: Pointer to an rc_node.                                           *
 * next:    Next list element.                                               */
struct t_linked_rc_ptr {
    t_rc_node* rc_node;
    t_linked_rc_ptr* next;
};

/*********************** Subroutines local to this module ********************/

static t_rc_node* alloc_and_load_rc_tree(ClusterNetId net_id,
                                         t_rc_node** rc_node_free_list_ptr,
                                         t_linked_rc_edge** rc_edge_free_list_ptr,
                                         t_linked_rc_ptr* rr_node_to_rc_node);

static void add_to_rc_tree(t_rc_node* parent_rc, t_rc_node* child_rc, short iswitch, int inode, t_linked_rc_edge** rc_edge_free_list_ptr);

static t_rc_node* alloc_rc_node(t_rc_node** rc_node_free_list_ptr);

static void free_rc_node(t_rc_node* rc_node,
                         t_rc_node** rc_node_free_list_ptr);

static t_linked_rc_edge* alloc_linked_rc_edge(t_linked_rc_edge** rc_edge_free_list_ptr);

static void free_linked_rc_edge(t_linked_rc_edge* rc_edge,
                                t_linked_rc_edge** rc_edge_free_list_ptr);

static float load_rc_tree_C(t_rc_node* rc_node);

static void load_rc_tree_T(t_rc_node* rc_node, float T_arrival);

static void load_one_net_delay(vtr::vector<ClusterNetId, float*>& net_delay, ClusterNetId net_id, t_linked_rc_ptr* rr_node_to_rc_node);

static void load_one_constant_net_delay(vtr::vector<ClusterNetId, float*>& net_delay, ClusterNetId net_id, float delay_value);

static void free_rc_tree(t_rc_node* rc_root,
                         t_rc_node** rc_node_free_list_ptr,
                         t_linked_rc_edge** rc_edge_free_list_ptr);

static void reset_rr_node_to_rc_node(t_linked_rc_ptr* rr_node_to_rc_node,
                                     ClusterNetId net_id);

static void free_rc_node_free_list(t_rc_node* rc_node_free_list);

static void free_rc_edge_free_list(t_linked_rc_edge* rc_edge_free_list);

/*************************** Subroutine definitions **************************/

/* Allocates space for the net_delay data structure                          *
 * [0..nets.size()-1][1..num_pins-1].  I chunk the data                      *
 * to save space on large problems.                                          */
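/* The returned structure is indexed as net_delay[net_id][ipin] for ipin in  *
 * [1..num_pins-1]; pin 0 is the net driver and has no delay entry, which is *
 * why each net's chunk holds one float per sink and is offset by -1 below.  */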
vtr::vector<ClusterNetId, float*> alloc_net_delay(vtr::t_chunk* chunk_list_ptr) {
    auto& cluster_ctx = g_vpr_ctx.clustering();
    vtr::vector<ClusterNetId, float*> net_delay; /* [0..nets.size()-1][1..num_pins-1] */

    auto nets = cluster_ctx.clb_nlist.nets();
    net_delay.resize(nets.size());

    for (auto net_id : nets) {
        float* tmp_ptr = (float*)vtr::chunk_malloc(cluster_ctx.clb_nlist.net_sinks(net_id).size() * sizeof(float), chunk_list_ptr);

        net_delay[net_id] = tmp_ptr - 1; /* [1..num_pins-1] */

        // Ensure the net delays are initialized with non-garbage values
        for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ++ipin) {
            net_delay[net_id][ipin] = std::numeric_limits<float>::quiet_NaN();
        }
    }

    return (net_delay);
}

void free_net_delay(vtr::vector<ClusterNetId, float*>& net_delay,
                    vtr::t_chunk* chunk_list_ptr) {
    /* Frees the net_delay structure.  Assumes it was chunk allocated. */

    net_delay.clear();
    vtr::free_chunk_memory(chunk_list_ptr);
}

void load_net_delay_from_routing(vtr::vector<ClusterNetId, float*>& net_delay) {
    /* This routine loads net_delay[0..nets.size()-1][1..num_pins-1].  Each   *
     * entry is the Elmore delay from the net source to the appropriate sink. *
     * Both the rr_graph and the routing traceback must be completely         *
     * constructed before this routine is called, and the net_delay array     *
     * must have been allocated.                                              */
    auto& device_ctx = g_vpr_ctx.device();
    auto& cluster_ctx = g_vpr_ctx.clustering();

    t_rc_node *rc_node_free_list, *rc_root;
    t_linked_rc_edge* rc_edge_free_list;
    t_linked_rc_ptr* rr_node_to_rc_node; /* [0..device_ctx.rr_nodes.size()-1] */

    rr_node_to_rc_node = (t_linked_rc_ptr*)vtr::calloc(device_ctx.rr_nodes.size(),
                                                       sizeof(t_linked_rc_ptr));

    rc_node_free_list = nullptr;
    rc_edge_free_list = nullptr;

    for (auto net_id : cluster_ctx.clb_nlist.nets()) {
        if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
            load_one_constant_net_delay(net_delay, net_id, 0.);
        } else {
            rc_root = alloc_and_load_rc_tree(net_id, &rc_node_free_list,
                                             &rc_edge_free_list, rr_node_to_rc_node);
            load_rc_tree_C(rc_root);
            load_rc_tree_T(rc_root, 0.);
            load_one_net_delay(net_delay, net_id, rr_node_to_rc_node);
            free_rc_tree(rc_root, &rc_node_free_list, &rc_edge_free_list);
            reset_rr_node_to_rc_node(rr_node_to_rc_node, net_id);
        }
    }

    free_rc_node_free_list(rc_node_free_list);
    free_rc_edge_free_list(rc_edge_free_list);
    free(rr_node_to_rc_node);
}

static t_rc_node*
alloc_and_load_rc_tree(ClusterNetId net_id, t_rc_node** rc_node_free_list_ptr, t_linked_rc_edge** rc_edge_free_list_ptr, t_linked_rc_ptr* rr_node_to_rc_node) {
    /* Builds a tree describing the routing of net net_id.  Allocates all the *
     * data and inserts all the connections in the tree.                      */

    t_rc_node *curr_rc, *prev_rc, *root_rc;
    t_trace* tptr;
    int inode;
    short iswitch;
    t_linked_rc_ptr* linked_rc_ptr;

    auto& route_ctx = g_vpr_ctx.routing();

    root_rc = alloc_rc_node(rc_node_free_list_ptr);
    tptr = route_ctx.trace[net_id].head;

    if (tptr == nullptr) {
        VPR_FATAL_ERROR(VPR_ERROR_TIMING,
                        "in alloc_and_load_rc_tree: Traceback for net %zu does not exist.\n", size_t(net_id));
    }

    inode = tptr->index;
    iswitch = tptr->iswitch;
    root_rc->inode = inode;
    root_rc->u.child_list = nullptr;
    rr_node_to_rc_node[inode].rc_node = root_rc;

    prev_rc = root_rc;
    tptr = tptr->next;

    while (tptr != nullptr) {
        inode = tptr->index;

        /* Is this node a "stitch-in" point to part of the existing routing,  *
         * or a new piece of routing along the current routing "arm"?         */

        if (rr_node_to_rc_node[inode].rc_node == nullptr) { /* Part of current "arm" */
            curr_rc = alloc_rc_node(rc_node_free_list_ptr);
            add_to_rc_tree(prev_rc, curr_rc, iswitch, inode, rc_edge_free_list_ptr);
            rr_node_to_rc_node[inode].rc_node = curr_rc;
            prev_rc = curr_rc;

        } else if (iswitch == OPEN) { /* Connection to old stuff. */

            prev_rc = rr_node_to_rc_node[inode].rc_node;

        } else { /* SINK that this net has connected to more than once. */

            /* I can connect to a SINK node more than once in some weird       *
             * architectures.  That means the routing isn't really a tree --   *
             * there is reconvergent fanout from two or more IPINs into one    *
             * SINK.  I convert this structure into a true RC tree on the fly  *
             * by creating a new rc_node each time I hit the same sink.  This  *
             * means I need to keep a linked list of the rc_nodes associated   *
             * with the rr_node (inode) associated with that SINK.             */

            curr_rc = alloc_rc_node(rc_node_free_list_ptr);
            add_to_rc_tree(prev_rc, curr_rc, iswitch, inode, rc_edge_free_list_ptr);

            linked_rc_ptr = (t_linked_rc_ptr*)vtr::malloc(sizeof(t_linked_rc_ptr));
            linked_rc_ptr->next = rr_node_to_rc_node[inode].next;
            rr_node_to_rc_node[inode].next = linked_rc_ptr;
            linked_rc_ptr->rc_node = curr_rc;

            prev_rc = curr_rc;
        }
        iswitch = tptr->iswitch;
        tptr = tptr->next;
    }

    return (root_rc);
}

static void add_to_rc_tree(t_rc_node* parent_rc, t_rc_node* child_rc, short iswitch, int inode, t_linked_rc_edge** rc_edge_free_list_ptr) {
    /* Adds child_rc to the child list of parent_rc, and sets the switch       *
     * between them to iswitch.  This routine also initializes the child_rc    *
     * properly and sets its node value to inode.                              */

    t_linked_rc_edge* linked_rc_edge;

    linked_rc_edge = alloc_linked_rc_edge(rc_edge_free_list_ptr);

    linked_rc_edge->next = parent_rc->u.child_list;
    parent_rc->u.child_list = linked_rc_edge;

    linked_rc_edge->child = child_rc;
    linked_rc_edge->iswitch = iswitch;

    child_rc->u.child_list = nullptr;
    child_rc->inode = inode;
}

static t_rc_node*
alloc_rc_node(t_rc_node** rc_node_free_list_ptr) {
    /* Allocates a new rc_node, from the free list if possible, from the free *
     * store otherwise.                                                       */

    t_rc_node* rc_node;

    rc_node = *rc_node_free_list_ptr;

    if (rc_node != nullptr) {
        *rc_node_free_list_ptr = rc_node->u.next;
    } else {
        rc_node = (t_rc_node*)vtr::malloc(sizeof(t_rc_node));
    }

    return (rc_node);
}

static void free_rc_node(t_rc_node* rc_node,
                         t_rc_node** rc_node_free_list_ptr) {
    /* Adds rc_node to the proper free list. */

    rc_node->u.next = *rc_node_free_list_ptr;
    *rc_node_free_list_ptr = rc_node;
}

static t_linked_rc_edge*
alloc_linked_rc_edge(t_linked_rc_edge** rc_edge_free_list_ptr) {
    /* Allocates a new linked_rc_edge, from the free list if possible, from   *
     * the free store otherwise.                                              */

    t_linked_rc_edge* linked_rc_edge;

    linked_rc_edge = *rc_edge_free_list_ptr;

    if (linked_rc_edge != nullptr) {
        *rc_edge_free_list_ptr = linked_rc_edge->next;
    } else {
        linked_rc_edge = (t_linked_rc_edge*)vtr::malloc(sizeof(t_linked_rc_edge));
    }

    return (linked_rc_edge);
}

static void free_linked_rc_edge(t_linked_rc_edge* rc_edge,
                                t_linked_rc_edge** rc_edge_free_list_ptr) {
    /* Adds the rc_edge to the rc_edge free list. */

    rc_edge->next = *rc_edge_free_list_ptr;
    *rc_edge_free_list_ptr = rc_edge;
}

static float load_rc_tree_C(t_rc_node* rc_node) {
    /* Does a post-order traversal of the rc tree to load each node's         *
     * C_downstream with the proper sum of all the downstream capacitances.   *
     * This routine calls itself recursively to perform the traversal.        */
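    /* In equation form, the recurrence computed below is:                    *
     *   C_downstream(n) = C(n) + sum over children c reached through an      *
     *                     unbuffered switch of C_downstream(c).              *
     * A buffered switch isolates its subtree, so a buffered child's          *
     * C_downstream is not added to the parent's total.                       */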

    t_linked_rc_edge* linked_rc_edge;
    t_rc_node* child_node;
    int inode;
    short iswitch;
    float C, C_downstream;

    auto& device_ctx = g_vpr_ctx.device();

    linked_rc_edge = rc_node->u.child_list;
    inode = rc_node->inode;
    C = device_ctx.rr_nodes[inode].C();

    while (linked_rc_edge != nullptr) { /* For all children */
        iswitch = linked_rc_edge->iswitch;
        child_node = linked_rc_edge->child;
        C_downstream = load_rc_tree_C(child_node);

        if (!device_ctx.rr_switch_inf[iswitch].buffered())
            C += C_downstream;

        linked_rc_edge = linked_rc_edge->next;
    }

    rc_node->C_downstream = C;
    return (C);
}

static void load_rc_tree_T(t_rc_node* rc_node, float T_arrival) {
    /* This routine does a pre-order depth-first traversal of the rc tree to  *
     * compute the Tdel to each node in the rc tree.  The T_arrival is the    *
     * time at which the signal hits the input to this node.  This routine    *
     * calls itself recursively to perform the traversal.                     */

    float Tdel, Rmetal, Tchild;
    t_linked_rc_edge* linked_rc_edge;
    t_rc_node* child_node;
    short iswitch;
    int inode;

    auto& device_ctx = g_vpr_ctx.device();

    Tdel = T_arrival;
    inode = rc_node->inode;
    Rmetal = device_ctx.rr_nodes[inode].R();

    /* NB: device_ctx.rr_nodes[inode].C gives the capacitance of this node,   *
     * while rc_node->C_downstream gives the unbuffered downstream            *
     * capacitance rooted at this node, including the C of the node itself.   *
     * I want to multiply the C of this node by 0.5 Rmetal, since it's a      *
     * distributed RC line.  Hence 0.5 Rmetal * Cnode is a pessimistic        *
     * estimate of delay (i.e. end to end).  For the downstream capacitance   *
     * rooted at this node (not including the capacitance of the node         *
     * itself), I assume it is, on average, connected halfway along the line, *
     * so I also multiply by 0.5 Rmetal.  To be totally pessimistic I would   *
     * multiply the downstream part of the capacitance by Rmetal.  Play with  *
     * this equation if you like.                                             */
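
    /* Written out, the update below plus the child expansion further down    *
     * implement the standard Elmore delay recurrence:                        *
     *   Tdel(n)      = T_arrival(n) + 0.5 * Rmetal(n) * C_downstream(n)      *
     *   T_arrival(c) = Tdel(n) + R(iswitch) * C_downstream(c) + Tdel(iswitch)*
     * for each child c reached through switch iswitch.                       */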

    /* Rmetal is distributed so x0.5 */
    Tdel += 0.5 * rc_node->C_downstream * Rmetal;
    rc_node->Tdel = Tdel;

    /* Now expand the children of this node to load their Tdel values. */

    linked_rc_edge = rc_node->u.child_list;

    while (linked_rc_edge != nullptr) { /* For all children */
        iswitch = linked_rc_edge->iswitch;
        child_node = linked_rc_edge->child;

        Tchild = Tdel + device_ctx.rr_switch_inf[iswitch].R * child_node->C_downstream;
        Tchild += device_ctx.rr_switch_inf[iswitch].Tdel; /* Intrinsic switch delay. */
        load_rc_tree_T(child_node, Tchild);

        linked_rc_edge = linked_rc_edge->next;
    }
}

/* Loads the net delay array for net net_id.  The rc tree for that net must  *
 * have already been completely built and loaded.                            */
static void load_one_net_delay(vtr::vector<ClusterNetId, float*>& net_delay, ClusterNetId net_id, t_linked_rc_ptr* rr_node_to_rc_node) {
    unsigned int ipin, inode;
    float Tmax;
    t_rc_node* rc_node;
    t_linked_rc_ptr *linked_rc_ptr, *next_ptr;

    auto& cluster_ctx = g_vpr_ctx.clustering();
    auto& route_ctx = g_vpr_ctx.routing();

    for (ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) {
        inode = route_ctx.net_rr_terminals[net_id][ipin];
        linked_rc_ptr = rr_node_to_rc_node[inode].next;
        rc_node = rr_node_to_rc_node[inode].rc_node;
        Tmax = rc_node->Tdel;

        /* The if below only executes when one net connects several times to  *
         * the same SINK.  In this case, I can't tell which net pin each      *
         * connection to this SINK corresponds to (I can just choose          *
         * arbitrarily).  To make sure the timing behaviour converges, I      *
         * pessimistically set the delay for all of the connections to this   *
         * SINK by this net to be the max. of the delays from this net to     *
         * this SINK.  NB: This code only occurs when a net connects more     *
         * than once to the same pin class on the same logic block.  Only a   *
         * weird architecture would allow this.                               */

        if (linked_rc_ptr != nullptr) {
            /* The first time I hit a multiply-used SINK, I choose the largest *
             * delay from this net to this SINK and use it for every           *
             * connection to this SINK by this net.                            */

            do {
                rc_node = linked_rc_ptr->rc_node;
                if (rc_node->Tdel > Tmax) {
                    Tmax = rc_node->Tdel;
                    rr_node_to_rc_node[inode].rc_node = rc_node;
                }
                next_ptr = linked_rc_ptr->next;
                free(linked_rc_ptr);
                linked_rc_ptr = next_ptr;
            } while (linked_rc_ptr != nullptr); /* End do while */

            rr_node_to_rc_node[inode].next = nullptr;
        }
        /* End of if multiply-used SINK */
        net_delay[net_id][ipin] = Tmax;
    }
}

static void load_one_constant_net_delay(vtr::vector<ClusterNetId, float*>& net_delay, ClusterNetId net_id, float delay_value) {
    /* Sets each entry of the net_delay array for net net_id to delay_value. */
    unsigned int ipin;
    auto& cluster_ctx = g_vpr_ctx.clustering();

    for (ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++)
        net_delay[net_id][ipin] = delay_value;
}

static void free_rc_tree(t_rc_node* rc_root,
                         t_rc_node** rc_node_free_list_ptr,
                         t_linked_rc_edge** rc_edge_free_list_ptr) {
    /* Puts the rc tree pointed to by rc_root back on the free list.  Depth-  *
     * first post-order traversal via recursion.                              */

    t_rc_node *rc_node, *child_node;
    t_linked_rc_edge *rc_edge, *next_edge;

    rc_node = rc_root;
    rc_edge = rc_node->u.child_list;

    while (rc_edge != nullptr) { /* For all children */
        child_node = rc_edge->child;
        free_rc_tree(child_node, rc_node_free_list_ptr, rc_edge_free_list_ptr);
        next_edge = rc_edge->next;
        free_linked_rc_edge(rc_edge, rc_edge_free_list_ptr);
        rc_edge = next_edge;
    }

    free_rc_node(rc_node, rc_node_free_list_ptr);
}

static void reset_rr_node_to_rc_node(t_linked_rc_ptr* rr_node_to_rc_node, ClusterNetId net_id) {
    /* Resets the rr_node_to_rc_node mapping entries that were set during     *
     * construction of the RC tree for net net_id.  Any extra linked list     *
     * entries added to deal with a SINK being connected to multiple times    *
     * have already been freed by load_one_net_delay.                         */

    t_trace* tptr;
    int inode;

    auto& route_ctx = g_vpr_ctx.routing();

    tptr = route_ctx.trace[net_id].head;

    while (tptr != nullptr) {
        inode = tptr->index;
        rr_node_to_rc_node[inode].rc_node = nullptr;
        tptr = tptr->next;
    }
}

static void free_rc_node_free_list(t_rc_node* rc_node_free_list) {
    /* Really frees (i.e. calls free()) all the rc_nodes on the free list. */

    t_rc_node *rc_node, *next_node;

    rc_node = rc_node_free_list;

    while (rc_node != nullptr) {
        next_node = rc_node->u.next;
        free(rc_node);
        rc_node = next_node;
    }
}

static void free_rc_edge_free_list(t_linked_rc_edge* rc_edge_free_list) {
    /* Really frees (i.e. calls free()) all the rc_edges on the free list. */

    t_linked_rc_edge *rc_edge, *next_edge;

    rc_edge = rc_edge_free_list;

    while (rc_edge != nullptr) {
        next_edge = rc_edge->next;
        free(rc_edge);
        rc_edge = next_edge;
    }
}