/* blob: 6045ea7f5524cd24f3eaa12b6292902050431efd [file] [log] [blame] */
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <limits>

using namespace std;

#include "vtr_log.h"
#include "vtr_memory.h"

#include "globals.h"
#include "net_delay.h"
#include "vpr_error.h"
#include "vpr_types.h"
/***************** Types and defines local to this module ********************/
/* Forward declaration: t_rc_node and t_linked_rc_edge refer to each other. */
struct t_rc_node;

/* Linked list listing the children of an rc_node.                           *
 * child:   Pointer to an rc_node (child of the current node).               *
 * iswitch: Index of the switch type used to connect to the child node.      *
 * next:    Pointer to the next linked_rc_edge in the linked list (allows    *
 *          you to get the next child of the current rc_node).               */
struct t_linked_rc_edge {
    t_rc_node* child;       /* Child rc_node reached through this edge.      */
    short iswitch;          /* rr_switch_inf index of the connecting switch. */
    t_linked_rc_edge* next; /* Next edge (i.e. next child) of the parent.    */
};
/* Structure describing one node in an RC tree (used to get net delays). *
* u.child_list: Pointer to a linked list of linked_rc_edge. Each one of *
* the linked list entries gives a child of this node. *
* u.next: Used only when this node is on the free list. Gives the next *
* node on the free list. *
* inode: index (ID) of the rr_node that corresponds to this rc_node. *
* C_downstream: Total downstream capacitance from this rc_node. That is, *
* the total C of the subtree rooted at the current node, *
* including the C of the current node. *
* Tdel: Time delay for the signal to get from the net source to this node. *
* Includes the time to go through this node. */
/* One node in an RC tree (used to compute Elmore net delays).               *
 * u.child_list: head of the linked list of this node's children (valid     *
 *               while the node is part of a tree).                         *
 * u.next:       next node on the free list (valid only while the node is  *
 *               on the free list) -- the union is safe because a node is   *
 *               never in both states at once.                              *
 * inode:        index (ID) of the rr_node corresponding to this rc_node.   *
 * C_downstream: total C of the subtree rooted here, including this node.   *
 * Tdel:         delay from the net source to (and through) this node.      */
struct t_rc_node {
    union {
        t_linked_rc_edge* child_list; /* In-tree state: children.           */
        t_rc_node* next;              /* Free-list state: next free node.   */
    } u;
    int inode;
    float C_downstream;
    float Tdel;
};
/* Linked list of pointers to rc_nodes. *
* rc_node: Pointer to an rc_node. *
* next: Next list element. */
/* Linked list of pointers to rc_nodes.  Used to map an rr_node index to    *
 * the (possibly several) rc_nodes built for it.                            *
 * rc_node: Pointer to an rc_node.                                          *
 * next:    Next list element (extra rc_nodes for a multiply-used SINK).    */
struct t_linked_rc_ptr {
    t_rc_node* rc_node;
    t_linked_rc_ptr* next;
};
/*********************** Subroutines local to this module ********************/
/* RC-tree construction from the routing traceback. */
static t_rc_node* alloc_and_load_rc_tree(ClusterNetId net_id,
                                         t_rc_node** rc_node_free_list_ptr,
                                         t_linked_rc_edge** rc_edge_free_list_ptr,
                                         t_linked_rc_ptr* rr_node_to_rc_node);
static void add_to_rc_tree(t_rc_node* parent_rc, t_rc_node* child_rc, short iswitch, int inode, t_linked_rc_edge** rc_edge_free_list_ptr);

/* Free-list-backed allocation of tree nodes and edges. */
static t_rc_node* alloc_rc_node(t_rc_node** rc_node_free_list_ptr);
static void free_rc_node(t_rc_node* rc_node,
                         t_rc_node** rc_node_free_list_ptr);
static t_linked_rc_edge* alloc_linked_rc_edge(t_linked_rc_edge** rc_edge_free_list_ptr);
static void free_linked_rc_edge(t_linked_rc_edge* rc_edge,
                                t_linked_rc_edge** rc_edge_free_list_ptr);

/* Elmore-delay computation over the RC tree. */
static float load_rc_tree_C(t_rc_node* rc_node);
static void load_rc_tree_T(t_rc_node* rc_node, float T_arrival);

/* Copying computed delays into the net_delay array. */
static void load_one_net_delay(vtr::vector<ClusterNetId, float*>& net_delay, ClusterNetId net_id, t_linked_rc_ptr* rr_node_to_rc_node);
static void load_one_constant_net_delay(vtr::vector<ClusterNetId, float*>& net_delay, ClusterNetId net_id, float delay_value);

/* Teardown of the per-net tree and the shared free lists / mapping. */
static void free_rc_tree(t_rc_node* rc_root,
                         t_rc_node** rc_node_free_list_ptr,
                         t_linked_rc_edge** rc_edge_free_list_ptr);
static void reset_rr_node_to_rc_node(t_linked_rc_ptr* rr_node_to_rc_node,
                                     ClusterNetId net_id);
static void free_rc_node_free_list(t_rc_node* rc_node_free_list);
static void free_rc_edge_free_list(t_linked_rc_edge* rc_edge_free_list);
/*************************** Subroutine definitions **************************/
/* Allocates space for the net_delay data structure *
* [0..nets.size()-1][1..num_pins-1]. I chunk the data *
* to save space on large problems. */
/* Allocates space for the net_delay data structure                          *
 * [0..nets.size()-1][1..num_pins-1].  The per-net arrays are chunk          *
 * allocated to save space on large problems.  Every entry is initialized    *
 * to NaN so that reads of never-loaded delays are detectable.               */
vtr::vector<ClusterNetId, float*> alloc_net_delay(vtr::t_chunk* chunk_list_ptr) {
    auto& cluster_ctx = g_vpr_ctx.clustering();

    vtr::vector<ClusterNetId, float*> net_delay; /* [0..nets.size()-1][1..num_pins-1] */

    auto nets = cluster_ctx.clb_nlist.nets();
    net_delay.resize(nets.size());

    for (auto net_id : nets) {
        size_t num_sinks = cluster_ctx.clb_nlist.net_sinks(net_id).size();
        float* tmp_ptr = (float*)vtr::chunk_malloc(num_sinks * sizeof(float), chunk_list_ptr);

        /* Offset by one so the array is indexed by pin number [1..num_pins-1];    *
         * pin 0 is the net driver and has no delay entry.  Element [0] must       *
         * never be dereferenced.                                                  */
        net_delay[net_id] = tmp_ptr - 1;

        /* Fill exactly the allocated region (one float per sink) with NaN. */
        std::fill_n(tmp_ptr, num_sinks, std::numeric_limits<float>::quiet_NaN());
    }
    return (net_delay);
}
/* Releases the net_delay structure.  The per-net arrays were chunk          *
 * allocated, so freeing the chunk list releases all of them at once.        */
void free_net_delay(vtr::vector<ClusterNetId, float*>& net_delay,
                    vtr::t_chunk* chunk_list_ptr) {
    net_delay.clear();
    vtr::free_chunk_memory(chunk_list_ptr);
}
void load_net_delay_from_routing(vtr::vector<ClusterNetId, float*>& net_delay) {
    /* Loads net_delay[0..nets.size()-1][1..num_pins-1] with the Elmore delay *
     * from each net's source to the corresponding sink.  Both the rr_graph   *
     * and the routing traceback must be completely constructed, and the      *
     * net_delay array allocated, before this routine is called.              */
    auto& device_ctx = g_vpr_ctx.device();
    auto& cluster_ctx = g_vpr_ctx.clustering();

    /* Scratch map from rr_node index to its rc_node; zeroed once here and    *
     * reset per net, so it can be reused across all nets.                    */
    t_linked_rc_ptr* rr_node_to_rc_node = (t_linked_rc_ptr*)vtr::calloc(device_ctx.rr_nodes.size(),
                                                                        sizeof(t_linked_rc_ptr));

    /* Free lists let successive nets recycle tree nodes and edges. */
    t_rc_node* rc_node_free_list = nullptr;
    t_linked_rc_edge* rc_edge_free_list = nullptr;

    for (auto net_id : cluster_ctx.clb_nlist.nets()) {
        if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
            /* Ignored (e.g. global) nets get zero delay on every sink. */
            load_one_constant_net_delay(net_delay, net_id, 0.);
            continue;
        }

        t_rc_node* rc_root = alloc_and_load_rc_tree(net_id, &rc_node_free_list,
                                                    &rc_edge_free_list, rr_node_to_rc_node);
        load_rc_tree_C(rc_root);       /* Downstream capacitances first ... */
        load_rc_tree_T(rc_root, 0.);   /* ... then Elmore delays.           */
        load_one_net_delay(net_delay, net_id, rr_node_to_rc_node);

        free_rc_tree(rc_root, &rc_node_free_list, &rc_edge_free_list);
        reset_rr_node_to_rc_node(rr_node_to_rc_node, net_id);
    }

    free_rc_node_free_list(rc_node_free_list);
    free_rc_edge_free_list(rc_edge_free_list);
    free(rr_node_to_rc_node);
}
static t_rc_node*
alloc_and_load_rc_tree(ClusterNetId net_id, t_rc_node** rc_node_free_list_ptr, t_linked_rc_edge** rc_edge_free_list_ptr, t_linked_rc_ptr* rr_node_to_rc_node) {
    /* Builds a tree describing the routing of net net_id by walking the net's *
     * traceback.  Allocates all the data and inserts all the connections in   *
     * the tree.  Also fills in rr_node_to_rc_node[inode] for every rr_node    *
     * on the net so sinks can be looked up later.  Returns the tree root.     */
    t_rc_node *curr_rc, *prev_rc, *root_rc;
    t_trace* tptr;
    int inode;
    short iswitch;
    t_linked_rc_ptr* linked_rc_ptr;

    auto& route_ctx = g_vpr_ctx.routing();

    root_rc = alloc_rc_node(rc_node_free_list_ptr);
    tptr = route_ctx.trace[net_id].head;

    if (tptr == nullptr) {
        VPR_FATAL_ERROR(VPR_ERROR_TIMING,
                        "in alloc_and_load_rc_tree: Traceback for net %lu does not exist.\n", size_t(net_id));
    }

    /* The root is the first traceback element (the net source). */
    inode = tptr->index;
    /* NOTE: iswitch lags one traceback element behind inode throughout the    *
     * loop below -- it is the switch taken to LEAVE the previous element and  *
     * reach the current one.  It is refreshed at the bottom of the loop.      */
    iswitch = tptr->iswitch;
    root_rc->inode = inode;
    root_rc->u.child_list = nullptr;
    rr_node_to_rc_node[inode].rc_node = root_rc;

    prev_rc = root_rc;
    tptr = tptr->next;

    while (tptr != nullptr) {
        inode = tptr->index;

        /* Is this node a "stitch-in" point to part of the existing routing or a *
         * new piece of routing along the current routing "arm?" */
        if (rr_node_to_rc_node[inode].rc_node == nullptr) { /* Part of current "arm" */
            curr_rc = alloc_rc_node(rc_node_free_list_ptr);
            add_to_rc_tree(prev_rc, curr_rc, iswitch, inode, rc_edge_free_list_ptr);
            rr_node_to_rc_node[inode].rc_node = curr_rc;
            prev_rc = curr_rc;
        } else if (iswitch == OPEN) { /* Connection to old stuff. */
            /* An OPEN switch marks the end of a routing arm: the traceback      *
             * jumps back to an already-seen node and branches from there.       */
            prev_rc = rr_node_to_rc_node[inode].rc_node;
        } else { /* SINK that this net has connected to more than once. */
            /* I can connect to a SINK node more than once in some weird architectures. *
             * That means the routing isn't really a tree -- there is reconvergent      *
             * fanout from two or more IPINs into one SINK.  I convert this structure   *
             * into a true RC tree on the fly by creating a new rc_node each time I hit *
             * the same sink.  This means I need to keep a linked list of the rc_nodes  *
             * associated with the rr_node (inode) associated with that SINK.           */
            curr_rc = alloc_rc_node(rc_node_free_list_ptr);
            add_to_rc_tree(prev_rc, curr_rc, iswitch, inode, rc_edge_free_list_ptr);

            /* Push the extra rc_node on the per-rr_node list; freed later by    *
             * load_one_net_delay.                                               */
            linked_rc_ptr = (t_linked_rc_ptr*)vtr::malloc(sizeof(t_linked_rc_ptr));
            linked_rc_ptr->next = rr_node_to_rc_node[inode].next;
            rr_node_to_rc_node[inode].next = linked_rc_ptr;
            linked_rc_ptr->rc_node = curr_rc;
            prev_rc = curr_rc;
        }
        /* Advance: remember the switch leaving this element for the next one. */
        iswitch = tptr->iswitch;
        tptr = tptr->next;
    }
    return (root_rc);
}
static void add_to_rc_tree(t_rc_node* parent_rc, t_rc_node* child_rc, short iswitch, int inode, t_linked_rc_edge** rc_edge_free_list_ptr) {
    /* Links child_rc into parent_rc's child list via a new edge labelled with *
     * switch iswitch, and initializes child_rc properly: empty child list and *
     * rr_node index set to inode.                                             */
    t_linked_rc_edge* edge = alloc_linked_rc_edge(rc_edge_free_list_ptr);

    edge->child = child_rc;
    edge->iswitch = iswitch;

    /* Push the new edge onto the front of the parent's child list. */
    edge->next = parent_rc->u.child_list;
    parent_rc->u.child_list = edge;

    child_rc->u.child_list = nullptr;
    child_rc->inode = inode;
}
static t_rc_node*
alloc_rc_node(t_rc_node** rc_node_free_list_ptr) {
    /* Returns a fresh rc_node, recycled from the free list when one is *
     * available, malloc'ed from the free store otherwise.              */
    t_rc_node* node = *rc_node_free_list_ptr;

    if (node == nullptr) {
        node = (t_rc_node*)vtr::malloc(sizeof(t_rc_node));
    } else {
        *rc_node_free_list_ptr = node->u.next; /* Pop the free-list head. */
    }
    return (node);
}
static void free_rc_node(t_rc_node* rc_node,
                         t_rc_node** rc_node_free_list_ptr) {
    /* Pushes rc_node onto the head of the node free list for later reuse *
     * (nothing is actually deallocated here).                            */
    rc_node->u.next = *rc_node_free_list_ptr;
    *rc_node_free_list_ptr = rc_node;
}
static t_linked_rc_edge*
alloc_linked_rc_edge(t_linked_rc_edge** rc_edge_free_list_ptr) {
    /* Returns a fresh linked_rc_edge, recycled from the free list when one *
     * is available, malloc'ed from the free store otherwise.               */
    t_linked_rc_edge* edge = *rc_edge_free_list_ptr;

    if (edge == nullptr) {
        edge = (t_linked_rc_edge*)vtr::malloc(sizeof(t_linked_rc_edge));
    } else {
        *rc_edge_free_list_ptr = edge->next; /* Pop the free-list head. */
    }
    return (edge);
}
static void free_linked_rc_edge(t_linked_rc_edge* rc_edge,
                                t_linked_rc_edge** rc_edge_free_list_ptr) {
    /* Pushes rc_edge onto the head of the edge free list for later reuse *
     * (nothing is actually deallocated here).                            */
    rc_edge->next = *rc_edge_free_list_ptr;
    *rc_edge_free_list_ptr = rc_edge;
}
static float load_rc_tree_C(t_rc_node* rc_node) {
    /* Post-order traversal of the rc tree (via recursion) that fills in each *
     * node's C_downstream with the sum of all unbuffered downstream          *
     * capacitance, including the C of the node itself.  Returns the value    *
     * stored at rc_node.                                                     */
    auto& device_ctx = g_vpr_ctx.device();

    /* Start from this node's own capacitance. */
    float C = device_ctx.rr_nodes[rc_node->inode].C();

    for (t_linked_rc_edge* edge = rc_node->u.child_list; edge != nullptr; edge = edge->next) {
        /* Always recurse so every child's C_downstream gets loaded ... */
        float C_child = load_rc_tree_C(edge->child);

        /* ... but a buffered switch isolates the downstream capacitance. */
        if (!device_ctx.rr_switch_inf[edge->iswitch].buffered())
            C += C_child;
    }

    rc_node->C_downstream = C;
    return (C);
}
static void load_rc_tree_T(t_rc_node* rc_node, float T_arrival) {
    /* Pre-order depth-first traversal (via recursion) that computes the Tdel *
     * to each node in the rc tree.  T_arrival is the time at which the       *
     * signal hits the input to this node.                                    */
    auto& device_ctx = g_vpr_ctx.device();

    float Rmetal = device_ctx.rr_nodes[rc_node->inode].R();

    /* NB: device_ctx.rr_nodes[inode].C gives the capacitance of this node, while *
     * rc_node->C_downstream gives the unbuffered downstream capacitance rooted   *
     * at this node, including the C of the node itself.  The node's own C is     *
     * multiplied by 0.5 * Rmetal since it is a distributed RC line, so           *
     * 0.5 * Rmetal * Cnode is a pessimistic end-to-end delay estimate.  The      *
     * downstream capacitance (excluding this node's own C) is assumed to hang,   *
     * on average, halfway along the line, so it is also multiplied by            *
     * 0.5 * Rmetal.  To be totally pessimistic the downstream part would be      *
     * multiplied by the full Rmetal.  Play with this equation if you like.       */
    float Tdel = T_arrival + 0.5 * rc_node->C_downstream * Rmetal;
    rc_node->Tdel = Tdel;

    /* Expand the children of this node to load their Tdel values. */
    for (t_linked_rc_edge* edge = rc_node->u.child_list; edge != nullptr; edge = edge->next) {
        short iswitch = edge->iswitch;
        t_rc_node* child = edge->child;

        /* Switch resistance driving the child's subtree, plus the switch's   *
         * intrinsic delay.                                                   */
        float Tchild = Tdel + device_ctx.rr_switch_inf[iswitch].R * child->C_downstream;
        Tchild += device_ctx.rr_switch_inf[iswitch].Tdel;

        load_rc_tree_T(child, Tchild);
    }
}
/* Loads the net delay array for net net_id.  The rc tree for that net must  *
 * have already been completely built and loaded (load_rc_tree_C and         *
 * load_rc_tree_T run).  Also frees any extra per-SINK rc_node list entries  *
 * that alloc_and_load_rc_tree created for multiply-used SINKs.              */
static void load_one_net_delay(vtr::vector<ClusterNetId, float*>& net_delay, ClusterNetId net_id, t_linked_rc_ptr* rr_node_to_rc_node) {
    unsigned int ipin, inode;
    float Tmax;
    t_rc_node* rc_node;
    t_linked_rc_ptr *linked_rc_ptr, *next_ptr;

    auto& cluster_ctx = g_vpr_ctx.clustering();
    auto& route_ctx = g_vpr_ctx.routing();

    /* Pin 0 is the driver; only sinks [1..num_pins-1] have delay entries. */
    for (ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) {
        inode = route_ctx.net_rr_terminals[net_id][ipin];

        linked_rc_ptr = rr_node_to_rc_node[inode].next;
        rc_node = rr_node_to_rc_node[inode].rc_node;
        Tmax = rc_node->Tdel;

        /* The if below only executes when one net connects several times to the *
         * same SINK.  In this case, I can't tell which net pin each connection  *
         * to this SINK corresponds to (I can just choose arbitrarily).  To make *
         * sure the timing behaviour converges, I pessimistically set the delay  *
         * for all of the connections to this SINK by this net to be the max. of *
         * the delays from this net to this SINK.  NB:  This code only occurs    *
         * when a net connects more than once to the same pin class on the same  *
         * logic block.  Only a weird architecture would allow this.             */
        if (linked_rc_ptr != nullptr) {
            /* The first time I hit a multiply-used SINK, I choose the largest delay *
             * from this net to this SINK and use it for every connection to this    *
             * SINK by this net.                                                     */
            do {
                rc_node = linked_rc_ptr->rc_node;
                if (rc_node->Tdel > Tmax) {
                    Tmax = rc_node->Tdel;
                    /* Keep the max-delay rc_node as the canonical one so later *
                     * pins hitting this SINK see the same (max) delay.         */
                    rr_node_to_rc_node[inode].rc_node = rc_node;
                }
                next_ptr = linked_rc_ptr->next;
                /* Entry was vtr::malloc'ed in alloc_and_load_rc_tree; freed    *
                 * with free() -- presumably vtr::malloc wraps malloc (TODO     *
                 * confirm against vtr_memory).                                 */
                free(linked_rc_ptr);
                linked_rc_ptr = next_ptr;
            } while (linked_rc_ptr != nullptr); /* End do while */

            rr_node_to_rc_node[inode].next = nullptr;
        }
        /* End of if multiply-used SINK */

        net_delay[net_id][ipin] = Tmax;
    }
}
static void load_one_constant_net_delay(vtr::vector<ClusterNetId, float*>& net_delay, ClusterNetId net_id, float delay_value) {
    /* Sets every sink entry of the net_delay array for net net_id to *
     * delay_value (pin 0 is the driver and has no entry).            */
    auto& cluster_ctx = g_vpr_ctx.clustering();

    unsigned int num_pins = cluster_ctx.clb_nlist.net_pins(net_id).size();
    for (unsigned int ipin = 1; ipin < num_pins; ipin++)
        net_delay[net_id][ipin] = delay_value;
}
static void free_rc_tree(t_rc_node* rc_root,
                         t_rc_node** rc_node_free_list_ptr,
                         t_linked_rc_edge** rc_edge_free_list_ptr) {
    /* Returns the entire rc tree rooted at rc_root to the node and edge  *
     * free lists.  Depth-first post-order traversal via recursion.       */
    t_linked_rc_edge* edge = rc_root->u.child_list;

    while (edge != nullptr) {
        /* Free the child's subtree before recycling the edge to it. */
        free_rc_tree(edge->child, rc_node_free_list_ptr, rc_edge_free_list_ptr);

        t_linked_rc_edge* next_edge = edge->next;
        free_linked_rc_edge(edge, rc_edge_free_list_ptr);
        edge = next_edge;
    }

    free_rc_node(rc_root, rc_node_free_list_ptr);
}
static void reset_rr_node_to_rc_node(t_linked_rc_ptr* rr_node_to_rc_node, ClusterNetId net_id) {
    /* Clears the rr_node -> rc_node mapping entries that were set while      *
     * building the RC tree for net net_id, by re-walking the net's           *
     * traceback.  Any extra linked-list entries added for multiply-used      *
     * SINKs have already been freed by load_one_net_delay.                   */
    auto& route_ctx = g_vpr_ctx.routing();

    for (t_trace* tptr = route_ctx.trace[net_id].head; tptr != nullptr; tptr = tptr->next) {
        rr_node_to_rc_node[tptr->index].rc_node = nullptr;
    }
}
static void free_rc_node_free_list(t_rc_node* rc_node_free_list) {
    /* Really frees (i.e. calls free()) every rc_node on the free list. */
    t_rc_node* node = rc_node_free_list;

    while (node != nullptr) {
        t_rc_node* next = node->u.next; /* Grab the link before freeing. */
        free(node);
        node = next;
    }
}
static void free_rc_edge_free_list(t_linked_rc_edge* rc_edge_free_list) {
    /* Really frees (i.e. calls free()) every rc_edge on the free list. */
    t_linked_rc_edge* edge = rc_edge_free_list;

    while (edge != nullptr) {
        t_linked_rc_edge* next = edge->next; /* Grab the link before freeing. */
        free(edge);
        edge = next;
    }
}