blob: 18ad4584c5247d8fdbbe0d625eb3059207d549bd [file] [log] [blame]
#include <stack>
#include <vector>
#include <algorithm>
#include "vtr_assert.h"
#include "vtr_list.h"
#include "vtr_log.h"
#include "vtr_memory.h"
#include "vpr_types.h"
#include "vpr_error.h"
#include "globals.h"
#include "atom_netlist.h"
#include "path_delay2.h"
#include "read_xml_arch_file.h"
#include "path_delay.h"
/******************* Subroutines local to this module ************************/
static int *alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr);
void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops);
void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes);
std::vector<std::vector<int> > detect_timing_graph_combinational_loops();
std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size);
void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, bool* tnode_instack,
std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs,
size_t min_size, int inode);
void print_comb_loop(std::vector<int>& loop_tnodes);
/************************** Subroutine definitions ***************************/
static int *
alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr) {
/* Allocates an array and fills it with the number of in-edges (inputs) to *
* each tnode. While doing this it also checks that each edge in the timing *
* graph points to a valid tnode. Also counts the number of sinks. */
int inode, iedge, to_node, num_edges, error, num_sinks;
int *tnode_num_fanin;
t_tedge *tedge;
auto& timing_ctx = g_vpr_ctx.timing();
tnode_num_fanin = (int *) vtr::calloc(timing_ctx.num_tnodes, sizeof(int));
error = 0;
num_sinks = 0;
for (inode = 0; inode < timing_ctx.num_tnodes; inode++) {
num_edges = timing_ctx.tnodes[inode].num_edges;
if (num_edges > 0) {
tedge = timing_ctx.tnodes[inode].out_edges;
for (iedge = 0; iedge < num_edges; iedge++) {
to_node = tedge[iedge].to_node;
if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes
if (to_node < 0 || to_node >= timing_ctx.num_tnodes) {
vtr::printf_error(__FILE__, __LINE__,
"in alloc_and_load_tnode_fanin_and_check_edges:\n");
vtr::printf_error(__FILE__, __LINE__,
"\ttnode #%d edge #%d goes to illegal node #%d.\n",
inode, iedge, to_node);
error++;
}
tnode_num_fanin[to_node]++;
}
}
else if (num_edges == 0) {
num_sinks++;
}
else {
vtr::printf_error(__FILE__, __LINE__,
"in alloc_and_load_tnode_fanin_and_check_edges:\n");
vtr::printf_error(__FILE__, __LINE__,
"\ttnode #%d has %d edges.\n",
inode, num_edges);
error++;
}
}
if (error != 0) {
vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__,
"Found %d Errors in the timing graph. Aborting.\n", error);
}
*num_sinks_ptr = num_sinks;
return (tnode_num_fanin);
}
int alloc_and_load_timing_graph_levels() {
/* Does a breadth-first search through the timing graph in order to levelize *
* it. This allows subsequent traversals to be done topologically for speed. *
* Also returns the number of sinks in the graph (nodes with no fanout). */
vtr::t_linked_int *free_list_head, *nodes_at_level_head;
int inode, num_at_level, iedge, to_node, num_edges, num_sinks, num_levels;
unsigned i;
t_tedge *tedge;
auto& timing_ctx = g_vpr_ctx.mutable_timing();
/* [0..timing_ctx.num_tnodes-1]. # of in-edges to each tnode that have not yet been *
* seen in this traversal. */
int *tnode_fanin_left;
tnode_fanin_left = alloc_and_load_tnode_fanin_and_check_edges(&num_sinks);
free_list_head = nullptr;
nodes_at_level_head = nullptr;
/* Very conservative -> max number of levels = timing_ctx.num_tnodes. Realloc later. *
* Temporarily need one extra level on the end because I look at the first *
* empty level. */
timing_ctx.tnodes_at_level.resize(timing_ctx.num_tnodes + 1);
/* Scan through the timing graph, putting all the primary input nodes (no *
* fanin) into level 0 of the level structure. */
num_at_level = 0;
for (inode = 0; inode < timing_ctx.num_tnodes; inode++) {
if (tnode_fanin_left[inode] == 0) {
num_at_level++;
nodes_at_level_head = insert_in_int_list(nodes_at_level_head, inode,
&free_list_head);
}
}
alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level,
&timing_ctx.tnodes_at_level[0], &free_list_head);
num_levels = 0;
while (num_at_level != 0) { /* Until there's nothing in the queue. */
num_levels++;
num_at_level = 0;
for (i = 0; i < timing_ctx.tnodes_at_level[num_levels - 1].size(); i++) {
inode = timing_ctx.tnodes_at_level[num_levels - 1][i];
tedge = timing_ctx.tnodes[inode].out_edges;
num_edges = timing_ctx.tnodes[inode].num_edges;
for (iedge = 0; iedge < num_edges; iedge++) {
to_node = tedge[iedge].to_node;
if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes
tnode_fanin_left[to_node]--;
if (tnode_fanin_left[to_node] == 0) {
num_at_level++;
nodes_at_level_head = insert_in_int_list(
nodes_at_level_head, to_node, &free_list_head);
}
}
}
alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level,
&timing_ctx.tnodes_at_level[num_levels], &free_list_head);
}
timing_ctx.tnodes_at_level.resize(num_levels);
timing_ctx.num_tnode_levels = num_levels;
free(tnode_fanin_left);
free_int_list(&free_list_head);
return (num_sinks);
}
void check_timing_graph() {
/* Checks the timing graph to see that: (1) all the tnodes have been put *
* into some level of the timing graph; */
/* Addition error checks that need to be done but not yet implemented: (2) the number of primary inputs *
* to the timing graph is equal to the number of input pads + the number of *
* constant generators; and (3) the number of sinks (nodes with no fanout) *
* equals the number of output pads + the number of flip flops. */
int num_tnodes_check, ilevel, error;
auto& timing_ctx = g_vpr_ctx.timing();
error = 0;
num_tnodes_check = 0;
/* TODO: Rework error checks for I/Os*/
for (ilevel = 0; ilevel < timing_ctx.num_tnode_levels; ilevel++)
num_tnodes_check += timing_ctx.tnodes_at_level[ilevel].size();
if (num_tnodes_check != timing_ctx.num_tnodes) {
vtr::printf_error(__FILE__, __LINE__,
"Error in check_timing_graph: %d tnodes appear in the tnode level structure. Expected %d.\n",
num_tnodes_check, timing_ctx.num_tnodes);
vtr::printf_info("Checking the netlist for combinational cycles:\n");
if (timing_ctx.num_tnodes > num_tnodes_check) {
std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops();
//Inform user about Combinational Loops
size_t iloop;
size_t itnode;
for(iloop = 0; iloop < tnode_comb_loops.size(); iloop++) {
vtr::printf_info(" Combinational Loop %d contains the following nodes:\n", iloop);
for(itnode = 0; itnode < tnode_comb_loops[iloop].size(); itnode++) {
vtr::printf_info(" tnode: %d\n", tnode_comb_loops[iloop][itnode]);
}
}
}
error++;
}
/* Todo: Add error checks that # of flip-flops, memories, and other
black boxes match # of sinks/sources*/
if (error != 0) {
vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__,
"Found %d Errors in the timing graph. Aborting.\n", error);
}
}
float print_critical_path_node(FILE * fp, vtr::t_linked_int * critical_path_node, vtr::vector_map<ClusterBlockId, t_pb **> &pin_id_to_pb_mapping) {
/* Prints one tnode on the critical path out to fp. Returns the delay to the next node. */
int inode, downstream_node;
ClusterBlockId iblk;
ClusterNetId inet;
t_pb_graph_pin * pb_graph_pin;
e_tnode_type type;
static const char *tnode_type_names[] = { "TN_INPAD_SOURCE", "TN_INPAD_OPIN",
"TN_OUTPAD_IPIN", "TN_OUTPAD_SINK", "TN_CB_IPIN", "TN_CB_OPIN",
"TN_INTERMEDIATE_NODE", "TN_PRIMITIVE_IPIN", "TN_PRIMITIVE_OPIN", "TN_FF_IPIN",
"TN_FF_OPIN", "TN_FF_SINK", "TN_FF_SOURCE", "TN_FF_CLOCK", "TN_CLOCK_SOURCE", "TN_CLOCK_OPIN",
"TN_CONSTANT_GEN_SOURCE" };
vtr::t_linked_int *next_crit_node;
float Tdel;
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& timing_ctx = g_vpr_ctx.timing();
inode = critical_path_node->data;
type = timing_ctx.tnodes[inode].type;
iblk = timing_ctx.tnodes[inode].block;
pb_graph_pin = timing_ctx.tnodes[inode].pb_graph_pin;
fprintf(fp, "Node: %d %s Block #%zu (%s)\n", inode, tnode_type_names[type],
size_t(iblk), cluster_ctx.clb_nlist.block_name(iblk).c_str());
if (pb_graph_pin == nullptr) {
VTR_ASSERT(
type == TN_INPAD_SOURCE || type == TN_OUTPAD_SINK || type == TN_FF_SOURCE || type == TN_FF_SINK);
}
if (pb_graph_pin != nullptr) {
fprintf(fp, "Pin: %s.%s[%d] pb (%s)", pb_graph_pin->parent_node->pb_type->name,
pb_graph_pin->port->name, pb_graph_pin->pin_number, pin_id_to_pb_mapping[iblk][pb_graph_pin->pin_count_in_cluster]->name);
}
if (type != TN_INPAD_SOURCE && type != TN_OUTPAD_SINK) {
fprintf(fp, "\n");
}
fprintf(fp, "T_arr: %g T_req: %g ", timing_ctx.tnodes[inode].T_arr,
timing_ctx.tnodes[inode].T_req);
next_crit_node = critical_path_node->next;
if (next_crit_node != nullptr) {
downstream_node = next_crit_node->data;
Tdel = timing_ctx.tnodes[downstream_node].T_arr - timing_ctx.tnodes[inode].T_arr;
fprintf(fp, "Tdel: %g\n", Tdel);
} else { /* last node, no Tdel. */
Tdel = 0.;
fprintf(fp, "\n");
}
auto& atom_ctx = g_vpr_ctx.atom();
AtomNetId atom_net_id = cluster_ctx.clb_nlist.block_pb(iblk)->pb_route[pb_graph_pin->pin_count_in_cluster].atom_net_id;
if (type == TN_CB_OPIN) {
inet = atom_ctx.lookup.clb_net(atom_net_id);
VTR_ASSERT(inet != ClusterNetId::INVALID());
fprintf(fp, "External-to-Block Net: #%zu (%s). Pins on net: %zu.\n",
size_t(inet), cluster_ctx.clb_nlist.net_name(inet).c_str(), cluster_ctx.clb_nlist.net_pins(inet).size());
} else if (pb_graph_pin != nullptr) {
fprintf(fp, "Internal Net: %s. Pins on net: %zu.\n",
atom_ctx.nlist.net_name(atom_net_id).c_str(), atom_ctx.nlist.net_pins(atom_net_id).size());
}
fprintf(fp, "\n");
return (Tdel);
}
//Repeatedly detects combinational loops and remove timing edges to break them.
//
// The idea behind the implementation of is to identify Strongly
// Connected Components (SCCs) in the timing graph which, by definition,
// must contain cycles if they include more than one element. This is done using
// Tarjan's algorithm in O(V + E) time.
//
// Once the SCCs are identified, an arbitrary edge in the timing graph is
// disconnected to break the cycle. Since it may be possible for smaller sub-SCCs
// to result, this is done iteratively until no SCCs with more than one element
// are found.
void detect_and_fix_timing_graph_combinational_loops() {
int comb_cycle_iter_count = 0;
int comb_cycle_count = 0;
vtr::printf_info("Iteratively removing timing edges to break combinational cycles in timing graph.\n");
std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops();
//Repeat until all loops broken
while(tnode_comb_loops.size() > 0) {
comb_cycle_iter_count++;
vtr::printf_info("Found %d Combinational Loops in the timing graph on iteration %d.\n",
tnode_comb_loops.size(), comb_cycle_iter_count);
vtr::printf_warning(__FILE__, __LINE__,
"Combinational Loops can not be analyzed properly and will be "
"arbitrarily disconnected.\n");
break_timing_graph_combinational_loops(tnode_comb_loops);
comb_cycle_count += tnode_comb_loops.size();
tnode_comb_loops = detect_timing_graph_combinational_loops();
}
vtr::printf_info("Removed %d combinational cycles from timing graph after %d iteration(s)\n",
comb_cycle_count, comb_cycle_iter_count);
}
/*
* Identify combinational loops in the timing graph
*/
std::vector<std::vector<int> > detect_timing_graph_combinational_loops() {
//Combinational loops are SCC with >= 2 elements in the
//timing graph
return identify_strongly_connected_components(2);
}
/*
* This function breaks every combinational loop passed to it. Each loop is represented
* as a vector of tnode indicies*/
void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops) {
size_t iloop;
for(iloop = 0; iloop < tnode_comb_loops.size(); iloop++) {
break_timing_graph_combinational_loop(tnode_comb_loops[iloop]);
}
}
/*
* Given a set of tnode indicies forming a combinational loop,
* this breaks the loop by removing an arbitrary edge from the
* cycle.
*/
void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes) {
int i_first_tnode;
int i_edge;
int i_to_tnode;
auto& timing_ctx = g_vpr_ctx.timing();
VTR_ASSERT(loop_tnodes.size() >= 2); //Must have atleast 2 nodes for a valid cycle
//Find an edge between two tnodes in the loop set
// arbitrarily decide that it will be the first edge
// from the first tnode which fans out to another tnode
// in the loop set that will be cut
i_first_tnode = loop_tnodes[0];
for(i_edge = 0; i_edge < timing_ctx.tnodes[i_first_tnode].num_edges; i_edge++) {
i_to_tnode = timing_ctx.tnodes[i_first_tnode].out_edges[i_edge].to_node;
if(std::find(loop_tnodes.begin(), loop_tnodes.end(), i_to_tnode) != loop_tnodes.end()) {
//This edge does fanout into the loop_tnodes set
// so cut it
vtr::printf_warning(__FILE__, __LINE__, "Disconnecting timing graph edge from tnode %d to tnode %d to break combinational cycle\n", i_first_tnode, i_to_tnode);
//Mark the original target node as a combinational loop breakpoint
timing_ctx.tnodes[i_to_tnode].is_comb_loop_breakpoint = true;
//Mark the edge as invalid
timing_ctx.tnodes[i_first_tnode].out_edges[i_edge].to_node = DO_NOT_ANALYSE;
return;
}
}
vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__,
"Could not find edge to break combinational loop in timing graph.\n");
}
/*
* Tarjan's algorithm for finding Strongly Connected Components (SCCs) in
* a direct graph. Only SCCs with min_size or greater members are returned.
*
* We keep track of the following information:
* - The current 'index' of the node (stored in tnode_indexes), this
* corresponds to the order the node was traversed in the DFS
* - The current 'lowlink' of the node (stored in tnode_lowlinks), this
* corresponds to the lowest node index which connects to the current
* node
* - Whether the node is currently in the stack (stored in tnode_instack)
* - A stack (tnode_stack) of elements in the current SCC
*
* The key idea behind the algorithm is that a node stays on the stack if it
* connects to a node earlier in the traversal.
*/
std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size) {
int i;
int index = 0; //The current index of the traversal
std::vector<std::vector<int> > tnode_sccs;
auto& timing_ctx = g_vpr_ctx.timing();
//Allocate book-keeping information
int* tnode_indexes = (int*) vtr::calloc(timing_ctx.num_tnodes, sizeof(int));
int* tnode_lowlinks = (int*) vtr::calloc(timing_ctx.num_tnodes, sizeof(int));
bool* tnode_instack = (bool*) vtr::calloc(timing_ctx.num_tnodes, sizeof(bool));
//Initialize everything to unvisited
for(i = 0; i < timing_ctx.num_tnodes; i++) {
tnode_indexes[i] = -1;
tnode_lowlinks[i] = -1;
tnode_instack[i] = false;
}
//The stack of nodes
std::stack<int> tnode_stack;
//We ensure that every node gets traversed
for(i = 0 ; i < timing_ctx.num_tnodes; i++) {
if(tnode_indexes[i] == -1) {
strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, i);
}
}
//Clean-up
free(tnode_indexes);
free(tnode_lowlinks);
free(tnode_instack);
return tnode_sccs;
}
void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, bool* tnode_instack,
std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs,
size_t min_size, int inode) {
int iedge; //Index for out-going edges of the current node (inode)
int iscc_element; //Index for the current SCC element (used when poping stack)
int to_node_index; //Index to the sink node for the current edge
auto& timing_ctx = g_vpr_ctx.timing();
//Mark this node as visited
tnode_indexes[inode] = index;
tnode_lowlinks[inode] = index;
index += 1;
//Add it to the stack
tnode_stack.push(inode);
tnode_instack[inode] = true;
//Fanout of inode
for(iedge = 0; iedge < timing_ctx.tnodes[inode].num_edges; iedge++) {
to_node_index = timing_ctx.tnodes[inode].out_edges[iedge].to_node;
if(to_node_index == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes
if(tnode_indexes[to_node_index] == -1) {
//Haven't visited successor of inode (to_node) yet, recurse
strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, to_node_index);
VTR_ASSERT(tnode_lowlinks[inode] >= 0);
VTR_ASSERT(tnode_lowlinks[to_node_index] >= 0);
//We are connected to to_node, so our lowest link should be either ourselves, or
//to_node's lowest link
tnode_lowlinks[inode] = std::min(tnode_lowlinks[inode], tnode_lowlinks[to_node_index]);
} else if (tnode_instack[to_node_index]) {
//to_node was in the stack, and so is part of the current SCC
VTR_ASSERT(tnode_lowlinks[inode] >= 0);
VTR_ASSERT(tnode_indexes[to_node_index] >= 0);
//to_node was on the stack, since we connect to it our lowest link is either ourselves
//or the index of to_node (since it may have been traversed earlier)
tnode_lowlinks[inode] = std::min(tnode_lowlinks[inode], tnode_indexes[to_node_index]);
}
}
VTR_ASSERT(tnode_indexes[inode] >= 0);
if(tnode_lowlinks[inode] == tnode_indexes[inode]) {
//This inode is the root of a new SCC
//Create a new SCC
std::vector<int> scc;
//Pop of elements of the stack until we reach ourselves
do {
iscc_element = tnode_stack.top();
tnode_stack.pop();
tnode_instack[iscc_element] = false;
scc.push_back(iscc_element); //Add to the SCC
} while(iscc_element != inode);
//Add the SCC to the list of SCC if the meet
// the minimum size requirement
if(scc.size() >= min_size) {
tnode_sccs.push_back(scc);
}
}
}
void print_comb_loop(std::vector<int>& loop_tnodes) {
auto& timing_ctx = g_vpr_ctx.timing();
vtr::printf_info("Comb Loop:\n");
for(std::vector<int>::iterator it = loop_tnodes.begin(); it != loop_tnodes.end(); it++) {
int i_tnode = *it;
if(timing_ctx.tnodes[i_tnode].pb_graph_pin != nullptr) {
vtr::printf_info("\ttnode: %d %s.%s[%d]\n", i_tnode,
timing_ctx.tnodes[i_tnode].pb_graph_pin->parent_node->pb_type->name,
timing_ctx.tnodes[i_tnode].pb_graph_pin->port->name,
timing_ctx.tnodes[i_tnode].pb_graph_pin->pin_number);
} else {
vtr::printf_info("\ttnode: %d\n", i_tnode);
}
}
}