| |
| #include <stack> |
| #include <vector> |
| #include <algorithm> |
| |
| #include "vtr_assert.h" |
| #include "vtr_list.h" |
| #include "vtr_log.h" |
| #include "vtr_memory.h" |
| |
| #include "vpr_types.h" |
| #include "vpr_error.h" |
| |
| #include "globals.h" |
| #include "atom_netlist.h" |
| #include "path_delay2.h" |
| #include "read_xml_arch_file.h" |
| |
| #include "path_delay.h" |
| /******************* Subroutines local to this module ************************/ |
| |
| static int *alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr); |
| |
| void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops); |
| |
| void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes); |
| |
| std::vector<std::vector<int> > detect_timing_graph_combinational_loops(); |
| |
| std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size); |
| |
| void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, bool* tnode_instack, |
| std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs, |
| size_t min_size, int inode); |
| |
| void print_comb_loop(std::vector<int>& loop_tnodes); |
| /************************** Subroutine definitions ***************************/ |
| |
| static int * |
| alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr) { |
| |
| /* Allocates an array and fills it with the number of in-edges (inputs) to * |
| * each tnode. While doing this it also checks that each edge in the timing * |
| * graph points to a valid tnode. Also counts the number of sinks. */ |
| |
| int inode, iedge, to_node, num_edges, error, num_sinks; |
| int *tnode_num_fanin; |
| t_tedge *tedge; |
| |
| auto& timing_ctx = g_vpr_ctx.timing(); |
| |
| tnode_num_fanin = (int *) vtr::calloc(timing_ctx.num_tnodes, sizeof(int)); |
| error = 0; |
| num_sinks = 0; |
| |
| for (inode = 0; inode < timing_ctx.num_tnodes; inode++) { |
| num_edges = timing_ctx.tnodes[inode].num_edges; |
| |
| if (num_edges > 0) { |
| tedge = timing_ctx.tnodes[inode].out_edges; |
| for (iedge = 0; iedge < num_edges; iedge++) { |
| to_node = tedge[iedge].to_node; |
| if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes |
| |
| if (to_node < 0 || to_node >= timing_ctx.num_tnodes) { |
| vtr::printf_error(__FILE__, __LINE__, |
| "in alloc_and_load_tnode_fanin_and_check_edges:\n"); |
| vtr::printf_error(__FILE__, __LINE__, |
| "\ttnode #%d edge #%d goes to illegal node #%d.\n", |
| inode, iedge, to_node); |
| error++; |
| } |
| |
| tnode_num_fanin[to_node]++; |
| } |
| } |
| |
| else if (num_edges == 0) { |
| num_sinks++; |
| } |
| |
| else { |
| vtr::printf_error(__FILE__, __LINE__, |
| "in alloc_and_load_tnode_fanin_and_check_edges:\n"); |
| vtr::printf_error(__FILE__, __LINE__, |
| "\ttnode #%d has %d edges.\n", |
| inode, num_edges); |
| error++; |
| } |
| |
| } |
| |
| if (error != 0) { |
| vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__, |
| "Found %d Errors in the timing graph. Aborting.\n", error); |
| } |
| |
| *num_sinks_ptr = num_sinks; |
| return (tnode_num_fanin); |
| } |
| |
| int alloc_and_load_timing_graph_levels() { |
| |
| /* Does a breadth-first search through the timing graph in order to levelize * |
| * it. This allows subsequent traversals to be done topologically for speed. * |
| * Also returns the number of sinks in the graph (nodes with no fanout). */ |
| |
| vtr::t_linked_int *free_list_head, *nodes_at_level_head; |
| int inode, num_at_level, iedge, to_node, num_edges, num_sinks, num_levels; |
| unsigned i; |
| t_tedge *tedge; |
| |
| auto& timing_ctx = g_vpr_ctx.mutable_timing(); |
| |
| /* [0..timing_ctx.num_tnodes-1]. # of in-edges to each tnode that have not yet been * |
| * seen in this traversal. */ |
| int *tnode_fanin_left; |
| |
| tnode_fanin_left = alloc_and_load_tnode_fanin_and_check_edges(&num_sinks); |
| |
| free_list_head = nullptr; |
| nodes_at_level_head = nullptr; |
| |
| /* Very conservative -> max number of levels = timing_ctx.num_tnodes. Realloc later. * |
| * Temporarily need one extra level on the end because I look at the first * |
| * empty level. */ |
| |
| timing_ctx.tnodes_at_level.resize(timing_ctx.num_tnodes + 1); |
| |
| /* Scan through the timing graph, putting all the primary input nodes (no * |
| * fanin) into level 0 of the level structure. */ |
| |
| num_at_level = 0; |
| |
| for (inode = 0; inode < timing_ctx.num_tnodes; inode++) { |
| if (tnode_fanin_left[inode] == 0) { |
| num_at_level++; |
| nodes_at_level_head = insert_in_int_list(nodes_at_level_head, inode, |
| &free_list_head); |
| } |
| } |
| |
| alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level, |
| &timing_ctx.tnodes_at_level[0], &free_list_head); |
| |
| num_levels = 0; |
| |
| while (num_at_level != 0) { /* Until there's nothing in the queue. */ |
| num_levels++; |
| num_at_level = 0; |
| |
| for (i = 0; i < timing_ctx.tnodes_at_level[num_levels - 1].size(); i++) { |
| inode = timing_ctx.tnodes_at_level[num_levels - 1][i]; |
| tedge = timing_ctx.tnodes[inode].out_edges; |
| num_edges = timing_ctx.tnodes[inode].num_edges; |
| |
| for (iedge = 0; iedge < num_edges; iedge++) { |
| to_node = tedge[iedge].to_node; |
| if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes |
| |
| tnode_fanin_left[to_node]--; |
| |
| if (tnode_fanin_left[to_node] == 0) { |
| num_at_level++; |
| nodes_at_level_head = insert_in_int_list( |
| nodes_at_level_head, to_node, &free_list_head); |
| } |
| } |
| } |
| |
| alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level, |
| &timing_ctx.tnodes_at_level[num_levels], &free_list_head); |
| } |
| |
| timing_ctx.tnodes_at_level.resize(num_levels); |
| timing_ctx.num_tnode_levels = num_levels; |
| |
| free(tnode_fanin_left); |
| free_int_list(&free_list_head); |
| return (num_sinks); |
| } |
| |
| void check_timing_graph() { |
| |
| /* Checks the timing graph to see that: (1) all the tnodes have been put * |
| * into some level of the timing graph; */ |
| |
| /* Addition error checks that need to be done but not yet implemented: (2) the number of primary inputs * |
| * to the timing graph is equal to the number of input pads + the number of * |
| * constant generators; and (3) the number of sinks (nodes with no fanout) * |
| * equals the number of output pads + the number of flip flops. */ |
| |
| int num_tnodes_check, ilevel, error; |
| |
| auto& timing_ctx = g_vpr_ctx.timing(); |
| |
| error = 0; |
| num_tnodes_check = 0; |
| |
| /* TODO: Rework error checks for I/Os*/ |
| |
| for (ilevel = 0; ilevel < timing_ctx.num_tnode_levels; ilevel++) |
| num_tnodes_check += timing_ctx.tnodes_at_level[ilevel].size(); |
| |
| if (num_tnodes_check != timing_ctx.num_tnodes) { |
| vtr::printf_error(__FILE__, __LINE__, |
| "Error in check_timing_graph: %d tnodes appear in the tnode level structure. Expected %d.\n", |
| num_tnodes_check, timing_ctx.num_tnodes); |
| vtr::printf_info("Checking the netlist for combinational cycles:\n"); |
| if (timing_ctx.num_tnodes > num_tnodes_check) { |
| std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops(); |
| |
| //Inform user about Combinational Loops |
| size_t iloop; |
| size_t itnode; |
| for(iloop = 0; iloop < tnode_comb_loops.size(); iloop++) { |
| vtr::printf_info(" Combinational Loop %d contains the following nodes:\n", iloop); |
| for(itnode = 0; itnode < tnode_comb_loops[iloop].size(); itnode++) { |
| vtr::printf_info(" tnode: %d\n", tnode_comb_loops[iloop][itnode]); |
| } |
| } |
| } |
| error++; |
| } |
| /* Todo: Add error checks that # of flip-flops, memories, and other |
| black boxes match # of sinks/sources*/ |
| |
| if (error != 0) { |
| vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__, |
| "Found %d Errors in the timing graph. Aborting.\n", error); |
| } |
| } |
| |
| float print_critical_path_node(FILE * fp, vtr::t_linked_int * critical_path_node, vtr::vector_map<ClusterBlockId, t_pb **> &pin_id_to_pb_mapping) { |
| |
| /* Prints one tnode on the critical path out to fp. Returns the delay to the next node. */ |
| |
| int inode, downstream_node; |
| ClusterBlockId iblk; |
| ClusterNetId inet; |
| t_pb_graph_pin * pb_graph_pin; |
| e_tnode_type type; |
| static const char *tnode_type_names[] = { "TN_INPAD_SOURCE", "TN_INPAD_OPIN", |
| "TN_OUTPAD_IPIN", "TN_OUTPAD_SINK", "TN_CB_IPIN", "TN_CB_OPIN", |
| "TN_INTERMEDIATE_NODE", "TN_PRIMITIVE_IPIN", "TN_PRIMITIVE_OPIN", "TN_FF_IPIN", |
| "TN_FF_OPIN", "TN_FF_SINK", "TN_FF_SOURCE", "TN_FF_CLOCK", "TN_CLOCK_SOURCE", "TN_CLOCK_OPIN", |
| "TN_CONSTANT_GEN_SOURCE" }; |
| |
| vtr::t_linked_int *next_crit_node; |
| float Tdel; |
| |
| auto& cluster_ctx = g_vpr_ctx.clustering(); |
| auto& timing_ctx = g_vpr_ctx.timing(); |
| |
| inode = critical_path_node->data; |
| type = timing_ctx.tnodes[inode].type; |
| iblk = timing_ctx.tnodes[inode].block; |
| pb_graph_pin = timing_ctx.tnodes[inode].pb_graph_pin; |
| |
| fprintf(fp, "Node: %d %s Block #%zu (%s)\n", inode, tnode_type_names[type], |
| size_t(iblk), cluster_ctx.clb_nlist.block_name(iblk).c_str()); |
| |
| if (pb_graph_pin == nullptr) { |
| VTR_ASSERT( |
| type == TN_INPAD_SOURCE || type == TN_OUTPAD_SINK || type == TN_FF_SOURCE || type == TN_FF_SINK); |
| } |
| |
| if (pb_graph_pin != nullptr) { |
| fprintf(fp, "Pin: %s.%s[%d] pb (%s)", pb_graph_pin->parent_node->pb_type->name, |
| pb_graph_pin->port->name, pb_graph_pin->pin_number, pin_id_to_pb_mapping[iblk][pb_graph_pin->pin_count_in_cluster]->name); |
| } |
| if (type != TN_INPAD_SOURCE && type != TN_OUTPAD_SINK) { |
| fprintf(fp, "\n"); |
| } |
| |
| fprintf(fp, "T_arr: %g T_req: %g ", timing_ctx.tnodes[inode].T_arr, |
| timing_ctx.tnodes[inode].T_req); |
| |
| next_crit_node = critical_path_node->next; |
| if (next_crit_node != nullptr) { |
| downstream_node = next_crit_node->data; |
| Tdel = timing_ctx.tnodes[downstream_node].T_arr - timing_ctx.tnodes[inode].T_arr; |
| fprintf(fp, "Tdel: %g\n", Tdel); |
| } else { /* last node, no Tdel. */ |
| Tdel = 0.; |
| fprintf(fp, "\n"); |
| } |
| |
| auto& atom_ctx = g_vpr_ctx.atom(); |
| |
| AtomNetId atom_net_id = cluster_ctx.clb_nlist.block_pb(iblk)->pb_route[pb_graph_pin->pin_count_in_cluster].atom_net_id; |
| if (type == TN_CB_OPIN) { |
| inet = atom_ctx.lookup.clb_net(atom_net_id); |
| VTR_ASSERT(inet != ClusterNetId::INVALID()); |
| fprintf(fp, "External-to-Block Net: #%zu (%s). Pins on net: %zu.\n", |
| size_t(inet), cluster_ctx.clb_nlist.net_name(inet).c_str(), cluster_ctx.clb_nlist.net_pins(inet).size()); |
| } else if (pb_graph_pin != nullptr) { |
| fprintf(fp, "Internal Net: %s. Pins on net: %zu.\n", |
| atom_ctx.nlist.net_name(atom_net_id).c_str(), atom_ctx.nlist.net_pins(atom_net_id).size()); |
| } |
| |
| fprintf(fp, "\n"); |
| return (Tdel); |
| } |
| |
| //Repeatedly detects combinational loops and remove timing edges to break them. |
| // |
| // The idea behind the implementation of is to identify Strongly |
| // Connected Components (SCCs) in the timing graph which, by definition, |
| // must contain cycles if they include more than one element. This is done using |
| // Tarjan's algorithm in O(V + E) time. |
| // |
| // Once the SCCs are identified, an arbitrary edge in the timing graph is |
| // disconnected to break the cycle. Since it may be possible for smaller sub-SCCs |
| // to result, this is done iteratively until no SCCs with more than one element |
| // are found. |
| void detect_and_fix_timing_graph_combinational_loops() { |
| int comb_cycle_iter_count = 0; |
| int comb_cycle_count = 0; |
| |
| vtr::printf_info("Iteratively removing timing edges to break combinational cycles in timing graph.\n"); |
| |
| std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops(); |
| |
| //Repeat until all loops broken |
| while(tnode_comb_loops.size() > 0) { |
| comb_cycle_iter_count++; |
| vtr::printf_info("Found %d Combinational Loops in the timing graph on iteration %d.\n", |
| tnode_comb_loops.size(), comb_cycle_iter_count); |
| vtr::printf_warning(__FILE__, __LINE__, |
| "Combinational Loops can not be analyzed properly and will be " |
| "arbitrarily disconnected.\n"); |
| |
| break_timing_graph_combinational_loops(tnode_comb_loops); |
| |
| comb_cycle_count += tnode_comb_loops.size(); |
| |
| tnode_comb_loops = detect_timing_graph_combinational_loops(); |
| } |
| vtr::printf_info("Removed %d combinational cycles from timing graph after %d iteration(s)\n", |
| comb_cycle_count, comb_cycle_iter_count); |
| } |
| |
| /* |
| * Identify combinational loops in the timing graph |
| */ |
| std::vector<std::vector<int> > detect_timing_graph_combinational_loops() { |
| //Combinational loops are SCC with >= 2 elements in the |
| //timing graph |
| return identify_strongly_connected_components(2); |
| } |
| |
| /* |
| * This function breaks every combinational loop passed to it. Each loop is represented |
| * as a vector of tnode indicies*/ |
| void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops) { |
| size_t iloop; |
| for(iloop = 0; iloop < tnode_comb_loops.size(); iloop++) { |
| break_timing_graph_combinational_loop(tnode_comb_loops[iloop]); |
| } |
| } |
| |
| /* |
| * Given a set of tnode indicies forming a combinational loop, |
| * this breaks the loop by removing an arbitrary edge from the |
| * cycle. |
| */ |
| void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes) { |
| int i_first_tnode; |
| int i_edge; |
| int i_to_tnode; |
| auto& timing_ctx = g_vpr_ctx.timing(); |
| |
| VTR_ASSERT(loop_tnodes.size() >= 2); //Must have atleast 2 nodes for a valid cycle |
| |
| //Find an edge between two tnodes in the loop set |
| // arbitrarily decide that it will be the first edge |
| // from the first tnode which fans out to another tnode |
| // in the loop set that will be cut |
| i_first_tnode = loop_tnodes[0]; |
| |
| for(i_edge = 0; i_edge < timing_ctx.tnodes[i_first_tnode].num_edges; i_edge++) { |
| i_to_tnode = timing_ctx.tnodes[i_first_tnode].out_edges[i_edge].to_node; |
| |
| if(std::find(loop_tnodes.begin(), loop_tnodes.end(), i_to_tnode) != loop_tnodes.end()) { |
| //This edge does fanout into the loop_tnodes set |
| // so cut it |
| vtr::printf_warning(__FILE__, __LINE__, "Disconnecting timing graph edge from tnode %d to tnode %d to break combinational cycle\n", i_first_tnode, i_to_tnode); |
| |
| //Mark the original target node as a combinational loop breakpoint |
| timing_ctx.tnodes[i_to_tnode].is_comb_loop_breakpoint = true; |
| |
| //Mark the edge as invalid |
| timing_ctx.tnodes[i_first_tnode].out_edges[i_edge].to_node = DO_NOT_ANALYSE; |
| |
| return; |
| } |
| } |
| vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__, |
| "Could not find edge to break combinational loop in timing graph.\n"); |
| } |
| |
| /* |
| * Tarjan's algorithm for finding Strongly Connected Components (SCCs) in |
| * a direct graph. Only SCCs with min_size or greater members are returned. |
| * |
| * We keep track of the following information: |
| * - The current 'index' of the node (stored in tnode_indexes), this |
| * corresponds to the order the node was traversed in the DFS |
| * - The current 'lowlink' of the node (stored in tnode_lowlinks), this |
| * corresponds to the lowest node index which connects to the current |
| * node |
| * - Whether the node is currently in the stack (stored in tnode_instack) |
| * - A stack (tnode_stack) of elements in the current SCC |
| * |
| * The key idea behind the algorithm is that a node stays on the stack if it |
| * connects to a node earlier in the traversal. |
| */ |
| std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size) { |
| int i; |
| int index = 0; //The current index of the traversal |
| std::vector<std::vector<int> > tnode_sccs; |
| |
| auto& timing_ctx = g_vpr_ctx.timing(); |
| |
| //Allocate book-keeping information |
| int* tnode_indexes = (int*) vtr::calloc(timing_ctx.num_tnodes, sizeof(int)); |
| int* tnode_lowlinks = (int*) vtr::calloc(timing_ctx.num_tnodes, sizeof(int)); |
| bool* tnode_instack = (bool*) vtr::calloc(timing_ctx.num_tnodes, sizeof(bool)); |
| |
| //Initialize everything to unvisited |
| for(i = 0; i < timing_ctx.num_tnodes; i++) { |
| tnode_indexes[i] = -1; |
| tnode_lowlinks[i] = -1; |
| tnode_instack[i] = false; |
| } |
| |
| //The stack of nodes |
| std::stack<int> tnode_stack; |
| |
| //We ensure that every node gets traversed |
| for(i = 0 ; i < timing_ctx.num_tnodes; i++) { |
| if(tnode_indexes[i] == -1) { |
| strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, i); |
| } |
| } |
| |
| //Clean-up |
| free(tnode_indexes); |
| free(tnode_lowlinks); |
| free(tnode_instack); |
| |
| return tnode_sccs; |
| } |
| |
| void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, bool* tnode_instack, |
| std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs, |
| size_t min_size, int inode) { |
| int iedge; //Index for out-going edges of the current node (inode) |
| int iscc_element; //Index for the current SCC element (used when poping stack) |
| int to_node_index; //Index to the sink node for the current edge |
| |
| auto& timing_ctx = g_vpr_ctx.timing(); |
| |
| //Mark this node as visited |
| tnode_indexes[inode] = index; |
| tnode_lowlinks[inode] = index; |
| index += 1; |
| |
| //Add it to the stack |
| tnode_stack.push(inode); |
| tnode_instack[inode] = true; |
| |
| //Fanout of inode |
| for(iedge = 0; iedge < timing_ctx.tnodes[inode].num_edges; iedge++) { |
| to_node_index = timing_ctx.tnodes[inode].out_edges[iedge].to_node; |
| if(to_node_index == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes |
| |
| if(tnode_indexes[to_node_index] == -1) { |
| //Haven't visited successor of inode (to_node) yet, recurse |
| strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, to_node_index); |
| VTR_ASSERT(tnode_lowlinks[inode] >= 0); |
| VTR_ASSERT(tnode_lowlinks[to_node_index] >= 0); |
| |
| //We are connected to to_node, so our lowest link should be either ourselves, or |
| //to_node's lowest link |
| tnode_lowlinks[inode] = std::min(tnode_lowlinks[inode], tnode_lowlinks[to_node_index]); |
| } else if (tnode_instack[to_node_index]) { |
| //to_node was in the stack, and so is part of the current SCC |
| VTR_ASSERT(tnode_lowlinks[inode] >= 0); |
| VTR_ASSERT(tnode_indexes[to_node_index] >= 0); |
| |
| //to_node was on the stack, since we connect to it our lowest link is either ourselves |
| //or the index of to_node (since it may have been traversed earlier) |
| tnode_lowlinks[inode] = std::min(tnode_lowlinks[inode], tnode_indexes[to_node_index]); |
| } |
| } |
| |
| VTR_ASSERT(tnode_indexes[inode] >= 0); |
| |
| if(tnode_lowlinks[inode] == tnode_indexes[inode]) { |
| //This inode is the root of a new SCC |
| |
| //Create a new SCC |
| std::vector<int> scc; |
| //Pop of elements of the stack until we reach ourselves |
| do { |
| iscc_element = tnode_stack.top(); |
| tnode_stack.pop(); |
| tnode_instack[iscc_element] = false; |
| scc.push_back(iscc_element); //Add to the SCC |
| } while(iscc_element != inode); |
| |
| //Add the SCC to the list of SCC if the meet |
| // the minimum size requirement |
| if(scc.size() >= min_size) { |
| tnode_sccs.push_back(scc); |
| } |
| } |
| } |
| |
| void print_comb_loop(std::vector<int>& loop_tnodes) { |
| auto& timing_ctx = g_vpr_ctx.timing(); |
| |
| vtr::printf_info("Comb Loop:\n"); |
| for(std::vector<int>::iterator it = loop_tnodes.begin(); it != loop_tnodes.end(); it++) { |
| int i_tnode = *it; |
| if(timing_ctx.tnodes[i_tnode].pb_graph_pin != nullptr) { |
| vtr::printf_info("\ttnode: %d %s.%s[%d]\n", i_tnode, |
| timing_ctx.tnodes[i_tnode].pb_graph_pin->parent_node->pb_type->name, |
| timing_ctx.tnodes[i_tnode].pb_graph_pin->port->name, |
| timing_ctx.tnodes[i_tnode].pb_graph_pin->pin_number); |
| } else { |
| vtr::printf_info("\ttnode: %d\n", i_tnode); |
| } |
| } |
| } |