| #include <assert.h> | |
| #include <stack> | |
| #include <vector> | |
| #include <algorithm> | |
| #include "util.h" | |
| #include "vpr_types.h" | |
| #include "globals.h" | |
| #include "path_delay2.h" | |
| #include "read_xml_arch_file.h" | |
| #include "path_delay.h" | |
| /************* Variables (globals) shared by all path_delay modules **********/ | |
| int num_tnode_levels; /* Number of levels in the timing graph; set by alloc_and_load_timing_graph_levels(). */ | |
| struct s_ivec *tnodes_at_level; | |
| /* [0..num_tnode_levels - 1]. Count and list of tnodes at each level of | |
| * the timing graph, to make topological searches easier. Level-0 nodes are | |
| * sources to the timing graph (types TN_FF_SOURCE, TN_INPAD_SOURCE | |
| * and TN_CONSTANT_GEN_SOURCE). Level-N nodes are in the immediate fanout of | |
| * nodes with level at most N-1. Allocated by | |
| * alloc_and_load_timing_graph_levels(). */ | |
| /******************* Subroutines local to this module ************************/ | |
| /* Counts the in-edges of every tnode, validates edge targets and counts sinks. */ | |
| static int *alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr); | |
| /* Breaks every loop in the given list (one edge is cut per loop). */ | |
| void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops); | |
| /* Cuts one edge leaving the first tnode of the loop that lands back inside the loop. */ | |
| void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes); | |
| /* Returns the strongly connected components with at least two tnodes. */ | |
| std::vector<std::vector<int> > detect_timing_graph_combinational_loops(); | |
| /* Returns all strongly connected components with at least min_size tnodes. */ | |
| std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size); | |
| /* Recursive depth-first step used by identify_strongly_connected_components(). */ | |
| void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, boolean* tnode_instack, | |
| std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs, | |
| size_t min_size, int inode); | |
| /* Prints the tnodes of one loop to stdout. */ | |
| void print_comb_loop(std::vector<int>& loop_tnodes); | |
| /************************** Subroutine definitions ***************************/ | |
| static int * | |
| alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr) { | |
| /* Allocates an array and fills it with the number of in-edges (inputs) to * | |
| * each tnode. While doing this it also checks that each edge in the timing * | |
| * graph points to a valid tnode. Also counts the number of sinks. */ | |
| int inode, iedge, to_node, num_edges, error, num_sinks; | |
| int *tnode_num_fanin; | |
| t_tedge *tedge; | |
| tnode_num_fanin = (int *) my_calloc(num_tnodes, sizeof(int)); | |
| error = 0; | |
| num_sinks = 0; | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| num_edges = tnode[inode].num_edges; | |
| if (num_edges > 0) { | |
| tedge = tnode[inode].out_edges; | |
| for (iedge = 0; iedge < num_edges; iedge++) { | |
| to_node = tedge[iedge].to_node; | |
| if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes | |
| if (to_node < 0 || to_node >= num_tnodes) { | |
| vpr_printf_error(__FILE__, __LINE__, | |
| "in alloc_and_load_tnode_fanin_and_check_edges:\n"); | |
| vpr_printf_error(__FILE__, __LINE__, | |
| "\ttnode #%d edge #%d goes to illegal node #%d.\n", | |
| inode, iedge, to_node); | |
| error++; | |
| } | |
| tnode_num_fanin[to_node]++; | |
| } | |
| } | |
| else if (num_edges == 0) { | |
| num_sinks++; | |
| } | |
| else { | |
| vpr_printf_error(__FILE__, __LINE__, | |
| "in alloc_and_load_tnode_fanin_and_check_edges:\n"); | |
| vpr_printf_error(__FILE__, __LINE__, | |
| "\ttnode #%d has %d edges.\n", | |
| inode, num_edges); | |
| error++; | |
| } | |
| } | |
| if (error != 0) { | |
| vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__, | |
| "Found %d Errors in the timing graph. Aborting.\n", error); | |
| } | |
| *num_sinks_ptr = num_sinks; | |
| return (tnode_num_fanin); | |
| } | |
| int alloc_and_load_timing_graph_levels(void) { | |
| /* Does a breadth-first search through the timing graph in order to levelize * | |
| * it. This allows subsequent traversals to be done topologically for speed. * | |
| * Also returns the number of sinks in the graph (nodes with no fanout). */ | |
| t_linked_int *free_list_head, *nodes_at_level_head; | |
| int inode, num_at_level, iedge, to_node, num_edges, num_sinks, num_levels, | |
| i; | |
| t_tedge *tedge; | |
| /* [0..num_tnodes-1]. # of in-edges to each tnode that have not yet been * | |
| * seen in this traversal. */ | |
| int *tnode_fanin_left; | |
| tnode_fanin_left = alloc_and_load_tnode_fanin_and_check_edges(&num_sinks); | |
| free_list_head = NULL; | |
| nodes_at_level_head = NULL; | |
| /* Very conservative -> max number of levels = num_tnodes. Realloc later. * | |
| * Temporarily need one extra level on the end because I look at the first * | |
| * empty level. */ | |
| tnodes_at_level = (struct s_ivec *) my_malloc( | |
| (num_tnodes + 1) * sizeof(struct s_ivec)); | |
| /* Scan through the timing graph, putting all the primary input nodes (no * | |
| * fanin) into level 0 of the level structure. */ | |
| num_at_level = 0; | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| if (tnode_fanin_left[inode] == 0) { | |
| num_at_level++; | |
| nodes_at_level_head = insert_in_int_list(nodes_at_level_head, inode, | |
| &free_list_head); | |
| } | |
| } | |
| alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level, | |
| &tnodes_at_level[0], &free_list_head); | |
| num_levels = 0; | |
| while (num_at_level != 0) { /* Until there's nothing in the queue. */ | |
| num_levels++; | |
| num_at_level = 0; | |
| for (i = 0; i < tnodes_at_level[num_levels - 1].nelem; i++) { | |
| inode = tnodes_at_level[num_levels - 1].list[i]; | |
| tedge = tnode[inode].out_edges; | |
| num_edges = tnode[inode].num_edges; | |
| for (iedge = 0; iedge < num_edges; iedge++) { | |
| to_node = tedge[iedge].to_node; | |
| if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes | |
| tnode_fanin_left[to_node]--; | |
| if (tnode_fanin_left[to_node] == 0) { | |
| num_at_level++; | |
| nodes_at_level_head = insert_in_int_list( | |
| nodes_at_level_head, to_node, &free_list_head); | |
| } | |
| } | |
| } | |
| alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level, | |
| &tnodes_at_level[num_levels], &free_list_head); | |
| } | |
| tnodes_at_level = (struct s_ivec *) my_realloc(tnodes_at_level, | |
| num_levels * sizeof(struct s_ivec)); | |
| num_tnode_levels = num_levels; | |
| free(tnode_fanin_left); | |
| free_int_list(&free_list_head); | |
| return (num_sinks); | |
| } | |
| void check_timing_graph(int num_sinks) { | |
| /* Checks the timing graph to see that: (1) all the tnodes have been put * | |
| * into some level of the timing graph; */ | |
| /* Addition error checks that need to be done but not yet implemented: (2) the number of primary inputs * | |
| * to the timing graph is equal to the number of input pads + the number of * | |
| * constant generators; and (3) the number of sinks (nodes with no fanout) * | |
| * equals the number of output pads + the number of flip flops. */ | |
| int num_tnodes_check, ilevel, error; | |
| error = 0; | |
| num_tnodes_check = 0; | |
| /* TODO: Rework error checks for I/Os*/ | |
| for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) | |
| num_tnodes_check += tnodes_at_level[ilevel].nelem; | |
| if (num_tnodes_check != num_tnodes) { | |
| vpr_printf_error(__FILE__, __LINE__, | |
| "Error in check_timing_graph: %d tnodes appear in the tnode level structure. Expected %d.\n", | |
| num_tnodes_check, num_tnodes); | |
| vpr_printf_info("Checking the netlist for combinational cycles:\n"); | |
| if (num_tnodes > num_tnodes_check) { | |
| std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops(); | |
| //Inform user about Combinational Loops | |
| size_t iloop; | |
| size_t itnode; | |
| for(iloop = 0; iloop < tnode_comb_loops.size(); iloop++) { | |
| vpr_printf_info(" Combinational Loop %d contains the following nodes:\n", iloop); | |
| for(itnode = 0; itnode < tnode_comb_loops[iloop].size(); itnode++) { | |
| vpr_printf_info(" tnode: %d\n", tnode_comb_loops[iloop][itnode]); | |
| } | |
| } | |
| } | |
| error++; | |
| } | |
| /* Todo: Add error checks that # of flip-flops, memories, and other | |
| black boxes match # of sinks/sources*/ | |
| if (error != 0) { | |
| vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__, | |
| "Found %d Errors in the timing graph. Aborting.\n", error); | |
| } | |
| } | |
| float print_critical_path_node(FILE * fp, t_linked_int * critical_path_node) { | |
| /* Prints one tnode on the critical path out to fp. Returns the delay to the next node. */ | |
| int inode, iblk, inet, downstream_node; | |
| t_pb_graph_pin * pb_graph_pin; | |
| e_tnode_type type; | |
| static const char *tnode_type_names[] = { "TN_INPAD_SOURCE", "TN_INPAD_OPIN", | |
| "TN_OUTPAD_IPIN", "TN_OUTPAD_SINK", "TN_CB_IPIN", "TN_CB_OPIN", | |
| "TN_INTERMEDIATE_NODE", "TN_PRIMITIVE_IPIN", "TN_PRIMITIVE_OPIN", "TN_FF_IPIN", | |
| "TN_FF_OPIN", "TN_FF_SINK", "TN_FF_SOURCE", "TN_FF_CLOCK", "TN_CONSTANT_GEN_SOURCE" }; | |
| t_linked_int *next_crit_node; | |
| float Tdel; | |
| inode = critical_path_node->data; | |
| type = tnode[inode].type; | |
| iblk = tnode[inode].block; | |
| pb_graph_pin = tnode[inode].pb_graph_pin; | |
| fprintf(fp, "Node: %d %s Block #%d (%s)\n", inode, tnode_type_names[type], | |
| iblk, block[iblk].name); | |
| if (pb_graph_pin == NULL) { | |
| assert( | |
| type == TN_INPAD_SOURCE || type == TN_OUTPAD_SINK || type == TN_FF_SOURCE || type == TN_FF_SINK); | |
| } | |
| if (pb_graph_pin != NULL) { | |
| fprintf(fp, "Pin: %s.%s[%d] pb (%s)", pb_graph_pin->parent_node->pb_type->name, | |
| pb_graph_pin->port->name, pb_graph_pin->pin_number, block[iblk].pb->rr_node_to_pb_mapping[pb_graph_pin->pin_count_in_cluster]->name); | |
| } | |
| if (type != TN_INPAD_SOURCE && type != TN_OUTPAD_SINK) { | |
| fprintf(fp, "\n"); | |
| } | |
| fprintf(fp, "T_arr: %g T_req: %g ", tnode[inode].T_arr, | |
| tnode[inode].T_req); | |
| next_crit_node = critical_path_node->next; | |
| if (next_crit_node != NULL) { | |
| downstream_node = next_crit_node->data; | |
| Tdel = tnode[downstream_node].T_arr - tnode[inode].T_arr; | |
| fprintf(fp, "Tdel: %g\n", Tdel); | |
| } else { /* last node, no Tdel. */ | |
| Tdel = 0.; | |
| fprintf(fp, "\n"); | |
| } | |
| if (type == TN_CB_OPIN) { | |
| inet = | |
| block[iblk].pb->rr_graph[pb_graph_pin->pin_count_in_cluster].net_num; | |
| inet = vpack_to_clb_net_mapping[inet]; | |
| fprintf(fp, "External-to-Block Net: #%d (%s). Pins on net: %d.\n", | |
| inet, g_clbs_nlist.net[inet].name, (int) g_clbs_nlist.net[inet].pins.size()); | |
| } else if (pb_graph_pin != NULL) { | |
| inet = | |
| block[iblk].pb->rr_graph[pb_graph_pin->pin_count_in_cluster].net_num; | |
| fprintf(fp, "Internal Net: #%d (%s). Pins on net: %d.\n", inet, | |
| g_atoms_nlist.net[inet].name, (int) g_atoms_nlist.net[inet].pins.size()); | |
| } | |
| fprintf(fp, "\n"); | |
| return (Tdel); | |
| } | |
| //Repeatedly detects combinational loops and remove timing edges to break them. | |
| // | |
| // The idea behind the implementation of is to identify Strongly | |
| // Connected Components (SCCs) in the timing graph which, by definition, | |
| // must contain cycles if they include more than one element. This is done using | |
| // Tarjan's algorithm in O(V + E) time. | |
| // | |
| // Once the SCCs are identified, an arbitrary edge in the timing graph is | |
| // disconnected to break the cycle. Since it may be possible for smaller sub-SCCs | |
| // to result, this is done iteratively until no SCCs with more than one element | |
| // are found. | |
| void detect_and_fix_timing_graph_combinational_loops() { | |
| int comb_cycle_iter_count = 0; | |
| int comb_cycle_count = 0; | |
| vpr_printf_info("Iteratively removing timing edges to break combinational cycles in timing graph.\n"); | |
| std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops(); | |
| //Repeat until all loops broken | |
| while(tnode_comb_loops.size() > 0) { | |
| comb_cycle_iter_count++; | |
| vpr_printf_info("Found %d Combinational Loops in the timing graph on iteration %d.\n", | |
| tnode_comb_loops.size(), comb_cycle_iter_count); | |
| vpr_printf_warning(__FILE__, __LINE__, | |
| "Combinational Loops can not be analyzed properly and will be " | |
| "arbitrarily disconnected.\n"); | |
| break_timing_graph_combinational_loops(tnode_comb_loops); | |
| comb_cycle_count += tnode_comb_loops.size(); | |
| tnode_comb_loops = detect_timing_graph_combinational_loops(); | |
| } | |
| vpr_printf_info("Removed %d combinational cycles from timing graph after %d iteration(s)\n", | |
| comb_cycle_count, comb_cycle_iter_count); | |
| } | |
| /* | |
| * Identify combinational loops in the timing graph | |
| */ | |
| std::vector<std::vector<int> > detect_timing_graph_combinational_loops() { | |
| //Combinational loops are SCC with >= 2 elements in the | |
| //timing graph | |
| return identify_strongly_connected_components(2); | |
| } | |
| /* | |
| * This function breaks every combinational loop passed to it. Each loop is represented | |
| * as a vector of tnode indicies*/ | |
| void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops) { | |
| size_t iloop; | |
| for(iloop = 0; iloop < tnode_comb_loops.size(); iloop++) { | |
| break_timing_graph_combinational_loop(tnode_comb_loops[iloop]); | |
| } | |
| } | |
| /* | |
| * Given a set of tnode indicies forming a combinational loop, | |
| * this breaks the loop by removing an arbitrary edge from the | |
| * cycle. | |
| */ | |
| void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes) { | |
| int i_first_tnode; | |
| int i_edge; | |
| int i_to_tnode; | |
| assert(loop_tnodes.size() >= 2); //Must have atleast 2 nodes for a valid cycle | |
| //Find an edge between two tnodes in the loop set | |
| // arbitrarily decide that it will be the first edge | |
| // from the first tnode which fans out to another tnode | |
| // in the loop set that will be cut | |
| i_first_tnode = loop_tnodes[0]; | |
| for(i_edge = 0; i_edge < tnode[i_first_tnode].num_edges; i_edge++) { | |
| i_to_tnode = tnode[i_first_tnode].out_edges[i_edge].to_node; | |
| if(std::find(loop_tnodes.begin(), loop_tnodes.end(), i_to_tnode) != loop_tnodes.end()) { | |
| //This edge does fanout into the loop_tnodes set | |
| // so cut it | |
| vpr_printf_warning(__FILE__, __LINE__, "Disconnecting timing graph edge from tnode %d to tnode %d to break combinational cycle\n", i_first_tnode, i_to_tnode); | |
| //Mark the original target node as a combinational loop breakpoint | |
| tnode[i_to_tnode].is_comb_loop_breakpoint = TRUE; | |
| //Mark the edge as invalid | |
| tnode[i_first_tnode].out_edges[i_edge].to_node = DO_NOT_ANALYSE; | |
| return; | |
| } | |
| } | |
| vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__, | |
| "Could not find edge to break combinational loop in timing graph.\n"); | |
| } | |
| /* | |
| * Tarjan's algorithm for finding Strongly Connected Components (SCCs) in | |
| * a direct graph. Only SCCs with min_size or greater members are returned. | |
| * | |
| * We keep track of the following information: | |
| * - The current 'index' of the node (stored in tnode_indexes), this | |
| * corresponds to the order the node was traversed in the DFS | |
| * - The current 'lowlink' of the node (stored in tnode_lowlinks), this | |
| * corresponds to the lowest node index which connects to the current | |
| * node | |
| * - Whether the node is currently in the stack (stored in tnode_instack) | |
| * - A stack (tnode_stack) of elements in the current SCC | |
| * | |
| * The key idea behind the algorithm is that a node stays on the stack if it | |
| * connects to a node earlier in the traversal. | |
| */ | |
| std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size) { | |
| int i; | |
| int index = 0; //The current index of the traversal | |
| std::vector<std::vector<int> > tnode_sccs; | |
| //Allocate book-keeping information | |
| int* tnode_indexes = (int*) my_calloc(num_tnodes, sizeof(int)); | |
| int* tnode_lowlinks = (int*) my_calloc(num_tnodes, sizeof(int)); | |
| boolean* tnode_instack = (boolean*) my_calloc(num_tnodes, sizeof(boolean)); | |
| //Initialize everything to unvisited | |
| for(i = 0; i < num_tnodes; i++) { | |
| tnode_indexes[i] = -1; | |
| tnode_lowlinks[i] = -1; | |
| tnode_instack[i] = FALSE; | |
| } | |
| //The stack of nodes | |
| std::stack<int> tnode_stack; | |
| //We ensure that every node gets traversed | |
| for(i = 0 ; i < num_tnodes; i++) { | |
| if(tnode_indexes[i] == -1) { | |
| strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, i); | |
| } | |
| } | |
| //Clean-up | |
| free(tnode_indexes); | |
| free(tnode_lowlinks); | |
| free(tnode_instack); | |
| return tnode_sccs; | |
| } | |
| void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, boolean* tnode_instack, | |
| std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs, | |
| size_t min_size, int inode) { | |
| int iedge; //Index for out-going edges of the current node (inode) | |
| int iscc_element; //Index for the current SCC element (used when poping stack) | |
| int to_node_index; //Index to the sink node for the current edge | |
| //Mark this node as visited | |
| tnode_indexes[inode] = index; | |
| tnode_lowlinks[inode] = index; | |
| index += 1; | |
| //Add it to the stack | |
| tnode_stack.push(inode); | |
| tnode_instack[inode] = TRUE; | |
| //Fanout of inode | |
| for(iedge = 0; iedge < tnode[inode].num_edges; iedge++) { | |
| to_node_index = tnode[inode].out_edges[iedge].to_node; | |
| if(to_node_index == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes | |
| if(tnode_indexes[to_node_index] == -1) { | |
| //Haven't visited successor of inode (to_node) yet, recurse | |
| strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, to_node_index); | |
| assert(tnode_lowlinks[inode] >= 0); | |
| assert(tnode_lowlinks[to_node_index] >= 0); | |
| //We are connected to to_node, so our lowest link should be either ourselves, or | |
| //to_node's lowest link | |
| tnode_lowlinks[inode] = min(tnode_lowlinks[inode], tnode_lowlinks[to_node_index]); | |
| } else if (tnode_instack[to_node_index]) { | |
| //to_node was in the stack, and so is part of the current SCC | |
| assert(tnode_lowlinks[inode] >= 0); | |
| assert(tnode_indexes[to_node_index] >= 0); | |
| //to_node was on the stack, since we connect to it our lowest link is either ourselves | |
| //or the index of to_node (since it may have been traversed earlier) | |
| tnode_lowlinks[inode] = min(tnode_lowlinks[inode], tnode_indexes[to_node_index]); | |
| } | |
| } | |
| assert(tnode_indexes[inode] >= 0); | |
| if(tnode_lowlinks[inode] == tnode_indexes[inode]) { | |
| //This inode is the root of a new SCC | |
| //Create a new SCC | |
| std::vector<int> scc; | |
| //Pop of elements of the stack until we reach ourselves | |
| do { | |
| iscc_element = tnode_stack.top(); | |
| tnode_stack.pop(); | |
| tnode_instack[iscc_element] = FALSE; | |
| scc.push_back(iscc_element); //Add to the SCC | |
| } while(iscc_element != inode); | |
| //Add the SCC to the list of SCC if the meet | |
| // the minimum size requirement | |
| if(scc.size() >= min_size) { | |
| tnode_sccs.push_back(scc); | |
| } | |
| } | |
| } | |
| void print_comb_loop(std::vector<int>& loop_tnodes) { | |
| printf("Comb Loop:\n"); | |
| for(std::vector<int>::iterator it = loop_tnodes.begin(); it != loop_tnodes.end(); it++) { | |
| int i_tnode = *it; | |
| if(tnode[i_tnode].pb_graph_pin != NULL) { | |
| printf("\ttnode: %d %s.%s[%d]\n", i_tnode, | |
| tnode[i_tnode].pb_graph_pin->parent_node->pb_type->name, | |
| tnode[i_tnode].pb_graph_pin->port->name, | |
| tnode[i_tnode].pb_graph_pin->pin_number); | |
| } else { | |
| printf("\ttnode: %d\n", i_tnode); | |
| } | |
| } | |
| } |