vpr/SRC/timing/path_delay2.c - third_party/vtr-verilog-to-routing - Git at Google

 #include <assert.h>

 #include <stack>
 #include <vector>
 #include <algorithm>


 #include "util.h"
 #include "vpr_types.h"
 #include "globals.h"
 #include "path_delay2.h"
 #include "read_xml_arch_file.h"

 #include "path_delay.h"
 /************* Variables (globals) shared by all path_delay modules **********/

 int num_tnode_levels; /* Number of levels in the timing graph. */

 struct s_ivec *tnodes_at_level;
 /* [0..num_tnode_levels - 1].  Count and list of tnodes at each level of
  * the timing graph, to make topological searches easier. Level-0 nodes are
  * sources to the timing graph (types TN_FF_SOURCE, TN_INPAD_SOURCE
  * and TN_CONSTANT_GEN_SOURCE). Level-N nodes are in the immediate fanout of
  * nodes with level at most N-1. */

 /******************* Subroutines local to this module ************************/

 static int *alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr);

 void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops);

 void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes);

 std::vector<std::vector<int> > detect_timing_graph_combinational_loops();

 std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size);

 void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, boolean* tnode_instack,
                    std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs,
                    size_t min_size, int inode);

 void print_comb_loop(std::vector<int>& loop_tnodes);
 /************************** Subroutine definitions ***************************/

 static int *
 alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr) {

 	/* Allocates and returns an array, indexed by tnode, holding the number of   *
 	 * in-edges (inputs) of each tnode.  While counting, it checks that every    *
 	 * out-edge targets a tnode index in [0, num_tnodes) and that no tnode has a *
 	 * negative edge count.  Edges whose target is DO_NOT_ANALYSE are ignored.   *
 	 * The number of tnodes with zero out-edges is written to *num_sinks_ptr.    *
 	 * All problems are reported before vpr_throw() is called, so one run lists  *
 	 * every bad edge.  The caller releases the returned array with free().      */

 	int inode, iedge, to_node, num_edges, error, num_sinks;
 	int *tnode_num_fanin;
 	t_tedge *tedge;

 	tnode_num_fanin = (int *) my_calloc(num_tnodes, sizeof(int));
 	error = 0;
 	num_sinks = 0;

 	for (inode = 0; inode < num_tnodes; inode++) {
 		num_edges = tnode[inode].num_edges;

 		if (num_edges > 0) {
 			tedge = tnode[inode].out_edges;
 			for (iedge = 0; iedge < num_edges; iedge++) {
 				to_node = tedge[iedge].to_node;
 				if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes

 				if (to_node < 0 || to_node >= num_tnodes) {
 					vpr_printf_error(__FILE__, __LINE__,
 							"in alloc_and_load_tnode_fanin_and_check_edges:\n");
 					vpr_printf_error(__FILE__, __LINE__,
 							"\ttnode #%d edge #%d goes to illegal node #%d.\n",
 							inode, iedge, to_node);
 					error++;
 					/* An out-of-range target must never be used to index the *
 					 * fanin array; skip it and keep checking other edges.    */
 					continue;
 				}

 				tnode_num_fanin[to_node]++;
 			}
 		}

 		else if (num_edges == 0) {
 			num_sinks++;
 		}

 		else {
 			/* Negative edge count: the tnode itself is corrupt. */
 			vpr_printf_error(__FILE__, __LINE__,
 					"in alloc_and_load_tnode_fanin_and_check_edges:\n");
 			vpr_printf_error(__FILE__, __LINE__,
 					"\ttnode #%d has %d edges.\n",
 					inode, num_edges);
 			error++;
 		}

 	}

 	if (error != 0) {
 		vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__,
 				"Found %d Errors in the timing graph. Aborting.\n", error);
 	}

 	*num_sinks_ptr = num_sinks;
 	return (tnode_num_fanin);
 }

 int alloc_and_load_timing_graph_levels(void) {

 	/* Does a breadth-first search through the timing graph in order to levelize  *
 	 * it.  This allows subsequent traversals to be done topologically for speed. *
 	 * Also returns the number of sinks in the graph (nodes with no fanout).      */

 	t_linked_int *free_list_head, *nodes_at_level_head;
 	int inode, num_at_level, iedge, to_node, num_edges, num_sinks, num_levels,
 			i;
 	t_tedge *tedge;

 	/* [0..num_tnodes-1]. # of in-edges to each tnode that have not yet been    *
 	 * seen in this traversal.                                                  */

 	int *tnode_fanin_left;

 	tnode_fanin_left = alloc_and_load_tnode_fanin_and_check_edges(&num_sinks);

 	free_list_head = NULL;
 	nodes_at_level_head = NULL;

 	/* Very conservative -> max number of levels = num_tnodes.  Realloc later.  *
 	 * Temporarily need one extra level on the end because I look at the first  *
 	 * empty level.                                                             */

 	tnodes_at_level = (struct s_ivec *) my_malloc(
 			(num_tnodes + 1) * sizeof(struct s_ivec));

 	/* Scan through the timing graph, putting all the primary input nodes (no    *
 	 * fanin) into level 0 of the level structure.                               */

 	num_at_level = 0;

 	for (inode = 0; inode < num_tnodes; inode++) {
 		if (tnode_fanin_left[inode] == 0) {
 			num_at_level++;
 			nodes_at_level_head = insert_in_int_list(nodes_at_level_head, inode,
 					&free_list_head);
 		}
 	}

 	alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level,
 			&tnodes_at_level[0], &free_list_head);

 	num_levels = 0;

 	while (num_at_level != 0) { /* Until there's nothing in the queue. */
 		num_levels++;
 		num_at_level = 0;

 		for (i = 0; i < tnodes_at_level[num_levels - 1].nelem; i++) {
 			inode = tnodes_at_level[num_levels - 1].list[i];
 			tedge = tnode[inode].out_edges;
 			num_edges = tnode[inode].num_edges;

 			for (iedge = 0; iedge < num_edges; iedge++) {
 				to_node = tedge[iedge].to_node;
 				if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes

 				tnode_fanin_left[to_node]--;

 				if (tnode_fanin_left[to_node] == 0) {
 					num_at_level++;
 					nodes_at_level_head = insert_in_int_list(
 							nodes_at_level_head, to_node, &free_list_head);
 				}
 			}
 		}

 		alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level,
 				&tnodes_at_level[num_levels], &free_list_head);
 	}

 	tnodes_at_level = (struct s_ivec *) my_realloc(tnodes_at_level,
 			num_levels * sizeof(struct s_ivec));
 	num_tnode_levels = num_levels;

 	free(tnode_fanin_left);
 	free_int_list(&free_list_head);
 	return (num_sinks);
 }

 void check_timing_graph(int num_sinks) {

 	/* Checks the timing graph to see that: (1) all the tnodes have been put    *
 	 * into some level of the timing graph.  If fewer tnodes were levelized     *
 	 * than exist, the combinational loops found in the graph are listed        *
 	 * before vpr_throw() is called.                                            *
 	 * num_sinks is accepted but not used by the current checks.                */

 	/* Addition error checks that need to be done but not yet implemented: (2) the number of primary inputs    *
 	 * to the timing graph is equal to the number of input pads + the number of *
 	 * constant generators; and (3) the number of sinks (nodes with no fanout)  *
 	 * equals the number of output pads + the number of flip flops.             */

 	int num_tnodes_check, ilevel, error;

 	error = 0;
 	num_tnodes_check = 0;

 	/* TODO: Rework error checks for I/Os*/

 	for (ilevel = 0; ilevel < num_tnode_levels; ilevel++)
 		num_tnodes_check += tnodes_at_level[ilevel].nelem;

 	if (num_tnodes_check != num_tnodes) {
 		vpr_printf_error(__FILE__, __LINE__,
 				"Error in check_timing_graph: %d tnodes appear in the tnode level structure. Expected %d.\n",
 				num_tnodes_check, num_tnodes);
 		vpr_printf_info("Checking the netlist for combinational cycles:\n");
 		if (num_tnodes > num_tnodes_check) {
             std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops();

             //Inform user about Combinational Loops
             size_t iloop;
             size_t itnode;
             for(iloop = 0; iloop < tnode_comb_loops.size(); iloop++) {
                 //The format uses %d, so the size_t loop counter is narrowed explicitly
                 vpr_printf_info("  Combinational Loop %d contains the following nodes:\n", (int) iloop);
                 for(itnode = 0; itnode < tnode_comb_loops[iloop].size(); itnode++) {
                     vpr_printf_info("   tnode: %d\n", tnode_comb_loops[iloop][itnode]);
                 }
             }
 		}
 		error++;
 	}
 	/* Todo: Add error checks that # of flip-flops, memories, and other
 	 black boxes match # of sinks/sources*/

 	if (error != 0) {
 		vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__,
 				"Found %d Errors in the timing graph. Aborting.\n", error);
 	}
 }

 float print_critical_path_node(FILE * fp, t_linked_int * critical_path_node) {

 	/* Prints one tnode on the critical path out to fp. Returns the delay to the next node. */

 	int inode, iblk, inet, downstream_node;
 	t_pb_graph_pin * pb_graph_pin;
 	e_tnode_type type;
 	static const char *tnode_type_names[] = { "TN_INPAD_SOURCE", "TN_INPAD_OPIN",
 			"TN_OUTPAD_IPIN", "TN_OUTPAD_SINK", "TN_CB_IPIN", "TN_CB_OPIN",
 			"TN_INTERMEDIATE_NODE", "TN_PRIMITIVE_IPIN", "TN_PRIMITIVE_OPIN", "TN_FF_IPIN",
 			"TN_FF_OPIN", "TN_FF_SINK", "TN_FF_SOURCE", "TN_FF_CLOCK", "TN_CONSTANT_GEN_SOURCE" };

 	t_linked_int *next_crit_node;
 	float Tdel;

 	inode = critical_path_node->data;
 	type = tnode[inode].type;
 	iblk = tnode[inode].block;
 	pb_graph_pin = tnode[inode].pb_graph_pin;

 	fprintf(fp, "Node: %d  %s Block #%d (%s)\n", inode, tnode_type_names[type],
 		iblk, block[iblk].name);

 	if (pb_graph_pin == NULL) {
 		assert(
 				type == TN_INPAD_SOURCE || type == TN_OUTPAD_SINK || type == TN_FF_SOURCE || type == TN_FF_SINK);
 	}

 	if (pb_graph_pin != NULL) {
 		fprintf(fp, "Pin: %s.%s[%d] pb (%s)", pb_graph_pin->parent_node->pb_type->name,
 			pb_graph_pin->port->name, pb_graph_pin->pin_number, block[iblk].pb->rr_node_to_pb_mapping[pb_graph_pin->pin_count_in_cluster]->name);
 	}
 	if (type != TN_INPAD_SOURCE && type != TN_OUTPAD_SINK) {
 		fprintf(fp, "\n");
 	}

 	fprintf(fp, "T_arr: %g  T_req: %g  ", tnode[inode].T_arr,
 			tnode[inode].T_req);

 	next_crit_node = critical_path_node->next;
 	if (next_crit_node != NULL) {
 		downstream_node = next_crit_node->data;
 		Tdel = tnode[downstream_node].T_arr - tnode[inode].T_arr;
 		fprintf(fp, "Tdel: %g\n", Tdel);
 	} else { /* last node, no Tdel. */
 		Tdel = 0.;
 		fprintf(fp, "\n");
 	}

 	if (type == TN_CB_OPIN) {
 		inet =
 				block[iblk].pb->rr_graph[pb_graph_pin->pin_count_in_cluster].net_num;
 		inet = vpack_to_clb_net_mapping[inet];
 		fprintf(fp, "External-to-Block Net: #%d (%s).  Pins on net: %d.\n",
 			inet, g_clbs_nlist.net[inet].name, (int) g_clbs_nlist.net[inet].pins.size());
 	} else if (pb_graph_pin != NULL) {
 		inet =
 				block[iblk].pb->rr_graph[pb_graph_pin->pin_count_in_cluster].net_num;
 		fprintf(fp, "Internal Net: #%d (%s).  Pins on net: %d.\n", inet,
 			g_atoms_nlist.net[inet].name, (int) g_atoms_nlist.net[inet].pins.size());
 	}

 	fprintf(fp, "\n");
 	return (Tdel);
 }

 //Repeatedly detects combinational loops and removes timing edges to break them.
 //
 // The implementation identifies Strongly Connected Components (SCCs) in the
 // timing graph which, by definition, must contain cycles if they include more
 // than one element. This is done using Tarjan's algorithm.
 //
 // Once the SCCs are identified, one edge per SCC is disconnected. Since
 // smaller sub-SCCs may remain afterwards, detection and cutting are repeated
 // until no SCC with more than one element is found.
 //
 // Progress is reported through vpr_printf_info/vpr_printf_warning; the final
 // message gives the total number of SCCs processed and the iteration count.
 void detect_and_fix_timing_graph_combinational_loops() {
     int comb_cycle_iter_count = 0;
     int comb_cycle_count = 0;

     vpr_printf_info("Iteratively removing timing edges to break combinational cycles in timing graph.\n");

     std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops();

     //Repeat until all loops broken
     while(!tnode_comb_loops.empty()) {
         comb_cycle_iter_count++;
         //The format uses %d, so the size_t count is narrowed explicitly
         vpr_printf_info("Found %d Combinational Loops in the timing graph on iteration %d.\n",
                         (int) tnode_comb_loops.size(), comb_cycle_iter_count);
         vpr_printf_warning(__FILE__, __LINE__,
                             "Combinational Loops can not be analyzed properly and will be "
                             "arbitrarily disconnected.\n");

         break_timing_graph_combinational_loops(tnode_comb_loops);

         comb_cycle_count += tnode_comb_loops.size();

         //Cutting one edge per SCC may leave smaller cycles behind, so look again
         tnode_comb_loops = detect_timing_graph_combinational_loops();
     }
     vpr_printf_info("Removed %d combinational cycles from timing graph after %d iteration(s)\n",
                     comb_cycle_count, comb_cycle_iter_count);
 }

 /*
  * Returns the combinational loops of the timing graph, each one given as
  * the list of tnode indices in a strongly connected component.
  */
 std::vector<std::vector<int> > detect_timing_graph_combinational_loops() {
     //A strongly connected component needs at least two tnodes to count
     //as a combinational loop
     const size_t min_loop_size = 2;
     return identify_strongly_connected_components(min_loop_size);
 }

 /*
  * Breaks each combinational loop in tnode_comb_loops, in order. Every entry
  * is one loop, given as a vector of tnode indices.
  */
 void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops) {
     typedef std::vector<std::vector<int> >::iterator loop_iter;
     for(loop_iter loop = tnode_comb_loops.begin(); loop != tnode_comb_loops.end(); ++loop) {
         break_timing_graph_combinational_loop(*loop);
     }
 }

 /*
  * Breaks one combinational loop, given as the set of tnode indices that
  * form it. The edge that gets cut is the first out-edge of the first
  * listed tnode whose target is also in the set: the target is flagged as
  * a loop breakpoint and the edge is marked DO_NOT_ANALYSE. If the first
  * tnode has no such edge, vpr_throw() is called.
  */
 void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes) {
     assert(loop_tnodes.size() >= 2); //Must have atleast 2 nodes for a valid cycle

     //The edge to cut is always searched for among the fanout of the
     //first tnode in the set
     const int src_tnode = loop_tnodes[0];

     for(int edge_idx = 0; edge_idx < tnode[src_tnode].num_edges; edge_idx++) {
         const int dst_tnode = tnode[src_tnode].out_edges[edge_idx].to_node;

         const bool stays_in_loop =
             std::find(loop_tnodes.begin(), loop_tnodes.end(), dst_tnode) != loop_tnodes.end();
         if(!stays_in_loop) {
             continue; //Edge leaves the loop set, not a candidate
         }

         vpr_printf_warning(__FILE__, __LINE__, "Disconnecting timing graph edge from tnode %d to tnode %d to break combinational cycle\n", src_tnode, dst_tnode);

         //Remember where the loop was opened
         tnode[dst_tnode].is_comb_loop_breakpoint = TRUE;

         //Disable the edge
         tnode[src_tnode].out_edges[edge_idx].to_node = DO_NOT_ANALYSE;

         return;
     }

     vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__,
             "Could not find edge to break combinational loop in timing graph.\n");
 }

 /*
  * Tarjan's algorithm for finding Strongly Connected Components (SCCs) in
  *  a direct graph. Only SCCs with min_size or greater members are returned.
  *
  *  We keep track of the following information:
  *    - The current 'index' of the node (stored in tnode_indexes), this
  *      corresponds to the order the node was traversed in the DFS
  *    - The current 'lowlink' of the node (stored in tnode_lowlinks), this
  *      corresponds to the lowest node index which connects to the current
  *      node
  *    - Whether the node is currently in the stack (stored in tnode_instack)
  *    - A stack (tnode_stack) of elements in the current SCC
  *
  *  The key idea behind the algorithm is that a node stays on the stack if it
  *  connects to a node earlier in the traversal.
  */
 std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size) {
     int i;
     int index = 0; //The current index of the traversal
     std::vector<std::vector<int> > tnode_sccs;

     //Allocate book-keeping information
     int* tnode_indexes = (int*) my_calloc(num_tnodes, sizeof(int));
     int* tnode_lowlinks = (int*) my_calloc(num_tnodes, sizeof(int));
     boolean* tnode_instack = (boolean*) my_calloc(num_tnodes, sizeof(boolean));

     //Initialize everything to unvisited
     for(i = 0; i < num_tnodes; i++) {
         tnode_indexes[i] = -1;
         tnode_lowlinks[i] = -1;
         tnode_instack[i] = FALSE;
     }

     //The stack of nodes
     std::stack<int> tnode_stack;

     //We ensure that every node gets traversed
     for(i = 0 ; i < num_tnodes; i++) {
         if(tnode_indexes[i] == -1) {
             strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, i);
         }
     }

     //Clean-up
     free(tnode_indexes);
     free(tnode_lowlinks);
     free(tnode_instack);

     return tnode_sccs;
 }

 //Recursive step of Tarjan's algorithm, run on tnode 'inode'.
 //
 // Gives inode the next traversal index, pushes it on tnode_stack, visits its
 // fanout (skipping DO_NOT_ANALYSE edges) and lowers inode's low link from
 // what the fanout reaches. If inode ends up as the root of an SCC, the SCC
 // is popped off the stack and appended to tnode_sccs when it has at least
 // min_size members.
 void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, boolean* tnode_instack,
                    std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs,
                    size_t min_size, int inode) {
     //First visit: low link starts out equal to the node's own index
     tnode_lowlinks[inode] = index;
     tnode_indexes[inode] = index;
     ++index;

     tnode_instack[inode] = TRUE;
     tnode_stack.push(inode);

     for(int e = 0; e < tnode[inode].num_edges; e++) {
         const int succ = tnode[inode].out_edges[e].to_node;
         if(succ == DO_NOT_ANALYSE) {
             continue; //Disabled edge
         }

         if(tnode_indexes[succ] != -1) {
             //Already visited: it only matters if it is still on the stack
             if(tnode_instack[succ]) {
                 assert(tnode_lowlinks[inode] >= 0);
                 assert(tnode_indexes[succ] >= 0);

                 //Keep the smaller of our low link and succ's traversal index
                 if(tnode_indexes[succ] < tnode_lowlinks[inode]) {
                     tnode_lowlinks[inode] = tnode_indexes[succ];
                 }
             }
             continue;
         }

         //Unvisited successor: explore it first
         strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, succ);
         assert(tnode_lowlinks[inode] >= 0);
         assert(tnode_lowlinks[succ] >= 0);

         //Keep the smaller of our low link and succ's low link
         if(tnode_lowlinks[succ] < tnode_lowlinks[inode]) {
             tnode_lowlinks[inode] = tnode_lowlinks[succ];
         }
     }

     assert(tnode_indexes[inode] >= 0);

     if(tnode_lowlinks[inode] != tnode_indexes[inode]) {
         return; //Not an SCC root, inode stays on the stack
     }

     //inode is the root of an SCC: everything above it on the stack,
     //and inode itself, form the component
     std::vector<int> scc;
     for(;;) {
         const int member = tnode_stack.top();
         tnode_stack.pop();
         tnode_instack[member] = FALSE;
         scc.push_back(member);
         if(member == inode) {
             break;
         }
     }

     //Only keep components that are large enough
     if(scc.size() >= min_size) {
         tnode_sccs.push_back(scc);
     }
 }

 void print_comb_loop(std::vector<int>& loop_tnodes) {
     printf("Comb Loop:\n");
     for(std::vector<int>::iterator it = loop_tnodes.begin(); it != loop_tnodes.end(); it++) {
         int i_tnode = *it;
         if(tnode[i_tnode].pb_graph_pin != NULL) {
             printf("\ttnode: %d %s.%s[%d]\n", i_tnode,
                             tnode[i_tnode].pb_graph_pin->parent_node->pb_type->name,
                             tnode[i_tnode].pb_graph_pin->port->name,
                             tnode[i_tnode].pb_graph_pin->pin_number);
         } else {
             printf("\ttnode: %d\n", i_tnode);
         }
     }
 }
	#include <assert.h>

	#include <stack>
	#include <vector>
	#include <algorithm>


	#include "util.h"
	#include "vpr_types.h"
	#include "globals.h"
	#include "path_delay2.h"
	#include "read_xml_arch_file.h"

	#include "path_delay.h"
	/*********** Variables (globals) shared by all path_delay modules ********/

	int num_tnode_levels; /* Number of levels in the timing graph. */

	struct s_ivec *tnodes_at_level;
	/* [0..num_tnode_levels - 1]. Count and list of tnodes at each level of
	* the timing graph, to make topological searches easier. Level-0 nodes are
	* sources to the timing graph (types TN_FF_SOURCE, TN_INPAD_SOURCE
	* and TN_CONSTANT_GEN_SOURCE). Level-N nodes are in the immediate fanout of
	* nodes with level at most N-1. */

	/***************** Subroutines local to this module **********************/

	/* Returns the per-tnode fanin array and stores the sink count through num_sinks_ptr. */
	static int *alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr);

	void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops);

	void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes);

	std::vector<std::vector<int> > detect_timing_graph_combinational_loops();

	std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size);

	void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, boolean* tnode_instack,
	std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs,
	size_t min_size, int inode);

	void print_comb_loop(std::vector<int>& loop_tnodes);
	/************************ Subroutine definitions *************************/

	static int *
	alloc_and_load_tnode_fanin_and_check_edges(int *num_sinks_ptr) {

		/* Allocates and returns an array, indexed by tnode, holding the number of   *
		 * in-edges (inputs) of each tnode.  While counting, it checks that every    *
		 * out-edge targets a tnode index in [0, num_tnodes) and that no tnode has a *
		 * negative edge count.  Edges whose target is DO_NOT_ANALYSE are ignored.   *
		 * The number of tnodes with zero out-edges is written to *num_sinks_ptr.    *
		 * All problems are reported before vpr_throw() is called, so one run lists  *
		 * every bad edge.                                                           */

		int inode, iedge, to_node, num_edges, error, num_sinks;
		int *tnode_num_fanin;
		t_tedge *tedge;

		tnode_num_fanin = (int *) my_calloc(num_tnodes, sizeof(int));
		error = 0;
		num_sinks = 0;

		for (inode = 0; inode < num_tnodes; inode++) {
			num_edges = tnode[inode].num_edges;

			if (num_edges > 0) {
				tedge = tnode[inode].out_edges;
				for (iedge = 0; iedge < num_edges; iedge++) {
					to_node = tedge[iedge].to_node;
					if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes

					if (to_node < 0 || to_node >= num_tnodes) {
						vpr_printf_error(__FILE__, __LINE__,
								"in alloc_and_load_tnode_fanin_and_check_edges:\n");
						vpr_printf_error(__FILE__, __LINE__,
								"\ttnode #%d edge #%d goes to illegal node #%d.\n",
								inode, iedge, to_node);
						error++;
						/* An out-of-range target must never be used to index the *
						 * fanin array; skip it and keep checking other edges.    */
						continue;
					}

					tnode_num_fanin[to_node]++;
				}
			}

			else if (num_edges == 0) {
				num_sinks++;
			}

			else {
				/* Negative edge count: the tnode itself is corrupt. */
				vpr_printf_error(__FILE__, __LINE__,
						"in alloc_and_load_tnode_fanin_and_check_edges:\n");
				vpr_printf_error(__FILE__, __LINE__,
						"\ttnode #%d has %d edges.\n",
						inode, num_edges);
				error++;
			}

		}

		if (error != 0) {
			vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__,
					"Found %d Errors in the timing graph. Aborting.\n", error);
		}

		*num_sinks_ptr = num_sinks;
		return (tnode_num_fanin);
	}

	int alloc_and_load_timing_graph_levels(void) {

		/* Levelizes the timing graph breadth-first: tnodes with no fanin form      *
		 * level 0, and a tnode joins the level being built once all of its         *
		 * in-edges have been seen.  Fills the globals tnodes_at_level and          *
		 * num_tnode_levels, and returns the number of sinks (tnodes with no        *
		 * out-edges) as counted by the fanin/edge check.                           */

		/* Both list heads are pointers: they are set to NULL and handed to the    *
		 * int-list helpers, which return and update list heads.                   */
		t_linked_int *free_list_head, *nodes_at_level_head;
		int inode, num_at_level, iedge, to_node, num_edges, num_sinks, num_levels,
				i;
		t_tedge *tedge;

		/* [0..num_tnodes-1]. # of in-edges to each tnode that have not yet been   *
		 * seen in this traversal.                                                 */

		int *tnode_fanin_left;

		tnode_fanin_left = alloc_and_load_tnode_fanin_and_check_edges(&num_sinks);

		free_list_head = NULL;
		nodes_at_level_head = NULL;

		/* Very conservative -> max number of levels = num_tnodes. Realloc later.  *
		 * Temporarily need one extra level on the end because the loop below      *
		 * looks at the first empty level.                                         */

		tnodes_at_level = (struct s_ivec *) my_malloc(
				(num_tnodes + 1) * sizeof(struct s_ivec));

		/* Scan through the timing graph, putting all the primary input nodes (no  *
		 * fanin) into level 0 of the level structure.                             */

		num_at_level = 0;

		for (inode = 0; inode < num_tnodes; inode++) {
			if (tnode_fanin_left[inode] == 0) {
				num_at_level++;
				nodes_at_level_head = insert_in_int_list(nodes_at_level_head, inode,
						&free_list_head);
			}
		}

		alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level,
				&tnodes_at_level[0], &free_list_head);

		num_levels = 0;

		while (num_at_level != 0) { /* Until there's nothing in the queue. */
			num_levels++;
			num_at_level = 0;

			for (i = 0; i < tnodes_at_level[num_levels - 1].nelem; i++) {
				inode = tnodes_at_level[num_levels - 1].list[i];
				tedge = tnode[inode].out_edges;
				num_edges = tnode[inode].num_edges;

				for (iedge = 0; iedge < num_edges; iedge++) {
					to_node = tedge[iedge].to_node;
					if(to_node == DO_NOT_ANALYSE) continue; //Skip marked invalid nodes

					tnode_fanin_left[to_node]--;

					/* Last unseen in-edge: to_node belongs to the next level. */
					if (tnode_fanin_left[to_node] == 0) {
						num_at_level++;
						nodes_at_level_head = insert_in_int_list(
								nodes_at_level_head, to_node, &free_list_head);
					}
				}
			}

			alloc_ivector_and_copy_int_list(&nodes_at_level_head, num_at_level,
					&tnodes_at_level[num_levels], &free_list_head);
		}

		tnodes_at_level = (struct s_ivec *) my_realloc(tnodes_at_level,
				num_levels * sizeof(struct s_ivec));
		num_tnode_levels = num_levels;

		free(tnode_fanin_left);
		free_int_list(&free_list_head);
		return (num_sinks);
	}

	void check_timing_graph(int num_sinks) {

		/* Checks the timing graph to see that: (1) all the tnodes have been put   *
		 * into some level of the timing graph.  If fewer tnodes were levelized    *
		 * than exist, the combinational loops found in the graph are listed       *
		 * before vpr_throw() is called.                                           *
		 * num_sinks is accepted but not used by the current checks.               */

		/* Addition error checks that need to be done but not yet implemented: (2) the number of primary inputs *
		 * to the timing graph is equal to the number of input pads + the number of *
		 * constant generators; and (3) the number of sinks (nodes with no fanout) *
		 * equals the number of output pads + the number of flip flops. */

		int num_tnodes_check, ilevel, error;

		error = 0;
		num_tnodes_check = 0;

		/* TODO: Rework error checks for I/Os*/

		for (ilevel = 0; ilevel < num_tnode_levels; ilevel++)
			num_tnodes_check += tnodes_at_level[ilevel].nelem;

		if (num_tnodes_check != num_tnodes) {
			vpr_printf_error(__FILE__, __LINE__,
					"Error in check_timing_graph: %d tnodes appear in the tnode level structure. Expected %d.\n",
					num_tnodes_check, num_tnodes);
			vpr_printf_info("Checking the netlist for combinational cycles:\n");
			if (num_tnodes > num_tnodes_check) {
				std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops();

				//Inform user about Combinational Loops
				size_t iloop;
				size_t itnode;
				for(iloop = 0; iloop < tnode_comb_loops.size(); iloop++) {
					//The format uses %d, so the size_t loop counter is narrowed explicitly
					vpr_printf_info(" Combinational Loop %d contains the following nodes:\n", (int) iloop);
					for(itnode = 0; itnode < tnode_comb_loops[iloop].size(); itnode++) {
						vpr_printf_info(" tnode: %d\n", tnode_comb_loops[iloop][itnode]);
					}
				}
			}
			error++;
		}
		/* Todo: Add error checks that # of flip-flops, memories, and other
		black boxes match # of sinks/sources*/

		if (error != 0) {
			vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__,
					"Found %d Errors in the timing graph. Aborting.\n", error);
		}
	}

	float print_critical_path_node(FILE * fp, t_linked_int * critical_path_node) {

		/* Writes a description of one critical-path tnode to fp: its type and     *
		 * block, its pin (when it has one), arrival/required times and the net it *
		 * is on.  Returns the delay to the next node on the path, or 0 for the    *
		 * last node.                                                              */

		int inode, iblk, inet, downstream_node;
		t_pb_graph_pin * pb_graph_pin;
		e_tnode_type type;

		/* Printable names, indexed by the tnode's type value. */
		static const char *tnode_type_names[] = { "TN_INPAD_SOURCE", "TN_INPAD_OPIN",
				"TN_OUTPAD_IPIN", "TN_OUTPAD_SINK", "TN_CB_IPIN", "TN_CB_OPIN",
				"TN_INTERMEDIATE_NODE", "TN_PRIMITIVE_IPIN", "TN_PRIMITIVE_OPIN", "TN_FF_IPIN",
				"TN_FF_OPIN", "TN_FF_SINK", "TN_FF_SOURCE", "TN_FF_CLOCK", "TN_CONSTANT_GEN_SOURCE" };

		t_linked_int *next_crit_node;
		float Tdel;

		inode = critical_path_node->data;
		type = tnode[inode].type;
		iblk = tnode[inode].block;
		pb_graph_pin = tnode[inode].pb_graph_pin;

		fprintf(fp, "Node: %d %s Block #%d (%s)\n", inode, tnode_type_names[type],
			iblk, block[iblk].name);

		if (pb_graph_pin == NULL) {
			/* Only these node types are expected to have no pin attached. */
			assert(
					type == TN_INPAD_SOURCE || type == TN_OUTPAD_SINK || type == TN_FF_SOURCE || type == TN_FF_SINK);
		}

		if (pb_graph_pin != NULL) {
			fprintf(fp, "Pin: %s.%s[%d] pb (%s)", pb_graph_pin->parent_node->pb_type->name,
				pb_graph_pin->port->name, pb_graph_pin->pin_number, block[iblk].pb->rr_node_to_pb_mapping[pb_graph_pin->pin_count_in_cluster]->name);
		}
		if (type != TN_INPAD_SOURCE && type != TN_OUTPAD_SINK) {
			fprintf(fp, "\n");
		}

		fprintf(fp, "T_arr: %g T_req: %g ", tnode[inode].T_arr,
				tnode[inode].T_req);

		next_crit_node = critical_path_node->next;
		if (next_crit_node != NULL) {
			downstream_node = next_crit_node->data;
			Tdel = tnode[downstream_node].T_arr - tnode[inode].T_arr;
			fprintf(fp, "Tdel: %g\n", Tdel);
		} else { /* last node, no Tdel. */
			Tdel = 0.;
			fprintf(fp, "\n");
		}

		if (type == TN_CB_OPIN) {
			/* Look up the pin's net, then map it to the clustered netlist. */
			inet =
					block[iblk].pb->rr_graph[pb_graph_pin->pin_count_in_cluster].net_num;
			inet = vpack_to_clb_net_mapping[inet];
			fprintf(fp, "External-to-Block Net: #%d (%s). Pins on net: %d.\n",
				inet, g_clbs_nlist.net[inet].name, (int) g_clbs_nlist.net[inet].pins.size());
		} else if (pb_graph_pin != NULL) {
			inet =
					block[iblk].pb->rr_graph[pb_graph_pin->pin_count_in_cluster].net_num;
			fprintf(fp, "Internal Net: #%d (%s). Pins on net: %d.\n", inet,
				g_atoms_nlist.net[inet].name, (int) g_atoms_nlist.net[inet].pins.size());
		}

		fprintf(fp, "\n");
		return (Tdel);
	}

	//Repeatedly detects combinational loops and removes timing edges to break them.
	//
	// The implementation identifies Strongly Connected Components (SCCs) in the
	// timing graph which, by definition, must contain cycles if they include more
	// than one element. This is done using Tarjan's algorithm.
	//
	// Once the SCCs are identified, one edge per SCC is disconnected. Since
	// smaller sub-SCCs may remain afterwards, detection and cutting are repeated
	// until no SCC with more than one element is found.
	//
	// Progress is reported through vpr_printf_info/vpr_printf_warning; the final
	// message gives the total number of SCCs processed and the iteration count.
	void detect_and_fix_timing_graph_combinational_loops() {
	    int comb_cycle_iter_count = 0;
	    int comb_cycle_count = 0;

	    vpr_printf_info("Iteratively removing timing edges to break combinational cycles in timing graph.\n");

	    std::vector< std::vector<int> > tnode_comb_loops = detect_timing_graph_combinational_loops();

	    //Repeat until all loops broken
	    while(!tnode_comb_loops.empty()) {
	        comb_cycle_iter_count++;
	        //The format uses %d, so the size_t count is narrowed explicitly
	        vpr_printf_info("Found %d Combinational Loops in the timing graph on iteration %d.\n",
	                        (int) tnode_comb_loops.size(), comb_cycle_iter_count);
	        vpr_printf_warning(__FILE__, __LINE__,
	                            "Combinational Loops can not be analyzed properly and will be "
	                            "arbitrarily disconnected.\n");

	        break_timing_graph_combinational_loops(tnode_comb_loops);

	        comb_cycle_count += tnode_comb_loops.size();

	        //Cutting one edge per SCC may leave smaller cycles behind, so look again
	        tnode_comb_loops = detect_timing_graph_combinational_loops();
	    }
	    vpr_printf_info("Removed %d combinational cycles from timing graph after %d iteration(s)\n",
	                    comb_cycle_count, comb_cycle_iter_count);
	}

	/*
	 * Returns the combinational loops of the timing graph, each one given as
	 * the list of tnode indices in a strongly connected component.
	 */
	std::vector<std::vector<int> > detect_timing_graph_combinational_loops() {
	    //A strongly connected component needs at least two tnodes to count
	    //as a combinational loop
	    const size_t min_loop_size = 2;
	    return identify_strongly_connected_components(min_loop_size);
	}

	/*
	 * Breaks each combinational loop in tnode_comb_loops, in order. Every entry
	 * is one loop, given as a vector of tnode indices.
	 */
	void break_timing_graph_combinational_loops(std::vector<std::vector<int> >& tnode_comb_loops) {
	    typedef std::vector<std::vector<int> >::iterator loop_iter;
	    for(loop_iter loop = tnode_comb_loops.begin(); loop != tnode_comb_loops.end(); ++loop) {
	        break_timing_graph_combinational_loop(*loop);
	    }
	}

	/*
	 * Breaks one combinational loop, given as the set of tnode indices that
	 * form it. The edge that gets cut is the first out-edge of the first
	 * listed tnode whose target is also in the set: the target is flagged as
	 * a loop breakpoint and the edge is marked DO_NOT_ANALYSE. If the first
	 * tnode has no such edge, vpr_throw() is called.
	 */
	void break_timing_graph_combinational_loop(std::vector<int>& loop_tnodes) {
	    assert(loop_tnodes.size() >= 2); //Must have atleast 2 nodes for a valid cycle

	    //The edge to cut is always searched for among the fanout of the
	    //first tnode in the set
	    const int src_tnode = loop_tnodes[0];

	    for(int edge_idx = 0; edge_idx < tnode[src_tnode].num_edges; edge_idx++) {
	        const int dst_tnode = tnode[src_tnode].out_edges[edge_idx].to_node;

	        const bool stays_in_loop =
	            std::find(loop_tnodes.begin(), loop_tnodes.end(), dst_tnode) != loop_tnodes.end();
	        if(!stays_in_loop) {
	            continue; //Edge leaves the loop set, not a candidate
	        }

	        vpr_printf_warning(__FILE__, __LINE__, "Disconnecting timing graph edge from tnode %d to tnode %d to break combinational cycle\n", src_tnode, dst_tnode);

	        //Remember where the loop was opened
	        tnode[dst_tnode].is_comb_loop_breakpoint = TRUE;

	        //Disable the edge
	        tnode[src_tnode].out_edges[edge_idx].to_node = DO_NOT_ANALYSE;

	        return;
	    }

	    vpr_throw(VPR_ERROR_TIMING, __FILE__, __LINE__,
	            "Could not find edge to break combinational loop in timing graph.\n");
	}

	/*
	* Tarjan's algorithm for finding Strongly Connected Components (SCCs) in
	* a directed graph. Only SCCs with min_size or greater members are returned.
	*
	* We keep track of the following information:
	* - The current 'index' of the node (stored in tnode_indexes), this
	* corresponds to the order the node was traversed in the DFS
	* - The current 'lowlink' of the node (stored in tnode_lowlinks), this
	* corresponds to the lowest index of any node still on the stack that
	* is known to be reachable from the current node
	* - Whether the node is currently in the stack (stored in tnode_instack)
	* - A stack (tnode_stack) of elements in the current SCC
	*
	* The key idea behind the algorithm is that a node stays on the stack if it
	* connects to a node earlier in the traversal.
	*/
	std::vector<std::vector<int> > identify_strongly_connected_components(size_t min_size) {
	int i;
	int index = 0; //The current index of the traversal
	std::vector<std::vector<int> > tnode_sccs;

	//Allocate book-keeping information
	int* tnode_indexes = (int*) my_calloc(num_tnodes, sizeof(int));
	int* tnode_lowlinks = (int*) my_calloc(num_tnodes, sizeof(int));
	boolean* tnode_instack = (boolean*) my_calloc(num_tnodes, sizeof(boolean));

	//Initialize everything to unvisited
	for(i = 0; i < num_tnodes; i++) {
	tnode_indexes[i] = -1;
	tnode_lowlinks[i] = -1;
	tnode_instack[i] = FALSE;
	}

	//The stack of nodes
	std::stack<int> tnode_stack;

	//We ensure that every node gets traversed
	for(i = 0 ; i < num_tnodes; i++) {
	if(tnode_indexes[i] == -1) {
	strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, i);
	}
	}

	//Clean-up
	free(tnode_indexes);
	free(tnode_lowlinks);
	free(tnode_instack);

	return tnode_sccs;
	}

	void strongconnect(int& index, int* tnode_indexes, int* tnode_lowlinks, boolean* tnode_instack,
			std::stack<int>& tnode_stack, std::vector<std::vector<int> >& tnode_sccs,
			size_t min_size, int inode) {
		//First visit of inode: hand it the next traversal index, which is also
		//its initial lowlink
		tnode_lowlinks[inode] = tnode_indexes[inode] = index;
		++index;

		//inode joins the stack of nodes that do not yet belong to a finished SCC
		tnode_instack[inode] = TRUE;
		tnode_stack.push(inode);

		//Examine every out-going edge of inode
		for(int edge = 0; edge < tnode[inode].num_edges; ++edge) {
			const int succ = tnode[inode].out_edges[edge].to_node;

			if(succ == DO_NOT_ANALYSE) {
				continue; //Edge has been marked invalid
			}

			if(tnode_indexes[succ] == -1) {
				//Successor not visited yet, so recurse into it first
				strongconnect(index, tnode_indexes, tnode_lowlinks, tnode_instack, tnode_stack, tnode_sccs, min_size, succ);
				assert(tnode_lowlinks[inode] >= 0);
				assert(tnode_lowlinks[succ] >= 0);

				//Take over the successor's lowlink when it is lower than our own
				if(tnode_lowlinks[succ] < tnode_lowlinks[inode]) {
					tnode_lowlinks[inode] = tnode_lowlinks[succ];
				}
			} else if(tnode_instack[succ]) {
				//Successor is still on the stack, so it belongs to the current SCC
				assert(tnode_lowlinks[inode] >= 0);
				assert(tnode_indexes[succ] >= 0);

				//Take over the successor's traversal index when it is lower than
				//our lowlink (it may have been visited earlier than inode)
				if(tnode_indexes[succ] < tnode_lowlinks[inode]) {
					tnode_lowlinks[inode] = tnode_indexes[succ];
				}
			}
		}

		assert(tnode_indexes[inode] >= 0);

		//Only a node whose lowlink equals its own index is the root of an SCC
		if(tnode_lowlinks[inode] != tnode_indexes[inode]) {
			return;
		}

		//Unwind the stack down to and including inode; the popped nodes form the SCC
		std::vector<int> scc;
		for(;;) {
			const int member = tnode_stack.top();
			tnode_stack.pop();
			tnode_instack[member] = FALSE;
			scc.push_back(member);

			if(member == inode) {
				break;
			}
		}

		//Keep the SCC only if it meets the minimum size requirement
		if(scc.size() >= min_size) {
			tnode_sccs.push_back(scc);
		}
	}

	void print_comb_loop(std::vector<int>& loop_tnodes) {
		//Prints one line per tnode in the loop to stdout
		printf("Comb Loop:\n");

		for(size_t iloop_node = 0; iloop_node < loop_tnodes.size(); ++iloop_node) {
			const int i_tnode = loop_tnodes[iloop_node];

			if(tnode[i_tnode].pb_graph_pin == NULL) {
				//No pb_graph_pin attached, only the tnode index can be printed
				printf("\ttnode: %d\n", i_tnode);
				continue;
			}

			//Print the index together with pb_type name, port name and pin number
			printf("\ttnode: %d %s.%s[%d]\n", i_tnode,
					tnode[i_tnode].pb_graph_pin->parent_node->pb_type->name,
					tnode[i_tnode].pb_graph_pin->port->name,
					tnode[i_tnode].pb_graph_pin->pin_number);
		}
	}