vpr/SRC/timing/path_delay.c - third_party/vtr-verilog-to-routing - Git at Google

 #include <stdio.h>
 #include <string.h>

 #include "util.h"
 #include "vpr_types.h"
 #include "globals.h"
 #include "path_delay.h"
 #include "path_delay2.h"
 #include "net_delay.h"
 #include "vpr_utils.h"
 #include <assert.h>
 #include "read_xml_arch_file.h"

 /****************** Timing graph Structure ************************************
  *                                                                            *
  * In the timing graph I create, input pads and constant generators have no   *
  * inputs; everything else has inputs.  Every input pad and output pad is     *
  * represented by two tnodes -- an input pin and an output pin.  For an input *
  * pad the input pin comes from off chip and has no fanin, while the output   *
  * pin drives outpads and/or CBs.  For output pads, the input node is driven *
  * by a CB or input pad, and the output node goes off chip and has no        *
  * fanout (out-edges).  I need two nodes to respresent things like pads       *
  * because I mark all delay on tedges, not on tnodes.                         *
  *                                                                            *
  * Every used (not OPEN) CB pin becomes a timing node.  As well, every used  *
  * subblock pin within a CB also becomes a timing node.  Unused (OPEN) pins  *
  * don't create any timing nodes. If a subblock is used in combinational mode *
  * (i.e. its clock pin is open), I just hook the subblock input tnodes to the *
  * subblock output tnode.  If the subblock is used in sequential mode, I      *
  * create two extra tnodes.  One is just the subblock clock pin, which is     *
  * connected to the subblock output.  This means that FFs don't generate      *
  * their output until their clock arrives.  For global clocks coming from an  *
  * input pad, the delay of the clock is 0, so the FFs generate their outputs  *
  * at T = 0, as usual.  For locally-generated or gated clocks, however, the   *
  * clock will arrive later, and the FF output will be generated later.  This  *
  * lets me properly model things like ripple counters and gated clocks.  The  *
  * other extra node is the FF storage node (i.e. a sink), which connects to   *
  * the subblock inputs and has no fanout.                                     *
  *                                                                            *
  * One other subblock that needs special attention is a constant generator.   *
  * This has no used inputs, but its output is used.  I create an extra tnode, *
  * a dummy input, in addition to the output pin tnode.  The dummy tnode has   *
  * no fanin.  Since constant generators really generate their outputs at T =  *
  * -infinity, I set the delay from the input tnode to the output to a large-  *
  * magnitude negative number.  This guarantees every block that needs the     *
  * output of a constant generator sees it available very early.               *
  *                                                                            *
  * For this routine to work properly, subblocks whose outputs are unused must *
  * be completely empty -- all their input pins and their clock pin must be    *
  * OPEN.  Check_netlist checks the input netlist to guarantee this -- don't   *
  * disable that check.                                                        *
  *                                                                            *
  * NB:  The discussion below is only relevant for circuits with multiple      *
  * clocks.  For circuits with a single clock, everything I do is exactly      *
  * correct.                                                                   *
  *                                                                            *
  * A note about how I handle FFs:  By hooking the clock pin up to the FF      *
  * output, I properly model the time at which the FF generates its output.    *
  * I don't do a completely rigorous job of modelling required arrival time at *
  * the FF input, however.  I assume every FF and outpad needs its input at    *
  * T = 0, which is when the earliest clock arrives.  This can be conservative *
  * -- a fuller analysis would be to do a fast path analysis of the clock      *
  * feeding each FF and subtract its earliest arrival time from the delay of   *
  * the D signal to the FF input.  This is too much work, so I'm not doing it. *
  * Alternatively, when one has N clocks, it might be better to just do N      *
  * separate timing analyses, with only signals from FFs clocked on clock i    *
  * being propagated forward on analysis i, and only FFs clocked on i being    *
  * considered as sinks.  This gives all the critical paths within clock       *
  * domains, but ignores interactions.  Instead, I assume all the clocks are   *
  * more-or-less synchronized (they might be gated or locally-generated, but   *
  * they all have the same frequency) and explore all interactions.  Tough to  *
  * say what's the better way.  Since multiple clocks aren't important for my  *
  * work, it's not worth bothering about much.                                 *
  *                                                                            *
  ******************************************************************************/

 #define T_CONSTANT_GENERATOR -1000	/* Essentially -ve infinity */

 /***************** Types local to this module ***************************/

 enum e_subblock_pin_type
 { SUB_INPUT = 0, SUB_OUTPUT, SUB_CLOCK, NUM_SUB_PIN_TYPES };

 /***************** Variables local to this module ***************************/

 /* Variables for "chunking" the tedge memory.  If the head pointer is NULL, *
  * no timing graph exists now.                                              */

 static struct s_linked_vptr *tedge_ch_list_head = NULL;
 static int tedge_ch_bytes_avail = 0;
 static char *tedge_ch_next_avail = NULL;

 static struct s_net *timing_nets = NULL;
 static int num_timing_nets = 0;

 /***************** Subroutines local to this module *************************/

 static float **alloc_net_slack();

 static void compute_net_slacks(float **net_slack);

 static void build_cb_tnodes(int iblk);

 static void alloc_and_load_tnodes(t_timing_inf timing_inf);

 static void alloc_and_load_tnodes_from_prepacked_netlist(float block_delay, float inter_cluster_net_delay);

 static void load_tnode(INP t_pb_graph_pin *pb_graph_pin, INP int iblock, INOUTP int *inode, INP t_timing_inf timing_inf);

 static void normalize_costs(float t_crit, long max_critical_input_paths, long max_critical_output_paths);

 static void print_primitive_as_blif (FILE *fpout, int iblk);


 /********************* Subroutine definitions *******************************/


 float **
 alloc_and_load_timing_graph(t_timing_inf timing_inf)
 {

 /* This routine builds the graph used for timing analysis.  Every cb pin is a
  * timing node (tnode).  The connectivity between pins is *
  * represented by timing edges (tedges).  All delay is marked on edges, not *
  * on nodes.  This routine returns an array that will store slack values:   *
  * net_slack[0..num_nets-1][1..num_pins-1].                                 */
 /* The are below are valid only for CBs, not pads.                  */

 /* Array for mapping from a pin on a block to a tnode index. For pads, only *
  * the first two pin locations are used (input to pad is first, output of   *
  * pad is second).  For CLBs, all OPEN pins on the cb have their mapping   *
  * set to OPEN so I won't use it by mistake.                                */

     int num_sinks;
     float **net_slack;		/* [0..num_nets-1][1..num_pins-1]. */

 /************* End of variable declarations ********************************/

     if(tedge_ch_list_head != NULL)
 	{
 	    printf("Error in alloc_and_load_timing_graph:\n"
 		   "\tAn old timing graph still exists.\n");
 	    exit(1);
 	}
 	num_timing_nets = num_nets;
 	timing_nets = clb_net;

 	alloc_and_load_tnodes(timing_inf);

     num_sinks = alloc_and_load_timing_graph_levels();

 	#ifdef CREATE_ECHO_FILES
 		print_timing_graph("initial_timing_graph.echo");
 	#endif

 	check_timing_graph(num_sinks);

     net_slack = alloc_net_slack();

 	return (net_slack);
 }


 float** alloc_and_load_pre_packing_timing_graph(float block_delay, float inter_cluster_net_delay, t_model *models)
 {

 /* This routine builds the graph used for timing analysis.  Every technology mapped netlist pin is a
  * timing node (tnode).  The connectivity between pins is *
  * represented by timing edges (tedges).  All delay is marked on edges, not *
  * on nodes.  This routine returns an array that will store slack values:   *
  * net_slack[0..num_nets-1][1..num_pins-1].                                 */
 /* The are below are valid only for CBs, not pads.                  */

 /* Array for mapping from a pin on a block to a tnode index. For pads, only *
  * the first two pin locations are used (input to pad is first, output of   *
  * pad is second).  For CLBs, all OPEN pins on the cb have their mapping   *
  * set to OPEN so I won't use it by mistake.                                */

     int num_sinks;

 	float **net_slack;

 /************* End of variable declarations ********************************/

     if(tedge_ch_list_head != NULL)
 	{
 	    printf("Error in alloc_and_load_timing_graph:\n"
 		   "\tAn old timing graph still exists.\n");
 	    exit(1);
 	}

 	num_timing_nets = num_logical_nets;
 	timing_nets = vpack_net;

 	alloc_and_load_tnodes_from_prepacked_netlist(block_delay, inter_cluster_net_delay);

     num_sinks = alloc_and_load_timing_graph_levels();

 	net_slack = alloc_net_slack();

 	#ifdef CREATE_ECHO_FILES
 		print_timing_graph("pre_packing_timing_graph.echo");
 		print_timing_graph_as_blif("pre_packing_timing_graph_as_blif.blif", models);
 	#endif

 	check_timing_graph(num_sinks);

 	return net_slack;
 }


 static float **
 alloc_net_slack()
 {

 /* Allocates the net_slack structure.  Chunk allocated to save space.      */

     float **net_slack;		/* [0..num_nets-1][1..num_pins-1]  */
     int inet, j;

     net_slack = (float **)my_malloc(num_timing_nets * sizeof(float *));

     for(inet = 0; inet < num_timing_nets; inet++)
 	{
 	    net_slack[inet] = (float *)my_chunk_malloc((timing_nets[inet].num_sinks + 1) *
 					 sizeof(float), &tedge_ch_list_head,
 					 &tedge_ch_bytes_avail,
 					 &tedge_ch_next_avail);
 		for(j = 0; j <= timing_nets[inet].num_sinks; j++) {
 			net_slack[inet][j] = UNDEFINED;
 		}
 	}

     return (net_slack);
 }

 void
 load_timing_graph_net_delays(float **net_delay)
 {

 /* Sets the delays of the inter-CLB nets to the values specified by          *
  * net_delay[0..num_nets-1][1..num_pins-1].  These net delays should have    *
  * been allocated and loaded with the net_delay routines.  This routine      *
  * marks the corresponding edges in the timing graph with the proper delay.  */

     int inet, ipin, inode;
     t_tedge *tedge;

     for(inet = 0; inet < num_timing_nets; inet++)
 	{
 	    inode = net_to_driver_tnode[inet];
 	    tedge = tnode[inode].out_edges;

 /* Note that the edges of a tnode corresponding to a CLB or INPAD opin must  *
  * be in the same order as the pins of the net driven by the tnode.          */

 	    for(ipin = 1; ipin < (timing_nets[inet].num_sinks + 1); ipin++)
 		tedge[ipin - 1].Tdel = net_delay[inet][ipin];
 	}
 }


 void
 free_timing_graph(float **net_slack)
 {

 /* Frees the timing graph data. */

     if(tedge_ch_list_head == NULL)
 	{
 	    printf("Error in free_timing_graph: No timing graph to free.\n");
 	    exit(1);
 	}

     free_chunk_memory(tedge_ch_list_head);
     free(tnode);
     free(net_to_driver_tnode);
     free_ivec_vector(tnodes_at_level, 0, num_tnode_levels - 1);
     free(net_slack);

     tedge_ch_list_head = NULL;
     tedge_ch_bytes_avail = 0;
     tedge_ch_next_avail = NULL;

     tnode = NULL;
     num_tnodes = 0;
     net_to_driver_tnode = NULL;
     tnodes_at_level = NULL;
     num_tnode_levels = 0;
 }


 void
 print_net_slack(char *fname,
 		float **net_slack)
 {

 /* Prints the net slacks into a file.                                     */

     int inet, ipin;
     FILE *fp;

     fp = my_fopen(fname, "w", 0);

     fprintf(fp, "Net #\tSlacks\n\n");

     for(inet = 0; inet < num_timing_nets; inet++)
 	{
 	    fprintf(fp, "%5d", inet);
 	    for(ipin = 1; ipin < (timing_nets[inet].num_sinks + 1); ipin++)
 		{
 		    fprintf(fp, "\t%g", net_slack[inet][ipin]);
 		}
 	    fprintf(fp, "\n");
 	}
 }

 /* Count # of tnodes, allocates space, and loads the tnodes and its associated edges */
 static void alloc_and_load_tnodes(t_timing_inf timing_inf) {
 	int i, j, k;
 	int inode;
 	int num_nodes_in_block;
 	int count;
 	int iblock, irr_node;
 	int inet, dport, dpin, dblock, dnode;
 	int normalized_pin, normalization;
 	t_pb_graph_pin *ipb_graph_pin;
 	t_rr_node *local_rr_graph, *d_rr_graph;

 	net_to_driver_tnode = my_malloc(num_timing_nets * sizeof(int));

 	for(i = 0; i < num_timing_nets; i++) {
 		net_to_driver_tnode[i] = OPEN;
 	}

 	/* allocate space for tnodes */
 	num_tnodes = 0;
 	for(i = 0; i < num_blocks; i++) {
 		num_nodes_in_block = 0;
 		for(j = 0; j < block[i].pb->pb_graph_node->total_pb_pins; j++) {
 			if(block[i].pb->rr_graph[j].net_num != OPEN) {
 				if(block[i].pb->rr_graph[j].pb_graph_pin->type == PB_PIN_INPAD ||
 				   block[i].pb->rr_graph[j].pb_graph_pin->type == PB_PIN_OUTPAD ||
 				   block[i].pb->rr_graph[j].pb_graph_pin->type == PB_PIN_SEQUENTIAL) {
 					num_nodes_in_block += 2;
 				} else {
 					num_nodes_in_block++;
 				}
 			}
 		}
 		num_tnodes += num_nodes_in_block;
 	}
 	tnode = my_calloc(num_tnodes, sizeof(t_tnode));

 	/* load tnodes with all info except edge info */
 	/* populate tnode lookups for edge info */
 	inode = 0;
 	for(i = 0; i < num_blocks; i++) {
 		for(j = 0; j < block[i].pb->pb_graph_node->total_pb_pins; j++) {
 			if(block[i].pb->rr_graph[j].net_num != OPEN) {
 				assert(tnode[inode].pb_graph_pin == NULL);
 				load_tnode(block[i].pb->rr_graph[j].pb_graph_pin, i, &inode, timing_inf);
 			}
 		}
 	}
 	assert(inode == num_tnodes);

 	/* load edge delays */
 	for(i = 0; i < num_tnodes; i++) {
 		/* 3 primary scenarios for edge delays
 		1.  Point-to-point delays inside block
 		2.
 		*/
 		count = 0;
 		iblock = tnode[i].block;
 		switch (tnode[i].type) {
 			case INPAD_OPIN:
 			case INTERMEDIATE_NODE:
 			case PRIMITIVE_OPIN:
 			case FF_OPIN:
 			case CB_IPIN:
 				/* fanout is determined by intra-cluster connections */
 				/* Allocate space for edges  */
 				irr_node = tnode[i].pb_graph_pin->pin_count_in_cluster;
 				local_rr_graph = block[iblock].pb->rr_graph;
 				ipb_graph_pin = local_rr_graph[irr_node].pb_graph_pin;

 				if(ipb_graph_pin->parent_node->pb_type->max_internal_delay != UNDEFINED) {
 					if(pb_max_internal_delay == UNDEFINED) {
 						pb_max_internal_delay = ipb_graph_pin->parent_node->pb_type->max_internal_delay;
 						pbtype_max_internal_delay = ipb_graph_pin->parent_node->pb_type;
 					} else if(pb_max_internal_delay < ipb_graph_pin->parent_node->pb_type->max_internal_delay) {
 						pb_max_internal_delay = ipb_graph_pin->parent_node->pb_type->max_internal_delay;
 						pbtype_max_internal_delay = ipb_graph_pin->parent_node->pb_type;
 					}
 				}

 				for(j = 0; j < block[iblock].pb->rr_graph[irr_node].num_edges; j++) {
 					dnode = local_rr_graph[irr_node].edges[j];
 					if((local_rr_graph[dnode].prev_node == irr_node) && (j == local_rr_graph[dnode].prev_edge)) {
 						count++;
 					}
 				}
 				assert(count > 0);
 				tnode[i].num_edges = count;
 				tnode[i].out_edges = (t_tedge *) my_chunk_malloc(count *
 							    sizeof(t_tedge),
 							    &tedge_ch_list_head,
 							    &tedge_ch_bytes_avail,
 							    &tedge_ch_next_avail);

 				/* Load edges */
 				count = 0;
 				for(j = 0; j < local_rr_graph[irr_node].num_edges; j++) {
 					dnode = local_rr_graph[irr_node].edges[j];
 					if((local_rr_graph[dnode].prev_node == irr_node) && (j == local_rr_graph[dnode].prev_edge)) {
 						assert((ipb_graph_pin->output_edges[j]->num_output_pins == 1) &&
 							(local_rr_graph[ipb_graph_pin->output_edges[j]->output_pins[0]->pin_count_in_cluster].net_num == local_rr_graph[irr_node].net_num));


 						tnode[i].out_edges[count].Tdel = ipb_graph_pin->output_edges[j]->delay_max;
 						tnode[i].out_edges[count].to_node = local_rr_graph[dnode].tnode->index;

 						if(vpack_net[local_rr_graph[irr_node].net_num].is_const_gen == TRUE && tnode[i].type == PRIMITIVE_OPIN) {
 							tnode[i].out_edges[count].Tdel = T_CONSTANT_GENERATOR;
 							tnode[i].type = CONSTANT_GEN_SOURCE;
 						}

 						count++;
 					}
 				}
 				assert(count > 0);

 				break;
 			case PRIMITIVE_IPIN:
 				/* Pin info comes from pb_graph block delays
 				*/
 				irr_node = tnode[i].pb_graph_pin->pin_count_in_cluster;
 				local_rr_graph = block[iblock].pb->rr_graph;
 				ipb_graph_pin = local_rr_graph[irr_node].pb_graph_pin;
 				tnode[i].num_edges = ipb_graph_pin->num_pin_timing;
 				tnode[i].out_edges = (t_tedge *) my_chunk_malloc(ipb_graph_pin->num_pin_timing *
 												sizeof(t_tedge),
 												&tedge_ch_list_head,
 												&tedge_ch_bytes_avail,
 												&tedge_ch_next_avail);
 				k = 0;

 				for(j = 0; j < tnode[i].num_edges; j++) {
 					/* Some outpins aren't used, ignore these.  Only consider output pins that are used */
 					if(local_rr_graph[ipb_graph_pin->pin_timing[j]->pin_count_in_cluster].net_num != OPEN) {
 						tnode[i].out_edges[k].Tdel = ipb_graph_pin->pin_timing_del_max[j];
 						tnode[i].out_edges[k].to_node = local_rr_graph[ipb_graph_pin->pin_timing[j]->pin_count_in_cluster].tnode->index;
 						assert(tnode[i].out_edges[k].to_node != OPEN);
 						k++;
 					}
 				}
 				tnode[i].num_edges -= (j - k); /* remove unused edges */
 				if(tnode[i].num_edges == 0) {
 					printf(ERRTAG "No timing information for pin %s.%s[%d]\n", tnode[i].pb_graph_pin->parent_node->pb_type->name, tnode[i].pb_graph_pin->port->name,tnode[i].pb_graph_pin->pin_number);
 					exit(1);
 				}
 				break;
 			case CB_OPIN:
 				/* load up net info */
 				irr_node = tnode[i].pb_graph_pin->pin_count_in_cluster;
 				local_rr_graph = block[iblock].pb->rr_graph;
 				ipb_graph_pin = local_rr_graph[irr_node].pb_graph_pin;
 				assert(local_rr_graph[irr_node].net_num != OPEN);
 				inet = vpack_to_clb_net_mapping[local_rr_graph[irr_node].net_num];
 				assert(inet != OPEN);
 				net_to_driver_tnode[inet] = i;
 				tnode[i].num_edges = clb_net[inet].num_sinks;
 				tnode[i].out_edges = (t_tedge *) my_chunk_malloc(clb_net[inet].num_sinks *
 												sizeof(t_tedge),
 												&tedge_ch_list_head,
 												&tedge_ch_bytes_avail,
 												&tedge_ch_next_avail);
 				for(j = 1; j <= clb_net[inet].num_sinks; j++) {
 					dblock = clb_net[inet].node_block[j];
 					normalization = block[dblock].type->num_pins / block[dblock].type->capacity;
 					normalized_pin = clb_net[inet].node_block_pin[j] % normalization;
 					d_rr_graph = block[dblock].pb->rr_graph;
 					dpin = OPEN;
 					dport = OPEN;
 					count = 0;

 					for(k = 0; k < block[dblock].pb->pb_graph_node->num_input_ports && dpin == OPEN; k++) {
 						if(normalized_pin >= count && (count + block[dblock].pb->pb_graph_node->num_input_pins[k] > normalized_pin)) {
 							dpin = normalized_pin - count;
 							dport = k;
 							break;
 						}
 						count += block[dblock].pb->pb_graph_node->num_input_pins[k];
 					}
 					if(dpin == OPEN) {
 						for(k = 0; k < block[dblock].pb->pb_graph_node->num_output_ports && dpin == OPEN; k++) {
 							count += block[dblock].pb->pb_graph_node->num_output_pins[k];
 						}
 						for(k = 0; k < block[dblock].pb->pb_graph_node->num_clock_ports && dpin == OPEN; k++) {
 							if(normalized_pin >= count && (count + block[dblock].pb->pb_graph_node->num_clock_pins[k] > normalized_pin)) {
 								dpin = normalized_pin - count;
 								dport = k;
 							}
 							count += block[dblock].pb->pb_graph_node->num_clock_pins[k];
 						}
 						assert(dpin != OPEN);
 						assert(inet == vpack_to_clb_net_mapping[d_rr_graph[block[dblock].pb->pb_graph_node->clock_pins[dport][dpin].pin_count_in_cluster].net_num]);
 						tnode[i].out_edges[j-1].to_node = d_rr_graph[block[dblock].pb->pb_graph_node->clock_pins[dport][dpin].pin_count_in_cluster].tnode->index;
 					} else {
 						assert(dpin != OPEN);
 						assert(inet == vpack_to_clb_net_mapping[d_rr_graph[block[dblock].pb->pb_graph_node->input_pins[dport][dpin].pin_count_in_cluster].net_num]);
 						/* delays are assigned post routing */
 						tnode[i].out_edges[j-1].to_node = d_rr_graph[block[dblock].pb->pb_graph_node->input_pins[dport][dpin].pin_count_in_cluster].tnode->index;
 					}
 					assert(inet != OPEN);
 				}
 				break;
 			case OUTPAD_IPIN:
 			case INPAD_SOURCE:
 			case OUTPAD_SINK:
 			case FF_SINK:
 			case FF_SOURCE:
 			case FF_IPIN:
 				break;
 			default:
 				printf(ERRTAG "Consistency check failed: Unknown tnode type %d\n", tnode[i].type);
 				assert(0);
 			break;
 		}
 	}
 }

 /* Allocate timing graph for pre packed netlist
   Count number of tnodes first
   Then connect up tnodes with edges
 */
 static void alloc_and_load_tnodes_from_prepacked_netlist(float block_delay, float inter_cluster_net_delay) {
 	int i, j, k;
 	t_model *model;
 	t_model_ports *model_port;
 	int inode, inet, iblock;
 	int incr;
 	int count;

 	net_to_driver_tnode = my_malloc(num_logical_nets * sizeof(int));

 	for(i = 0; i < num_logical_nets; i++) {
 		net_to_driver_tnode[i] = OPEN;
 	}


 	/* allocate space for tnodes */
 	num_tnodes = 0;
 	for(i = 0; i < num_logical_blocks; i++) {
 		model = logical_block[i].model;
 		logical_block[i].clock_net_tnode = NULL;
 		if(logical_block[i].type == VPACK_INPAD) {
 			logical_block[i].output_net_tnodes = my_calloc(1, sizeof(t_tnode**));
 			num_tnodes += 2;
 		} else if(logical_block[i].type == VPACK_OUTPAD) {
 			logical_block[i].input_net_tnodes = my_calloc(1, sizeof(t_tnode**));
 			num_tnodes += 2;
 		} else {
 			if(logical_block[i].clock_net == OPEN) {
 				incr = 1;
 			} else {
 				incr = 2;
 			}
 			j = 0;
 			model_port = model->inputs;
 			while(model_port) {
 				if(model_port->is_clock == FALSE) {
 					for(k = 0; k < model_port->size; k++) {
 						if(logical_block[i].input_nets[j][k] != OPEN) {
 							num_tnodes += incr;
 						}
 					}
 					j++;
 				} else {
 					num_tnodes++;
 				}
 				model_port = model_port->next;
 			}
 			logical_block[i].input_net_tnodes = my_calloc(j, sizeof(t_tnode**));

 			j = 0;
 			model_port = model->outputs;
 			while(model_port) {
 				for(k = 0; k < model_port->size; k++) {
 					if(logical_block[i].output_nets[j][k] != OPEN) {
 						num_tnodes += incr;
 					}
 				}
 				j++;
 				model_port = model_port->next;
 			}
 			logical_block[i].output_net_tnodes = my_calloc(j, sizeof(t_tnode**));
 		}
 	}
 	tnode = my_calloc(num_tnodes, sizeof(t_tnode));
 	for(i = 0; i < num_tnodes; i++) {
 		tnode[i].index = i;
 	}

 	/* load tnodes, alloc edges for tnodes, load all known tnodes */
 	inode = 0;
 	for(i = 0; i < num_logical_blocks; i++) {
 		model = logical_block[i].model;
 		if(logical_block[i].type == VPACK_INPAD) {
 			logical_block[i].output_net_tnodes[0] = my_calloc(1, sizeof(t_tnode*));
 			logical_block[i].output_net_tnodes[0][0] = &tnode[inode];
 			net_to_driver_tnode[logical_block[i].output_nets[0][0]] = inode;
 			tnode[inode].model_pin = 0;
 			tnode[inode].model_port = 0;
 			tnode[inode].block = i;
 			tnode[inode].type = INPAD_OPIN;

 			tnode[inode].num_edges = vpack_net[logical_block[i].output_nets[0][0]].num_sinks;
 			tnode[inode].out_edges =
 								(t_tedge *) my_chunk_malloc(tnode[inode].num_edges *
 												sizeof(t_tedge),
 												&tedge_ch_list_head,
 												&tedge_ch_bytes_avail,
 												&tedge_ch_next_avail);
 			tnode[inode + 1].num_edges = 1;
 			tnode[inode + 1].out_edges =
 				(t_tedge *) my_chunk_malloc(1 *
 							    sizeof(t_tedge),
 							    &tedge_ch_list_head,
 							    &tedge_ch_bytes_avail,
 							    &tedge_ch_next_avail);
 			tnode[inode + 1].out_edges->Tdel = 0;
 			tnode[inode + 1].out_edges->to_node = inode;
 			tnode[inode + 1].T_req = 0;
 			tnode[inode + 1].T_arr = 0;
 			tnode[inode + 1].type = INPAD_SOURCE;
 			tnode[inode + 1].block = i;
 			inode += 2;
 		} else if (logical_block[i].type == VPACK_OUTPAD) {
 			logical_block[i].input_net_tnodes[0] = my_calloc(1, sizeof(t_tnode*));
 			logical_block[i].input_net_tnodes[0][0] = &tnode[inode];
 			tnode[inode].model_pin = 0;
 			tnode[inode].model_port = 0;
 			tnode[inode].block = i;
 			tnode[inode].type = OUTPAD_IPIN;
 			tnode[inode].num_edges = 1;
 			tnode[inode].out_edges =
 				(t_tedge *) my_chunk_malloc(1 *
 							    sizeof(t_tedge),
 							    &tedge_ch_list_head,
 							    &tedge_ch_bytes_avail,
 							    &tedge_ch_next_avail);
 			tnode[inode].out_edges->Tdel = 0;
 			tnode[inode].out_edges->to_node = inode + 1;
 			tnode[inode + 1].T_req = 0;
 			tnode[inode + 1].T_arr = 0;
 			tnode[inode + 1].type = OUTPAD_SINK;
 			tnode[inode + 1].block = i;
 			tnode[inode + 1].num_edges = 0;
 			tnode[inode + 1].out_edges = NULL;
 			tnode[inode + 1].index = inode + 1;
 			inode += 2;
 		} else {
 			j = 0;
 			model_port = model->outputs;
 			count = 0;
 			while(model_port) {
 				logical_block[i].output_net_tnodes[j] = my_calloc(model_port->size, sizeof(t_tnode*));
 				for(k = 0; k < model_port->size; k++) {
 					if(logical_block[i].output_nets[j][k] != OPEN) {
 						count++;
 						tnode[inode].model_pin = k;
 						tnode[inode].model_port = j;
 						tnode[inode].block = i;
 						net_to_driver_tnode[logical_block[i].output_nets[j][k]] = inode;
 						logical_block[i].output_net_tnodes[j][k] = &tnode[inode];

 						tnode[inode].num_edges = vpack_net[logical_block[i].output_nets[j][k]].num_sinks;
 						tnode[inode].out_edges =
 								(t_tedge *) my_chunk_malloc(tnode[inode].num_edges *
 												sizeof(t_tedge),
 												&tedge_ch_list_head,
 												&tedge_ch_bytes_avail,
 												&tedge_ch_next_avail);

 						if (logical_block[i].clock_net == OPEN) {
 							tnode[inode].type = PRIMITIVE_OPIN;
 							inode++;
 						} else {
 							tnode[inode].type = FF_OPIN;
 							tnode[inode + 1].num_edges = 1;
 							tnode[inode + 1].out_edges =
 								(t_tedge *) my_chunk_malloc(1 *
 												sizeof(t_tedge),
 												&tedge_ch_list_head,
 												&tedge_ch_bytes_avail,
 												&tedge_ch_next_avail);
 							tnode[inode + 1].out_edges->to_node = inode;
 							tnode[inode + 1].out_edges->Tdel = 0;
 							tnode[inode + 1].type = FF_SOURCE;
 							tnode[inode + 1].block = i;
 							inode += 2;
 						}
 					}
 				}
 				j++;
 				model_port = model_port->next;
 			}

 			j = 0;
 			model_port = model->inputs;
 			while(model_port) {
 				if(model_port->is_clock == FALSE) {
 					logical_block[i].input_net_tnodes[j] = my_calloc(model_port->size, sizeof(t_tnode*));
 					for(k = 0; k < model_port->size; k++) {
 						if(logical_block[i].input_nets[j][k] != OPEN) {
 							tnode[inode].model_pin = k;
 							tnode[inode].model_port = j;
 							tnode[inode].block = i;
 							logical_block[i].input_net_tnodes[j][k] = &tnode[inode];

 							if (logical_block[i].clock_net == OPEN) {
 								tnode[inode].type = PRIMITIVE_IPIN;
 								tnode[inode].out_edges =
 										(t_tedge *) my_chunk_malloc(count *
 														sizeof(t_tedge),
 														&tedge_ch_list_head,
 														&tedge_ch_bytes_avail,
 														&tedge_ch_next_avail);
 								tnode[inode].num_edges = count;
 								inode++;
 							} else {
 								tnode[inode].type = FF_IPIN;
 								tnode[inode].num_edges = 1;
 								tnode[inode].out_edges =
 									(t_tedge *) my_chunk_malloc(1 *
 													sizeof(t_tedge),
 													&tedge_ch_list_head,
 													&tedge_ch_bytes_avail,
 													&tedge_ch_next_avail);
 								tnode[inode].out_edges->to_node = inode + 1;
 								tnode[inode].out_edges->Tdel = 0;
 								tnode[inode + 1].type = FF_SINK;
 								tnode[inode + 1].num_edges = 0;
 								tnode[inode + 1].out_edges = NULL;
 								inode += 2;
 							}
 						}
 					}
 					j++;
 				} else {
 					if(logical_block[i].clock_net != OPEN) {
 						assert(logical_block[i].clock_net_tnode == NULL);
 						logical_block[i].clock_net_tnode = &tnode[inode];
 						tnode[inode].block = i;
 						tnode[inode].model_pin = 0;
 						tnode[inode].model_port = 0;
 						tnode[inode].num_edges = 0;
 						tnode[inode].out_edges = NULL;
 						tnode[inode].type = FF_SINK;
 						inode++;
 					}
 				}
 				model_port = model_port->next;
 			}
 		}
 	}
 	assert(inode == num_tnodes);

 	/* load edge delays */
 	for(i = 0; i < num_tnodes; i++) {
 		/* 3 primary scenarios for edge delays
 		1.  Point-to-point delays inside block
 		2.
 		*/
 		count = 0;
 		iblock = tnode[i].block;
 		switch (tnode[i].type) {
 			case INPAD_OPIN:
 			case PRIMITIVE_OPIN:
 			case FF_OPIN:
 				/* fanout is determined by intra-cluster connections */
 				/* Allocate space for edges  */
 				inet = logical_block[tnode[i].block].output_nets[tnode[i].model_port][tnode[i].model_pin];
 				assert(inet != OPEN);

 				for(j = 1; j <= vpack_net[inet].num_sinks; j++) {
 					if(vpack_net[inet].is_const_gen) {
 						tnode[i].out_edges[j - 1].Tdel = T_CONSTANT_GENERATOR;
 						tnode[i].type = CONSTANT_GEN_SOURCE;
 					} else {
 						tnode[i].out_edges[j - 1].Tdel = inter_cluster_net_delay;
 					}
 					assert(logical_block[vpack_net[inet].node_block[j]].input_net_tnodes[vpack_net[inet].node_block_port[j]][vpack_net[inet].node_block_pin[j]] != NULL);
 					if(vpack_net[inet].is_global) {
 						assert(logical_block[vpack_net[inet].node_block[j]].clock_net == inet);
 						tnode[i].out_edges[j - 1].to_node = logical_block[vpack_net[inet].node_block[j]].clock_net_tnode->index;
 					} else {
 						tnode[i].out_edges[j - 1].to_node = logical_block[vpack_net[inet].node_block[j]].input_net_tnodes[vpack_net[inet].node_block_port[j]][vpack_net[inet].node_block_pin[j]]->index;
 					}
 				}
 				assert(tnode[i].num_edges == vpack_net[inet].num_sinks);
 				break;
 			case PRIMITIVE_IPIN:
 				model_port = logical_block[iblock].model->outputs;
 				count = 0;
 				j = 0;
 				while(model_port) {
 					for(k = 0; k < model_port->size; k++) {
 						if(logical_block[iblock].output_nets[j][k] != OPEN) {
 							tnode[i].out_edges[count].Tdel = block_delay;
 							tnode[i].out_edges[count].to_node = logical_block[iblock].output_net_tnodes[j][k]->index;
 							count++;
 						}
 						else {
 							assert(logical_block[iblock].output_net_tnodes[j][k] == NULL);
 						}
 					}
 					j++;
 					model_port = model_port->next;
 				}
 				assert(count == tnode[i].num_edges);
 				break;
 			case OUTPAD_IPIN:
 			case INPAD_SOURCE:
 			case OUTPAD_SINK:
 			case FF_SINK:
 			case FF_SOURCE:
 			case FF_IPIN:
 				break;
 			default:
 				printf(ERRTAG "Consistency check failed: Unknown tnode type %d\n", tnode[i].type);
 				assert(0);
 			break;
 		}
 	}

 	for(i = 0; i < num_logical_nets; i++) {
 		assert(net_to_driver_tnode[i] != OPEN);
 	}
 }

 static void load_tnode(INP t_pb_graph_pin *pb_graph_pin, INP int iblock, INOUTP int *inode, INP t_timing_inf timing_inf) {
 	int i;
 	i = *inode;
 	tnode[i].pb_graph_pin = pb_graph_pin;
 	tnode[i].block = iblock;
 	tnode[i].index = i;
 	block[iblock].pb->rr_graph[pb_graph_pin->pin_count_in_cluster].tnode = &tnode[i];
 	if(tnode[i].pb_graph_pin->parent_node->pb_type->blif_model == NULL) {
 		assert(tnode[i].pb_graph_pin->type == PB_PIN_NORMAL);
 		if(tnode[i].pb_graph_pin->parent_node->parent_pb_graph_node == NULL) {
 			if(tnode[i].pb_graph_pin->port->type == IN_PORT) {
 				tnode[i].type = CB_IPIN;
 			} else {
 				assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);
 				tnode[i].type = CB_OPIN;
 			}
 		} else {
 			tnode[i].type = INTERMEDIATE_NODE;
 		}
 	} else {
 		if(tnode[i].pb_graph_pin->type == PB_PIN_INPAD) {
 			assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);
 			tnode[i].type = INPAD_OPIN;
 			tnode[i + 1].num_edges = 1;
 			tnode[i + 1].out_edges =
 				(t_tedge *) my_chunk_malloc(1 *
 							    sizeof(t_tedge),
 							    &tedge_ch_list_head,
 							    &tedge_ch_bytes_avail,
 							    &tedge_ch_next_avail);
 			tnode[i + 1].out_edges->Tdel = 0;
 			tnode[i + 1].out_edges->to_node = i;
 			tnode[i + 1].pb_graph_pin = NULL;
 			tnode[i + 1].T_req = 0;
 			tnode[i + 1].T_arr = 0;
 			tnode[i + 1].type = INPAD_SOURCE;
 			tnode[i + 1].block = iblock;
 			tnode[i + 1].index = i + 1;
 			(*inode)++;
 		} else if(tnode[i].pb_graph_pin->type == PB_PIN_OUTPAD) {
 			assert(tnode[i].pb_graph_pin->port->type == IN_PORT);
 			tnode[i].type = OUTPAD_IPIN;
 			tnode[i].num_edges = 1;
 			tnode[i].out_edges =
 				(t_tedge *) my_chunk_malloc(1 *
 							    sizeof(t_tedge),
 							    &tedge_ch_list_head,
 							    &tedge_ch_bytes_avail,
 							    &tedge_ch_next_avail);
 			tnode[i].out_edges->Tdel = 0;
 			tnode[i].out_edges->to_node = i + 1;
 			tnode[i + 1].pb_graph_pin = NULL;
 			tnode[i + 1].T_req = 0;
 			tnode[i + 1].T_arr = 0;
 			tnode[i + 1].type = OUTPAD_SINK;
 			tnode[i + 1].block = iblock;
 			tnode[i + 1].num_edges = 0;
 			tnode[i + 1].out_edges = NULL;
 			tnode[i + 1].index = i + 1;
 			(*inode)++;
 		} else if(tnode[i].pb_graph_pin->type == PB_PIN_SEQUENTIAL) {
 			if(tnode[i].pb_graph_pin->port->type == IN_PORT) {
 				tnode[i].type = FF_IPIN;
 				tnode[i].num_edges = 1;
 				tnode[i].out_edges = (t_tedge *) my_chunk_malloc(1 *
 							    sizeof(t_tedge),
 							    &tedge_ch_list_head,
 							    &tedge_ch_bytes_avail,
 							    &tedge_ch_next_avail);
 				tnode[i].out_edges->Tdel = pb_graph_pin->tsu_tco;
 				tnode[i].out_edges->to_node = i + 1;
 				tnode[i + 1].pb_graph_pin = NULL;
 				tnode[i + 1].T_req = 0;
 				tnode[i + 1].T_arr = 0;
 				tnode[i + 1].type = FF_SINK;
 				tnode[i + 1].block = iblock;
 				tnode[i + 1].num_edges = 0;
 				tnode[i + 1].out_edges = NULL;
 				tnode[i + 1].index = i + 1;
 			} else {
 				assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);
 				tnode[i].type = FF_OPIN;
 				tnode[i + 1].num_edges = 1;
 				tnode[i + 1].out_edges = (t_tedge *) my_chunk_malloc(1 *
 							    sizeof(t_tedge),
 							    &tedge_ch_list_head,
 							    &tedge_ch_bytes_avail,
 							    &tedge_ch_next_avail);
 				tnode[i + 1].out_edges->Tdel = pb_graph_pin->tsu_tco;
 				tnode[i + 1].out_edges->to_node = i;
 				tnode[i + 1].pb_graph_pin = NULL;
 				tnode[i + 1].T_req = 0;
 				tnode[i + 1].T_arr = 0;
 				tnode[i + 1].type = FF_SOURCE;
 				tnode[i + 1].block = iblock;
 				tnode[i + 1].index = i + 1;
 			}
 			(*inode)++;
 		} else if (tnode[i].pb_graph_pin->type == PB_PIN_CLOCK) {
 			tnode[i].type = FF_SINK;
 			tnode[i].num_edges = 0;
 			tnode[i].out_edges = NULL;
 		} else {
 			if(tnode[i].pb_graph_pin->port->type == IN_PORT) {
 				assert(tnode[i].pb_graph_pin->type == PB_PIN_TERMINAL);
 				tnode[i].type = PRIMITIVE_IPIN;
 			} else {
 				assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);
 				assert(tnode[i].pb_graph_pin->type == PB_PIN_TERMINAL);
 				tnode[i].type = PRIMITIVE_OPIN;
 			}
 		}
 	}
 	(*inode)++;
 }

 void
 print_timing_graph(char *fname)
 {

 /* Prints the timing graph into a file.           */

     FILE *fp;
     int inode, iedge, ilevel, i;
     t_tedge *tedge;
     t_tnode_type itype;
     char *tnode_type_names[] = { "INPAD_SOURCE", "INPAD_OPIN", "OUTPAD_IPIN",
 		"OUTPAD_SINK", "CB_IPIN", "CB_OPIN", "INTERMEDIATE_NODE", "PRIMITIVE_IPIN", "PRIMITIVE_OPIN",
 		"FF_IPIN", "FF_OPIN",
 		"FF_SINK", "FF_SOURCE",
 		"CONSTANT_GEN_SOURCE"
     };


     fp = my_fopen(fname, "w", 0);

     fprintf(fp, "num_tnodes: %d\n", num_tnodes);
     fprintf(fp, "Node #\tType\t\tipin\tiblk\t# edges\t"
 	    "Edges (to_node, Tdel)\n\n");


     for(inode = 0; inode < num_tnodes; inode++)
 	{
 	    fprintf(fp, "%d\t", inode);

 	    itype = tnode[inode].type;
 	    fprintf(fp, "%-15.15s\t", tnode_type_names[itype]);

 		if(tnode[inode].pb_graph_pin != NULL) {
 			fprintf(fp, "%d\t%d\t", tnode[inode].pb_graph_pin->pin_count_in_cluster,
 		    tnode[inode].block);
 		} else {
 			fprintf(fp, "%d\t",  tnode[inode].block);
 		}

 	    fprintf(fp, "%d\t", tnode[inode].num_edges);


 	    tedge = tnode[inode].out_edges;
 		for(iedge = 0; iedge < tnode[inode].num_edges; iedge++)
 		{
 			fprintf(fp, "\t(%4d,%7.3g)", tedge[iedge].to_node,
 			    tedge[iedge].Tdel);
 		}
 	    fprintf(fp, "\n");
 	}

     fprintf(fp, "\n\nnum_tnode_levels: %d\n", num_tnode_levels);

     for(ilevel = 0; ilevel < num_tnode_levels; ilevel++)
 	{
 	    fprintf(fp, "\n\nLevel: %d  Num_nodes: %d\nNodes:", ilevel,
 		    tnodes_at_level[ilevel].nelem);
 	    for(i = 0; i < tnodes_at_level[ilevel].nelem; i++)
 		fprintf(fp, "\t%d", tnodes_at_level[ilevel].list[i]);
 	}

     fprintf(fp, "\n");
     fprintf(fp, "\n\nNet #\tNet_to_driver_tnode\n");

     for(i = 0; i < num_nets; i++)
 	fprintf(fp, "%4d\t%6d\n", i, net_to_driver_tnode[i]);

     fprintf(fp, "\n\nNode #\t\tT_arr\t\tT_req\n\n");

 	for(inode = 0; inode < num_tnodes; inode++) {
 		fprintf(fp, "%d\t%12g\t%12g\n", inode, tnode[inode].T_arr,
 			tnode[inode].T_req);
 	}

     fclose(fp);
 }


 float
 load_net_slack(float **net_slack,
 	       float target_cycle_time)
 {

 /* Determines the slack of every source-sink pair of block pins in the      *
  * circuit.  The timing graph must have already been built.  target_cycle_  *
  * time is the target delay for the circuit -- if 0, the target_cycle_time  *
  * is set to the critical path found in the timing graph.  This routine     *
  * loads net_slack, and returns the current critical path delay.            */

     float T_crit, T_arr, Tdel, T_cycle, T_req;
     int inode, ilevel, num_at_level, i, num_edges, iedge, to_node;
 	int total;
     t_tedge *tedge;

 /* Reset all arrival times to -ve infinity.  Can't just set to zero or the   *
  * constant propagation (constant generators work at -ve infinity) won't     *
  * work.                                                                     */

 	long max_critical_input_paths = 0;
 	long max_critical_output_paths = 0;

     for(inode = 0; inode < num_tnodes; inode++)
 	tnode[inode].T_arr = T_CONSTANT_GENERATOR;

 /* Compute all arrival times with a breadth-first analysis from inputs to   *
  * outputs.  Also compute critical path (T_crit).                           */

     T_crit = 0.;

 /* Primary inputs arrive at T = 0. */

     num_at_level = tnodes_at_level[0].nelem;
     for(i = 0; i < num_at_level; i++)
 	{
 	    inode = tnodes_at_level[0].list[i];
 	    tnode[inode].T_arr = 0.;
 	}

 	total = 0;
     for(ilevel = 0; ilevel < num_tnode_levels; ilevel++)
 	{
 	    num_at_level = tnodes_at_level[ilevel].nelem;
 		total += num_at_level;

 	    for(i = 0; i < num_at_level; i++)
 		{
 		    inode = tnodes_at_level[ilevel].list[i];
 			if(i == 0) {
 				tnode[i].num_critical_input_paths = 1;
 			}
 		    T_arr = tnode[inode].T_arr;
 		    num_edges = tnode[inode].num_edges;
 		    tedge = tnode[inode].out_edges;
 		    T_crit = max(T_crit, T_arr);

 		    for(iedge = 0; iedge < num_edges; iedge++)
 			{
 			    to_node = tedge[iedge].to_node;
 			    Tdel = tedge[iedge].Tdel;

 				/* Update number of near critical paths entering tnode */
 					/*check for approximate equality*/
 				if (fabs(tnode[to_node].T_arr - (T_arr + Tdel)) < EQUAL_DEF){
 					tnode[to_node].num_critical_input_paths += tnode[inode].num_critical_input_paths;
 				}
 				else if (tnode[to_node].T_arr < T_arr + Tdel) {
 					tnode[to_node].num_critical_input_paths = tnode[inode].num_critical_input_paths;
 				}

 				if (tnode[to_node].num_critical_input_paths > max_critical_input_paths)
 					max_critical_input_paths = tnode[to_node].num_critical_input_paths;


 			    tnode[to_node].T_arr =
 				max(tnode[to_node].T_arr, T_arr + Tdel);
 			}
 		}

 	}
 	assert(total == num_tnodes);

     if(target_cycle_time > 0.)	/* User specified target cycle time */
 	T_cycle = target_cycle_time;
     else			/* Otherwise, target = critical path */
 	T_cycle = T_crit;

 /* Compute the required arrival times with a backward breadth-first analysis *
  * from sinks (output pads, etc.) to primary inputs.                         */

     for(ilevel = num_tnode_levels - 1; ilevel >= 0; ilevel--)
 	{
 	    num_at_level = tnodes_at_level[ilevel].nelem;

 	    for(i = 0; i < num_at_level; i++)
 		{
 		    inode = tnodes_at_level[ilevel].list[i];
 		    num_edges = tnode[inode].num_edges;

 			if(ilevel == 0) {
 				assert(tnode[inode].type == INPAD_SOURCE || tnode[inode].type == FF_SOURCE || tnode[inode].type == CONSTANT_GEN_SOURCE);
 			} else {
 				assert(!(tnode[inode].type == INPAD_SOURCE || tnode[inode].type == FF_SOURCE || tnode[inode].type == CONSTANT_GEN_SOURCE));
 			}

 		    if(num_edges == 0)
 			{	/* sink */
 				assert(tnode[inode].type == OUTPAD_SINK || tnode[inode].type == FF_SINK);
 			    tnode[inode].T_req = T_cycle;
 				tnode[inode].num_critical_output_paths = 1;
 			}
 		    else
 			{
 				assert(!(tnode[inode].type == OUTPAD_SINK || tnode[inode].type == FF_SINK));
 			    tedge = tnode[inode].out_edges;
 			    to_node = tedge[0].to_node;
 			    Tdel = tedge[0].Tdel;
 			    T_req = tnode[to_node].T_req - Tdel;

 				tnode[inode].num_critical_output_paths = tnode[to_node].num_critical_output_paths;
 				if (tnode[to_node].num_critical_output_paths > max_critical_output_paths)
 					max_critical_output_paths = tnode[to_node].num_critical_output_paths;

 			    for(iedge = 1; iedge < num_edges; iedge++)
 				{
 				    to_node = tedge[iedge].to_node;
 				    Tdel = tedge[iedge].Tdel;

 					/* Update number of near critical paths affected by output of tnode */
 						/*check for approximate equality*/
 					if (fabs(tnode[to_node].T_req - Tdel - T_req) < EQUAL_DEF){
 						tnode[inode].num_critical_output_paths += tnode[to_node].num_critical_output_paths;
 					}
 					else if (tnode[to_node].T_req - Tdel < T_req) {
 						tnode[inode].num_critical_output_paths = tnode[to_node].num_critical_output_paths;
 					}

 					if (tnode[to_node].num_critical_output_paths > max_critical_output_paths)
 						max_critical_output_paths = tnode[to_node].num_critical_output_paths;

 				    T_req =
 					min(T_req,
 					    tnode[to_node].T_req - Tdel);
 				}

 			    tnode[inode].T_req = T_req;
 			}
 		}
 	}

 	normalize_costs(T_crit, max_critical_input_paths, max_critical_output_paths);

     compute_net_slacks(net_slack);

     return (T_crit);
 }


 static void
 compute_net_slacks(float **net_slack)
 {

 /* Puts the slack of each source-sink pair of block pins in net_slack.     */

     int inet, iedge, inode, to_node, num_edges;
     t_tedge *tedge;
     float T_arr, Tdel, T_req;

     for(inet = 0; inet < num_timing_nets; inet++)
 	{
 	    inode = net_to_driver_tnode[inet];
 	    T_arr = tnode[inode].T_arr;
 	    num_edges = tnode[inode].num_edges;
 	    tedge = tnode[inode].out_edges;

 	    for(iedge = 0; iedge < num_edges; iedge++)
 		{
 		    to_node = tedge[iedge].to_node;
 		    Tdel = tedge[iedge].Tdel;
 		    T_req = tnode[to_node].T_req;
 		    net_slack[inet][iedge + 1] = T_req - T_arr - Tdel;
 		}
 	}
 }


 void
 print_critical_path(char *fname)
 {

 /* Prints out the critical path to a file.  */

     t_linked_int *critical_path_head, *critical_path_node;
     FILE *fp;
     int non_global_nets_on_crit_path, global_nets_on_crit_path;
     int tnodes_on_crit_path, inode, iblk, inet;
     t_tnode_type type;
     float total_net_delay, total_logic_delay, Tdel;

     critical_path_head = allocate_and_load_critical_path();
     critical_path_node = critical_path_head;

     fp = my_fopen(fname, "w", 0);

     non_global_nets_on_crit_path = 0;
     global_nets_on_crit_path = 0;
     tnodes_on_crit_path = 0;
     total_net_delay = 0.;
     total_logic_delay = 0.;

     while(critical_path_node != NULL)
 	{
 	    Tdel =
 		print_critical_path_node(fp, critical_path_node);
 	    inode = critical_path_node->data;
 	    type = tnode[inode].type;
 	    tnodes_on_crit_path++;

 	    if(type == CB_OPIN)
 		{
 		    get_tnode_block_and_output_net(inode, &iblk, &inet);

 		    if(!timing_nets[inet].is_global)
 			non_global_nets_on_crit_path++;
 		    else
 			global_nets_on_crit_path++;

 		    total_net_delay += Tdel;
 		}
 	    else
 		{
 		    total_logic_delay += Tdel;
 		}

 	    critical_path_node = critical_path_node->next;
 	}

     fprintf(fp,
 	    "\nTnodes on crit. path: %d  Non-global nets on crit. path: %d."
 	    "\n", tnodes_on_crit_path, non_global_nets_on_crit_path);
     fprintf(fp, "Global nets on crit. path: %d.\n", global_nets_on_crit_path);
     fprintf(fp, "Total logic delay: %g (s)  Total net delay: %g (s)\n",
 	    total_logic_delay, total_net_delay);

     printf("Nets on crit. path: %d normal, %d global.\n",
 	   non_global_nets_on_crit_path, global_nets_on_crit_path);

     printf("Total logic delay: %g (s)  Total net delay: %g (s)\n",
 	   total_logic_delay, total_net_delay);

     fclose(fp);
     free_int_list(&critical_path_head);
 }


 t_linked_int *
 allocate_and_load_critical_path(void)
 {

 /* Finds the critical path and puts a list of the tnodes on the critical    *
  * path in a linked list, from the path SOURCE to the path SINK.            */

     t_linked_int *critical_path_head, *curr_crit_node, *prev_crit_node;
     int inode, iedge, to_node, num_at_level, i, crit_node, num_edges;
     float min_slack, slack;
     t_tedge *tedge;

     num_at_level = tnodes_at_level[0].nelem;
     min_slack = HUGE_FLOAT;
     crit_node = OPEN;		/* Stops compiler warnings. */

     for(i = 0; i < num_at_level; i++)
 	{			/* Path must start at SOURCE (no inputs) */
 	    inode = tnodes_at_level[0].list[i];
 	    slack = tnode[inode].T_req - tnode[inode].T_arr;

 	    if(slack < min_slack)
 		{
 		    crit_node = inode;
 		    min_slack = slack;
 		}
 	}

     critical_path_head = (t_linked_int *) my_malloc(sizeof(t_linked_int));
     critical_path_head->data = crit_node;
     prev_crit_node = critical_path_head;
     num_edges = tnode[crit_node].num_edges;

     while(num_edges != 0)
 	{			/* Path will end at SINK (no fanout) */
 	    curr_crit_node = (t_linked_int *) my_malloc(sizeof(t_linked_int));
 	    prev_crit_node->next = curr_crit_node;
 	    tedge = tnode[crit_node].out_edges;
 	    min_slack = HUGE_FLOAT;

 	    for(iedge = 0; iedge < num_edges; iedge++)
 		{
 		    to_node = tedge[iedge].to_node;
 		    slack = tnode[to_node].T_req - tnode[to_node].T_arr;

 		    if(slack < min_slack)
 			{
 			    crit_node = to_node;
 			    min_slack = slack;
 			}
 		}

 	    curr_crit_node->data = crit_node;
 	    prev_crit_node = curr_crit_node;
 	    num_edges = tnode[crit_node].num_edges;
 	}

     prev_crit_node->next = NULL;
     return (critical_path_head);
 }


 void
 get_tnode_block_and_output_net(int inode,
 			       int *iblk_ptr,
 			       int *inet_ptr)
 {

 /* Returns the index of the block that this tnode is part of.  If the tnode *
  * is a CB_OPIN or INPAD_OPIN (i.e. if it drives a net), the net index is  *
  * returned via inet_ptr.  Otherwise inet_ptr points at OPEN.               */

     int inet, ipin, iblk;
     t_tnode_type tnode_type;

     iblk = tnode[inode].block;
     tnode_type = tnode[inode].type;

     if(tnode_type == CB_OPIN)
 	{
 	    ipin = tnode[inode].pb_graph_pin->pin_count_in_cluster;
 		inet = block[iblk].pb->rr_graph[ipin].net_num;
 		assert(inet != OPEN);
 		inet = vpack_to_clb_net_mapping[inet];
 	}
     else
 	{
 	    inet = OPEN;
 	}

     *iblk_ptr = iblk;
     *inet_ptr = inet;
 }


 void
 do_constant_net_delay_timing_analysis(t_timing_inf timing_inf,
 				      float constant_net_delay_value)
 {

 /* Does a timing analysis (simple) where it assumes that each net has a      *
  * constant delay value.  Used only when operation == TIMING_ANALYSIS_ONLY.  */

     struct s_linked_vptr *net_delay_chunk_list_head;
     float **net_delay, **net_slack;

     float T_crit;

     net_slack = alloc_and_load_timing_graph(timing_inf);
     net_delay = alloc_net_delay(&net_delay_chunk_list_head, timing_nets, num_timing_nets);

     load_constant_net_delay(net_delay, constant_net_delay_value, timing_nets, num_timing_nets);
     load_timing_graph_net_delays(net_delay);
     T_crit = load_net_slack(net_slack, 0);

     printf("\n");
     printf("\nCritical Path: %g (s)\n", T_crit);

 #ifdef CREATE_ECHO_FILES
     print_critical_path("critical_path.echo");
     print_timing_graph("timing_graph.echo");
     print_net_slack("net_slack.echo", net_slack);
     print_net_delay(net_delay, "net_delay.echo", timing_nets, num_timing_nets);
 #endif /* CREATE_ECHO_FILES */

     free_timing_graph(net_slack);
     free_net_delay(net_delay, &net_delay_chunk_list_head);
 }

 static void normalize_costs(float t_crit, long max_critical_input_paths, long max_critical_output_paths) {
 	int i;
 	for(i = 0; i < num_tnodes; i++) {
 		tnode[i].normalized_slack = ((float)tnode[i].T_req - tnode[i].T_arr) / t_crit;
 		tnode[i].normalized_T_arr = (float)tnode[i].T_arr/(float)t_crit;
 		tnode[i].normalized_total_critical_paths = ((float)tnode[i].num_critical_input_paths + tnode[i].num_critical_output_paths) / ((float)max_critical_input_paths + max_critical_output_paths);
 	}
 }


 void
 print_timing_graph_as_blif(char *fname, t_model *models)
 {
 	struct s_model_ports *port;
 	struct s_linked_vptr *p_io_removed;
 /* Prints out the critical path to a file.  */

     FILE *fp;
 	int i, j;

     fp = my_fopen(fname, "w", 0);

 	fprintf(fp, ".model %s\n", blif_circuit_name);

 	fprintf(fp, ".inputs ");
 	for(i = 0; i < num_logical_blocks; i++) {
 		if(logical_block[i].type == VPACK_INPAD) {
 			fprintf(fp, "\\\n%s ", logical_block[i].name);
 		}
 	}
 	p_io_removed = circuit_p_io_removed;
 	while(p_io_removed) {
 		fprintf(fp, "\\\n%s ", (char *) p_io_removed->data_vptr);
 		p_io_removed = p_io_removed->next;
 	}

 	fprintf(fp, "\n");

 	fprintf(fp, ".outputs ");
 	for(i = 0; i < num_logical_blocks; i++) {
 		if(logical_block[i].type == VPACK_OUTPAD) {
 			/* Outputs have a "out:" prepended to them, must remove */
 			fprintf(fp, "\\\n%s ", &logical_block[i].name[4]);
 		}
 	}
 	fprintf(fp, "\n");
 	fprintf(fp, ".names unconn\n");
 	fprintf(fp, " 0\n\n");

 	/* Print out primitives */
 	for(i = 0; i < num_logical_blocks; i++) {
 		print_primitive_as_blif(fp, i);
 	}

 	/* Print out tnode connections */
 	for(i = 0; i < num_tnodes; i++) {
 		if(tnode[i].type != PRIMITIVE_IPIN && tnode[i].type != FF_SOURCE && tnode[i].type != INPAD_SOURCE && tnode[i].type != OUTPAD_IPIN) {
 			for(j = 0; j < tnode[i].num_edges; j++) {
 				fprintf(fp, ".names tnode_%d tnode_%d\n", i, tnode[i].out_edges[j]);
 				fprintf(fp, "1 1\n\n");
 			}
 		}
 	}

 	fprintf(fp, ".end\n\n");

 	/* Print out .subckt models */
 	while(models) {
 		fprintf(fp, ".model %s\n", models->name);
 		fprintf(fp, ".inputs ");
 		port = models->inputs;
 		while(port) {
 			if(port->size > 1) {
 				for(j = 0; j < port->size; j++) {
 					fprintf(fp, "%s[%d] ", port->name, j);
 				}
 			} else {
 				fprintf(fp, "%s ", port->name);
 			}
 			port = port->next;
 		}
 		fprintf(fp, "\n");
 		fprintf(fp, ".outputs ");
 		port = models->outputs;
 		while(port) {
 			if(port->size > 1) {
 				for(j = 0; j < port->size; j++) {
 					fprintf(fp, "%s[%d] ", port->name, j);
 				}
 			} else {
 				fprintf(fp, "%s ", port->name);
 			}
 			port = port->next;
 		}
 		fprintf(fp, "\n.blackbox\n.end\n\n");
 		fprintf(fp, "\n\n");
 		models = models->next;
 	}
     fclose(fp);
 }

 static void print_primitive_as_blif (FILE *fpout, int iblk) {
 	int i, j;
 	struct s_model_ports *port;
 	struct s_linked_vptr *truth_table;
 	t_rr_node *irr_graph;
 	t_pb_graph_node *pb_graph_node;
 	int node;

 	/* Print primitives found in timing graph in blif format based on whether this is a logical primitive or a physical primitive */

 	if(logical_block[iblk].type == VPACK_INPAD) {
 		if(logical_block[iblk].pb == NULL) {
 			fprintf(fpout, ".names %s tnode_%d\n", logical_block[iblk].name, logical_block[iblk].output_net_tnodes[0][0]->index);
 		} else {
 			fprintf(fpout, ".names %s tnode_%d\n", logical_block[iblk].name,
 				logical_block[iblk].pb->rr_graph[logical_block[iblk].pb->pb_graph_node->output_pins[0][0].pin_count_in_cluster].tnode->index);
 		}
 		fprintf(fpout, "1 1\n\n");
 	} else if(logical_block[iblk].type == VPACK_OUTPAD) {
 		/* outputs have the symbol out: automatically prepended to it, must remove */
 		if(logical_block[iblk].pb == NULL) {
 			fprintf(fpout, ".names tnode_%d %s\n", logical_block[iblk].input_net_tnodes[0][0]->index, &logical_block[iblk].name[4]);
 		} else {
 			/* avoid the out: from output pad naming */
 			fprintf(fpout, ".names tnode_%d %s\n",
 				logical_block[iblk].pb->rr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][0].pin_count_in_cluster].tnode->index,
 				(logical_block[iblk].name + 4));
 		}
 		fprintf(fpout, "1 1\n\n");
 	} else if(strcmp(logical_block[iblk].model->name, "latch") == 0) {
 		fprintf(fpout, ".latch ");
 		node = OPEN;

 		if(logical_block[iblk].pb == NULL) {
 			i = 0;
 			port = logical_block[iblk].model->inputs;
 			while(port) {
 				if(!port->is_clock) {
 					assert(port->size == 1);
 					for(j = 0; j < port->size; j++) {
 						if(logical_block[iblk].input_net_tnodes[i][j] != NULL) {
 							fprintf(fpout, "tnode_%d ", logical_block[iblk].input_net_tnodes[i][j]->index);
 						} else {
 							assert(0);
 						}
 					}
 					i++;
 				}
 				port = port->next;
 			}
 			assert(i == 1);

 			i = 0;
 			port = logical_block[iblk].model->outputs;
 			while(port) {
 				assert(port->size == 1);
 				for(j = 0; j < port->size; j++) {
 					if(logical_block[iblk].output_net_tnodes[i][j] != NULL) {
 						node = logical_block[iblk].output_net_tnodes[i][j]->index;
 						fprintf(fpout, "latch_%s re ", logical_block[iblk].name);
 					} else {
 						assert(0);
 					}
 				}
 				i++;
 				port = port->next;
 			}
 			assert(i == 1);

 			i = 0;
 			port = logical_block[iblk].model->inputs;
 			while(port) {
 				if(port->is_clock) {
 					for(j = 0; j < port->size; j++) {
 						if(logical_block[iblk].clock_net_tnode != NULL) {
 							fprintf(fpout, "tnode_%d 0\n\n", logical_block[iblk].clock_net_tnode->index);
 						} else {
 							assert(0);
 						}
 					}
 					i++;
 				}
 				port = port->next;
 			}
 			assert(i == 1);
 		} else {
 			irr_graph = logical_block[iblk].pb->rr_graph;
 			assert(irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][0].pin_count_in_cluster].net_num != OPEN);
 			fprintf(fpout, "tnode_%d ", irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][0].pin_count_in_cluster].tnode->index);
 			node = irr_graph[logical_block[iblk].pb->pb_graph_node->output_pins[0][0].pin_count_in_cluster].tnode->index;
 			fprintf(fpout, "latch_%s re ", logical_block[iblk].name);
 			assert(irr_graph[logical_block[iblk].pb->pb_graph_node->clock_pins[0][0].pin_count_in_cluster].net_num != OPEN);
 			fprintf(fpout, "tnode_%d 0\n\n", irr_graph[logical_block[iblk].pb->pb_graph_node->clock_pins[0][0].pin_count_in_cluster].tnode->index);
 		}
 		assert(node != OPEN);
 		fprintf(fpout, ".names latch_%s tnode_%d\n", logical_block[iblk].name, node);
 		fprintf(fpout, "1 1\n\n");
 	} else if(strcmp(logical_block[iblk].model->name, "names") == 0) {
 		fprintf(fpout, ".names ");
 		node = OPEN;

 		if(logical_block[iblk].pb == NULL) {
 			i = 0;
 			port = logical_block[iblk].model->inputs;
 			while(port) {
 				assert(!port->is_clock);
 				for(j = 0; j < port->size; j++) {
 					if(logical_block[iblk].input_net_tnodes[i][j] != NULL) {
 						fprintf(fpout, "tnode_%d ", logical_block[iblk].input_net_tnodes[i][j]->index);
 					} else {
 						break;
 					}
 				}
 				i++;
 				port = port->next;
 			}
 			assert(i == 1);

 			i = 0;
 			port = logical_block[iblk].model->outputs;
 			while(port) {
 				assert(port->size == 1);
 				fprintf(fpout, "lut_%s\n", logical_block[iblk].name);
 				node = logical_block[iblk].output_net_tnodes[0][0]->index;
 				assert(node != OPEN);
 				i++;
 				port = port->next;
 			}
 			assert(i == 1);
 		} else {
 			irr_graph = logical_block[iblk].pb->rr_graph;
 			assert(logical_block[iblk].pb->pb_graph_node->num_input_ports == 1);
 			for(i = 0; i < logical_block[iblk].pb->pb_graph_node->num_input_pins[0]; i++) {
 				if(irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][i].pin_count_in_cluster].net_num != OPEN) {
 					fprintf(fpout, "tnode_%d ", irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][i].pin_count_in_cluster].tnode->index);
 				} else {
 					if(i > 0 && i < logical_block[iblk].pb->pb_graph_node->num_input_pins[0] - 1) {
 						assert(irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][i + 1].pin_count_in_cluster].net_num == OPEN);
 					}
 				}
 			}
 			assert(logical_block[iblk].pb->pb_graph_node->num_output_ports == 1);
 			assert(logical_block[iblk].pb->pb_graph_node->num_output_pins[0] == 1);
 			fprintf(fpout, "lut_%s\n", logical_block[iblk].name);
 			node = irr_graph[logical_block[iblk].pb->pb_graph_node->output_pins[0][0].pin_count_in_cluster].tnode->index;
 		}
 		assert(node != OPEN);
 		truth_table = logical_block[iblk].truth_table;
 		while(truth_table) {
 			fprintf(fpout, "%s\n", (char*)truth_table->data_vptr);
 			truth_table = truth_table->next;
 		}
 		fprintf(fpout, "\n");
 		fprintf(fpout, ".names lut_%s tnode_%d\n", logical_block[iblk].name, node);
 		fprintf(fpout, "1 1\n\n");
 	} else {
 		/* This is a standard .subckt blif structure */
 		fprintf(fpout, ".subckt %s ", logical_block[iblk].model->name);
 		if(logical_block[iblk].pb == NULL) {
 			i = 0;
 			port = logical_block[iblk].model->inputs;
 			while(port) {
 				if(!port->is_clock) {
 					for(j = 0; j < port->size; j++) {
 						if(logical_block[iblk].input_net_tnodes[i][j] != NULL) {
 							if(port->size > 1) {
 								fprintf(fpout, "\\\n%s[%d]=tnode_%d ", port->name, j, logical_block[iblk].input_net_tnodes[i][j]->index);
 							} else {
 								fprintf(fpout, "\\\n%s=tnode_%d ", port->name, logical_block[iblk].input_net_tnodes[i][j]->index);
 							}
 						} else {
 							if(port->size > 1) {
 								fprintf(fpout, "\\\n%s[%d]=unconn ", port->name, j);
 							} else {
 								fprintf(fpout, "\\\n%s=unconn ", port->name);
 							}
 						}
 					}
 					i++;
 				}
 				port = port->next;
 			}

 			i = 0;
 			port = logical_block[iblk].model->outputs;
 			while(port) {
 				for(j = 0; j < port->size; j++) {
 					if(logical_block[iblk].output_net_tnodes[i][j] != NULL) {
 						if(port->size > 1) {
 							fprintf(fpout, "\\\n%s[%d]=%s ", port->name, j, vpack_net[logical_block[iblk].output_nets[i][j]].name);
 						} else {
 							fprintf(fpout, "\\\n%s=%s ", port->name, vpack_net[logical_block[iblk].output_nets[i][j]].name);
 						}
 					} else {
 						if(port->size > 1) {
 							fprintf(fpout, "\\\n%s[%d]=unconn_%d_%s_%d ", port->name, j, iblk, port->name, j);
 						} else {
 							fprintf(fpout, "\\\n%s=unconn_%d_%s_%d ", port->name, iblk, port->name);
 						}
 					}
 				}
 				i++;
 				port = port->next;
 			}


 			i = 0;
 			port = logical_block[iblk].model->inputs;
 			while(port) {
 				if(port->is_clock) {
 					assert(port->size == 1);
 					if(logical_block[iblk].clock_net_tnode != NULL) {
 						fprintf(fpout, "\\\n%s=tnode_%d ", port->name, logical_block[iblk].clock_net_tnode->index);
 					} else {
 						fprintf(fpout, "\\\n%s=unconn ", port->name);
 					}
 					i++;
 				}
 				port = port->next;
 			}

 			fprintf(fpout, "\n\n");


 			i = 0;
 			port = logical_block[iblk].model->outputs;
 			while(port) {
 				for(j = 0; j < port->size; j++) {
 					if(logical_block[iblk].output_net_tnodes[i][j] != NULL) {
 						fprintf(fpout, ".names %s tnode_%d\n", vpack_net[logical_block[iblk].output_nets[i][j]].name, logical_block[iblk].output_net_tnodes[i][j]->index);
 						fprintf(fpout, "1 1\n\n");
 					}
 				}
 				i++;
 				port = port->next;
 			}
 		} else {
 			irr_graph = logical_block[iblk].pb->rr_graph;
 			pb_graph_node = logical_block[iblk].pb->pb_graph_node;
 			for(i = 0; i < pb_graph_node->num_input_ports; i++) {
 				for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
 					if(irr_graph[pb_graph_node->input_pins[i][j].pin_count_in_cluster].net_num != OPEN) {
 						if(pb_graph_node->num_input_pins[i] > 1) {
 							fprintf(fpout, "\\\n%s[%d]=tnode_%d ", pb_graph_node->input_pins[i][j].port->name, j, irr_graph[pb_graph_node->input_pins[i][j].pin_count_in_cluster].tnode->index);
 						} else {
 							fprintf(fpout, "\\\n%s=tnode_%d ", pb_graph_node->input_pins[i][j].port->name, irr_graph[pb_graph_node->input_pins[i][j].pin_count_in_cluster].tnode->index);
 						}
 					} else {
 						if(pb_graph_node->num_input_pins[i] > 1) {
 							fprintf(fpout, "\\\n%s[%d]=unconn ", pb_graph_node->input_pins[i][j].port->name, j);
 						} else {
 							fprintf(fpout, "\\\n%s=unconn ", pb_graph_node->input_pins[i][j].port->name);
 						}
 					}
 				}
 			}
 			for(i = 0; i < pb_graph_node->num_output_ports; i++) {
 				for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
 					if(irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num != OPEN) {
 						if(pb_graph_node->num_output_pins[i] > 1) {
 							fprintf(fpout, "\\\n%s[%d]=%s ", pb_graph_node->output_pins[i][j].port->name, j,
 								vpack_net[irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num].name);
 						} else {
 							fprintf(fpout, "\\\n%s=%s ", pb_graph_node->output_pins[i][j].port->name,
 								vpack_net[irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].tnode->index].name);
 						}
 					} else {
 						if(pb_graph_node->num_output_pins[i] > 1) {
 							fprintf(fpout, "\\\n%s[%d]=unconn ", pb_graph_node->output_pins[i][j].port->name, j);
 						} else {
 							fprintf(fpout, "\\\n%s=unconn ", pb_graph_node->output_pins[i][j].port->name);
 						}
 					}
 				}
 			}
 			for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
 				for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
 					if(irr_graph[pb_graph_node->clock_pins[i][j].pin_count_in_cluster].net_num != OPEN) {
 						if(pb_graph_node->num_clock_pins[i] > 1) {
 							fprintf(fpout, "\\\n%s[%d]=tnode_%d ", pb_graph_node->clock_pins[i][j].port->name, j, irr_graph[pb_graph_node->clock_pins[i][j].pin_count_in_cluster].tnode->index);
 						} else {
 							fprintf(fpout, "\\\n%s=tnode_%d ", pb_graph_node->clock_pins[i][j].port->name, irr_graph[pb_graph_node->clock_pins[i][j].pin_count_in_cluster].tnode->index);
 						}
 					} else {
 						if(pb_graph_node->num_clock_pins[i] > 1) {
 							fprintf(fpout, "\\\n%s[%d]=unconn ", pb_graph_node->clock_pins[i][j].port->name, j);
 						} else {
 							fprintf(fpout, "\\\n%s=unconn ", pb_graph_node->clock_pins[i][j].port->name);
 						}
 					}
 				}
 			}

 			fprintf(fpout, "\n\n");
 			/* connect up output port names to output tnodes */
 			for(i = 0; i < pb_graph_node->num_output_ports; i++) {
 				for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
 					if(irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num != OPEN) {
 						fprintf(fpout, ".names %s tnode_%d\n", vpack_net[irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num].name, irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].tnode->index);
 						fprintf(fpout, "1 1\n\n");
 					}
 				}
 			}
 		}
 	}
 }