vpr/SRC/pack/cluster_legality.c - third_party/vtr-verilog-to-routing - Git at Google

 #include <stdio.h>
 #include <assert.h>
 #include <string.h>

 #include "util.h"
 #include "physical_types.h"
 #include "vpr_types.h"
 #include "globals.h"
 #include "mst.h"
 #include "route_export.h"
 #include "route_common.h"
 #include "cluster_legality.h"
 #include "rr_graph.h"

 static struct s_linked_vptr *rr_mem_chunk_list_head = NULL;
 static int chunk_bytes_avail = 0;
 static char *chunk_next_avail_mem = NULL;
 static struct s_trace **best_routing;

 /* nets_in_cluster: array of all nets contained in the cluster */
 static int *nets_in_cluster;    /* [0..num_nets_in_cluster-1] */
 static int num_nets_in_cluster;
 static int curr_cluster_index;

 static int ext_input_rr_node_index, ext_output_rr_node_index, ext_clock_rr_node_index, max_ext_index;
 static int **saved_net_rr_terminals;
 static float pres_fac;
 static float *rr_intrinsic_cost;
 static int num_rr_intrinsic_cost = 0;


 /********************* Subroutines local to this module *********************/
 static boolean is_net_in_cluster(INP int inet);

 static boolean add_net_rr_terminal_cluster(int iblk_net, t_pb * primitive, int ilogical_block, t_model_ports * model_port, int ipin);

 static boolean reload_ext_net_rr_terminal_cluster();

 static boolean try_breadth_first_route_cluster();

 static boolean breadth_first_route_net_cluster(int inet);

 static void breadth_first_expand_trace_segment_cluster(struct s_trace *start_ptr,
 					       int remaining_connections_to_sink);

 static void breadth_first_expand_neighbours_cluster(int inode,
 					    float pcost,
 					    int inet,
 						boolean first_time);

 static void breadth_first_add_source_to_heap_cluster(int inet);

 static void alloc_net_rr_terminals_cluster();

 static void save_and_reset_routing_cluster();

 static void restore_routing_cluster();

 static int get_num_violated_nets();

 static void mark_ends_cluster(int inet);

 static void print_intra_cluster_route(char *route_file);

 static float rr_node_intrinsic_cost(int inode);


 /************************ Subroutine definitions ****************************/

 static boolean is_net_in_cluster(INP int inet) {
 	int i;
 	for(i = 0; i < num_nets_in_cluster; i++) {
 		if(nets_in_cluster[i] == inet) {
 			return TRUE;
 		}
 	}
 	return FALSE;
 }

 /* load rr_node for source and sinks of net if exists, return FALSE otherwise */
 /* Todo: Note this is an inefficient way to determine port, better to use a lookup, worry about this if runtime becomes an issue */
 static boolean add_net_rr_terminal_cluster(int iblk_net, t_pb * primitive, int ilogical_block, t_model_ports * model_port, int ipin) {
 	/* Ensure at most one external input/clock source and one external output sink for net */
 	int i, net_pin;
 	t_port *prim_port;
 	const t_pb_type *pb_type;
 	boolean found;

 	int input_port;
 	int output_port;
 	int clock_port;

 	input_port = output_port = clock_port = 0;

 	pb_type = primitive->pb_graph_node->pb_type;
 	prim_port = NULL;

 	assert(pb_type->num_modes == 0);

 	found = FALSE;
 	/* TODO: This is inelegant design, I should change the primitive ports in pb_type to be input, output, or clock instead of this lookup */
 	for(i = 0; i < pb_type->num_ports && !found; i++) {
 		prim_port = &pb_type->ports[i];
 		if(pb_type->ports[i].model_port == model_port) {
 			found = TRUE;
 		} else {
 			if(prim_port->is_clock) {
 				clock_port++;
 				assert(prim_port->type == IN_PORT);
 			} else if(prim_port->type == IN_PORT) {
 				input_port++;
 			} else if(prim_port->type == OUT_PORT) {
 				output_port++;
 			} else {
 				assert(0);
 			}
 		}
 	}
 	if(!found) {
 		return FALSE;
 	}

 	if(ipin >= prim_port->num_pins) {
 		return FALSE;
 	} else {
 		net_pin = OPEN;
 		if(prim_port->is_clock) {
 			for(i = 1; i <= vpack_net[iblk_net].num_sinks; i++) {
 				if(vpack_net[iblk_net].node_block[i] == ilogical_block &&
 					vpack_net[iblk_net].node_block_port[i] == model_port->index &&
 					vpack_net[iblk_net].node_block_pin[i] == ipin) {
 					net_pin = i;
 					break;
 				}
 			}
 			assert(net_pin != OPEN);
 			net_rr_terminals[iblk_net][net_pin] =
 				primitive->pb_graph_node->clock_pins[clock_port][ipin].pin_count_in_cluster;
 		} else if(prim_port->type == IN_PORT) {
 			for(i = 1; i <= vpack_net[iblk_net].num_sinks; i++) {
 				if(vpack_net[iblk_net].node_block[i] == ilogical_block &&
 					vpack_net[iblk_net].node_block_port[i] == model_port->index &&
 					vpack_net[iblk_net].node_block_pin[i] == ipin) {
 					net_pin = i;
 					break;
 				}
 			}
 			assert(net_pin != OPEN);
 			net_rr_terminals[iblk_net][net_pin] =
 				primitive->pb_graph_node->input_pins[input_port][ipin].pin_count_in_cluster;
 		} else if(prim_port->type == OUT_PORT) {
 			i = 0;
 			if(vpack_net[iblk_net].node_block[i] == ilogical_block &&
 				vpack_net[iblk_net].node_block_port[i] == model_port->index &&
 				vpack_net[iblk_net].node_block_pin[i] == ipin) {
 				net_pin = i;
 			}
 			assert(net_pin != OPEN);
 			net_rr_terminals[iblk_net][net_pin] =
 				primitive->pb_graph_node->output_pins[output_port][ipin].pin_count_in_cluster;
 		} else {
 			assert(0);
 		}
 		return TRUE;
 	}
 }

 static boolean reload_ext_net_rr_terminal_cluster() {
 	int i, j, net_index;
 	boolean has_ext_sink, has_ext_source;
 	int curr_ext_output, curr_ext_input, curr_ext_clock;

 	curr_ext_input = ext_input_rr_node_index;
 	curr_ext_output = ext_output_rr_node_index;
 	curr_ext_clock = ext_clock_rr_node_index;

 	for(i = 0; i < num_nets_in_cluster; i++)
 	{
 		net_index = nets_in_cluster[i];
 		has_ext_sink = FALSE;
 		has_ext_source = (logical_block[vpack_net[net_index].node_block[0]].clb_index != curr_cluster_index);
 		if(has_ext_source) {
 			/* Instantiate a source of this net */
 			if(vpack_net[net_index].is_global) {
 				net_rr_terminals[net_index][0] = curr_ext_clock;
 				curr_ext_clock++;
 			} else {
 				net_rr_terminals[net_index][0] = curr_ext_input;
 				curr_ext_input++;
 			}
 		}
 		for(j = 1; j <= vpack_net[net_index].num_sinks; j++) {
 			if(logical_block[vpack_net[net_index].node_block[j]].clb_index != curr_cluster_index) {
 				if(has_ext_sink || has_ext_source) {
 					/* Only need one node driving external routing, either this cluster drives external routing or another cluster does it */
 					net_rr_terminals[net_index][j] = OPEN;
 				} else {
 					/* External sink, only need to route once, externally routing will take care of the rest */
 					net_rr_terminals[net_index][j] = curr_ext_output;
 					curr_ext_output++;
 					has_ext_sink = TRUE;
 				}
 			}
 		}

 		if( curr_ext_input > ext_output_rr_node_index ||
 			curr_ext_output > ext_clock_rr_node_index ||
 			curr_ext_clock > max_ext_index) {
 			/* failed, not enough pins of proper type, overran index */
 			return FALSE;
 		}
 	}

 	return TRUE;
 }

 void alloc_and_load_cluster_legality_checker() {
 	best_routing = (struct s_trace **)my_calloc(num_logical_nets, sizeof(struct s_trace *));
 	nets_in_cluster = my_malloc(num_logical_nets * sizeof(int));
 	num_nets_in_cluster = 0;
 	num_nets = num_logical_nets;

 	/* inside a cluster, I do not consider rr_indexed_data cost, set to 1 since other costs are multiplied by it */
 	num_rr_indexed_data = 1;
 	rr_indexed_data = my_calloc(1, sizeof(t_rr_indexed_data));
 	rr_indexed_data[0].base_cost = 1;

 	/* alloc routing structures */
 	alloc_route_static_structs();
 	alloc_net_rr_terminals_cluster();
 }


 void free_cluster_legality_checker() {
 	int inet;
 	free(rr_indexed_data);
 	free_rr_node_route_structs();
 	free_route_structs(NULL);
 	free_trace_structs();

 	free_chunk_memory(rr_mem_chunk_list_head);
 	rr_mem_chunk_list_head = NULL;

     for(inet = 0; inet < num_logical_nets; inet++)
 	{
 	    free(saved_net_rr_terminals[inet]);
 	}
 	free(net_rr_terminals);
 	free(saved_net_rr_terminals);
 }


 void alloc_and_load_rr_graph_for_pb_graph_node(INP t_pb_graph_node *pb_graph_node, INP const t_arch* arch, int mode) {

 	int i, j, k, index;
 	boolean is_primitive;

 	is_primitive = (pb_graph_node->pb_type->num_modes == 0);

 	for(i = 0; i < pb_graph_node->num_input_ports; i++) {
 		for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
 			index = pb_graph_node->input_pins[i][j].pin_count_in_cluster;
 			rr_node[index].pb_graph_pin = &pb_graph_node->input_pins[i][j];
 			rr_node[index].fan_in = pb_graph_node->input_pins[i][j].num_input_edges;
 			rr_node[index].num_edges = pb_graph_node->input_pins[i][j].num_output_edges;
 			rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
 			rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
 			rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
 			rr_node[index].net_num = OPEN;
 			rr_node[index].prev_node = OPEN;
 			rr_node[index].prev_edge = OPEN;
 			if(mode == 0) { /* default mode is the first mode */
 				rr_node[index].capacity = 1;
 			} else {
 				rr_node[index].capacity = 0;
 			}
 			for(k = 0; k < pb_graph_node->input_pins[i][j].num_output_edges; k++) {
 				/* TODO: Intention was to do bus-based implementation here */
 				rr_node[index].edges[k] = pb_graph_node->input_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster;
 				rr_node[index].switches[k] = arch->num_switches - 1; /* last switch in arch switch properties is a delayless switch */
 				assert(pb_graph_node->input_pins[i][j].output_edges[k]->num_output_pins == 1);
 			}
 			rr_node[index].type = INTRA_CLUSTER_EDGE;
 			if(is_primitive) {
 				rr_node[index].type = SINK;
 			}
 		}
 	}

 	for(i = 0; i < pb_graph_node->num_output_ports; i++) {
 		for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
 			index = pb_graph_node->output_pins[i][j].pin_count_in_cluster;
 			rr_node[index].pb_graph_pin = &pb_graph_node->output_pins[i][j];
 			rr_node[index].fan_in = pb_graph_node->output_pins[i][j].num_input_edges;
 			rr_node[index].num_edges = pb_graph_node->output_pins[i][j].num_output_edges;
 			rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
 			rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
 			rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
 			rr_node[index].net_num = OPEN;
 			rr_node[index].prev_node = OPEN;
 			rr_node[index].prev_edge = OPEN;
 			if(mode == 0) { /* Default mode is the first mode */
 				rr_node[index].capacity = 1;
 			} else {
 				rr_node[index].capacity = 0;
 			}
 			for(k = 0; k < pb_graph_node->output_pins[i][j].num_output_edges; k++) {
 				/* TODO: Intention was to do bus-based implementation here */
 				rr_node[index].edges[k] = pb_graph_node->output_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster;
 				rr_node[index].switches[k] = arch->num_switches - 1;
 				assert(pb_graph_node->output_pins[i][j].output_edges[k]->num_output_pins == 1);
 			}
 			rr_node[index].type = INTRA_CLUSTER_EDGE;
 			if(is_primitive) {
 				rr_node[index].type = SOURCE;
 			}
 		}
 	}

 	for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
 		for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
 			index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster;
 			rr_node[index].pb_graph_pin = &pb_graph_node->clock_pins[i][j];
 			rr_node[index].fan_in = pb_graph_node->clock_pins[i][j].num_input_edges;
 			rr_node[index].num_edges = pb_graph_node->clock_pins[i][j].num_output_edges;
 			rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
 			rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
 			rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
 			rr_node[index].net_num = OPEN;
 			rr_node[index].prev_node = OPEN;
 			rr_node[index].prev_edge = OPEN;
 			if(mode == 0) { /* default mode is the first mode (useful for routing */
 				rr_node[index].capacity = 1;
 			} else {
 				rr_node[index].capacity = 0;
 			}
 			for(k = 0; k < pb_graph_node->clock_pins[i][j].num_output_edges; k++) {
 				/* TODO: Intention was to do bus-based implementation here */
 				rr_node[index].edges[k] = pb_graph_node->clock_pins[i][j].output_edges[k]->output_pins[0]->pin_count_in_cluster;
 				rr_node[index].switches[k] = arch->num_switches - 1;
 				assert(pb_graph_node->clock_pins[i][j].output_edges[k]->num_output_pins == 1);
 			}
 			rr_node[index].type = INTRA_CLUSTER_EDGE;
 			if(is_primitive) {
 				rr_node[index].type = SINK;
 			}
 		}
 	}

 	for(i = 0; i < pb_graph_node->pb_type->num_modes; i++) {
 		for(j = 0; j < pb_graph_node->pb_type->modes[i].num_pb_type_children; j++) {
 			for(k = 0; k < pb_graph_node->pb_type->modes[i].pb_type_children[j].num_pb; k++) {
 				alloc_and_load_rr_graph_for_pb_graph_node(&pb_graph_node->child_pb_graph_nodes[i][j][k], arch, i);
 			}
 		}
 	}

 }


 void alloc_and_load_legalizer_for_cluster(INP t_block* clb, INP int clb_index, INP const t_arch *arch) {

     /**
 	 * Structure: Model external routing and internal routing
 	 *
 	 * 1.  Model external routing
 	 *   num input pins == num external sources for input pins, fully connect them to input pins (simulates external routing)
 	 *   num output pins == num external sinks for output pins, fully connect them to output pins (simulates external routing)
 	 *   num clock pins == num external sources for clock pins, fully connect them to clock pins (simulates external routing)
 	 * 2.  Model internal routing
 	 *
 	 */
     /* make each rr_node one correspond with pin and correspond with pin's index pin_count_in_cluster */
     int i, j, k, m, index, pb_graph_rr_index;
 	int count_pins;
 	t_pb_type * pb_type;
 	t_pb_graph_node *pb_graph_node;
 	int ipin;

 	/* Create rr_graph */
 	pb_type = clb->type->pb_type;
 	pb_graph_node = clb->type->pb_graph_head;
 	num_rr_nodes = pb_graph_node->total_pb_pins + pb_type->num_input_pins +
 					pb_type->num_output_pins + pb_type->num_clock_pins;
 	if(num_rr_nodes > num_rr_intrinsic_cost) {
 		free(rr_intrinsic_cost);
 		rr_intrinsic_cost = my_calloc(num_rr_nodes, sizeof(float));
 		num_rr_intrinsic_cost = num_rr_nodes;
 	}
 	rr_node = my_calloc(num_rr_nodes, sizeof(t_rr_node));
 	clb->pb->rr_graph = rr_node;

 	alloc_and_load_rr_graph_for_pb_graph_node(pb_graph_node, arch, 0);

 	curr_cluster_index = clb_index;

 	/*   Alloc and load rr_graph external sources and sinks */
 	ext_input_rr_node_index = pb_graph_node->total_pb_pins;
 	ext_output_rr_node_index = pb_type->num_input_pins + pb_graph_node->total_pb_pins;
 	ext_clock_rr_node_index = pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins;
 	max_ext_index = pb_type->num_input_pins + pb_type->num_output_pins + pb_type->num_clock_pins + pb_graph_node->total_pb_pins;

 	for(i = 0; i < pb_type->num_input_pins; i++) {
 		index = i + pb_graph_node->total_pb_pins;
 		rr_node[index].type = SOURCE;
 		rr_node[index].fan_in = 0;
 		rr_node[index].num_edges = pb_type->num_input_pins;
 		rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
 		rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
 		rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
 		rr_node[index].capacity = 1;
 		rr_intrinsic_cost[index] = 0;
 	}

 	for(i = 0; i < pb_type->num_output_pins; i++) {
 		index = i + pb_type->num_input_pins + pb_graph_node->total_pb_pins;
 		rr_node[index].type = SINK;
 		rr_node[index].fan_in = pb_type->num_output_pins;
 		rr_node[index].num_edges = 0;
 		rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
 		rr_node[index].capacity = 1;
 		rr_intrinsic_cost[index] = 0;
 	}

 	for(i = 0; i < pb_type->num_clock_pins; i++) {
 		index = i + pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins;
 		rr_node[index].type = SOURCE;
 		rr_node[index].fan_in = 0;
 		rr_node[index].num_edges = pb_type->num_clock_pins;
 		rr_node[index].pack_intrinsic_cost = 1 + (float)rr_node[index].num_edges / 5; /* need to normalize better than 5 */
 		rr_node[index].edges = my_malloc(rr_node[index].num_edges * sizeof(int));
 		rr_node[index].switches = my_calloc(rr_node[index].num_edges, sizeof(int));
 		rr_node[index].capacity = 1;
 		rr_intrinsic_cost[index] = 0;
 	}

 	ipin = 0;
 	for(i = 0; i < pb_graph_node->num_input_ports; i++) {
 		for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
 			pb_graph_rr_index = pb_graph_node->input_pins[i][j].pin_count_in_cluster;
 			for(k = 0; k < pb_type->num_input_pins; k++) {
 				index = k + pb_graph_node->total_pb_pins;
 				rr_node[index].edges[ipin] = pb_graph_rr_index;
 				rr_node[index].switches[ipin] = arch->num_switches - 1;
 			}
 			rr_node[pb_graph_rr_index].pack_intrinsic_cost = MAX_SHORT; /* using an input pin should be made costly */
 			ipin++;
 		}
 	}

 	/* Must attach output pins to input pins because if a connection cannot fit using intra-cluster routing, it can also use external routing */
 	for(i = 0; i < pb_graph_node->num_output_ports; i++) {
 		for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
 			count_pins = pb_graph_node->output_pins[i][j].num_output_edges + pb_type->num_output_pins + pb_type->num_input_pins;
 			pb_graph_rr_index = pb_graph_node->output_pins[i][j].pin_count_in_cluster;
 			rr_node[pb_graph_rr_index].edges = my_realloc(rr_node[pb_graph_rr_index].edges,
 				(count_pins) * sizeof(int));
 			rr_node[pb_graph_rr_index].switches = my_realloc(rr_node[pb_graph_rr_index].switches,
 				(count_pins) * sizeof(int));

 			ipin = 0;
 			for(k = 0; k < pb_graph_node->num_input_ports; k++) {
 				for(m = 0; m < pb_graph_node->num_input_pins[k]; m++) {
 					index = pb_graph_node->input_pins[k][m].pin_count_in_cluster;
 					rr_node[pb_graph_rr_index].edges[ipin + pb_graph_node->output_pins[i][j].num_output_edges] = index;
 					rr_node[pb_graph_rr_index].switches[ipin + pb_graph_node->output_pins[i][j].num_output_edges] = arch->num_switches - 1;
 					ipin++;
 				}
 			}
 			for(k = 0; k < pb_type->num_output_pins; k++) {
 				index = k + pb_type->num_input_pins + pb_graph_node->total_pb_pins;
 				rr_node[pb_graph_rr_index].edges[k + pb_type->num_input_pins + pb_graph_node->output_pins[i][j].num_output_edges] = index;
 				rr_node[pb_graph_rr_index].switches[k + pb_type->num_input_pins + pb_graph_node->output_pins[i][j].num_output_edges] = arch->num_switches - 1;
 			}
 			rr_node[pb_graph_rr_index].num_edges += pb_type->num_output_pins + pb_type->num_input_pins;
 			rr_node[pb_graph_rr_index].pack_intrinsic_cost = 1 + (float)rr_node[pb_graph_rr_index].num_edges / 5; /* need to normalize better than 5 */
 		}
 	}

 	ipin = 0;
 	for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
 		for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
 			for(k = 0; k < pb_type->num_clock_pins; k++) {
 				index = k + pb_type->num_input_pins + pb_type->num_output_pins + pb_graph_node->total_pb_pins;
 				pb_graph_rr_index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster;
 				rr_node[index].edges[ipin] = pb_graph_rr_index;
 				rr_node[index].switches[ipin] = arch->num_switches - 1;
 			}
 			ipin++;
 		}
 	}

 	alloc_and_load_rr_node_route_structs();
 	num_nets_in_cluster = 0;

 }


 void free_legalizer_for_cluster(INP t_block* clb) {
 	int i;

 	free_rr_node_route_structs();
 	for(i = 0; i < num_rr_nodes; i++) {
 		if(clb->pb->rr_graph[i].edges != NULL) {
 			free(clb->pb->rr_graph[i].edges);
 		}
 		if(clb->pb->rr_graph[i].switches != NULL) {
 			free(clb->pb->rr_graph[i].switches);
 		}
 	}
 	free(clb->pb->rr_graph);
 }

 void reset_legalizer_for_cluster(t_block *clb) {
 	int i;
 	for(i = 0; i < num_nets_in_cluster; i++) {
 		free_traceback(nets_in_cluster[i]);
 		trace_head[nets_in_cluster[i]] = best_routing[nets_in_cluster[i]];
 		free_traceback(nets_in_cluster[i]);
 		best_routing[nets_in_cluster[i]] = NULL;
 	}

 	free_rr_node_route_structs();
 	num_nets_in_cluster = 0;
 }


 /**
  *
  * internal_nets: index of nets to route [0..num_internal_nets - 1]
  */
 static boolean try_breadth_first_route_cluster()
 {

 /* Iterated maze router ala Pathfinder Negotiated Congestion algorithm,  *
  * (FPGA 95 p. 111).  Returns TRUE if it can route this FPGA, FALSE if   *
  * it can't.                                                             */

  /* For different modes, when a mode is turned on, I set the max occupancy of all rr_nodes in the mode to 1 and all others to 0 */
 	/* TODO: There is a bug for route-throughs where edges in route-throughs do not get turned off because the rr_edge is in a particular mode but the two rr_nodes are outside */

     boolean success, is_routable;
     int itry, inet, net_index;
 	struct s_router_opts router_opts;


 /* Usually the first iteration uses a very small (or 0) pres_fac to find  *
  * the shortest path and get a congestion map.  For fast compiles, I set  *
  * pres_fac high even for the first iteration.                            */

 	/* sets up a fast breadth-first router */
     router_opts.first_iter_pres_fac = 10;
 	router_opts.max_router_iterations = 20;
 	router_opts.initial_pres_fac = 10;
 	router_opts.pres_fac_mult = 2;
 	router_opts.acc_fac = 1;

     pres_fac = router_opts.first_iter_pres_fac;

     for(itry = 1; itry <= router_opts.max_router_iterations; itry++)
 	{
 	    for(inet = 0; inet < num_nets_in_cluster; inet++)
 		{
 			net_index = nets_in_cluster[inet];

 		    pathfinder_update_one_cost(trace_head[net_index], -1,
 					       pres_fac);

 		    is_routable =
 			breadth_first_route_net_cluster(net_index);

 		    /* Impossible to route? (disconnected rr_graph) */

 		    if(!is_routable)
 			{
 				/* TODO: Inelegant, can be more intelligent */
 				printf("Failed routing net %s\n", vpack_net[net_index].name);
 			    printf("Routing failed. Disconnected rr_graph\n");
 			    return FALSE;
 			}

 		    pathfinder_update_one_cost(trace_head[net_index], 1,
 					       pres_fac);

 		}


 	    success = feasible_routing();
 	    if(success)
 		{
 		    return (TRUE);
 		}

 	    if(itry == 1)
 			pres_fac = router_opts.initial_pres_fac;
 	    else
 			pres_fac *= router_opts.pres_fac_mult;

 		pres_fac = min (pres_fac, HUGE_FLOAT / 1e5);

 	    pathfinder_update_cost(pres_fac, router_opts.acc_fac);
 	}

 	return (FALSE);
 }


 static boolean
 breadth_first_route_net_cluster(int inet)
 {

 /* Uses a maze routing (Dijkstra's) algorithm to route a net.  The net       *
  * begins at the net output, and expands outward until it hits a target      *
  * pin.  The algorithm is then restarted with the entire first wire segment  *
  * included as part of the source this time.  For an n-pin net, the maze     *
  * router is invoked n-1 times to complete all the connections.  Inet is     *
  * the index of the net to be routed.                                *
  * If this routine finds that a net *cannot* be connected (due to a complete *
  * lack of potential paths, rather than congestion), it returns FALSE, as    *
  * routing is impossible on this architecture.  Otherwise it returns TRUE.   */

     int i, inode, prev_node, remaining_connections_to_sink;
     float pcost, new_pcost;
     struct s_heap *current;
     struct s_trace *tptr;
 	boolean first_time;

     free_traceback(inet);
     breadth_first_add_source_to_heap_cluster(inet);
     mark_ends_cluster(inet);

     tptr = NULL;
     remaining_connections_to_sink = 0;

     for(i = 1; i <= vpack_net[inet].num_sinks; i++)
 	{			/* Need n-1 wires to connect n pins */

 		/* Do not connect open terminals */
 		if(net_rr_terminals[inet][i] == OPEN)
 			continue;
 		/* Expand and begin routing */
 	    breadth_first_expand_trace_segment_cluster(tptr,
 					       remaining_connections_to_sink);
 	    current = get_heap_head();

 	    if(current == NULL)
 		{		/* Infeasible routing.  No possible path for net. */
 		    reset_path_costs();	/* Clean up before leaving. */
 		    return (FALSE);
 		}

 	    inode = current->index;

 	    while(rr_node_route_inf[inode].target_flag == 0)
 		{
 		    pcost = rr_node_route_inf[inode].path_cost;
 		    new_pcost = current->cost;
 		    if(pcost > new_pcost)
 			{	/* New path is lowest cost. */
 			    rr_node_route_inf[inode].path_cost = new_pcost;
 			    prev_node = current->u.prev_node;
 			    rr_node_route_inf[inode].prev_node = prev_node;
 			    rr_node_route_inf[inode].prev_edge =
 				current->prev_edge;
 				first_time = FALSE;

 				if(pcost > 0.99 * HUGE_FLOAT)	/* First time touched. */ {
 					add_to_mod_list(&rr_node_route_inf[inode].
 						path_cost);
 					first_time = TRUE;
 				}

 			    breadth_first_expand_neighbours_cluster(inode, new_pcost, inet, first_time);
 			}

 		    free_heap_data(current);
 		    current = get_heap_head();

 		    if(current == NULL)
 			{	/* Impossible routing. No path for net. */
 			    reset_path_costs();
 			    return (FALSE);
 			}

 		    inode = current->index;
 		}

 	    rr_node_route_inf[inode].target_flag--;	/* Connected to this SINK. */
 	    remaining_connections_to_sink =
 		rr_node_route_inf[inode].target_flag;
 	    tptr = update_traceback(current, inet);
 	    free_heap_data(current);
 	}

     empty_heap();
     reset_path_costs();
     return (TRUE);
 }


 static void
 breadth_first_expand_trace_segment_cluster(struct s_trace *start_ptr,
 				   int remaining_connections_to_sink)
 {

 /* Adds all the rr_nodes in the traceback segment starting at tptr (and     *
  * continuing to the end of the traceback) to the heap with a cost of zero. *
  * This allows expansion to begin from the existing wiring.  The            *
  * remaining_connections_to_sink value is 0 if the route segment ending     *
  * at this location is the last one to connect to the SINK ending the route *
  * segment.  This is the usual case.  If it is not the last connection this *
  * net must make to this SINK, I have a hack to ensure the next connection  *
  * to this SINK goes through a different IPIN.  Without this hack, the      *
  * router would always put all the connections from this net to this SINK   *
  * through the same IPIN.  With LUTs or cluster-based logic blocks, you     *
  * should never have a net connecting to two logically-equivalent pins on   *
  * the same logic block, so the hack will never execute.  If your logic     *
  * block is an and-gate, however, nets might connect to two and-inputs on   *
  * the same logic block, and since the and-inputs are logically-equivalent, *
  * this means two connections to the same SINK.                             */

     struct s_trace *tptr;

     tptr = start_ptr;

 	/* For intra-cluster routing, logical equivalence does not occur, so it is impossible to get a value bigger than 0 */
 	assert(remaining_connections_to_sink == 0);

     while(tptr != NULL)
 	{
 	    node_to_heap(tptr->index, 0., NO_PREVIOUS, NO_PREVIOUS,
 			 OPEN, OPEN);
 	    tptr = tptr->next;
 	}
 }


 static void
 breadth_first_expand_neighbours_cluster(int inode,
 				float pcost,
 				int inet,
 				boolean first_time)
 {

 /* Puts all the rr_nodes adjacent to inode on the heap.  rr_nodes outside   *
  * the expanded bounding box specified in route_bb are not added to the     *
  * heap.  pcost is the path_cost to get to inode.                           */

     int iconn, to_node, num_edges;
     float tot_cost;

     num_edges = rr_node[inode].num_edges;
     for(iconn = 0; iconn < num_edges; iconn++)
 	{
 	    to_node = rr_node[inode].edges[iconn];
 		/*if(first_time) { */
 			tot_cost = pcost + get_rr_cong_cost(to_node) * rr_node_intrinsic_cost(to_node);
 			/*
 		} else {
 			tot_cost = pcost + get_rr_cong_cost(to_node);
 		}*/
 	    node_to_heap(to_node, tot_cost, inode, iconn, OPEN, OPEN);
 	}
 }


 static void
 breadth_first_add_source_to_heap_cluster(int inet)
 {

 /* Adds the SOURCE of this net to the heap.  Used to start a net's routing. */

     int inode;
     float cost;

     inode = net_rr_terminals[inet][0];	/* SOURCE */
     cost = get_rr_cong_cost(inode);

     node_to_heap(inode, cost, NO_PREVIOUS, NO_PREVIOUS, OPEN, OPEN);
 }


 static void
 mark_ends_cluster(int inet)
 {

 /* Mark all the SINKs of this net as targets by setting their target flags  *
  * to the number of times the net must connect to each SINK.  Note that     *
  * this number can occassionally be greater than 1 -- think of connecting   *
  * the same net to two inputs of an and-gate (and-gate inputs are logically *
  * equivalent, so both will connect to the same SINK).                      */

     int ipin, inode;

     for(ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++)
 	{
 	    inode = net_rr_terminals[inet][ipin];
 		if(inode == OPEN)
 			continue;
 	    rr_node_route_inf[inode].target_flag++;
 		assert(rr_node_route_inf[inode].target_flag == 1);
 	}
 }


 static void
 alloc_net_rr_terminals_cluster()
 {
     int inet;

     net_rr_terminals = (int **)my_malloc(num_logical_nets * sizeof(int *));
 	saved_net_rr_terminals = (int **)my_malloc(num_logical_nets * sizeof(int *));

     for(inet = 0; inet < num_logical_nets; inet++)
 	{
 	    net_rr_terminals[inet] =
 		(int *)my_chunk_malloc((vpack_net[inet].num_sinks + 1) *
 				       sizeof(int), &rr_mem_chunk_list_head,
 				       &chunk_bytes_avail,
 				       &chunk_next_avail_mem);

 		saved_net_rr_terminals[inet] = (int *)my_malloc((vpack_net[inet].num_sinks + 1) * sizeof(int));
 	}
 }


 enum e_block_pack_status
 try_add_block_to_current_cluster_and_primitive(INP int iblock, INP t_pb *primitive)
 {

     /* Allocates and loads the net_rr_terminals data structure.  For each net   *
      * it stores the rr_node index of the SOURCE of the net and all the SINKs   *
      * of the net.  [0..num_logical_nets-1][0..num_pins-1].   */
     int ipin, iblk_net;
 	int orig_num_nets_in_cluster;
 	boolean success, found;
 	t_model_ports *port;

 	success = FALSE;
 	found = FALSE;

 	assert(primitive->pb_graph_node->pb_type->num_modes == 0); /* check if primitive */
 	assert(primitive->logical_block == OPEN && logical_block[iblock].clb_index == NO_CLUSTER); /* check if primitive and block is open */

 	/* check if block type matches primitive type */
 	if(logical_block[iblock].model != primitive->pb_graph_node->pb_type->model) {
 		/* End early, model is incompatible */
 		return BLK_FAILED_FEASIBLE;
 	}

 	orig_num_nets_in_cluster = num_nets_in_cluster;
 	save_and_reset_routing_cluster();

 	/* try pack it in */
 	assert(primitive->logical_block == OPEN);
 	assert(logical_block[iblock].clb_index == NO_CLUSTER);

 	primitive->logical_block = iblock;
 	logical_block[iblock].pb = primitive;
 	logical_block[iblock].clb_index = curr_cluster_index;

 	/* for each net of logical block, check if it is in cluster, if not add it */
 	/*   also check if pins on primitive can fit logical block */

 	port = logical_block[iblock].model->inputs;
 	success = TRUE;

 	while(port && success) {
 		for(ipin = 0; ipin < port->size && success; ipin++) {
 			if(port->is_clock) {
 				assert(port->size == 1);
 				iblk_net = logical_block[iblock].clock_net;
 			} else {
 				iblk_net = logical_block[iblock].input_nets[port->index][ipin];
 			}
 			if(iblk_net == OPEN) {
 				continue;
 			}
 			if(!is_net_in_cluster(iblk_net)){
 				nets_in_cluster[num_nets_in_cluster] = iblk_net;
 				num_nets_in_cluster++;
 			}
 			success = add_net_rr_terminal_cluster(iblk_net, primitive, iblock, port, ipin);
 		}
 		port = port->next;
 	}

 	port = logical_block[iblock].model->outputs;
 	while(port && success) {
 		for(ipin = 0; ipin < port->size && success; ipin++) {
 			iblk_net = logical_block[iblock].output_nets[port->index][ipin];
 			if(iblk_net == OPEN) {
 				continue;
 			}
 			if(!is_net_in_cluster(iblk_net)){
 				nets_in_cluster[num_nets_in_cluster] = iblk_net;
 				num_nets_in_cluster++;
 			}
 			success = add_net_rr_terminal_cluster(iblk_net, primitive, iblock, port, ipin);
 		}
 		port = port->next;
 	}

 	if(success) {
 		success = reload_ext_net_rr_terminal_cluster();
 	}

 	if(success) {
 		/* route it */
 		reset_rr_node_route_structs(); /* Clear all prior rr_graph history */
 		success = try_breadth_first_route_cluster(); /* route from scratch */
 	}

 	if(success) {
 		return BLK_PASSED;
 	} else {
 		/* Cannot pack, restore cluster */
 		primitive->logical_block = OPEN;
 		logical_block[iblock].pb = NULL;
 		logical_block[iblock].clb_index = NO_CLUSTER;
 		restore_routing_cluster();
 		num_nets_in_cluster = orig_num_nets_in_cluster;
 		return BLK_FAILED_ROUTE;
 	}
 }


 static void
 save_and_reset_routing_cluster() {

 /* This routing frees any routing currently held in best routing,       *
  * then copies over the current routing (held in trace_head), and       *
  * finally sets trace_head and trace_tail to all NULLs so that the      *
  * connection to the saved routing is broken.  This is necessary so     *
  * that the next iteration of the router does not free the saved        *
  * routing elements.  Also, the routing path costs and net_rr_terminals is stripped from the
  * existing rr_graph so that the saved routing does not affect the graph */

     int inet, i, j;
     struct s_trace *tempptr;

     for(i = 0; i < num_nets_in_cluster; i++)
 	{
 		inet = nets_in_cluster[i];
 		for(j = 0; j <= vpack_net[inet].num_sinks; j++) {
 			saved_net_rr_terminals[inet][j] = net_rr_terminals[inet][j];
 		}

 /* Free any previously saved routing.  It is no longer best. */
 /* Also Save a pointer to the current routing in best_routing. */

 		pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);
 		tempptr = trace_head[inet];
 	    trace_head[inet] = best_routing[inet];
 	    free_traceback(inet);
 		best_routing[inet] = tempptr;

 /* Set the current (working) routing to NULL so the current trace       *
  * elements won't be reused by the memory allocator.                    */

 	    trace_head[inet] = NULL;
 	    trace_tail[inet] = NULL;
 	}
 }


 static void
 restore_routing_cluster()
 {

 /* Deallocates any current routing in trace_head, and replaces it with    *
  * the routing in best_routing.  Best_routing is set to NULL to show that *
  * it no longer points to a valid routing.  NOTE:  trace_tail is not      *
  * restored -- it is set to all NULLs since it is only used in            *
  * update_traceback.  If you need trace_tail restored, modify this        *
  * routine.  Also restores the locally used opin data.                    */

     int inet, i, j;

     for(i = 0; i < num_nets_in_cluster; i++)
 	{
 		inet = nets_in_cluster[i];

 		pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);

 	    /* Free any current routing. */
 	    free_traceback(inet);

 	    /* Set the current routing to the saved one. */
 	    trace_head[inet] = best_routing[inet];
 	    best_routing[inet] = NULL;	/* No stored routing. */

 		/* restore net terminals */
 		for(j = 0; j <= vpack_net[inet].num_sinks; j++) {
 			net_rr_terminals[inet][j] = saved_net_rr_terminals[inet][j];
 		}

 		/* restore old routing */
 		pathfinder_update_one_cost(trace_head[inet], 1, pres_fac);
 	}
 }


 void save_cluster_solution()
 {

 /* This routine updates the occupancy and pres_cost of the rr_nodes that are *
  * affected by the portion of the routing of one net that starts at          *
  * route_segment_start.  If route_segment_start is trace_head[inet], the     *
  * cost of all the nodes in the routing of net inet are updated.  If         *
  * add_or_sub is -1 the net (or net portion) is ripped up, if it is 1 the    *
  * net is added to the routing.  The size of pres_fac determines how severly *
  * oversubscribed rr_nodes are penalized.                                    */

 	int i, j, net_index;
     struct s_trace *tptr, *prev;
     int inode;
 	for(i = 0; i < max_ext_index; i++) {
 		rr_node[i].net_num = OPEN;
 		rr_node[i].prev_edge = OPEN;
 		rr_node[i].prev_node = OPEN;
 	}
 	for(i = 0; i < num_nets_in_cluster; i++) {
 		prev = NULL;
 		net_index = nets_in_cluster[i];
 		tptr = trace_head[net_index];
 		if(tptr == NULL)		/* No routing yet. */
 		return;

 		for(;;)
 		{
 			inode = tptr->index;
 			rr_node[inode].net_num = net_index;
 			if(prev != NULL) {
 				rr_node[inode].prev_node = prev->index;
 				for(j = 0; j < rr_node[prev->index].num_edges; j++) {
 					if(rr_node[prev->index].edges[j] == inode) {
 						rr_node[inode].prev_edge = j;
 						break;
 					}
 				}
 				assert(j != rr_node[prev->index].num_edges);
 			} else {
 				rr_node[inode].prev_node = OPEN;
 				rr_node[inode].prev_edge = OPEN;
 			}

 			if(rr_node[inode].type == SINK)
 			{
 				tptr = tptr->next;	/* Skip next segment. */
 				if(tptr == NULL)
 				break;
 			}

 			prev = tptr;
 			tptr = tptr->next;

 		}			/* End while loop -- did an entire traceback. */
 	}
 }

 boolean is_pin_open(int i) {
 	return (rr_node[i].occ == 0);
 }


 void
 print_intra_cluster_route(char *route_file)
 {

 /* Prints out the routing to file route_file.  */

     int inet, inode;
     t_rr_type rr_type;
     struct s_trace *tptr;
     char *name_type[] =
 	{ "SOURCE", "SINK", "IPIN", "OPIN", "CHANX", "CHANY", "INTRA_CLUSTER_EDGE" };
     FILE *fp;

     fp = my_fopen(route_file, "w", 0);

     fprintf(fp, "\nRouting:");
     for(inet = 0; inet < num_nets; inet++)
 	{

 		if(vpack_net[inet].num_sinks == FALSE)
 		{
 			fprintf(fp, "\n\nNet %d (%s)\n\n", inet, vpack_net[inet].name);
 			fprintf(fp, "\n\n Used in local cluster only, reserved one CLB pin\n\n");
 		} else if(trace_head[inet]){
 			fprintf(fp, "\n\nNet %d (%s)\n\n", inet, vpack_net[inet].name);
 			tptr = trace_head[inet];

 			while(tptr != NULL)
 			{
 				inode = tptr->index;
 				rr_type = rr_node[inode].type;

 				fprintf(fp, "%6s %d ", name_type[rr_type], inode);


 /* Uncomment line below if you're debugging and want to see the switch types *
  * used in the routing.                                                      */
 /*          fprintf (fp, "Switch: %d", tptr->iswitch);    */

 				fprintf(fp, "\n");

 				tptr = tptr->next;
 			}
 		}
 	}

     fclose(fp);
 }

 static float rr_node_intrinsic_cost(int inode) {
 	/* This is a tie breaker to avoid using nodes with more edges whenever possible */
 	float value;
 	value = rr_node[inode].pack_intrinsic_cost;
 	return value;
 }

 /* turns on mode for a pb by setting capacity of its rr_nodes to 1 */
 void set_pb_mode(t_pb *pb, int mode, int isOn) {
 	int i, j, index;
 	int i_pb_type, i_pb_inst;
 	const t_pb_type *pb_type;
 	t_pb_graph_node *pb_graph_node;

 	pb_type = pb->pb_graph_node->pb_type;
 	for(i_pb_type = 0; i_pb_type < pb_type->modes[mode].num_pb_type_children; i_pb_type++) {
 		for(i_pb_inst = 0; i_pb_inst < pb_type->modes[mode].pb_type_children[i_pb_type].num_pb; i_pb_inst++) {
 			pb_graph_node = &pb->pb_graph_node->child_pb_graph_nodes[mode][i_pb_type][i_pb_inst];
 			for(i = 0; i < pb_graph_node->num_input_ports; i++) {
 				for(j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
 					index = pb_graph_node->input_pins[i][j].pin_count_in_cluster;
 					rr_node[index].capacity = isOn;
 				}
 			}

 			for(i = 0; i < pb_graph_node->num_output_ports; i++) {
 				for(j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
 					index = pb_graph_node->output_pins[i][j].pin_count_in_cluster;
 					rr_node[index].capacity = isOn;
 				}
 			}

 			for(i = 0; i < pb_graph_node->num_clock_ports; i++) {
 				for(j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
 					index = pb_graph_node->clock_pins[i][j].pin_count_in_cluster;
 					rr_node[index].capacity = isOn;
 				}
 			}
 		}
 	}
 }