blob: 0f6640e6181879e4e0a5c88e9f82946cc6cde2b0 [file] [log] [blame]
/*
* Main clustering algorithm
* Author(s): Vaughn Betz (first revision - VPack), Alexander Marquardt (second revision - T-VPack), Jason Luu (third revision - AAPack)
* June 8, 2011
*/
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "cluster.h"
#include "heapsort.h"
#include "output_clustering.h"
#include "output_blif.h"
#include "SetupGrid.h"
#include "read_xml_arch_file.h"
#include "cluster_legality.h"
#include "path_delay2.h"
#include "path_delay.h"
#define AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE 30 /* This value is used to determine the max size of the priority queue for candidates that pass the early filter legality test but not the more detailed routing test */
#define SCALE_NUM_PATHS 1e-2 /*this value is used as a multiplier to assign a *
*slightly higher criticality value to nets that *
*affect a large number of critical paths versus *
*nets that do not have such a broad effect. *
*Note that this multiplier is intentionally very *
*small compared to the total criticality because *
*we want to make sure that vpack_net criticality is *
*primarily determined by slacks, with this acting *
*only as a tie-breaker between otherwise equal nets*/
#define SCALE_DISTANCE_VAL 1e-4 /*this value is used as a multiplier to assign a *
*slightly higher criticality value to nets that *
*are otherwise equal but one is farther *
*from sources (the farther one being made slightly *
*more critical) */
enum e_gain_update {GAIN, NO_GAIN};
enum e_feasibility {FEASIBLE, INFEASIBLE};
enum e_gain_type {HILL_CLIMBING, NOT_HILL_CLIMBING};
enum e_removal_policy {REMOVE_CLUSTERED, LEAVE_CLUSTERED};
enum e_net_relation_to_clustered_block {INPUT, OUTPUT};
/* Linked list structure. Stores one integer (iblk). */
struct s_ilink {int iblk; struct s_ilink *next;};
/* 1/MARKED_FRAC is the fraction of nets or blocks that must be *
* marked in order for the brute force (go through the whole *
* data structure linearly) gain update etc. code to be used. *
* This is done for speed only; make MARKED_FRAC whatever *
* number speeds the code up most. */
#define MARKED_FRAC 2
/* Keeps a linked list of the unclustered blocks to speed up looking for *
* unclustered blocks with a certain number of *external* inputs. *
* [0..lut_size]. Unclustered_list_head[i] points to the head of the *
* list of blocks with i inputs to be hooked up via external interconnect. */
static struct s_ilink *unclustered_list_head;
int unclustered_list_head_size;
static struct s_ilink *memory_pool; /*Declared here so I can free easily.*/
/* Does the logical_block that drives the output of this vpack_net also appear as a *
* receiver (input) pin of the vpack_net? [0..num_logical_nets-1]. If so, then by how much? This is used *
* in the gain routines to avoid double counting the connections from *
* the current cluster to other blocks (hence yielding better *
* clusterings). The only time a logical_block should connect to the same vpack_net *
* twice is when one connection is an output and the other is an input, *
* so this should take care of all multiple connections. */
static int *net_output_feeds_driving_block_input;
static int hack_frac_lut_no_legality;
static int hack_force_safe_latch;
static int indexofcrit; /* index of next most timing critical block */
/* Timing information for blocks */
static float *criticality = NULL;
static int *critindexarray = NULL;
static float **net_pin_backward_criticality = NULL;
static float **net_pin_forward_criticality = NULL;
/*****************************************/
/*local functions*/
/*****************************************/
static int num_ext_inputs (int iblk);
static void check_clocks (boolean *is_clock);
static int get_max_cluster_size(t_pb_type *pb_type);
static int get_max_primitive_inputs(t_pb_type *pb_type);
static int get_max_pb_depth(t_pb_type *pb_type);
#if 0
static void check_for_duplicate_inputs ();
#endif
static boolean is_logical_blk_in_pb(int iblk, t_pb *pb);
static void add_blk_to_pb_stats_candidates(int iblk, float *gain, t_pb *pb);
static void alloc_and_init_clustering (boolean global_clocks, float alpha, float beta, int max_cluster_size, int max_primitive_inputs, int max_pb_depth, int max_models);
static void free_pb_stats_recursive (t_pb *pb, int max_models);
static boolean outputs_clocks_and_models_feasible (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb);
static boolean models_feasible(enum e_packer_algorithm packer_algorithm, int iblk, const t_pb_type *cur_pb_type, t_pb *cur_pb, int mode);
static boolean primitive_feasible(int iblk, t_pb *cur_pb);
static boolean primitive_type_feasible(int iblk, const t_pb_type *cur_pb_type, t_pb *memory_class_pb, int sibling_memory_blk);
static int get_logical_block_by_num_ext_inputs (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb,
INP int ext_inps,
INP enum e_removal_policy remove_flag);
static int get_free_logical_block_with_most_ext_inputs_for_cluster (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb);
static int get_seed_logical_block_with_most_ext_inputs (int max_primitive_inputs);
static void alloc_and_load_pb_stats (t_pb *pb, int max_models, int max_cluster_size, int max_primitive_inputs);
static enum e_block_pack_status alloc_and_load_pb(INP enum e_packer_algorithm packer_algorithm, INP t_pb_graph_node *pb_graph_node, INP int iblock, INOUTP t_pb * pb, INP int max_models, INP int max_cluster_size, INP int max_primitive_inputs);
static void update_connection_gain_values (int inet, int clustered_block, t_pb * cur_pb,
enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
static void update_length_gain_values (int inet, int clustered_block, t_pb* cur_pb,
enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
static void mark_and_update_partial_gain (int inet,
enum e_gain_update gain_flag,
int clustered_block,
int port_on_clustered_block,
int pin_on_clustered_block,
boolean timing_driven,
boolean connection_driven,
enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
static void update_total_gain(float alpha, float beta, boolean timing_driven, boolean
connection_driven, boolean global_clocks, t_pb *pb);
static void update_cluster_stats ( INP int new_blk,
INP int clb_index,
INP boolean *is_clock,
INP boolean global_clocks,
INP float alpha,
INP float beta,
INP boolean timing_driven,
INP boolean connection_driven);
static void start_new_cluster(INP enum e_packer_algorithm packer_algorithm,
INP const t_arch * arch,
INOUTP t_block *new_cluster,
INP int clb_index,
INP int istart,
INP float aspect,
INOUTP int *num_used_instances_type,
INOUTP int *num_instances_type,
INP int num_models,
INP int max_cluster_size,
INP int max_primitive_inputs);
static boolean inputs_outputs_models_and_clocks_feasible (INP enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb);
static int get_highest_gain_logical_block (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb,
INP boolean *is_clock,
INP boolean (*is_feasible)
(enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb),
INP enum e_gain_type gain_mode);
static int get_logical_block_for_cluster (INP enum e_packer_algorithm packer_algorithm,
INP t_pb *cur_pb,
INP boolean *is_clock,
INP boolean allow_unrelated_clustering);
static int get_free_logical_block_with_fewest_ext_inputs (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb);
static int get_most_feasible_logical_block (INOUTP t_pb *cur_pb);
static void alloc_and_load_cluster_info (INP int num_clb, INOUTP t_block *clb);
static void check_clustering (int num_clb, t_block *clb, boolean *is_clock);
static void check_cluster_logical_blocks (t_pb *pb, boolean *blocks_checked);
static int get_most_critical_unclustered_block(void);
static void free_cb (t_pb *pb, int max_models);
static void free_pb_stats(t_pb_stats pb_stats, int max_models);
static void free_pb (t_pb *pb, int max_models);
/*****************************************/
/*globally accessable function*/
/* Top-level packer entry point. Greedily packs every logical block in the
 * netlist into complex blocks (clusters), legalizing each cluster as it is
 * built, then checks the result and writes it to out_fname.
 *
 * Key parameters:
 *   arch              - architecture description (models, block types)
 *   num_models        - number of primitive models in the architecture
 *   global_clocks     - treat clock nets as global (not routed in clusters)
 *   is_clock          - [0..num_logical_nets-1] flags nets that are clocks
 *   hill_climbing_flag- enables allocation of hill-climbing bookkeeping
 *   timing_driven     - build a timing graph and seed/select by criticality
 *   cluster_seed_type - VPACK_TIMING seeds by criticality, else by max inputs
 *   alpha, beta       - gain-function tradeoff weights
 *   packer_algorithm  - greedy vs. brute-force candidate selection
 *   hack_no_legal_frac_lut / hack_safe_latch - legacy hacks (see code)
 *
 * NOTE(review): recompute_timing_after, intra_cluster_net_delay and
 * allow_early_exit are accepted but never used in this function body;
 * early_exit is set FALSE and never becomes TRUE, and
 * critical_path_minimized is written but never read. Presumably leftovers
 * from the historical re-analysis loop (see comment at the
 * blocks_since_last_analysis++ site) -- confirm before removing. */
void do_clustering (const t_arch *arch, int num_models, boolean global_clocks,
boolean *is_clock, boolean hill_climbing_flag,
char *out_fname, boolean timing_driven,
enum e_cluster_seed cluster_seed_type, float alpha, float beta,
int recompute_timing_after, float block_delay,
float intra_cluster_net_delay, float inter_cluster_net_delay,
float aspect,
boolean allow_unrelated_clustering,
boolean allow_early_exit, boolean connection_driven,
enum e_packer_algorithm packer_algorithm,
boolean hack_no_legal_frac_lut, boolean hack_safe_latch){
/* Does the actual work of clustering multiple VPACK_LUT+FF logic blocks *
 * into clusters. */
/*note: I allow timing_driven and connection_driven to be simultaneously true*/
/*in this case, connection_driven is responsible for all clustering decisions*/
/*but timing analysis is still enabled (useful for viewing the constant delay*/
/*results) */
/* Algorithm employed
1. Find type that can legally hold block and create cluster with pb info
2. Populate started cluster
3. Repeat 1 until no more blocks need to be clustered
*/
int istart, i, j, k, m, iblk;
int blocks_since_last_analysis;
int next_blk, prev_blk;
int num_blocks_hill_added;
int num_logical_blocks_clustered;
int max_cluster_size, cur_cluster_size, max_primitive_inputs, cur_primitive_inputs, max_pb_depth, cur_pb_depth;
int *hill_climbing_inputs_avail;
t_block *clb;
t_pb *cur_pb;
int num_clb;
boolean critical_path_minimized;
boolean early_exit;
enum e_block_pack_status block_pack_status;
int *num_used_instances_type, *num_instances_type; /* [0..num_types] Holds array for total number of each cluster_type available */
float **net_slack = NULL;
float num_paths_scaling, distance_scaling;
float crit;
/* Stash the two hacks in file-scope statics so the feasibility checkers can see them. */
hack_frac_lut_no_legality = hack_no_legal_frac_lut;
hack_force_safe_latch = hack_safe_latch;
/* TODO: This is memory inefficient, fix if causes problems */
/* Worst case: one cluster per logical block. */
clb = my_calloc(num_logical_blocks,sizeof(t_block));
num_clb = 0;
istart = NO_CLUSTER;
/* determine bound on cluster size and primitive input size */
max_cluster_size = 0;
max_primitive_inputs = 0;
max_pb_depth = 0;
indexofcrit = 0;
for(i = 0; i < num_types; i++) {
if(EMPTY_TYPE == &type_descriptors[i])
continue;
cur_cluster_size = get_max_cluster_size(type_descriptors[i].pb_type);
cur_primitive_inputs = get_max_primitive_inputs(type_descriptors[i].pb_type);
cur_pb_depth = get_max_pb_depth(type_descriptors[i].pb_type);
if(cur_cluster_size > max_cluster_size) {
max_cluster_size = cur_cluster_size;
}
if(cur_primitive_inputs > max_primitive_inputs) {
max_primitive_inputs = cur_primitive_inputs;
}
if(cur_pb_depth > max_pb_depth) {
max_pb_depth = cur_pb_depth;
}
}
if(hill_climbing_flag) {
hill_climbing_inputs_avail = (int *) my_calloc (max_cluster_size + 1, sizeof(int));
} else {
hill_climbing_inputs_avail = NULL; /* if used, die hard */
}
/* TODO: make better estimate for nx and ny */
nx = ny = 1;
check_clocks (is_clock);
#if 0
check_for_duplicate_inputs ();
#endif
alloc_and_init_clustering (global_clocks, alpha, beta, max_cluster_size, max_primitive_inputs, max_pb_depth, num_models);
blocks_since_last_analysis = 0;
critical_path_minimized = FALSE;
early_exit = FALSE;
num_logical_blocks_clustered = 0;
num_blocks_hill_added = 0;
num_used_instances_type = (int*)my_calloc (num_types, sizeof (int));
num_instances_type = (int*)my_calloc (num_types, sizeof (int));
assert(max_cluster_size < MAX_SHORT); /* Limit maximum number of elements for each cluster */
/* Timing-driven setup: build the pre-packing timing graph and convert slacks
 * into a per-block criticality, with #critical-paths and arrival-time used
 * only as small tie-breakers (see SCALE_* defines at the top of the file). */
if (timing_driven){
net_slack = alloc_and_load_pre_packing_timing_graph(block_delay, inter_cluster_net_delay, arch->models);
load_net_slack(net_slack, 0);
criticality=(float*)my_calloc(num_logical_blocks,sizeof(float));
critindexarray=(int*)my_malloc(num_logical_blocks*sizeof(int));
for(i = 0; i < num_logical_blocks; i++) {
critindexarray[i] = i;
}
/* Calculate criticality based on slacks and tie breakers (# paths, distance from source) */
for(i = 0; i < num_tnodes; i++) {
iblk = tnode[i].block;
num_paths_scaling = SCALE_NUM_PATHS * (float)tnode[i].normalized_total_critical_paths;
distance_scaling = SCALE_DISTANCE_VAL * (float)tnode[i].normalized_T_arr;
crit = (1 - tnode[i].normalized_slack) + num_paths_scaling + distance_scaling;
/* A block's criticality is the max over all its timing nodes. */
if(criticality[iblk] < crit) {
criticality[iblk] = crit;
}
}
net_pin_backward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*));
net_pin_forward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*));
for(i = 0; i < num_logical_nets; i++) {
net_pin_backward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float));
net_pin_forward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float));
/* Pin 0 is the driver: backward criticality comes from each pin's own
 * block, forward criticality from the net's driving block. */
for(j = 0; j <= vpack_net[i].num_sinks; j++) {
net_pin_backward_criticality[i][j] = criticality[vpack_net[i].node_block[j]];
net_pin_forward_criticality[i][j] = criticality[vpack_net[i].node_block[0]];
}
}
/* Sort block indices by criticality so seeds can be taken in order. */
heapsort(critindexarray, criticality, num_logical_blocks, 1);
/*print_critical_path("clustering_critical_path.echo");*/
if (cluster_seed_type == VPACK_TIMING){
istart=get_most_critical_unclustered_block();
}
else {/*max input seed*/
printf("get_seed_logical_block_with_most_ext_inputs\n");
istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
}
}
else /*cluster seed is max input (since there is no timing information)*/{
istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
}
/* Main loop: each iteration opens one new cluster seeded with istart and
 * fills it greedily until no more candidates fit anywhere in the cluster. */
while (istart != NO_CLUSTER) {
reset_legalizer_for_cluster(&clb[num_clb]);
/* start a new cluster and reset all stats */
start_new_cluster(packer_algorithm, arch, &clb[num_clb], num_clb, istart, aspect, num_used_instances_type, num_instances_type, num_models, max_cluster_size, max_primitive_inputs);
if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD)
{
printf("Complex Block %d: %s type %s\n", num_clb, clb[num_clb].name, clb[num_clb].type->name);
fflush(stdout);
}
update_cluster_stats (istart, num_clb, is_clock, global_clocks, alpha, beta, timing_driven, connection_driven);
num_clb++;
num_logical_blocks_clustered ++;
if (timing_driven && !early_exit) {
blocks_since_last_analysis++;
/*it doesn't make sense to do a timing analysis here since there*
 *is only one logical_block clustered it would not change anything */
}
/* First try to pack primitive into cluster, select next block based on closest open sibling if available */
cur_pb = logical_block[istart].pb->parent_pb;
prev_blk = NO_CLUSTER;
/* Inner loop: walk up the pb hierarchy, trying candidates at each level.
 * prev_blk remembers the last candidate that failed so we only retry a
 * failed block once at a given level before moving up to the parent pb. */
while (cur_pb) {
if(packer_algorithm == PACK_BRUTE_FORCE) { /* if using brute force approach, look at all possible free blocks in cluster */
cur_pb = clb[num_clb - 1].pb;
}
next_blk = get_logical_block_for_cluster(packer_algorithm,
cur_pb,
is_clock,
allow_unrelated_clustering);
block_pack_status = alloc_and_load_pb(packer_algorithm, cur_pb->pb_graph_node, next_blk, cur_pb, num_models, max_cluster_size, max_primitive_inputs);
if(block_pack_status != BLK_PASSED) {
if(next_blk != NO_CLUSTER && prev_blk != next_blk) {
/* First failure for this candidate at this level: report and retry. */
if(block_pack_status == BLK_FAILED_ROUTE) {
#ifdef DEBUG_FAILED_PACKING_CANDIDATES
printf("NO_ROUTE:%s#%d|%s ", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name);
#else
printf(".");
#endif
} else {
#ifdef DEBUG_FAILED_PACKING_CANDIDATES
printf("FAILED_CHECK:%s#%d|%s check %d", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name, block_pack_status);
#else
printf(".");
#endif
}
prev_blk = next_blk;
} else {
/* No candidate, or the same candidate failed twice: give up at this
 * level and move up to the parent pb (NULL parent ends the loop). */
prev_blk = NO_CLUSTER;
cur_pb = cur_pb->parent_pb;
}
continue;
} else {
/* Continue packing by filling smallest cluster */
if(hack_frac_lut_no_legality) {
logical_block[next_blk].clb_index = num_clb - 1;
}
cur_pb = logical_block[next_blk].pb->parent_pb;
prev_blk = NO_CLUSTER;
#ifdef DEBUG_FAILED_PACKING_CANDIDATES
printf("PASSED:%s#%d ", logical_block[next_blk].name, next_blk);
#else
printf(".");
#endif
}
update_cluster_stats (next_blk, num_clb - 1, is_clock, global_clocks,
alpha, beta, timing_driven, connection_driven);
num_logical_blocks_clustered ++;
if (timing_driven && !early_exit) {
blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */
}
}
if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD)
{
printf("\n");
}
save_cluster_solution();
/* hack_frac_lut_no_legality: legality was never checked by the router, so
 * assign nets directly to the cluster's input, output and clock pins here,
 * and abort if more nets need pins than the cluster physically has. */
if(hack_frac_lut_no_legality) {
k = 0;
/* Assign each externally-driven, non-global net used in this cluster to
 * the next free input pin (k scans nets; i,j scan input ports/pins). */
for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_input_ports; i++) {
for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_input_pins[i]; j++) {
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
rr_node[clb[num_clb-1].pb->pb_graph_node->input_pins[i][j].pin_count_in_cluster].net_num = k;
k++;
break;
}
k++;
}
}
}
/* check if nets are overused */
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
printf(ERRTAG "Too many input nets used in cluster %s\n", clb[num_clb-1].name);
assert(0);
}
k++;
}
k = 0;
/* Assign output pins: a net driven inside the cluster needs an output pin
 * when some of its sinks lie outside the cluster. */
for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_output_ports; i++) {
for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_output_pins[i]; j++) {
while(k < num_logical_nets) {
for(m = 0; m <= vpack_net[k].num_sinks; m++) {
if(logical_block[vpack_net[k].node_block[m]].clb_index != num_clb - 1) {
break;
}
}
if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && (clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1))
{
rr_node[clb[num_clb-1].pb->pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num = k;
k++;
break;
}
k++;
}
}
}
while(k < num_logical_nets) {
if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && (clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1))
{
printf(ERRTAG "Too many output nets used in cluster %s\n", clb[num_clb-1].name);
assert(0);
}
k++;
}
k = 0;
/* Assign clock pins to global nets used by the cluster. */
for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_clock_ports; i++) {
for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_clock_pins[i]; j++) {
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
rr_node[clb[num_clb-1].pb->pb_graph_node->clock_pins[i][j].pin_count_in_cluster].net_num = k;
k++;
break;
}
k++;
}
}
}
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
printf(ERRTAG "Too many clock nets used in cluster %s\n", clb[num_clb-1].name);
assert(0);
}
k++;
}
}
/* Pick the seed for the next cluster using the same policy as the first. */
if (timing_driven){
if (num_blocks_hill_added > 0 && !early_exit) {
blocks_since_last_analysis += num_blocks_hill_added;
}
if (cluster_seed_type == VPACK_TIMING){
istart=get_most_critical_unclustered_block();
}
else { /*max input seed*/
istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
}
}
else /*cluster seed is max input (since there is no timing information)*/
istart=get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
/* The finished cluster's packing stats are no longer needed. */
free_pb_stats_recursive (clb[num_clb-1].pb, num_models);
}
/* All blocks clustered: tear down timing data, verify the clustering, and
 * emit the packed netlist. */
if (timing_driven){
free_timing_graph(net_slack);
}
free_cluster_legality_checker();
alloc_and_load_cluster_info (num_clb, clb);
check_clustering (num_clb, clb, is_clock);
output_clustering( clb, num_clb, global_clocks, is_clock, out_fname, FALSE);
#ifdef DUMP_BLIF_ECHO
if(!hack_frac_lut_no_legality) /* must have routing graph before dumping blif */
output_blif( clb, num_clb, global_clocks, is_clock, "post_packing_blif.echo", FALSE);
#endif
if(hill_climbing_flag)
{
free (hill_climbing_inputs_avail);
}
for(i = 0; i < num_clb; i++) {
free_cb(clb[i].pb, num_models);
free(clb[i].name);
free(clb[i].nets);
free(clb[i].pb);
}
free(clb);
free (num_used_instances_type);
free (num_instances_type);
free (unclustered_list_head);
free (memory_pool);
free (net_output_feeds_driving_block_input);
}
static int num_ext_inputs (int iblk) {
/* Returns the number of input pins on this logical_block that must be hooked *
* up through external interconnect. That is, the number of input *
* pins used - the number which connect (internally) to the outputs. */
int ext_inps, output_net, ipin, opin;
t_model_ports *port, *out_port;
/* TODO: process to get ext_inps is slow, should cache in lookup table */
ext_inps = 0;
port = logical_block[iblk].model->inputs;
while(port) {
if(port->is_clock == FALSE) {
for(ipin = 0; ipin < port->size; ipin++) {
if(logical_block[iblk].input_nets[port->index][ipin] != OPEN) {
ext_inps++;
}
out_port = logical_block[iblk].model->outputs;
while(out_port) {
for(opin = 0; opin < out_port->size; opin++) {
output_net = logical_block[iblk].output_nets[out_port->index][opin];
if(output_net == OPEN)
continue;
/* TODO: I could speed things up a bit by computing the number of inputs *
* and number of external inputs for each logic logical_block at the start of *
* clustering and storing them in arrays. Look into if speed is a *
* problem. */
if (logical_block[iblk].input_nets[port->index][ipin] == output_net) {
ext_inps--;
break;
}
}
out_port = out_port->next;
}
}
}
port = port->next;
}
assert(ext_inps >= 0);
return (ext_inps);
}
/*****************************************/
static void check_clocks (boolean *is_clock) {
	/* Sanity check: a net flagged as a clock must never also feed an ordinary
	 * logic-block input. That situation is electrically questionable and,
	 * more importantly, would break the clustering code, so we abort. */
	int inet, iblk, ipin;
	t_model_ports *port;

	for (iblk = 0; iblk < num_logical_blocks; iblk++) {
		if (logical_block[iblk].type == VPACK_OUTPAD)
			continue; /* outpads are exempt from this check */
		for (port = logical_block[iblk].model->inputs; port != NULL; port = port->next) {
			for (ipin = 0; ipin < port->size; ipin++) {
				inet = logical_block[iblk].input_nets[port->index][ipin];
				if (inet == OPEN || !is_clock[inet])
					continue;
				printf("Error in check_clocks. Net %d (%s) is a clock, but also\n"
					   "\tconnects to a logic block input on logical_block %d (%s).\n",
					   inet, vpack_net[inet].name, iblk, logical_block[iblk].name);
				printf("This would break the current clustering implementation and is electrically\n"
					   "\tquestionable, so clustering has been aborted.\n");
				exit (1);
			}
		}
	}
}
static int get_max_cluster_size(t_pb_type *pb_type) {
	/* Upper bound on the number of primitives this pb_type can hold.
	 * A leaf (no modes) contributes its replication count num_pb; an
	 * internal node contributes the largest summed capacity over its modes. */
	int imode, ichild;
	int best, mode_total;
	t_pb_type *child;

	if (pb_type->modes == NULL) {
		return pb_type->num_pb;
	}
	best = 0;
	for (imode = 0; imode < pb_type->num_modes; imode++) {
		mode_total = 0;
		for (ichild = 0; ichild < pb_type->modes[imode].num_pb_type_children; ichild++) {
			child = &pb_type->modes[imode].pb_type_children[ichild];
			/* Each child slot is replicated num_pb times. */
			mode_total += child->num_pb * get_max_cluster_size(child);
		}
		if (mode_total > best) {
			best = mode_total;
		}
	}
	return best;
}
static int get_max_primitive_inputs(t_pb_type *pb_type) {
	/* Returns the maximum number of input pins on any primitive (a leaf
	 * pb_type, identified by having a blif_model) reachable from pb_type.
	 * Used to size the unclustered-block lists indexed by input count. */
	int i, j;
	int max_size, child_size;

	if (pb_type->blif_model != NULL) {
		/* Primitive: its own input pin count is the answer. */
		return pb_type->num_input_pins;
	}
	max_size = 0;
	for (i = 0; i < pb_type->num_modes; i++) {
		for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
			/* Fix: the original reset a temp variable to 0 once per mode and
			 * then overwrote it on every child iteration -- a dead store.
			 * Only the per-child recursion result matters. */
			child_size = get_max_primitive_inputs(&pb_type->modes[i].pb_type_children[j]);
			if (child_size > max_size) {
				max_size = child_size;
			}
		}
	}
	return max_size;
}
static int get_max_pb_depth(t_pb_type *pb_type) {
	/* Returns the deepest pb_type depth found in the hierarchy rooted at
	 * pb_type (each pb_type records its own depth field). */
	int imode, ichild, deepest, child_depth;

	deepest = pb_type->depth;
	for (imode = 0; imode < pb_type->num_modes; imode++) {
		for (ichild = 0; ichild < pb_type->modes[imode].num_pb_type_children; ichild++) {
			child_depth = get_max_pb_depth(&pb_type->modes[imode].pb_type_children[ichild]);
			if (child_depth > deepest) {
				deepest = child_depth;
			}
		}
	}
	return deepest;
}
/* Determine if logical block is in pb */
static boolean is_logical_blk_in_pb(int iblk, t_pb *pb) {
	/* TRUE iff pb is the pb that holds logical block iblk, or any of that
	 * pb's ancestors in the cluster hierarchy. */
	t_pb *walk;

	for (walk = logical_block[iblk].pb; walk != NULL; walk = walk->parent_pb) {
		if (walk == pb) {
			return TRUE;
		}
	}
	return FALSE;
}
/* Add blk to list of feasible blocks sorted according to gain */
static void add_blk_to_pb_stats_candidates(int iblk, float *gain, t_pb *pb) {
/* Inserts logical block iblk into pb's candidate array
 * pb->pb_stats.feasible_blocks[0], kept sorted in ascending gain order
 * (index 0 = lowest gain). The array is capped at
 * AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE entries; once near capacity the
 * lowest-gain entry is evicted to make room for a higher-gain block. */
int i, j;
i = 0;
/* NOTE(review): i is pinned to 0, so only model slot 0 is ever used and
 * num_marked_models acts as a 0/1 "array initialized" flag -- confirm
 * whether per-model candidate lists were intended here. */
if(i == pb->pb_stats.num_marked_models) {
/* Corresponding model not found, add it */
pb->pb_stats.num_marked_models++;
pb->pb_stats.num_feasible_blocks[i] = 1;
pb->pb_stats.feasible_blocks[i][0] = iblk;
} else {
/* found matching model, bubble sort */
if(pb->pb_stats.num_feasible_blocks[i] >= AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE - 1) {
/* maximum size for array, remove smallest gain element and sort */
/* Only insert if iblk beats the current worst (index 0); shifting
 * entries down overwrites that worst element. */
if(gain[iblk] > gain[pb->pb_stats.feasible_blocks[i][0]]) {
/* single loop insertion sort */
for(j = 0; j < pb->pb_stats.num_feasible_blocks[i] - 1; j++) {
if(gain[iblk] <= gain[pb->pb_stats.feasible_blocks[i][j + 1]]) {
pb->pb_stats.feasible_blocks[i][j] = iblk;
break;
} else {
pb->pb_stats.feasible_blocks[i][j] = pb->pb_stats.feasible_blocks[i][j + 1];
}
}
/* iblk has the highest gain of all: it lands in the top slot. */
if(j == pb->pb_stats.num_feasible_blocks[i] - 1) {
pb->pb_stats.feasible_blocks[i][j] = iblk;
}
}
} else {
/* Expand array and single loop insertion sort */
/* Walk down from the top, shifting higher-gain entries up one slot
 * until iblk's position is found. */
for(j = pb->pb_stats.num_feasible_blocks[i] - 1; j >= 0; j--) {
if(gain[pb->pb_stats.feasible_blocks[i][j]] > gain[iblk]) {
pb->pb_stats.feasible_blocks[i][j + 1] = pb->pb_stats.feasible_blocks[i][j];
} else {
pb->pb_stats.feasible_blocks[i][j + 1] = iblk;
break;
}
}
/* iblk has the lowest gain of all: it lands in slot 0. */
if(j < 0) {
pb->pb_stats.feasible_blocks[i][0] = iblk;
}
pb->pb_stats.num_feasible_blocks[i]++;
}
}
}
/*****************************************/
static void alloc_and_init_clustering (boolean global_clocks,
					float alpha, float beta,
					int max_cluster_size,
					int max_primitive_inputs,
					int max_pb_depth,
					int max_models) {
	/* Allocates and initializes the main clustering data structures:
	 * - marks every logical block unclustered,
	 * - builds the unclustered-block lists bucketed by external input count,
	 * - counts, per net, how many sink pins sit on the net's own driver
	 *   (feedback connections), used later to avoid double counting. */
	int iblk, ipin, inet, ext_inps, driver;
	struct s_ilink *link;

	alloc_and_load_cluster_legality_checker();

	/* No block belongs to any cluster yet. */
	for (iblk = 0; iblk < num_logical_blocks; iblk++) {
		logical_block[iblk].clb_index = NO_CLUSTER;
	}

	/* One list head per possible external input count [0..max_primitive_inputs]. */
	unclustered_list_head = (struct s_ilink *) my_calloc(max_primitive_inputs + 1, sizeof(struct s_ilink));
	unclustered_list_head_size = max_primitive_inputs + 1;
	for (ipin = 0; ipin <= max_primitive_inputs; ipin++) {
		unclustered_list_head[ipin].next = NULL;
	}

	/* All list nodes come from one contiguous pool so they can be freed with
	 * a single free() call later. Each block is pushed onto the front of the
	 * list matching its external input count. */
	memory_pool = (struct s_ilink *) my_malloc (num_logical_blocks * sizeof (struct s_ilink));
	link = memory_pool;
	for (iblk = 0; iblk < num_logical_blocks; iblk++) {
		ext_inps = num_ext_inputs(iblk);
		link->iblk = iblk;
		link->next = unclustered_list_head[ext_inps].next;
		unclustered_list_head[ext_inps].next = link;
		link++;
	}

	/* For each net, count the sink pins that land back on the driving block
	 * itself (pin 0 is the driver; pins 1..num_sinks are sinks). */
	net_output_feeds_driving_block_input = (int *) my_malloc (num_logical_nets * sizeof (int));
	for (inet = 0; inet < num_logical_nets; inet++) {
		net_output_feeds_driving_block_input[inet] = 0;
		driver = vpack_net[inet].node_block[0];
		for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) {
			if (vpack_net[inet].node_block[ipin] == driver) {
				net_output_feeds_driving_block_input[inet]++;
			}
		}
	}
}
/*****************************************/
static void free_pb_stats_recursive (t_pb *pb, int max_models) {
	/* Releases the pb_stats of pb and, recursively, of every instantiated
	 * child pb in the currently selected mode. */
	int ichild, ipb;

	if (pb == NULL) {
		return;
	}
	if (pb->pb_graph_node != NULL
			&& pb->pb_graph_node->pb_type->num_modes != 0
			&& pb->child_pbs != NULL) {
		for (ichild = 0; ichild < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; ichild++) {
			if (pb->child_pbs[ichild] == NULL) {
				continue; /* this child type was never instantiated */
			}
			for (ipb = 0; ipb < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[ichild].num_pb; ipb++) {
				free_pb_stats_recursive (&pb->child_pbs[ichild][ipb], max_models);
			}
		}
	}
	free_pb_stats(pb->pb_stats, max_models);
}
/*****************************************/
static boolean outputs_clocks_and_models_feasible (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb) {
/* Checks if logical_block iblk could be adding to the open cluster without *
 * violating clocking constraints. Returns TRUE if it's OK. Some *
 * parameters are unused since this function needs the same inter- *
 * face as the other feasibility checkers. */
/* Feasibility holds only if clock and output pin budgets stay non-negative
 * at every level from cur_pb up through its ancestors, AND the block's model
 * can be placed somewhere in cur_pb (models_feasible). */
int inet, depth, clocks_avail;
t_model_ports *port;
int ipin, output_net, outputs_avail;
boolean clocks_feasible, outputs_feasible;
t_pb * temp_pb;
/* Assumptions: 1. Clock network unique, can only connect to clock network */
/* 2. Logic block output can't internally connect to clocks. */
clocks_feasible = outputs_feasible = TRUE;
temp_pb = cur_pb;
while(temp_pb && clocks_feasible && outputs_feasible) {
depth = temp_pb->pb_graph_node->pb_type->depth; /* NOTE(review): depth is computed but never used below */
clocks_avail = cur_pb->pb_stats.clocks_avail; /* NOTE(review): reads cur_pb, not temp_pb, even though this loop walks temp_pb up the ancestry -- confirm this is intentional */
if(clocks_avail == NOT_VALID) {
clocks_avail = temp_pb->pb_graph_node->pb_type->num_clock_pins;
}
/* A new clock pin is needed unless this block's clock net is already
 * present in the pb via a sink (not solely via its internal driver). */
inet = logical_block[iblk].clock_net;
if (inet != OPEN) {
if (temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) {
clocks_avail--;
}
else if (temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1 &&
temp_pb->pb_stats.net_output_in_pb[inet]) {
clocks_avail--;
}
}
outputs_avail = temp_pb->pb_stats.outputs_avail;
port = logical_block[iblk].model->outputs;
while(port) {
/* Outputs that connect to internal blocks free up an input pin. */
/* Each output net of iblk consumes a pb output pin unless all of its
 * sinks are already inside the pb (feedback pins excluded). */
for(ipin = 0; ipin < port->size; ipin++) {
output_net = logical_block[iblk].output_nets[port->index][ipin];
if (output_net != OPEN) {
if(temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] >= vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]) {
if((temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] != vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net])) {
printf("[INTERNAL ERROR] net %d %s %d != %d\n", output_net, vpack_net[output_net].name,
temp_pb->pb_stats.num_pins_of_net_in_pb[output_net], vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]);
}
assert(temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] == vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]);
} else {
outputs_avail--;
}
}
}
port = port->next;
}
port = logical_block[iblk].model->inputs;
while(port) {
if(port->is_clock == TRUE) {
port = port->next;
continue;
}
/* Adding iblk may complete an internally-driven net: if iblk's input is
 * that net's last outstanding sink, the pb output pin reserved for the
 * net is freed. */
for (ipin = 0; ipin < port->size; ipin++) {
inet = logical_block[iblk].input_nets[port->index][ipin];
if(inet != OPEN) {
if(temp_pb->pb_stats.net_output_in_pb[inet] &&
temp_pb->pb_stats.num_pins_of_net_in_pb[inet] + net_output_feeds_driving_block_input[inet] == vpack_net[inet].num_sinks) {
outputs_avail++;
}
}
}
port = port->next;
}
clocks_feasible = (clocks_avail >= 0);
outputs_feasible = (outputs_avail >= 0);
temp_pb = temp_pb->parent_pb;
}
/* Pin budgets alone are not enough: the block's model must also fit. */
if(models_feasible(packer_algorithm, iblk, cur_pb->pb_graph_node->pb_type, cur_pb, cur_pb->mode)) {
if (clocks_feasible && outputs_feasible)
return (TRUE);
else
return (FALSE);
} else {
return FALSE;
}
}
/* Returns TRUE if logical block iblk could conceivably be packed somewhere
inside a pb of type cur_pb_type operating in the given mode. cur_pb may be
NULL when no pb of this type has been instantiated yet; when non-NULL it is
used to skip child types whose instances are all occupied. Recurses
depth-first through the mode/child hierarchy until a feasible primitive slot
is found. */
static boolean models_feasible(enum e_packer_algorithm packer_algorithm, int iblk, const t_pb_type *cur_pb_type, t_pb *cur_pb, int mode) {
struct s_linked_vptr *cur_model;
int i, j, k, cur_ptr;
struct s_ilink *ptr;
boolean feasible;
const t_pb_type *child_pb_type;
/* Quick reject: the block's model must appear in this pb_type's list of
contained models. */
cur_model = cur_pb_type->models_contained;
assert(cur_pb == NULL || cur_pb->pb_graph_node->pb_type == cur_pb_type);
assert(cur_pb == NULL || cur_pb->mode == mode);
feasible = FALSE;
while(cur_model) {
if(cur_model->data_vptr == logical_block[iblk].model) {
feasible = TRUE;
break;
}
cur_model = cur_model->next;
}
if(!feasible) {
return FALSE;
}
if(packer_algorithm == PACK_BRUTE_FORCE) {
/* let the brute force packer determine if an empty slot exists */
return TRUE;
}
feasible = FALSE;
if(cur_pb_type->num_modes == 0) {
/* Primitive pb_type: check the block against it directly. */
if(hack_force_safe_latch) {
/* TODO: Either remove hack or actually make it a feature properly
hack to make the LUT get packed before the LATCH gets packed for cases that the LATCH can be absorbed
for fracturable LUT architectures
*/
if(strcmp(logical_block[iblk].model->name, "latch") == 0) {
/* Net driving this latch's first input pin and the block driving it. */
i = logical_block[iblk].input_nets[0][0];
j = vpack_net[i].node_block[0];
if(logical_block[j].clb_index == NO_CLUSTER && vpack_net[i].num_sinks == 1 &&
strcmp(logical_block[j].model->name, "names") == 0) {
/* Driver is an unclustered LUT that feeds only this latch. Scan
downward for the largest input count that still has unclustered
blocks queued. */
cur_ptr = logical_block[j].model->inputs[0].size;
ptr = unclustered_list_head[cur_ptr].next;
while(ptr == NULL && cur_ptr > 0) {
cur_ptr--;
ptr = unclustered_list_head[cur_ptr].next;
}
/* Defer the latch while larger LUTs remain, so the LUT gets packed
first and can later absorb this latch. */
if (cur_ptr > 2)
return FALSE;
}
}
}
return primitive_type_feasible(iblk, cur_pb_type, NULL, OPEN);
}
/* Non-primitive: try each child pb_type of the selected mode. */
for(i = 0; i < cur_pb_type->modes[mode].num_pb_type_children; i++) {
child_pb_type = &cur_pb_type->modes[mode].pb_type_children[i];
if(cur_pb) {
/* Skip this child type if every instance of it is already named (occupied). */
for(k = 0; k < child_pb_type->num_pb && cur_pb->child_pbs && cur_pb->child_pbs[i]; k++) {
if(cur_pb->child_pbs[i][k].name == NULL) {
break;
}
}
if(k == child_pb_type->num_pb) {
/* no free child */
continue;
}
}
if(child_pb_type->num_modes == 0) {
feasible = primitive_type_feasible(iblk, &cur_pb_type->modes[mode].pb_type_children[i], NULL, OPEN);
if(feasible) {
return TRUE;
}
} else {
/* Child has modes of its own: any feasible mode suffices. cur_pb is
passed as NULL because the child pb may not be instantiated yet. */
for(j = 0; j < cur_pb_type->modes[mode].pb_type_children[i].num_modes; j++) {
feasible = models_feasible(packer_algorithm, iblk, &cur_pb_type->modes[mode].pb_type_children[i], NULL, j);
if(feasible) {
return TRUE;
}
}
}
}
return FALSE;
}
static boolean primitive_feasible(int iblk, t_pb *cur_pb) {
	/* Decides whether logical block iblk may legally occupy the primitive pb
	 * cur_pb.  An already-occupied slot is never feasible.  For MEMORY_CLASS
	 * primitives, all occupied siblings must share their non-data nets, so an
	 * occupied sibling (if one exists) is located and handed to the detailed
	 * pin-matching check. */
	const t_pb_type *prim_type;
	t_pb *sibling_container;	/* parent pb; non-NULL only for memory class */
	int sibling_blk;		/* logical block found in a sibling slot */
	int child;

	prim_type = cur_pb->pb_graph_node->pb_type;
	sibling_container = NULL;
	sibling_blk = OPEN;
	assert(prim_type->num_modes == 0);	/* must be a primitive */

	if(cur_pb->logical_block != OPEN) {
		/* Slot already holds a logical block. */
		return FALSE;
	}

	if(prim_type->class_type == MEMORY_CLASS) {
		/* Memory slices are special: every sibling must share all nets,
		 * including open ones, except for data nets.  Look for an occupied
		 * sibling to compare against. */
		sibling_container = cur_pb->parent_pb;
		for(child = 0; child < prim_type->parent_mode->num_pb_type_children; child++) {
			if(sibling_container->child_pbs[cur_pb->mode][child].name != NULL && sibling_container->child_pbs[cur_pb->mode][child].logical_block != OPEN) {
				sibling_blk = sibling_container->child_pbs[cur_pb->mode][child].logical_block;
			}
		}
		if(sibling_blk == OPEN) {
			sibling_container = NULL;	/* no occupied sibling to match */
		}
	}
	return primitive_type_feasible(iblk, prim_type, sibling_container, sibling_blk);
}
static boolean primitive_type_feasible(int iblk, const t_pb_type *cur_pb_type, t_pb *memory_class_pb, int sibling_memory_blk) {
	/* Checks whether logical block iblk can map onto primitive type
	 * cur_pb_type: every used pin of each of the block's model ports must
	 * correspond to a physical port wide enough to hold it.  When
	 * memory_class_pb is non-NULL (memory primitives), all non-"data" pins
	 * must additionally carry exactly the same nets as the occupied sibling
	 * sibling_memory_blk, since memory slices share address/control signals.
	 * Returns TRUE if the block fits, FALSE otherwise. */
	t_model_ports *port;
	int i, j;
	boolean second_pass;

	/* Walk the block's input ports first, then (second pass) its output
	 * ports.  TODO: This is slow if the number of ports are large, fix if
	 * becomes a problem */
	port = logical_block[iblk].model->inputs;
	second_pass = FALSE;
	while(port || !second_pass) {
		if(!port) {
			second_pass = TRUE;
			port = logical_block[iblk].model->outputs;
		}
		/* Locate the physical port implementing this model port. */
		for(i = 0; i < cur_pb_type->num_ports; i++) {
			if(cur_pb_type->ports[i].model_port != port) {
				continue;
			}
			/* TODO: This is slow, I only need to check from 0 if it is a
			 * memory block, other blocks I can check from port->size onwards */
			for(j = 0; j < port->size; j++) {
				if(port->dir == IN_PORT && !port->is_clock) {
					if(memory_class_pb) {
						/* Guard the NULL case: strstr() on a NULL port_class
						 * is undefined behavior.  A port with no class cannot
						 * be a "data" port, so it must match the sibling. */
						if(cur_pb_type->ports[i].port_class == NULL ||
						   strstr(cur_pb_type->ports[i].port_class, "data") != cur_pb_type->ports[i].port_class) {
							if(logical_block[iblk].input_nets[port->index][j] != logical_block[sibling_memory_blk].input_nets[port->index][j]) {
								return FALSE;
							}
						}
					}
					/* A used pin beyond the physical port width cannot fit. */
					if(logical_block[iblk].input_nets[port->index][j] != OPEN && j >= cur_pb_type->ports[i].num_pins) {
						return FALSE;
					}
				} else if(port->dir == OUT_PORT) {
					if(memory_class_pb) {
						if(cur_pb_type->ports[i].port_class == NULL ||
						   strstr(cur_pb_type->ports[i].port_class, "data") != cur_pb_type->ports[i].port_class) {
							if(logical_block[iblk].output_nets[port->index][j] != logical_block[sibling_memory_blk].output_nets[port->index][j]) {
								return FALSE;
							}
						}
					}
					if(logical_block[iblk].output_nets[port->index][j] != OPEN && j >= cur_pb_type->ports[i].num_pins) {
						return FALSE;
					}
				} else {
					/* Clock input: logical blocks carry a single clock net. */
					assert(port->dir == IN_PORT && port->is_clock);
					assert(j == 0);
					if(memory_class_pb) {
						if(logical_block[iblk].clock_net != logical_block[sibling_memory_blk].clock_net) {
							return FALSE;
						}
					}
					if(logical_block[iblk].clock_net != OPEN && j >= cur_pb_type->ports[i].num_pins) {
						return FALSE;
					}
				}
			}
			break;	/* model port handled; move on to the next one */
		}
		if(i == cur_pb_type->num_ports) {
			/* Parenthesized explicitly (was an ambiguous &&/|| mix that drew
			 * -Wparentheses): a pass only fails on a missing physical port if
			 * that pass actually had model ports to match. */
			if((logical_block[iblk].model->inputs != NULL && !second_pass) ||
			   (logical_block[iblk].model->outputs != NULL && second_pass)) {
				/* physical port not found */
				return FALSE;
			}
		}
		if(port) {
			port = port->next;
		}
	}
	return TRUE;
}
/*****************************************/
static int get_logical_block_by_num_ext_inputs ( INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb,
INP int ext_inps,
INP enum e_removal_policy remove_flag) {
	/* Returns a logical block with exactly ext_inps used external inputs
	 * that has not been clustered, has no connection to the current cluster,
	 * satisfies the cluster clock constraints, and fits a valid subblock
	 * inside the cluster.  Returns NO_CLUSTER if no such block exists.  When
	 * remove_flag is REMOVE_CLUSTERED, list nodes for blocks that were
	 * already absorbed into some cluster are spliced out as a side effect.
	 * NB: to get a logical block regardless of clock constraints just set
	 * clocks_avail > 0. */
	struct s_ilink *cur, *prev;
	int candidate;

	prev = &unclustered_list_head[ext_inps];
	for (cur = prev->next; cur != NULL; cur = cur->next) {
		candidate = cur->iblk;
		/* TODO: pick a smarter candidate in the future, e.g. the most timing
		 * critical block or some other metric. */
		if (logical_block[candidate].clb_index == NO_CLUSTER) {
			if (outputs_clocks_and_models_feasible(packer_algorithm, candidate, NULL, cur_pb)) {
				return candidate;
			}
			prev = cur;
		} else if (remove_flag == REMOVE_CLUSTERED) {
			prev->next = cur->next;	/* drop stale (already clustered) entry */
		}
	}
	return NO_CLUSTER;
}
/*****************************************/
static int get_free_logical_block_with_most_ext_inputs_for_cluster (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb) {
	/* Finds a new block for the cluster when no remaining block has any
	 * attraction to it (i.e. picks a block unconnected to the cluster).
	 * Returns the feasible logical block with the largest number of used
	 * inputs, or NO_CLUSTER if none qualifies. */
	int inps, limit, chosen;

	limit = cur_pb->pb_stats.inputs_avail;
	if(limit == NOT_VALID) {
		/* Untouched cluster: full input budget is still available. */
		limit = cur_pb->pb_graph_node->pb_type->num_input_pins;
	}
	if(limit >= unclustered_list_head_size) {
		limit = unclustered_list_head_size - 1;	/* clamp to list range */
	}

	chosen = NO_CLUSTER;
	for (inps = limit; inps >= 0; inps--) {
		chosen = get_logical_block_by_num_ext_inputs (packer_algorithm, cur_pb, inps, REMOVE_CLUSTERED);
		if (chosen != NO_CLUSTER) {
			break;
		}
	}
	return chosen;
}
/*****************************************/
static int get_seed_logical_block_with_most_ext_inputs (int max_primitive_inputs) {
	/* Picks the seed block for a brand-new cluster: the not-yet-clustered
	 * logical block with the largest number of used external inputs.  The
	 * chosen block is unlinked from its unclustered list before being
	 * returned.  Returns NO_CLUSTER when every block is already clustered. */
	int ninputs, blk;
	struct s_ilink *cur, *prev;

	for (ninputs = max_primitive_inputs; ninputs >= 0; ninputs--) {
		prev = &unclustered_list_head[ninputs];
		for (cur = prev->next; cur != NULL; prev = cur, cur = cur->next) {
			blk = cur->iblk;
			if (logical_block[blk].clb_index == NO_CLUSTER) {
				prev->next = cur->next;	/* remove blk from list */
				return (blk);
			}
		}
	}
	return (NO_CLUSTER);	/* No suitable logical_block found. */
}
/*****************************************/
static void alloc_and_load_pb_stats (t_pb *pb, int max_models, int max_cluster_size, int max_primitive_inputs) {
	/* Called when a new cluster (or sub-pb) is opened: allocates and resets
	 * all per-pb packing bookkeeping — gain arrays, lazily-computed pin
	 * availabilities, feasible-block priority queues, per-net pin counters
	 * and the marked net/block lists. */
	int i;

	/* Per-candidate gain arrays, indexed by logical block. */
	pb->pb_stats.gain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	pb->pb_stats.lengthgain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	pb->pb_stats.sharinggain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	pb->pb_stats.hillgain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	pb->pb_stats.connectiongain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	for (i = 0; i < num_logical_blocks; i++) {
		pb->pb_stats.gain[i] = 0.0;
		pb->pb_stats.lengthgain[i] = 0.0;
		pb->pb_stats.connectiongain[i] = 0;
		pb->pb_stats.sharinggain[i] = NOT_VALID;
		pb->pb_stats.hillgain[i] = NOT_VALID;
	}

	/* Pin availabilities are filled in lazily the first time they are used. */
	pb->pb_stats.inputs_avail = NOT_VALID;
	pb->pb_stats.outputs_avail = NOT_VALID;
	pb->pb_stats.clocks_avail = NOT_VALID;

	/* Bounded priority queues of feasible candidates, one per model. */
	pb->pb_stats.num_marked_models = 0;
	pb->pb_stats.cur_marked_model = NOT_VALID;
	pb->pb_stats.num_feasible_blocks = my_calloc(max_models, sizeof(int));
	pb->pb_stats.feasible_blocks = my_malloc(max_models * sizeof(int*));
	for (i = 0; i < max_models; i++) {
		pb->pb_stats.feasible_blocks[i] = (int*) my_malloc (AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE * sizeof (int));
	}

	/* Per-net absorption tracking. */
	pb->pb_stats.num_pins_of_net_in_pb = (int *) my_malloc (num_logical_nets * sizeof(int));
	pb->pb_stats.net_output_in_pb = (boolean *) my_malloc (num_logical_nets * sizeof(boolean));
	for (i = 0; i < num_logical_nets; i++) {
		pb->pb_stats.num_pins_of_net_in_pb[i] = 0;
		pb->pb_stats.net_output_in_pb[i] = FALSE;
	}

	/* Scratch lists of nets/blocks touched while filling this cluster. */
	pb->pb_stats.marked_nets = (int *) my_malloc ((max_primitive_inputs + 2) * max_cluster_size * sizeof(int));
	pb->pb_stats.marked_blocks = (int *) my_malloc (num_logical_blocks * sizeof(int));
	pb->pb_stats.num_marked_nets = 0;
	pb->pb_stats.num_marked_blocks = 0;
}
/*****************************************/
/* Allocate space within pb and load data for it given the current logical block
Note: pb space and parent info must be loaded before calling this function
TODO: add-in some heuristic for determing which pb to select if there are multiple choices
*/
static enum e_block_pack_status alloc_and_load_pb(INP enum e_packer_algorithm packer_algorithm, INP t_pb_graph_node *pb_graph_node, INP int iblock,
INOUTP t_pb * pb, INP int max_models, int max_cluster_size, int max_primitive_inputs) {
/* Attempts to place logical block iblock somewhere in the pb hierarchy
rooted at pb (which corresponds to pb_graph_node).  Recurses depth-first
through modes and children, allocating child pb arrays on first visit and
undoing that allocation if the block cannot be placed.  Returns BLK_PASSED
on success or a BLK_FAILED_* status otherwise. */
int i, j, k;
const t_pb_type *pb_type;
boolean is_primitive;
enum e_block_pack_status block_pack_status;
pb->pb_graph_node = pb_graph_node;
pb->logical_block = NO_CLUSTER;
block_pack_status = BLK_STATUS_UNDEFINED;
if(iblock == NO_CLUSTER) {
return BLK_FAILED_FEASIBLE;
}
pb_type = pb_graph_node->pb_type;
is_primitive = (pb_type->num_modes == 0);
/* TODO: It would be good to save and prune away nodes of types that I know won't fit, that way, I don't have to traverse the tree
for types that don't work, this runtime optimization can be done later */
if(is_primitive) {
/* Leaf of the hierarchy: try to bind the block to this primitive. */
if(!primitive_feasible(iblock, pb)) {
return BLK_FAILED_FEASIBLE;
}
assert(pb->logical_block == OPEN);
/* check if I can add and route this block, if they yes, then success, if not, fail */
if(hack_frac_lut_no_legality) {
/* Legality checking disabled: accept the placement unconditionally. */
block_pack_status = BLK_PASSED;
pb->logical_block = iblock;
logical_block[iblock].pb = pb;
} else {
block_pack_status = try_add_block_to_current_cluster_and_primitive(iblock, pb);
}
if(block_pack_status == BLK_PASSED) {
pb->name = my_strdup(logical_block[iblock].name);
}
} else {
if(pb->child_pbs == NULL) {
/* first time here so must allocate space and determine mode of operation */
for(i = 0; i < pb_type->num_modes && block_pack_status != BLK_PASSED; i++) {
pb->mode = i;
if(!models_feasible(packer_algorithm, iblock, pb_type, pb, pb->mode)) {
continue;
}
set_pb_mode(pb, 0, 0); /* default mode is to use mode 1 */
set_pb_mode(pb, i, 1);
pb->child_pbs = my_calloc(pb_type->modes[i].num_pb_type_children, sizeof(t_pb *));
for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
/* allocate space for children if first time allocating space */
pb->child_pbs[j] = my_calloc(pb_type->modes[i].pb_type_children[j].num_pb, sizeof(t_pb));
for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) {
pb->child_pbs[j][k].parent_pb = pb;
alloc_and_load_pb_stats(&pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
}
}
/* Recurse into every child slot of this mode until one accepts the block. */
for(j = 0; j < pb_type->modes[i].num_pb_type_children && block_pack_status != BLK_PASSED; j++) {
for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb && block_pack_status != BLK_PASSED; k++) {
block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
if(block_pack_status == BLK_FAILED_FEASIBLE) {
/* sibling blocks differ only in interconnect,
if feasibility fails then it is failing on something other than interconnect
therefore, no point trying the same thing for siblings */
break;
}
}
}
if(block_pack_status == BLK_PASSED) {
pb->name = my_strdup(logical_block[iblock].name);
} else {
/* Placement failed in this mode: roll back the mode selection and
free everything allocated above before trying the next mode. */
set_pb_mode(pb, i, 0); /* default mode is to use mode 1 */
set_pb_mode(pb, 0, 1);
for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
if(pb->child_pbs[j] != NULL) {
for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) {
free_pb_stats(pb->child_pbs[j][k].pb_stats, max_models);
}
free(pb->child_pbs[j]);
}
}
free(pb->child_pbs);
pb->child_pbs = NULL;
}
}
} else {
/*
* Another block has been allocated inside so must select placement carefully
*/
i = pb->mode; /* mode is already committed; only placement is free */
if(!models_feasible(packer_algorithm, iblock, pb_type, pb, pb->mode)) {
block_pack_status = BLK_FAILED_FEASIBLE;
} else {
for(j = 0; j < pb_type->modes[i].num_pb_type_children && block_pack_status != BLK_PASSED; j++) {
for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb && block_pack_status != BLK_PASSED; k++) {
/* Simple heuristic: assume that any packed cluster is well-packed, try to pack into new child only */
/* Brute force heuristic: traverse whole tree always */
if(pb->child_pbs[j][k].name == NULL) {
block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
} else if (packer_algorithm == PACK_BRUTE_FORCE && pb_type->modes[i].pb_type_children[j].num_modes != 0) {
block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
}
if(block_pack_status == BLK_FAILED_FEASIBLE && packer_algorithm != PACK_BRUTE_FORCE) {
/* Children blocks differ only in routing, don't continue if feasibility fails
Brute force algorithm may have just ran into a node that was full, therefore it should not break out
*/
break;
}
}
}
}
}
}
return block_pack_status;
}
static void update_connection_gain_values (int inet, int clustered_block, t_pb *cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) {
	/* Refreshes the connectiongain contribution of vpack net inet after
	 * clustered_block has been absorbed into the cluster containing cur_pb.
	 * connectiongain rewards candidate blocks whose connections are likely
	 * to be fully absorbed by the cluster: for each still-unclustered block
	 * on the net, the stale contribution (computed before this absorption)
	 * is subtracted and the updated one added, weighted by how many of the
	 * net's pins remain outside the cluster. */
	int iblk, ipin;
	int clb_index;
	int num_internal_connections, num_open_connections, num_stuck_connections;

	num_internal_connections = num_open_connections = num_stuck_connections = 0;
	clb_index = logical_block[clustered_block].clb_index;

	/* Classify every pin on the net: absorbed by this cluster (internal),
	 * still unclustered (open), or committed to another cluster (stuck).
	 * May wish to speed things up by ignoring clock nets since they are
	 * high fanout. */
	for (ipin = 0; ipin <= vpack_net[inet].num_sinks; ipin++) {
		iblk = vpack_net[inet].node_block[ipin];
		if (logical_block[iblk].clb_index == clb_index && is_logical_blk_in_pb(iblk, logical_block[clustered_block].pb)) {
			num_internal_connections++;
		} else if (logical_block[iblk].clb_index == NO_CLUSTER) {
			/* Was compared against OPEN; NO_CLUSTER (same value) is the
			 * constant used for clb_index everywhere else in this file. */
			num_open_connections++;
		} else {
			num_stuck_connections++;
		}
	}

	if (net_relation_to_clustered_block == OUTPUT) {
		for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) {
			iblk = vpack_net[inet].node_block[ipin];
			if (logical_block[iblk].clb_index == NO_CLUSTER) {
				/* Only works if net has one connection to block, TODO: handle case where net has multi-connection to block */
				if (num_internal_connections > 1) {
					/* Remove the contribution made before this absorption. */
					cur_pb->pb_stats.connectiongain[iblk] -= 1 / (float)(vpack_net[inet].num_sinks - (num_internal_connections - 1) + 1 + 1 * num_stuck_connections);
				}
				cur_pb->pb_stats.connectiongain[iblk] += 1 / (float)(vpack_net[inet].num_sinks - num_internal_connections + 1 + 1 * num_stuck_connections);
			}
		}
	}

	if (net_relation_to_clustered_block == INPUT) {
		/* Calculate the connectiongain for the logical_block which is driving
		 * the vpack_net that is an input to a logical_block in the cluster. */
		iblk = vpack_net[inet].node_block[0];
		if (logical_block[iblk].clb_index == NO_CLUSTER) {
			if (num_internal_connections > 1) {
				cur_pb->pb_stats.connectiongain[iblk] -= 1 / (float)(vpack_net[inet].num_sinks - (num_internal_connections - 1) + 1 + 1 * num_stuck_connections);
			}
			cur_pb->pb_stats.connectiongain[iblk] += 1 / (float)(vpack_net[inet].num_sinks - num_internal_connections + 1 + 1 * num_stuck_connections);
		}
	}
}
/*****************************************/
static void update_length_gain_values (int inet, int clustered_block, t_pb *cur_pb,
enum e_net_relation_to_clustered_block net_relation_to_clustered_block) {
	/* Refreshes the timing (length) gain that vpack net inet contributes to
	 * candidate blocks attracted to cluster cur_pb.  A block's lengthgain is
	 * the criticality of the most critical connection tying it to the
	 * cluster; global nets are ignored. */
	float crit;
	int driver_blk, start_pin;
	int blk, pin;

	/* If this net lists its driving block a second time as a sink, skip the
	 * first (driving) pin so the driver is not double counted. */
	if (net_output_feeds_driving_block_input[inet] == FALSE) {
		start_pin = 0;
	} else {
		start_pin = 1;
	}

	if (net_relation_to_clustered_block == OUTPUT && !vpack_net[inet].is_global) {
		/* Net is driven from inside the cluster: credit its unclustered sinks. */
		for (pin = start_pin; pin <= vpack_net[inet].num_sinks; pin++) {
			blk = vpack_net[inet].node_block[pin];
			if (logical_block[blk].clb_index == NO_CLUSTER) {
				crit = net_pin_backward_criticality[inet][pin];
				if (crit > cur_pb->pb_stats.lengthgain[blk]) {
					cur_pb->pb_stats.lengthgain[blk] = crit;
				}
			}
		}
	}

	if (net_relation_to_clustered_block == INPUT && !vpack_net[inet].is_global) {
		/* Calculate the length gain for the logical_block which is driving
		 * the vpack_net that is an input to a logical_block in the cluster. */
		driver_blk = vpack_net[inet].node_block[0];
		if (logical_block[driver_blk].clb_index == NO_CLUSTER) {
			for (pin = 1; pin <= vpack_net[inet].num_sinks; pin++) {
				crit = net_pin_forward_criticality[inet][pin];
				if (crit > cur_pb->pb_stats.lengthgain[driver_blk]) {
					cur_pb->pb_stats.lengthgain[driver_blk] = crit;
				}
			}
		}
	}
}
/*****************************************/
static void mark_and_update_partial_gain (int inet,
enum e_gain_update gain_flag, int clustered_block, int port_on_clustered_block, int pin_on_clustered_block,
boolean timing_driven, boolean connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) {
/* Updates the marked data structures, and if gain_flag is GAIN, *
* the gain when a logic logical_block is added to a cluster. The *
* sharinggain is the number of inputs that a logical_block shares with *
* blocks that are already in the cluster. Hillgain is the *
* reduction in number of pins-required by adding a logical_block to the *
* cluster. The lengthgain is the criticality of the most critical*
* vpack_net between this logical_block and a logical_block in the cluster.
* Walks from the pb directly containing clustered_block up to the cluster
* root, updating each ancestor's bookkeeping for net inet.
* NOTE(review): port_on_clustered_block and pin_on_clustered_block are
* currently unused by this function. */
int iblk, ipin, ifirst;
t_pb *cur_pb;
cur_pb = logical_block[clustered_block].pb->parent_pb;
while(cur_pb) {
/* Mark vpack_net as being visited, if necessary. */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) {
cur_pb->pb_stats.marked_nets[cur_pb->pb_stats.num_marked_nets] = inet;
cur_pb->pb_stats.num_marked_nets++;
}
/* Update gains of affected blocks. */
if (gain_flag == GAIN) {
/* Check if this vpack_net lists its driving logical_block twice. If so, avoid *
* double counting this logical_block by skipping the first (driving) pin. */
if (net_output_feeds_driving_block_input[inet] == 0)
ifirst = 0;
else
ifirst = 1;
/* Sharing/hill gains are only updated the first time this net is seen
in this pb (num_pins_of_net_in_pb still 0 here). */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet]==0) {
for (ipin=ifirst;ipin<=vpack_net[inet].num_sinks;ipin++) {
iblk = vpack_net[inet].node_block[ipin];
if (logical_block[iblk].clb_index == NO_CLUSTER) {
if (cur_pb->pb_stats.sharinggain[iblk] == NOT_VALID) {
/* First time this candidate is touched for this pb: mark it and
seed its gains. */
cur_pb->pb_stats.marked_blocks[cur_pb->pb_stats.num_marked_blocks] = iblk;
cur_pb->pb_stats.num_marked_blocks++;
cur_pb->pb_stats.sharinggain[iblk]=1;
cur_pb->pb_stats.hillgain[iblk] = 1 - num_ext_inputs (iblk);
}
else {
cur_pb->pb_stats.sharinggain[iblk]++;
cur_pb->pb_stats.hillgain[iblk]++;
}
}
}
}
if (connection_driven) {
update_connection_gain_values(inet, clustered_block, cur_pb, net_relation_to_clustered_block);
}
if (timing_driven) {
update_length_gain_values(inet, clustered_block, cur_pb, net_relation_to_clustered_block);
}
}
/* One more pin of this net is now inside this pb.  Must happen after the
gain updates above, which test for the ==0 "first visit" case. */
cur_pb->pb_stats.num_pins_of_net_in_pb[inet]++;
cur_pb = cur_pb->parent_pb;
}
}
/*****************************************/
static void update_total_gain(float alpha, float beta, boolean timing_driven, boolean
connection_driven, boolean global_clocks, t_pb *pb){
	/* Folds the individual gain components (sharinggain, connectiongain,
	 * lengthgain) of every marked candidate block into one total gain value
	 * for pb and each of its ancestors.  The area term is normalized by the
	 * candidate's total pin count; alpha weights the timing component and
	 * beta weights connection absorption. */
	t_pb *hier_pb;
	t_model_ports *port;
	int imark, blk, pin;
	int in_net_idx, out_net_idx;
	int total_input_pins, total_output_pins;
	int used_input_pins, used_output_pins;
	float area_gain;

	for (hier_pb = pb; hier_pb != NULL; hier_pb = hier_pb->parent_pb) {
		for (imark = 0; imark < hier_pb->pb_stats.num_marked_blocks; imark++) {
			blk = hier_pb->pb_stats.marked_blocks[imark];

			/* Todo: the pin counting below was used to explore different
			 * normalization options; it can be made more efficient once one
			 * is decided on. */
			total_input_pins = 0;
			used_input_pins = 0;
			in_net_idx = 0;
			for (port = logical_block[blk].model->inputs; port; port = port->next) {
				total_input_pins += port->size;	/* clock ports counted too */
				if (!port->is_clock) {
					for (pin = 0; pin < port->size; pin++) {
						if (logical_block[blk].input_nets[in_net_idx][pin] != OPEN) {
							used_input_pins++;
						}
					}
					/* NOTE(review): this index advances only for non-clock
					 * ports, mirroring the original code's j counter into
					 * input_nets — confirm against input_nets layout. */
					in_net_idx++;
				}
			}
			if (total_input_pins == 0) {
				total_input_pins = 1;	/* guard against divide-by-zero */
			}

			total_output_pins = 0;
			used_output_pins = 0;
			out_net_idx = 0;
			for (port = logical_block[blk].model->outputs; port; port = port->next) {
				total_output_pins += port->size;
				for (pin = 0; pin < port->size; pin++) {
					if (logical_block[blk].output_nets[out_net_idx][pin] != OPEN) {
						used_output_pins++;
					}
				}
				out_net_idx++;
			}
			/* end todo */

			/* Calculate area-only cost function.
			 * TODO: change back to used pins when done testing */
			if (connection_driven) {
				/* try to absorb as many connections as possible */
				area_gain = ((1 - beta) * (float)hier_pb->pb_stats.sharinggain[blk] + beta * (float)hier_pb->pb_stats.connectiongain[blk]) / (total_input_pins + total_output_pins);
				/* alternative normalization: (used_input_pins + used_output_pins) */
			} else {
				area_gain = ((float)hier_pb->pb_stats.sharinggain[blk]) / (total_input_pins + total_output_pins);
			}
			hier_pb->pb_stats.gain[blk] = area_gain;

			/* Add in timing driven cost into cost function */
			if (timing_driven) {
				hier_pb->pb_stats.gain[blk] = alpha * hier_pb->pb_stats.lengthgain[blk] + (1.0 - alpha) * (float)hier_pb->pb_stats.gain[blk];
			}
		}
	}
}
/*****************************************/
static void update_cluster_stats ( INP int new_blk,
INP int clb_index,
INP boolean *is_clock,
INP boolean global_clocks,
INP float alpha,
INP float beta,
INP boolean timing_driven,
INP boolean connection_driven) {
/* Updates cluster stats such as gain, used pins, and clock structures
after logical block new_blk has been committed to cluster clb_index.
Walks every ancestor pb of the new block, marks the nets it touches,
adjusts input/output/clock pin availability, and finally recomputes the
total gain of all marked candidate blocks. */
int i, ipin, inet, depth;
t_model_ports *port;
t_pb *cur_pb;
/* TODO: what a scary comment from Vaughn, we'll have to watch out for this causing problems */
/* Output can be open so the check is necessary. I don't change *
* the gain for clock outputs when clocks are globally distributed *
* because I assume there is no real need to pack similarly clocked *
* FFs together then. Note that by updating the gain when the *
* clock driver is placed in a cluster implies that the output of *
* LUTs can be connected to clock inputs internally. Probably not *
* true, but it doesn't make much difference, since it will still *
* make local routing of this clock very short, and none of my *
* benchmarks actually generate local clocks (all come from pads). */
logical_block[new_blk].clb_index = clb_index;
cur_pb = logical_block[new_blk].pb->parent_pb;
while(cur_pb) {
depth = cur_pb->pb_graph_node->pb_type->depth;
/* reset list of feasible blocks */
for (i=0;i<cur_pb->pb_stats.num_marked_models;i++){
cur_pb->pb_stats.num_feasible_blocks[i] = 0;
}
cur_pb->pb_stats.num_marked_models = 0;
cur_pb->pb_stats.cur_marked_model = NOT_VALID;
/* determine valid pins: availabilities are lazily initialized to the pb
type's full pin budget on first use */
if (cur_pb->pb_stats.inputs_avail == NOT_VALID)
cur_pb->pb_stats.inputs_avail = cur_pb->pb_graph_node->pb_type->num_input_pins;
if (cur_pb->pb_stats.outputs_avail == NOT_VALID)
cur_pb->pb_stats.outputs_avail = cur_pb->pb_graph_node->pb_type->num_output_pins;
if (cur_pb->pb_stats.clocks_avail == NOT_VALID)
cur_pb->pb_stats.clocks_avail = cur_pb->pb_graph_node->pb_type->num_clock_pins;
cur_pb = cur_pb->parent_pb;
}
/* Pass 1: the new block's output nets. */
port = logical_block[new_blk].model->outputs;
while(port) {
for(ipin = 0; ipin < port->size; ipin++) {
inet = logical_block[new_blk].output_nets[port->index][ipin]; /* Output pin first. */
if (inet != OPEN) {
if (!is_clock[inet] || !global_clocks)
mark_and_update_partial_gain (inet, GAIN, new_blk, port->index, ipin,
timing_driven, connection_driven, OUTPUT);
else
mark_and_update_partial_gain (inet, NO_GAIN, new_blk, port->index, ipin,
timing_driven, connection_driven, OUTPUT);
cur_pb = logical_block[new_blk].pb->parent_pb;
while(cur_pb) {
depth = cur_pb->pb_graph_node->pb_type->depth;
/* The net's driver is now inside this pb. */
cur_pb->pb_stats.net_output_in_pb[inet] = TRUE;
/* An absorbed connection frees the input pin it used to need. */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] > 1 && !is_clock[inet])
cur_pb->pb_stats.inputs_avail++;
/* Unless every sink is already inside, the net still needs an
output pin of this pb. */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] +
net_output_feeds_driving_block_input[inet] != vpack_net[inet].num_sinks + 1) {
cur_pb->pb_stats.outputs_avail--;
} else if (hack_frac_lut_no_legality) {
if(depth > 1)
cur_pb->pb_stats.outputs_avail--; /* frac lut cannot absorb pins after the top two levels */
}
cur_pb = cur_pb->parent_pb;
}
}
}
port = port->next;
}
/* Pass 2: the new block's (non-clock) input nets. */
port = logical_block[new_blk].model->inputs;
while(port) {
if(port->is_clock) {
port = port->next;
continue;
}
for (ipin=0; ipin<port->size; ipin++) { /* VPACK_BLOCK input pins. */
inet = logical_block[new_blk].input_nets[port->index][ipin];
if (inet != OPEN) {
mark_and_update_partial_gain (inet, GAIN, new_blk, port->index, ipin, timing_driven, connection_driven, INPUT);
cur_pb = logical_block[new_blk].pb->parent_pb;
while(cur_pb) {
depth = cur_pb->pb_graph_node->pb_type->depth;
/* First appearance of this net in the pb costs an input pin. */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1)
cur_pb->pb_stats.inputs_avail--;
assert(cur_pb->pb_stats.inputs_avail >= 0);
/* If the driver is inside and all sinks are now absorbed, the
net no longer needs an output pin. */
if (cur_pb->pb_stats.net_output_in_pb[inet] &&
cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == vpack_net[inet].num_sinks + 1) {
cur_pb->pb_stats.outputs_avail++;
if (hack_frac_lut_no_legality && depth > 1)
cur_pb->pb_stats.outputs_avail--; /* frac lut cannot absorb pins after the top two levels */
}
cur_pb = cur_pb->parent_pb;
}
}
}
port = port->next;
}
/* Pass 3: the clock net. */
/* Note: The code below ONLY WORKS when nets that go to clock inputs *
* NEVER go to the input of a VPACK_COMB. It doesn't really make electrical *
* sense for that to happen, and I check this in the check_clocks *
* function. Don't disable that sanity check. */
inet = logical_block[new_blk].clock_net; /* Clock input pin. */
if (inet != OPEN) {
if (global_clocks)
mark_and_update_partial_gain (inet, NO_GAIN, new_blk, 0,
0, timing_driven,
connection_driven, INPUT);
else
mark_and_update_partial_gain (inet, GAIN, new_blk, 0,
0, timing_driven,
connection_driven, INPUT);
cur_pb = logical_block[new_blk].pb->parent_pb;
while(cur_pb) {
depth = cur_pb->pb_graph_node->pb_type->depth;
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1) {
cur_pb->pb_stats.clocks_avail--;
}
else if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 2 && cur_pb->pb_stats.net_output_in_pb[inet]) {
cur_pb->pb_stats.clocks_avail--;
}
/* Note: unlike inputs, I'm currently assuming there is no internal *
* connection in a cluster from VPACK_LUT outputs to clock inputs. */
cur_pb = cur_pb->parent_pb;
}
}
/* Finally fold the updated components into each candidate's total gain. */
update_total_gain(alpha, beta, timing_driven, connection_driven, global_clocks, logical_block[new_blk].pb->parent_pb);
}
static void start_new_cluster(INP enum e_packer_algorithm packer_algorithm,
INP const t_arch * arch,
INOUTP t_block *new_cluster,
INP int clb_index,
INP int istart,
INP float aspect,
INOUTP int *num_used_instances_type,
INOUTP int *num_instances_type,
INP int num_models,
INP int max_cluster_size,
INP int max_primitive_inputs) {
/* Given a starting seed block, start_new_cluster determines the next cluster type to use
It expands the FPGA if it cannot find a legal cluster for the logical block
*/
int i, j;
boolean success;
assert(new_cluster->name == NULL); /* Check if this cluster is really empty */
/* Allocate a dummy initial cluster and load a logical block as a seed and check if it is legal */
/* name is "cb." + seed block name + NUL: strlen + 4 bytes exactly */
new_cluster->name = my_malloc(strlen(logical_block[istart].name) + 4);
sprintf(new_cluster->name, "cb.%s", logical_block[istart].name);
new_cluster->nets = NULL;
new_cluster->type = NULL;
new_cluster->pb = NULL;
new_cluster->x = UNDEFINED;
new_cluster->y = UNDEFINED;
new_cluster->z = UNDEFINED;
success = FALSE;
/* Keep trying cluster types (growing the FPGA if needed) until the seed
block fits somewhere. */
while(!success) {
for(i = 0; i < num_types; i++) {
if(num_used_instances_type[i] < num_instances_type[i]) {
new_cluster->type = &type_descriptors[i];
if(new_cluster->type == EMPTY_TYPE) {
continue;
}
/* Tentatively instantiate a pb of this type and try every mode. */
new_cluster->pb = my_calloc(1, sizeof(t_pb));
alloc_and_load_pb_stats(new_cluster->pb, num_models, max_cluster_size, max_primitive_inputs);
new_cluster->pb->parent_pb = NULL;
new_cluster->pb->pb_graph_node = new_cluster->type->pb_graph_head;
alloc_and_load_legalizer_for_cluster(new_cluster, clb_index, arch);
for(j = 0; j < new_cluster->type->pb_graph_head->pb_type->num_modes && !success; j++) {
new_cluster->pb->mode = j;
if(models_feasible(packer_algorithm, istart, new_cluster->pb->pb_graph_node->pb_type, new_cluster->pb, new_cluster->pb->mode)) {
success = (BLK_PASSED == alloc_and_load_pb(packer_algorithm, new_cluster->type->pb_graph_head, istart, new_cluster->pb, num_models, max_cluster_size, max_primitive_inputs));
}
}
if(success) {
if(hack_frac_lut_no_legality) {
logical_block[istart].clb_index = clb_index;
}
/* TODO: For now, just grab any working cluster, in the future, heuristic needed to grab best complex block based on supply and demand */
break;
} else {
/* This type did not work: undo the tentative instantiation. */
free_legalizer_for_cluster(new_cluster);
free_pb_stats(new_cluster->pb->pb_stats, num_models);
free(new_cluster->pb);
}
}
}
/* Expand FPGA size and recalculate number of available cluster types*/
if (!success) {
/* Grow the shorter dimension so the aspect ratio is preserved. */
if(aspect >= 1.0)
{
ny++;
nx = nint(ny * aspect);
}
else
{
nx++;
ny = nint(nx / aspect);
}
printf("Not enough resources expand FPGA size to x = %d y = %d\n", nx, ny);
if((nx > MAX_SHORT) || (ny > MAX_SHORT)) {
printf("Circuit cannot pack into architecture, architecture size (nx = %d, ny = %d) exceeds packer range.\n", nx, ny);
exit(1);
}
/* NOTE(review): presumably rebuilds the grid to refresh
num_instances_type for the larger FPGA, then frees the grid again --
confirm alloc_and_load_grid's side effects. */
alloc_and_load_grid(num_instances_type);
freeGrid();
}
}
num_used_instances_type[new_cluster->type->index]++;
}
/*****************************************/
static boolean inputs_outputs_models_and_clocks_feasible (INP enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb) {
	/* Determines whether logical block iblk could join the currently open
	 * cluster without exhausting input pins at any level of the pb
	 * hierarchy, and without violating output/clock/model limits (the
	 * latter delegated to outputs_clocks_and_models_feasible).
	 * Returns TRUE if iblk can be added to the cluster, FALSE otherwise. */
	t_pb *pb;
	t_model_ports *port;
	int ipin, inet, onet;
	int avail;
	boolean ok = TRUE;

	/* Walk from the candidate pb up through every ancestor; each level must
	 * keep a non-negative input-pin balance after iblk is added. */
	for (pb = cur_pb; pb != NULL && ok; pb = pb->parent_pb) {
		avail = pb->pb_stats.inputs_avail;
		if (avail == NOT_VALID) {
			/* Stats not initialized yet: every input pin of this pb is free. */
			avail = pb->pb_graph_node->pb_type->num_input_pins;
		}

		/* An output of iblk driving a non-clock net already absorbed by this
		 * pb frees up one input pin (that net no longer needs to enter). */
		for (port = logical_block[iblk].model->outputs; port != NULL; port = port->next) {
			for (ipin = 0; ipin < port->size; ipin++) {
				onet = logical_block[iblk].output_nets[port->index][ipin];
				if (onet != OPEN && pb->pb_stats.num_pins_of_net_in_pb[onet] != 0 && !is_clock[onet]) {
					avail++;
				}
			}
		}

		/* Each input net of iblk not already present inside this pb consumes
		 * one input pin, unless that net is driven by iblk itself. */
		for (port = logical_block[iblk].model->inputs; port != NULL; port = port->next) {
			if (port->is_clock) {
				continue;
			}
			for (ipin = 0; ipin < port->size; ipin++) {
				inet = logical_block[iblk].input_nets[port->index][ipin];
				if (inet != OPEN && pb->pb_stats.num_pins_of_net_in_pb[inet] == 0
						&& net_output_feeds_driving_block_input[inet] == 0) {
					avail--;
				}
			}
		}

		ok = (avail >= 0);
	}

	if (outputs_clocks_and_models_feasible(packer_algorithm, iblk, is_clock, cur_pb) == FALSE) {
		return (FALSE);
	}
	return ok ? TRUE : FALSE;
}
/*****************************************/
static int get_highest_gain_logical_block ( INP enum e_packer_algorithm packer_algorithm,
					    INOUTP t_pb *cur_pb,
					    INP boolean *is_clock,
					    INP boolean (*is_feasible) (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb),
					    INP enum e_gain_type gain_mode) {
	/* Populates (once, when cur_marked_model == NOT_VALID) the per-model
	 * priority queues of feasible candidate blocks: blocks not yet in any
	 * cluster that satisfy the is_feasible predicate.  Subsequent calls pop
	 * candidates, round-robining across the marked-model queues.
	 * gain_mode selects which metric ranks candidates (gain vs hillgain).
	 * Returns the chosen logical block index, or NO_CLUSTER if no feasible
	 * block remains. */
	int i, j, iblk, index;
	int best_hillgain;
	float best_gain;
	int best_block;

	best_gain = (float)NOT_VALID + 1.0;
	best_hillgain = NOT_VALID + 1;
	best_block = NO_CLUSTER;

	if(cur_pb->pb_stats.cur_marked_model == NOT_VALID) {
		/* Queues not built yet.  Divide into two cases for speed only. */
		if (cur_pb->pb_stats.num_marked_blocks < num_logical_blocks / MARKED_FRAC) {
			/* Typical case: not too many blocks have been marked. */
			for (i = 0; i < cur_pb->pb_stats.num_marked_blocks; i++) {
				iblk = cur_pb->pb_stats.marked_blocks[i];
				if (logical_block[iblk].clb_index != NO_CLUSTER)
					continue;
				if (!is_feasible(packer_algorithm, iblk, is_clock, cur_pb))
					continue;
				if (gain_mode != HILL_CLIMBING) {
					add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.gain, cur_pb);
					if (cur_pb->pb_stats.gain[iblk] > best_gain) {
						best_gain = cur_pb->pb_stats.gain[iblk];
						best_block = iblk;
					}
				} else { /* hill climbing */
					add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.hillgain, cur_pb);
					if (cur_pb->pb_stats.hillgain[iblk] > best_hillgain) {
						best_hillgain = cur_pb->pb_stats.hillgain[iblk];
						best_block = iblk;
					}
				}
			}
		} else {
			/* Some high fanout nets marked lots of blocks: scan everything. */
			for (iblk = 0; iblk < num_logical_blocks; iblk++) {
				if (logical_block[iblk].clb_index != NO_CLUSTER)
					continue;
				if (!is_feasible(packer_algorithm, iblk, is_clock, cur_pb))
					continue;
				if (gain_mode != HILL_CLIMBING) {
					add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.gain, cur_pb);
					if (cur_pb->pb_stats.gain[iblk] > best_gain) {
						best_gain = cur_pb->pb_stats.gain[iblk];
						best_block = iblk;
					}
				} else { /* hill climbing */
					add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.hillgain, cur_pb);
					if (cur_pb->pb_stats.hillgain[iblk] > best_hillgain) {
						best_hillgain = cur_pb->pb_stats.hillgain[iblk];
						best_block = iblk;
					}
				}
			}
		}

		/* Locate the model queue that holds the best candidate so popping
		 * starts there.  BUGFIX: only do this when a feasible block was
		 * actually found -- the original code indexed gain[NO_CLUSTER]
		 * (i.e. gain[-1]) and, for empty queues, feasible_blocks[i][-1],
		 * both out-of-bounds reads.  With no candidate, cur_marked_model
		 * stays NOT_VALID and we return NO_CLUSTER below. */
		if (best_block != NO_CLUSTER) {
			for(i = 0; i < cur_pb->pb_stats.num_marked_models; i++) {
				if (cur_pb->pb_stats.num_feasible_blocks[i] == 0)
					continue; /* empty queue: nothing to compare against */
				index = cur_pb->pb_stats.feasible_blocks[i][cur_pb->pb_stats.num_feasible_blocks[i] - 1];
				if (gain_mode != HILL_CLIMBING) {
					if(cur_pb->pb_stats.gain[index] == cur_pb->pb_stats.gain[best_block]) {
						cur_pb->pb_stats.cur_marked_model = i;
						break;
					}
				} else {
					if(cur_pb->pb_stats.hillgain[index] == cur_pb->pb_stats.hillgain[best_block]) {
						cur_pb->pb_stats.cur_marked_model = i;
						break;
					}
				}
			}
			/* best_block was pushed onto its model's queue above, so the
			 * search must have succeeded. */
			assert(cur_pb->pb_stats.num_marked_models == 0 || i < cur_pb->pb_stats.num_marked_models);
		}
	}

	if(cur_pb->pb_stats.cur_marked_model == NOT_VALID) {
		return NO_CLUSTER;
	}

	/* Pop the next candidate.  The doubly-nested loop merely bounds the
	 * search; each iteration inspects the queue of cur_marked_model.
	 * NOTE(review): if cur_marked_model's queue is empty this returns
	 * NO_CLUSTER even when other queues still hold candidates -- preserved
	 * from the original; confirm whether that is intended. */
	for(i = 0; i < cur_pb->pb_stats.num_marked_models; i++) {
		for(j = 0; j < cur_pb->pb_stats.num_feasible_blocks[i]; j++) {
			if(cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model] != 0) {
				cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model]--;
				index = cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model];
				iblk = cur_pb->pb_stats.feasible_blocks[cur_pb->pb_stats.cur_marked_model][index];
				/* Advance round-robin pointer for the next call. */
				cur_pb->pb_stats.cur_marked_model = (cur_pb->pb_stats.cur_marked_model + 1) % cur_pb->pb_stats.num_marked_models;
				return iblk;
			}
		}
	}
	return NO_CLUSTER;
}
/*****************************************/
static int get_logical_block_for_cluster ( INP enum e_packer_algorithm packer_algorithm,
					   INOUTP t_pb *cur_pb,
					   INP boolean *is_clock,
					   INP boolean allow_unrelated_clustering) {
	/* Picks the unclustered vpack block with the greatest gain that also
	 * satisfies the input, clock and capacity constraints of cur_pb.
	 * Returns NO_CLUSTER when no suitable block exists. */
	int blk;

	/* If cannot pack into primitive, try packing into cluster */
	blk = get_highest_gain_logical_block(packer_algorithm, cur_pb, is_clock,
			inputs_outputs_models_and_clocks_feasible, NOT_HILL_CLIMBING);

	/* If no blocks have any gain to the current cluster, the code above
	 * finds nothing.  A logical_block with no inputs in common with the
	 * cluster may still be inserted, when unrelated clustering is allowed. */
	if (blk == NO_CLUSTER && allow_unrelated_clustering) {
		blk = get_free_logical_block_with_most_ext_inputs_for_cluster(packer_algorithm, cur_pb);
	}

	return blk;
}
/*****************************************/
static void alloc_and_load_cluster_info (INP int num_clb, INOUTP t_block *clb) {
/* Loads all missing clustering info necessary to complete clustering. */
/* For each complex block, allocates clb[i].nets and fills in the net index
 * attached to every physical pin (OPEN for unused pins).  Pin numbering is
 * assumed to follow pb_type port order: non-clock input ports, then output
 * ports, then clock ports -- see the one-to-one assumption below. */
int i, j, i_clb, node_index, ipin, iclass;
int inport, outport, clockport;
const t_pb_type * pb_type;
t_pb *pb;
for(i_clb = 0; i_clb < num_clb; i_clb++) {
/* NOTE(review): rr_node is a file/global pointer re-targeted at this
 * cluster's internal routing graph; net_num lookups below read from it. */
rr_node = clb[i_clb].pb->rr_graph;
pb_type = clb[i_clb].pb->pb_graph_node->pb_type;
pb = clb[i_clb].pb;
/* One net entry per physical pin of the block type, initially OPEN. */
clb[i_clb].nets = my_malloc(clb[i_clb].type->num_pins * sizeof(int));
for(i = 0; i < clb[i_clb].type->num_pins; i++) {
clb[i_clb].nets[i] = OPEN;
}
inport = outport = clockport = 0;
ipin = 0;
/* Assume top-level pb and clb share a one-to-one connection */
for(i = 0; i < pb_type->num_ports; i++) {
if(!pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) {
/* Data input port: pins must be RECEIVERs and not global. */
for(j = 0; j < pb_type->ports[i].num_pins; j++) {
iclass = clb[i_clb].type->pin_class[ipin];
assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER);
assert(!clb[i_clb].type->is_global_pin[ipin]);
node_index = pb->pb_graph_node->input_pins[inport][j].pin_count_in_cluster;
clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
ipin++;
}
inport++;
} else if(pb_type->ports[i].type == OUT_PORT) {
/* Output port: pins must be DRIVERs. */
for(j = 0; j < pb_type->ports[i].num_pins; j++) {
iclass = clb[i_clb].type->pin_class[ipin];
assert(clb[i_clb].type->class_inf[iclass].type == DRIVER);
node_index = pb->pb_graph_node->output_pins[outport][j].pin_count_in_cluster;
clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
ipin++;
}
outport++;
} else {
/* Remaining case must be a clock input port: global RECEIVER pins. */
assert(pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT);
for(j = 0; j < pb_type->ports[i].num_pins; j++) {
iclass = clb[i_clb].type->pin_class[ipin];
assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER);
assert(clb[i_clb].type->is_global_pin[ipin]);
node_index = pb->pb_graph_node->clock_pins[clockport][j].pin_count_in_cluster;
clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
ipin++;
}
clockport++;
}
}
}
}
/* TODO: Add more error checking, too light */
/*****************************************/
static void check_clustering (int num_clb, t_block *clb, boolean *is_clock) {
	/* Sanity-checks the completed clustering:
	 *  1. every logical block links to a pb that contains it, and the root
	 *     of that pb's hierarchy is the pb of the cluster the block claims
	 *     membership in (clb_index);
	 *  2. no pb subtree contains a logical block that does not link back
	 *     (recursive walk via check_cluster_logical_blocks);
	 *  3. every logical block appears in exactly one cluster.
	 * Exits with an error message on any violation. */
	int i;
	t_pb * cur_pb;
	boolean *blocks_checked;
	blocks_checked = my_calloc(num_logical_blocks, sizeof(boolean));
	/*
	 * Check that each logical block connects to one primitive and that the primitive links up to the parent clb
	 */
	/* BUGFIX: iterate over the logical blocks (num_logical_blocks), not the
	 * unrelated placement-level num_blocks global; blocks_checked and the
	 * final verification loop are both sized by num_logical_blocks. */
	for(i = 0; i < num_logical_blocks; i++) {
		if(logical_block[i].pb->logical_block != i) {
			printf(ERRTAG "pb %s does not contain logical block %s but logical block %s #%d links to pb\n",
				logical_block[i].pb->name, logical_block[i].name, logical_block[i].name, i);
			exit(1);
		}
		cur_pb = logical_block[i].pb;
		assert(strcmp(cur_pb->name, logical_block[i].name) == 0);
		/* Climb to the root pb of the hierarchy containing this block. */
		while(cur_pb->parent_pb) {
			cur_pb = cur_pb->parent_pb;
			assert(cur_pb->name);
		}
		/* BUGFIX: compare against the pb of the cluster this block claims
		 * to belong to; the original read clb[num_clb].pb, one past the
		 * end of the clb array. */
		if(cur_pb != clb[logical_block[i].clb_index].pb) {
			printf(ERRTAG "CLB %s does not match CLB contained by pb %s\n",
				cur_pb->name, logical_block[i].pb->name);
			exit(1);
		}
	}
	/* Check that I do not have spurious links in children pbs */
	for(i = 0; i < num_clb; i++) {
		check_cluster_logical_blocks(clb[i].pb, blocks_checked);
	}
	/* Every logical block must have been visited exactly once. */
	for(i = 0; i < num_logical_blocks; i++) {
		if(blocks_checked[i] == FALSE) {
			printf(ERRTAG "Logical block %s #%d not found in any cluster\n", logical_block[i].name, i);
			exit(1);
		}
	}
	free(blocks_checked);
}
/* TODO: May want to check that all logical blocks are actually reached (low priority, nice to have) */
static void check_cluster_logical_blocks (t_pb *pb, boolean *blocks_checked) {
	/* Recursively verifies the pb subtree rooted at pb: each primitive's
	 * logical block must not already be claimed elsewhere and must link
	 * back to its pb; every container pb must hold at least one named
	 * child.  blocks_checked[iblk] is set TRUE for each primitive found. */
	const t_pb_type *type;
	int ichild, ipb, iblk;
	boolean found_child;

	type = pb->pb_graph_node->pb_type;

	if(type->num_modes == 0) {
		/* Leaf (primitive) pb. */
		iblk = pb->logical_block;
		if(iblk == OPEN) {
			return;
		}
		if(blocks_checked[iblk] != FALSE) {
			printf(ERRTAG "pb %s contains logical block %s #%d but logical block is already contained in another pb\n",
				pb->name, logical_block[iblk].name, iblk);
			exit(1);
		}
		blocks_checked[iblk] = TRUE;
		if(pb != logical_block[iblk].pb) {
			printf(ERRTAG "pb %s contains logical block %s #%d but logical block does not link to pb\n",
				pb->name, logical_block[iblk].name, iblk);
			exit(1);
		}
		return;
	}

	/* Container pb: recurse into every instantiated child; all container
	 * pbs must contain at least one named child. */
	found_child = FALSE;
	for(ichild = 0; ichild < type->modes[pb->mode].num_pb_type_children; ichild++) {
		if(pb->child_pbs[ichild] == NULL) {
			continue;
		}
		for(ipb = 0; ipb < type->modes[pb->mode].pb_type_children[ichild].num_pb; ipb++) {
			if(pb->child_pbs[ichild][ipb].name != NULL) {
				found_child = TRUE;
				check_cluster_logical_blocks(&pb->child_pbs[ichild][ipb], blocks_checked);
			}
		}
	}
	assert(found_child);
}
static int get_most_critical_unclustered_block(void) {
	/* Returns the not-yet-clustered logical block with the highest timing
	 * criticality, resuming the scan of critindexarray from the global
	 * indexofcrit (reset to 0 whenever sort_blocks_by_criticality runs).
	 * calculate_timing_information must be called first, or this returns
	 * garbage.  Returns NO_CLUSTER when every block is already clustered. */
	int icrit, candidate;

	for (icrit = indexofcrit; icrit < num_logical_blocks; icrit++) {
		candidate = critindexarray[icrit];
		if (logical_block[candidate].clb_index != NO_CLUSTER) {
			continue; /* already packed: keep scanning */
		}
		indexofcrit = icrit + 1; /* next call resumes after this block */
		return candidate;
	}

	/* No unclustered blocks remain. */
	return NO_CLUSTER;
}
static void free_cb (t_pb *pb, int max_models) {
	/* Releases the routing-resource graph attached to a top-level complex
	 * block, then frees the pb hierarchy itself via free_pb. */
	const t_pb_type *type;
	int inode, num_rr_nodes;

	type = pb->pb_graph_node->pb_type;
	num_rr_nodes = pb->pb_graph_node->total_pb_pins
			+ type->num_input_pins + type->num_output_pins + type->num_clock_pins;

	for (inode = 0; inode < num_rr_nodes; inode++) {
		/* free(NULL) is a no-op, so no NULL guards are needed. */
		free(pb->rr_graph[inode].edges);
		free(pb->rr_graph[inode].switches);
	}
	free(pb->rr_graph);

	free_pb(pb, max_models);
}
static void free_pb (t_pb *pb, int max_models) {
	/* Recursively frees the children and name of pb (the struct itself is
	 * owned by the caller).  max_models is forwarded unchanged through the
	 * recursion. */
	const t_pb_type * pb_type;
	int i, j, mode;
	pb_type = pb->pb_graph_node->pb_type;
	if(pb_type->blif_model == NULL) {
		/* Container pb: free in-use children depth-first. */
		mode = pb->mode;
		for(i = 0; i < pb_type->modes[mode].num_pb_type_children && pb->child_pbs != NULL; i++) {
			for(j = 0; j < pb_type->modes[mode].pb_type_children[i].num_pb && pb->child_pbs[i] != NULL; j++) {
				if(pb->child_pbs[i][j].name != NULL) {
					free_pb(&pb->child_pbs[i][j], max_models);
				}
			}
			free(pb->child_pbs[i]); /* free(NULL) is a no-op */
		}
		/* BUGFIX: original read "if(pb->child_pbs);" -- the stray semicolon
		 * made the guard an empty statement.  free(NULL) is harmless, so
		 * the guard is simply dropped. */
		free(pb->child_pbs);
		pb->child_pbs = NULL;
		free(pb->name);
		pb->name = NULL;
	} else {
		/* Primitive pb: only the name is owned here. */
		free(pb->name);
		pb->name = NULL;
	}
}
static void free_pb_stats(t_pb_stats pb_stats, int max_models) {
	/* Frees every dynamically-allocated array inside a t_pb_stats.
	 * pb_stats.gain == NULL is treated as "nothing allocated" and skipped.
	 *
	 * WARNING: pb_stats is passed BY VALUE, so after this returns the
	 * caller's copy still holds the now-dangling pointers; callers must
	 * not reuse or re-free them.  (The original code set pb_stats.gain =
	 * NULL at the end, but that dead store only touched the local copy
	 * and has been removed.) */
	int i;
	if(pb_stats.gain == NULL) {
		return;
	}
	free(pb_stats.gain);
	free(pb_stats.lengthgain);
	free(pb_stats.sharinggain);
	free(pb_stats.hillgain);
	free(pb_stats.connectiongain);
	free(pb_stats.num_feasible_blocks);
	for(i = 0; i < max_models; i++) {
		free(pb_stats.feasible_blocks[i]);
	}
	free(pb_stats.feasible_blocks);
	free(pb_stats.num_pins_of_net_in_pb);
	free(pb_stats.net_output_in_pb);
	free(pb_stats.marked_nets);
	free(pb_stats.marked_blocks);
}