blob: 0f6640e6181879e4e0a5c88e9f82946cc6cde2b0 [file] [log] [blame]
/*
* Main clustering algorithm
* Author(s): Vaughn Betz (first revision - VPack), Alexander Marquardt (second revision - T-VPack), Jason Luu (third revision - AAPack)
* June 8, 2011
*/
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "cluster.h"
#include "heapsort.h"
#include "output_clustering.h"
#include "output_blif.h"
#include "SetupGrid.h"
#include "read_xml_arch_file.h"
#include "cluster_legality.h"
#include "path_delay2.h"
#include "path_delay.h"
#define AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE 30 /* This value is used to determine the max size of the priority queue for candidates that pass the early filter legality test but not the more detailed routing test */
#define SCALE_NUM_PATHS 1e-2 /*this value is used as a multiplier to assign a *
*slightly higher criticality value to nets that *
*affect a large number of critical paths versus *
*nets that do not have such a broad effect. *
*Note that this multiplier is intentionally very *
*small compared to the total criticality because *
*we want to make sure that vpack_net criticality is *
*primarily determined by slacks, with this acting *
*only as a tie-breaker between otherwise equal nets*/
#define SCALE_DISTANCE_VAL 1e-4 /*this value is used as a multiplier to assign a *
*slightly higher criticality value to nets that *
*are otherwise equal but one is farther *
*from sources (the farther one being made slightly *
*more critical) */
enum e_gain_update {GAIN, NO_GAIN};
enum e_feasibility {FEASIBLE, INFEASIBLE};
enum e_gain_type {HILL_CLIMBING, NOT_HILL_CLIMBING};
enum e_removal_policy {REMOVE_CLUSTERED, LEAVE_CLUSTERED};
enum e_net_relation_to_clustered_block {INPUT, OUTPUT};
/* Linked list structure. Stores one integer (iblk). */
struct s_ilink {int iblk; struct s_ilink *next;};
/* 1/MARKED_FRAC is the fraction of nets or blocks that must be *
* marked in order for the brute force (go through the whole *
* data structure linearly) gain update etc. code to be used. *
* This is done for speed only; make MARKED_FRAC whatever *
* number speeds the code up most. */
#define MARKED_FRAC 2
/* Keeps a linked list of the unclustered blocks to speed up looking for *
* unclustered blocks with a certain number of *external* inputs. *
* [0..lut_size]. Unclustered_list_head[i] points to the head of the *
* list of blocks with i inputs to be hooked up via external interconnect. */
static struct s_ilink *unclustered_list_head;
int unclustered_list_head_size;
static struct s_ilink *memory_pool; /*Declared here so I can free easily.*/
/* Does the logical_block that drives the output of this vpack_net also appear as a *
* receiver (input) pin of the vpack_net? [0..num_logical_nets-1]. If so, then by how much? This is used *
* in the gain routines to avoid double counting the connections from *
* the current cluster to other blocks (hence yielding better *
* clusterings). The only time a logical_block should connect to the same vpack_net *
* twice is when one connection is an output and the other is an input, *
* so this should take care of all multiple connections. */
static int *net_output_feeds_driving_block_input;
static int hack_frac_lut_no_legality;
static int hack_force_safe_latch;
static int indexofcrit; /* index of next most timing critical block */
/* Timing information for blocks */
static float *criticality = NULL;
static int *critindexarray = NULL;
static float **net_pin_backward_criticality = NULL;
static float **net_pin_forward_criticality = NULL;
/*****************************************/
/*local functions*/
/*****************************************/
static int num_ext_inputs (int iblk);
static void check_clocks (boolean *is_clock);
static int get_max_cluster_size(t_pb_type *pb_type);
static int get_max_primitive_inputs(t_pb_type *pb_type);
static int get_max_pb_depth(t_pb_type *pb_type);
#if 0
static void check_for_duplicate_inputs ();
#endif
static boolean is_logical_blk_in_pb(int iblk, t_pb *pb);
static void add_blk_to_pb_stats_candidates(int iblk, float *gain, t_pb *pb);
static void alloc_and_init_clustering (boolean global_clocks, float alpha, float beta, int max_cluster_size, int max_primitive_inputs, int max_pb_depth, int max_models);
static void free_pb_stats_recursive (t_pb *pb, int max_models);
static boolean outputs_clocks_and_models_feasible (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb);
static boolean models_feasible(enum e_packer_algorithm packer_algorithm, int iblk, const t_pb_type *cur_pb_type, t_pb *cur_pb, int mode);
static boolean primitive_feasible(int iblk, t_pb *cur_pb);
static boolean primitive_type_feasible(int iblk, const t_pb_type *cur_pb_type, t_pb *memory_class_pb, int sibling_memory_blk);
static int get_logical_block_by_num_ext_inputs (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb,
INP int ext_inps,
INP enum e_removal_policy remove_flag);
static int get_free_logical_block_with_most_ext_inputs_for_cluster (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb);
static int get_seed_logical_block_with_most_ext_inputs (int max_primitive_inputs);
static void alloc_and_load_pb_stats (t_pb *pb, int max_models, int max_cluster_size, int max_primitive_inputs);
static enum e_block_pack_status alloc_and_load_pb(INP enum e_packer_algorithm packer_algorithm, INP t_pb_graph_node *pb_graph_node, INP int iblock, INOUTP t_pb * pb, INP int max_models, INP int max_cluster_size, INP int max_primitive_inputs);
static void update_connection_gain_values (int inet, int clustered_block, t_pb * cur_pb,
enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
static void update_length_gain_values (int inet, int clustered_block, t_pb* cur_pb,
enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
static void mark_and_update_partial_gain (int inet,
enum e_gain_update gain_flag,
int clustered_block,
int port_on_clustered_block,
int pin_on_clustered_block,
boolean timing_driven,
boolean connection_driven,
enum e_net_relation_to_clustered_block net_relation_to_clustered_block);
static void update_total_gain(float alpha, float beta, boolean timing_driven, boolean
connection_driven, boolean global_clocks, t_pb *pb);
static void update_cluster_stats ( INP int new_blk,
INP int clb_index,
INP boolean *is_clock,
INP boolean global_clocks,
INP float alpha,
INP float beta,
INP boolean timing_driven,
INP boolean connection_driven);
static void start_new_cluster(INP enum e_packer_algorithm packer_algorithm,
INP const t_arch * arch,
INOUTP t_block *new_cluster,
INP int clb_index,
INP int istart,
INP float aspect,
INOUTP int *num_used_instances_type,
INOUTP int *num_instances_type,
INP int num_models,
INP int max_cluster_size,
INP int max_primitive_inputs);
static boolean inputs_outputs_models_and_clocks_feasible (INP enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb);
static int get_highest_gain_logical_block (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb,
INP boolean *is_clock,
INP boolean (*is_feasible)
(enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb),
INP enum e_gain_type gain_mode);
static int get_logical_block_for_cluster (INP enum e_packer_algorithm packer_algorithm,
INP t_pb *cur_pb,
INP boolean *is_clock,
INP boolean allow_unrelated_clustering);
static int get_free_logical_block_with_fewest_ext_inputs (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb);
static int get_most_feasible_logical_block (INOUTP t_pb *cur_pb);
static void alloc_and_load_cluster_info (INP int num_clb, INOUTP t_block *clb);
static void check_clustering (int num_clb, t_block *clb, boolean *is_clock);
static void check_cluster_logical_blocks (t_pb *pb, boolean *blocks_checked);
static int get_most_critical_unclustered_block(void);
static void free_cb (t_pb *pb, int max_models);
static void free_pb_stats(t_pb_stats pb_stats, int max_models);
static void free_pb (t_pb *pb, int max_models);
/*****************************************/
/*globally accessable function*/
/* Top-level packer entry point. Greedily packs every logical block in the
 * netlist into complex blocks (clusters), legalizing each cluster as it is
 * built, then checks the result and writes it to out_fname.
 *
 * Key parameters:
 *   arch              - architecture description (models, block types)
 *   num_models        - number of primitive models in the architecture
 *   global_clocks     - treat clock nets as global (not routed in clusters)
 *   is_clock          - [0..num_logical_nets-1] flags nets that are clocks
 *   hill_climbing_flag- enables allocation of hill-climbing bookkeeping
 *   timing_driven     - build a timing graph and seed/select by criticality
 *   cluster_seed_type - VPACK_TIMING seeds by criticality, else by max inputs
 *   alpha, beta       - gain-function tradeoff weights
 *   packer_algorithm  - greedy vs. brute-force candidate selection
 *   hack_no_legal_frac_lut / hack_safe_latch - legacy hacks (see code)
 *
 * NOTE(review): recompute_timing_after, intra_cluster_net_delay and
 * allow_early_exit are accepted but never used in this function body;
 * early_exit is set FALSE and never becomes TRUE, and
 * critical_path_minimized is written but never read. Presumably leftovers
 * from the historical re-analysis loop (see comment at the
 * blocks_since_last_analysis++ site) -- confirm before removing. */
void do_clustering (const t_arch *arch, int num_models, boolean global_clocks,
boolean *is_clock, boolean hill_climbing_flag,
char *out_fname, boolean timing_driven,
enum e_cluster_seed cluster_seed_type, float alpha, float beta,
int recompute_timing_after, float block_delay,
float intra_cluster_net_delay, float inter_cluster_net_delay,
float aspect,
boolean allow_unrelated_clustering,
boolean allow_early_exit, boolean connection_driven,
enum e_packer_algorithm packer_algorithm,
boolean hack_no_legal_frac_lut, boolean hack_safe_latch){
/* Does the actual work of clustering multiple VPACK_LUT+FF logic blocks *
 * into clusters. */
/*note: I allow timing_driven and connection_driven to be simultaneously true*/
/*in this case, connection_driven is responsible for all clustering decisions*/
/*but timing analysis is still enabled (useful for viewing the constant delay*/
/*results) */
/* Algorithm employed
1. Find type that can legally hold block and create cluster with pb info
2. Populate started cluster
3. Repeat 1 until no more blocks need to be clustered
*/
int istart, i, j, k, m, iblk;
int blocks_since_last_analysis;
int next_blk, prev_blk;
int num_blocks_hill_added;
int num_logical_blocks_clustered;
int max_cluster_size, cur_cluster_size, max_primitive_inputs, cur_primitive_inputs, max_pb_depth, cur_pb_depth;
int *hill_climbing_inputs_avail;
t_block *clb;
t_pb *cur_pb;
int num_clb;
boolean critical_path_minimized;
boolean early_exit;
enum e_block_pack_status block_pack_status;
int *num_used_instances_type, *num_instances_type; /* [0..num_types] Holds array for total number of each cluster_type available */
float **net_slack = NULL;
float num_paths_scaling, distance_scaling;
float crit;
/* Stash the two hacks in file-scope statics so the feasibility checkers can see them. */
hack_frac_lut_no_legality = hack_no_legal_frac_lut;
hack_force_safe_latch = hack_safe_latch;
/* TODO: This is memory inefficient, fix if causes problems */
/* Worst case: one cluster per logical block. */
clb = my_calloc(num_logical_blocks,sizeof(t_block));
num_clb = 0;
istart = NO_CLUSTER;
/* determine bound on cluster size and primitive input size */
max_cluster_size = 0;
max_primitive_inputs = 0;
max_pb_depth = 0;
indexofcrit = 0;
for(i = 0; i < num_types; i++) {
if(EMPTY_TYPE == &type_descriptors[i])
continue;
cur_cluster_size = get_max_cluster_size(type_descriptors[i].pb_type);
cur_primitive_inputs = get_max_primitive_inputs(type_descriptors[i].pb_type);
cur_pb_depth = get_max_pb_depth(type_descriptors[i].pb_type);
if(cur_cluster_size > max_cluster_size) {
max_cluster_size = cur_cluster_size;
}
if(cur_primitive_inputs > max_primitive_inputs) {
max_primitive_inputs = cur_primitive_inputs;
}
if(cur_pb_depth > max_pb_depth) {
max_pb_depth = cur_pb_depth;
}
}
if(hill_climbing_flag) {
hill_climbing_inputs_avail = (int *) my_calloc (max_cluster_size + 1, sizeof(int));
} else {
hill_climbing_inputs_avail = NULL; /* if used, die hard */
}
/* TODO: make better estimate for nx and ny */
nx = ny = 1;
check_clocks (is_clock);
#if 0
check_for_duplicate_inputs ();
#endif
alloc_and_init_clustering (global_clocks, alpha, beta, max_cluster_size, max_primitive_inputs, max_pb_depth, num_models);
blocks_since_last_analysis = 0;
critical_path_minimized = FALSE;
early_exit = FALSE;
num_logical_blocks_clustered = 0;
num_blocks_hill_added = 0;
num_used_instances_type = (int*)my_calloc (num_types, sizeof (int));
num_instances_type = (int*)my_calloc (num_types, sizeof (int));
assert(max_cluster_size < MAX_SHORT); /* Limit maximum number of elements for each cluster */
/* Timing-driven setup: build the pre-packing timing graph and convert slacks
 * into a per-block criticality, with #critical-paths and arrival-time used
 * only as small tie-breakers (see SCALE_* defines at the top of the file). */
if (timing_driven){
net_slack = alloc_and_load_pre_packing_timing_graph(block_delay, inter_cluster_net_delay, arch->models);
load_net_slack(net_slack, 0);
criticality=(float*)my_calloc(num_logical_blocks,sizeof(float));
critindexarray=(int*)my_malloc(num_logical_blocks*sizeof(int));
for(i = 0; i < num_logical_blocks; i++) {
critindexarray[i] = i;
}
/* Calculate criticality based on slacks and tie breakers (# paths, distance from source) */
for(i = 0; i < num_tnodes; i++) {
iblk = tnode[i].block;
num_paths_scaling = SCALE_NUM_PATHS * (float)tnode[i].normalized_total_critical_paths;
distance_scaling = SCALE_DISTANCE_VAL * (float)tnode[i].normalized_T_arr;
crit = (1 - tnode[i].normalized_slack) + num_paths_scaling + distance_scaling;
/* A block's criticality is the max over all its timing nodes. */
if(criticality[iblk] < crit) {
criticality[iblk] = crit;
}
}
net_pin_backward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*));
net_pin_forward_criticality = (float**)my_calloc(num_logical_nets, sizeof(float*));
for(i = 0; i < num_logical_nets; i++) {
net_pin_backward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float));
net_pin_forward_criticality[i] = (float *)my_calloc(vpack_net[i].num_sinks + 1, sizeof(float));
/* Pin 0 is the driver: backward criticality comes from each pin's own
 * block, forward criticality from the net's driving block. */
for(j = 0; j <= vpack_net[i].num_sinks; j++) {
net_pin_backward_criticality[i][j] = criticality[vpack_net[i].node_block[j]];
net_pin_forward_criticality[i][j] = criticality[vpack_net[i].node_block[0]];
}
}
/* Sort block indices by criticality so seeds can be taken in order. */
heapsort(critindexarray, criticality, num_logical_blocks, 1);
/*print_critical_path("clustering_critical_path.echo");*/
if (cluster_seed_type == VPACK_TIMING){
istart=get_most_critical_unclustered_block();
}
else {/*max input seed*/
printf("get_seed_logical_block_with_most_ext_inputs\n");
istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
}
}
else /*cluster seed is max input (since there is no timing information)*/{
istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
}
/* Main loop: each iteration opens one new cluster seeded with istart and
 * fills it greedily until no more candidates fit anywhere in the cluster. */
while (istart != NO_CLUSTER) {
reset_legalizer_for_cluster(&clb[num_clb]);
/* start a new cluster and reset all stats */
start_new_cluster(packer_algorithm, arch, &clb[num_clb], num_clb, istart, aspect, num_used_instances_type, num_instances_type, num_models, max_cluster_size, max_primitive_inputs);
if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD)
{
printf("Complex Block %d: %s type %s\n", num_clb, clb[num_clb].name, clb[num_clb].type->name);
fflush(stdout);
}
update_cluster_stats (istart, num_clb, is_clock, global_clocks, alpha, beta, timing_driven, connection_driven);
num_clb++;
num_logical_blocks_clustered ++;
if (timing_driven && !early_exit) {
blocks_since_last_analysis++;
/*it doesn't make sense to do a timing analysis here since there*
 *is only one logical_block clustered it would not change anything */
}
/* First try to pack primitive into cluster, select next block based on closest open sibling if available */
cur_pb = logical_block[istart].pb->parent_pb;
prev_blk = NO_CLUSTER;
/* Inner loop: walk up the pb hierarchy, trying candidates at each level.
 * prev_blk remembers the last candidate that failed so we only retry a
 * failed block once at a given level before moving up to the parent pb. */
while (cur_pb) {
if(packer_algorithm == PACK_BRUTE_FORCE) { /* if using brute force approach, look at all possible free blocks in cluster */
cur_pb = clb[num_clb - 1].pb;
}
next_blk = get_logical_block_for_cluster(packer_algorithm,
cur_pb,
is_clock,
allow_unrelated_clustering);
block_pack_status = alloc_and_load_pb(packer_algorithm, cur_pb->pb_graph_node, next_blk, cur_pb, num_models, max_cluster_size, max_primitive_inputs);
if(block_pack_status != BLK_PASSED) {
if(next_blk != NO_CLUSTER && prev_blk != next_blk) {
/* First failure for this candidate at this level: report and retry. */
if(block_pack_status == BLK_FAILED_ROUTE) {
#ifdef DEBUG_FAILED_PACKING_CANDIDATES
printf("NO_ROUTE:%s#%d|%s ", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name);
#else
printf(".");
#endif
} else {
#ifdef DEBUG_FAILED_PACKING_CANDIDATES
printf("FAILED_CHECK:%s#%d|%s check %d", logical_block[next_blk].name, next_blk, logical_block[next_blk].model->name, block_pack_status);
#else
printf(".");
#endif
}
prev_blk = next_blk;
} else {
/* No candidate, or the same candidate failed twice: give up at this
 * level and move up to the parent pb (NULL parent ends the loop). */
prev_blk = NO_CLUSTER;
cur_pb = cur_pb->parent_pb;
}
continue;
} else {
/* Continue packing by filling smallest cluster */
if(hack_frac_lut_no_legality) {
logical_block[next_blk].clb_index = num_clb - 1;
}
cur_pb = logical_block[next_blk].pb->parent_pb;
prev_blk = NO_CLUSTER;
#ifdef DEBUG_FAILED_PACKING_CANDIDATES
printf("PASSED:%s#%d ", logical_block[next_blk].name, next_blk);
#else
printf(".");
#endif
}
update_cluster_stats (next_blk, num_clb - 1, is_clock, global_clocks,
alpha, beta, timing_driven, connection_driven);
num_logical_blocks_clustered ++;
if (timing_driven && !early_exit) {
blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */
}
}
if(logical_block[istart].type != VPACK_INPAD && logical_block[istart].type != VPACK_OUTPAD)
{
printf("\n");
}
save_cluster_solution();
/* hack_frac_lut_no_legality: legality was never checked by the router, so
 * assign nets directly to the cluster's input, output and clock pins here,
 * and abort if more nets need pins than the cluster physically has. */
if(hack_frac_lut_no_legality) {
k = 0;
/* Assign each externally-driven, non-global net used in this cluster to
 * the next free input pin (k scans nets; i,j scan input ports/pins). */
for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_input_ports; i++) {
for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_input_pins[i]; j++) {
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
rr_node[clb[num_clb-1].pb->pb_graph_node->input_pins[i][j].pin_count_in_cluster].net_num = k;
k++;
break;
}
k++;
}
}
}
/* check if nets are overused */
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && !vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
printf(ERRTAG "Too many input nets used in cluster %s\n", clb[num_clb-1].name);
assert(0);
}
k++;
}
k = 0;
/* Assign output pins: a net driven inside the cluster needs an output pin
 * when some of its sinks lie outside the cluster. */
for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_output_ports; i++) {
for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_output_pins[i]; j++) {
while(k < num_logical_nets) {
for(m = 0; m <= vpack_net[k].num_sinks; m++) {
if(logical_block[vpack_net[k].node_block[m]].clb_index != num_clb - 1) {
break;
}
}
if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && (clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1))
{
rr_node[clb[num_clb-1].pb->pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num = k;
k++;
break;
}
k++;
}
}
}
while(k < num_logical_nets) {
if(clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && (clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] < vpack_net[k].num_sinks + 1))
{
printf(ERRTAG "Too many output nets used in cluster %s\n", clb[num_clb-1].name);
assert(0);
}
k++;
}
k = 0;
/* Assign clock pins to global nets used by the cluster. */
for(i = 0; i < clb[num_clb-1].pb->pb_graph_node->num_clock_ports; i++) {
for(j = 0; j < clb[num_clb-1].pb->pb_graph_node->num_clock_pins[i]; j++) {
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
rr_node[clb[num_clb-1].pb->pb_graph_node->clock_pins[i][j].pin_count_in_cluster].net_num = k;
k++;
break;
}
k++;
}
}
}
while(k < num_logical_nets) {
if(!clb[num_clb-1].pb->pb_stats.net_output_in_pb[k] && vpack_net[k].is_global && clb[num_clb-1].pb->pb_stats.num_pins_of_net_in_pb[k] != 0)
{
printf(ERRTAG "Too many clock nets used in cluster %s\n", clb[num_clb-1].name);
assert(0);
}
k++;
}
}
/* Pick the seed for the next cluster using the same policy as the first. */
if (timing_driven){
if (num_blocks_hill_added > 0 && !early_exit) {
blocks_since_last_analysis += num_blocks_hill_added;
}
if (cluster_seed_type == VPACK_TIMING){
istart=get_most_critical_unclustered_block();
}
else { /*max input seed*/
istart = get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
}
}
else /*cluster seed is max input (since there is no timing information)*/
istart=get_seed_logical_block_with_most_ext_inputs(max_primitive_inputs);
/* The finished cluster's packing stats are no longer needed. */
free_pb_stats_recursive (clb[num_clb-1].pb, num_models);
}
/* All blocks clustered: tear down timing data, verify the clustering, and
 * emit the packed netlist. */
if (timing_driven){
free_timing_graph(net_slack);
}
free_cluster_legality_checker();
alloc_and_load_cluster_info (num_clb, clb);
check_clustering (num_clb, clb, is_clock);
output_clustering( clb, num_clb, global_clocks, is_clock, out_fname, FALSE);
#ifdef DUMP_BLIF_ECHO
if(!hack_frac_lut_no_legality) /* must have routing graph before dumping blif */
output_blif( clb, num_clb, global_clocks, is_clock, "post_packing_blif.echo", FALSE);
#endif
if(hill_climbing_flag)
{
free (hill_climbing_inputs_avail);
}
for(i = 0; i < num_clb; i++) {
free_cb(clb[i].pb, num_models);
free(clb[i].name);
free(clb[i].nets);
free(clb[i].pb);
}
free(clb);
free (num_used_instances_type);
free (num_instances_type);
free (unclustered_list_head);
free (memory_pool);
free (net_output_feeds_driving_block_input);
}
static int num_ext_inputs (int iblk) {
/* Returns the number of input pins on this logical_block that must be hooked *
* up through external interconnect. That is, the number of input *
* pins used - the number which connect (internally) to the outputs. */
int ext_inps, output_net, ipin, opin;
t_model_ports *port, *out_port;
/* TODO: process to get ext_inps is slow, should cache in lookup table */
ext_inps = 0;
port = logical_block[iblk].model->inputs;
while(port) {
if(port->is_clock == FALSE) {
for(ipin = 0; ipin < port->size; ipin++) {
if(logical_block[iblk].input_nets[port->index][ipin] != OPEN) {
ext_inps++;
}
out_port = logical_block[iblk].model->outputs;
while(out_port) {
for(opin = 0; opin < out_port->size; opin++) {
output_net = logical_block[iblk].output_nets[out_port->index][opin];
if(output_net == OPEN)
continue;
/* TODO: I could speed things up a bit by computing the number of inputs *
* and number of external inputs for each logic logical_block at the start of *
* clustering and storing them in arrays. Look into if speed is a *
* problem. */
if (logical_block[iblk].input_nets[port->index][ipin] == output_net) {
ext_inps--;
break;
}
}
out_port = out_port->next;
}
}
}
port = port->next;
}
assert(ext_inps >= 0);
return (ext_inps);
}
/*****************************************/
static void check_clocks (boolean *is_clock) {
	/* Sanity check: a net flagged as a clock must never also feed an ordinary
	 * logic-block input. That situation is electrically questionable and,
	 * more importantly, would break the clustering code, so we abort. */
	int inet, iblk, ipin;
	t_model_ports *port;

	for (iblk = 0; iblk < num_logical_blocks; iblk++) {
		if (logical_block[iblk].type == VPACK_OUTPAD)
			continue; /* outpads are exempt from this check */
		for (port = logical_block[iblk].model->inputs; port != NULL; port = port->next) {
			for (ipin = 0; ipin < port->size; ipin++) {
				inet = logical_block[iblk].input_nets[port->index][ipin];
				if (inet == OPEN || !is_clock[inet])
					continue;
				printf("Error in check_clocks. Net %d (%s) is a clock, but also\n"
					   "\tconnects to a logic block input on logical_block %d (%s).\n",
					   inet, vpack_net[inet].name, iblk, logical_block[iblk].name);
				printf("This would break the current clustering implementation and is electrically\n"
					   "\tquestionable, so clustering has been aborted.\n");
				exit (1);
			}
		}
	}
}
static int get_max_cluster_size(t_pb_type *pb_type) {
	/* Upper bound on the number of primitives this pb_type can hold.
	 * A leaf (no modes) contributes its replication count num_pb; an
	 * internal node contributes the largest summed capacity over its modes. */
	int imode, ichild;
	int best, mode_total;
	t_pb_type *child;

	if (pb_type->modes == NULL) {
		return pb_type->num_pb;
	}
	best = 0;
	for (imode = 0; imode < pb_type->num_modes; imode++) {
		mode_total = 0;
		for (ichild = 0; ichild < pb_type->modes[imode].num_pb_type_children; ichild++) {
			child = &pb_type->modes[imode].pb_type_children[ichild];
			/* Each child slot is replicated num_pb times. */
			mode_total += child->num_pb * get_max_cluster_size(child);
		}
		if (mode_total > best) {
			best = mode_total;
		}
	}
	return best;
}
static int get_max_primitive_inputs(t_pb_type *pb_type) {
	/* Returns the maximum number of input pins on any primitive (a leaf
	 * pb_type, identified by having a blif_model) reachable from pb_type.
	 * Used to size the unclustered-block lists indexed by input count. */
	int i, j;
	int max_size, child_size;

	if (pb_type->blif_model != NULL) {
		/* Primitive: its own input pin count is the answer. */
		return pb_type->num_input_pins;
	}
	max_size = 0;
	for (i = 0; i < pb_type->num_modes; i++) {
		for (j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
			/* Fix: the original reset a temp variable to 0 once per mode and
			 * then overwrote it on every child iteration -- a dead store.
			 * Only the per-child recursion result matters. */
			child_size = get_max_primitive_inputs(&pb_type->modes[i].pb_type_children[j]);
			if (child_size > max_size) {
				max_size = child_size;
			}
		}
	}
	return max_size;
}
static int get_max_pb_depth(t_pb_type *pb_type) {
	/* Returns the deepest pb_type depth found in the hierarchy rooted at
	 * pb_type (each pb_type records its own depth field). */
	int imode, ichild, deepest, child_depth;

	deepest = pb_type->depth;
	for (imode = 0; imode < pb_type->num_modes; imode++) {
		for (ichild = 0; ichild < pb_type->modes[imode].num_pb_type_children; ichild++) {
			child_depth = get_max_pb_depth(&pb_type->modes[imode].pb_type_children[ichild]);
			if (child_depth > deepest) {
				deepest = child_depth;
			}
		}
	}
	return deepest;
}
/* Determine if logical block is in pb */
static boolean is_logical_blk_in_pb(int iblk, t_pb *pb) {
	/* TRUE iff pb is the pb that holds logical block iblk, or any of that
	 * pb's ancestors in the cluster hierarchy. */
	t_pb *walk;

	for (walk = logical_block[iblk].pb; walk != NULL; walk = walk->parent_pb) {
		if (walk == pb) {
			return TRUE;
		}
	}
	return FALSE;
}
/* Add blk to list of feasible blocks sorted according to gain */
static void add_blk_to_pb_stats_candidates(int iblk, float *gain, t_pb *pb) {
/* Inserts logical block iblk into pb's candidate array
 * pb->pb_stats.feasible_blocks[0], kept sorted in ascending gain order
 * (index 0 = lowest gain). The array is capped at
 * AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE entries; once near capacity the
 * lowest-gain entry is evicted to make room for a higher-gain block. */
int i, j;
i = 0;
/* NOTE(review): i is pinned to 0, so only model slot 0 is ever used and
 * num_marked_models acts as a 0/1 "array initialized" flag -- confirm
 * whether per-model candidate lists were intended here. */
if(i == pb->pb_stats.num_marked_models) {
/* Corresponding model not found, add it */
pb->pb_stats.num_marked_models++;
pb->pb_stats.num_feasible_blocks[i] = 1;
pb->pb_stats.feasible_blocks[i][0] = iblk;
} else {
/* found matching model, bubble sort */
if(pb->pb_stats.num_feasible_blocks[i] >= AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE - 1) {
/* maximum size for array, remove smallest gain element and sort */
/* Only insert if iblk beats the current worst (index 0); shifting
 * entries down overwrites that worst element. */
if(gain[iblk] > gain[pb->pb_stats.feasible_blocks[i][0]]) {
/* single loop insertion sort */
for(j = 0; j < pb->pb_stats.num_feasible_blocks[i] - 1; j++) {
if(gain[iblk] <= gain[pb->pb_stats.feasible_blocks[i][j + 1]]) {
pb->pb_stats.feasible_blocks[i][j] = iblk;
break;
} else {
pb->pb_stats.feasible_blocks[i][j] = pb->pb_stats.feasible_blocks[i][j + 1];
}
}
/* iblk has the highest gain of all: it lands in the top slot. */
if(j == pb->pb_stats.num_feasible_blocks[i] - 1) {
pb->pb_stats.feasible_blocks[i][j] = iblk;
}
}
} else {
/* Expand array and single loop insertion sort */
/* Walk down from the top, shifting higher-gain entries up one slot
 * until iblk's position is found. */
for(j = pb->pb_stats.num_feasible_blocks[i] - 1; j >= 0; j--) {
if(gain[pb->pb_stats.feasible_blocks[i][j]] > gain[iblk]) {
pb->pb_stats.feasible_blocks[i][j + 1] = pb->pb_stats.feasible_blocks[i][j];
} else {
pb->pb_stats.feasible_blocks[i][j + 1] = iblk;
break;
}
}
/* iblk has the lowest gain of all: it lands in slot 0. */
if(j < 0) {
pb->pb_stats.feasible_blocks[i][0] = iblk;
}
pb->pb_stats.num_feasible_blocks[i]++;
}
}
}
/*****************************************/
static void alloc_and_init_clustering (boolean global_clocks,
					float alpha, float beta,
					int max_cluster_size,
					int max_primitive_inputs,
					int max_pb_depth,
					int max_models) {
	/* Allocates and initializes the main clustering data structures:
	 * - marks every logical block unclustered,
	 * - builds the unclustered-block lists bucketed by external input count,
	 * - counts, per net, how many sink pins sit on the net's own driver
	 *   (feedback connections), used later to avoid double counting. */
	int iblk, ipin, inet, ext_inps, driver;
	struct s_ilink *link;

	alloc_and_load_cluster_legality_checker();

	/* No block belongs to any cluster yet. */
	for (iblk = 0; iblk < num_logical_blocks; iblk++) {
		logical_block[iblk].clb_index = NO_CLUSTER;
	}

	/* One list head per possible external input count [0..max_primitive_inputs]. */
	unclustered_list_head = (struct s_ilink *) my_calloc(max_primitive_inputs + 1, sizeof(struct s_ilink));
	unclustered_list_head_size = max_primitive_inputs + 1;
	for (ipin = 0; ipin <= max_primitive_inputs; ipin++) {
		unclustered_list_head[ipin].next = NULL;
	}

	/* All list nodes come from one contiguous pool so they can be freed with
	 * a single free() call later. Each block is pushed onto the front of the
	 * list matching its external input count. */
	memory_pool = (struct s_ilink *) my_malloc (num_logical_blocks * sizeof (struct s_ilink));
	link = memory_pool;
	for (iblk = 0; iblk < num_logical_blocks; iblk++) {
		ext_inps = num_ext_inputs(iblk);
		link->iblk = iblk;
		link->next = unclustered_list_head[ext_inps].next;
		unclustered_list_head[ext_inps].next = link;
		link++;
	}

	/* For each net, count the sink pins that land back on the driving block
	 * itself (pin 0 is the driver; pins 1..num_sinks are sinks). */
	net_output_feeds_driving_block_input = (int *) my_malloc (num_logical_nets * sizeof (int));
	for (inet = 0; inet < num_logical_nets; inet++) {
		net_output_feeds_driving_block_input[inet] = 0;
		driver = vpack_net[inet].node_block[0];
		for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) {
			if (vpack_net[inet].node_block[ipin] == driver) {
				net_output_feeds_driving_block_input[inet]++;
			}
		}
	}
}
/*****************************************/
static void free_pb_stats_recursive (t_pb *pb, int max_models) {
	/* Releases the pb_stats of pb and, recursively, of every instantiated
	 * child pb in the currently selected mode. */
	int ichild, ipb;

	if (pb == NULL) {
		return;
	}
	if (pb->pb_graph_node != NULL
			&& pb->pb_graph_node->pb_type->num_modes != 0
			&& pb->child_pbs != NULL) {
		for (ichild = 0; ichild < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; ichild++) {
			if (pb->child_pbs[ichild] == NULL) {
				continue; /* this child type was never instantiated */
			}
			for (ipb = 0; ipb < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[ichild].num_pb; ipb++) {
				free_pb_stats_recursive (&pb->child_pbs[ichild][ipb], max_models);
			}
		}
	}
	free_pb_stats(pb->pb_stats, max_models);
}
/*****************************************/
static boolean outputs_clocks_and_models_feasible (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb) {
/* Checks if logical_block iblk could be adding to the open cluster without *
 * violating clocking constraints. Returns TRUE if it's OK. Some *
 * parameters are unused since this function needs the same inter- *
 * face as the other feasibility checkers. */
/* Feasibility holds only if clock and output pin budgets stay non-negative
 * at every level from cur_pb up through its ancestors, AND the block's model
 * can be placed somewhere in cur_pb (models_feasible). */
int inet, depth, clocks_avail;
t_model_ports *port;
int ipin, output_net, outputs_avail;
boolean clocks_feasible, outputs_feasible;
t_pb * temp_pb;
/* Assumptions: 1. Clock network unique, can only connect to clock network */
/* 2. Logic block output can't internally connect to clocks. */
clocks_feasible = outputs_feasible = TRUE;
temp_pb = cur_pb;
while(temp_pb && clocks_feasible && outputs_feasible) {
depth = temp_pb->pb_graph_node->pb_type->depth; /* NOTE(review): depth is computed but never used below */
clocks_avail = cur_pb->pb_stats.clocks_avail; /* NOTE(review): reads cur_pb, not temp_pb, even though this loop walks temp_pb up the ancestry -- confirm this is intentional */
if(clocks_avail == NOT_VALID) {
clocks_avail = temp_pb->pb_graph_node->pb_type->num_clock_pins;
}
/* A new clock pin is needed unless this block's clock net is already
 * present in the pb via a sink (not solely via its internal driver). */
inet = logical_block[iblk].clock_net;
if (inet != OPEN) {
if (temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) {
clocks_avail--;
}
else if (temp_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1 &&
temp_pb->pb_stats.net_output_in_pb[inet]) {
clocks_avail--;
}
}
outputs_avail = temp_pb->pb_stats.outputs_avail;
port = logical_block[iblk].model->outputs;
while(port) {
/* Outputs that connect to internal blocks free up an input pin. */
/* Each output net of iblk consumes a pb output pin unless all of its
 * sinks are already inside the pb (feedback pins excluded). */
for(ipin = 0; ipin < port->size; ipin++) {
output_net = logical_block[iblk].output_nets[port->index][ipin];
if (output_net != OPEN) {
if(temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] >= vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]) {
if((temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] != vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net])) {
printf("[INTERNAL ERROR] net %d %s %d != %d\n", output_net, vpack_net[output_net].name,
temp_pb->pb_stats.num_pins_of_net_in_pb[output_net], vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]);
}
assert(temp_pb->pb_stats.num_pins_of_net_in_pb[output_net] == vpack_net[output_net].num_sinks - net_output_feeds_driving_block_input[output_net]);
} else {
outputs_avail--;
}
}
}
port = port->next;
}
port = logical_block[iblk].model->inputs;
while(port) {
if(port->is_clock == TRUE) {
port = port->next;
continue;
}
/* Adding iblk may complete an internally-driven net: if iblk's input is
 * that net's last outstanding sink, the pb output pin reserved for the
 * net is freed. */
for (ipin = 0; ipin < port->size; ipin++) {
inet = logical_block[iblk].input_nets[port->index][ipin];
if(inet != OPEN) {
if(temp_pb->pb_stats.net_output_in_pb[inet] &&
temp_pb->pb_stats.num_pins_of_net_in_pb[inet] + net_output_feeds_driving_block_input[inet] == vpack_net[inet].num_sinks) {
outputs_avail++;
}
}
}
port = port->next;
}
clocks_feasible = (clocks_avail >= 0);
outputs_feasible = (outputs_avail >= 0);
temp_pb = temp_pb->parent_pb;
}
/* Pin budgets alone are not enough: the block's model must also fit. */
if(models_feasible(packer_algorithm, iblk, cur_pb->pb_graph_node->pb_type, cur_pb, cur_pb->mode)) {
if (clocks_feasible && outputs_feasible)
return (TRUE);
else
return (FALSE);
} else {
return FALSE;
}
}
/* Returns TRUE if logical block iblk could conceivably be packed somewhere
inside a pb of type cur_pb_type operating in the given mode. cur_pb may be
NULL when no pb of this type has been instantiated yet; when non-NULL it is
used to skip child types whose instances are all occupied. Recurses
depth-first through the mode/child hierarchy until a feasible primitive slot
is found. */
static boolean models_feasible(enum e_packer_algorithm packer_algorithm, int iblk, const t_pb_type *cur_pb_type, t_pb *cur_pb, int mode) {
struct s_linked_vptr *cur_model;
int i, j, k, cur_ptr;
struct s_ilink *ptr;
boolean feasible;
const t_pb_type *child_pb_type;
/* Quick reject: the block's model must appear in this pb_type's list of
contained models. */
cur_model = cur_pb_type->models_contained;
assert(cur_pb == NULL || cur_pb->pb_graph_node->pb_type == cur_pb_type);
assert(cur_pb == NULL || cur_pb->mode == mode);
feasible = FALSE;
while(cur_model) {
if(cur_model->data_vptr == logical_block[iblk].model) {
feasible = TRUE;
break;
}
cur_model = cur_model->next;
}
if(!feasible) {
return FALSE;
}
if(packer_algorithm == PACK_BRUTE_FORCE) {
/* let the brute force packer determine if an empty slot exists */
return TRUE;
}
feasible = FALSE;
if(cur_pb_type->num_modes == 0) {
/* Primitive pb_type: check the block against it directly. */
if(hack_force_safe_latch) {
/* TODO: Either remove hack or actually make it a feature properly
hack to make the LUT get packed before the LATCH gets packed for cases that the LATCH can be absorbed
for fracturable LUT architectures
*/
if(strcmp(logical_block[iblk].model->name, "latch") == 0) {
/* Net driving this latch's first input pin and the block driving it. */
i = logical_block[iblk].input_nets[0][0];
j = vpack_net[i].node_block[0];
if(logical_block[j].clb_index == NO_CLUSTER && vpack_net[i].num_sinks == 1 &&
strcmp(logical_block[j].model->name, "names") == 0) {
/* Driver is an unclustered LUT that feeds only this latch. Scan
downward for the largest input count that still has unclustered
blocks queued. */
cur_ptr = logical_block[j].model->inputs[0].size;
ptr = unclustered_list_head[cur_ptr].next;
while(ptr == NULL && cur_ptr > 0) {
cur_ptr--;
ptr = unclustered_list_head[cur_ptr].next;
}
/* Defer the latch while larger LUTs remain, so the LUT gets packed
first and can later absorb this latch. */
if (cur_ptr > 2)
return FALSE;
}
}
}
return primitive_type_feasible(iblk, cur_pb_type, NULL, OPEN);
}
/* Non-primitive: try each child pb_type of the selected mode. */
for(i = 0; i < cur_pb_type->modes[mode].num_pb_type_children; i++) {
child_pb_type = &cur_pb_type->modes[mode].pb_type_children[i];
if(cur_pb) {
/* Skip this child type if every instance of it is already named (occupied). */
for(k = 0; k < child_pb_type->num_pb && cur_pb->child_pbs && cur_pb->child_pbs[i]; k++) {
if(cur_pb->child_pbs[i][k].name == NULL) {
break;
}
}
if(k == child_pb_type->num_pb) {
/* no free child */
continue;
}
}
if(child_pb_type->num_modes == 0) {
feasible = primitive_type_feasible(iblk, &cur_pb_type->modes[mode].pb_type_children[i], NULL, OPEN);
if(feasible) {
return TRUE;
}
} else {
/* Child has modes of its own: any feasible mode suffices. cur_pb is
passed as NULL because the child pb may not be instantiated yet. */
for(j = 0; j < cur_pb_type->modes[mode].pb_type_children[i].num_modes; j++) {
feasible = models_feasible(packer_algorithm, iblk, &cur_pb_type->modes[mode].pb_type_children[i], NULL, j);
if(feasible) {
return TRUE;
}
}
}
}
return FALSE;
}
static boolean primitive_feasible(int iblk, t_pb *cur_pb) {
	/* Decides whether logical block iblk may legally occupy the primitive pb
	 * cur_pb.  An already-occupied slot is never feasible.  For MEMORY_CLASS
	 * primitives, all occupied siblings must share their non-data nets, so an
	 * occupied sibling (if one exists) is located and handed to the detailed
	 * pin-matching check. */
	const t_pb_type *prim_type;
	t_pb *sibling_container;	/* parent pb; non-NULL only for memory class */
	int sibling_blk;		/* logical block found in a sibling slot */
	int child;

	prim_type = cur_pb->pb_graph_node->pb_type;
	sibling_container = NULL;
	sibling_blk = OPEN;
	assert(prim_type->num_modes == 0);	/* must be a primitive */

	if(cur_pb->logical_block != OPEN) {
		/* Slot already holds a logical block. */
		return FALSE;
	}

	if(prim_type->class_type == MEMORY_CLASS) {
		/* Memory slices are special: every sibling must share all nets,
		 * including open ones, except for data nets.  Look for an occupied
		 * sibling to compare against. */
		sibling_container = cur_pb->parent_pb;
		for(child = 0; child < prim_type->parent_mode->num_pb_type_children; child++) {
			if(sibling_container->child_pbs[cur_pb->mode][child].name != NULL && sibling_container->child_pbs[cur_pb->mode][child].logical_block != OPEN) {
				sibling_blk = sibling_container->child_pbs[cur_pb->mode][child].logical_block;
			}
		}
		if(sibling_blk == OPEN) {
			sibling_container = NULL;	/* no occupied sibling to match */
		}
	}
	return primitive_type_feasible(iblk, prim_type, sibling_container, sibling_blk);
}
static boolean primitive_type_feasible(int iblk, const t_pb_type *cur_pb_type, t_pb *memory_class_pb, int sibling_memory_blk) {
	/* Checks whether logical block iblk can map onto primitive type
	 * cur_pb_type: every used pin of each of the block's model ports must
	 * correspond to a physical port wide enough to hold it.  When
	 * memory_class_pb is non-NULL (memory primitives), all non-"data" pins
	 * must additionally carry exactly the same nets as the occupied sibling
	 * sibling_memory_blk, since memory slices share address/control signals.
	 * Returns TRUE if the block fits, FALSE otherwise. */
	t_model_ports *port;
	int i, j;
	boolean second_pass;

	/* Walk the block's input ports first, then (second pass) its output
	 * ports.  TODO: This is slow if the number of ports are large, fix if
	 * becomes a problem */
	port = logical_block[iblk].model->inputs;
	second_pass = FALSE;
	while(port || !second_pass) {
		if(!port) {
			second_pass = TRUE;
			port = logical_block[iblk].model->outputs;
		}
		/* Locate the physical port implementing this model port. */
		for(i = 0; i < cur_pb_type->num_ports; i++) {
			if(cur_pb_type->ports[i].model_port != port) {
				continue;
			}
			/* TODO: This is slow, I only need to check from 0 if it is a
			 * memory block, other blocks I can check from port->size onwards */
			for(j = 0; j < port->size; j++) {
				if(port->dir == IN_PORT && !port->is_clock) {
					if(memory_class_pb) {
						/* Guard the NULL case: strstr() on a NULL port_class
						 * is undefined behavior.  A port with no class cannot
						 * be a "data" port, so it must match the sibling. */
						if(cur_pb_type->ports[i].port_class == NULL ||
						   strstr(cur_pb_type->ports[i].port_class, "data") != cur_pb_type->ports[i].port_class) {
							if(logical_block[iblk].input_nets[port->index][j] != logical_block[sibling_memory_blk].input_nets[port->index][j]) {
								return FALSE;
							}
						}
					}
					/* A used pin beyond the physical port width cannot fit. */
					if(logical_block[iblk].input_nets[port->index][j] != OPEN && j >= cur_pb_type->ports[i].num_pins) {
						return FALSE;
					}
				} else if(port->dir == OUT_PORT) {
					if(memory_class_pb) {
						if(cur_pb_type->ports[i].port_class == NULL ||
						   strstr(cur_pb_type->ports[i].port_class, "data") != cur_pb_type->ports[i].port_class) {
							if(logical_block[iblk].output_nets[port->index][j] != logical_block[sibling_memory_blk].output_nets[port->index][j]) {
								return FALSE;
							}
						}
					}
					if(logical_block[iblk].output_nets[port->index][j] != OPEN && j >= cur_pb_type->ports[i].num_pins) {
						return FALSE;
					}
				} else {
					/* Clock input: logical blocks carry a single clock net. */
					assert(port->dir == IN_PORT && port->is_clock);
					assert(j == 0);
					if(memory_class_pb) {
						if(logical_block[iblk].clock_net != logical_block[sibling_memory_blk].clock_net) {
							return FALSE;
						}
					}
					if(logical_block[iblk].clock_net != OPEN && j >= cur_pb_type->ports[i].num_pins) {
						return FALSE;
					}
				}
			}
			break;	/* model port handled; move on to the next one */
		}
		if(i == cur_pb_type->num_ports) {
			/* Parenthesized explicitly (was an ambiguous &&/|| mix that drew
			 * -Wparentheses): a pass only fails on a missing physical port if
			 * that pass actually had model ports to match. */
			if((logical_block[iblk].model->inputs != NULL && !second_pass) ||
			   (logical_block[iblk].model->outputs != NULL && second_pass)) {
				/* physical port not found */
				return FALSE;
			}
		}
		if(port) {
			port = port->next;
		}
	}
	return TRUE;
}
/*****************************************/
static int get_logical_block_by_num_ext_inputs ( INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb,
INP int ext_inps,
INP enum e_removal_policy remove_flag) {
	/* Returns a logical block with exactly ext_inps used external inputs
	 * that has not been clustered, has no connection to the current cluster,
	 * satisfies the cluster clock constraints, and fits a valid subblock
	 * inside the cluster.  Returns NO_CLUSTER if no such block exists.  When
	 * remove_flag is REMOVE_CLUSTERED, list nodes for blocks that were
	 * already absorbed into some cluster are spliced out as a side effect.
	 * NB: to get a logical block regardless of clock constraints just set
	 * clocks_avail > 0. */
	struct s_ilink *cur, *prev;
	int candidate;

	prev = &unclustered_list_head[ext_inps];
	for (cur = prev->next; cur != NULL; cur = cur->next) {
		candidate = cur->iblk;
		/* TODO: pick a smarter candidate in the future, e.g. the most timing
		 * critical block or some other metric. */
		if (logical_block[candidate].clb_index == NO_CLUSTER) {
			if (outputs_clocks_and_models_feasible(packer_algorithm, candidate, NULL, cur_pb)) {
				return candidate;
			}
			prev = cur;
		} else if (remove_flag == REMOVE_CLUSTERED) {
			prev->next = cur->next;	/* drop stale (already clustered) entry */
		}
	}
	return NO_CLUSTER;
}
/*****************************************/
static int get_free_logical_block_with_most_ext_inputs_for_cluster (INP enum e_packer_algorithm packer_algorithm,
INOUTP t_pb *cur_pb) {
	/* Finds a new block for the cluster when no remaining block has any
	 * attraction to it (i.e. picks a block unconnected to the cluster).
	 * Returns the feasible logical block with the largest number of used
	 * inputs, or NO_CLUSTER if none qualifies. */
	int inps, limit, chosen;

	limit = cur_pb->pb_stats.inputs_avail;
	if(limit == NOT_VALID) {
		/* Untouched cluster: full input budget is still available. */
		limit = cur_pb->pb_graph_node->pb_type->num_input_pins;
	}
	if(limit >= unclustered_list_head_size) {
		limit = unclustered_list_head_size - 1;	/* clamp to list range */
	}

	chosen = NO_CLUSTER;
	for (inps = limit; inps >= 0; inps--) {
		chosen = get_logical_block_by_num_ext_inputs (packer_algorithm, cur_pb, inps, REMOVE_CLUSTERED);
		if (chosen != NO_CLUSTER) {
			break;
		}
	}
	return chosen;
}
/*****************************************/
static int get_seed_logical_block_with_most_ext_inputs (int max_primitive_inputs) {
	/* Picks the seed block for a brand-new cluster: the not-yet-clustered
	 * logical block with the largest number of used external inputs.  The
	 * chosen block is unlinked from its unclustered list before being
	 * returned.  Returns NO_CLUSTER when every block is already clustered. */
	int ninputs, blk;
	struct s_ilink *cur, *prev;

	for (ninputs = max_primitive_inputs; ninputs >= 0; ninputs--) {
		prev = &unclustered_list_head[ninputs];
		for (cur = prev->next; cur != NULL; prev = cur, cur = cur->next) {
			blk = cur->iblk;
			if (logical_block[blk].clb_index == NO_CLUSTER) {
				prev->next = cur->next;	/* remove blk from list */
				return (blk);
			}
		}
	}
	return (NO_CLUSTER);	/* No suitable logical_block found. */
}
/*****************************************/
static void alloc_and_load_pb_stats (t_pb *pb, int max_models, int max_cluster_size, int max_primitive_inputs) {
	/* Called when a new cluster (or sub-pb) is opened: allocates and resets
	 * all per-pb packing bookkeeping — gain arrays, lazily-computed pin
	 * availabilities, feasible-block priority queues, per-net pin counters
	 * and the marked net/block lists. */
	int i;

	/* Per-candidate gain arrays, indexed by logical block. */
	pb->pb_stats.gain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	pb->pb_stats.lengthgain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	pb->pb_stats.sharinggain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	pb->pb_stats.hillgain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	pb->pb_stats.connectiongain = (float *) my_malloc (num_logical_blocks * sizeof (float));
	for (i = 0; i < num_logical_blocks; i++) {
		pb->pb_stats.gain[i] = 0.0;
		pb->pb_stats.lengthgain[i] = 0.0;
		pb->pb_stats.connectiongain[i] = 0;
		pb->pb_stats.sharinggain[i] = NOT_VALID;
		pb->pb_stats.hillgain[i] = NOT_VALID;
	}

	/* Pin availabilities are filled in lazily the first time they are used. */
	pb->pb_stats.inputs_avail = NOT_VALID;
	pb->pb_stats.outputs_avail = NOT_VALID;
	pb->pb_stats.clocks_avail = NOT_VALID;

	/* Bounded priority queues of feasible candidates, one per model. */
	pb->pb_stats.num_marked_models = 0;
	pb->pb_stats.cur_marked_model = NOT_VALID;
	pb->pb_stats.num_feasible_blocks = my_calloc(max_models, sizeof(int));
	pb->pb_stats.feasible_blocks = my_malloc(max_models * sizeof(int*));
	for (i = 0; i < max_models; i++) {
		pb->pb_stats.feasible_blocks[i] = (int*) my_malloc (AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE * sizeof (int));
	}

	/* Per-net absorption tracking. */
	pb->pb_stats.num_pins_of_net_in_pb = (int *) my_malloc (num_logical_nets * sizeof(int));
	pb->pb_stats.net_output_in_pb = (boolean *) my_malloc (num_logical_nets * sizeof(boolean));
	for (i = 0; i < num_logical_nets; i++) {
		pb->pb_stats.num_pins_of_net_in_pb[i] = 0;
		pb->pb_stats.net_output_in_pb[i] = FALSE;
	}

	/* Scratch lists of nets/blocks touched while filling this cluster. */
	pb->pb_stats.marked_nets = (int *) my_malloc ((max_primitive_inputs + 2) * max_cluster_size * sizeof(int));
	pb->pb_stats.marked_blocks = (int *) my_malloc (num_logical_blocks * sizeof(int));
	pb->pb_stats.num_marked_nets = 0;
	pb->pb_stats.num_marked_blocks = 0;
}
/*****************************************/
/* Allocate space within pb and load data for it given the current logical block
Note: pb space and parent info must be loaded before calling this function
TODO: add-in some heuristic for determing which pb to select if there are multiple choices
*/
static enum e_block_pack_status alloc_and_load_pb(INP enum e_packer_algorithm packer_algorithm, INP t_pb_graph_node *pb_graph_node, INP int iblock,
INOUTP t_pb * pb, INP int max_models, int max_cluster_size, int max_primitive_inputs) {
/* Attempts to place logical block iblock somewhere in the pb hierarchy
rooted at pb (which corresponds to pb_graph_node).  Recurses depth-first
through modes and children, allocating child pb arrays on first visit and
undoing that allocation if the block cannot be placed.  Returns BLK_PASSED
on success or a BLK_FAILED_* status otherwise. */
int i, j, k;
const t_pb_type *pb_type;
boolean is_primitive;
enum e_block_pack_status block_pack_status;
pb->pb_graph_node = pb_graph_node;
pb->logical_block = NO_CLUSTER;
block_pack_status = BLK_STATUS_UNDEFINED;
if(iblock == NO_CLUSTER) {
return BLK_FAILED_FEASIBLE;
}
pb_type = pb_graph_node->pb_type;
is_primitive = (pb_type->num_modes == 0);
/* TODO: It would be good to save and prune away nodes of types that I know won't fit, that way, I don't have to traverse the tree
for types that don't work, this runtime optimization can be done later */
if(is_primitive) {
/* Leaf of the hierarchy: try to bind the block to this primitive. */
if(!primitive_feasible(iblock, pb)) {
return BLK_FAILED_FEASIBLE;
}
assert(pb->logical_block == OPEN);
/* check if I can add and route this block, if they yes, then success, if not, fail */
if(hack_frac_lut_no_legality) {
/* Legality checking disabled: accept the placement unconditionally. */
block_pack_status = BLK_PASSED;
pb->logical_block = iblock;
logical_block[iblock].pb = pb;
} else {
block_pack_status = try_add_block_to_current_cluster_and_primitive(iblock, pb);
}
if(block_pack_status == BLK_PASSED) {
pb->name = my_strdup(logical_block[iblock].name);
}
} else {
if(pb->child_pbs == NULL) {
/* first time here so must allocate space and determine mode of operation */
for(i = 0; i < pb_type->num_modes && block_pack_status != BLK_PASSED; i++) {
pb->mode = i;
if(!models_feasible(packer_algorithm, iblock, pb_type, pb, pb->mode)) {
continue;
}
set_pb_mode(pb, 0, 0); /* default mode is to use mode 1 */
set_pb_mode(pb, i, 1);
pb->child_pbs = my_calloc(pb_type->modes[i].num_pb_type_children, sizeof(t_pb *));
for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
/* allocate space for children if first time allocating space */
pb->child_pbs[j] = my_calloc(pb_type->modes[i].pb_type_children[j].num_pb, sizeof(t_pb));
for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) {
pb->child_pbs[j][k].parent_pb = pb;
alloc_and_load_pb_stats(&pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
}
}
/* Recurse into every child slot of this mode until one accepts the block. */
for(j = 0; j < pb_type->modes[i].num_pb_type_children && block_pack_status != BLK_PASSED; j++) {
for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb && block_pack_status != BLK_PASSED; k++) {
block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
if(block_pack_status == BLK_FAILED_FEASIBLE) {
/* sibling blocks differ only in interconnect,
if feasibility fails then it is failing on something other than interconnect
therefore, no point trying the same thing for siblings */
break;
}
}
}
if(block_pack_status == BLK_PASSED) {
pb->name = my_strdup(logical_block[iblock].name);
} else {
/* Placement failed in this mode: roll back the mode selection and
free everything allocated above before trying the next mode. */
set_pb_mode(pb, i, 0); /* default mode is to use mode 1 */
set_pb_mode(pb, 0, 1);
for(j = 0; j < pb_type->modes[i].num_pb_type_children; j++) {
if(pb->child_pbs[j] != NULL) {
for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb; k++) {
free_pb_stats(pb->child_pbs[j][k].pb_stats, max_models);
}
free(pb->child_pbs[j]);
}
}
free(pb->child_pbs);
pb->child_pbs = NULL;
}
}
} else {
/*
* Another block has been allocated inside so must select placement carefully
*/
i = pb->mode; /* mode is already committed; only placement is free */
if(!models_feasible(packer_algorithm, iblock, pb_type, pb, pb->mode)) {
block_pack_status = BLK_FAILED_FEASIBLE;
} else {
for(j = 0; j < pb_type->modes[i].num_pb_type_children && block_pack_status != BLK_PASSED; j++) {
for(k = 0; k < pb_type->modes[i].pb_type_children[j].num_pb && block_pack_status != BLK_PASSED; k++) {
/* Simple heuristic: assume that any packed cluster is well-packed, try to pack into new child only */
/* Brute force heuristic: traverse whole tree always */
if(pb->child_pbs[j][k].name == NULL) {
block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
} else if (packer_algorithm == PACK_BRUTE_FORCE && pb_type->modes[i].pb_type_children[j].num_modes != 0) {
block_pack_status = alloc_and_load_pb(packer_algorithm, &pb_graph_node->child_pb_graph_nodes[i][j][k], iblock, &pb->child_pbs[j][k], max_models, max_cluster_size, max_primitive_inputs);
}
if(block_pack_status == BLK_FAILED_FEASIBLE && packer_algorithm != PACK_BRUTE_FORCE) {
/* Children blocks differ only in routing, don't continue if feasibility fails
Brute force algorithm may have just ran into a node that was full, therefore it should not break out
*/
break;
}
}
}
}
}
}
return block_pack_status;
}
static void update_connection_gain_values (int inet, int clustered_block, t_pb *cur_pb, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) {
	/* Refreshes the connectiongain contribution of vpack net inet after
	 * clustered_block has been absorbed into the cluster containing cur_pb.
	 * connectiongain rewards candidate blocks whose connections are likely
	 * to be fully absorbed by the cluster: for each still-unclustered block
	 * on the net, the stale contribution (computed before this absorption)
	 * is subtracted and the updated one added, weighted by how many of the
	 * net's pins remain outside the cluster. */
	int iblk, ipin;
	int clb_index;
	int num_internal_connections, num_open_connections, num_stuck_connections;

	num_internal_connections = num_open_connections = num_stuck_connections = 0;
	clb_index = logical_block[clustered_block].clb_index;

	/* Classify every pin on the net: absorbed by this cluster (internal),
	 * still unclustered (open), or committed to another cluster (stuck).
	 * May wish to speed things up by ignoring clock nets since they are
	 * high fanout. */
	for (ipin = 0; ipin <= vpack_net[inet].num_sinks; ipin++) {
		iblk = vpack_net[inet].node_block[ipin];
		if (logical_block[iblk].clb_index == clb_index && is_logical_blk_in_pb(iblk, logical_block[clustered_block].pb)) {
			num_internal_connections++;
		} else if (logical_block[iblk].clb_index == NO_CLUSTER) {
			/* Was compared against OPEN; NO_CLUSTER (same value) is the
			 * constant used for clb_index everywhere else in this file. */
			num_open_connections++;
		} else {
			num_stuck_connections++;
		}
	}

	if (net_relation_to_clustered_block == OUTPUT) {
		for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) {
			iblk = vpack_net[inet].node_block[ipin];
			if (logical_block[iblk].clb_index == NO_CLUSTER) {
				/* Only works if net has one connection to block, TODO: handle case where net has multi-connection to block */
				if (num_internal_connections > 1) {
					/* Remove the contribution made before this absorption. */
					cur_pb->pb_stats.connectiongain[iblk] -= 1 / (float)(vpack_net[inet].num_sinks - (num_internal_connections - 1) + 1 + 1 * num_stuck_connections);
				}
				cur_pb->pb_stats.connectiongain[iblk] += 1 / (float)(vpack_net[inet].num_sinks - num_internal_connections + 1 + 1 * num_stuck_connections);
			}
		}
	}

	if (net_relation_to_clustered_block == INPUT) {
		/* Calculate the connectiongain for the logical_block which is driving
		 * the vpack_net that is an input to a logical_block in the cluster. */
		iblk = vpack_net[inet].node_block[0];
		if (logical_block[iblk].clb_index == NO_CLUSTER) {
			if (num_internal_connections > 1) {
				cur_pb->pb_stats.connectiongain[iblk] -= 1 / (float)(vpack_net[inet].num_sinks - (num_internal_connections - 1) + 1 + 1 * num_stuck_connections);
			}
			cur_pb->pb_stats.connectiongain[iblk] += 1 / (float)(vpack_net[inet].num_sinks - num_internal_connections + 1 + 1 * num_stuck_connections);
		}
	}
}
/*****************************************/
static void update_length_gain_values (int inet, int clustered_block, t_pb *cur_pb,
enum e_net_relation_to_clustered_block net_relation_to_clustered_block) {
	/* Refreshes the timing (length) gain that vpack net inet contributes to
	 * candidate blocks attracted to cluster cur_pb.  A block's lengthgain is
	 * the criticality of the most critical connection tying it to the
	 * cluster; global nets are ignored. */
	float crit;
	int driver_blk, start_pin;
	int blk, pin;

	/* If this net lists its driving block a second time as a sink, skip the
	 * first (driving) pin so the driver is not double counted. */
	if (net_output_feeds_driving_block_input[inet] == FALSE) {
		start_pin = 0;
	} else {
		start_pin = 1;
	}

	if (net_relation_to_clustered_block == OUTPUT && !vpack_net[inet].is_global) {
		/* Net is driven from inside the cluster: credit its unclustered sinks. */
		for (pin = start_pin; pin <= vpack_net[inet].num_sinks; pin++) {
			blk = vpack_net[inet].node_block[pin];
			if (logical_block[blk].clb_index == NO_CLUSTER) {
				crit = net_pin_backward_criticality[inet][pin];
				if (crit > cur_pb->pb_stats.lengthgain[blk]) {
					cur_pb->pb_stats.lengthgain[blk] = crit;
				}
			}
		}
	}

	if (net_relation_to_clustered_block == INPUT && !vpack_net[inet].is_global) {
		/* Calculate the length gain for the logical_block which is driving
		 * the vpack_net that is an input to a logical_block in the cluster. */
		driver_blk = vpack_net[inet].node_block[0];
		if (logical_block[driver_blk].clb_index == NO_CLUSTER) {
			for (pin = 1; pin <= vpack_net[inet].num_sinks; pin++) {
				crit = net_pin_forward_criticality[inet][pin];
				if (crit > cur_pb->pb_stats.lengthgain[driver_blk]) {
					cur_pb->pb_stats.lengthgain[driver_blk] = crit;
				}
			}
		}
	}
}
/*****************************************/
static void mark_and_update_partial_gain (int inet,
enum e_gain_update gain_flag, int clustered_block, int port_on_clustered_block, int pin_on_clustered_block,
boolean timing_driven, boolean connection_driven, enum e_net_relation_to_clustered_block net_relation_to_clustered_block) {
/* Updates the marked data structures, and if gain_flag is GAIN, *
* the gain when a logic logical_block is added to a cluster. The *
* sharinggain is the number of inputs that a logical_block shares with *
* blocks that are already in the cluster. Hillgain is the *
* reduction in number of pins-required by adding a logical_block to the *
* cluster. The lengthgain is the criticality of the most critical*
* vpack_net between this logical_block and a logical_block in the cluster.
* Walks from the pb directly containing clustered_block up to the cluster
* root, updating each ancestor's bookkeeping for net inet.
* NOTE(review): port_on_clustered_block and pin_on_clustered_block are
* currently unused by this function. */
int iblk, ipin, ifirst;
t_pb *cur_pb;
cur_pb = logical_block[clustered_block].pb->parent_pb;
while(cur_pb) {
/* Mark vpack_net as being visited, if necessary. */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 0) {
cur_pb->pb_stats.marked_nets[cur_pb->pb_stats.num_marked_nets] = inet;
cur_pb->pb_stats.num_marked_nets++;
}
/* Update gains of affected blocks. */
if (gain_flag == GAIN) {
/* Check if this vpack_net lists its driving logical_block twice. If so, avoid *
* double counting this logical_block by skipping the first (driving) pin. */
if (net_output_feeds_driving_block_input[inet] == 0)
ifirst = 0;
else
ifirst = 1;
/* Sharing/hill gains are only updated the first time this net is seen
in this pb (num_pins_of_net_in_pb still 0 here). */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet]==0) {
for (ipin=ifirst;ipin<=vpack_net[inet].num_sinks;ipin++) {
iblk = vpack_net[inet].node_block[ipin];
if (logical_block[iblk].clb_index == NO_CLUSTER) {
if (cur_pb->pb_stats.sharinggain[iblk] == NOT_VALID) {
/* First time this candidate is touched for this pb: mark it and
seed its gains. */
cur_pb->pb_stats.marked_blocks[cur_pb->pb_stats.num_marked_blocks] = iblk;
cur_pb->pb_stats.num_marked_blocks++;
cur_pb->pb_stats.sharinggain[iblk]=1;
cur_pb->pb_stats.hillgain[iblk] = 1 - num_ext_inputs (iblk);
}
else {
cur_pb->pb_stats.sharinggain[iblk]++;
cur_pb->pb_stats.hillgain[iblk]++;
}
}
}
}
if (connection_driven) {
update_connection_gain_values(inet, clustered_block, cur_pb, net_relation_to_clustered_block);
}
if (timing_driven) {
update_length_gain_values(inet, clustered_block, cur_pb, net_relation_to_clustered_block);
}
}
/* One more pin of this net is now inside this pb.  Must happen after the
gain updates above, which test for the ==0 "first visit" case. */
cur_pb->pb_stats.num_pins_of_net_in_pb[inet]++;
cur_pb = cur_pb->parent_pb;
}
}
/*****************************************/
static void update_total_gain(float alpha, float beta, boolean timing_driven, boolean
connection_driven, boolean global_clocks, t_pb *pb){
	/* Folds the individual gain components (sharinggain, connectiongain,
	 * lengthgain) of every marked candidate block into one total gain value
	 * for pb and each of its ancestors.  The area term is normalized by the
	 * candidate's total pin count; alpha weights the timing component and
	 * beta weights connection absorption. */
	t_pb *hier_pb;
	t_model_ports *port;
	int imark, blk, pin;
	int in_net_idx, out_net_idx;
	int total_input_pins, total_output_pins;
	int used_input_pins, used_output_pins;
	float area_gain;

	for (hier_pb = pb; hier_pb != NULL; hier_pb = hier_pb->parent_pb) {
		for (imark = 0; imark < hier_pb->pb_stats.num_marked_blocks; imark++) {
			blk = hier_pb->pb_stats.marked_blocks[imark];

			/* Todo: the pin counting below was used to explore different
			 * normalization options; it can be made more efficient once one
			 * is decided on. */
			total_input_pins = 0;
			used_input_pins = 0;
			in_net_idx = 0;
			for (port = logical_block[blk].model->inputs; port; port = port->next) {
				total_input_pins += port->size;	/* clock ports counted too */
				if (!port->is_clock) {
					for (pin = 0; pin < port->size; pin++) {
						if (logical_block[blk].input_nets[in_net_idx][pin] != OPEN) {
							used_input_pins++;
						}
					}
					/* NOTE(review): this index advances only for non-clock
					 * ports, mirroring the original code's j counter into
					 * input_nets — confirm against input_nets layout. */
					in_net_idx++;
				}
			}
			if (total_input_pins == 0) {
				total_input_pins = 1;	/* guard against divide-by-zero */
			}

			total_output_pins = 0;
			used_output_pins = 0;
			out_net_idx = 0;
			for (port = logical_block[blk].model->outputs; port; port = port->next) {
				total_output_pins += port->size;
				for (pin = 0; pin < port->size; pin++) {
					if (logical_block[blk].output_nets[out_net_idx][pin] != OPEN) {
						used_output_pins++;
					}
				}
				out_net_idx++;
			}
			/* end todo */

			/* Calculate area-only cost function.
			 * TODO: change back to used pins when done testing */
			if (connection_driven) {
				/* try to absorb as many connections as possible */
				area_gain = ((1 - beta) * (float)hier_pb->pb_stats.sharinggain[blk] + beta * (float)hier_pb->pb_stats.connectiongain[blk]) / (total_input_pins + total_output_pins);
				/* alternative normalization: (used_input_pins + used_output_pins) */
			} else {
				area_gain = ((float)hier_pb->pb_stats.sharinggain[blk]) / (total_input_pins + total_output_pins);
			}
			hier_pb->pb_stats.gain[blk] = area_gain;

			/* Add in timing driven cost into cost function */
			if (timing_driven) {
				hier_pb->pb_stats.gain[blk] = alpha * hier_pb->pb_stats.lengthgain[blk] + (1.0 - alpha) * (float)hier_pb->pb_stats.gain[blk];
			}
		}
	}
}
/*****************************************/
static void update_cluster_stats ( INP int new_blk,
INP int clb_index,
INP boolean *is_clock,
INP boolean global_clocks,
INP float alpha,
INP float beta,
INP boolean timing_driven,
INP boolean connection_driven) {
/* Updates cluster stats such as gain, used pins, and clock structures
after logical block new_blk has been committed to cluster clb_index.
Walks every ancestor pb of the new block, marks the nets it touches,
adjusts input/output/clock pin availability, and finally recomputes the
total gain of all marked candidate blocks. */
int i, ipin, inet, depth;
t_model_ports *port;
t_pb *cur_pb;
/* TODO: what a scary comment from Vaughn, we'll have to watch out for this causing problems */
/* Output can be open so the check is necessary. I don't change *
* the gain for clock outputs when clocks are globally distributed *
* because I assume there is no real need to pack similarly clocked *
* FFs together then. Note that by updating the gain when the *
* clock driver is placed in a cluster implies that the output of *
* LUTs can be connected to clock inputs internally. Probably not *
* true, but it doesn't make much difference, since it will still *
* make local routing of this clock very short, and none of my *
* benchmarks actually generate local clocks (all come from pads). */
logical_block[new_blk].clb_index = clb_index;
cur_pb = logical_block[new_blk].pb->parent_pb;
while(cur_pb) {
depth = cur_pb->pb_graph_node->pb_type->depth;
/* reset list of feasible blocks */
for (i=0;i<cur_pb->pb_stats.num_marked_models;i++){
cur_pb->pb_stats.num_feasible_blocks[i] = 0;
}
cur_pb->pb_stats.num_marked_models = 0;
cur_pb->pb_stats.cur_marked_model = NOT_VALID;
/* determine valid pins: availabilities are lazily initialized to the pb
type's full pin budget on first use */
if (cur_pb->pb_stats.inputs_avail == NOT_VALID)
cur_pb->pb_stats.inputs_avail = cur_pb->pb_graph_node->pb_type->num_input_pins;
if (cur_pb->pb_stats.outputs_avail == NOT_VALID)
cur_pb->pb_stats.outputs_avail = cur_pb->pb_graph_node->pb_type->num_output_pins;
if (cur_pb->pb_stats.clocks_avail == NOT_VALID)
cur_pb->pb_stats.clocks_avail = cur_pb->pb_graph_node->pb_type->num_clock_pins;
cur_pb = cur_pb->parent_pb;
}
/* Pass 1: the new block's output nets. */
port = logical_block[new_blk].model->outputs;
while(port) {
for(ipin = 0; ipin < port->size; ipin++) {
inet = logical_block[new_blk].output_nets[port->index][ipin]; /* Output pin first. */
if (inet != OPEN) {
if (!is_clock[inet] || !global_clocks)
mark_and_update_partial_gain (inet, GAIN, new_blk, port->index, ipin,
timing_driven, connection_driven, OUTPUT);
else
mark_and_update_partial_gain (inet, NO_GAIN, new_blk, port->index, ipin,
timing_driven, connection_driven, OUTPUT);
cur_pb = logical_block[new_blk].pb->parent_pb;
while(cur_pb) {
depth = cur_pb->pb_graph_node->pb_type->depth;
/* The net's driver is now inside this pb. */
cur_pb->pb_stats.net_output_in_pb[inet] = TRUE;
/* An absorbed connection frees the input pin it used to need. */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] > 1 && !is_clock[inet])
cur_pb->pb_stats.inputs_avail++;
/* Unless every sink is already inside, the net still needs an
output pin of this pb. */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] +
net_output_feeds_driving_block_input[inet] != vpack_net[inet].num_sinks + 1) {
cur_pb->pb_stats.outputs_avail--;
} else if (hack_frac_lut_no_legality) {
if(depth > 1)
cur_pb->pb_stats.outputs_avail--; /* frac lut cannot absorb pins after the top two levels */
}
cur_pb = cur_pb->parent_pb;
}
}
}
port = port->next;
}
/* Pass 2: the new block's (non-clock) input nets. */
port = logical_block[new_blk].model->inputs;
while(port) {
if(port->is_clock) {
port = port->next;
continue;
}
for (ipin=0; ipin<port->size; ipin++) { /* VPACK_BLOCK input pins. */
inet = logical_block[new_blk].input_nets[port->index][ipin];
if (inet != OPEN) {
mark_and_update_partial_gain (inet, GAIN, new_blk, port->index, ipin, timing_driven, connection_driven, INPUT);
cur_pb = logical_block[new_blk].pb->parent_pb;
while(cur_pb) {
depth = cur_pb->pb_graph_node->pb_type->depth;
/* First appearance of this net in the pb costs an input pin. */
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1)
cur_pb->pb_stats.inputs_avail--;
assert(cur_pb->pb_stats.inputs_avail >= 0);
/* If the driver is inside and all sinks are now absorbed, the
net no longer needs an output pin. */
if (cur_pb->pb_stats.net_output_in_pb[inet] &&
cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == vpack_net[inet].num_sinks + 1) {
cur_pb->pb_stats.outputs_avail++;
if (hack_frac_lut_no_legality && depth > 1)
cur_pb->pb_stats.outputs_avail--; /* frac lut cannot absorb pins after the top two levels */
}
cur_pb = cur_pb->parent_pb;
}
}
}
port = port->next;
}
/* Pass 3: the clock net. */
/* Note: The code below ONLY WORKS when nets that go to clock inputs *
* NEVER go to the input of a VPACK_COMB. It doesn't really make electrical *
* sense for that to happen, and I check this in the check_clocks *
* function. Don't disable that sanity check. */
inet = logical_block[new_blk].clock_net; /* Clock input pin. */
if (inet != OPEN) {
if (global_clocks)
mark_and_update_partial_gain (inet, NO_GAIN, new_blk, 0,
0, timing_driven,
connection_driven, INPUT);
else
mark_and_update_partial_gain (inet, GAIN, new_blk, 0,
0, timing_driven,
connection_driven, INPUT);
cur_pb = logical_block[new_blk].pb->parent_pb;
while(cur_pb) {
depth = cur_pb->pb_graph_node->pb_type->depth;
if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 1) {
cur_pb->pb_stats.clocks_avail--;
}
else if (cur_pb->pb_stats.num_pins_of_net_in_pb[inet] == 2 && cur_pb->pb_stats.net_output_in_pb[inet]) {
cur_pb->pb_stats.clocks_avail--;
}
/* Note: unlike inputs, I'm currently assuming there is no internal *
* connection in a cluster from VPACK_LUT outputs to clock inputs. */
cur_pb = cur_pb->parent_pb;
}
}
/* Finally fold the updated components into each candidate's total gain. */
update_total_gain(alpha, beta, timing_driven, connection_driven, global_clocks, logical_block[new_blk].pb->parent_pb);
}
static void start_new_cluster(INP enum e_packer_algorithm packer_algorithm,
INP const t_arch * arch,
INOUTP t_block *new_cluster,
INP int clb_index,
INP int istart,
INP float aspect,
INOUTP int *num_used_instances_type,
INOUTP int *num_instances_type,
INP int num_models,
INP int max_cluster_size,
INP int max_primitive_inputs) {
/* Given a starting seed block, start_new_cluster determines the next cluster type to use
It expands the FPGA if it cannot find a legal cluster for the logical block
*/
int i, j;
boolean success;
assert(new_cluster->name == NULL); /* Check if this cluster is really empty */
/* Allocate a dummy initial cluster and load a logical block as a seed and check if it is legal */
/* name is "cb." + seed block name + NUL: strlen + 4 bytes exactly */
new_cluster->name = my_malloc(strlen(logical_block[istart].name) + 4);
sprintf(new_cluster->name, "cb.%s", logical_block[istart].name);
new_cluster->nets = NULL;
new_cluster->type = NULL;
new_cluster->pb = NULL;
new_cluster->x = UNDEFINED;
new_cluster->y = UNDEFINED;
new_cluster->z = UNDEFINED;
success = FALSE;
/* Keep trying cluster types (growing the FPGA if needed) until the seed
block fits somewhere. */
while(!success) {
for(i = 0; i < num_types; i++) {
if(num_used_instances_type[i] < num_instances_type[i]) {
new_cluster->type = &type_descriptors[i];
if(new_cluster->type == EMPTY_TYPE) {
continue;
}
/* Tentatively instantiate a pb of this type and try every mode. */
new_cluster->pb = my_calloc(1, sizeof(t_pb));
alloc_and_load_pb_stats(new_cluster->pb, num_models, max_cluster_size, max_primitive_inputs);
new_cluster->pb->parent_pb = NULL;
new_cluster->pb->pb_graph_node = new_cluster->type->pb_graph_head;
alloc_and_load_legalizer_for_cluster(new_cluster, clb_index, arch);
for(j = 0; j < new_cluster->type->pb_graph_head->pb_type->num_modes && !success; j++) {
new_cluster->pb->mode = j;
if(models_feasible(packer_algorithm, istart, new_cluster->pb->pb_graph_node->pb_type, new_cluster->pb, new_cluster->pb->mode)) {
success = (BLK_PASSED == alloc_and_load_pb(packer_algorithm, new_cluster->type->pb_graph_head, istart, new_cluster->pb, num_models, max_cluster_size, max_primitive_inputs));
}
}
if(success) {
if(hack_frac_lut_no_legality) {
logical_block[istart].clb_index = clb_index;
}
/* TODO: For now, just grab any working cluster, in the future, heuristic needed to grab best complex block based on supply and demand */
break;
} else {
/* This type did not work: undo the tentative instantiation. */
free_legalizer_for_cluster(new_cluster);
free_pb_stats(new_cluster->pb->pb_stats, num_models);
free(new_cluster->pb);
}
}
}
/* Expand FPGA size and recalculate number of available cluster types*/
if (!success) {
/* Grow the shorter dimension so the aspect ratio is preserved. */
if(aspect >= 1.0)
{
ny++;
nx = nint(ny * aspect);
}
else
{
nx++;
ny = nint(nx / aspect);
}
printf("Not enough resources expand FPGA size to x = %d y = %d\n", nx, ny);
if((nx > MAX_SHORT) || (ny > MAX_SHORT)) {
printf("Circuit cannot pack into architecture, architecture size (nx = %d, ny = %d) exceeds packer range.\n", nx, ny);
exit(1);
}
/* NOTE(review): presumably rebuilds the grid to refresh
num_instances_type for the larger FPGA, then frees the grid again --
confirm alloc_and_load_grid's side effects. */
alloc_and_load_grid(num_instances_type);
freeGrid();
}
}
num_used_instances_type[new_cluster->type->index]++;
}
/*****************************************/
static boolean inputs_outputs_models_and_clocks_feasible (INP enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb) {
	/* Determines whether logical block iblk could join the currently open
	 * cluster without exhausting input pins at any level of the pb
	 * hierarchy, and without violating output/clock/model limits (the
	 * latter delegated to outputs_clocks_and_models_feasible).
	 * Returns TRUE if iblk can be added to the cluster, FALSE otherwise. */
	t_pb *pb;
	t_model_ports *port;
	int ipin, inet, onet;
	int avail;
	boolean ok = TRUE;

	/* Walk from the candidate pb up through every ancestor; each level must
	 * keep a non-negative input-pin balance after iblk is added. */
	for (pb = cur_pb; pb != NULL && ok; pb = pb->parent_pb) {
		avail = pb->pb_stats.inputs_avail;
		if (avail == NOT_VALID) {
			/* Stats not initialized yet: every input pin of this pb is free. */
			avail = pb->pb_graph_node->pb_type->num_input_pins;
		}

		/* An output of iblk driving a non-clock net already absorbed by this
		 * pb frees up one input pin (that net no longer needs to enter). */
		for (port = logical_block[iblk].model->outputs; port != NULL; port = port->next) {
			for (ipin = 0; ipin < port->size; ipin++) {
				onet = logical_block[iblk].output_nets[port->index][ipin];
				if (onet != OPEN && pb->pb_stats.num_pins_of_net_in_pb[onet] != 0 && !is_clock[onet]) {
					avail++;
				}
			}
		}

		/* Each input net of iblk not already present inside this pb consumes
		 * one input pin, unless that net is driven by iblk itself. */
		for (port = logical_block[iblk].model->inputs; port != NULL; port = port->next) {
			if (port->is_clock) {
				continue;
			}
			for (ipin = 0; ipin < port->size; ipin++) {
				inet = logical_block[iblk].input_nets[port->index][ipin];
				if (inet != OPEN && pb->pb_stats.num_pins_of_net_in_pb[inet] == 0
						&& net_output_feeds_driving_block_input[inet] == 0) {
					avail--;
				}
			}
		}

		ok = (avail >= 0);
	}

	if (outputs_clocks_and_models_feasible(packer_algorithm, iblk, is_clock, cur_pb) == FALSE) {
		return (FALSE);
	}
	return ok ? TRUE : FALSE;
}
/*****************************************/
static int get_highest_gain_logical_block ( INP enum e_packer_algorithm packer_algorithm,
					    INOUTP t_pb *cur_pb,
					    INP boolean *is_clock,
					    INP boolean (*is_feasible) (enum e_packer_algorithm packer_algorithm, int iblk, boolean *is_clock, t_pb *cur_pb),
					    INP enum e_gain_type gain_mode) {
	/* Populates (once, when cur_marked_model == NOT_VALID) the per-model
	 * priority queues of feasible candidate blocks: blocks not yet in any
	 * cluster that satisfy the is_feasible predicate.  Subsequent calls pop
	 * candidates, round-robining across the marked-model queues.
	 * gain_mode selects which metric ranks candidates (gain vs hillgain).
	 * Returns the chosen logical block index, or NO_CLUSTER if no feasible
	 * block remains. */
	int i, j, iblk, index;
	int best_hillgain;
	float best_gain;
	int best_block;

	best_gain = (float)NOT_VALID + 1.0;
	best_hillgain = NOT_VALID + 1;
	best_block = NO_CLUSTER;

	if(cur_pb->pb_stats.cur_marked_model == NOT_VALID) {
		/* Queues not built yet.  Divide into two cases for speed only. */
		if (cur_pb->pb_stats.num_marked_blocks < num_logical_blocks / MARKED_FRAC) {
			/* Typical case: not too many blocks have been marked. */
			for (i = 0; i < cur_pb->pb_stats.num_marked_blocks; i++) {
				iblk = cur_pb->pb_stats.marked_blocks[i];
				if (logical_block[iblk].clb_index != NO_CLUSTER)
					continue;
				if (!is_feasible(packer_algorithm, iblk, is_clock, cur_pb))
					continue;
				if (gain_mode != HILL_CLIMBING) {
					add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.gain, cur_pb);
					if (cur_pb->pb_stats.gain[iblk] > best_gain) {
						best_gain = cur_pb->pb_stats.gain[iblk];
						best_block = iblk;
					}
				} else { /* hill climbing */
					add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.hillgain, cur_pb);
					if (cur_pb->pb_stats.hillgain[iblk] > best_hillgain) {
						best_hillgain = cur_pb->pb_stats.hillgain[iblk];
						best_block = iblk;
					}
				}
			}
		} else {
			/* Some high fanout nets marked lots of blocks: scan everything. */
			for (iblk = 0; iblk < num_logical_blocks; iblk++) {
				if (logical_block[iblk].clb_index != NO_CLUSTER)
					continue;
				if (!is_feasible(packer_algorithm, iblk, is_clock, cur_pb))
					continue;
				if (gain_mode != HILL_CLIMBING) {
					add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.gain, cur_pb);
					if (cur_pb->pb_stats.gain[iblk] > best_gain) {
						best_gain = cur_pb->pb_stats.gain[iblk];
						best_block = iblk;
					}
				} else { /* hill climbing */
					add_blk_to_pb_stats_candidates(iblk, cur_pb->pb_stats.hillgain, cur_pb);
					if (cur_pb->pb_stats.hillgain[iblk] > best_hillgain) {
						best_hillgain = cur_pb->pb_stats.hillgain[iblk];
						best_block = iblk;
					}
				}
			}
		}

		/* Locate the model queue that holds the best candidate so popping
		 * starts there.  BUGFIX: only do this when a feasible block was
		 * actually found -- the original code indexed gain[NO_CLUSTER]
		 * (i.e. gain[-1]) and, for empty queues, feasible_blocks[i][-1],
		 * both out-of-bounds reads.  With no candidate, cur_marked_model
		 * stays NOT_VALID and we return NO_CLUSTER below. */
		if (best_block != NO_CLUSTER) {
			for(i = 0; i < cur_pb->pb_stats.num_marked_models; i++) {
				if (cur_pb->pb_stats.num_feasible_blocks[i] == 0)
					continue; /* empty queue: nothing to compare against */
				index = cur_pb->pb_stats.feasible_blocks[i][cur_pb->pb_stats.num_feasible_blocks[i] - 1];
				if (gain_mode != HILL_CLIMBING) {
					if(cur_pb->pb_stats.gain[index] == cur_pb->pb_stats.gain[best_block]) {
						cur_pb->pb_stats.cur_marked_model = i;
						break;
					}
				} else {
					if(cur_pb->pb_stats.hillgain[index] == cur_pb->pb_stats.hillgain[best_block]) {
						cur_pb->pb_stats.cur_marked_model = i;
						break;
					}
				}
			}
			/* best_block was pushed onto its model's queue above, so the
			 * search must have succeeded. */
			assert(cur_pb->pb_stats.num_marked_models == 0 || i < cur_pb->pb_stats.num_marked_models);
		}
	}

	if(cur_pb->pb_stats.cur_marked_model == NOT_VALID) {
		return NO_CLUSTER;
	}

	/* Pop the next candidate.  The doubly-nested loop merely bounds the
	 * search; each iteration inspects the queue of cur_marked_model.
	 * NOTE(review): if cur_marked_model's queue is empty this returns
	 * NO_CLUSTER even when other queues still hold candidates -- preserved
	 * from the original; confirm whether that is intended. */
	for(i = 0; i < cur_pb->pb_stats.num_marked_models; i++) {
		for(j = 0; j < cur_pb->pb_stats.num_feasible_blocks[i]; j++) {
			if(cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model] != 0) {
				cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model]--;
				index = cur_pb->pb_stats.num_feasible_blocks[cur_pb->pb_stats.cur_marked_model];
				iblk = cur_pb->pb_stats.feasible_blocks[cur_pb->pb_stats.cur_marked_model][index];
				/* Advance round-robin pointer for the next call. */
				cur_pb->pb_stats.cur_marked_model = (cur_pb->pb_stats.cur_marked_model + 1) % cur_pb->pb_stats.num_marked_models;
				return iblk;
			}
		}
	}
	return NO_CLUSTER;
}
/*****************************************/
static int get_logical_block_for_cluster ( INP enum e_packer_algorithm packer_algorithm,
					   INOUTP t_pb *cur_pb,
					   INP boolean *is_clock,
					   INP boolean allow_unrelated_clustering) {
	/* Picks the unclustered vpack block with the greatest gain that also
	 * satisfies the input, clock and capacity constraints of cur_pb.
	 * Returns NO_CLUSTER when no suitable block exists. */
	int blk;

	/* If cannot pack into primitive, try packing into cluster */
	blk = get_highest_gain_logical_block(packer_algorithm, cur_pb, is_clock,
			inputs_outputs_models_and_clocks_feasible, NOT_HILL_CLIMBING);

	/* If no blocks have any gain to the current cluster, the code above
	 * finds nothing.  A logical_block with no inputs in common with the
	 * cluster may still be inserted, when unrelated clustering is allowed. */
	if (blk == NO_CLUSTER && allow_unrelated_clustering) {
		blk = get_free_logical_block_with_most_ext_inputs_for_cluster(packer_algorithm, cur_pb);
	}

	return blk;
}
/*****************************************/
static void alloc_and_load_cluster_info (INP int num_clb, INOUTP t_block *clb) {
/* Loads all missing clustering info necessary to complete clustering. */
/* For each complex block, allocates clb[i].nets and fills in the net index
 * attached to every physical pin (OPEN for unused pins).  Pin numbering is
 * assumed to follow pb_type port order: non-clock input ports, then output
 * ports, then clock ports -- see the one-to-one assumption below. */
int i, j, i_clb, node_index, ipin, iclass;
int inport, outport, clockport;
const t_pb_type * pb_type;
t_pb *pb;
for(i_clb = 0; i_clb < num_clb; i_clb++) {
/* NOTE(review): rr_node is a file/global pointer re-targeted at this
 * cluster's internal routing graph; net_num lookups below read from it. */
rr_node = clb[i_clb].pb->rr_graph;
pb_type = clb[i_clb].pb->pb_graph_node->pb_type;
pb = clb[i_clb].pb;
/* One net entry per physical pin of the block type, initially OPEN. */
clb[i_clb].nets = my_malloc(clb[i_clb].type->num_pins * sizeof(int));
for(i = 0; i < clb[i_clb].type->num_pins; i++) {
clb[i_clb].nets[i] = OPEN;
}
inport = outport = clockport = 0;
ipin = 0;
/* Assume top-level pb and clb share a one-to-one connection */
for(i = 0; i < pb_type->num_ports; i++) {
if(!pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT) {
/* Data input port: pins must be RECEIVERs and not global. */
for(j = 0; j < pb_type->ports[i].num_pins; j++) {
iclass = clb[i_clb].type->pin_class[ipin];
assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER);
assert(!clb[i_clb].type->is_global_pin[ipin]);
node_index = pb->pb_graph_node->input_pins[inport][j].pin_count_in_cluster;
clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
ipin++;
}
inport++;
} else if(pb_type->ports[i].type == OUT_PORT) {
/* Output port: pins must be DRIVERs. */
for(j = 0; j < pb_type->ports[i].num_pins; j++) {
iclass = clb[i_clb].type->pin_class[ipin];
assert(clb[i_clb].type->class_inf[iclass].type == DRIVER);
node_index = pb->pb_graph_node->output_pins[outport][j].pin_count_in_cluster;
clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
ipin++;
}
outport++;
} else {
/* Remaining case must be a clock input port: global RECEIVER pins. */
assert(pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT);
for(j = 0; j < pb_type->ports[i].num_pins; j++) {
iclass = clb[i_clb].type->pin_class[ipin];
assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER);
assert(clb[i_clb].type->is_global_pin[ipin]);
node_index = pb->pb_graph_node->clock_pins[clockport][j].pin_count_in_cluster;
clb[i_clb].nets[ipin] = rr_node[node_index].net_num;
ipin++;
}
clockport++;
}
}
}
}
/* TODO: Add more error checking, too light */
/*****************************************/
static void check_clustering (int num_clb, t_block *clb, boolean *is_clock) {
	/* Sanity-checks the completed clustering:
	 *  1. every logical block links to a pb that contains it, and the root
	 *     of that pb's hierarchy is the pb of the cluster the block claims
	 *     membership in (clb_index);
	 *  2. no pb subtree contains a logical block that does not link back
	 *     (recursive walk via check_cluster_logical_blocks);
	 *  3. every logical block appears in exactly one cluster.
	 * Exits with an error message on any violation. */
	int i;
	t_pb * cur_pb;
	boolean *blocks_checked;
	blocks_checked = my_calloc(num_logical_blocks, sizeof(boolean));
	/*
	 * Check that each logical block connects to one primitive and that the primitive links up to the parent clb
	 */
	/* BUGFIX: iterate over the logical blocks (num_logical_blocks), not the
	 * unrelated placement-level num_blocks global; blocks_checked and the
	 * final verification loop are both sized by num_logical_blocks. */
	for(i = 0; i < num_logical_blocks; i++) {
		if(logical_block[i].pb->logical_block != i) {
			printf(ERRTAG "pb %s does not contain logical block %s but logical block %s #%d links to pb\n",
				logical_block[i].pb->name, logical_block[i].name, logical_block[i].name, i);
			exit(1);
		}
		cur_pb = logical_block[i].pb;
		assert(strcmp(cur_pb->name, logical_block[i].name) == 0);
		/* Climb to the root pb of the hierarchy containing this block. */
		while(cur_pb->parent_pb) {
			cur_pb = cur_pb->parent_pb;
			assert(cur_pb->name);
		}
		/* BUGFIX: compare against the pb of the cluster this block claims
		 * to belong to; the original read clb[num_clb].pb, one past the
		 * end of the clb array. */
		if(cur_pb != clb[logical_block[i].clb_index].pb) {
			printf(ERRTAG "CLB %s does not match CLB contained by pb %s\n",
				cur_pb->name, logical_block[i].pb->name);
			exit(1);
		}
	}
	/* Check that I do not have spurious links in children pbs */
	for(i = 0; i < num_clb; i++) {
		check_cluster_logical_blocks(clb[i].pb, blocks_checked);
	}
	/* Every logical block must have been visited exactly once. */
	for(i = 0; i < num_logical_blocks; i++) {
		if(blocks_checked[i] == FALSE) {
			printf(ERRTAG "Logical block %s #%d not found in any cluster\n", logical_block[i].name, i);
			exit(1);
		}
	}
	free(blocks_checked);
}
/* TODO: May want to check that all logical blocks are actually reached (low priority, nice to have) */
static void check_cluster_logical_blocks (t_pb *pb, boolean *blocks_checked) {
	/* Recursively verifies the pb subtree rooted at pb: each primitive's
	 * logical block must not already be claimed elsewhere and must link
	 * back to its pb; every container pb must hold at least one named
	 * child.  blocks_checked[iblk] is set TRUE for each primitive found. */
	const t_pb_type *type;
	int ichild, ipb, iblk;
	boolean found_child;

	type = pb->pb_graph_node->pb_type;

	if(type->num_modes == 0) {
		/* Leaf (primitive) pb. */
		iblk = pb->logical_block;
		if(iblk == OPEN) {
			return;
		}
		if(blocks_checked[iblk] != FALSE) {
			printf(ERRTAG "pb %s contains logical block %s #%d but logical block is already contained in another pb\n",
				pb->name, logical_block[iblk].name, iblk);
			exit(1);
		}
		blocks_checked[iblk] = TRUE;
		if(pb != logical_block[iblk].pb) {
			printf(ERRTAG "pb %s contains logical block %s #%d but logical block does not link to pb\n",
				pb->name, logical_block[iblk].name, iblk);
			exit(1);
		}
		return;
	}

	/* Container pb: recurse into every instantiated child; all container
	 * pbs must contain at least one named child. */
	found_child = FALSE;
	for(ichild = 0; ichild < type->modes[pb->mode].num_pb_type_children; ichild++) {
		if(pb->child_pbs[ichild] == NULL) {
			continue;
		}
		for(ipb = 0; ipb < type->modes[pb->mode].pb_type_children[ichild].num_pb; ipb++) {
			if(pb->child_pbs[ichild][ipb].name != NULL) {
				found_child = TRUE;
				check_cluster_logical_blocks(&pb->child_pbs[ichild][ipb], blocks_checked);
			}
		}
	}
	assert(found_child);
}
static int get_most_critical_unclustered_block(void) {
	/* Returns the not-yet-clustered logical block with the highest timing
	 * criticality, resuming the scan of critindexarray from the global
	 * indexofcrit (reset to 0 whenever sort_blocks_by_criticality runs).
	 * calculate_timing_information must be called first, or this returns
	 * garbage.  Returns NO_CLUSTER when every block is already clustered. */
	int icrit, candidate;

	for (icrit = indexofcrit; icrit < num_logical_blocks; icrit++) {
		candidate = critindexarray[icrit];
		if (logical_block[candidate].clb_index != NO_CLUSTER) {
			continue; /* already packed: keep scanning */
		}
		indexofcrit = icrit + 1; /* next call resumes after this block */
		return candidate;
	}

	/* No unclustered blocks remain. */
	return NO_CLUSTER;
}
static void free_cb (t_pb *pb, int max_models) {
	/* Releases the routing-resource graph attached to a top-level complex
	 * block, then frees the pb hierarchy itself via free_pb. */
	const t_pb_type *type;
	int inode, num_rr_nodes;

	type = pb->pb_graph_node->pb_type;
	num_rr_nodes = pb->pb_graph_node->total_pb_pins
			+ type->num_input_pins + type->num_output_pins + type->num_clock_pins;

	for (inode = 0; inode < num_rr_nodes; inode++) {
		/* free(NULL) is a no-op, so no NULL guards are needed. */
		free(pb->rr_graph[inode].edges);
		free(pb->rr_graph[inode].switches);
	}
	free(pb->rr_graph);

	free_pb(pb, max_models);
}
static void free_pb (t_pb *pb, int max_models) {
	/* Recursively frees the children and name of pb (the struct itself is
	 * owned by the caller).  max_models is forwarded unchanged through the
	 * recursion. */
	const t_pb_type * pb_type;
	int i, j, mode;
	pb_type = pb->pb_graph_node->pb_type;
	if(pb_type->blif_model == NULL) {
		/* Container pb: free in-use children depth-first. */
		mode = pb->mode;
		for(i = 0; i < pb_type->modes[mode].num_pb_type_children && pb->child_pbs != NULL; i++) {
			for(j = 0; j < pb_type->modes[mode].pb_type_children[i].num_pb && pb->child_pbs[i] != NULL; j++) {
				if(pb->child_pbs[i][j].name != NULL) {
					free_pb(&pb->child_pbs[i][j], max_models);
				}
			}
			free(pb->child_pbs[i]); /* free(NULL) is a no-op */
		}
		/* BUGFIX: original read "if(pb->child_pbs);" -- the stray semicolon
		 * made the guard an empty statement.  free(NULL) is harmless, so
		 * the guard is simply dropped. */
		free(pb->child_pbs);
		pb->child_pbs = NULL;
		free(pb->name);
		pb->name = NULL;
	} else {
		/* Primitive pb: only the name is owned here. */
		free(pb->name);
		pb->name = NULL;
	}
}
static void free_pb_stats(t_pb_stats pb_stats, int max_models) {
	/* Frees every dynamically-allocated array inside a t_pb_stats.
	 * pb_stats.gain == NULL is treated as "nothing allocated" and skipped.
	 *
	 * WARNING: pb_stats is passed BY VALUE, so after this returns the
	 * caller's copy still holds the now-dangling pointers; callers must
	 * not reuse or re-free them.  (The original code set pb_stats.gain =
	 * NULL at the end, but that dead store only touched the local copy
	 * and has been removed.) */
	int i;
	if(pb_stats.gain == NULL) {
		return;
	}
	free(pb_stats.gain);
	free(pb_stats.lengthgain);
	free(pb_stats.sharinggain);
	free(pb_stats.hillgain);
	free(pb_stats.connectiongain);
	free(pb_stats.num_feasible_blocks);
	for(i = 0; i < max_models; i++) {
		free(pb_stats.feasible_blocks[i]);
	}
	free(pb_stats.feasible_blocks);
	free(pb_stats.num_pins_of_net_in_pb);
	free(pb_stats.net_output_in_pb);
	free(pb_stats.marked_nets);
	free(pb_stats.marked_blocks);
}