| /* | |
| * Main clustering algorithm | |
| * Author(s): Vaughn Betz (first revision - VPack), Alexander Marquardt (second revision - T-VPack), Jason Luu (third revision - AAPack) | |
| * June 8, 2011 | |
| */ | |
| #include <stdio.h> | |
| #include <assert.h> | |
| #include <string.h> | |
| #include <stdlib.h> | |
| #include <map> | |
| #include "util.h" | |
| #include "vpr_types.h" | |
| #include "globals.h" | |
| #include "cluster.h" | |
| #include "heapsort.h" | |
| #include "output_clustering.h" | |
| #include "output_blif.h" | |
| #include "SetupGrid.h" | |
| #include "read_xml_arch_file.h" | |
| #include "cluster_legality.h" | |
| #include "path_delay2.h" | |
| #include "path_delay.h" | |
| #include "vpr_utils.h" | |
| #include "cluster_placement.h" | |
| #include "ReadOptions.h" | |
| /*#define DEBUG_FAILED_PACKING_CANDIDATES*/ | |
| #define AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC 2 /* Maximum relative number of pins that can exceed input pins before giving up */ | |
| #define AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST 5 /* Maximum constant number of pins that can exceed input pins before giving up */ | |
| #define AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE 30 /* This value is used to determine the max size of the priority queue for candidates that pass the early filter legality test but not the more detailed routing test */ | |
| #define AAPACK_MAX_NET_SINKS_IGNORE 256 /* The packer looks at all sinks of a net when deciding what next candidate block to pack, for high-fanout nets, this is too runtime costly for marginal benefit, thus ignore those high fanout nets */ | |
| #define AAPACK_MAX_HIGH_FANOUT_EXPLORE 10 /* For high-fanout nets that are ignored, consider a maximum of this many nets */ | |
| #define SCALE_NUM_PATHS 1e-2 /*this value is used as a multiplier to assign a * | |
| *slightly higher criticality value to nets that * | |
| *affect a large number of critical paths versus * | |
| *nets that do not have such a broad effect. * | |
| *Note that this multiplier is intentionally very * | |
| *small compared to the total criticality because * | |
| *we want to make sure that vpack_net criticality is * | |
| *primarily determined by slacks, with this acting * | |
| *only as a tie-breaker between otherwise equal nets*/ | |
| #define SCALE_DISTANCE_VAL 1e-4 /*this value is used as a multiplier to assign a * | |
| *slightly higher criticality value to nets that * | |
| *are otherwise equal but one is farther * | |
| *from sources (the farther one being made slightly * | |
| *more critical) */ | |
| enum e_gain_update { | |
| GAIN, NO_GAIN | |
| }; | |
| enum e_feasibility { | |
| FEASIBLE, INFEASIBLE | |
| }; | |
| enum e_gain_type { | |
| HILL_CLIMBING, NOT_HILL_CLIMBING | |
| }; | |
| enum e_removal_policy { | |
| REMOVE_CLUSTERED, LEAVE_CLUSTERED | |
| }; | |
| /* TODO: REMOVE_CLUSTERED no longer used, remove */ | |
| enum e_net_relation_to_clustered_block { | |
| INPUT, OUTPUT | |
| }; | |
| enum e_detailed_routing_stages { | |
| E_DETAILED_ROUTE_AT_END_ONLY = 0, E_DETAILED_ROUTE_FOR_EACH_ATOM, E_DETAILED_ROUTE_END | |
| }; | |
| /* Linked list structure. Stores one integer (iblk). */ | |
| struct s_molecule_link { | |
| t_pack_molecule *moleculeptr; | |
| struct s_molecule_link *next; | |
| }; | |
| /* 1/MARKED_FRAC is the fraction of nets or blocks that must be * | |
| * marked in order for the brute force (go through the whole * | |
| * data structure linearly) gain update etc. code to be used. * | |
| * This is done for speed only; make MARKED_FRAC whatever * | |
| * number speeds the code up most. */ | |
| #define MARKED_FRAC 2 | |
| /* Keeps a linked list of the unclustered blocks to speed up looking for * | |
| * unclustered blocks with a certain number of *external* inputs. * | |
| * [0..lut_size]. Unclustered_list_head[i] points to the head of the * | |
| * list of blocks with i inputs to be hooked up via external interconnect. */ | |
| static struct s_molecule_link *unclustered_list_head; | |
| int unclustered_list_head_size; | |
| static struct s_molecule_link *memory_pool; /*Declared here so I can free easily.*/ | |
| /* Does the logical_block that drives the output of this vpack_net also appear as a * | |
| * receiver (input) pin of the vpack_net? [0..num_logical_nets-1]. If so, then by how much? This is used * | |
| * in the gain routines to avoid double counting the connections from * | |
| * the current cluster to other blocks (hence yielding better * | |
| * clusterings). The only time a logical_block should connect to the same vpack_net * | |
| * twice is when one connection is an output and the other is an input, * | |
| * so this should take care of all multiple connections. */ | |
| static int *net_output_feeds_driving_block_input; | |
| /* Timing information for blocks */ | |
| static float *block_criticality = NULL; | |
| static int *critindexarray = NULL; | |
| /*****************************************/ | |
| /*local functions*/ | |
| /*****************************************/ | |
| static void check_clocks(boolean *is_clock); | |
| #if 0 | |
| static void check_for_duplicate_inputs (); | |
| #endif | |
| static boolean is_logical_blk_in_pb(int iblk, t_pb *pb); | |
| static void add_molecule_to_pb_stats_candidates(t_pack_molecule *molecule, | |
| std::map<int, float> &gain, t_pb *pb); | |
| static void alloc_and_init_clustering(boolean global_clocks, float alpha, | |
| float beta, int max_cluster_size, int max_molecule_inputs, | |
| int max_pb_depth, int max_models, | |
| t_cluster_placement_stats **cluster_placement_stats, | |
| t_pb_graph_node ***primitives_list, t_pack_molecule *molecules_head, | |
| int num_molecules); | |
| static void free_pb_stats_recursive(t_pb *pb); | |
| static void try_update_lookahead_pins_used(t_pb *cur_pb); | |
| static void reset_lookahead_pins_used(t_pb *cur_pb); | |
| static void compute_and_mark_lookahead_pins_used(int ilogical_block); | |
| static void compute_and_mark_lookahead_pins_used_for_pin( | |
| t_pb_graph_pin *pb_graph_pin, t_pb *primitive_pb, int inet); | |
| static void commit_lookahead_pins_used(t_pb *cur_pb); | |
| static boolean check_lookahead_pins_used(t_pb *cur_pb); | |
| static boolean primitive_feasible(int iblk, t_pb *cur_pb); | |
| static boolean primitive_type_and_memory_feasible(int iblk, | |
| const t_pb_type *cur_pb_type, t_pb *memory_class_pb, | |
| int sibling_memory_blk); | |
| static t_pack_molecule *get_molecule_by_num_ext_inputs( | |
| INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, | |
| INP int ext_inps, INP enum e_removal_policy remove_flag, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr); | |
| static t_pack_molecule* get_free_molecule_with_most_ext_inputs_for_cluster( | |
| INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr); | |
| static t_pack_molecule* get_seed_logical_molecule_with_most_ext_inputs( | |
| int max_molecule_inputs); | |
| static enum e_block_pack_status try_pack_molecule( | |
| INOUTP t_cluster_placement_stats *cluster_placement_stats_ptr, | |
| INP t_pack_molecule *molecule, INOUTP t_pb_graph_node **primitives_list, | |
| INOUTP t_pb * pb, INP int max_models, INP int max_cluster_size, | |
| INP int clb_index, INP int max_nets_in_pb_type, INP int detailed_routing_stage); | |
| static enum e_block_pack_status try_place_logical_block_rec( | |
| INP t_pb_graph_node *pb_graph_node, INP int ilogical_block, | |
| INP t_pb *cb, OUTP t_pb **parent, INP int max_models, | |
| INP int max_cluster_size, INP int clb_index, | |
| INP int max_nets_in_pb_type, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr, | |
| INP boolean is_root_of_chain, INP t_pb_graph_pin *chain_root_pin); | |
| static void revert_place_logical_block(INP int ilogical_block, | |
| INP int max_models); | |
| static void update_connection_gain_values(int inet, int clustered_block, | |
| t_pb * cur_pb, | |
| enum e_net_relation_to_clustered_block net_relation_to_clustered_block); | |
| static void update_timing_gain_values(int inet, int clustered_block, | |
| t_pb* cur_pb, | |
| enum e_net_relation_to_clustered_block net_relation_to_clustered_block, | |
| t_slack * slacks); | |
| static void mark_and_update_partial_gain(int inet, enum e_gain_update gain_flag, | |
| int clustered_block, int port_on_clustered_block, | |
| int pin_on_clustered_block, boolean timing_driven, | |
| boolean connection_driven, | |
| enum e_net_relation_to_clustered_block net_relation_to_clustered_block, | |
| t_slack * slacks); | |
| static void update_total_gain(float alpha, float beta, boolean timing_driven, | |
| boolean connection_driven, boolean global_clocks, t_pb *pb); | |
| static void update_cluster_stats( INP t_pack_molecule *molecule, | |
| INP int clb_index, INP boolean *is_clock, INP boolean global_clocks, | |
| INP float alpha, INP float beta, INP boolean timing_driven, | |
| INP boolean connection_driven, INP t_slack * slacks); | |
| static void start_new_cluster( | |
| INP t_cluster_placement_stats *cluster_placement_stats, | |
| INP t_pb_graph_node **primitives_list, INP const t_arch * arch, | |
| INOUTP t_block *new_cluster, INP int clb_index, | |
| INP t_pack_molecule *molecule, INP float aspect, | |
| INOUTP int *num_used_instances_type, INOUTP int *num_instances_type, | |
| INP int num_models, INP int max_cluster_size, | |
| INP int max_nets_in_pb_type, INP int detailed_routing_stage); | |
| static t_pack_molecule* get_highest_gain_molecule( | |
| INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, | |
| INP enum e_gain_type gain_mode, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr); | |
| static t_pack_molecule* get_molecule_for_cluster( | |
| INP enum e_packer_algorithm packer_algorithm, INP t_pb *cur_pb, | |
| INP boolean allow_unrelated_clustering, | |
| INOUTP int *num_unrelated_clustering_attempts, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr); | |
| static void alloc_and_load_cluster_info(INP int num_clb, INOUTP t_block *clb); | |
| static void check_clustering(int num_clb, t_block *clb, boolean *is_clock); | |
| static void check_cluster_logical_blocks(t_pb *pb, boolean *blocks_checked); | |
| static t_pack_molecule* get_most_critical_seed_molecule(int * indexofcrit); | |
| static float get_molecule_gain(t_pack_molecule *molecule, std::map<int, float> &blk_gain); | |
| static int compare_molecule_gain(const void *a, const void *b); | |
| static int get_net_corresponding_to_pb_graph_pin(t_pb *cur_pb, | |
| t_pb_graph_pin *pb_graph_pin); | |
| static void print_block_criticalities(const char * fname); | |
| /*****************************************/ | |
| /*globally accessable function*/ | |
| void do_clustering(const t_arch *arch, t_pack_molecule *molecule_head, | |
| int num_models, boolean global_clocks, boolean *is_clock, | |
| boolean hill_climbing_flag, char *out_fname, boolean timing_driven, | |
| enum e_cluster_seed cluster_seed_type, float alpha, float beta, | |
| int recompute_timing_after, float block_delay, | |
| float intra_cluster_net_delay, float inter_cluster_net_delay, | |
| float aspect, boolean allow_unrelated_clustering, | |
| boolean allow_early_exit, boolean connection_driven, | |
| enum e_packer_algorithm packer_algorithm, t_timing_inf timing_inf) { | |
| /* Does the actual work of clustering multiple netlist blocks * | |
| * into clusters. */ | |
| /* Algorithm employed | |
| 1. Find type that can legally hold block and create cluster with pb info | |
| 2. Populate started cluster | |
| 3. Repeat 1 until no more blocks need to be clustered | |
| */ | |
| int i, iblk, num_molecules, blocks_since_last_analysis, num_clb, max_nets_in_pb_type, | |
| cur_nets_in_pb_type, num_blocks_hill_added, max_cluster_size, cur_cluster_size, | |
| max_molecule_inputs, max_pb_depth, cur_pb_depth, num_unrelated_clustering_attempts, | |
| indexofcrit, savedindexofcrit /* index of next most timing critical block */, | |
| detailed_routing_stage, *hill_climbing_inputs_avail; | |
| int *num_used_instances_type, *num_instances_type; | |
| /* [0..num_types] Holds array for total number of each cluster_type available */ | |
| boolean early_exit, is_cluster_legal; | |
| enum e_block_pack_status block_pack_status; | |
| float crit; | |
| t_cluster_placement_stats *cluster_placement_stats, *cur_cluster_placement_stats_ptr; | |
| t_pb_graph_node **primitives_list; | |
| t_block *clb; | |
| t_slack * slacks = NULL; | |
| t_pack_molecule *istart, *next_molecule, *prev_molecule, *cur_molecule; | |
| #ifdef PATH_COUNTING | |
| int inet, ipin; | |
| #else | |
| int inode; | |
| float num_paths_scaling, distance_scaling; | |
| #endif | |
| /* TODO: This is memory inefficient, fix if causes problems */ | |
| clb = (t_block*)my_calloc(num_logical_blocks, sizeof(t_block)); | |
| num_clb = 0; | |
| istart = NULL; | |
| /* determine bound on cluster size and primitive input size */ | |
| max_cluster_size = 0; | |
| max_molecule_inputs = 0; | |
| max_pb_depth = 0; | |
| max_nets_in_pb_type = 0; | |
| indexofcrit = 0; | |
| cur_molecule = molecule_head; | |
| num_molecules = 0; | |
| while (cur_molecule != NULL) { | |
| cur_molecule->valid = TRUE; | |
| if (cur_molecule->num_ext_inputs > max_molecule_inputs) { | |
| max_molecule_inputs = cur_molecule->num_ext_inputs; | |
| } | |
| num_molecules++; | |
| cur_molecule = cur_molecule->next; | |
| } | |
| for (i = 0; i < num_types; i++) { | |
| if (EMPTY_TYPE == &type_descriptors[i]) | |
| continue; | |
| cur_cluster_size = get_max_primitives_in_pb_type( | |
| type_descriptors[i].pb_type); | |
| cur_pb_depth = get_max_depth_of_pb_type(type_descriptors[i].pb_type); | |
| cur_nets_in_pb_type = get_max_nets_in_pb_type( | |
| type_descriptors[i].pb_type); | |
| if (cur_cluster_size > max_cluster_size) { | |
| max_cluster_size = cur_cluster_size; | |
| } | |
| if (cur_pb_depth > max_pb_depth) { | |
| max_pb_depth = cur_pb_depth; | |
| } | |
| if (cur_nets_in_pb_type > max_nets_in_pb_type) { | |
| max_nets_in_pb_type = cur_nets_in_pb_type; | |
| } | |
| } | |
| if (hill_climbing_flag) { | |
| hill_climbing_inputs_avail = (int *) my_calloc(max_cluster_size + 1, | |
| sizeof(int)); | |
| } else { | |
| hill_climbing_inputs_avail = NULL; /* if used, die hard */ | |
| } | |
| /* TODO: make better estimate for nx and ny */ | |
| nx = ny = 1; | |
| check_clocks(is_clock); | |
| #if 0 | |
| check_for_duplicate_inputs (); | |
| #endif | |
| alloc_and_init_clustering(global_clocks, alpha, beta, max_cluster_size, | |
| max_molecule_inputs, max_pb_depth, num_models, | |
| &cluster_placement_stats, &primitives_list, molecule_head, | |
| num_molecules); | |
| blocks_since_last_analysis = 0; | |
| early_exit = FALSE; | |
| num_blocks_hill_added = 0; | |
| num_used_instances_type = (int*) my_calloc(num_types, sizeof(int)); | |
| num_instances_type = (int*) my_calloc(num_types, sizeof(int)); | |
| assert(max_cluster_size < MAX_SHORT); | |
| /* Limit maximum number of elements for each cluster */ | |
| if (timing_driven) { | |
| slacks = alloc_and_load_pre_packing_timing_graph(block_delay, | |
| inter_cluster_net_delay, arch->models, timing_inf); | |
| do_timing_analysis(slacks, TRUE, FALSE, FALSE); | |
| if (getEchoEnabled()) { | |
| if(isEchoFileEnabled(E_ECHO_PRE_PACKING_TIMING_GRAPH)) | |
| print_timing_graph(getEchoFileName(E_ECHO_PRE_PACKING_TIMING_GRAPH)); | |
| #ifndef PATH_COUNTING | |
| if(isEchoFileEnabled(E_ECHO_CLUSTERING_TIMING_INFO)) | |
| print_clustering_timing_info(getEchoFileName(E_ECHO_CLUSTERING_TIMING_INFO)); | |
| #endif | |
| if(isEchoFileEnabled(E_ECHO_PRE_PACKING_SLACK)) | |
| print_slack(slacks->slack, FALSE, getEchoFileName(E_ECHO_PRE_PACKING_SLACK)); | |
| if(isEchoFileEnabled(E_ECHO_PRE_PACKING_CRITICALITY)) | |
| print_criticality(slacks, FALSE, getEchoFileName(E_ECHO_PRE_PACKING_CRITICALITY)); | |
| } | |
| block_criticality = (float*) my_calloc(num_logical_blocks, sizeof(float)); | |
| critindexarray = (int*) my_malloc(num_logical_blocks * sizeof(int)); | |
| for (i = 0; i < num_logical_blocks; i++) { | |
| assert(logical_block[i].index == i); | |
| critindexarray[i] = i; | |
| } | |
| #ifdef PATH_COUNTING | |
| /* Calculate block criticality from a weighted sum of timing and path criticalities. */ | |
| for (inet = 0; inet < num_logical_nets; inet++) { | |
| for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) { | |
| /* Find the logical block iblk which this pin is a sink on. */ | |
| iblk = vpack_net[inet].node_block[ipin]; | |
| /* The criticality of this pin is a sum of its timing and path criticalities. */ | |
| crit = PACK_PATH_WEIGHT * slacks->path_criticality[inet][ipin] | |
| + (1 - PACK_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin]; | |
| /* The criticality of each block is the maximum of the criticalities of all its pins. */ | |
| if (block_criticality[iblk] < crit) { | |
| block_criticality[iblk] = crit; | |
| } | |
| } | |
| } | |
| #else | |
| /* Calculate criticality based on slacks and tie breakers (# paths, distance from source) */ | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| /* Only calculate for tnodes which have valid normalized values. | |
| Either all values will be accurate or none will, so we only have | |
| to check whether one particular value (normalized_T_arr) is valid | |
| Tnodes that do not have both times valid were not part of the analysis. | |
| Because we calloc-ed the array criticality, such nodes will have criticality 0, the lowest possible value. */ | |
| if (has_valid_normalized_T_arr(inode)) { | |
| iblk = tnode[inode].block; | |
| num_paths_scaling = SCALE_NUM_PATHS | |
| * (float) tnode[inode].prepacked_data->normalized_total_critical_paths; | |
| distance_scaling = SCALE_DISTANCE_VAL | |
| * (float) tnode[inode].prepacked_data->normalized_T_arr; | |
| crit = (1 - tnode[inode].prepacked_data->normalized_slack) + num_paths_scaling | |
| + distance_scaling; | |
| if (block_criticality[iblk] < crit) { | |
| block_criticality[iblk] = crit; | |
| } | |
| } | |
| } | |
| #endif | |
| heapsort(critindexarray, block_criticality, num_logical_blocks, 1); | |
| if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CLUSTERING_BLOCK_CRITICALITIES)) { | |
| print_block_criticalities(getEchoFileName(E_ECHO_CLUSTERING_BLOCK_CRITICALITIES)); | |
| } | |
| if (cluster_seed_type == VPACK_TIMING) { | |
| istart = get_most_critical_seed_molecule(&indexofcrit); | |
| } else {/*max input seed*/ | |
| istart = get_seed_logical_molecule_with_most_ext_inputs( | |
| max_molecule_inputs); | |
| } | |
| } else /*cluster seed is max input (since there is no timing information)*/ { | |
| istart = get_seed_logical_molecule_with_most_ext_inputs( | |
| max_molecule_inputs); | |
| } | |
| while (istart != NULL) { | |
| is_cluster_legal = FALSE; | |
| savedindexofcrit = indexofcrit; | |
| for (detailed_routing_stage = (int)E_DETAILED_ROUTE_AT_END_ONLY; !is_cluster_legal && detailed_routing_stage != (int)E_DETAILED_ROUTE_END; detailed_routing_stage++) { | |
| reset_legalizer_for_cluster(&clb[num_clb]); | |
| /* start a new cluster and reset all stats */ | |
| start_new_cluster(cluster_placement_stats, primitives_list, arch, | |
| &clb[num_clb], num_clb, istart, aspect, num_used_instances_type, | |
| num_instances_type, num_models, max_cluster_size, | |
| max_nets_in_pb_type, detailed_routing_stage); | |
| vpr_printf(TIO_MESSAGE_INFO, "Complex block %d: %s, type: %s\n", | |
| num_clb, clb[num_clb].name, clb[num_clb].type->name); | |
| vpr_printf(TIO_MESSAGE_INFO, "\t"); | |
| fflush(stdout); | |
| update_cluster_stats(istart, num_clb, is_clock, global_clocks, alpha, | |
| beta, timing_driven, connection_driven, slacks); | |
| num_clb++; | |
| if (timing_driven && !early_exit) { | |
| blocks_since_last_analysis++; | |
| /*it doesn't make sense to do a timing analysis here since there* | |
| *is only one logical_block clustered it would not change anything */ | |
| } | |
| cur_cluster_placement_stats_ptr = &cluster_placement_stats[clb[num_clb | |
| - 1].type->index]; | |
| num_unrelated_clustering_attempts = 0; | |
| next_molecule = get_molecule_for_cluster(PACK_BRUTE_FORCE, | |
| clb[num_clb - 1].pb, allow_unrelated_clustering, | |
| &num_unrelated_clustering_attempts, | |
| cur_cluster_placement_stats_ptr); | |
| prev_molecule = istart; | |
| while (next_molecule != NULL && prev_molecule != next_molecule) { | |
| block_pack_status = try_pack_molecule( | |
| cur_cluster_placement_stats_ptr, next_molecule, | |
| primitives_list, clb[num_clb - 1].pb, num_models, | |
| max_cluster_size, num_clb - 1, max_nets_in_pb_type, detailed_routing_stage); | |
| prev_molecule = next_molecule; | |
| if (block_pack_status != BLK_PASSED) { | |
| if (next_molecule != NULL) { | |
| if (block_pack_status == BLK_FAILED_ROUTE) { | |
| #ifdef DEBUG_FAILED_PACKING_CANDIDATES | |
| vpr_printf(TIO_MESSAGE_DIRECT, "\tNO_ROUTE:%s type %s/n", | |
| next_molecule->logical_block_ptrs[next_molecule->root]->name, | |
| next_molecule->logical_block_ptrs[next_molecule->root]->model->name); | |
| fflush(stdout); | |
| #else | |
| vpr_printf(TIO_MESSAGE_DIRECT, "."); | |
| #endif | |
| } else { | |
| #ifdef DEBUG_FAILED_PACKING_CANDIDATES | |
| vpr_printf(TIO_MESSAGE_DIRECT, "\tFAILED_CHECK:%s type %s check %d\n", | |
| next_molecule->logical_block_ptrs[next_molecule->root]->name, | |
| next_molecule->logical_block_ptrs[next_molecule->root]->model->name, | |
| block_pack_status); | |
| fflush(stdout); | |
| #else | |
| vpr_printf(TIO_MESSAGE_DIRECT, "."); | |
| #endif | |
| } | |
| } | |
| next_molecule = get_molecule_for_cluster(PACK_BRUTE_FORCE, | |
| clb[num_clb - 1].pb, allow_unrelated_clustering, | |
| &num_unrelated_clustering_attempts, | |
| cur_cluster_placement_stats_ptr); | |
| continue; | |
| } else { | |
| /* Continue packing by filling smallest cluster */ | |
| #ifdef DEBUG_FAILED_PACKING_CANDIDATES | |
| vpr_printf(TIO_MESSAGE_DIRECT, "\tPASSED:%s type %s\n", | |
| next_molecule->logical_block_ptrs[next_molecule->root]->name, | |
| next_molecule->logical_block_ptrs[next_molecule->root]->model->name); | |
| fflush(stdout); | |
| #else | |
| vpr_printf(TIO_MESSAGE_DIRECT, "."); | |
| #endif | |
| } | |
| update_cluster_stats(next_molecule, num_clb - 1, is_clock, | |
| global_clocks, alpha, beta, timing_driven, | |
| connection_driven, slacks); | |
| num_unrelated_clustering_attempts = 0; | |
| if (timing_driven && !early_exit) { | |
| blocks_since_last_analysis++; /* historically, timing slacks were recomputed after X number of blocks were packed, but this doesn't significantly alter results so I (jluu) did not port the code */ | |
| } | |
| next_molecule = get_molecule_for_cluster(PACK_BRUTE_FORCE, | |
| clb[num_clb - 1].pb, allow_unrelated_clustering, | |
| &num_unrelated_clustering_attempts, | |
| cur_cluster_placement_stats_ptr); | |
| } | |
| vpr_printf(TIO_MESSAGE_DIRECT, "\n"); | |
| if (detailed_routing_stage == (int)E_DETAILED_ROUTE_AT_END_ONLY) { | |
| is_cluster_legal = try_breadth_first_route_cluster(); | |
| if (is_cluster_legal == TRUE) { | |
| vpr_printf(TIO_MESSAGE_INFO, "Passed route at end.\n"); | |
| } else { | |
| vpr_printf(TIO_MESSAGE_INFO, "Failed route at end, repack cluster trying detailed routing at each stage.\n"); | |
| } | |
| } else { | |
| is_cluster_legal = TRUE; | |
| } | |
| if (is_cluster_legal == TRUE) { | |
| save_cluster_solution(); | |
| if (timing_driven) { | |
| if (num_blocks_hill_added > 0 && !early_exit) { | |
| blocks_since_last_analysis += num_blocks_hill_added; | |
| } | |
| if (cluster_seed_type == VPACK_TIMING) { | |
| istart = get_most_critical_seed_molecule(&indexofcrit); | |
| } else { /*max input seed*/ | |
| istart = get_seed_logical_molecule_with_most_ext_inputs( | |
| max_molecule_inputs); | |
| } | |
| } else | |
| /*cluster seed is max input (since there is no timing information)*/ | |
| istart = get_seed_logical_molecule_with_most_ext_inputs( | |
| max_molecule_inputs); | |
| free_pb_stats_recursive(clb[num_clb - 1].pb); | |
| } else { | |
| /* Free up data structures and requeue used molecules */ | |
| num_used_instances_type[clb[num_clb - 1].type->index]--; | |
| free_cb(clb[num_clb - 1].pb); | |
| free(clb[num_clb - 1].pb); | |
| free(clb[num_clb - 1].name); | |
| clb[num_clb - 1].name = NULL; | |
| clb[num_clb - 1].pb = NULL; | |
| num_clb--; | |
| indexofcrit = savedindexofcrit; | |
| } | |
| } | |
| } | |
| free_cluster_legality_checker(); | |
| alloc_and_load_cluster_info(num_clb, clb); | |
| check_clustering(num_clb, clb, is_clock); | |
| output_clustering(clb, num_clb, global_clocks, is_clock, out_fname, FALSE); | |
| if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_POST_PACK_NETLIST)) { | |
| output_blif (clb, num_clb, global_clocks, is_clock, | |
| getEchoFileName(E_ECHO_POST_PACK_NETLIST), FALSE); | |
| } | |
| if (hill_climbing_flag) { | |
| free(hill_climbing_inputs_avail); | |
| } | |
| free_cluster_placement_stats(cluster_placement_stats); | |
| for (i = 0; i < num_clb; i++) { | |
| free_cb(clb[i].pb); | |
| free(clb[i].name); | |
| free(clb[i].nets); | |
| free(clb[i].pb); | |
| } | |
| free(clb); | |
| free(num_used_instances_type); | |
| free(num_instances_type); | |
| free(unclustered_list_head); | |
| free(memory_pool); | |
| free(net_output_feeds_driving_block_input); | |
| if (timing_driven) { | |
| free(block_criticality); | |
| free(critindexarray); | |
| block_criticality = NULL; | |
| critindexarray = NULL; | |
| } | |
| if (timing_driven) { | |
| free_timing_graph(slacks); | |
| } | |
| free (primitives_list); | |
| } | |
| /*****************************************/ | |
| static void check_clocks(boolean *is_clock) { | |
| /* Checks that nets used as clock inputs to latches are never also used * | |
| * as VPACK_LUT inputs. It's electrically questionable, and more importantly * | |
| * would break the clustering code. */ | |
| int inet, iblk, ipin; | |
| t_model_ports *port; | |
| for (iblk = 0; iblk < num_logical_blocks; iblk++) { | |
| if (logical_block[iblk].type != VPACK_OUTPAD) { | |
| port = logical_block[iblk].model->inputs; | |
| while (port) { | |
| for (ipin = 0; ipin < port->size; ipin++) { | |
| inet = logical_block[iblk].input_nets[port->index][ipin]; | |
| if (inet != OPEN) { | |
| if (is_clock[inet]) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "Error in check_clocks.\n"); | |
| vpr_printf(TIO_MESSAGE_ERROR, "Net %d (%s) is a clock, but also connects to a logic block input on logical_block %d (%s).\n", | |
| inet, vpack_net[inet].name, iblk, logical_block[iblk].name); | |
| vpr_printf(TIO_MESSAGE_ERROR, "This would break the current clustering implementation and is electrically questionable, so clustering has been aborted.\n"); | |
| exit(1); | |
| } | |
| } | |
| } | |
| port = port->next; | |
| } | |
| } | |
| } | |
| } | |
| /* Determine if logical block is in pb */ | |
| static boolean is_logical_blk_in_pb(int iblk, t_pb *pb) { | |
| t_pb * cur_pb; | |
| cur_pb = logical_block[iblk].pb; | |
| while (cur_pb) { | |
| if (cur_pb == pb) { | |
| return TRUE; | |
| } | |
| cur_pb = cur_pb->parent_pb; | |
| } | |
| return FALSE; | |
| } | |
| /* Add blk to list of feasible blocks sorted according to gain */ | |
| static void add_molecule_to_pb_stats_candidates(t_pack_molecule *molecule, | |
| std::map<int, float> &gain, t_pb *pb) { | |
| int i, j; | |
| for (i = 0; i < pb->pb_stats->num_feasible_blocks; i++) { | |
| if (pb->pb_stats->feasible_blocks[i] == molecule) { | |
| return; /* already in queue, do nothing */ | |
| } | |
| } | |
| if (pb->pb_stats->num_feasible_blocks | |
| >= AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE - 1) { | |
| /* maximum size for array, remove smallest gain element and sort */ | |
| if (get_molecule_gain(molecule, gain) | |
| > get_molecule_gain(pb->pb_stats->feasible_blocks[0], gain)) { | |
| /* single loop insertion sort */ | |
| for (j = 0; j < pb->pb_stats->num_feasible_blocks - 1; j++) { | |
| if (get_molecule_gain(molecule, gain) | |
| <= get_molecule_gain( | |
| pb->pb_stats->feasible_blocks[j + 1], gain)) { | |
| pb->pb_stats->feasible_blocks[j] = molecule; | |
| break; | |
| } else { | |
| pb->pb_stats->feasible_blocks[j] = | |
| pb->pb_stats->feasible_blocks[j + 1]; | |
| } | |
| } | |
| if (j == pb->pb_stats->num_feasible_blocks - 1) { | |
| pb->pb_stats->feasible_blocks[j] = molecule; | |
| } | |
| } | |
| } else { | |
| /* Expand array and single loop insertion sort */ | |
| for (j = pb->pb_stats->num_feasible_blocks - 1; j >= 0; j--) { | |
| if (get_molecule_gain(pb->pb_stats->feasible_blocks[j], gain) | |
| > get_molecule_gain(molecule, gain)) { | |
| pb->pb_stats->feasible_blocks[j + 1] = | |
| pb->pb_stats->feasible_blocks[j]; | |
| } else { | |
| pb->pb_stats->feasible_blocks[j + 1] = molecule; | |
| break; | |
| } | |
| } | |
| if (j < 0) { | |
| pb->pb_stats->feasible_blocks[0] = molecule; | |
| } | |
| pb->pb_stats->num_feasible_blocks++; | |
| } | |
| } | |
| /*****************************************/ | |
| static void alloc_and_init_clustering(boolean global_clocks, float alpha, | |
| float beta, int max_cluster_size, int max_molecule_inputs, | |
| int max_pb_depth, int max_models, | |
| t_cluster_placement_stats **cluster_placement_stats, | |
| t_pb_graph_node ***primitives_list, t_pack_molecule *molecules_head, | |
| int num_molecules) { | |
| /* Allocates the main data structures used for clustering and properly * | |
| * initializes them. */ | |
| int i, ext_inps, ipin, driving_blk, inet; | |
| struct s_molecule_link *next_ptr; | |
| t_pack_molecule *cur_molecule; | |
| t_pack_molecule **molecule_array; | |
| int max_molecule_size; | |
| alloc_and_load_cluster_legality_checker(); | |
| /**cluster_placement_stats = alloc_and_load_cluster_placement_stats();*/ | |
| for (i = 0; i < num_logical_blocks; i++) { | |
| logical_block[i].clb_index = NO_CLUSTER; | |
| } | |
| /* alloc and load list of molecules to pack */ | |
| unclustered_list_head = (struct s_molecule_link *) my_calloc( | |
| max_molecule_inputs + 1, sizeof(struct s_molecule_link)); | |
| unclustered_list_head_size = max_molecule_inputs + 1; | |
| for (i = 0; i <= max_molecule_inputs; i++) { | |
| unclustered_list_head[i].next = NULL; | |
| } | |
| molecule_array = (t_pack_molecule **) my_malloc( | |
| num_molecules * sizeof(t_pack_molecule*)); | |
| cur_molecule = molecules_head; | |
| for (i = 0; i < num_molecules; i++) { | |
| assert(cur_molecule != NULL); | |
| molecule_array[i] = cur_molecule; | |
| cur_molecule = cur_molecule->next; | |
| } | |
| assert(cur_molecule == NULL); | |
| qsort((void*) molecule_array, num_molecules, sizeof(t_pack_molecule*), | |
| compare_molecule_gain); | |
| memory_pool = (struct s_molecule_link *) my_malloc( | |
| num_molecules * sizeof(struct s_molecule_link)); | |
| next_ptr = memory_pool; | |
| for (i = 0; i < num_molecules; i++) { | |
| ext_inps = molecule_array[i]->num_ext_inputs; | |
| next_ptr->moleculeptr = molecule_array[i]; | |
| next_ptr->next = unclustered_list_head[ext_inps].next; | |
| unclustered_list_head[ext_inps].next = next_ptr; | |
| next_ptr++; | |
| } | |
| free(molecule_array); | |
| /* alloc and load net info */ | |
| net_output_feeds_driving_block_input = (int *) my_malloc( | |
| num_logical_nets * sizeof(int)); | |
| for (inet = 0; inet < num_logical_nets; inet++) { | |
| net_output_feeds_driving_block_input[inet] = 0; | |
| driving_blk = vpack_net[inet].node_block[0]; | |
| for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) { | |
| if (vpack_net[inet].node_block[ipin] == driving_blk) { | |
| net_output_feeds_driving_block_input[inet]++; | |
| } | |
| } | |
| } | |
| /* alloc and load cluster placement info */ | |
| *cluster_placement_stats = alloc_and_load_cluster_placement_stats(); | |
| /* alloc array that will store primitives that a molecule gets placed to, | |
| primitive_list is referenced by index, for example a logical block in index 2 of a molecule matches to a primitive in index 2 in primitive_list | |
| this array must be the size of the biggest molecule | |
| */ | |
| max_molecule_size = 1; | |
| cur_molecule = molecules_head; | |
| while (cur_molecule != NULL) { | |
| if (cur_molecule->num_blocks > max_molecule_size) { | |
| max_molecule_size = cur_molecule->num_blocks; | |
| } | |
| cur_molecule = cur_molecule->next; | |
| } | |
| *primitives_list = (t_pb_graph_node **)my_calloc(max_molecule_size, sizeof(t_pb_graph_node *)); | |
| } | |
| /*****************************************/ | |
| static void free_pb_stats_recursive(t_pb *pb) { | |
| int i, j; | |
| /* Releases all the memory used by clustering data structures. */ | |
| if (pb) { | |
| if (pb->pb_graph_node != NULL) { | |
| if (pb->pb_graph_node->pb_type->num_modes != 0) { | |
| for (i = 0; | |
| i | |
| < pb->pb_graph_node->pb_type->modes[pb->mode].num_pb_type_children; | |
| i++) { | |
| for (j = 0; | |
| j | |
| < pb->pb_graph_node->pb_type->modes[pb->mode].pb_type_children[i].num_pb; | |
| j++) { | |
| if (pb->child_pbs && pb->child_pbs[i]) { | |
| free_pb_stats_recursive(&pb->child_pbs[i][j]); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| free_pb_stats(pb); | |
| } | |
| } | |
| static boolean primitive_feasible(int iblk, t_pb *cur_pb) { | |
| const t_pb_type *cur_pb_type; | |
| int i; | |
| t_pb *memory_class_pb; /* Used for memory class only, for memories, open pins must be the same among siblings */ | |
| int sibling_memory_blk; | |
| cur_pb_type = cur_pb->pb_graph_node->pb_type; | |
| memory_class_pb = NULL; | |
| sibling_memory_blk = OPEN; | |
| assert(cur_pb_type->num_modes == 0); | |
| /* primitive */ | |
| if (cur_pb->logical_block != OPEN && cur_pb->logical_block != iblk) { | |
| /* This pb already has a different logical block */ | |
| return FALSE; | |
| } | |
| if (cur_pb_type->class_type == MEMORY_CLASS) { | |
| /* memory class is special, all siblings must share all nets, including open nets, with the exception of data nets */ | |
| /* find sibling if one exists */ | |
| memory_class_pb = cur_pb->parent_pb; | |
| for (i = 0; i < cur_pb_type->parent_mode->num_pb_type_children; i++) { | |
| if (memory_class_pb->child_pbs[cur_pb->mode][i].name != NULL | |
| && memory_class_pb->child_pbs[cur_pb->mode][i].logical_block | |
| != OPEN) { | |
| sibling_memory_blk = | |
| memory_class_pb->child_pbs[cur_pb->mode][i].logical_block; | |
| } | |
| } | |
| if (sibling_memory_blk == OPEN) { | |
| memory_class_pb = NULL; | |
| } | |
| } | |
| return primitive_type_and_memory_feasible(iblk, cur_pb_type, | |
| memory_class_pb, sibling_memory_blk); | |
| } | |
| static boolean primitive_type_and_memory_feasible(int iblk, | |
| const t_pb_type *cur_pb_type, t_pb *memory_class_pb, | |
| int sibling_memory_blk) { | |
| t_model_ports *port; | |
| int i, j; | |
| boolean second_pass; | |
| /* check if ports are big enough */ | |
| /* for memories, also check that pins are the same with existing siblings */ | |
| port = logical_block[iblk].model->inputs; | |
| second_pass = FALSE; | |
| while (port || !second_pass) { | |
| /* TODO: This is slow if the number of ports are large, fix if becomes a problem */ | |
| if (!port) { | |
| second_pass = TRUE; | |
| port = logical_block[iblk].model->outputs; | |
| } | |
| for (i = 0; i < cur_pb_type->num_ports; i++) { | |
| if (cur_pb_type->ports[i].model_port == port) { | |
| /* TODO: This is slow, I only need to check from 0 if it is a memory block, other blocks I can check from port->size onwards */ | |
| for (j = 0; j < port->size; j++) { | |
| if (port->dir == IN_PORT && !port->is_clock) { | |
| if (memory_class_pb) { | |
| if (cur_pb_type->ports[i].port_class == NULL | |
| || strstr(cur_pb_type->ports[i].port_class, | |
| "data") | |
| != cur_pb_type->ports[i].port_class) { | |
| if (logical_block[iblk].input_nets[port->index][j] | |
| != logical_block[sibling_memory_blk].input_nets[port->index][j]) { | |
| return FALSE; | |
| } | |
| } | |
| } | |
| if (logical_block[iblk].input_nets[port->index][j] | |
| != OPEN | |
| && j >= cur_pb_type->ports[i].num_pins) { | |
| return FALSE; | |
| } | |
| } else if (port->dir == OUT_PORT) { | |
| if (memory_class_pb) { | |
| if (cur_pb_type->ports[i].port_class == NULL | |
| || strstr(cur_pb_type->ports[i].port_class, | |
| "data") | |
| != cur_pb_type->ports[i].port_class) { | |
| if (logical_block[iblk].output_nets[port->index][j] | |
| != logical_block[sibling_memory_blk].output_nets[port->index][j]) { | |
| return FALSE; | |
| } | |
| } | |
| } | |
| if (logical_block[iblk].output_nets[port->index][j] | |
| != OPEN | |
| && j >= cur_pb_type->ports[i].num_pins) { | |
| return FALSE; | |
| } | |
| } else { | |
| assert(port->dir == IN_PORT && port->is_clock); | |
| assert(j == 0); | |
| if (memory_class_pb) { | |
| if (logical_block[iblk].clock_net | |
| != logical_block[sibling_memory_blk].clock_net) { | |
| return FALSE; | |
| } | |
| } | |
| if (logical_block[iblk].clock_net != OPEN | |
| && j >= cur_pb_type->ports[i].num_pins) { | |
| return FALSE; | |
| } | |
| } | |
| } | |
| break; | |
| } | |
| } | |
| if (i == cur_pb_type->num_ports) { | |
| if (((logical_block[iblk].model->inputs != NULL) && !second_pass) | |
| || (logical_block[iblk].model->outputs != NULL | |
| && second_pass)) { | |
| /* physical port not found */ | |
| return FALSE; | |
| } | |
| } | |
| if (port) { | |
| port = port->next; | |
| } | |
| } | |
| return TRUE; | |
| } | |
| /*****************************************/ | |
| static t_pack_molecule *get_molecule_by_num_ext_inputs( | |
| INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, | |
| INP int ext_inps, INP enum e_removal_policy remove_flag, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr) { | |
| /* This routine returns a logical_block which has not been clustered, has * | |
| * no connection to the current cluster, satisfies the cluster * | |
| * clock constraints, is a valid subblock inside the cluster, does not exceed the cluster subblock units available, | |
| and has ext_inps external inputs. If * | |
| * there is no such logical_block it returns NO_CLUSTER. Remove_flag * | |
| * controls whether or not blocks that have already been clustered * | |
| * are removed from the unclustered_list data structures. NB: * | |
| * to get a logical_block regardless of clock constraints just set clocks_ * | |
| * avail > 0. */ | |
| struct s_molecule_link *ptr, *prev_ptr; | |
| int ilogical_blk, i; | |
| boolean success; | |
| prev_ptr = &unclustered_list_head[ext_inps]; | |
| ptr = unclustered_list_head[ext_inps].next; | |
| while (ptr != NULL) { | |
| /* TODO: Get better candidate logical block in future, eg. return most timing critical or some other smarter metric */ | |
| if (ptr->moleculeptr->valid) { | |
| success = TRUE; | |
| for (i = 0; i < get_array_size_of_molecule(ptr->moleculeptr); i++) { | |
| if (ptr->moleculeptr->logical_block_ptrs[i] != NULL) { | |
| ilogical_blk = | |
| ptr->moleculeptr->logical_block_ptrs[i]->index; | |
| if (!exists_free_primitive_for_logical_block( | |
| cluster_placement_stats_ptr, ilogical_blk)) { /* TODO: I should be using a better filtering check especially when I'm dealing with multiple clock/multiple global reset signals where the clock/reset packed in matters, need to do later when I have the circuits to check my work */ | |
| success = FALSE; | |
| break; | |
| } | |
| } | |
| } | |
| if (success == TRUE) { | |
| return ptr->moleculeptr; | |
| } | |
| prev_ptr = ptr; | |
| } | |
| else if (remove_flag == REMOVE_CLUSTERED) { | |
| assert(0); /* this doesn't work right now with 2 the pass packing for each complex block */ | |
| prev_ptr->next = ptr->next; | |
| } | |
| ptr = ptr->next; | |
| } | |
| return NULL; | |
| } | |
| /*****************************************/ | |
| static t_pack_molecule *get_free_molecule_with_most_ext_inputs_for_cluster( | |
| INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr) { | |
| /* This routine is used to find new blocks for clustering when there are no feasible * | |
| * blocks with any attraction to the current cluster (i.e. it finds * | |
| * blocks which are unconnected from the current cluster). It returns * | |
| * the logical_block with the largest number of used inputs that satisfies the * | |
| * clocking and number of inputs constraints. If no suitable logical_block is * | |
| * found, the routine returns NO_CLUSTER. | |
| * TODO: Analyze if this function is useful in more detail, also, should probably not include clock in input count | |
| */ | |
| int ext_inps; | |
| int i, j; | |
| t_pack_molecule *molecule; | |
| int inputs_avail = 0; | |
| for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { | |
| for (j = 0; j < cur_pb->pb_graph_node->input_pin_class_size[i]; j++) { | |
| if (cur_pb->pb_stats->input_pins_used[i][j] != OPEN) | |
| inputs_avail++; | |
| } | |
| } | |
| molecule = NULL; | |
| if (inputs_avail >= unclustered_list_head_size) { | |
| inputs_avail = unclustered_list_head_size - 1; | |
| } | |
| for (ext_inps = inputs_avail; ext_inps >= 0; ext_inps--) { | |
| molecule = get_molecule_by_num_ext_inputs(packer_algorithm, cur_pb, | |
| ext_inps, LEAVE_CLUSTERED, cluster_placement_stats_ptr); | |
| if (molecule != NULL) { | |
| break; | |
| } | |
| } | |
| return molecule; | |
| } | |
| /*****************************************/ | |
| static t_pack_molecule* get_seed_logical_molecule_with_most_ext_inputs( | |
| int max_molecule_inputs) { | |
| /* This routine is used to find the first seed logical_block for the clustering. It returns * | |
| * the logical_block with the largest number of used inputs that satisfies the * | |
| * clocking and number of inputs constraints. If no suitable logical_block is * | |
| * found, the routine returns NO_CLUSTER. */ | |
| int ext_inps; | |
| struct s_molecule_link *ptr; | |
| for (ext_inps = max_molecule_inputs; ext_inps >= 0; ext_inps--) { | |
| ptr = unclustered_list_head[ext_inps].next; | |
| while (ptr != NULL) { | |
| if (ptr->moleculeptr->valid) { | |
| return ptr->moleculeptr; | |
| } | |
| ptr = ptr->next; | |
| } | |
| } | |
| return NULL; | |
| } | |
| /*****************************************/ | |
| /*****************************************/ | |
| static void alloc_and_load_pb_stats(t_pb *pb, int max_models, | |
| int max_nets_in_pb_type) { | |
| /* Call this routine when starting to fill up a new cluster. It resets * | |
| * the gain vector, etc. */ | |
| int i, j; | |
| pb->pb_stats = new t_pb_stats; | |
| /* If statement below is for speed. If nets are reasonably low-fanout, * | |
| * only a relatively small number of blocks will be marked, and updating * | |
| * only those logical_block structures will be fastest. If almost all blocks * | |
| * have been touched it should be faster to just run through them all * | |
| * in order (less addressing and better cache locality). */ | |
| pb->pb_stats->input_pins_used = (int **) my_malloc( | |
| pb->pb_graph_node->num_input_pin_class * sizeof(int*)); | |
| pb->pb_stats->output_pins_used = (int **) my_malloc( | |
| pb->pb_graph_node->num_output_pin_class * sizeof(int*)); | |
| pb->pb_stats->lookahead_input_pins_used = (int **) my_malloc( | |
| pb->pb_graph_node->num_input_pin_class * sizeof(int*)); | |
| pb->pb_stats->lookahead_output_pins_used = (int **) my_malloc( | |
| pb->pb_graph_node->num_output_pin_class * sizeof(int*)); | |
| pb->pb_stats->num_feasible_blocks = NOT_VALID; | |
| pb->pb_stats->feasible_blocks = (t_pack_molecule**) my_calloc( | |
| AAPACK_MAX_FEASIBLE_BLOCK_ARRAY_SIZE, sizeof(t_pack_molecule *)); | |
| pb->pb_stats->tie_break_high_fanout_net = OPEN; | |
| for (i = 0; i < pb->pb_graph_node->num_input_pin_class; i++) { | |
| pb->pb_stats->input_pins_used[i] = (int*) my_malloc( | |
| pb->pb_graph_node->input_pin_class_size[i] * sizeof(int)); | |
| for (j = 0; j < pb->pb_graph_node->input_pin_class_size[i]; j++) { | |
| pb->pb_stats->input_pins_used[i][j] = OPEN; | |
| } | |
| } | |
| for (i = 0; i < pb->pb_graph_node->num_output_pin_class; i++) { | |
| pb->pb_stats->output_pins_used[i] = (int*) my_malloc( | |
| pb->pb_graph_node->output_pin_class_size[i] * sizeof(int)); | |
| for (j = 0; j < pb->pb_graph_node->output_pin_class_size[i]; j++) { | |
| pb->pb_stats->output_pins_used[i][j] = OPEN; | |
| } | |
| } | |
| for (i = 0; i < pb->pb_graph_node->num_input_pin_class; i++) { | |
| pb->pb_stats->lookahead_input_pins_used[i] = (int*) my_malloc( | |
| (AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST + pb->pb_graph_node->input_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC) * sizeof(int)); | |
| for (j = 0; | |
| j | |
| < pb->pb_graph_node->input_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; j++) { | |
| pb->pb_stats->lookahead_input_pins_used[i][j] = OPEN; | |
| } | |
| } | |
| for (i = 0; i < pb->pb_graph_node->num_output_pin_class; i++) { | |
| pb->pb_stats->lookahead_output_pins_used[i] = (int*) my_malloc( | |
| (AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST + | |
| pb->pb_graph_node->output_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC) * sizeof(int)); | |
| for (j = 0; | |
| j | |
| < pb->pb_graph_node->output_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; j++) { | |
| pb->pb_stats->lookahead_output_pins_used[i][j] = OPEN; | |
| } | |
| } | |
| pb->pb_stats->gain.clear(); | |
| pb->pb_stats->timinggain.clear(); | |
| pb->pb_stats->connectiongain.clear(); | |
| pb->pb_stats->sharinggain.clear(); | |
| pb->pb_stats->hillgain.clear(); | |
| pb->pb_stats->num_pins_of_net_in_pb.clear(); | |
| pb->pb_stats->marked_nets = (int *) my_malloc( | |
| max_nets_in_pb_type * sizeof(int)); | |
| pb->pb_stats->marked_blocks = (int *) my_malloc( | |
| num_logical_blocks * sizeof(int)); | |
| pb->pb_stats->num_marked_nets = 0; | |
| pb->pb_stats->num_marked_blocks = 0; | |
| pb->pb_stats->num_child_blocks_in_pb = 0; | |
| } | |
| /*****************************************/ | |
| /** | |
| * Try pack molecule into current cluster | |
| */ | |
| static enum e_block_pack_status try_pack_molecule( | |
| INOUTP t_cluster_placement_stats *cluster_placement_stats_ptr, | |
| INP t_pack_molecule *molecule, INOUTP t_pb_graph_node **primitives_list, | |
| INOUTP t_pb * pb, INP int max_models, INP int max_cluster_size, | |
| INP int clb_index, INP int max_nets_in_pb_type, INP int detailed_routing_stage) { | |
| int molecule_size, failed_location; | |
| int i; | |
| enum e_block_pack_status block_pack_status; | |
| struct s_linked_vptr *cur_molecule; | |
| t_pb *parent; | |
| t_pb *cur_pb; | |
| t_logical_block *chain_root_block; | |
| boolean is_root_of_chain; | |
| t_pb_graph_pin *chain_root_pin; | |
| parent = NULL; | |
| block_pack_status = BLK_STATUS_UNDEFINED; | |
| molecule_size = get_array_size_of_molecule(molecule); | |
| failed_location = 0; | |
| while (block_pack_status != BLK_PASSED) { | |
| save_and_reset_routing_cluster(); /* save current routing information because speculative packing will change routing*/ | |
| if (get_next_primitive_list(cluster_placement_stats_ptr, molecule, | |
| primitives_list, clb_index)) { | |
| block_pack_status = BLK_PASSED; | |
| for (i = 0; i < molecule_size && block_pack_status == BLK_PASSED; | |
| i++) { | |
| assert( | |
| (primitives_list[i] == NULL) == (molecule->logical_block_ptrs[i] == NULL)); | |
| failed_location = i + 1; | |
| if (molecule->logical_block_ptrs[i] != NULL) { | |
| if(molecule->type == MOLECULE_FORCED_PACK && molecule->pack_pattern->is_chain && i == molecule->pack_pattern->root_block->block_id) { | |
| chain_root_pin = molecule->pack_pattern->chain_root_pin; | |
| is_root_of_chain = TRUE; | |
| } else { | |
| chain_root_pin = NULL; | |
| is_root_of_chain = FALSE; | |
| } | |
| block_pack_status = try_place_logical_block_rec( | |
| primitives_list[i], | |
| molecule->logical_block_ptrs[i]->index, pb, &parent, | |
| max_models, max_cluster_size, clb_index, | |
| max_nets_in_pb_type, cluster_placement_stats_ptr, is_root_of_chain, chain_root_pin); | |
| } | |
| } | |
| if (block_pack_status == BLK_PASSED) { | |
| /* Check if pin usage is feasible for the current packing assigment */ | |
| reset_lookahead_pins_used(pb); | |
| try_update_lookahead_pins_used(pb); | |
| if (!check_lookahead_pins_used(pb)) { | |
| block_pack_status = BLK_FAILED_FEASIBLE; | |
| } | |
| } | |
| if (block_pack_status == BLK_PASSED) { | |
| /* Try to route if heuristic is to route for every atom | |
| Skip routing if heuristic is to route at the end of packing complex block | |
| */ | |
| setup_intracluster_routing_for_molecule(molecule, primitives_list); | |
| if (detailed_routing_stage == (int)E_DETAILED_ROUTE_FOR_EACH_ATOM && try_breadth_first_route_cluster() == FALSE) { | |
| /* Cannot pack */ | |
| block_pack_status = BLK_FAILED_ROUTE; | |
| } else { | |
| /* Pack successful, commit | |
| TODO: SW Engineering note - may want to update cluster stats here too instead of doing it outside | |
| */ | |
| assert(block_pack_status == BLK_PASSED); | |
| if(molecule->type == MOLECULE_FORCED_PACK && molecule->pack_pattern->is_chain) { | |
| /* Chained molecules often take up lots of area and are important, if a chain is packed in, want to rename logic block to match chain name */ | |
| chain_root_block = molecule->logical_block_ptrs[molecule->pack_pattern->root_block->block_id]; | |
| cur_pb = chain_root_block->pb->parent_pb; | |
| while(cur_pb != NULL) { | |
| free(cur_pb->name); | |
| cur_pb->name = my_strdup(chain_root_block->name); | |
| cur_pb = cur_pb->parent_pb; | |
| } | |
| } | |
| for (i = 0; i < molecule_size; i++) { | |
| if (molecule->logical_block_ptrs[i] != NULL) { | |
| /* invalidate all molecules that share logical block with current molecule */ | |
| cur_molecule = | |
| molecule->logical_block_ptrs[i]->packed_molecules; | |
| while (cur_molecule != NULL) { | |
| ((t_pack_molecule*) cur_molecule->data_vptr)->valid = | |
| FALSE; | |
| cur_molecule = cur_molecule->next; | |
| } | |
| commit_primitive(cluster_placement_stats_ptr, | |
| primitives_list[i]); | |
| } | |
| } | |
| } | |
| } | |
| if (block_pack_status != BLK_PASSED) { | |
| for (i = 0; i < failed_location; i++) { | |
| if (molecule->logical_block_ptrs[i] != NULL) { | |
| revert_place_logical_block( | |
| molecule->logical_block_ptrs[i]->index, | |
| max_models); | |
| } | |
| } | |
| restore_routing_cluster(); | |
| } | |
| } else { | |
| block_pack_status = BLK_FAILED_FEASIBLE; | |
| restore_routing_cluster(); | |
| break; /* no more candidate primitives available, this molecule will not pack, return fail */ | |
| } | |
| } | |
| return block_pack_status; | |
| } | |
| /** | |
| * Try place logical block into current primitive location | |
| */ | |
| static enum e_block_pack_status try_place_logical_block_rec( | |
| INP t_pb_graph_node *pb_graph_node, INP int ilogical_block, | |
| INP t_pb *cb, OUTP t_pb **parent, INP int max_models, | |
| INP int max_cluster_size, INP int clb_index, | |
| INP int max_nets_in_pb_type, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr, | |
| INP boolean is_root_of_chain, INP t_pb_graph_pin *chain_root_pin) { | |
| int i, j; | |
| boolean is_primitive; | |
| enum e_block_pack_status block_pack_status; | |
| t_pb *my_parent; | |
| t_pb *pb, *parent_pb; | |
| const t_pb_type *pb_type; | |
| t_model_ports *root_port; | |
| my_parent = NULL; | |
| block_pack_status = BLK_PASSED; | |
| /* Discover parent */ | |
| if (pb_graph_node->parent_pb_graph_node != cb->pb_graph_node) { | |
| block_pack_status = try_place_logical_block_rec( | |
| pb_graph_node->parent_pb_graph_node, ilogical_block, cb, | |
| &my_parent, max_models, max_cluster_size, clb_index, | |
| max_nets_in_pb_type, cluster_placement_stats_ptr, is_root_of_chain, chain_root_pin); | |
| parent_pb = my_parent; | |
| } else { | |
| parent_pb = cb; | |
| } | |
| /* Create siblings if siblings are not allocated */ | |
| if (parent_pb->child_pbs == NULL) { | |
| assert(parent_pb->name == NULL); | |
| parent_pb->logical_block = OPEN; | |
| parent_pb->name = my_strdup(logical_block[ilogical_block].name); | |
| parent_pb->mode = pb_graph_node->pb_type->parent_mode->index; | |
| set_pb_graph_mode(parent_pb->pb_graph_node, 0, 0); /* TODO: default mode is to use mode 0, document this! */ | |
| set_pb_graph_mode(parent_pb->pb_graph_node, parent_pb->mode, 1); | |
| parent_pb->child_pbs = | |
| (t_pb **) my_calloc( | |
| parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].num_pb_type_children, | |
| sizeof(t_pb *)); | |
| for (i = 0; | |
| i | |
| < parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].num_pb_type_children; | |
| i++) { | |
| parent_pb->child_pbs[i] = | |
| (t_pb *) my_calloc( | |
| parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].pb_type_children[i].num_pb, | |
| sizeof(t_pb)); | |
| for (j = 0; | |
| j | |
| < parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].pb_type_children[i].num_pb; | |
| j++) { | |
| parent_pb->child_pbs[i][j].parent_pb = parent_pb; | |
| parent_pb->child_pbs[i][j].logical_block = OPEN; | |
| parent_pb->child_pbs[i][j].pb_graph_node = | |
| &(parent_pb->pb_graph_node->child_pb_graph_nodes[parent_pb->mode][i][j]); | |
| } | |
| } | |
| } else { | |
| assert(parent_pb->mode == pb_graph_node->pb_type->parent_mode->index); | |
| } | |
| for (i = 0; | |
| i | |
| < parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].num_pb_type_children; | |
| i++) { | |
| if (pb_graph_node->pb_type | |
| == &parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].pb_type_children[i]) { | |
| break; | |
| } | |
| } | |
| assert( | |
| i < parent_pb->pb_graph_node->pb_type->modes[parent_pb->mode].num_pb_type_children); | |
| pb = &parent_pb->child_pbs[i][pb_graph_node->placement_index]; | |
| *parent = pb; /* this pb is parent of it's child that called this function */ | |
| assert(pb->pb_graph_node == pb_graph_node); | |
| if (pb->pb_stats == NULL) { | |
| alloc_and_load_pb_stats(pb, max_models, max_nets_in_pb_type); | |
| } | |
| pb_type = pb_graph_node->pb_type; | |
| is_primitive = (boolean) (pb_type->num_modes == 0); | |
| if (is_primitive) { | |
| assert( | |
| pb->logical_block == OPEN && logical_block[ilogical_block].pb == NULL && logical_block[ilogical_block].clb_index == NO_CLUSTER); | |
| /* try pack to location */ | |
| pb->name = my_strdup(logical_block[ilogical_block].name); | |
| pb->logical_block = ilogical_block; | |
| logical_block[ilogical_block].clb_index = clb_index; | |
| logical_block[ilogical_block].pb = pb; | |
| if (!primitive_feasible(ilogical_block, pb)) { | |
| /* failed location feasibility check, revert pack */ | |
| block_pack_status = BLK_FAILED_FEASIBLE; | |
| } | |
| if (block_pack_status == BLK_PASSED && is_root_of_chain == TRUE) { | |
| /* is carry chain, must check if this carry chain spans multiple logic blocks or not */ | |
| root_port = chain_root_pin->port->model_port; | |
| if(logical_block[ilogical_block].input_nets[root_port->index][chain_root_pin->pin_number] != OPEN) { | |
| /* this carry chain spans multiple logic blocks, must match up correctly with previous chain for this to route */ | |
| if(pb_graph_node != chain_root_pin->parent_node) { | |
| /* this location does not match with the dedicated chain input from outside logic block, therefore not feasible */ | |
| block_pack_status = BLK_FAILED_FEASIBLE; | |
| } | |
| } | |
| } | |
| } | |
| return block_pack_status; | |
| } | |
| /* Revert trial logical block iblock and free up memory space accordingly | |
| */ | |
| static void revert_place_logical_block(INP int iblock, INP int max_models) { | |
| t_pb *pb, *next; | |
| pb = logical_block[iblock].pb; | |
| logical_block[iblock].clb_index = NO_CLUSTER; | |
| logical_block[iblock].pb = NULL; | |
| if (pb != NULL) { | |
| /* When freeing molecules, the current block might already have been freed by a prior revert | |
| When this happens, no need to do anything beyond basic book keeping at the logical block | |
| */ | |
| next = pb->parent_pb; | |
| free_pb(pb); | |
| pb = next; | |
| while (pb != NULL) { | |
| /* If this is pb is created only for the purposes of holding new molecule, remove it | |
| Must check if cluster is already freed (which can be the case) | |
| */ | |
| next = pb->parent_pb; | |
| if (pb->child_pbs != NULL && pb->pb_stats != NULL | |
| && pb->pb_stats->num_child_blocks_in_pb == 0) { | |
| set_pb_graph_mode(pb->pb_graph_node, pb->mode, 0); /* default mode is to use mode 1 */ | |
| set_pb_graph_mode(pb->pb_graph_node, 0, 1); | |
| if (next != NULL) { | |
| /* If the code gets here, then that means that placing the initial seed molecule failed, don't free the actual complex block itself as the seed needs to find another placement */ | |
| free_pb(pb); | |
| } | |
| } | |
| pb = next; | |
| } | |
| } | |
| } | |
| static void update_connection_gain_values(int inet, int clustered_block, | |
| t_pb *cur_pb, | |
| enum e_net_relation_to_clustered_block net_relation_to_clustered_block) { | |
| /*This function is called when the connectiongain values on the vpack_net* | |
| *inet require updating. */ | |
| int iblk, ipin; | |
| int clb_index; | |
| int num_internal_connections, num_open_connections, num_stuck_connections; | |
| num_internal_connections = num_open_connections = num_stuck_connections = 0; | |
| clb_index = logical_block[clustered_block].clb_index; | |
| /* may wish to speed things up by ignoring clock nets since they are high fanout */ | |
| for (ipin = 0; ipin <= vpack_net[inet].num_sinks; ipin++) { | |
| iblk = vpack_net[inet].node_block[ipin]; | |
| if (logical_block[iblk].clb_index == clb_index | |
| && is_logical_blk_in_pb(iblk, | |
| logical_block[clustered_block].pb)) { | |
| num_internal_connections++; | |
| } else if (logical_block[iblk].clb_index == OPEN) { | |
| num_open_connections++; | |
| } else { | |
| num_stuck_connections++; | |
| } | |
| } | |
| if (net_relation_to_clustered_block == OUTPUT) { | |
| for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) { | |
| iblk = vpack_net[inet].node_block[ipin]; | |
| if (logical_block[iblk].clb_index == NO_CLUSTER) { | |
| /* TODO: Gain function accurate only if net has one connection to block, TODO: Should we handle case where net has multi-connection to block? Gain computation is only off by a bit in this case */ | |
| if(cur_pb->pb_stats->connectiongain.count(iblk) == 0) { | |
| cur_pb->pb_stats->connectiongain[iblk] = 0; | |
| } | |
| if (num_internal_connections > 1) { | |
| cur_pb->pb_stats->connectiongain[iblk] -= 1 | |
| / (float) (vpack_net[inet].num_sinks | |
| - (num_internal_connections - 1) | |
| + 1 * num_stuck_connections); | |
| } | |
| cur_pb->pb_stats->connectiongain[iblk] += 1 | |
| / (float) (vpack_net[inet].num_sinks | |
| - num_internal_connections | |
| + 1 * num_stuck_connections); | |
| } | |
| } | |
| } | |
| if (net_relation_to_clustered_block == INPUT) { | |
| /*Calculate the connectiongain for the logical_block which is driving * | |
| *the vpack_net that is an input to a logical_block in the cluster */ | |
| iblk = vpack_net[inet].node_block[0]; | |
| if (logical_block[iblk].clb_index == NO_CLUSTER) { | |
| if(cur_pb->pb_stats->connectiongain.count(iblk) == 0) { | |
| cur_pb->pb_stats->connectiongain[iblk] = 0; | |
| } | |
| if (num_internal_connections > 1) { | |
| cur_pb->pb_stats->connectiongain[iblk] -= 1 | |
| / (float) (vpack_net[inet].num_sinks | |
| - (num_internal_connections - 1) + 1 | |
| + 1 * num_stuck_connections); | |
| } | |
| cur_pb->pb_stats->connectiongain[iblk] += 1 | |
| / (float) (vpack_net[inet].num_sinks | |
| - num_internal_connections + 1 | |
| + 1 * num_stuck_connections); | |
| } | |
| } | |
| } | |
| /*****************************************/ | |
| static void update_timing_gain_values(int inet, int clustered_block, | |
| t_pb *cur_pb, | |
| enum e_net_relation_to_clustered_block net_relation_to_clustered_block, t_slack * slacks) { | |
| /*This function is called when the timing_gain values on the vpack_net* | |
| *inet requires updating. */ | |
| float timinggain; | |
| int newblk, ifirst; | |
| int iblk, ipin; | |
| /* Check if this vpack_net lists its driving logical_block twice. If so, avoid * | |
| * double counting this logical_block by skipping the first (driving) pin. */ | |
| if (net_output_feeds_driving_block_input[inet] == FALSE) | |
| ifirst = 0; | |
| else | |
| ifirst = 1; | |
| if (net_relation_to_clustered_block == OUTPUT | |
| && !vpack_net[inet].is_global) { | |
| for (ipin = ifirst; ipin <= vpack_net[inet].num_sinks; ipin++) { | |
| iblk = vpack_net[inet].node_block[ipin]; | |
| if (logical_block[iblk].clb_index == NO_CLUSTER) { | |
| #ifdef PATH_COUNTING | |
| /* Timing gain is a weighted sum of timing and path criticalities. */ | |
| timinggain = TIMING_GAIN_PATH_WEIGHT * slacks->path_criticality[inet][ipin] | |
| + (1 - TIMING_GAIN_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin]; | |
| #else | |
| /* Timing gain is the timing criticality. */ | |
| timinggain = slacks->timing_criticality[inet][ipin]; | |
| #endif | |
| if(cur_pb->pb_stats->timinggain.count(iblk) == 0) { | |
| cur_pb->pb_stats->timinggain[iblk] = 0; | |
| } | |
| if (timinggain > cur_pb->pb_stats->timinggain[iblk]) | |
| cur_pb->pb_stats->timinggain[iblk] = timinggain; | |
| } | |
| } | |
| } | |
| if (net_relation_to_clustered_block == INPUT | |
| && !vpack_net[inet].is_global) { | |
| /*Calculate the timing gain for the logical_block which is driving * | |
| *the vpack_net that is an input to a logical_block in the cluster */ | |
| newblk = vpack_net[inet].node_block[0]; | |
| if (logical_block[newblk].clb_index == NO_CLUSTER) { | |
| for (ipin = 1; ipin <= vpack_net[inet].num_sinks; ipin++) { | |
| #ifdef PATH_COUNTING | |
| /* Timing gain is a weighted sum of timing and path criticalities. */ | |
| timinggain = TIMING_GAIN_PATH_WEIGHT * slacks->path_criticality[inet][ipin] | |
| + (1 - TIMING_GAIN_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin]; | |
| #else | |
| /* Timing gain is the timing criticality. */ | |
| timinggain = slacks->timing_criticality[inet][ipin]; | |
| #endif | |
| if(cur_pb->pb_stats->timinggain.count(newblk) == 0) { | |
| cur_pb->pb_stats->timinggain[newblk] = 0; | |
| } | |
| if (timinggain > cur_pb->pb_stats->timinggain[newblk]) | |
| cur_pb->pb_stats->timinggain[newblk] = timinggain; | |
| } | |
| } | |
| } | |
| } | |
| /*****************************************/ | |
| static void mark_and_update_partial_gain(int inet, enum e_gain_update gain_flag, | |
| int clustered_block, int port_on_clustered_block, | |
| int pin_on_clustered_block, boolean timing_driven, | |
| boolean connection_driven, | |
| enum e_net_relation_to_clustered_block net_relation_to_clustered_block, | |
| t_slack * slacks) { | |
| /* Updates the marked data structures, and if gain_flag is GAIN, * | |
| * the gain when a logic logical_block is added to a cluster. The * | |
| * sharinggain is the number of inputs that a logical_block shares with * | |
| * blocks that are already in the cluster. Hillgain is the * | |
| * reduction in number of pins-required by adding a logical_block to the * | |
| * cluster. The timinggain is the criticality of the most critical* | |
| * vpack_net between this logical_block and a logical_block in the cluster. */ | |
| int iblk, ipin, ifirst, stored_net; | |
| t_pb *cur_pb; | |
| cur_pb = logical_block[clustered_block].pb->parent_pb; | |
| if (vpack_net[inet].num_sinks > AAPACK_MAX_NET_SINKS_IGNORE) { | |
| /* Optimization: It can be too runtime costly for marking all sinks for a high fanout-net that probably has no hope of ever getting packed, thus ignore those high fanout nets */ | |
| if(vpack_net[inet].is_global != TRUE) { | |
| /* If no low/medium fanout nets, we may need to consider high fan-out nets for packing, so select one and store it */ | |
| while(cur_pb->parent_pb != NULL) { | |
| cur_pb = cur_pb->parent_pb; | |
| } | |
| stored_net = cur_pb->pb_stats->tie_break_high_fanout_net; | |
| if(stored_net == OPEN || vpack_net[inet].num_sinks < vpack_net[stored_net].num_sinks) { | |
| cur_pb->pb_stats->tie_break_high_fanout_net = inet; | |
| } | |
| } | |
| return; | |
| } | |
| while (cur_pb) { | |
| /* Mark vpack_net as being visited, if necessary. */ | |
| if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(inet) == 0) { | |
| cur_pb->pb_stats->marked_nets[cur_pb->pb_stats->num_marked_nets] = | |
| inet; | |
| cur_pb->pb_stats->num_marked_nets++; | |
| } | |
| /* Update gains of affected blocks. */ | |
| if (gain_flag == GAIN) { | |
| /* Check if this vpack_net lists its driving logical_block twice. If so, avoid * | |
| * double counting this logical_block by skipping the first (driving) pin. */ | |
| if (net_output_feeds_driving_block_input[inet] == 0) | |
| ifirst = 0; | |
| else | |
| ifirst = 1; | |
| if (cur_pb->pb_stats->num_pins_of_net_in_pb.count(inet) == 0) { | |
| for (ipin = ifirst; ipin <= vpack_net[inet].num_sinks; ipin++) { | |
| iblk = vpack_net[inet].node_block[ipin]; | |
| if (logical_block[iblk].clb_index == NO_CLUSTER) { | |
| if (cur_pb->pb_stats->sharinggain.count(iblk) == 0) { | |
| cur_pb->pb_stats->marked_blocks[cur_pb->pb_stats->num_marked_blocks] = | |
| iblk; | |
| cur_pb->pb_stats->num_marked_blocks++; | |
| cur_pb->pb_stats->sharinggain[iblk] = 1; | |
| cur_pb->pb_stats->hillgain[iblk] = 1 | |
| - num_ext_inputs_logical_block(iblk); | |
| } else { | |
| cur_pb->pb_stats->sharinggain[iblk]++; | |
| cur_pb->pb_stats->hillgain[iblk]++; | |
| } | |
| } | |
| } | |
| } | |
| if (connection_driven) { | |
| update_connection_gain_values(inet, clustered_block, cur_pb, | |
| net_relation_to_clustered_block); | |
| } | |
| if (timing_driven) { | |
| update_timing_gain_values(inet, clustered_block, cur_pb, | |
| net_relation_to_clustered_block, slacks); | |
| } | |
| } | |
| if(cur_pb->pb_stats->num_pins_of_net_in_pb.count(inet) == 0) { | |
| cur_pb->pb_stats->num_pins_of_net_in_pb[inet] = 0; | |
| } | |
| cur_pb->pb_stats->num_pins_of_net_in_pb[inet]++; | |
| cur_pb = cur_pb->parent_pb; | |
| } | |
| } | |
| /*****************************************/ | |
| static void update_total_gain(float alpha, float beta, boolean timing_driven, | |
| boolean connection_driven, boolean global_clocks, t_pb *pb) { | |
| /*Updates the total gain array to reflect the desired tradeoff between* | |
| *input sharing (sharinggain) and path_length minimization (timinggain)*/ | |
| int i, iblk, j, k; | |
| t_pb * cur_pb; | |
| int num_input_pins, num_output_pins; | |
| int num_used_input_pins, num_used_output_pins; | |
| t_model_ports *port; | |
| cur_pb = pb; | |
| while (cur_pb) { | |
| for (i = 0; i < cur_pb->pb_stats->num_marked_blocks; i++) { | |
| iblk = cur_pb->pb_stats->marked_blocks[i]; | |
| if(cur_pb->pb_stats->connectiongain.count(iblk) == 0) { | |
| cur_pb->pb_stats->connectiongain[iblk] = 0; | |
| } | |
| if(cur_pb->pb_stats->sharinggain.count(iblk) == 0) { | |
| cur_pb->pb_stats->connectiongain[iblk] = 0; | |
| } | |
| /* Todo: This was used to explore different normalization options, can be made more efficient once we decide on which one to use*/ | |
| num_input_pins = 0; | |
| port = logical_block[iblk].model->inputs; | |
| j = 0; | |
| num_used_input_pins = 0; | |
| while (port) { | |
| num_input_pins += port->size; | |
| if (!port->is_clock) { | |
| for (k = 0; k < port->size; k++) { | |
| if (logical_block[iblk].input_nets[j][k] != OPEN) { | |
| num_used_input_pins++; | |
| } | |
| } | |
| j++; | |
| } | |
| port = port->next; | |
| } | |
| if (num_input_pins == 0) { | |
| num_input_pins = 1; | |
| } | |
| num_used_output_pins = 0; | |
| j = 0; | |
| num_output_pins = 0; | |
| port = logical_block[iblk].model->outputs; | |
| while (port) { | |
| num_output_pins += port->size; | |
| for (k = 0; k < port->size; k++) { | |
| if (logical_block[iblk].output_nets[j][k] != OPEN) { | |
| num_used_output_pins++; | |
| } | |
| } | |
| port = port->next; | |
| j++; | |
| } | |
| /* end todo */ | |
| /* Calculate area-only cost function */ | |
| if (connection_driven) { | |
| /*try to absorb as many connections as possible*/ | |
| /*cur_pb->pb_stats->gain[iblk] = ((1-beta)*(float)cur_pb->pb_stats->sharinggain[iblk] + beta*(float)cur_pb->pb_stats->connectiongain[iblk])/(num_input_pins + num_output_pins);*/ | |
| cur_pb->pb_stats->gain[iblk] = ((1 - beta) | |
| * (float) cur_pb->pb_stats->sharinggain[iblk] | |
| + beta * (float) cur_pb->pb_stats->connectiongain[iblk]) | |
| / (num_used_input_pins + num_used_output_pins); | |
| } else { | |
| /*cur_pb->pb_stats->gain[iblk] = ((float)cur_pb->pb_stats->sharinggain[iblk])/(num_input_pins + num_output_pins); */ | |
| cur_pb->pb_stats->gain[iblk] = | |
| ((float) cur_pb->pb_stats->sharinggain[iblk]) | |
| / (num_used_input_pins + num_used_output_pins); | |
| } | |
| /* Add in timing driven cost into cost function */ | |
| if (timing_driven) { | |
| cur_pb->pb_stats->gain[iblk] = alpha | |
| * cur_pb->pb_stats->timinggain[iblk] | |
| + (1.0 - alpha) * (float) cur_pb->pb_stats->gain[iblk]; | |
| } | |
| } | |
| cur_pb = cur_pb->parent_pb; | |
| } | |
| } | |
| /*****************************************/ | |
| static void update_cluster_stats( INP t_pack_molecule *molecule, | |
| INP int clb_index, INP boolean *is_clock, INP boolean global_clocks, | |
| INP float alpha, INP float beta, INP boolean timing_driven, | |
| INP boolean connection_driven, INP t_slack * slacks) { | |
| /* Updates cluster stats such as gain, used pins, and clock structures. */ | |
| int ipin, inet; | |
| int new_blk, molecule_size; | |
| int iblock; | |
| t_model_ports *port; | |
| t_pb *cur_pb, *cb; | |
| /* TODO: what a scary comment from Vaughn, we'll have to watch out for this causing problems */ | |
| /* Output can be open so the check is necessary. I don't change * | |
| * the gain for clock outputs when clocks are globally distributed * | |
| * because I assume there is no real need to pack similarly clocked * | |
| * FFs together then. Note that by updating the gain when the * | |
| * clock driver is placed in a cluster implies that the output of * | |
| * LUTs can be connected to clock inputs internally. Probably not * | |
| * true, but it doesn't make much difference, since it will still * | |
| * make local routing of this clock very short, and none of my * | |
| * benchmarks actually generate local clocks (all come from pads). */ | |
| molecule_size = get_array_size_of_molecule(molecule); | |
| cb = NULL; | |
| for (iblock = 0; iblock < molecule_size; iblock++) { | |
| if (molecule->logical_block_ptrs[iblock] == NULL) { | |
| continue; | |
| } | |
| new_blk = molecule->logical_block_ptrs[iblock]->index; | |
| logical_block[new_blk].clb_index = clb_index; | |
| cur_pb = logical_block[new_blk].pb->parent_pb; | |
| while (cur_pb) { | |
| /* reset list of feasible blocks */ | |
| cur_pb->pb_stats->num_feasible_blocks = NOT_VALID; | |
| cur_pb->pb_stats->num_child_blocks_in_pb++; | |
| if (cur_pb->parent_pb == NULL) { | |
| cb = cur_pb; | |
| } | |
| cur_pb = cur_pb->parent_pb; | |
| } | |
| port = logical_block[new_blk].model->outputs; | |
| while (port) { | |
| for (ipin = 0; ipin < port->size; ipin++) { | |
| inet = logical_block[new_blk].output_nets[port->index][ipin]; /* Output pin first. */ | |
| if (inet != OPEN) { | |
| if (!is_clock[inet] || !global_clocks) | |
| mark_and_update_partial_gain(inet, GAIN, new_blk, | |
| port->index, ipin, timing_driven, | |
| connection_driven, OUTPUT, slacks); | |
| else | |
| mark_and_update_partial_gain(inet, NO_GAIN, new_blk, | |
| port->index, ipin, timing_driven, | |
| connection_driven, OUTPUT, slacks); | |
| } | |
| } | |
| port = port->next; | |
| } | |
| port = logical_block[new_blk].model->inputs; | |
| while (port) { | |
| if (port->is_clock) { | |
| port = port->next; | |
| continue; | |
| } | |
| for (ipin = 0; ipin < port->size; ipin++) { /* VPACK_BLOCK input pins. */ | |
| inet = logical_block[new_blk].input_nets[port->index][ipin]; | |
| if (inet != OPEN) { | |
| mark_and_update_partial_gain(inet, GAIN, new_blk, | |
| port->index, ipin, timing_driven, connection_driven, | |
| INPUT, slacks); | |
| } | |
| } | |
| port = port->next; | |
| } | |
| /* Note: The code below ONLY WORKS when nets that go to clock inputs * | |
| * NEVER go to the input of a VPACK_COMB. It doesn't really make electrical * | |
| * sense for that to happen, and I check this in the check_clocks * | |
| * function. Don't disable that sanity check. */ | |
| inet = logical_block[new_blk].clock_net; /* Clock input pin. */ | |
| if (inet != OPEN) { | |
| if (global_clocks) | |
| mark_and_update_partial_gain(inet, NO_GAIN, new_blk, 0, 0, | |
| timing_driven, connection_driven, INPUT, slacks); | |
| else | |
| mark_and_update_partial_gain(inet, GAIN, new_blk, 0, 0, | |
| timing_driven, connection_driven, INPUT, slacks); | |
| } | |
| update_total_gain(alpha, beta, timing_driven, connection_driven, | |
| global_clocks, logical_block[new_blk].pb->parent_pb); | |
| commit_lookahead_pins_used(cb); | |
| } | |
| } | |
| static void start_new_cluster( | |
| INP t_cluster_placement_stats *cluster_placement_stats, | |
| INOUTP t_pb_graph_node **primitives_list, INP const t_arch * arch, | |
| INOUTP t_block *new_cluster, INP int clb_index, | |
| INP t_pack_molecule *molecule, INP float aspect, | |
| INOUTP int *num_used_instances_type, INOUTP int *num_instances_type, | |
| INP int num_models, INP int max_cluster_size, | |
| INP int max_nets_in_pb_type, INP int detailed_routing_stage) { | |
| /* Given a starting seed block, start_new_cluster determines the next cluster type to use | |
| It expands the FPGA if it cannot find a legal cluster for the logical block | |
| */ | |
| int i, j; | |
| boolean success; | |
| int count; | |
| assert(new_cluster->name == NULL); | |
| /* Check if this cluster is really empty */ | |
| /* Allocate a dummy initial cluster and load a logical block as a seed and check if it is legal */ | |
| new_cluster->name = (char*) my_malloc( | |
| (strlen(molecule->logical_block_ptrs[molecule->root]->name) + 4) * sizeof(char)); | |
| sprintf(new_cluster->name, "cb.%s", | |
| molecule->logical_block_ptrs[molecule->root]->name); | |
| new_cluster->nets = NULL; | |
| new_cluster->type = NULL; | |
| new_cluster->pb = NULL; | |
| new_cluster->x = UNDEFINED; | |
| new_cluster->y = UNDEFINED; | |
| new_cluster->z = UNDEFINED; | |
| success = FALSE; | |
| while (!success) { | |
| count = 0; | |
| for (i = 0; i < num_types; i++) { | |
| if (num_used_instances_type[i] < num_instances_type[i]) { | |
| new_cluster->type = &type_descriptors[i]; | |
| if (new_cluster->type == EMPTY_TYPE) { | |
| continue; | |
| } | |
| new_cluster->pb = (t_pb*)my_calloc(1, sizeof(t_pb)); | |
| new_cluster->pb->pb_graph_node = | |
| new_cluster->type->pb_graph_head; | |
| alloc_and_load_pb_stats(new_cluster->pb, num_models, | |
| max_nets_in_pb_type); | |
| new_cluster->pb->parent_pb = NULL; | |
| alloc_and_load_legalizer_for_cluster(new_cluster, clb_index, | |
| arch); | |
| for (j = 0; | |
| j < new_cluster->type->pb_graph_head->pb_type->num_modes | |
| && !success; j++) { | |
| new_cluster->pb->mode = j; | |
| reset_cluster_placement_stats(&cluster_placement_stats[i]); | |
| set_mode_cluster_placement_stats( | |
| new_cluster->pb->pb_graph_node, j); | |
| success = (boolean) (BLK_PASSED | |
| == try_pack_molecule(&cluster_placement_stats[i], | |
| molecule, primitives_list, new_cluster->pb, | |
| num_models, max_cluster_size, clb_index, | |
| max_nets_in_pb_type, detailed_routing_stage)); | |
| } | |
| if (success) { | |
| /* TODO: For now, just grab any working cluster, in the future, heuristic needed to grab best complex block based on supply and demand */ | |
| break; | |
| } else { | |
| free_legalizer_for_cluster(new_cluster, TRUE); | |
| free_pb_stats(new_cluster->pb); | |
| free(new_cluster->pb); | |
| } | |
| count++; | |
| } | |
| } | |
| if (count == num_types - 1) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "Can not find any logic block that can implement molecule.\n"); | |
| if (molecule->type == MOLECULE_FORCED_PACK) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "\tPattern %s %s\n", | |
| molecule->pack_pattern->name, | |
| molecule->logical_block_ptrs[molecule->root]->name); | |
| } else { | |
| vpr_printf(TIO_MESSAGE_ERROR, "\tAtom %s\n", | |
| molecule->logical_block_ptrs[molecule->root]->name); | |
| } | |
| exit(1); | |
| } | |
| /* Expand FPGA size and recalculate number of available cluster types*/ | |
| if (!success) { | |
| if (aspect >= 1.0) { | |
| ny++; | |
| nx = nint(ny * aspect); | |
| } else { | |
| nx++; | |
| ny = nint(nx / aspect); | |
| } | |
| vpr_printf(TIO_MESSAGE_INFO, "Not enough resources expand FPGA size to x = %d y = %d.\n", | |
| nx, ny); | |
| if ((nx > MAX_SHORT) || (ny > MAX_SHORT)) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "Circuit cannot pack into architecture, architecture size (nx = %d, ny = %d) exceeds packer range.\n", | |
| nx, ny); | |
| exit(1); | |
| } | |
| alloc_and_load_grid(num_instances_type); | |
| freeGrid(); | |
| } | |
| } | |
| num_used_instances_type[new_cluster->type->index]++; | |
| } | |
| /*****************************************/ | |
| static t_pack_molecule *get_highest_gain_molecule( | |
| INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, | |
| INP enum e_gain_type gain_mode, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr) { | |
| /* This routine populates a list of feasible blocks outside the cluster then returns the best one for the list * | |
| * not currently in a cluster and satisfies the feasibility * | |
| * function passed in as is_feasible. If there are no feasible * | |
| * blocks it returns NO_CLUSTER. */ | |
| int i, j, iblk, index, inet, count; | |
| boolean success; | |
| struct s_linked_vptr *cur; | |
| t_pack_molecule *molecule; | |
| molecule = NULL; | |
| if (gain_mode == HILL_CLIMBING) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "Hill climbing not supported yet, error out.\n"); | |
| exit(1); | |
| } | |
| if (cur_pb->pb_stats->num_feasible_blocks == NOT_VALID) { | |
| /* Divide into two cases for speed only. */ | |
| /* Typical case: not too many blocks have been marked. */ | |
| cur_pb->pb_stats->num_feasible_blocks = 0; | |
| if (cur_pb->pb_stats->num_marked_blocks | |
| < num_logical_blocks / MARKED_FRAC) { | |
| for (i = 0; i < cur_pb->pb_stats->num_marked_blocks; i++) { | |
| iblk = cur_pb->pb_stats->marked_blocks[i]; | |
| if (logical_block[iblk].clb_index == NO_CLUSTER) { | |
| cur = logical_block[iblk].packed_molecules; | |
| while (cur != NULL) { | |
| molecule = (t_pack_molecule *) cur->data_vptr; | |
| if (molecule->valid) { | |
| success = TRUE; | |
| for (j = 0; | |
| j < get_array_size_of_molecule(molecule); | |
| j++) { | |
| if (molecule->logical_block_ptrs[j] != NULL) { | |
| assert( | |
| molecule->logical_block_ptrs[j]->clb_index == NO_CLUSTER); | |
| if (!exists_free_primitive_for_logical_block( | |
| cluster_placement_stats_ptr, | |
| iblk)) { /* TODO: debating whether to check if placement exists for molecule (more robust) or individual logical blocks (faster) */ | |
| success = FALSE; | |
| break; | |
| } | |
| } | |
| } | |
| if (success) { | |
| add_molecule_to_pb_stats_candidates(molecule, | |
| cur_pb->pb_stats->gain, cur_pb); | |
| } | |
| } | |
| cur = cur->next; | |
| } | |
| } | |
| } | |
| } else { /* Some high fanout nets marked lots of blocks. */ | |
| for (iblk = 0; iblk < num_logical_blocks; iblk++) { | |
| if (logical_block[iblk].clb_index == NO_CLUSTER) { | |
| cur = logical_block[iblk].packed_molecules; | |
| while (cur != NULL) { | |
| molecule = (t_pack_molecule *) cur->data_vptr; | |
| if (molecule->valid) { | |
| success = TRUE; | |
| for (j = 0; | |
| j < get_array_size_of_molecule(molecule); | |
| j++) { | |
| if (molecule->logical_block_ptrs[j] != NULL) { | |
| assert( | |
| molecule->logical_block_ptrs[j]->clb_index == NO_CLUSTER); | |
| if (!exists_free_primitive_for_logical_block( | |
| cluster_placement_stats_ptr, | |
| iblk)) { | |
| success = FALSE; | |
| break; | |
| } | |
| } | |
| } | |
| if (success) { | |
| add_molecule_to_pb_stats_candidates(molecule, | |
| cur_pb->pb_stats->gain, cur_pb); | |
| } | |
| } | |
| cur = cur->next; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| if(cur_pb->pb_stats->num_feasible_blocks == 0 && cur_pb->pb_stats->tie_break_high_fanout_net != OPEN) { | |
| /* Because the packer ignores high fanout nets when marking what blocks to consider, use one of the ignored high fanout net to fill up lightly related blocks */ | |
| reset_tried_but_unused_cluster_placements(cluster_placement_stats_ptr); | |
| inet = cur_pb->pb_stats->tie_break_high_fanout_net; | |
| count = 0; | |
| for (i = 0; i <= vpack_net[inet].num_sinks && count < AAPACK_MAX_HIGH_FANOUT_EXPLORE; i++) { | |
| iblk = vpack_net[inet].node_block[i]; | |
| if (logical_block[iblk].clb_index == NO_CLUSTER) { | |
| cur = logical_block[iblk].packed_molecules; | |
| while (cur != NULL) { | |
| molecule = (t_pack_molecule *) cur->data_vptr; | |
| if (molecule->valid) { | |
| success = TRUE; | |
| for (j = 0; | |
| j < get_array_size_of_molecule(molecule); | |
| j++) { | |
| if (molecule->logical_block_ptrs[j] != NULL) { | |
| assert( | |
| molecule->logical_block_ptrs[j]->clb_index == NO_CLUSTER); | |
| if (!exists_free_primitive_for_logical_block( | |
| cluster_placement_stats_ptr, | |
| iblk)) { /* TODO: debating whether to check if placement exists for molecule (more robust) or individual logical blocks (faster) */ | |
| success = FALSE; | |
| break; | |
| } | |
| } | |
| } | |
| if (success) { | |
| add_molecule_to_pb_stats_candidates(molecule, | |
| cur_pb->pb_stats->gain, cur_pb); | |
| count++; | |
| } | |
| } | |
| cur = cur->next; | |
| } | |
| } | |
| } | |
| cur_pb->pb_stats->tie_break_high_fanout_net = OPEN; /* Mark off that this high fanout net has been considered */ | |
| } | |
| molecule = NULL; | |
| for (j = 0; j < cur_pb->pb_stats->num_feasible_blocks; j++) { | |
| if (cur_pb->pb_stats->num_feasible_blocks != 0) { | |
| cur_pb->pb_stats->num_feasible_blocks--; | |
| index = cur_pb->pb_stats->num_feasible_blocks; | |
| molecule = cur_pb->pb_stats->feasible_blocks[index]; | |
| assert(molecule->valid == TRUE); | |
| return molecule; | |
| } | |
| } | |
| return molecule; | |
| } | |
| /*****************************************/ | |
| static t_pack_molecule *get_molecule_for_cluster( | |
| INP enum e_packer_algorithm packer_algorithm, INOUTP t_pb *cur_pb, | |
| INP boolean allow_unrelated_clustering, | |
| INOUTP int *num_unrelated_clustering_attempts, | |
| INP t_cluster_placement_stats *cluster_placement_stats_ptr) { | |
| /* Finds the vpack block with the the greatest gain that satisifies the * | |
| * input, clock and capacity constraints of a cluster that are * | |
| * passed in. If no suitable vpack block is found it returns NO_CLUSTER. | |
| */ | |
| t_pack_molecule *best_molecule; | |
| /* If cannot pack into primitive, try packing into cluster */ | |
| best_molecule = get_highest_gain_molecule(packer_algorithm, cur_pb, | |
| NOT_HILL_CLIMBING, cluster_placement_stats_ptr); | |
| /* If no blocks have any gain to the current cluster, the code above * | |
| * will not find anything. However, another logical_block with no inputs in * | |
| * common with the cluster may still be inserted into the cluster. */ | |
| if (allow_unrelated_clustering) { | |
| if (best_molecule == NULL) { | |
| if (*num_unrelated_clustering_attempts == 0) { | |
| best_molecule = | |
| get_free_molecule_with_most_ext_inputs_for_cluster( | |
| packer_algorithm, cur_pb, | |
| cluster_placement_stats_ptr); | |
| (*num_unrelated_clustering_attempts)++; | |
| } | |
| } else { | |
| *num_unrelated_clustering_attempts = 0; | |
| } | |
| } | |
| return best_molecule; | |
| } | |
| /*****************************************/ | |
| static void alloc_and_load_cluster_info(INP int num_clb, INOUTP t_block *clb) { | |
| /* Loads all missing clustering info necessary to complete clustering. */ | |
| int i, j, i_clb, node_index, ipin, iclass; | |
| int inport, outport, clockport; | |
| const t_pb_type * pb_type; | |
| t_pb *pb; | |
| for (i_clb = 0; i_clb < num_clb; i_clb++) { | |
| rr_node = clb[i_clb].pb->rr_graph; | |
| pb_type = clb[i_clb].pb->pb_graph_node->pb_type; | |
| pb = clb[i_clb].pb; | |
| clb[i_clb].nets = (int*)my_malloc(clb[i_clb].type->num_pins * sizeof(int)); | |
| for (i = 0; i < clb[i_clb].type->num_pins; i++) { | |
| clb[i_clb].nets[i] = OPEN; | |
| } | |
| inport = outport = clockport = 0; | |
| ipin = 0; | |
| /* Assume top-level pb and clb share a one-to-one connection */ | |
| for (i = 0; i < pb_type->num_ports; i++) { | |
| if (!pb_type->ports[i].is_clock | |
| && pb_type->ports[i].type == IN_PORT) { | |
| for (j = 0; j < pb_type->ports[i].num_pins; j++) { | |
| iclass = clb[i_clb].type->pin_class[ipin]; | |
| assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER); | |
| assert(clb[i_clb].type->is_global_pin[ipin] == pb->pb_graph_node->input_pins[inport][j].port->is_non_clock_global); | |
| node_index = | |
| pb->pb_graph_node->input_pins[inport][j].pin_count_in_cluster; | |
| clb[i_clb].nets[ipin] = rr_node[node_index].net_num; | |
| ipin++; | |
| } | |
| inport++; | |
| } else if (pb_type->ports[i].type == OUT_PORT) { | |
| for (j = 0; j < pb_type->ports[i].num_pins; j++) { | |
| iclass = clb[i_clb].type->pin_class[ipin]; | |
| assert(clb[i_clb].type->class_inf[iclass].type == DRIVER); | |
| node_index = | |
| pb->pb_graph_node->output_pins[outport][j].pin_count_in_cluster; | |
| clb[i_clb].nets[ipin] = rr_node[node_index].net_num; | |
| ipin++; | |
| } | |
| outport++; | |
| } else { | |
| assert( | |
| pb_type->ports[i].is_clock && pb_type->ports[i].type == IN_PORT); | |
| for (j = 0; j < pb_type->ports[i].num_pins; j++) { | |
| iclass = clb[i_clb].type->pin_class[ipin]; | |
| assert(clb[i_clb].type->class_inf[iclass].type == RECEIVER); | |
| assert(clb[i_clb].type->is_global_pin[ipin]); | |
| node_index = | |
| pb->pb_graph_node->clock_pins[clockport][j].pin_count_in_cluster; | |
| clb[i_clb].nets[ipin] = rr_node[node_index].net_num; | |
| ipin++; | |
| } | |
| clockport++; | |
| } | |
| } | |
| } | |
| } | |
| /* TODO: Add more error checking, too light */ | |
| /*****************************************/ | |
| static void check_clustering(int num_clb, t_block *clb, boolean *is_clock) { | |
| int i; | |
| t_pb * cur_pb; | |
| boolean * blocks_checked; | |
| blocks_checked = (boolean*)my_calloc(num_logical_blocks, sizeof(boolean)); | |
| /* | |
| * Check that each logical block connects to one primitive and that the primitive links up to the parent clb | |
| */ | |
| for (i = 0; i < num_blocks; i++) { | |
| if (logical_block[i].pb->logical_block != i) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "pb %s does not contain logical block %s but logical block %s #%d links to pb.\n", | |
| logical_block[i].pb->name, logical_block[i].name, logical_block[i].name, i); | |
| exit(1); | |
| } | |
| cur_pb = logical_block[i].pb; | |
| assert(strcmp(cur_pb->name, logical_block[i].name) == 0); | |
| while (cur_pb->parent_pb) { | |
| cur_pb = cur_pb->parent_pb; | |
| assert(cur_pb->name); | |
| } | |
| if (cur_pb != clb[num_clb].pb) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "CLB %s does not match CLB contained by pb %s.\n", | |
| cur_pb->name, logical_block[i].pb->name); | |
| exit(1); | |
| } | |
| } | |
| /* Check that I do not have spurious links in children pbs */ | |
| for (i = 0; i < num_clb; i++) { | |
| check_cluster_logical_blocks(clb[i].pb, blocks_checked); | |
| } | |
| for (i = 0; i < num_logical_blocks; i++) { | |
| if (blocks_checked[i] == FALSE) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "Logical block %s #%d not found in any cluster.\n", | |
| logical_block[i].name, i); | |
| exit(1); | |
| } | |
| } | |
| free(blocks_checked); | |
| } | |
| /* TODO: May want to check that all logical blocks are actually reached (low priority, nice to have) */ | |
| static void check_cluster_logical_blocks(t_pb *pb, boolean *blocks_checked) { | |
| int i, j; | |
| const t_pb_type *pb_type; | |
| boolean has_child; | |
| has_child = FALSE; | |
| pb_type = pb->pb_graph_node->pb_type; | |
| if (pb_type->num_modes == 0) { | |
| /* primitive */ | |
| if (pb->logical_block != OPEN) { | |
| if (blocks_checked[pb->logical_block] != FALSE) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "pb %s contains logical block %s #%d but logical block is already contained in another pb.\n", | |
| pb->name, logical_block[pb->logical_block].name, pb->logical_block); | |
| exit(1); | |
| } | |
| blocks_checked[pb->logical_block] = TRUE; | |
| if (pb != logical_block[pb->logical_block].pb) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "pb %s contains logical block %s #%d but logical block does not link to pb.\n", | |
| pb->name, logical_block[pb->logical_block].name, pb->logical_block); | |
| exit(1); | |
| } | |
| } | |
| } else { | |
| /* this is a container pb, all container pbs must contain children */ | |
| for (i = 0; i < pb_type->modes[pb->mode].num_pb_type_children; i++) { | |
| for (j = 0; j < pb_type->modes[pb->mode].pb_type_children[i].num_pb; | |
| j++) { | |
| if (pb->child_pbs[i] != NULL) { | |
| if (pb->child_pbs[i][j].name != NULL) { | |
| has_child = TRUE; | |
| check_cluster_logical_blocks(&pb->child_pbs[i][j], | |
| blocks_checked); | |
| } | |
| } | |
| } | |
| } | |
| assert(has_child); | |
| } | |
| } | |
| static t_pack_molecule* get_most_critical_seed_molecule(int * indexofcrit) { | |
| /* Do_timing_analysis must be called before this, or this function | |
| * will return garbage. Returns molecule with most critical block; | |
| * if block belongs to multiple molecules, return the biggest molecule. */ | |
| int blkidx; | |
| t_pack_molecule *molecule, *best; | |
| struct s_linked_vptr *cur; | |
| while (*indexofcrit < num_logical_blocks) { | |
| blkidx = critindexarray[(*indexofcrit)++]; | |
| if (logical_block[blkidx].clb_index == NO_CLUSTER) { | |
| cur = logical_block[blkidx].packed_molecules; | |
| best = NULL; | |
| while (cur != NULL) { | |
| molecule = (t_pack_molecule *) cur->data_vptr; | |
| if (molecule->valid) { | |
| if (best == NULL | |
| || (best->base_gain) < (molecule->base_gain)) { | |
| best = molecule; | |
| } | |
| } | |
| cur = cur->next; | |
| } | |
| assert(best != NULL); | |
| return best; | |
| } | |
| } | |
| /*if it makes it to here , there are no more blocks available*/ | |
| return NULL; | |
| } | |
| /* get gain of packing molecule into current cluster | |
| gain is equal to total_block_gain + molecule_base_gain*some_factor - introduced_input_nets_of_unrelated_blocks_pulled_in_by_molecule*some_other_factor | |
| */ | |
| static float get_molecule_gain(t_pack_molecule *molecule, std::map<int, float> &blk_gain) { | |
| float gain; | |
| int i, ipin, iport, inet, iblk; | |
| int num_introduced_inputs_of_indirectly_related_block; | |
| t_model_ports *cur; | |
| gain = 0; | |
| num_introduced_inputs_of_indirectly_related_block = 0; | |
| for (i = 0; i < get_array_size_of_molecule(molecule); i++) { | |
| if (molecule->logical_block_ptrs[i] != NULL) { | |
| if(blk_gain.count(molecule->logical_block_ptrs[i]->index) > 0) { | |
| gain += blk_gain[molecule->logical_block_ptrs[i]->index]; | |
| } else { | |
| /* This block has no connection with current cluster, penalize molecule for having this block | |
| */ | |
| cur = molecule->logical_block_ptrs[i]->model->inputs; | |
| iport = 0; | |
| while (cur != NULL) { | |
| if (cur->is_clock != TRUE) { | |
| for (ipin = 0; ipin < cur->size; ipin++) { | |
| inet = | |
| molecule->logical_block_ptrs[i]->input_nets[iport][ipin]; | |
| if (inet != OPEN) { | |
| num_introduced_inputs_of_indirectly_related_block++; | |
| for (iblk = 0; | |
| iblk | |
| < get_array_size_of_molecule( | |
| molecule); iblk++) { | |
| if (molecule->logical_block_ptrs[iblk] | |
| != NULL | |
| && vpack_net[inet].node_block[0] | |
| == molecule->logical_block_ptrs[iblk]->index) { | |
| num_introduced_inputs_of_indirectly_related_block--; | |
| break; | |
| } | |
| } | |
| } | |
| } | |
| iport++; | |
| } | |
| cur = cur->next; | |
| } | |
| } | |
| } | |
| } | |
| gain += molecule->base_gain * 0.0001; /* Use base gain as tie breaker TODO: need to sweep this value and perhaps normalize */ | |
| gain -= num_introduced_inputs_of_indirectly_related_block * (0.001); | |
| return gain; | |
| } | |
| static int compare_molecule_gain(const void *a, const void *b) { | |
| float base_gain_a, base_gain_b, diff; | |
| const t_pack_molecule *molecule_a, *molecule_b; | |
| molecule_a = (*(const t_pack_molecule * const *) a); | |
| molecule_b = (*(const t_pack_molecule * const *) b); | |
| base_gain_a = molecule_a->base_gain; | |
| base_gain_b = molecule_b->base_gain; | |
| diff = base_gain_a - base_gain_b; | |
| if (diff > 0) { | |
| return 1; | |
| } | |
| if (diff < 0) { | |
| return -1; | |
| } | |
| return 0; | |
| } | |
| /* Determine if speculatively packed cur_pb is pin feasible | |
| * Runtime is actually not that bad for this. It's worst case O(k^2) where k is the number of pb_graph pins. Can use hash tables or make incremental if becomes an issue. | |
| */ | |
| static void try_update_lookahead_pins_used(t_pb *cur_pb) { | |
| int i, j; | |
| const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type; | |
| if (pb_type->num_modes > 0 && cur_pb->name != NULL) { | |
| if (cur_pb->child_pbs != NULL) { | |
| for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; | |
| i++) { | |
| if (cur_pb->child_pbs[i] != NULL) { | |
| for (j = 0; | |
| j | |
| < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; | |
| j++) { | |
| try_update_lookahead_pins_used( | |
| &cur_pb->child_pbs[i][j]); | |
| } | |
| } | |
| } | |
| } | |
| } else { | |
| if (pb_type->blif_model != NULL && cur_pb->logical_block != OPEN) { | |
| compute_and_mark_lookahead_pins_used(cur_pb->logical_block); | |
| } | |
| } | |
| } | |
| /* Resets nets used at different pin classes for determining pin feasibility */ | |
| static void reset_lookahead_pins_used(t_pb *cur_pb) { | |
| int i, j; | |
| const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type; | |
| if (cur_pb->pb_stats == NULL) { | |
| return; /* No pins used, no need to continue */ | |
| } | |
| if (pb_type->num_modes > 0 && cur_pb->name != NULL) { | |
| for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { | |
| for (j = 0; | |
| j | |
| < cur_pb->pb_graph_node->input_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; | |
| j++) { | |
| cur_pb->pb_stats->lookahead_input_pins_used[i][j] = OPEN; | |
| } | |
| } | |
| for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { | |
| for (j = 0; | |
| j | |
| < cur_pb->pb_graph_node->output_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; | |
| j++) { | |
| cur_pb->pb_stats->lookahead_output_pins_used[i][j] = OPEN; | |
| } | |
| } | |
| if (cur_pb->child_pbs != NULL) { | |
| for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; | |
| i++) { | |
| if (cur_pb->child_pbs[i] != NULL) { | |
| for (j = 0; | |
| j | |
| < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; | |
| j++) { | |
| reset_lookahead_pins_used(&cur_pb->child_pbs[i][j]); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| /* Determine if pins of speculatively packed pb are legal */ | |
| static void compute_and_mark_lookahead_pins_used(int ilogical_block) { | |
| int i, j; | |
| t_pb *cur_pb; | |
| t_pb_graph_node *pb_graph_node; | |
| const t_pb_type *pb_type; | |
| t_port *prim_port; | |
| int input_port; | |
| int output_port; | |
| int clock_port; | |
| assert(logical_block[ilogical_block].pb != NULL); | |
| cur_pb = logical_block[ilogical_block].pb; | |
| pb_graph_node = cur_pb->pb_graph_node; | |
| pb_type = pb_graph_node->pb_type; | |
| /* Walk through inputs, outputs, and clocks marking pins off of the same class */ | |
| /* TODO: This is inelegant design, I should change the primitive ports in pb_type to be input, output, or clock instead of this lookup */ | |
| input_port = output_port = clock_port = 0; | |
| for (i = 0; i < pb_type->num_ports; i++) { | |
| prim_port = &pb_type->ports[i]; | |
| if (prim_port->is_clock) { | |
| assert(prim_port->type == IN_PORT); | |
| assert(prim_port->num_pins == 1 && clock_port == 0); | |
| /* currently support only one clock for primitives */ | |
| if (logical_block[ilogical_block].clock_net != OPEN) { | |
| compute_and_mark_lookahead_pins_used_for_pin( | |
| &pb_graph_node->clock_pins[0][0], cur_pb, | |
| logical_block[ilogical_block].clock_net); | |
| } | |
| clock_port++; | |
| } else if (prim_port->type == IN_PORT) { | |
| for (j = 0; j < prim_port->num_pins; j++) { | |
| if (logical_block[ilogical_block].input_nets[prim_port->model_port->index][j] | |
| != OPEN) { | |
| compute_and_mark_lookahead_pins_used_for_pin( | |
| &pb_graph_node->input_pins[input_port][j], cur_pb, | |
| logical_block[ilogical_block].input_nets[prim_port->model_port->index][j]); | |
| } | |
| } | |
| input_port++; | |
| } else if (prim_port->type == OUT_PORT) { | |
| for (j = 0; j < prim_port->num_pins; j++) { | |
| if (logical_block[ilogical_block].output_nets[prim_port->model_port->index][j] | |
| != OPEN) { | |
| compute_and_mark_lookahead_pins_used_for_pin( | |
| &pb_graph_node->output_pins[output_port][j], cur_pb, | |
| logical_block[ilogical_block].output_nets[prim_port->model_port->index][j]); | |
| } | |
| } | |
| output_port++; | |
| } else { | |
| assert(0); | |
| } | |
| } | |
| } | |
| /* Given a pin and its assigned net, mark all pin classes that are affected */ | |
| static void compute_and_mark_lookahead_pins_used_for_pin( | |
| t_pb_graph_pin *pb_graph_pin, t_pb *primitive_pb, int inet) { | |
| int depth, i; | |
| int pin_class, output_port; | |
| t_pb * cur_pb; | |
| t_pb * check_pb; | |
| const t_pb_type *pb_type; | |
| t_port *prim_port; | |
| t_pb_graph_pin *output_pb_graph_pin; | |
| int count; | |
| boolean skip, found; | |
| cur_pb = primitive_pb->parent_pb; | |
| while (cur_pb) { | |
| depth = cur_pb->pb_graph_node->pb_type->depth; | |
| pin_class = pb_graph_pin->parent_pin_class[depth]; | |
| assert(pin_class != OPEN); | |
| if (pb_graph_pin->port->type == IN_PORT) { | |
| /* find location of net driver if exist in clb, NULL otherwise */ | |
| output_pb_graph_pin = NULL; | |
| if (logical_block[vpack_net[inet].node_block[0]].clb_index | |
| == logical_block[primitive_pb->logical_block].clb_index) { | |
| pb_type = | |
| logical_block[vpack_net[inet].node_block[0]].pb->pb_graph_node->pb_type; | |
| output_port = 0; | |
| found = FALSE; | |
| for (i = 0; i < pb_type->num_ports && !found; i++) { | |
| prim_port = &pb_type->ports[i]; | |
| if (prim_port->type == OUT_PORT) { | |
| if (pb_type->ports[i].model_port->index | |
| == vpack_net[inet].node_block_port[0]) { | |
| found = TRUE; | |
| break; | |
| } | |
| output_port++; | |
| } | |
| } | |
| assert(found); | |
| output_pb_graph_pin = | |
| &(logical_block[vpack_net[inet].node_block[0]].pb->pb_graph_node->output_pins[output_port][vpack_net[inet].node_block_pin[0]]); | |
| } | |
| skip = FALSE; | |
| /* check if driving pin for input is contained within the currently investigated cluster, if yes, do nothing since no input needs to be used */ | |
| if (output_pb_graph_pin != NULL) { | |
| check_pb = logical_block[vpack_net[inet].node_block[0]].pb; | |
| while (check_pb != NULL && check_pb != cur_pb) { | |
| check_pb = check_pb->parent_pb; | |
| } | |
| if (check_pb != NULL) { | |
| for (i = 0; | |
| skip == FALSE | |
| && i | |
| < output_pb_graph_pin->num_connectable_primtive_input_pins[depth]; | |
| i++) { | |
| if (pb_graph_pin | |
| == output_pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) { | |
| skip = TRUE; | |
| } | |
| } | |
| } | |
| } | |
| /* Must use input pin */ | |
| if (!skip) { | |
| /* Check if already in pin class, if yes, skip */ | |
| skip = FALSE; | |
| for (i = 0; | |
| i | |
| < cur_pb->pb_graph_node->input_pin_class_size[pin_class] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; | |
| i++) { | |
| if (cur_pb->pb_stats->lookahead_input_pins_used[pin_class][i] | |
| == inet) { | |
| skip = TRUE; | |
| } | |
| } | |
| if (!skip) { | |
| /* Net must take up a slot */ | |
| for (i = 0; | |
| i | |
| < cur_pb->pb_graph_node->input_pin_class_size[pin_class] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; | |
| i++) { | |
| if (cur_pb->pb_stats->lookahead_input_pins_used[pin_class][i] | |
| == OPEN) { | |
| cur_pb->pb_stats->lookahead_input_pins_used[pin_class][i] = | |
| inet; | |
| break; | |
| } | |
| } | |
| } | |
| } | |
| } else { | |
| assert(pb_graph_pin->port->type == OUT_PORT); | |
| skip = FALSE; | |
| if (pb_graph_pin->num_connectable_primtive_input_pins[depth] | |
| >= vpack_net[inet].num_sinks) { | |
| /* Important: This runtime penalty looks a lot scarier than it really is. For high fan-out nets, I at most look at the number of pins within the cluster which limits runtime. | |
| DO NOT REMOVE THIS INITIAL FILTER WITHOUT CAREFUL ANALYSIS ON RUNTIME!!! | |
| Key Observation: | |
| For LUT-based designs it is impossible for the average fanout to exceed the number of LUT inputs so it's usually around 4-5 (pigeon-hole argument, if the average fanout is greater than the | |
| number of LUT inputs, where do the extra connections go? Therefore, average fanout must be capped to a small constant where the constant is equal to the number of LUT inputs). The real danger to runtime | |
| is when the number of sinks of a net gets doubled | |
| */ | |
| for (i = 1; i <= vpack_net[inet].num_sinks; i++) { | |
| if (logical_block[vpack_net[inet].node_block[i]].clb_index | |
| != logical_block[vpack_net[inet].node_block[0]].clb_index) { | |
| break; | |
| } | |
| } | |
| if (i == vpack_net[inet].num_sinks + 1) { | |
| count = 0; | |
| /* TODO: I should cache the absorbed outputs, once net is absorbed, net is forever absorbed, no point in rechecking every time */ | |
| for (i = 0; | |
| i | |
| < pb_graph_pin->num_connectable_primtive_input_pins[depth]; | |
| i++) { | |
| if (get_net_corresponding_to_pb_graph_pin(cur_pb, | |
| pb_graph_pin->list_of_connectable_input_pin_ptrs[depth][i]) | |
| == inet) { | |
| count++; | |
| } | |
| } | |
| if (count == vpack_net[inet].num_sinks) { | |
| skip = TRUE; | |
| } | |
| } | |
| } | |
| if (!skip) { | |
| /* This output must exit this cluster */ | |
| for (i = 0; | |
| i | |
| < cur_pb->pb_graph_node->output_pin_class_size[pin_class] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; | |
| i++) { | |
| assert( | |
| cur_pb->pb_stats->lookahead_output_pins_used[pin_class][i] != inet); | |
| if (cur_pb->pb_stats->lookahead_output_pins_used[pin_class][i] | |
| == OPEN) { | |
| cur_pb->pb_stats->lookahead_output_pins_used[pin_class][i] = | |
| inet; | |
| break; | |
| } | |
| } | |
| } | |
| } | |
| cur_pb = cur_pb->parent_pb; | |
| } | |
| } | |
| /* Check if the number of available inputs/outputs for a pin class is sufficient for speculatively packed blocks */ | |
| static boolean check_lookahead_pins_used(t_pb *cur_pb) { | |
| int i, j; | |
| int ipin; | |
| const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type; | |
| boolean success; | |
| success = TRUE; | |
| if (pb_type->num_modes > 0 && cur_pb->name != NULL) { | |
| for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class && success; | |
| i++) { | |
| ipin = 0; | |
| for (j = 0; | |
| j | |
| < cur_pb->pb_graph_node->input_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; | |
| j++) { | |
| if (cur_pb->pb_stats->lookahead_input_pins_used[i][j] != OPEN) { | |
| ipin++; | |
| } | |
| } | |
| if (ipin > cur_pb->pb_graph_node->input_pin_class_size[i]) { | |
| success = FALSE; | |
| } | |
| } | |
| for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class && success; | |
| i++) { | |
| ipin = 0; | |
| for (j = 0; | |
| j | |
| < cur_pb->pb_graph_node->output_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; | |
| j++) { | |
| if (cur_pb->pb_stats->lookahead_output_pins_used[i][j] != OPEN) { | |
| ipin++; | |
| } | |
| } | |
| if (ipin > cur_pb->pb_graph_node->output_pin_class_size[i]) { | |
| success = FALSE; | |
| } | |
| } | |
| if (success && cur_pb->child_pbs != NULL) { | |
| for (i = 0; | |
| success | |
| && i | |
| < pb_type->modes[cur_pb->mode].num_pb_type_children; | |
| i++) { | |
| if (cur_pb->child_pbs[i] != NULL) { | |
| for (j = 0; | |
| success | |
| && j | |
| < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; | |
| j++) { | |
| success = check_lookahead_pins_used( | |
| &cur_pb->child_pbs[i][j]); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| return success; | |
| } | |
| /* Speculation successful, commit input/output pins used */ | |
| static void commit_lookahead_pins_used(t_pb *cur_pb) { | |
| int i, j; | |
| int ipin; | |
| const t_pb_type *pb_type = cur_pb->pb_graph_node->pb_type; | |
| if (pb_type->num_modes > 0 && cur_pb->name != NULL) { | |
| for (i = 0; i < cur_pb->pb_graph_node->num_input_pin_class; i++) { | |
| ipin = 0; | |
| for (j = 0; | |
| j | |
| < cur_pb->pb_graph_node->input_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; | |
| j++) { | |
| if (cur_pb->pb_stats->lookahead_input_pins_used[i][j] != OPEN) { | |
| cur_pb->pb_stats->input_pins_used[i][ipin] = | |
| cur_pb->pb_stats->lookahead_input_pins_used[i][j]; | |
| ipin++; | |
| } | |
| assert(ipin <= cur_pb->pb_graph_node->input_pin_class_size[i]); | |
| } | |
| } | |
| for (i = 0; i < cur_pb->pb_graph_node->num_output_pin_class; i++) { | |
| ipin = 0; | |
| for (j = 0; | |
| j | |
| < cur_pb->pb_graph_node->output_pin_class_size[i] | |
| * AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_FAC + AAPACK_MAX_OVERUSE_LOOKAHEAD_PINS_CONST; | |
| j++) { | |
| if (cur_pb->pb_stats->lookahead_output_pins_used[i][j] != OPEN) { | |
| cur_pb->pb_stats->output_pins_used[i][ipin] = | |
| cur_pb->pb_stats->lookahead_output_pins_used[i][j]; | |
| ipin++; | |
| } | |
| assert(ipin <= cur_pb->pb_graph_node->output_pin_class_size[i]); | |
| } | |
| } | |
| if (cur_pb->child_pbs != NULL) { | |
| for (i = 0; i < pb_type->modes[cur_pb->mode].num_pb_type_children; | |
| i++) { | |
| if (cur_pb->child_pbs[i] != NULL) { | |
| for (j = 0; | |
| j | |
| < pb_type->modes[cur_pb->mode].pb_type_children[i].num_pb; | |
| j++) { | |
| commit_lookahead_pins_used(&cur_pb->child_pbs[i][j]); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| /* determine net at given pin location for cluster, return OPEN if none exists */ | |
| static int get_net_corresponding_to_pb_graph_pin(t_pb *cur_pb, | |
| t_pb_graph_pin *pb_graph_pin) { | |
| t_pb_graph_node *pb_graph_node; | |
| int i; | |
| t_model_ports *model_port; | |
| int ilogical_block; | |
| if (cur_pb->name == NULL) { | |
| return OPEN; | |
| } | |
| if (cur_pb->pb_graph_node->pb_type->num_modes != 0) { | |
| pb_graph_node = pb_graph_pin->parent_node; | |
| while (pb_graph_node->parent_pb_graph_node->pb_type->depth | |
| > cur_pb->pb_graph_node->pb_type->depth) { | |
| pb_graph_node = pb_graph_node->parent_pb_graph_node; | |
| } | |
| if (pb_graph_node->parent_pb_graph_node == cur_pb->pb_graph_node) { | |
| if (cur_pb->mode != pb_graph_node->pb_type->parent_mode->index) { | |
| return OPEN; | |
| } | |
| for (i = 0; | |
| i | |
| < cur_pb->pb_graph_node->pb_type->modes[cur_pb->mode].num_pb_type_children; | |
| i++) { | |
| if (pb_graph_node | |
| == &cur_pb->pb_graph_node->child_pb_graph_nodes[cur_pb->mode][i][pb_graph_node->placement_index]) { | |
| break; | |
| } | |
| } | |
| assert( | |
| i < cur_pb->pb_graph_node->pb_type->modes[cur_pb->mode].num_pb_type_children); | |
| return get_net_corresponding_to_pb_graph_pin( | |
| &cur_pb->child_pbs[i][pb_graph_node->placement_index], | |
| pb_graph_pin); | |
| } else { | |
| return OPEN; | |
| } | |
| } else { | |
| ilogical_block = cur_pb->logical_block; | |
| if (ilogical_block == OPEN) { | |
| return OPEN; | |
| } else { | |
| model_port = pb_graph_pin->port->model_port; | |
| if (model_port->is_clock) { | |
| assert(model_port->dir == IN_PORT); | |
| return logical_block[ilogical_block].clock_net; | |
| } else if (model_port->dir == IN_PORT) { | |
| return logical_block[ilogical_block].input_nets[model_port->index][pb_graph_pin->pin_number]; | |
| } else { | |
| assert(model_port->dir == OUT_PORT); | |
| return logical_block[ilogical_block].output_nets[model_port->index][pb_graph_pin->pin_number]; | |
| } | |
| } | |
| } | |
| } | |
| static void print_block_criticalities(const char * fname) { | |
| /* Prints criticality and critindexarray for each logical block to a file. */ | |
| int iblock, len; | |
| FILE * fp; | |
| char * name; | |
| fp = my_fopen(fname, "w", 0); | |
| fprintf(fp, "Index \tLogical block name \tCriticality \tCritindexarray\n\n"); | |
| for (iblock = 0; iblock < num_logical_blocks; iblock++) { | |
| name = logical_block[iblock].name; | |
| len = strlen(name); | |
| fprintf(fp, "%d\t%s\t", logical_block[iblock].index, name); | |
| if (len < 8) { | |
| fprintf(fp, "\t\t"); | |
| } else if (len < 16) { | |
| fprintf(fp, "\t"); | |
| } | |
| fprintf(fp, "%f\t%d\n", block_criticality[iblock], critindexarray[iblock]); | |
| } | |
| fclose(fp); | |
| } |