#include <cstdio> | |
#include <cstring> | |
#include <cmath> | |
#include <algorithm> | |
using namespace std; | |
#include <assert.h> | |
#include "util.h" | |
#include "vpr_types.h" | |
#include "vpr_utils.h" | |
#include "globals.h" | |
#include "rr_graph_util.h" | |
#include "rr_graph.h" | |
#include "rr_graph2.h" | |
#include "rr_graph_sbox.h" | |
#include "rr_graph_timing_params.h" | |
#include "rr_graph_indexed_data.h" | |
#include "check_rr_graph.h" | |
#include "read_xml_arch_file.h" | |
#include "ReadOptions.h" | |
#include "dump_rr_structs.h" | |
#include "cb_metrics.h" | |
#include "build_switchblocks.h" | |
#include "route_common.h" | |
#ifdef INTERPOSER_BASED_ARCHITECTURE | |
#include "rr_graph_multi.h" | |
#endif | |
/* Singly-linked list node recording the size of one multiplexer; used when
 * accumulating mux-size statistics over the routing resource graph. */
typedef struct s_mux {
	int size;              /* number of inputs feeding this mux */
	struct s_mux *next;    /* next mux record in the list, or NULL */
} t_mux;
/* Histogram of multiplexer sizes, kept as a linked list of distributions. */
typedef struct s_mux_size_distribution {
	int mux_count;      /* total number of muxes tallied in this distribution */
	int max_index;      /* highest valid index into distr[] */
	int *distr;         /* distr[i]: count of muxes of size i -- presumably; confirm at fill site */
	struct s_mux_size_distribution *next;  /* next distribution in the list, or NULL */
} t_mux_size_distribution;
/* Describes one direct (dedicated, non-routed) connection between a range of
 * pins on a source CLB type and a range of pins on a destination CLB type,
 * as specified by a <direct> declaration in the architecture file. */
typedef struct s_clb_to_clb_directs {
	t_type_descriptor *from_clb_type;   /* block type the direct drives from */
	int from_clb_pin_start_index;       /* first source pin (inclusive) */
	int from_clb_pin_end_index;         /* last source pin (inclusive) */
	t_type_descriptor *to_clb_type;     /* block type the direct drives into */
	int to_clb_pin_start_index;         /* first destination pin (inclusive) */
	int to_clb_pin_end_index;           /* last destination pin (inclusive) */
	int switch_index; //The switch type used by this direct connection
} t_clb_to_clb_directs;
/* UDSD Modifications by WMF End */ | |
/******************* Variables local to this module. ***********************/ | |
/* Used to free "chunked" memory. If NULL, no rr_graph exists right now. */
/* Chunk allocator state shared by this module's my_chunk_malloc calls. */
static t_chunk rr_mem_ch = {NULL, 0, NULL};
/* Status of current chunk being dished out by calls to my_chunk_malloc. */ | |
/********************* Subroutines local to this module. *******************/ | |
static int *****alloc_and_load_pin_to_track_map(INP e_pin_type pin_type, | |
INP int **Fc, INP t_type_ptr Type, INP bool *perturb_switch_pattern, | |
INP e_directionality directionality, | |
INP int num_seg_types, INP int *sets_per_seg_type); | |
static int *****alloc_and_load_pin_to_seg_type( | |
INP e_pin_type pin_type, | |
INP int seg_type_tracks, INP int Fc, | |
INP t_type_ptr Type, INP bool perturb_switch_pattern, | |
INP e_directionality directionality); | |
static struct s_ivec ****alloc_and_load_track_to_pin_lookup( | |
INP int *****pin_to_track_map, INP int **Fc, | |
INP int width, INP int height, | |
INP int num_pins, INP int max_chan_width, | |
INP int num_seg_types); | |
static void build_bidir_rr_opins( | |
INP int i, INP int j, | |
INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices, | |
INP int ******opin_to_track_map, INP int ***Fc_out, | |
INP bool * L_rr_edge_done, INP t_seg_details * seg_details, | |
INP struct s_grid_tile **L_grid, INP int delayless_switch, | |
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs, | |
INP int num_seg_types); | |
static void build_unidir_rr_opins( | |
INP int i, INP int j, | |
INP struct s_grid_tile **L_grid, INP int ***Fc_out, | |
INP int max_chan_width, | |
INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y, | |
INOUTP int ***Fc_xofs, INOUTP int ***Fc_yofs, | |
INOUTP t_rr_node * L_rr_node, INOUTP bool * L_rr_edge_done, | |
OUTP bool * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch, | |
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs, | |
INP int num_seg_types); | |
static int get_opin_direct_connecions( | |
int x, int y, int opin, | |
INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices, | |
INP t_direct_inf *directs, INP int num_directs, | |
INP t_clb_to_clb_directs *clb_to_clb_directs); | |
static void alloc_and_load_rr_graph( | |
INP int num_nodes, | |
INP t_rr_node * L_rr_node, INP int num_seg_types, | |
INP t_seg_details * seg_details, | |
INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y, | |
INP bool * L_rr_edge_done, | |
INP struct s_ivec *****track_to_pin_lookup, | |
INP int ******opin_to_track_map, INP struct s_ivec ***switch_block_conn, | |
INP t_sb_connection_map *sb_conn_map, | |
INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs, | |
INP short ******sblock_pattern, INP int ***Fc_out, INP int ***Fc_xofs, | |
INP int ***Fc_yofs, INP t_ivec *** L_rr_node_indices, | |
INP int max_chan_width, INP enum e_switch_block_type sb_type, | |
INP int delayless_switch, INP enum e_directionality directionality, | |
INP int wire_to_ipin_switch, OUTP bool * Fc_clipped, | |
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs); | |
static void load_uniform_switch_pattern( | |
INP t_type_ptr type, | |
INOUTP int *****tracks_connected_to_pin, INP int num_phys_pins, | |
INP int *pin_num_ordering, INP int *side_ordering, | |
INP int *width_ordering, INP int *height_ordering, | |
INP int x_chan_width, INP int y_chan_width, INP int Fc, | |
INP enum e_directionality directionality); | |
static void load_perturbed_switch_pattern( | |
INP t_type_ptr type, | |
INOUTP int *****tracks_connected_to_pin, INP int num_phys_pins, | |
INP int *pin_num_ordering, INP int *side_ordering, | |
INP int *width_ordering, INP int *height_ordering, | |
INP int x_chan_width, INP int y_chan_width, INP int Fc, | |
INP enum e_directionality directionality); | |
static bool* alloc_and_load_perturb_opins(INP t_type_ptr type, INP int **Fc_out, INP int max_chan_width, | |
INP int num_seg_types, INP t_segment_inf *segment_inf); | |
#ifdef ENABLE_CHECK_ALL_TRACKS | |
static void check_all_tracks_reach_pins( | |
t_type_ptr type, | |
int *****tracks_connected_to_pin, | |
int max_chan_width, int Fc, | |
enum e_pin_type ipin_or_opin); | |
#endif | |
static bool **alloc_and_load_perturb_ipins( | |
INP int max_chan_width, INP int L_num_types, | |
INP int num_seg_types, | |
INP int *sets_per_seg_type, | |
INP int ***Fc_in, INP int ***Fc_out, | |
INP enum e_directionality directionality); | |
static void build_rr_sinks_sources( | |
INP int i, INP int j, | |
INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices, | |
INP int delayless_switch, INP struct s_grid_tile **L_grid); | |
static void build_rr_chan( | |
INP int i, INP int j, INP t_rr_type chan_type, | |
INP struct s_ivec *****track_to_pin_lookup, t_sb_connection_map *sb_conn_map, | |
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset, | |
INP int max_chan_width, INP int tracks_per_chan, INP int *opin_mux_size, | |
INP short ******sblock_pattern, INP int Fs_per_side, | |
INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y, | |
INP t_ivec *** L_rr_node_indices, | |
INP bool * L_rr_edge_done, INOUTP t_rr_node * L_rr_node, | |
INP int wire_to_ipin_switch, INP enum e_directionality directionality); | |
static int alloc_and_load_rr_switch_inf(INP int num_arch_switches, INP int wire_to_arch_ipin_switch, OUTP int *wire_to_rr_ipin_switch); | |
static void remap_rr_node_switch_indices(INP map<int,int> *switch_fanin); | |
static void load_rr_switch_inf(INP int num_arch_switches, INOUTP map<int,int> *switch_fanin); | |
static int alloc_rr_switch_inf(INP int num_arch_switches, OUTP map<int,int> *switch_fanin); | |
static void rr_graph_externals( | |
t_segment_inf * segment_inf, int num_seg_types, int max_chan_width, | |
int wire_to_rr_ipin_switch, enum e_base_cost_type base_cost_type); | |
void alloc_and_load_edges_and_switches( | |
INP t_rr_node * L_rr_node, INP int inode, | |
INP int num_edges, INP bool * L_rr_edge_done, | |
INP t_linked_edge * edge_list_head); | |
static void alloc_net_rr_terminals(void); | |
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices); | |
static t_clb_to_clb_directs *alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs, | |
INP int delayless_switch); | |
void watch_edges(int inode, t_linked_edge * edge_list_head); | |
static void free_type_pin_to_track_map( | |
int ******ipin_to_track_map, | |
t_type_ptr types); | |
static void free_type_track_to_pin_map( | |
struct s_ivec *****track_to_pin_map, | |
t_type_ptr types, int max_chan_width); | |
static t_seg_details *alloc_and_load_global_route_seg_details( | |
INP int global_route_switch, | |
OUTP int * num_seg_details = 0); | |
/* UDSD Modifications by WMF End */ | |
static int ***alloc_and_load_actual_fc(INP int L_num_types, INP t_type_ptr types, INP int max_pins, | |
INP int num_seg_types, INP const int *sets_per_seg_type, | |
INP int max_chan_width, INP bool is_Fc_out, | |
INP enum e_directionality directionality, | |
OUTP bool *Fc_clipped, INP bool ignore_Fc_0); | |
/******************* Subroutine definitions *******************************/ | |
void build_rr_graph( | |
INP t_graph_type graph_type, INP int L_num_types, | |
INP t_type_ptr types, INP int L_nx, INP int L_ny, | |
INP struct s_grid_tile **L_grid, | |
INP struct s_chan_width *nodes_per_chan, | |
INP struct s_chan_width_dist *chan_capacity_inf, | |
INP enum e_switch_block_type sb_type, INP int Fs, | |
INP vector<t_switchblock_inf> switchblocks, | |
INP int num_seg_types, INP int num_arch_switches, | |
INP t_segment_inf * segment_inf, | |
INP int global_route_switch, INP int delayless_switch, | |
INP t_timing_inf timing_inf, INP int wire_to_arch_ipin_switch, | |
INP enum e_base_cost_type base_cost_type, | |
INP bool trim_empty_channels, | |
INP bool trim_obs_channels, | |
INP t_direct_inf *directs, INP int num_directs, | |
INP bool ignore_Fc_0, INP const char *dump_rr_structs_file, | |
OUTP int *wire_to_rr_ipin_switch, | |
OUTP int *num_rr_switches, | |
OUTP int *Warnings) { | |
/* Reset warning flag */ | |
*Warnings = RR_GRAPH_NO_WARN; | |
/* Decode the graph_type */ | |
bool is_global_graph = (GRAPH_GLOBAL == graph_type ? true : false); | |
bool use_full_seg_groups = (GRAPH_UNIDIR_TILEABLE == graph_type ? true : false); | |
enum e_directionality directionality = (GRAPH_BIDIR == graph_type ? BI_DIRECTIONAL : UNI_DIRECTIONAL); | |
if (is_global_graph) { | |
directionality = BI_DIRECTIONAL; | |
} | |
/* Global routing uses a single longwire track */ | |
int max_chan_width = (is_global_graph ? 1 : nodes_per_chan->max); | |
assert(max_chan_width > 0); | |
t_clb_to_clb_directs *clb_to_clb_directs = NULL; | |
if(num_directs > 0) { | |
clb_to_clb_directs = alloc_and_load_clb_to_clb_directs(directs, num_directs, delayless_switch); | |
} | |
/* START SEG_DETAILS */ | |
int num_seg_details = 0; | |
t_seg_details *seg_details = NULL; | |
if (is_global_graph) { | |
/* Sets up a single unit length segment type for global routing. */ | |
seg_details = alloc_and_load_global_route_seg_details( | |
global_route_switch, &num_seg_details); | |
} else { | |
/* Setup segments including distrubuting tracks and staggering. | |
* If use_full_seg_groups is specified, max_chan_width may be | |
* changed. Warning should be singled to caller if this happens. */ | |
seg_details = alloc_and_load_seg_details(&max_chan_width, | |
max(L_nx, L_ny), num_seg_types, segment_inf, | |
use_full_seg_groups, is_global_graph, directionality, | |
&num_seg_details); | |
if ((is_global_graph ? 1 : nodes_per_chan->max) != max_chan_width) { | |
nodes_per_chan->max = max_chan_width; | |
*Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED; | |
} | |
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SEG_DETAILS)) { | |
dump_seg_details(seg_details, max_chan_width, | |
getEchoFileName(E_ECHO_SEG_DETAILS)); | |
} | |
} | |
/* END SEG_DETAILS */ | |
/* START CHAN_DETAILS */ | |
t_chan_details *chan_details_x = NULL; | |
t_chan_details *chan_details_y = NULL; | |
alloc_and_load_chan_details(L_nx, L_ny, nodes_per_chan, | |
trim_empty_channels, trim_obs_channels, | |
num_seg_details, seg_details, | |
&chan_details_x, &chan_details_y); | |
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CHAN_DETAILS)) { | |
dump_chan_details( chan_details_x, chan_details_y, max_chan_width, nx, ny, | |
getEchoFileName(E_ECHO_CHAN_DETAILS)); | |
} | |
/* END CHAN_DETAILS */ | |
/* START FC */ | |
/* Determine the actual value of Fc */ | |
int ***Fc_in = NULL; /* [0..num_types-1][0..num_pins-1][0..num_segments-1] */ | |
int ***Fc_out = NULL; /* [0..num_types-1][0..num_pins-1][0..num_segments-1] */ | |
/* get maximum number of pins across all blocks */ | |
int max_pins = types[0].num_pins; | |
for (int i = 1; i < L_num_types; ++i) { | |
if (types[i].num_pins > max_pins) { | |
max_pins = types[i].num_pins; | |
} | |
} | |
/* get the number of 'sets' for each segment type -- unidirectial architectures have two tracks in a set, bidirectional have one */ | |
int total_sets = max_chan_width; | |
if (directionality == UNI_DIRECTIONAL){ | |
assert(max_chan_width % 2 == 0); | |
total_sets /= 2; | |
} | |
int *sets_per_seg_type = get_seg_track_counts(total_sets, num_seg_types, segment_inf, use_full_seg_groups); | |
if (is_global_graph) { | |
//FIXME: out of bounds? | |
Fc_in = (int ***) my_malloc(sizeof(int) * L_num_types); | |
Fc_out = (int ***) my_malloc(sizeof(int) * L_num_types); | |
for (int i = 0; i < L_num_types; ++i) { | |
for (int j = 0; j < types[i].num_pins; ++j) { | |
for (int k = 0; k < num_seg_types; k++){ | |
Fc_in[i][j][k] = 1; | |
Fc_out[i][j][k] = 1; | |
} | |
} | |
} | |
} else { | |
bool Fc_clipped = false; | |
Fc_in = alloc_and_load_actual_fc(L_num_types, types, max_pins, num_seg_types, sets_per_seg_type, max_chan_width, | |
false, directionality, &Fc_clipped, ignore_Fc_0); | |
if (Fc_clipped) { | |
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED; | |
} | |
Fc_clipped = false; | |
Fc_out = alloc_and_load_actual_fc(L_num_types, types, max_pins, num_seg_types, sets_per_seg_type, max_chan_width, | |
true, directionality, &Fc_clipped, ignore_Fc_0); | |
if (Fc_clipped) { | |
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED; | |
} | |
#ifdef VERBOSE | |
for (i = 1; i < L_num_types; ++i) { /* Skip "<EMPTY>" */ | |
for (j = 0; j < type_descriptors[i].num_pins; ++j) { | |
for (k = 0; k < num_seg_types; k++){ | |
if (type_descriptors[i].is_Fc_full_flex[j]) { | |
vpr_printf_info("Fc Actual Values: type = %s, pin = %d, seg_index = %d, Fc_out = full, Fc_in = %d.\n", | |
type_descriptors[i].name, j, k, Fc_in[i][j][k]); | |
} | |
else { | |
vpr_printf_info("Fc Actual Values: type = %s, pin = %d, seg = %d, Fc_out = %d, Fc_in = %d.\n", | |
type_descriptors[i].name, j, k, Fc_out[i][j], Fc_in[i][j]); | |
} | |
} | |
} | |
} | |
#endif /* VERBOSE */ | |
} | |
bool **perturb_ipins = alloc_and_load_perturb_ipins(max_chan_width, L_num_types, num_seg_types, | |
sets_per_seg_type, Fc_in, Fc_out, directionality); | |
/* END FC */ | |
/* Alloc node lookups, count nodes, alloc rr nodes */ | |
num_rr_nodes = 0; | |
rr_node_indices = alloc_and_load_rr_node_indices(max_chan_width, L_nx, L_ny, | |
&num_rr_nodes, chan_details_x, chan_details_y); | |
rr_node = (t_rr_node *) my_malloc(sizeof(t_rr_node) * num_rr_nodes); | |
memset(rr_node, 0, sizeof(t_rr_node) * num_rr_nodes); | |
// XXX | |
g_pin_side = (char *) my_malloc(sizeof(char) * num_rr_nodes); | |
memset(g_pin_side, 0, sizeof(char) * num_rr_nodes); | |
bool *L_rr_edge_done = (bool *) my_malloc(sizeof(bool) * num_rr_nodes); | |
memset(L_rr_edge_done, 0, sizeof(bool) * num_rr_nodes); | |
/* These are data structures used by the the unidir opin mapping. They are used | |
to spread connections evenly for each segment type among the available | |
wire start points */ | |
int ***Fc_xofs = NULL; /* [0..ny-1][0..nx-1][0..num_seg_types-1] */ | |
int ***Fc_yofs = NULL; /* [0..nx-1][0..ny-1][0..num_seg_types-1] */ | |
if (UNI_DIRECTIONAL == directionality) { | |
Fc_xofs = (int ***) alloc_matrix3(0, L_ny, 0, L_nx, 0, num_seg_types-1, sizeof(int)); | |
Fc_yofs = (int ***) alloc_matrix3(0, L_nx, 0, L_ny, 0, num_seg_types-1, sizeof(int)); | |
for (int i = 0; i <= L_nx; ++i) { | |
for (int j = 0; j <= L_ny; ++j) { | |
for (int iseg = 0; iseg < num_seg_types; ++iseg){ | |
Fc_xofs[j][i][iseg] = 0; | |
Fc_yofs[i][j][iseg] = 0; | |
} | |
} | |
} | |
} | |
/* START SB LOOKUP */ | |
/* Alloc and load the switch block lookup */ | |
t_ivec ***switch_block_conn = NULL; | |
short ******unidir_sb_pattern = NULL; | |
t_sb_connection_map *sb_conn_map = NULL; //for custom switch blocks | |
if (is_global_graph) { | |
switch_block_conn = alloc_and_load_switch_block_conn(1, SUBSET, 3); | |
} else if (BI_DIRECTIONAL == directionality) { | |
if (sb_type == CUSTOM){ | |
sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, | |
chan_details_y, L_nx, L_ny, switchblocks, | |
nodes_per_chan, directionality); | |
} else { | |
switch_block_conn = alloc_and_load_switch_block_conn(max_chan_width, | |
sb_type, Fs); | |
} | |
} else { | |
assert(UNI_DIRECTIONAL == directionality); | |
if (sb_type == CUSTOM){ | |
sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, | |
chan_details_y, L_nx, L_ny, switchblocks, | |
nodes_per_chan, directionality); | |
} else { | |
unidir_sb_pattern = alloc_sblock_pattern_lookup(L_nx, L_ny, max_chan_width); | |
for (int i = 0; i <= L_nx; i++) { | |
for (int j = 0; j <= L_ny; j++) { | |
load_sblock_pattern_lookup(i, j, nodes_per_chan, | |
chan_details_x, chan_details_y, | |
Fs, sb_type, unidir_sb_pattern); | |
} | |
} | |
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SBLOCK_PATTERN)) { | |
dump_sblock_pattern( unidir_sb_pattern, max_chan_width, L_nx, L_ny, | |
getEchoFileName(E_ECHO_SBLOCK_PATTERN)); | |
} | |
} | |
} | |
/* END SB LOOKUP */ | |
/* START IPINP MAP */ | |
/* Create ipin map lookups */ | |
int ******ipin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */ | |
t_ivec *****track_to_pin_lookup = NULL; /* [0..num_types-1][0..max_chan_width-1][0..width][0..height][0..3] */ | |
ipin_to_track_map = (int ******) my_malloc(sizeof(int *****) * L_num_types); | |
track_to_pin_lookup = (struct s_ivec *****) my_malloc(sizeof(struct s_ivec ****) * L_num_types); | |
for (int itype = 0; itype < L_num_types; ++itype) { | |
ipin_to_track_map[itype] = alloc_and_load_pin_to_track_map(RECEIVER, | |
Fc_in[itype], &types[itype], perturb_ipins[itype], directionality, | |
num_seg_types, sets_per_seg_type); | |
track_to_pin_lookup[itype] = alloc_and_load_track_to_pin_lookup( | |
ipin_to_track_map[itype], Fc_in[itype], types[itype].width, types[itype].height, | |
types[itype].num_pins, max_chan_width, num_seg_types); | |
} | |
/* END IPINP MAP */ | |
/* START OPINP MAP */ | |
/* Create opin map lookups */ | |
int ******opin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */ | |
if (BI_DIRECTIONAL == directionality) { | |
//bool test_metrics_outp = false; | |
opin_to_track_map = (int ******) my_malloc(sizeof(int *****) * L_num_types); | |
for (int i = 0; i < L_num_types; ++i) { | |
bool *perturb_opins = alloc_and_load_perturb_opins(&types[i], Fc_out[i], | |
max_chan_width, num_seg_types, segment_inf); | |
opin_to_track_map[i] = alloc_and_load_pin_to_track_map(DRIVER, | |
Fc_out[i], &types[i], perturb_opins, directionality, | |
num_seg_types, sets_per_seg_type); | |
///* adjust CLB connection block, if enabled */ | |
//if (strcmp("clb", types[i].name) == 0 && test_metrics_outp){ | |
// float target_metric_outp; | |
// target_metric_outp = 0; | |
// /* during binary search routing we want the connection block (CB) to be deterministic. That is, | |
// if we calculate a CB, check other CBs for other values of W, and then come back to that | |
// CB, that CB should be the same as before, otherwise we get connectivity errors. So to achieve this determinism | |
// we save (and later restore) the current seed, and just use a seed of 1 for generating the connection block */ | |
// unsigned int saved_seed = get_current_random(); | |
// my_srandom(1); | |
// adjust_cb_metric(WIRE_HOMOGENEITY, target_metric_outp, 0.005, 0.05, &types[i], opin_to_track_map[i], DRIVER, Fc_out[i], nodes_per_chan, | |
// num_seg_types, segment_inf); | |
// Conn_Block_Metrics cb_metrics; | |
// get_conn_block_metrics(&types[i], opin_to_track_map[i], num_seg_types, segment_inf, DRIVER, Fc_out[i], nodes_per_chan, &cb_metrics); | |
// vpr_printf_info("Block Type: %s Pin Diversity: %f Wire Homogeneity: %f Hamming Distance: %f Hamming Proximity: %f\n", | |
// types[i].name, cb_metrics.pin_diversity, cb_metrics.wire_homogeneity, | |
// cb_metrics.lemieux_cost_func, cb_metrics.hamming_proximity); | |
// | |
// my_srandom(saved_seed); | |
//} | |
free(perturb_opins); | |
} | |
} | |
/* END OPINP MAP */ | |
bool Fc_clipped = false; | |
alloc_and_load_rr_graph(num_rr_nodes, rr_node, num_seg_types, | |
seg_details, chan_details_x, chan_details_y, | |
L_rr_edge_done, track_to_pin_lookup, opin_to_track_map, | |
switch_block_conn, sb_conn_map, L_grid, L_nx, L_ny, Fs, unidir_sb_pattern, | |
Fc_out, Fc_xofs, Fc_yofs, rr_node_indices, max_chan_width, sb_type, | |
delayless_switch, directionality, wire_to_arch_ipin_switch, &Fc_clipped, | |
directs, num_directs, clb_to_clb_directs); | |
/* Update rr_nodes capacities if global routing */ | |
if (graph_type == GRAPH_GLOBAL) { | |
for (int i = 0; i < num_rr_nodes; i++) { | |
if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) { | |
rr_node[i].set_capacity(chan_width.max); | |
} | |
} | |
} | |
/* Allocate and load routing resource switches, which are derived from the switches from the architecture file, | |
based on their fanin in the rr graph. This routine also adjusts the rr nodes to point to these new rr switches */ | |
(*num_rr_switches) = alloc_and_load_rr_switch_inf(num_arch_switches, wire_to_arch_ipin_switch, wire_to_rr_ipin_switch); | |
rr_graph_externals(segment_inf, num_seg_types, max_chan_width, | |
*wire_to_rr_ipin_switch, base_cost_type); | |
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_RR_GRAPH)) { | |
dump_rr_graph(getEchoFileName(E_ECHO_RR_GRAPH)); | |
} | |
#ifdef INTERPOSER_BASED_ARCHITECTURE | |
/* Main Entry Point to rr_graph modifications for interposer-based architectures */ | |
if(num_cuts > 0) | |
{ | |
modify_rr_graph_for_interposer_based_arch(max_chan_width, directionality); | |
} | |
#endif | |
check_rr_graph(graph_type, L_nx, L_ny, *num_rr_switches, Fc_in); | |
/* dump out rr structs if requested */ | |
if (dump_rr_structs_file){ | |
dump_rr_structs(dump_rr_structs_file); | |
} | |
/* Free all temp structs */ | |
if (seg_details) { | |
free_seg_details(seg_details, max_chan_width); | |
seg_details = NULL; | |
} | |
if (chan_details_x || chan_details_y) { | |
free_chan_details(chan_details_x, chan_details_y, max_chan_width, L_nx, L_ny); | |
chan_details_x = NULL; | |
chan_details_y = NULL; | |
} | |
if (Fc_in) { | |
free_matrix3(Fc_in, 0, L_num_types-1, 0, max_pins-1, 0, sizeof(int)); | |
Fc_in = NULL; | |
} | |
if (Fc_out) { | |
free_matrix3(Fc_out, 0, L_num_types-1, 0, max_pins-1, 0, sizeof(int)); | |
Fc_out = NULL; | |
} | |
if (perturb_ipins) { | |
free_matrix(perturb_ipins, 0, L_num_types-1, 0, sizeof(bool)); | |
perturb_ipins = NULL; | |
} | |
if (switch_block_conn) { | |
free_switch_block_conn(switch_block_conn, max_chan_width); | |
switch_block_conn = NULL; | |
} | |
if (sb_conn_map) { | |
free_switchblock_permutations(sb_conn_map); | |
sb_conn_map = NULL; | |
} | |
if (L_rr_edge_done) { | |
free(L_rr_edge_done); | |
L_rr_edge_done = NULL; | |
} | |
if (Fc_xofs) { | |
free_matrix3(Fc_xofs, 0, L_ny, 0, L_nx, 0, sizeof(int)); | |
Fc_xofs = NULL; | |
} | |
if (Fc_yofs) { | |
free_matrix3(Fc_yofs, 0, L_nx, 0, L_ny, 0, sizeof(int)); | |
Fc_yofs = NULL; | |
} | |
if (unidir_sb_pattern) { | |
free_sblock_pattern_lookup(unidir_sb_pattern); | |
unidir_sb_pattern = NULL; | |
} | |
if (opin_to_track_map) { | |
for (int i = 0; i < L_num_types; ++i) { | |
free_matrix5(opin_to_track_map[i], 0, types[i].num_pins - 1, | |
0, types[i].width - 1, 0, types[i].height - 1, | |
0, 3, 0, sizeof(int)); | |
} | |
free(opin_to_track_map); | |
} | |
if (sets_per_seg_type){ | |
free(sets_per_seg_type); | |
sets_per_seg_type = NULL; | |
} | |
free_type_pin_to_track_map(ipin_to_track_map, types); | |
free_type_track_to_pin_map(track_to_pin_lookup, types, max_chan_width); | |
if(clb_to_clb_directs != NULL) { | |
free(clb_to_clb_directs); | |
} | |
} | |
/* Allocates and loads the global rr_switch_inf array based on the global | |
arch_switch_inf array and the fan-ins used by the rr nodes. | |
Also changes switch indices of rr_nodes to index into rr_switch_inf | |
instead of arch_switch_inf. | |
Returns the number of rr switches created. | |
Also returns, through a pointer, the index of a representative ipin cblock switch. | |
- Currently we're not allowing a designer to specify an ipin cblock switch with | |
multiple fan-ins, so there's just one of these switches in the g_rr_switch_inf array. | |
But in the future if we allow this, we can return an index to a representative switch | |
The rr_switch_inf switches are derived from the arch_switch_inf switches | |
(which were read-in from the architecture file) based on fan-in. The delays of | |
the rr switches depend on their fan-in, so we first go through the rr_nodes | |
and count how many different fan-ins exist for each arch switch. | |
Then we create these rr switches and update the switch indices | |
of rr_nodes to index into the rr_switch_inf array. */ | |
static int alloc_and_load_rr_switch_inf(INP int num_arch_switches, INP int wire_to_arch_ipin_switch, OUTP int *wire_to_rr_ipin_switch){
	/* Per arch switch, a map from fan-in value to the index in the
	   g_rr_switch_inf array where that (switch, fan-in) variant lives.
	   One arch switch can expand into several rr switches, one per
	   distinct fan-in seen in the rr graph. */
	map<int, int> *fanin_to_rr_index = new map<int, int>[num_arch_switches];

	/* Pass 1: discover the distinct fan-ins of every arch switch and size
	   the rr_switch_inf array accordingly. */
	int num_rr_switches = alloc_rr_switch_inf(num_arch_switches, fanin_to_rr_index);

	/* Pass 2: create the rr switches, recording in fanin_to_rr_index which
	   rr index each (arch switch, fan-in) pair was assigned. */
	load_rr_switch_inf(num_arch_switches, fanin_to_rr_index);

	/* Pass 3: rewrite the rr nodes' switch indices to point into
	   g_rr_switch_inf instead of g_arch_switch_inf. */
	remap_rr_node_switch_indices(fanin_to_rr_index);

	/* Publish the representative ipin cblock switch. The arch currently
	   permits only a single (fan-in independent) ipin cblock switch, keyed
	   by UNDEFINED; anything else is an arch error. */
	map<int, int> &ipin_switch_map = fanin_to_rr_index[wire_to_arch_ipin_switch];
	if (ipin_switch_map.count(UNDEFINED)) {
		(*wire_to_rr_ipin_switch) = ipin_switch_map[UNDEFINED];
	} else if (!ipin_switch_map.empty()) {
		vpr_throw(VPR_ERROR_ARCH, __FILE__, __LINE__,
				"Not currently allowing an ipin cblock switch to have multiple fan-ins");
	} else {
		vpr_throw(VPR_ERROR_ARCH, __FILE__, __LINE__,
				"No switch is specified for the ipin cblock, check if there is an error in arch file");
	}

	delete[] fanin_to_rr_index;
	return num_rr_switches;
}
/* Allocates space for the global g_rr_switch_inf variable and returns the | |
number of rr switches that were allocated */ | |
static int alloc_rr_switch_inf(INP int num_arch_switches, OUTP map<int,int> *switch_fanin){
	/* For every rr node, tally how many edges of each arch switch type
	   feed INTO it: fanin_per_node[node][arch_switch_index] = fan-in. */
	map<int, int> *fanin_per_node = new map<int, int>[num_rr_nodes];
	for (int src = 0; src < num_rr_nodes; ++src) {
		int edge_count = rr_node[src].get_num_edges();
		for (int e = 0; e < edge_count; ++e) {
			int arch_sw = rr_node[src].switches[e];
			int sink = rr_node[src].edges[e];
			/* operator[] value-initializes the count to 0 on first access */
			fanin_per_node[sink][arch_sw]++;
		}
	}

	/* Count the distinct (arch switch, fan-in) combinations; each one
	   becomes its own entry in the rr_switch_inf array. */
	int num_rr_switches = 0;
	for (int sink = 0; sink < num_rr_nodes; ++sink) {
		map<int, int>::iterator it;
		for (it = fanin_per_node[sink].begin(); it != fanin_per_node[sink].end(); ++it) {
			int arch_sw = it->first;
			int fanin = it->second;
			if (g_arch_switch_inf[arch_sw].Tdel_map.count(UNDEFINED) == 1) {
				/* constant-delay switch: fan-in is irrelevant, collapse to one entry */
				fanin = UNDEFINED;
			}
			if (switch_fanin[arch_sw].count(fanin) == 0) {
				switch_fanin[arch_sw][fanin] = 0;
				++num_rr_switches;
			}
		}
	}
	delete[] fanin_per_node;

	/* allocate space for the rr_switch_inf array (it's freed later in vpr_api.c-->free_arch) */
	g_rr_switch_inf = new s_rr_switch_inf[num_rr_switches];

	return num_rr_switches;
}
/* Debug helper: prints a map's size and its smallest key.
 * Fixed: previously dereferenced dbmap.begin() unconditionally, which is
 * undefined behavior when the map is empty. */
void print_map(map<int, int> dbmap) {
	printf("size is: %d\n", (int)dbmap.size());
	if (!dbmap.empty()) {
		printf("first first: %d\n", (dbmap.begin())->first);
	}
}
/* load the global g_rr_switch_inf variable. also keep track of, for each arch switch, what | |
index of the rr_switch_inf array each version of its fanin has been mapped to (through switch_fanin map) */ | |
static void load_rr_switch_inf(INP int num_arch_switches, INOUTP map<int,int> *switch_fanin){
	/* running index of the next free slot in g_rr_switch_inf; every
	   (arch switch, fanin) combination consumes exactly one slot */
	int i_rr_switch = 0;
	if (g_switch_fanin_remap != NULL) {
		// at this stage, we rebuild the rr_graph (probably in binary search)
		// so old g_switch_fanin_remap is obsolete
		delete [] g_switch_fanin_remap;
	}
	g_switch_fanin_remap = new map<int, int>[num_arch_switches];
	for (int i_arch_switch = 0; i_arch_switch < num_arch_switches; i_arch_switch++){
		map<int,int>::iterator it;
		for (it = switch_fanin[i_arch_switch].begin(); it != switch_fanin[i_arch_switch].end(); it++){
			/* the fanin value is in it->first, and we'll need to set what index this i_arch_switch/fanin
			   combination maps to (within rr_switch_inf) in it->second) */
			int fanin = it->first;
			it->second = i_rr_switch;
			// setup g_switch_fanin_remap, for future switch usage analysis
			g_switch_fanin_remap[i_arch_switch][fanin] = i_rr_switch;
			/* figure out, by looking at the arch switch's Tdel map, what the delay of the new
			   rr switch should be */
			map<int,double> &Tdel_map = g_arch_switch_inf[i_arch_switch].Tdel_map;
			double rr_switch_Tdel;
			if (Tdel_map.count(UNDEFINED) == 1){
				/* the switch specified a single constant delay. i.e., it did not
				   specify fanin/delay pairs */
				rr_switch_Tdel = Tdel_map[UNDEFINED];
			} else {
				/* interpolate/extrapolate based on the available (fanin,delay) pairs in the
				   Tdel_map to get the rr_switch_Tdel at 'fanin' */
				rr_switch_Tdel = linear_interpolate_or_extrapolate(&Tdel_map, fanin);
			}
			/* copy over the arch switch to rr_switch_inf[i_rr_switch], but with the changed Tdel value */
			g_rr_switch_inf[i_rr_switch].buffered = g_arch_switch_inf[i_arch_switch].buffered;
			g_rr_switch_inf[i_rr_switch].R = g_arch_switch_inf[i_arch_switch].R;
			g_rr_switch_inf[i_rr_switch].Cin = g_arch_switch_inf[i_arch_switch].Cin;
			g_rr_switch_inf[i_rr_switch].Cout = g_arch_switch_inf[i_arch_switch].Cout;
			g_rr_switch_inf[i_rr_switch].Tdel = rr_switch_Tdel;
			g_rr_switch_inf[i_rr_switch].mux_trans_size = g_arch_switch_inf[i_arch_switch].mux_trans_size;
			g_rr_switch_inf[i_rr_switch].buf_size = g_arch_switch_inf[i_arch_switch].buf_size;
			/* NOTE(review): 'name' is shared (shallow copy) with the arch switch --
			   presumably intentional; confirm the arch switch outlives the rr switch */
			g_rr_switch_inf[i_rr_switch].name = g_arch_switch_inf[i_arch_switch].name;
			g_rr_switch_inf[i_rr_switch].power_buffer_type = g_arch_switch_inf[i_arch_switch].power_buffer_type;
			g_rr_switch_inf[i_rr_switch].power_buffer_size = g_arch_switch_inf[i_arch_switch].power_buffer_size;
			/* have created a switch in the rr_switch_inf array */
			i_rr_switch++;
		}
	}
}
/* switch indices of each rr_node original point into the global g_arch_switch_inf array. | |
now we want to remap these indices to point into the global g_rr_switch_inf array | |
which contains switch info at different fan-in values */ | |
static void remap_rr_node_switch_indices(INP map<int,int> *switch_fanin){ | |
for (int inode = 0; inode < num_rr_nodes; inode++){ | |
t_rr_node from_node = rr_node[inode]; | |
int num_edges = from_node.get_num_edges(); | |
for (int iedge = 0; iedge < num_edges; iedge++){ | |
t_rr_node to_node = rr_node[ from_node.edges[iedge] ]; | |
/* get the switch which this edge uses and its fanin */ | |
int switch_index = from_node.switches[iedge]; | |
int fanin = to_node.get_fan_in(); | |
if (switch_fanin[switch_index].count(UNDEFINED) == 1){ | |
fanin = UNDEFINED; | |
} | |
int rr_switch_index = switch_fanin[switch_index][fanin]; | |
from_node.switches[iedge] = rr_switch_index; | |
} | |
} | |
} | |
static void rr_graph_externals(
		t_segment_inf * segment_inf, int num_seg_types, int max_chan_width,
		int wire_to_rr_ipin_switch, enum e_base_cost_type base_cost_type) {
	/* Loads the data structures that hang off the finished rr_graph:
	 * wire capacitance contributions from switches, the indexed cost data,
	 * and the per-net / per-block rr_node lookup tables. Called once after
	 * the graph itself has been built. */
	add_rr_graph_C_from_switches(g_rr_switch_inf[wire_to_rr_ipin_switch].Cin);
	alloc_and_load_rr_indexed_data(segment_inf, num_seg_types, rr_node_indices,
			max_chan_width, wire_to_rr_ipin_switch, base_cost_type);
	/* net_rr_terminals must be allocated before it is loaded */
	alloc_net_rr_terminals();
	load_net_rr_terminals(rr_node_indices);
	alloc_and_load_rr_clb_source(rr_node_indices);
}
/* Decides, per (block type, segment type), whether the ipin connection-block
 * pattern should be perturbed. Perturbation helps avoid systematic alignment
 * between Fc_in and Fc_out connection patterns in bidirectional architectures.
 * Returns a [0..L_num_types-1][0..num_seg_types-1] matrix owned by the caller. */
static bool **alloc_and_load_perturb_ipins(INP int max_chan_width, INP int L_num_types,
		INP int num_seg_types,
		INP int *sets_per_seg_type,
		INP int ***Fc_in, INP int ***Fc_out,
		INP enum e_directionality directionality) {
	bool **result = (bool **) alloc_matrix(0, L_num_types-1, 0, num_seg_types-1, sizeof(bool));
	/* factor to account for unidir vs bidir */
	int fac = 1;
	if (directionality == UNI_DIRECTIONAL){
		fac = 2;
	}
	if (BI_DIRECTIONAL == directionality) {
		for (int iseg = 0; iseg < num_seg_types; ++iseg){
			result[0][iseg] = false;
			/* NOTE(review): in this branch fac is always 1 (bidir) -- kept for
			 * symmetry with the formula's derivation. */
			int tracks_in_seg_type = sets_per_seg_type[iseg] * fac;
			/* BUG FIX: the loop condition previously tested 'iseg < L_num_types'
			 * instead of 'itype < L_num_types', so itype was never bounded and
			 * ran past the end of result/Fc_in/Fc_out (out-of-bounds access /
			 * effectively unbounded loop). itype starts at 1: row 0 is the
			 * EMPTY type, already set false above. */
			for (int itype = 1; itype < L_num_types; ++itype) {
				result[itype][iseg] = false;
				/* Ratio of the larger Fc to the smaller one; if it is close to an
				 * integer the two patterns would align, so perturb the ipins. */
				float Fc_ratio;
				if (Fc_in[itype][0][iseg] > Fc_out[itype][0][iseg]) {
					Fc_ratio = (float) Fc_in[itype][0][iseg] / (float) Fc_out[itype][0][iseg];
				} else {
					Fc_ratio = (float) Fc_out[itype][0][iseg] / (float) Fc_in[itype][0][iseg];
				}
				if ((Fc_in[itype][0][iseg] <= tracks_in_seg_type - 2)
						&& (fabs(Fc_ratio - nint(Fc_ratio))
								< (0.5 / (float) tracks_in_seg_type))) {
					result[itype][iseg] = true;
				}
			}
		}
	} else {
		/* Unidirectional routing uses mux balancing patterns and
		 * thus shouldn't need perturbation. */
		assert(UNI_DIRECTIONAL == directionality);
		for (int itype = 0; itype < L_num_types; ++itype) {
			for (int iseg = 0; iseg < num_seg_types; ++iseg){
				result[itype][iseg] = false;
			}
		}
	}
	return result;
}
static t_seg_details *alloc_and_load_global_route_seg_details(
		INP int global_route_switch,
		OUTP int * num_seg_details) {
	/* Builds the single segment type used for global routing: a bidirectional,
	 * length-1 wire with zero metal R/C, a connection box at its one position
	 * and switch boxes at both endpoints. Caller owns the returned struct and
	 * its cb/sb arrays. If num_seg_details is non-NULL it is set to 1. */
	t_seg_details *sd = (t_seg_details *) my_malloc(sizeof(t_seg_details));
	/* identity / topology */
	sd->index = 0;
	sd->length = 1;
	sd->longline = false;
	sd->direction = BI_DIRECTION;
	sd->start = 1;
	sd->group_size = 1;
	sd->group_start = 0;
	sd->seg_start = -1;
	sd->seg_end = -1;
	/* electrical / switching */
	sd->Rmetal = 0.0;
	sd->Cmetal = 0.0;
	sd->arch_wire_switch = global_route_switch;
	sd->arch_opin_switch = global_route_switch;
	sd->drivers = MULTI_BUFFERED;
	/* one connection box along the length-1 wire */
	sd->cb = (bool *) my_malloc(sizeof(bool) * 1);
	sd->cb[0] = true;
	/* switch boxes at both ends */
	sd->sb = (bool *) my_malloc(sizeof(bool) * 2);
	sd->sb[0] = true;
	sd->sb[1] = true;
	if (num_seg_details != NULL) {
		*num_seg_details = 1;
	}
	return sd;
}
/* Calculates the actual Fc values for the given max_chan_width value */
static int ***alloc_and_load_actual_fc(INP int L_num_types, INP t_type_ptr types, INP int max_pins,
		INP int num_seg_types, INP const int *sets_per_seg_type,
		INP int max_chan_width, INP bool is_Fc_out,
		INP enum e_directionality directionality,
		OUTP bool *Fc_clipped, INP bool ignore_Fc_0) {
	/* Returns a [0..L_num_types-1][0..max_pins-1][0..num_seg_types-1] matrix of
	 * integer Fc values: the number of tracks each pin connects to, per segment
	 * type. *Fc_clipped is set true if any requested Fc exceeded the tracks
	 * available and was reduced. Caller owns the returned matrix. */
	int ***Result = NULL;
	int fac;
	*Fc_clipped = false;
	/* Unidir tracks formed in pairs, otherwise no effect. */
	fac = 1;
	if (UNI_DIRECTIONAL == directionality) {
		fac = 2;
	}
	assert((max_chan_width % fac) == 0);
	Result = (int ***) alloc_matrix3(0, L_num_types-1, 0, max_pins-1, 0, num_seg_types-1, sizeof(int));
	/* itype starts at 1 -- presumably index 0 is the EMPTY type with no pins
	 * (row 0 of Result is left untouched); TODO confirm against type table. */
	for (int itype = 1; itype < L_num_types; ++itype) {
		/* Fc[pin][seg]: the architecture's fractional or absolute Fc spec */
		float **Fc = types[itype].Fc;
		for (int ipin = 0; ipin < types[itype].num_pins; ++ipin) {
			for (int iseg = 0; iseg < num_seg_types; iseg++){
				if(Fc[ipin][iseg] == 0 && ignore_Fc_0 == false) {
					/* Special case indicating that this pin does not connect to general-purpose routing */
					Result[itype][ipin][iseg] = 0;
				} else {
					/* General case indicating that this pin connects to general-purpose routing */
					if (types[itype].is_Fc_frac[ipin]) {
						/* fractional Fc: fraction of the track sets of this segment type */
						Result[itype][ipin][iseg] = fac * nint(sets_per_seg_type[iseg] * Fc[ipin][iseg]);
					} else {
						/* absolute Fc given directly in the architecture file */
						Result[itype][ipin][iseg] = (int)Fc[ipin][iseg];
					}
					if (is_Fc_out && types[itype].is_Fc_full_flex[ipin]) {
						/* full-flex output pin: connect to every track set */
						Result[itype][ipin][iseg] = sets_per_seg_type[iseg] * fac;
					}
					/* never connect to less than one full track set/pair */
					Result[itype][ipin][iseg] = max(Result[itype][ipin][iseg], fac);
					if (Result[itype][ipin][iseg] > sets_per_seg_type[iseg] * fac) {
						/* clip to the tracks actually available in this segment type */
						*Fc_clipped = true;
						Result[itype][ipin][iseg] = sets_per_seg_type[iseg] * fac;
					}
				}
				/* unidir tracks come in pairs, so Fc must be a multiple of fac */
				assert(Result[itype][ipin][iseg] % fac == 0);
			}
		}
	}
	return Result;
}
/* frees the ipin to track mapping for each physical grid type */
static void free_type_pin_to_track_map(int ******ipin_to_track_map,
		t_type_ptr types) {
	/* ipin_to_track_map is indexed [type][pin][width][height][side][conn].
	 * free_matrix5 releases the five inner dimensions per type (the last
	 * dimension's bounds are managed by free_matrix5's conventions -- see
	 * its definition); the outer per-type array is freed last. Iterates
	 * over the global num_types. */
	for (int i = 0; i < num_types; ++i) {
		free_matrix5(ipin_to_track_map[i], 0, types[i].num_pins - 1,
				0, types[i].width - 1, 0, types[i].height - 1,
				0, 3, 0, sizeof(int));
	}
	free(ipin_to_track_map);
}
/* frees the track to ipin mapping for each physical grid type */ | |
static void free_type_track_to_pin_map(struct s_ivec***** track_to_pin_map, | |
t_type_ptr types, int max_chan_width) { | |
for (int i = 0; i < num_types; i++) { | |
if (track_to_pin_map[i] != NULL) { | |
for (int track = 0; track < max_chan_width; ++track) { | |
for (int width = 0; width < types[i].width; ++width) { | |
for (int height = 0; height < types[i].height; ++height) { | |
for (int side = 0; side < 4; ++side) { | |
if (track_to_pin_map[i][track][width][height][side].list != NULL) { | |
free(track_to_pin_map[i][track][width][height][side].list); | |
} | |
} | |
} | |
} | |
} | |
free_matrix4(track_to_pin_map[i], 0, max_chan_width - 1, | |
0, types[i].width - 1, 0, types[i].height - 1, | |
0, sizeof(struct s_ivec)); | |
} | |
} | |
free(track_to_pin_map); | |
} | |
/* Does the actual work of allocating the rr_graph and filling all the *
 * appropriate values.  Everything up to this was just a prelude!       */
static void alloc_and_load_rr_graph(INP int num_nodes,
		INP t_rr_node * L_rr_node, INP int num_seg_types,
		INP t_seg_details * seg_details,
		INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y,
		INP bool * L_rr_edge_done,
		INP struct s_ivec *****track_to_pin_lookup,
		INP int ******opin_to_track_map, INP struct s_ivec ***switch_block_conn,
		INP t_sb_connection_map *sb_conn_map,
		INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs,
		INP short ******sblock_pattern, INP int ***Fc_out, INP int ***Fc_xofs,
		INP int ***Fc_yofs, INP t_ivec *** L_rr_node_indices,
		INP int max_chan_width, INP enum e_switch_block_type sb_type,
		INP int delayless_switch, INP enum e_directionality directionality,
		INP int wire_to_ipin_switch, OUTP bool * Fc_clipped,
		INP t_direct_inf *directs, INP int num_directs,
		INP t_clb_to_clb_directs *clb_to_clb_directs) {
	/* If Fc gets clipped, this will be flagged to true */
	*Fc_clipped = false;
	/* Connection SINKS and SOURCES to their pins. Grid spans [0..L_nx+1] x
	 * [0..L_ny+1] including the I/O ring. */
	for (int i = 0; i <= (L_nx + 1); ++i) {
		for (int j = 0; j <= (L_ny + 1); ++j) {
			build_rr_sinks_sources(i, j, L_rr_node, L_rr_node_indices,
					delayless_switch, L_grid);
		}
	}
	/* Build opins: bidir opins get their track edges here; unidir opins are
	 * built with a mux-balancing scheme and may clip Fc. */
	for (int i = 0; i <= (L_nx + 1); ++i) {
		for (int j = 0; j <= (L_ny + 1); ++j) {
			if (BI_DIRECTIONAL == directionality) {
				build_bidir_rr_opins(i, j, L_rr_node, L_rr_node_indices,
						opin_to_track_map, Fc_out, L_rr_edge_done, seg_details,
						L_grid, delayless_switch,
						directs, num_directs, clb_to_clb_directs, num_seg_types);
			} else {
				assert(UNI_DIRECTIONAL == directionality);
				bool clipped;
				build_unidir_rr_opins(i, j, L_grid, Fc_out, max_chan_width,
						chan_details_x, chan_details_y, Fc_xofs, Fc_yofs, L_rr_node,
						L_rr_edge_done, &clipped, L_rr_node_indices, delayless_switch,
						directs, num_directs, clb_to_clb_directs, num_seg_types);
				if (clipped) {
					*Fc_clipped = true;
				}
			}
		}
	}
	/* We make a copy of the current fanin values for the nodes to
	 * know the number of OPINs driving each mux presently */
	int *opin_mux_size = (int *) my_malloc(sizeof(int) * num_nodes);
	for (int i = 0; i < num_nodes; ++i) {
		opin_mux_size[i] = L_rr_node[i].get_fan_in();
	}
	/* Build channels. Channel (i,j) exists for i>0 (CHANX) / j>0 (CHANY);
	 * Fs must split evenly over the 3 other sides of a switch block. */
	assert(Fs % 3 == 0);
	for (int i = 0; i <= L_nx; ++i) {
		for (int j = 0; j <= L_ny; ++j) {
			if (i > 0) {
				build_rr_chan(i, j, CHANX, track_to_pin_lookup, sb_conn_map, switch_block_conn,
						CHANX_COST_INDEX_START,
						max_chan_width, chan_width.x_list[j], opin_mux_size,
						sblock_pattern, Fs / 3, chan_details_x, chan_details_y,
						L_rr_node_indices, L_rr_edge_done, L_rr_node,
						wire_to_ipin_switch, directionality);
			}
			if (j > 0) {
				/* CHANY cost indices follow the CHANX block, one per segment type */
				build_rr_chan(i, j, CHANY, track_to_pin_lookup, sb_conn_map, switch_block_conn,
						CHANX_COST_INDEX_START + num_seg_types,
						max_chan_width, chan_width.y_list[i], opin_mux_size,
						sblock_pattern, Fs / 3, chan_details_x, chan_details_y,
						L_rr_node_indices, L_rr_edge_done, L_rr_node,
						wire_to_ipin_switch, directionality);
			}
		}
	}
	free(opin_mux_size);
	// setup the side info for SOURCE and SINK: a SINK inherits the sides of
	// every IPIN that feeds it (g_ipin_to_sink was filled while building pins)
	map<int, int>::iterator itr;
	for (itr = g_ipin_to_sink.begin(); itr != g_ipin_to_sink.end(); itr++) {
		int sink_node = itr->second;
		int ipin_node = itr->first;
		g_pin_side[sink_node] |= g_pin_side[ipin_node];
	}
	/* XXX currently don't do opins as it is more tricky than ipins
	   you will need to differentiate unidir and bidir
	for (itr = g_opin_by_source.begin(); itr != g_opin_by_source.end(); itr++) {
	}
	*/
	g_opin_by_source.clear();
	g_ipin_to_sink.clear();
#if LOOKAHEADBYHISTORY == 1
	// the first time alloc_and_load_rr_graph() is called is when it does placement
	// and after that, size of the chip (nx, ny) is fixed.
	if (max_cost_by_relative_pos == NULL) {
		assert(min_cost_by_relative_pos == NULL);
		assert(max_cost_coord == NULL);
		assert(min_cost_coord == NULL);
		max_cost_by_relative_pos = (float **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(float));
		min_cost_by_relative_pos = (float **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(float));
		avg_cost_by_relative_pos = (float **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(float));
		dev_cost_by_relative_pos = (float **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(float));
		count_by_relative_pos = (int **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(int));
		max_cost_coord = (pair<float, float> **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(pair<float, float>));
		min_cost_coord = (pair<float, float> **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(pair<float, float>));
	}
	// everytime this function is called, the chan width is changed.
	// so the old data stored in the 2d arrays are no longer valid
	// So clear them up.
	for (int i = 0; i <= (L_nx + 1); ++i) {
		for (int j = 0; j <= (L_ny + 1); ++j) {
			max_cost_by_relative_pos[i][j] = -1;
			min_cost_by_relative_pos[i][j] = HUGE_POSITIVE_FLOAT;
			avg_cost_by_relative_pos[i][j] = -1;
			dev_cost_by_relative_pos[i][j] = HUGE_POSITIVE_FLOAT;
			count_by_relative_pos[i][j] = 0;
			max_cost_coord[i][j].first = -1;
			max_cost_coord[i][j].second = -1;
			min_cost_coord[i][j].first = -1;
			min_cost_coord[i][j].second = -1;
		}
	}
#endif
#if CONGESTIONBYCHIPAREA == 1
	/* NOTE(review): this line uses the globals nx/ny while the rest of the
	 * function uses the L_nx/L_ny parameters -- presumably identical here,
	 * but verify against the caller. */
	total_cong_tile_count = CONG_MAP_BASE_COST * (nx + 1) * (ny + 1);
	if (congestion_map_by_area == NULL) {
		congestion_map_by_area = (int **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(int));
		congestion_map_relative = (int **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(int));
	}
	for (int i = 0; i <= (L_nx + 1); ++i) {
		for (int j = 0; j <= (L_ny + 1); ++j) {
			congestion_map_by_area[i][j] = CONG_MAP_BASE_COST;
			congestion_map_relative[i][j] = 0;
		}
	}
#endif
#if ONEMORELOOKAHEAD == 1
	/* Mark length-4 channel wires that can reach a length-16 wire in one hop */
	for (int i = 0; i < num_rr_nodes; i++) {
		rr_node[i].is_to_long_wire = false;
		if (rr_node[i].type != CHANX && rr_node[i].type != CHANY)
			continue;
		if (rr_node[i].get_len() != 4)
			continue;
		int to_num_edges = rr_node[i].get_num_edges();
		for (int e = 0; e < to_num_edges; e++) {
			int to_node = rr_node[i].edges[e];
			if (rr_node[to_node].get_len() == 16)
				rr_node[i].is_to_long_wire = true;
		}
	}
#endif
}
/* Builds the outgoing edges of every OPIN at grid location (i,j) for a
 * bidirectional architecture: connections onto routing tracks (per the
 * opin_to_track_map and the pin's total Fc) plus any direct clb-to-clb
 * connections. */
static void build_bidir_rr_opins(INP int i, INP int j,
		INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
		INP int ******opin_to_track_map, INP int ***Fc_out,
		INP bool * L_rr_edge_done, INP t_seg_details * seg_details,
		INP struct s_grid_tile **L_grid, INP int delayless_switch,
		INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs,
		INP int num_seg_types) {
	/* OPINP edges need to be done at once so let the offset 0
	 * block do the work. */
	if (L_grid[i][j].width_offset > 0 || L_grid[i][j].height_offset > 0) {
		return;
	}
	t_type_ptr type = L_grid[i][j].type;
	/* Fc[pin][seg]: tracks per segment type this block type's pins connect to */
	int **Fc = Fc_out[type->index];
	for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) {
		/* We only are working with opins so skip non-drivers */
		if (type->class_inf[type->pin_class[pin_index]].type != DRIVER) {
			continue;
		}
		/* get number of tracks that this pin connects to */
		int total_pin_Fc = 0;
		for (int iseg = 0; iseg < num_seg_types; iseg++){
			total_pin_Fc += Fc[pin_index][iseg];
		}
		/* collect candidate edges in a linked list, then commit them at once */
		int num_edges = 0;
		struct s_linked_edge *edge_list = NULL;
		if(total_pin_Fc > 0) {
			/* multi-tile blocks expose the pin at every (width, height) offset */
			for (int width = 0; width < type->width; ++width) {
				for (int height = 0; height < type->height; ++height) {
					num_edges += get_bidir_opin_connections(i + width, j + height, pin_index,
							&edge_list, opin_to_track_map, total_pin_Fc, L_rr_edge_done,
							L_rr_node_indices, seg_details);
				}
			}
		}
		/* Add in direct connections */
		num_edges += get_opin_direct_connecions(i, j, pin_index, &edge_list, L_rr_node_indices,
				directs, num_directs, clb_to_clb_directs);
		int node_index = get_rr_node_index(i, j, OPIN, pin_index, L_rr_node_indices);
		alloc_and_load_edges_and_switches(L_rr_node, node_index, num_edges,
				L_rr_edge_done, edge_list);
		/* the edge list has been copied into the node; free the list itself */
		while (edge_list != NULL) {
			struct s_linked_edge *next_edge = edge_list->next;
			free(edge_list);
			edge_list = next_edge;
		}
	}
}
void free_rr_graph(void) {
	int i;
	/* Frees all the routing graph data structures, if they have been       *
	 * allocated.  I use rr_mem_chunk_list_head as a flag to indicate       *
	 * whether or not the graph has been allocated -- if it is not NULL,    *
	 * a routing graph exists and can be freed.  Hence, you can call this   *
	 * routine even if you're not sure of whether a rr_graph exists or not. */
	if (g_pin_side != NULL) {
		free(g_pin_side);
		g_pin_side = NULL;
	}
	if (rr_mem_ch.chunk_ptr_head == NULL) /* Nothing to free. */
		return;
	free_chunk_memory(&rr_mem_ch); /* Frees ALL "chunked" data */
	/* Before adding any more free calls here, be sure the data is NOT chunk *
	 * allocated, as ALL the chunk allocated data is already free!           */
	/* note: the inner net_rr_terminals[] rows were chunk-allocated and are
	 * already gone; only the outer pointer array is freed here */
	if(net_rr_terminals != NULL) {
		free(net_rr_terminals);
	}
	for (i = 0; i < num_rr_nodes; i++) {
		if (rr_node[i].edges != NULL) {
			free(rr_node[i].edges);
		}
		if (rr_node[i].switches != NULL) {
			free(rr_node[i].switches);
		}
	}
	assert(rr_node_indices);
	free_rr_node_indices(rr_node_indices);
	free(rr_node);
	free(rr_indexed_data);
	for (i = 0; i < num_blocks; i++) {
		free(rr_blk_source[i]);
	}
	free(rr_blk_source);
	/* null out the globals so a future free_rr_graph()/rebuild sees a clean state */
	rr_blk_source = NULL;
	net_rr_terminals = NULL;
	rr_node = NULL;
	rr_node_indices = NULL;
	rr_indexed_data = NULL;
	num_rr_nodes = 0;
	/* g_rr_switch_inf was allocated with new[], hence delete[] */
	delete[] g_rr_switch_inf;
	g_rr_switch_inf = NULL;
#if LOOKAHEADBYHISTORY == 1
	assert(max_cost_by_relative_pos != NULL);
	assert(min_cost_by_relative_pos != NULL);
	assert(max_cost_coord != NULL);
	assert(min_cost_coord != NULL);
	free_matrix(max_cost_by_relative_pos, 0, nx+1, 0, sizeof(float));
	free_matrix(min_cost_by_relative_pos, 0, nx+1, 0, sizeof(float));
	free_matrix(avg_cost_by_relative_pos, 0, nx+1, 0, sizeof(float));
	free_matrix(dev_cost_by_relative_pos, 0, nx+1, 0, sizeof(float));
	free_matrix(count_by_relative_pos, 0, nx+1, 0, sizeof(int));
	free_matrix(max_cost_coord, 0, nx+1, 0, sizeof(pair<float, float>));
	free_matrix(min_cost_coord, 0, nx+1, 0, sizeof(pair<float, float>));
	/* NOTE(review): this call passes one fewer bound argument than the
	 * free_matrix4 call in free_type_track_to_pin_map (which passes four
	 * low/high pairs before the element size) -- looks like a missing
	 * '0,' before g_num_segment-1; verify against free_matrix4's prototype. */
	free_matrix4(bfs_lookahead_info, 0, nx-3, 0, ny-3, g_num_segment-1, 0, sizeof(pair<float, float>));
	max_cost_by_relative_pos = NULL;
	min_cost_by_relative_pos = NULL;
	avg_cost_by_relative_pos = NULL;
	dev_cost_by_relative_pos = NULL;
	count_by_relative_pos = NULL;
	max_cost_coord = NULL;
	min_cost_coord = NULL;
	bfs_lookahead_info = NULL;
#endif
#if CONGESTIONBYCHIPAREA == 1
	assert(congestion_map_by_area != NULL);
	free_matrix(congestion_map_by_area, 0, nx+1, 0, sizeof(int));
	free_matrix(congestion_map_relative, 0, nx+1, 0, sizeof(int));
	congestion_map_by_area = NULL;
	congestion_map_relative = NULL;
#endif
}
static void alloc_net_rr_terminals(void) { | |
unsigned int inet; | |
net_rr_terminals = (int **) my_malloc(g_clbs_nlist.net.size() * sizeof(int *)); | |
for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) { | |
net_rr_terminals[inet] = (int *) my_chunk_malloc( | |
g_clbs_nlist.net[inet].pins.size() * sizeof(int), | |
&rr_mem_ch); | |
} | |
} | |
void load_net_rr_terminals(t_ivec *** L_rr_node_indices) {
	/* Loads the net_rr_terminals data structure. For each net it stores the
	 * rr_node index of the SOURCE of the net and all the SINKs of the net.
	 * [0..g_clbs_nlist.net.size()-1][0..num_pins-1]. Entry [inet][pnum] holds
	 * the rr index corresponding to the SOURCE (opin) or SINK (ipin) of pnum. */
	for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
		for (unsigned int ipin = 0; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
			int iblk = g_clbs_nlist.net[inet].pins[ipin].block;
			t_type_ptr type = block[iblk].type;
			/* In the routing graph, each (x, y) location has unique pins on it
			 * so when there is capacity, blocks are packed and their pin numbers
			 * are offset to get their actual rr_node */
			int node_block_pin = g_clbs_nlist.net[inet].pins[ipin].block_pin;
			int iclass = type->pin_class[node_block_pin];
			/* First pin of the net is the driver -> SOURCE; all others -> SINK */
			int inode = get_rr_node_index(block[iblk].x, block[iblk].y,
					(ipin == 0 ? SOURCE : SINK), iclass, L_rr_node_indices);
			net_rr_terminals[inet][ipin] = inode;
		}
	}
}
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices) {
	/* Saves the rr_node corresponding to each SOURCE and SINK in each CLB
	 * in the FPGA. Currently only the SOURCE rr_node values are used, and
	 * they are used only to reserve pins for locally used OPINs in the router.
	 * [0..num_blocks-1][0..num_class-1]. The values for blocks that are pads
	 * are NOT valid. Classes outside the block's own range are marked OPEN. */
	rr_blk_source = (int **) my_malloc(num_blocks * sizeof(int *));
	for (int iblk = 0; iblk < num_blocks; iblk++) {
		t_type_ptr type = block[iblk].type;
		int class_low, class_high;
		get_class_range_for_block(iblk, &class_low, &class_high);
		rr_blk_source[iblk] = (int *) my_malloc(type->num_class * sizeof(int));
		for (int iclass = 0; iclass < type->num_class; iclass++) {
			if (iclass < class_low || iclass > class_high) {
				rr_blk_source[iblk][iclass] = OPEN;
				continue;
			}
			/* DRIVER classes map to SOURCE nodes, RECEIVER classes to SINKs */
			t_rr_type rr_type =
					(type->class_inf[iclass].type == DRIVER) ? SOURCE : SINK;
			rr_blk_source[iblk][iclass] = get_rr_node_index(
					block[iblk].x, block[iblk].y, rr_type, iclass,
					L_rr_node_indices);
		}
	}
}
static void build_rr_sinks_sources(INP int i, INP int j,
		INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
		INP int delayless_switch, INP struct s_grid_tile **L_grid) {
	/* Loads IPIN, SINK, SOURCE, and OPIN.
	 * Loads IPINP to SINK edges, and SOURCE to OPINP edges */
	/* Since we share nodes within a large block, only
	 * start tile can initialize sinks, sources, and pins */
	if (L_grid[i][j].width_offset > 0 || L_grid[i][j].height_offset > 0)
		return;
	t_type_ptr type = L_grid[i][j].type;
	int num_class = type->num_class;
	struct s_class *class_inf = type->class_inf;
	int num_pins = type->num_pins;
	int *pin_class = type->pin_class;
	/* SINKS and SOURCE to OPIN edges: one SOURCE/SINK node per pin class */
	for (int iclass = 0; iclass < num_class; ++iclass) {
		int inode = 0;
		if (class_inf[iclass].type == DRIVER) { /* SOURCE */
			inode = get_rr_node_index(i, j, SOURCE, iclass, L_rr_node_indices);
			/* SOURCE fans out via a delayless switch to every OPIN in the class */
			int num_edges = class_inf[iclass].num_pins;
			L_rr_node[inode].set_num_edges(num_edges);
			L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
			L_rr_node[inode].switches = (short *) my_malloc(num_edges * sizeof(short));
			for (int ipin = 0; ipin < class_inf[iclass].num_pins; ++ipin) {
				int pin_num = class_inf[iclass].pinlist[ipin];
				int to_node = get_rr_node_index(i, j, OPIN, pin_num, L_rr_node_indices);
				// XXX: to_node is OPIN, inode is SOURCE
				g_opin_by_source[to_node] = inode;
				L_rr_node[inode].edges[ipin] = to_node;
				L_rr_node[inode].switches[ipin] = delayless_switch;
				L_rr_node[to_node].set_fan_in(L_rr_node[to_node].get_fan_in() + 1);
			}
			L_rr_node[inode].set_cost_index(SOURCE_COST_INDEX);
			L_rr_node[inode].type = SOURCE;
		} else { /* SINK */
			assert(class_inf[iclass].type == RECEIVER);
			inode = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);
			/* NOTE:  To allow route throughs through clbs, change the lines below to  *
			 * make an edge from the input SINK to the output SOURCE.  Do for just the *
			 * special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it  *
			 * leads to.  If route throughs are allowed, you may want to increase the  *
			 * base cost of OPINs and/or SOURCES so they aren't used excessively.      */
			/* Initialize to unconnected to fix values */
			L_rr_node[inode].set_num_edges(0);
			L_rr_node[inode].edges = NULL;
			L_rr_node[inode].switches = NULL;
			L_rr_node[inode].set_cost_index(SINK_COST_INDEX);
			L_rr_node[inode].type = SINK;
		}
		/* Things common to both SOURCEs and SINKs. Capacity equals the number
		 * of physical pins in the class (a SOURCE/SINK is shared by them all). */
		L_rr_node[inode].set_capacity(class_inf[iclass].num_pins);
		L_rr_node[inode].set_occ(0);
		//assuming that type->width is always 1.
		//if this needs to change, rr_node.{h,c} need to be modified accordingly
		assert(type->width == 1);
		L_rr_node[inode].set_coordinates(i, j, i + type->width - 1, j + type->height - 1);
		L_rr_node[inode].R = 0;
		L_rr_node[inode].C = 0;
		L_rr_node[inode].set_ptc_num(iclass);
		L_rr_node[inode].set_direction((enum e_direction)OPEN);
		//L_rr_node[inode].set_drivers((enum e_drivers)OPEN);
	}
	/* Cursors for walking the pb_graph's input/clock/output ports in step with
	 * the physical pin order: iporttype 0 = input ports, 1 = clock ports. */
	int ipb_pin = 0;
	int opb_pin = 0;
	int iport = 0;
	int oport = 0;
	int iporttype = 0;
	const t_pb_graph_node *pb_graph_node = type->pb_graph_head;
	if(pb_graph_node != NULL && pb_graph_node->num_input_ports == 0) {
		iporttype = 1;
	}
	/* Connect IPINS to SINKS and dummy for OPINS */
	for (int ipin = 0; ipin < num_pins; ++ipin) {
		int inode = 0;;
		int iclass = pin_class[ipin];
		/* z = capacity sub-block index this pin belongs to */
		int z = ipin / (type->pb_type->num_clock_pins + type->pb_type->num_output_pins + type->pb_type->num_input_pins);
		if (class_inf[iclass].type == RECEIVER) {
			inode = get_rr_node_index(i, j, IPIN, ipin, L_rr_node_indices);
			int to_node = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);
			// XXX: inode is IPIN; to_node is SINK
			// setup this mapping to setup g_pin_side for sinks in the future
			g_ipin_to_sink[inode] = to_node;
			/* each IPIN has exactly one edge: a delayless hop to its class SINK */
			L_rr_node[inode].set_num_edges(1);
			L_rr_node[inode].edges = (int *) my_malloc(sizeof(int));
			L_rr_node[inode].switches = (short *) my_malloc(sizeof(short));
			L_rr_node[inode].edges[0] = to_node;
			L_rr_node[inode].switches[0] = delayless_switch;
			L_rr_node[to_node].set_fan_in(L_rr_node[to_node].get_fan_in() + 1);
			L_rr_node[inode].set_cost_index(IPIN_COST_INDEX);
			L_rr_node[inode].type = IPIN;
			/* Add in information so that I can identify which cluster pin this rr_node connects to later */
			L_rr_node[inode].z = z;
			if(iporttype == 0) {
				/* advance through the input ports; when exhausted, move on to
				 * clock ports (iporttype 1) unless there are none */
				L_rr_node[inode].pb_graph_pin = &pb_graph_node->input_pins[iport][ipb_pin];
				ipb_pin++;
				if(ipb_pin >= pb_graph_node->num_input_pins[iport]) {
					iport++;
					ipb_pin = 0;
					if(iport >= pb_graph_node->num_input_ports) {
						iporttype++;
						iport = 0;
						if(pb_graph_node->num_clock_ports == 0) {
							iporttype = 0;
						}
					}
				}
			} else {
				assert(iporttype == 1);
				/* advance through the clock ports; wrap back to input ports
				 * (for the next capacity sub-block) when exhausted */
				L_rr_node[inode].pb_graph_pin = &pb_graph_node->clock_pins[iport][ipb_pin];
				ipb_pin++;
				if(ipb_pin >= pb_graph_node->num_clock_pins[iport]) {
					iport++;
					ipb_pin = 0;
					if(iport >= pb_graph_node->num_clock_ports) {
						iporttype = 0;
						iport = 0;
						if(pb_graph_node->num_input_ports == 0) {
							iporttype = 1;
						}
					}
				}
			}
		} else {
			assert(class_inf[iclass].type == DRIVER);
			inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
			/* Add in information so that I can identify which cluster pin this rr_node connects to later */
			L_rr_node[inode].z = z;
			/* OPIN edges onto tracks are built later (build_*_rr_opins) */
			L_rr_node[inode].set_num_edges(0);
			L_rr_node[inode].edges = NULL;
			L_rr_node[inode].switches = NULL;
			L_rr_node[inode].set_cost_index(OPIN_COST_INDEX);
			L_rr_node[inode].type = OPIN;
			L_rr_node[inode].pb_graph_pin = &pb_graph_node->output_pins[oport][opb_pin];
			opb_pin++;
			if(opb_pin >= pb_graph_node->num_output_pins[oport]) {
				oport++;
				opb_pin = 0;
				if(oport >= pb_graph_node->num_output_ports) {
					oport = 0;
				}
			}
		}
		/* Common to both DRIVERs and RECEIVERs */
		L_rr_node[inode].set_capacity(1);
		L_rr_node[inode].set_occ(0);
		L_rr_node[inode].set_coordinates(i, j, i + type->width - 1, j + type->height - 1);
		L_rr_node[inode].C = 0;
		L_rr_node[inode].R = 0;
		L_rr_node[inode].set_ptc_num(ipin);
		L_rr_node[inode].set_direction((enum e_direction)OPEN);
		//L_rr_node[inode].set_drivers((enum e_drivers)OPEN);
	}
}
/* Allocates/loads edges for nodes belonging to specified channel segment and initializes
   node properties such as cost, occupancy and capacity */
static void build_rr_chan(INP int x_coord, INP int y_coord, INP t_rr_type chan_type,
		INP struct s_ivec *****track_to_pin_lookup, t_sb_connection_map *sb_conn_map,
		INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
		INP int max_chan_width, INP int tracks_per_chan, INP int *opin_mux_size,
		INP short ******sblock_pattern, INP int Fs_per_side,
		INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y,
		INP t_ivec *** L_rr_node_indices,
		INOUTP bool * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
		INP int wire_to_ipin_switch, INP enum e_directionality directionality) {
	/* this function builds both x and y-directed channel segments, so set up our
	   coordinates based on channel type */
	int seg_coord = x_coord;
	int chan_coord = y_coord;
	int seg_dimension = nx;
	int chan_dimension = ny;
	t_chan_details *from_chan_details = chan_details_x;
	t_chan_details *opposite_chan_details = chan_details_y;
	t_rr_type opposite_chan_type = CHANY;
	if (chan_type == CHANY){
		/* swap the axis roles for vertical channels */
		seg_coord = y_coord;
		chan_coord = x_coord;
		seg_dimension = ny;
		chan_dimension = nx;
		from_chan_details = chan_details_y;
		opposite_chan_details = chan_details_x;
		opposite_chan_type = CHANX;
	}
	t_seg_details * seg_details = from_chan_details[x_coord][y_coord];
	/* figure out if we're generating switch block edges based on a custom switch block
	   description */
	bool custom_switch_block = false;
	if (sb_conn_map != NULL){
		/* custom and non-custom switch block descriptions are mutually exclusive */
		assert(sblock_pattern == NULL && switch_block_conn == NULL);
		custom_switch_block = true;
	}
	/* Loads up all the routing resource nodes in the current channel segment */
	for (int track = 0; track < tracks_per_chan; ++track) {
		if (seg_details[track].length == 0)
			continue;
		int start = get_seg_start(seg_details, track, chan_coord, seg_coord);
		int end = get_seg_end(seg_details, track, start, chan_coord, seg_dimension);
		if (seg_coord > start)
			continue; /* Not the start of this segment. */
		struct s_linked_edge *edge_list = NULL;
		t_seg_details * from_seg_details = chan_details_x[start][y_coord];
		if (chan_type == CHANY){
			from_seg_details = chan_details_y[x_coord][start];
		}
		/* First count number of edges and put the edges in a linked list. */
		int num_edges = 0;
		/* edges into block IPINs along this wire's span */
		num_edges += get_track_to_pins(start, chan_coord, track, tracks_per_chan, &edge_list,
				L_rr_node_indices, track_to_pin_lookup, seg_details, chan_type, seg_dimension,
				wire_to_ipin_switch, directionality);
		/* get edges going from the current track into channel segments which are perpendicular to it */
		if (chan_coord > 0) {
			/* perpendicular channel on the near side */
			t_seg_details * to_seg_details = chan_details_y[start][y_coord];
			if (chan_type == CHANY)
				to_seg_details = chan_details_x[x_coord][start];
			if (to_seg_details->length > 0) {
				num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord,
						opposite_chan_type, seg_dimension, max_chan_width, opin_mux_size,
						Fs_per_side, sblock_pattern, &edge_list,
						from_seg_details, to_seg_details, opposite_chan_details,
						directionality, L_rr_node_indices, L_rr_edge_done,
						switch_block_conn, sb_conn_map);
			}
		}
		if (chan_coord < chan_dimension) {
			/* perpendicular channel on the far side */
			t_seg_details * to_seg_details = chan_details_y[start][y_coord+1];
			if (chan_type == CHANY)
				to_seg_details = chan_details_x[x_coord+1][start];
			if (to_seg_details->length > 0) {
				num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord + 1,
						opposite_chan_type, seg_dimension, max_chan_width, opin_mux_size,
						Fs_per_side, sblock_pattern, &edge_list,
						from_seg_details, to_seg_details, opposite_chan_details,
						directionality, L_rr_node_indices, L_rr_edge_done,
						switch_block_conn, sb_conn_map);
			}
		}
		/* walk over the switch blocks along the source track and implement edges from this track to other tracks
		   in the same channel (i.e. straight-through connections) */
		for (int target_seg = start-1; target_seg <= end+1; target_seg++){
			if (target_seg != start-1 && target_seg != end+1){
				/* skip straight-through connections from midpoint if non-custom switch block.
				   currently non-custom switch blocks don't properly describe connections from the mid-point of a wire segment
				   to other segments in the same channel (i.e. straight-through connections) */
				if (!custom_switch_block){
					continue;
				}
			}
			if (target_seg > 0 && target_seg < seg_dimension+1){
				t_seg_details * to_seg_details = chan_details_x[target_seg][y_coord];
				if (chan_type == CHANY)
					to_seg_details = chan_details_y[x_coord][target_seg];
				if (to_seg_details->length > 0) {
					num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, target_seg,
							chan_type, seg_dimension, max_chan_width, opin_mux_size,
							Fs_per_side, sblock_pattern, &edge_list,
							from_seg_details, to_seg_details, from_chan_details,
							directionality, L_rr_node_indices, L_rr_edge_done,
							switch_block_conn, sb_conn_map);
				}
			}
		}
		int node = get_rr_node_index(x_coord, y_coord, chan_type, track, L_rr_node_indices);
		alloc_and_load_edges_and_switches(L_rr_node, node, num_edges,
				L_rr_edge_done, edge_list);
		/* the edge list has been copied into the node; free the list itself */
		while (edge_list != NULL) {
			struct s_linked_edge *next_edge = edge_list->next;
			free(edge_list);
			edge_list = next_edge;
		}
		/* Edge arrays have now been built up. Do everything else. */
		L_rr_node[node].set_cost_index(cost_index_offset + seg_details[track].index);
		/* NOTE(review): inside this loop 'track < tracks_per_chan' always holds,
		 * so occ is always set to 0 here; the ?: looks vestigial -- confirm. */
		L_rr_node[node].set_occ( track < tracks_per_chan ? 0 : 1 );
		L_rr_node[node].set_capacity(1); /* GLOBAL routing handled elsewhere */
		if (chan_type == CHANX){
			L_rr_node[node].set_coordinates(start, y_coord, end, y_coord);
		} else {
			assert(chan_type == CHANY);
			L_rr_node[node].set_coordinates(x_coord, start, x_coord, end);
		}
		/* metal R/C scale linearly with the wire's span */
		int length = end - start + 1;
		L_rr_node[node].R = length * seg_details[track].Rmetal;
		L_rr_node[node].C = length * seg_details[track].Cmetal;
		L_rr_node[node].set_ptc_num(track);
		L_rr_node[node].type = chan_type;
		L_rr_node[node].set_direction(seg_details[track].direction);
		//L_rr_node[node].set_drivers(seg_details[track].drivers);
	}
}
/* Debugging aid: prints rr_node 'inode' to stdout, followed by every
 * destination node reachable through the given edge list.
 * Fix: the original kept two unused locals ('i' was incremented but never
 * read; 'to_node' was a needless temporary) -- both removed. */
void watch_edges(int inode, t_linked_edge * edge_list_head) {

	print_rr_node(stdout, rr_node, inode);
	for (t_linked_edge *list_ptr = edge_list_head; list_ptr != NULL;
			list_ptr = list_ptr->next) {
		print_rr_node(stdout, rr_node, list_ptr->edge);
	}
}
void alloc_and_load_edges_and_switches(INP t_rr_node * L_rr_node, INP int inode,
		INP int num_edges, INOUTP bool * L_rr_edge_done,
		INP t_linked_edge * edge_list_head) {

	/* Sets up all the edge related information for rr_node inode (num_edges,  *
	 * the edges array and the switches array). The edge_list_head points to   *
	 * a list of the num_edges edges and switches to put in the arrays.        *
	 * NOTE: this routine does NOT free the linked list -- ownership stays     *
	 * with the caller, which frees it after this call returns (the previous   *
	 * comment claiming the list was freed here was wrong). This routine also  *
	 * resets the rr_edge_done array for the next rr_node (i.e. sets it so     *
	 * that no edges are marked as having been seen before).                   */

	/* Check we aren't overwriting edges */
	assert(L_rr_node[inode].get_num_edges() < 1);
	assert(NULL == L_rr_node[inode].edges);
	assert(NULL == L_rr_node[inode].switches);

	L_rr_node[inode].set_num_edges(num_edges);
	L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
	L_rr_node[inode].switches = (short *) my_malloc(num_edges * sizeof(short));

	int i = 0;
	t_linked_edge *list_ptr = edge_list_head;
	while (list_ptr && (i < num_edges)) {
		L_rr_node[inode].edges[i] = list_ptr->edge;
		L_rr_node[inode].switches[i] = list_ptr->iswitch;

		/* Each edge stored here adds one to the fan-in of its target node. */
		L_rr_node[list_ptr->edge].set_fan_in(L_rr_node[list_ptr->edge].get_fan_in() + 1);

		/* Unmark the edge since we are done considering fanout from node. */
		L_rr_edge_done[list_ptr->edge] = false;

		list_ptr = list_ptr->next;
		++i;
	}

	/* The list must contain exactly num_edges entries -- no more, no fewer. */
	assert(list_ptr == NULL);
	assert(i == num_edges);
}
/* allocate pin to track map for each segment type individually and then combine into a single | |
vector */ | |
static int *****alloc_and_load_pin_to_track_map(INP e_pin_type pin_type, | |
INP int **Fc, INP t_type_ptr Type, INP bool *perturb_switch_pattern, | |
INP e_directionality directionality, | |
INP int num_seg_types, INP int *sets_per_seg_type) { | |
/* get the maximum number of tracks that any pin can connect to */ | |
int max_pin_tracks = 0; | |
for (int iseg = 0; iseg < num_seg_types; iseg++){ | |
/* determine the maximum Fc to this segment type across all pins */ | |
int max_Fc = 0; | |
for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) { | |
int pin_class = Type->pin_class[pin_index]; | |
if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) { | |
max_Fc = Fc[pin_index][iseg]; | |
} | |
} | |
max_pin_tracks += max_Fc; | |
} | |
/* allocate 'result' matrix and initialize entries to OPEN. also allocate and intialize matrix which will be | |
used to index into the correct entries when loading up 'result' */ | |
int *****result = NULL; /* [0..num_pins-1][0..width-1][0..height-1][0..3][0..Fc-1] */ | |
int ****next_result_index = NULL; /* [0..num_pins-1][0..width-1][0..height-1][0..3] */ | |
result = (int *****) alloc_matrix5(0, Type->num_pins-1, 0, Type->width-1, 0, Type->height-1, 0, 3, 0, max_pin_tracks-1, sizeof(int)); | |
next_result_index = (int ****) alloc_matrix4(0, Type->num_pins-1, 0, Type->width-1, 0, Type->height-1, 0, 3, sizeof(int)); | |
for (int ipin = 0; ipin < Type->num_pins; ipin++){ | |
for (int iwidth = 0; iwidth < Type->width; iwidth++){ | |
for (int iheight = 0; iheight < Type->height; iheight++){ | |
for (int iside = 0; iside < 4; iside++){ | |
for (int itrack = 0; itrack < max_pin_tracks; itrack++){ | |
result[ipin][iwidth][iheight][iside][itrack] = OPEN; | |
} | |
next_result_index[ipin][iwidth][iheight][iside] = 0; | |
} | |
} | |
} | |
} | |
/* multiplier for unidirectional vs bidirectional architectures */ | |
int fac = 1; | |
if (directionality == UNI_DIRECTIONAL){ | |
fac = 2; | |
} | |
/* load the pin to track matrix by looking at each segment type in turn */ | |
int seg_type_start_track = 0; | |
for (int iseg = 0; iseg < num_seg_types; iseg++){ | |
int seg_type_tracks = fac * sets_per_seg_type[iseg]; | |
/* determine the maximum Fc to this segment type across all pins */ | |
int max_Fc = 0; | |
for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) { | |
int pin_class = Type->pin_class[pin_index]; | |
if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) { | |
max_Fc = Fc[pin_index][iseg]; | |
} | |
} | |
/* get pin connections to tracks of the current segment type */ | |
int *****pin_to_seg_type_map = NULL; /* [0..num_pins-1][0..width-1][0..height-1][0..3][0..Fc-1] */ | |
pin_to_seg_type_map = alloc_and_load_pin_to_seg_type(pin_type, | |
seg_type_tracks, max_Fc, Type, perturb_switch_pattern[iseg], directionality); | |
/* connections in pin_to_seg_type_map are within that seg type -- i.e. in the [0,seg_type_tracks-1] range. | |
now load up 'result' array with these connections, but offset them so they are relative to the channel | |
as a whole */ | |
for (int ipin = 0; ipin < Type->num_pins; ipin++){ | |
for (int iwidth = 0; iwidth < Type->width; iwidth++){ | |
for (int iheight = 0; iheight < Type->height; iheight++){ | |
for (int iside = 0; iside < 4; iside++){ | |
for (int iconn = 0; iconn < max_Fc; iconn++){ | |
int relative_track_ind = pin_to_seg_type_map[ipin][iwidth][iheight][iside][iconn]; | |
int absolute_track_ind = relative_track_ind + seg_type_start_track; | |
int result_index = next_result_index[ipin][iwidth][iheight][iside]; | |
next_result_index[ipin][iwidth][iheight][iside]++; | |
result[ipin][iwidth][iheight][iside][result_index] = absolute_track_ind; | |
} | |
} | |
} | |
} | |
} | |
/* free temporary loop variables */ | |
free_matrix5(pin_to_seg_type_map, 0, Type->num_pins-1, | |
0, Type->width-1, | |
0, Type->height-1, | |
0, 3, | |
0, sizeof(int)); | |
pin_to_seg_type_map = NULL; | |
/* next seg type will start at this track index */ | |
seg_type_start_track += seg_type_tracks; | |
} | |
/* free temporary function variables */ | |
free_matrix4(next_result_index, 0, Type->num_pins-1, | |
0, Type->width-1, | |
0, Type->height-1, | |
0, sizeof(int)); | |
next_result_index = NULL; | |
return result; | |
} | |
/* Builds, for a single segment type, the map of which tracks each pin of
 * 'pin_type' connects to: [pin][width][height][side][0..Fc-1]. Track numbers
 * are relative to this segment type's slice of the channel, i.e. in
 * [0, seg_type_tracks-1]; the caller offsets them into the full channel.
 * Returns NULL if the block type has no pins. Caller owns the returned
 * matrix (free with free_matrix5). */
static int *****alloc_and_load_pin_to_seg_type(INP e_pin_type pin_type,
		INP int seg_type_tracks, INP int Fc,
		INP t_type_ptr Type, INP bool perturb_switch_pattern,
		INP e_directionality directionality) {

	/* Note: currently a single value of Fc is used across each pin. In the future
	 the looping below will have to be modified if we want to account for pin-based
	 Fc values */

	int ***num_dir; /* [0..width][0..height][0..3] - Number of *physical* pins on each side. */
	int ****dir_list; /* [0..width][0..height][0..3][0..num_pins-1] - List of pins of correct type on each side. Max possible space alloced for simplicity */
	int ***num_done_per_dir; /* [0..width][0..height][0..3] - how many pins on each side have already been given an ordering slot */
	int *****tracks_connected_to_pin; /* [0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */

	/* NB: This wastes some space. Could set tracks_..._pin[ipin][ioff][iside] =
	 * NULL if there is no pin on that side, or that pin is of the wrong type.
	 * Probably not enough memory to worry about, esp. as it's temporary.
	 * If pin ipin on side iside does not exist or is of the wrong type,
	 * tracks_connected_to_pin[ipin][iside][0] = OPEN. */

	if (Type->num_pins < 1) {
		return NULL;
	}

	/* Result matrix; every entry starts OPEN (= unconnected). */
	tracks_connected_to_pin = (int *****) alloc_matrix5(0, Type->num_pins - 1,
			0, Type->width - 1, 0, Type->height - 1, 0, 3, 0, Fc, sizeof(int));
	for (int pin = 0; pin < Type->num_pins; ++pin) {
		for (int width = 0; width < Type->width; ++width) {
			for (int height = 0; height < Type->height; ++height) {
				for (int side = 0; side < 4; ++side) {
					for (int fc = 0; fc < Fc; ++fc) {
						tracks_connected_to_pin[pin][width][height][side][fc] = OPEN; /* Unconnected. */
					}
				}
			}
		}
	}

	num_dir = (int ***) alloc_matrix3(0, Type->width - 1, 0, Type->height - 1, 0, 3, sizeof(int));
	dir_list = (int ****) alloc_matrix4(0, Type->width - 1, 0, Type->height - 1, 0, 3, 0, Type->num_pins - 1, sizeof(int));

	/* Defensive coding. Try to crash hard if I use an unset entry. */
	for (int width = 0; width < Type->width; ++width)
		for (int height = 0; height < Type->height; ++height)
			for (int side = 0; side < 4; ++side)
				for (int pin = 0; pin < Type->num_pins; ++pin)
					dir_list[width][height][side][pin] = (-1);

	for (int width = 0; width < Type->width; ++width)
		for (int height = 0; height < Type->height; ++height)
			for (int side = 0; side < 4; ++side)
				num_dir[width][height][side] = 0;

	/* Collect, per (width, height, side) location, the list of pins of the
	 * requested type that are physically present there. */
	for (int pin = 0; pin < Type->num_pins; ++pin) {
		int pin_class = Type->pin_class[pin];
		if (Type->class_inf[pin_class].type != pin_type) /* Doing either ipins OR opins */
			continue;

		/* Pins connecting only to global resources get no switches -> keeps area model accurate. */
		if (Type->is_global_pin[pin])
			continue;
		for (int width = 0; width < Type->width; ++width) {
			for (int height = 0; height < Type->height; ++height) {
				for (int side = 0; side < 4; ++side) {
					if (Type->pinloc[width][height][side][pin] == 1) {
						dir_list[width][height][side][num_dir[width][height][side]] = pin;
						num_dir[width][height][side]++;
					}
				}
			}
		}
	}

	/* Total number of physical pin locations of this type on the block. */
	int num_phys_pins = 0;
	for (int width = 0; width < Type->width; ++width) {
		for (int height = 0; height < Type->height; ++height) {
			for (int side = 0; side < 4; ++side)
				num_phys_pins += num_dir[width][height][side]; /* Num. physical pins per type */
		}
	}
	num_done_per_dir = (int ***) alloc_matrix3(0, Type->width - 1, 0, Type->height - 1, 0, 3, sizeof(int));
	for (int width = 0; width < Type->width; ++width) {
		for (int height = 0; height < Type->height; ++height) {
			for (int side = 0; side < 4; ++side) {
				num_done_per_dir[width][height][side] = 0;
			}
		}
	}

	/* Parallel arrays giving, for ordering slot k, which pin it is and at
	 * which (side, width, height) location it sits. */
	int *pin_num_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
	int *side_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
	int *width_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
	int *height_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));

	/* Connection block I use distributes pins evenly across the tracks      *
	 * of ALL sides of the clb at once. Ensures that each pin connects       *
	 * to spaced out tracks in its connection block, and that the other      *
	 * pins (potentially in other C blocks) connect to the remaining tracks  *
	 * first. Doesn't matter for large Fc, but should make a fairly          *
	 * good low Fc block that leverages the fact that usually lots of pins   *
	 * are logically equivalent.                                             */

	/* Walk the block perimeter repeatedly (LEFT -> TOP -> RIGHT -> BOTTOM
	 * per lap, advancing one grid position per iteration); pin_index is
	 * bumped once per lap, so lap k picks up the k-th listed pin on each
	 * side. This interleaves sides so consecutive ordering slots come from
	 * different sides of the block. Note the 'continue' below re-enters the
	 * walk and still advances position, so the loop always terminates. */
	int side = LEFT;
	int width = 0;
	int height = Type->height - 1;
	int pin = 0;
	int pin_index = -1;

	while (pin < num_phys_pins) {
		if (side == TOP) {
			if (width >= Type->width - 1) {
				side = RIGHT;
			} else {
				width++;
			}
		} else if (side == RIGHT) {
			if (height <= 0) {
				side = BOTTOM;
			} else {
				height--;
			}
		} else if (side == BOTTOM) {
			if (width <= 0) {
				side = LEFT;
			} else {
				width--;
			}
		} else if (side == LEFT) {
			if (height >= Type->height - 1) {
				pin_index++;
				side = TOP;
			} else {
				height++;
			}
		}

		assert(pin_index < num_phys_pins);
		/* Number of physical pins bounds number of logical pins */
		if (num_done_per_dir[width][height][side] >= num_dir[width][height][side])
			continue;
		pin_num_ordering[pin] = dir_list[width][height][side][pin_index];
		side_ordering[pin] = side;
		width_ordering[pin] = width;
		height_ordering[pin] = height;
		assert(Type->pinloc[width][height][side][dir_list[width][height][side][pin_index]]);
		num_done_per_dir[width][height][side]++;
		pin++;
	}

	/* Hand the ordered pin list to the chosen switch-pattern loader. */
	if (perturb_switch_pattern) {
		load_perturbed_switch_pattern(Type, tracks_connected_to_pin,
				num_phys_pins, pin_num_ordering, side_ordering, width_ordering, height_ordering,
				seg_type_tracks, seg_type_tracks, Fc, directionality);
	} else {
		load_uniform_switch_pattern(Type, tracks_connected_to_pin,
				num_phys_pins, pin_num_ordering, side_ordering, width_ordering, height_ordering,
				seg_type_tracks, seg_type_tracks, Fc, directionality);
	}

#ifdef ENABLE_CHECK_ALL_TRACKS
	check_all_tracks_reach_pins(Type, tracks_connected_to_pin, seg_type_tracks,
			Fc, pin_type);
#endif

	/* Free all temporary storage. */
	free_matrix3(num_dir, 0, Type->width - 1, 0, Type->height - 1, 0, sizeof(int));
	free_matrix4(dir_list, 0, Type->width - 1, 0, Type->height - 1, 0, 3, 0, sizeof(int));
	free_matrix3(num_done_per_dir, 0, Type->width - 1, 0, Type->height - 1, 0, sizeof(int));
	free(pin_num_ordering);
	free(side_ordering);
	free(width_ordering);
	free(height_ordering);

	return tracks_connected_to_pin;
}
/* Loads the tracks_connected_to_pin array with an even distribution of       *
 * switches across the tracks for each pin. For example, each pin connects    *
 * to every 4.3rd track in a channel, with exactly which tracks a pin         *
 * connects to staggered from pin to pin.                                     */

/* Uni-directional drive is implemented to ensure no directional bias and this means
 * two important comments noted below
 * 1. Spacing should be (W/2)/(Fc/2), and step_size should be spacing/(num_phys_pins),
 *    and lay down 2 switches on an adjacent pair of tracks at a time to ensure
 *    no directional bias. Basically, treat W (even) as W/2 pairs of tracks, and
 *    assign switches to a pair at a time. Can do this because W is guaranteed to
 *    be even-numbered; however same approach cannot be applied to Fc_out pattern
 *    when L > 1 and W <> 2L multiple.
 *
 * 2. This generic pattern should be considered the tileable physical layout,
 *    meaning all track # here are physical #'s,
 *    so later must use vpr_to_phy conversion to find actual logical #'s to connect.
 *    This also means I will not use get_output_block_companion_track to ensure
 *    no bias, since that describes a logical # -> that would confuse people.  */
static void load_uniform_switch_pattern(INP t_type_ptr type,
		INOUTP int *****tracks_connected_to_pin, INP int num_phys_pins,
		INP int *pin_num_ordering, INP int *side_ordering,
		INP int *width_ordering, INP int *height_ordering,
		INP int x_chan_width, INP int y_chan_width, INP int Fc,
		enum e_directionality directionality) {

	/* Bi-directional assigns tracks one at a time; uni-directional assigns
	 * them in adjacent INC/DEC pairs to avoid directional bias. */
	int group_size;
	if (directionality == BI_DIRECTIONAL) {
		group_size = 1;
	} else {
		assert(directionality == UNI_DIRECTIONAL);
		group_size = 2;
	}

	assert((x_chan_width % group_size == 0) && (y_chan_width % group_size == 0) && (Fc % group_size == 0));

	for (int i = 0; i < num_phys_pins; ++i) {
		int pin = pin_num_ordering[i];
		int side = side_ordering[i];
		int width = width_ordering[i];
		int height = height_ordering[i];

		/* These depend only on the pin (via its side), not on j -- hoisted
		 * out of the inner connection loop (values are bit-identical). */
		int max_chan_width = (side == 0 || side == 2 ? x_chan_width : y_chan_width);
		float step_size = (float) max_chan_width / (float) (Fc * num_phys_pins);
		float fc_step = (float) max_chan_width / (float) Fc;

		/* Bi-directional treats each track separately, uni-directional works with pairs of tracks */
		for (int j = 0; j < (Fc / group_size); ++j) {
			/* Stagger the start track by pin (i), then space connections
			 * evenly every fc_step tracks. */
			float ftrack = (i * step_size) + (j * fc_step);
			int itrack = ((int) ftrack) * group_size;

			/* Catch possible floating point round error */
			itrack = min(itrack, max_chan_width - group_size);

			/* Assign the group of tracks for the Fc pattern */
			for (int k = 0; k < group_size; ++k) {
				tracks_connected_to_pin[pin][width][height][side][group_size * j + k] = itrack + k;
			}
		}
	}
}
/* Loads the tracks_connected_to_pin array with an unevenly distributed      *
 * set of switches across the channel. This is done for inputs when          *
 * Fc_input = Fc_output to avoid creating "pin domains" -- certain output    *
 * pins being able to talk only to certain input pins because their switch   *
 * patterns exactly line up. Distribute Fc/2 + 1 switches over half the      *
 * channel and Fc/2 - 1 switches over the other half to make the switch      *
 * pattern different from the uniform one of the outputs. Also, have half    *
 * the pins put the "dense" part of their connections in the first half of   *
 * the channel and the other half put the "dense" part in the second half,   *
 * to make sure each track can connect to about the same number of ipins.    */
static void load_perturbed_switch_pattern(INP t_type_ptr type,
		INOUTP int *****tracks_connected_to_pin, INP int num_phys_pins,
		INP int *pin_num_ordering, INP int *side_ordering,
		INP int *width_ordering, INP int *height_ordering,
		INP int x_chan_width, INP int y_chan_width, INP int Fc,
		enum e_directionality directionality) {

	/* Only implemented for bidirectional architectures. */
	assert(directionality == BI_DIRECTIONAL);

	/* Split Fc into a "dense" half (one extra switch) and a "sparse" half. */
	int Fc_dense = (Fc / 2) + 1;
	int Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */
	int Fc_half[2];

	for (int i = 0; i < num_phys_pins; i++) {
		int pin = pin_num_ordering[i];
		int side = side_ordering[i];
		int width = width_ordering[i];
		int height = height_ordering[i];

		/* Sides 0 and 2 face x-directed channels; 1 and 3 face y-directed
		 * channels (presumably TOP/BOTTOM vs RIGHT/LEFT -- matches the
		 * vert test in build_unidir_rr_opins). */
		int max_chan_width = (side == 0 || side == 2 ? x_chan_width : y_chan_width);
		float step_size = (float) max_chan_width / (float) (Fc * num_phys_pins);

		/* Each half of Fc is spread over half the channel. */
		float spacing_dense = (float) max_chan_width / (float) (2 * Fc_dense);
		float spacing_sparse = (float) max_chan_width / (float) (2 * Fc_sparse);
		float spacing[2];

		/* Flip every pin to balance switch density */
		spacing[i % 2] = spacing_dense;
		Fc_half[i % 2] = Fc_dense;
		spacing[(i + 1) % 2] = spacing_sparse;
		Fc_half[(i + 1) % 2] = Fc_sparse;

		/* ftrack accumulates float spacing step by step, so the statement
		 * order here is behavior-significant (rounding). */
		float ftrack = i * step_size; /* Start point. Staggered from pin to pin */
		int iconn = 0;

		for (int ihalf = 0; ihalf < 2; ihalf++) { /* For both dense and sparse halves. */
			for (int j = 0; j < Fc_half[ihalf]; ++j) {

				/* Can occasionally get wraparound due to floating point rounding.
				 This is okay because the starting position > 0 when this occurs
				 so connection is valid and fine */
				int itrack = (int) ftrack;
				itrack = itrack % max_chan_width;
				tracks_connected_to_pin[pin][width][height][side][iconn] = itrack;

				ftrack += spacing[ihalf];
				iconn++;
			}
		}
	} /* End for all physical pins. */
}
#ifdef ENABLE_CHECK_ALL_TRACKS | |
/* Sanity check: every track in the channel should be reachable from at
 * least one pin of the given type. Emits an error (but keeps going) for
 * any track with zero connections. */
static void check_all_tracks_reach_pins(t_type_ptr type,
		int *****tracks_connected_to_pin, int max_chan_width, int Fc,
		enum e_pin_type ipin_or_opin) {

	assert(max_chan_width > 0);

	/* Per-track tally of pin connections: [0..max_chan_width-1] */
	int *conns_per_track = (int *) my_calloc(max_chan_width, sizeof(int));

	for (int ipin = 0; ipin < type->num_pins; ++ipin) {
		for (int iw = 0; iw < type->width; ++iw) {
			for (int ih = 0; ih < type->height; ++ih) {
				for (int iside = 0; iside < 4; ++iside) {
					/* OPEN in slot 0 means no pin of this type exists here. */
					if (tracks_connected_to_pin[ipin][iw][ih][iside][0] == OPEN)
						continue;
					for (int iconn = 0; iconn < Fc; ++iconn) {
						int itrack = tracks_connected_to_pin[ipin][iw][ih][iside][iconn];
						conns_per_track[itrack]++;
					}
				}
			}
		}
	}

	for (int itrack = 0; itrack < max_chan_width; ++itrack) {
		if (conns_per_track[itrack] <= 0) {
			vpr_printf_error(__FILE__, __LINE__,
					"check_all_tracks_reach_pins: Track %d does not connect to any CLB %ss.\n",
					itrack, (ipin_or_opin == DRIVER ? "OPIN" : "IPIN"));
		}
	}
	free(conns_per_track);
}
#endif | |
/* Allocates and loads the track to ipin lookup for each physical grid type. This
 * is the same information as the ipin_to_track map but accessed in a different way. */
static struct s_ivec ****alloc_and_load_track_to_pin_lookup(
		INP int *****pin_to_track_map, INP int **Fc,
		INP int type_width, INP int type_height,
		INP int num_pins, INP int max_chan_width,
		INP int num_seg_types) {
	struct s_ivec ****track_to_pin_lookup;
	/* [0..max_chan_width-1][0..width][0..height][0..3].  For each track number
	 * it stores a vector for each of the four sides.  x-directed channels will
	 * use the TOP and   BOTTOM vectors to figure out what clb input pins they
	 * connect to above  and below them, respectively, while y-directed channels
	 * use the LEFT and RIGHT vectors.  Each vector contains an nelem field
	 * saying how many ipins it connects to.  The list[0..nelem-1] array then
	 * gives the pin numbers.                                                  */

	/* Note that a clb pin that connects to a channel on its RIGHT means that  *
	 * that channel connects to a clb pin on its LEFT.  The convention used    *
	 * here is always in the perspective of the CLB                            */

	if (num_pins < 1) {
		return NULL;
	}

	/* Alloc and zero the the lookup table */
	track_to_pin_lookup = (struct s_ivec ****) alloc_matrix4(0, max_chan_width - 1,
			0, type_width - 1, 0, type_height - 1, 0, 3, sizeof(struct s_ivec));
	for (int track = 0; track < max_chan_width; ++track) {
		for (int width = 0; width < type_width; ++width) {
			for (int height = 0; height < type_height; ++height) {
				for (int side = 0; side < 4; ++side) {
					track_to_pin_lookup[track][width][height][side].nelem = 0;
					track_to_pin_lookup[track][width][height][side].list = NULL;
				}
			}
		}
	}

	/* Pass 1: count number of pins to which each track connects (nelem is
	 * used purely as a counter here). */
	for (int pin = 0; pin < num_pins; ++pin) {
		for (int width = 0; width < type_width; ++width) {
			for (int height = 0; height < type_height; ++height) {
				for (int side = 0; side < 4; ++side) {
					/* OPEN in slot 0 means no pin of this type at this location. */
					if (pin_to_track_map[pin][width][height][side][0] == OPEN)
						continue;

					/* get number of tracks to which this pin connects */
					int num_tracks = 0;
					for (int iseg = 0; iseg < num_seg_types; iseg++){
						num_tracks += Fc[pin][iseg];
					}

					for (int conn = 0; conn < num_tracks; ++conn) {
						int track = pin_to_track_map[pin][width][height][side][conn];
						track_to_pin_lookup[track][width][height][side].nelem++;
					}
				}
			}
		}
	}

	/* Pass 2: allocate each list to its counted size, then reset nelem to 0
	 * so the loading pass below can reuse it as a write cursor. By the end
	 * of pass 3 every nelem is back to its true count. */
	for (int track = 0; track < max_chan_width; ++track) {
		for (int width = 0; width < type_width; ++width) {
			for (int height = 0; height < type_height; ++height) {
				for (int side = 0; side < 4; ++side) {
					track_to_pin_lookup[track][width][height][side].list = NULL; /* Defensive code */
					if (track_to_pin_lookup[track][width][height][side].nelem != 0) {
						track_to_pin_lookup[track][width][height][side].list =
								(int *) my_malloc(track_to_pin_lookup[track][width][height][side].nelem * sizeof(int));
						track_to_pin_lookup[track][width][height][side].nelem = 0;
					}
				}
			}
		}
	}

	/* Pass 3: loading pass -- mirrors the counting pass exactly. */
	for (int pin = 0; pin < num_pins; ++pin) {
		for (int width = 0; width < type_width; ++width) {
			for (int height = 0; height < type_height; ++height) {
				for (int side = 0; side < 4; ++side) {
					if (pin_to_track_map[pin][width][height][side][0] == OPEN)
						continue;

					/* get number of tracks to which this pin connects */
					int num_tracks = 0;
					for (int iseg = 0; iseg < num_seg_types; iseg++){
						num_tracks += Fc[pin][iseg];
					}

					for (int conn = 0; conn < num_tracks; ++conn) {
						int track = pin_to_track_map[pin][width][height][side][conn];
						int pin_counter = track_to_pin_lookup[track][width][height][side].nelem;
						track_to_pin_lookup[track][width][height][side].list[pin_counter] = pin;
						track_to_pin_lookup[track][width][height][side].nelem++;
					}
				}
			}
		}
	}

	return track_to_pin_lookup;
}
/* A utility routine to dump the contents of the routing resource graph * | |
* (everything -- connectivity, occupancy, cost, etc.) into a file. Used * | |
* only for debugging. */ | |
void dump_rr_graph(INP const char *file_name) { | |
FILE *fp = my_fopen(file_name, "w", 0); | |
for (int inode = 0; inode < num_rr_nodes; ++inode) { | |
print_rr_node(fp, rr_node, inode); | |
fprintf(fp, "\n"); | |
} | |
fclose(fp); | |
} | |
/* Prints all the data about node inode to file fp. */
void print_rr_node(FILE * fp, t_rr_node * L_rr_node, int inode) {

	/* Name tables indexed by (enum value + 1) -- slot 0 holds "OPEN",
	 * so presumably OPEN == -1 in this codebase (the +1 offsets below
	 * map it to index 0) -- TODO confirm against vpr_types.h. */
	static const char *direction_name[] = { "OPEN", "INC_DIRECTION", "DEC_DIRECTION", "BI_DIRECTION" };
	static const char *drivers_name[] = { "OPEN", "MULTI_BUFFER", "SINGLE" };

	t_rr_type rr_type = L_rr_node[inode].type;

	/* Make sure we don't overrun const arrays */
	assert((L_rr_node[inode].get_direction() + 1) < (int)(sizeof(direction_name) / sizeof(char *)));
	assert((L_rr_node[inode].get_drivers() + 1) < (int)(sizeof(drivers_name) / sizeof(char *)));

	fprintf(fp, "Node: %d %s ", inode, L_rr_node[inode].rr_get_type_string());
	/* Single-tile nodes print one coordinate; multi-tile spans print both ends. */
	if ((L_rr_node[inode].get_xlow() == L_rr_node[inode].get_xhigh())
			&& (L_rr_node[inode].get_ylow() == L_rr_node[inode].get_yhigh())) {
		fprintf(fp, "(%d, %d) ",
				L_rr_node[inode].get_xlow(), L_rr_node[inode].get_ylow());
	} else {
		fprintf(fp, "(%d, %d) to (%d, %d) ",
				L_rr_node[inode].get_xlow(), L_rr_node[inode].get_ylow(),
				L_rr_node[inode].get_xhigh(), L_rr_node[inode].get_yhigh());
	}
	fprintf(fp, "Ptc_num: %d ", L_rr_node[inode].get_ptc_num());
	fprintf(fp, "Direction: %s ", direction_name[L_rr_node[inode].get_direction() + 1]);
	fprintf(fp, "Drivers: %s ", drivers_name[L_rr_node[inode].get_drivers() + 1]);
	fprintf(fp, "\n");

	/* Pin nodes also carry the architecture port name. */
	if( rr_type == IPIN || rr_type == OPIN)
	{
		fprintf(fp, "name %s\n", L_rr_node[inode].pb_graph_pin->port->name);
	}
	fprintf(fp, "%d edge(s):", L_rr_node[inode].get_num_edges());
	for (int iconn = 0; iconn < L_rr_node[inode].get_num_edges(); ++iconn)
		fprintf(fp, " %d", L_rr_node[inode].edges[iconn]);
	fprintf(fp, "\n");

	fprintf(fp, "Switch types:");
	for (int iconn = 0; iconn < L_rr_node[inode].get_num_edges(); ++iconn)
		fprintf(fp, " %d", L_rr_node[inode].switches[iconn]);
	fprintf(fp, "\n");

	fprintf(fp, "Occ: %d  Capacity: %d\n", L_rr_node[inode].get_occ(),
			L_rr_node[inode].get_capacity());
	/* Intra-cluster edges have no physical metal, so no R/C to report. */
	if (rr_type != INTRA_CLUSTER_EDGE) {
		fprintf(fp, "R: %g  C: %g\n", L_rr_node[inode].R, L_rr_node[inode].C);
	}
	fprintf(fp, "Cost_index: %d\n", L_rr_node[inode].get_cost_index());
}
/* Prints all the rr_indexed_data of index to file fp. */ | |
void print_rr_indexed_data(FILE * fp, int index) { | |
fprintf(fp, "Index: %d\n", index); | |
fprintf(fp, "ortho_cost_index: %d ", rr_indexed_data[index].ortho_cost_index); | |
fprintf(fp, "base_cost: %g ", rr_indexed_data[index].saved_base_cost); | |
fprintf(fp, "saved_base_cost: %g\n", rr_indexed_data[index].saved_base_cost); | |
fprintf(fp, "Seg_index: %d ", rr_indexed_data[index].seg_index); | |
fprintf(fp, "inv_length: %g\n", rr_indexed_data[index].inv_length); | |
fprintf(fp, "T_linear: %g ", rr_indexed_data[index].T_linear); | |
fprintf(fp, "T_quadratic: %g ", rr_indexed_data[index].T_quadratic); | |
fprintf(fp, "C_load: %g\n", rr_indexed_data[index].C_load); | |
} | |
/* Builds the outgoing edges for every DRIVER (output) pin of the block
 * whose base tile is at grid location (i, j), for unidirectional
 * architectures: OPIN -> channel-track connections for each segment type,
 * plus any direct CLB-to-CLB connections. Edges are loaded straight into
 * L_rr_node via alloc_and_load_edges_and_switches. Sets *Fc_clipped if
 * any pin's connections had to be clipped.
 * NOTE(review): the previous comment here claimed this routine "returns a
 * list ... free with free_matrix" -- it actually returns void; also, the
 * delayless_switch parameter is not referenced in this body. */
static void build_unidir_rr_opins(INP int i, INP int j,
		INP struct s_grid_tile **L_grid, INP int ***Fc_out, INP int max_chan_width,
		INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y,
		INOUTP int ***Fc_xofs, INOUTP int ***Fc_yofs,
		INOUTP t_rr_node * L_rr_node, INOUTP bool * L_rr_edge_done,
		OUTP bool * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch,
		INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs,
		INP int num_seg_types){

	*Fc_clipped = false;

	/* Only the base block of a set should use this function */
	if (L_grid[i][j].width_offset > 0 || L_grid[i][j].height_offset > 0) {
		return;
	}

	t_type_ptr type = L_grid[i][j].type;

	/* Go through each pin and find its fanout. */
	for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) {
		/* Skip global pins and pins that are not of DRIVER type */
		int class_index = type->pin_class[pin_index];
		if (type->class_inf[class_index].type != DRIVER) {
			continue;
		}
		if (type->is_global_pin[pin_index]) {
			continue;
		}

		int num_edges = 0;
		t_linked_edge *edge_list = NULL;

		/* Gather this pin's edges one segment type at a time (each segment
		 * type has its own Fc). */
		for (int iseg = 0; iseg < num_seg_types; iseg++){

			/* get Fc for this segment type */
			int seg_type_Fc = Fc_out[type->index][pin_index][iseg];
			assert( seg_type_Fc >= 0 );
			if (seg_type_Fc == 0){
				continue;
			}

			for (int width = 0; width < type->width; ++width) {
				for (int height = 0; height < type->height; ++height) {
					for (enum e_side side = (enum e_side)0; side < 4; side = (enum e_side)(side + 1)) {
						/* Can't do anything if pin isn't at this location */
						if (0 == type->pinloc[width][height][side][pin_index]) {
							continue;
						}

						/* Figure out the chan seg at that side.
						 * side is the side of the logic or io block. */
						/* TOP/BOTTOM face x-directed channels; LEFT/RIGHT face
						 * y-directed ones. chan/seg swap roles accordingly. */
						bool vert = ((side == TOP) || (side == BOTTOM));
						bool pos_dir = ((side == TOP) || (side == RIGHT));
						t_rr_type chan_type = (vert ? CHANX : CHANY);
						int chan = (vert ? (j + height) : (i + width));
						int seg = (vert ? (i + width) : (j + height));
						int max_len = (vert ? nx : ny);
						int ***Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
						/* BOTTOM/LEFT sides face the channel one row/column back. */
						if (false == pos_dir) {
							--chan;
						}

						/* Skip the location if there is no channel. */
						if (chan < 0) {
							continue;
						}
						if (seg < 1) {
							continue;
						}
						if (seg > (vert ? nx : ny)) {
							continue;
						}
						if (chan > (vert ? ny : nx)) {
							continue;
						}

						t_seg_details * seg_details = (chan_type == CHANX ?
								chan_details_x[seg][chan] : chan_details_y[chan][seg]);
						/* Zero-length details mark a non-existent channel segment. */
						if (seg_details[0].length == 0)
							continue;

						/* Get the list of opin to mux connections for that chan seg. */
						bool clipped;
						num_edges += get_unidir_opin_connections(type, chan, seg,
								seg_type_Fc, iseg, chan_type, seg_details, &edge_list,
								Fc_ofs, L_rr_edge_done, max_len, max_chan_width,
								L_rr_node_indices, &clipped);
						if (clipped) {
							*Fc_clipped = true;
						}
					}
				}
			}
		}

		/* Add in direct connections */
		num_edges += get_opin_direct_connecions(i, j, pin_index, &edge_list, L_rr_node_indices,
				directs, num_directs, clb_to_clb_directs);

		/* Add the edges */
		int opin_node_index = get_rr_node_index(i, j, OPIN, pin_index, L_rr_node_indices);
		alloc_and_load_edges_and_switches(rr_node, opin_node_index, num_edges,
				L_rr_edge_done, edge_list);
		/* The edge list is owned here -- free it now that the edges are
		 * copied into the rr_node arrays. */
		while (edge_list != NULL) {
			t_linked_edge *next_edge = edge_list->next;
			free(edge_list);
			edge_list = next_edge;
		}
	}
}
/** | |
* Parse out which CLB pins should connect directly to which other CLB pins then store that in a clb_to_clb_directs data structure | |
* This data structure supplements the the info in the "directs" data structure | |
* TODO: The function that does this parsing in placement is poorly done because it lacks generality on heterogeniety, should replace with this one | |
*/ | |
static t_clb_to_clb_directs * alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs, int delayless_switch) { | |
int i, j; | |
t_clb_to_clb_directs *clb_to_clb_directs; | |
char *pb_type_name, *port_name; | |
int start_pin_index, end_pin_index; | |
t_pb_type *pb_type; | |
clb_to_clb_directs = (t_clb_to_clb_directs*)my_calloc(num_directs, sizeof(t_clb_to_clb_directs)); | |
pb_type_name = NULL; | |
port_name = NULL; | |
for (i = 0; i < num_directs; i++) { | |
pb_type_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char)); | |
port_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char)); | |
// Load from pins | |
// Parse out the pb_type name, port name, and pin range | |
parse_direct_pin_name(directs[i].from_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name); | |
// Figure out which type, port, and pin is used | |
for (j = 0; j < num_types; j++) { | |
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) { | |
break; | |
} | |
} | |
assert(j < num_types); | |
clb_to_clb_directs[i].from_clb_type = &type_descriptors[j]; | |
pb_type = clb_to_clb_directs[i].from_clb_type->pb_type; | |
for (j = 0; j < pb_type->num_ports; j++) { | |
if(strcmp(pb_type->ports[j].name, port_name) == 0) { | |
break; | |
} | |
} | |
assert(j < pb_type->num_ports); | |
if(start_pin_index == OPEN) { | |
assert(start_pin_index == end_pin_index); | |
start_pin_index = 0; | |
end_pin_index = pb_type->ports[j].num_pins - 1; | |
} | |
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].from_clb_pin_start_index); | |
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].from_clb_pin_end_index); | |
// Load to pins | |
// Parse out the pb_type name, port name, and pin range | |
parse_direct_pin_name(directs[i].to_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name); | |
// Figure out which type, port, and pin is used | |
for (j = 0; j < num_types; j++) { | |
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) { | |
break; | |
} | |
} | |
assert(j < num_types); | |
clb_to_clb_directs[i].to_clb_type = &type_descriptors[j]; | |
pb_type = clb_to_clb_directs[i].to_clb_type->pb_type; | |
for (j = 0; j < pb_type->num_ports; j++) { | |
if(strcmp(pb_type->ports[j].name, port_name) == 0) { | |
break; | |
} | |
} | |
assert(j < pb_type->num_ports); | |
if(start_pin_index == OPEN) { | |
assert(start_pin_index == end_pin_index); | |
start_pin_index = 0; | |
end_pin_index = pb_type->ports[j].num_pins - 1; | |
} | |
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].to_clb_pin_start_index); | |
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].to_clb_pin_end_index); | |
if(abs(clb_to_clb_directs[i].from_clb_pin_start_index - clb_to_clb_directs[i].from_clb_pin_end_index) != abs(clb_to_clb_directs[i].to_clb_pin_start_index - clb_to_clb_directs[i].to_clb_pin_end_index)) { | |
vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, | |
"Range mismatch from %s to %s.\n", directs[i].from_pin, directs[i].to_pin); | |
} | |
//Set the switch index | |
if(directs[i].switch_type > 0) { | |
//Use the specified switch | |
clb_to_clb_directs[i].switch_index = directs[i].switch_type; | |
} else { | |
//Use the delayless switch by default | |
clb_to_clb_directs[i].switch_index = delayless_switch; | |
} | |
free(pb_type_name); | |
free(port_name); | |
//We must be careful to clean-up anything that we may have incidentally allocated. | |
//Specifically, we can be called while generating the dummy architecture | |
//for placer delay estimation. Since the delay estimation occurs on a | |
//'different' architecture it is almost certain that the f_blk_pin_from_port_pin allocated | |
//by calling get_blk_pin_from_port_pin() will later be invalid. | |
//We therefore must free it now. | |
free_blk_pin_from_port_pin(); | |
} | |
return clb_to_clb_directs; | |
} | |
/* Add all direct clb-pin-to-clb-pin edges to given opin */ | |
static int get_opin_direct_connecions(int x, int y, int opin, | |
INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices, | |
INP t_direct_inf *directs, INP int num_directs, | |
INP t_clb_to_clb_directs *clb_to_clb_directs) { | |
t_type_ptr curr_type, target_type; | |
int width_offset, height_offset; | |
int i, ipin, inode; | |
t_linked_edge *edge_list_head; | |
int max_index, min_index, offset, swap; | |
int new_edges; | |
curr_type = grid[x][y].type; | |
edge_list_head = *edge_list_ptr; | |
new_edges = 0; | |
/* Iterate through all direct connections */ | |
for (i = 0; i < num_directs; i++) { | |
/* Find matching direct clb-to-clb connections with the same type as current grid location */ | |
if(clb_to_clb_directs[i].from_clb_type == curr_type) { //We are at a valid starting point | |
//Offset must be in range | |
if(x + directs[i].x_offset < nx + 1 && | |
x + directs[i].x_offset > 0 && | |
y + directs[i].y_offset < ny + 1 && | |
y + directs[i].y_offset > 0) { | |
//Only add connections if the target clb type matches the type in the direct specification | |
target_type = grid[x + directs[i].x_offset][y + directs[i].y_offset].type; | |
if(clb_to_clb_directs[i].to_clb_type == target_type) { | |
/* Compute index of opin with regards to given pins */ | |
if(clb_to_clb_directs[i].from_clb_pin_start_index > clb_to_clb_directs[i].from_clb_pin_end_index) { | |
swap = true; | |
max_index = clb_to_clb_directs[i].from_clb_pin_start_index; | |
min_index = clb_to_clb_directs[i].from_clb_pin_end_index; | |
} else { | |
swap = false; | |
min_index = clb_to_clb_directs[i].from_clb_pin_start_index; | |
max_index = clb_to_clb_directs[i].from_clb_pin_end_index; | |
} | |
if(max_index >= opin && min_index <= opin) { | |
offset = opin - min_index; | |
/* This opin is specified to connect directly to an ipin, now compute which ipin to connect to */ | |
ipin = OPEN; | |
if(clb_to_clb_directs[i].to_clb_pin_start_index > clb_to_clb_directs[i].to_clb_pin_end_index) { | |
if(swap) { | |
ipin = clb_to_clb_directs[i].to_clb_pin_end_index + offset; | |
} else { | |
ipin = clb_to_clb_directs[i].to_clb_pin_start_index - offset; | |
} | |
} else { | |
if(swap) { | |
ipin = clb_to_clb_directs[i].to_clb_pin_end_index - offset; | |
} else { | |
ipin = clb_to_clb_directs[i].to_clb_pin_start_index + offset; | |
} | |
} | |
/* Add new ipin edge to list of edges */ | |
width_offset = grid[x + directs[i].x_offset][y + directs[i].y_offset].width_offset; | |
height_offset = grid[x + directs[i].x_offset][y + directs[i].y_offset].height_offset; | |
inode = get_rr_node_index(x + directs[i].x_offset - width_offset, y + directs[i].y_offset - height_offset, | |
IPIN, ipin, L_rr_node_indices); | |
edge_list_head = insert_in_edge_list(edge_list_head, inode, clb_to_clb_directs[i].switch_index); | |
new_edges++; | |
} | |
} | |
} | |
} | |
} | |
*edge_list_ptr = edge_list_head; | |
return new_edges; | |
} | |
/* Determines whether the output pins of the specified block type should be perturbed. * | |
* This is to prevent pathological cases where the output pin connections are * | |
* spaced such that the connection pattern always skips some types of wire (w.r.t. * | |
* starting points) */ | |
static bool* alloc_and_load_perturb_opins(INP t_type_ptr type, INP int **Fc_out, | |
INP int max_chan_width, INP int num_seg_types, INP t_segment_inf *segment_inf){ | |
int i, Fc_max, iclass, num_wire_types; | |
int num, max_primes, factor, num_factors; | |
int *prime_factors; | |
float step_size = 0; | |
float n = 0; | |
float threshold = 0.07; | |
bool *perturb_opins = (bool *) my_calloc( num_seg_types, sizeof(bool) ); | |
i = Fc_max = iclass = 0; | |
if (num_seg_types > 1){ | |
/* Segments of one length are grouped together in the channel. * | |
* In the future we can determine if any of these segments will * | |
* encounter the pathological step size case, and determine if * | |
* we need to perturb based on the segment's frequency (if * | |
* frequency is small we should not perturb - testing has found * | |
* that perturbing a channel when unnecessary increases needed * | |
* W to achieve the same delay); but for now we just return. */ | |
return perturb_opins; | |
} else { | |
/* There are as many wire start points as the value of L */ | |
num_wire_types = segment_inf[0].length; | |
} | |
/* get Fc_max */ | |
for (i = 0; i < type->num_pins; ++i) { | |
iclass = type->pin_class[i]; | |
if (Fc_out[i][0] > Fc_max && type->class_inf[iclass].type == DRIVER) { | |
Fc_max = Fc_out[i][0]; | |
} | |
} | |
/* Nothing to perturb if Fc=0; no need to perturb if Fc = 1 */ | |
if (Fc_max == 0 || Fc_max == max_chan_width){ | |
return perturb_opins; | |
} | |
/* Pathological cases occur when the step size, W/Fc, is a multiple of * | |
* the number of wire starting points, L. Specifically, when the step * | |
* size is a multiple of a prime factor of L, the connection pattern * | |
* will always skip some wires. Thus, we perturb pins if we detect this * | |
* case. */ | |
/* get an upper bound on the number of prime factors of num_wire_types */ | |
max_primes = (int)floor(log((float)num_wire_types)/log(2.0)); | |
prime_factors = (int *) my_malloc(max_primes * sizeof(int)); | |
for (i = 0; i < max_primes; i++){ | |
prime_factors[i] = 0; | |
} | |
/* Find the prime factors of num_wire_types */ | |
num = num_wire_types; | |
factor = 2; | |
num_factors = 0; | |
while ( pow((float)factor, 2) <= num ){ | |
if ( num % factor == 0 ){ | |
num /= factor; | |
if ( factor != prime_factors[num_factors] ){ | |
prime_factors[num_factors] = factor; | |
num_factors++; | |
} | |
} else { | |
factor++; | |
} | |
} | |
if (num_factors == 0){ | |
prime_factors[num_factors++] = num_wire_types; /* covers cases when num_wire_types is prime */ | |
} | |
/* Now see if step size is an approximate multiple of one of the factors. A * | |
* threshold is used because step size may not be an integer. */ | |
step_size = (float)max_chan_width / Fc_max; | |
for (i = 0; i < num_factors; i++){ | |
if ( nint(step_size) < prime_factors[i] ){ | |
perturb_opins[0] = false; | |
break; | |
} | |
n = step_size / prime_factors[i]; | |
n = n - (float)nint(n); /* fractinal part */ | |
if ( fabs(n) < threshold ){ | |
perturb_opins[0] = true; | |
break; | |
} else { | |
perturb_opins[0] = false; | |
} | |
} | |
free(prime_factors); | |
return perturb_opins; | |
} | |