#include <cstdio>
#include <cstring>
#include <cmath>
#include <algorithm>
using namespace std;
#include <assert.h>
#include "util.h"
#include "vpr_types.h"
#include "vpr_utils.h"
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph.h"
#include "rr_graph2.h"
#include "rr_graph_sbox.h"
#include "rr_graph_timing_params.h"
#include "rr_graph_indexed_data.h"
#include "check_rr_graph.h"
#include "read_xml_arch_file.h"
#include "ReadOptions.h"
#include "dump_rr_structs.h"
#include "cb_metrics.h"
#include "build_switchblocks.h"
#include "route_common.h"
#ifdef INTERPOSER_BASED_ARCHITECTURE
#include "rr_graph_multi.h"
#endif
typedef struct s_mux {
int size;
struct s_mux *next;
} t_mux;
typedef struct s_mux_size_distribution {
int mux_count;
int max_index;
int *distr;
struct s_mux_size_distribution *next;
} t_mux_size_distribution;
typedef struct s_clb_to_clb_directs {
t_type_descriptor *from_clb_type;
int from_clb_pin_start_index;
int from_clb_pin_end_index;
t_type_descriptor *to_clb_type;
int to_clb_pin_start_index;
int to_clb_pin_end_index;
int switch_index; //The switch type used by this direct connection
} t_clb_to_clb_directs;
/* UDSD Modifications by WMF End */
/******************* Variables local to this module. ***********************/
/* Status of the current chunk being dished out by calls to my_chunk_malloc;
* also used to free the "chunked" memory. If its chunk_ptr_head is NULL,
* no rr_graph exists right now. */
static t_chunk rr_mem_ch = {NULL, 0, NULL};
/********************* Subroutines local to this module. *******************/
static int *****alloc_and_load_pin_to_track_map(INP e_pin_type pin_type,
INP int **Fc, INP t_type_ptr Type, INP bool *perturb_switch_pattern,
INP e_directionality directionality,
INP int num_seg_types, INP int *sets_per_seg_type);
static int *****alloc_and_load_pin_to_seg_type(
INP e_pin_type pin_type,
INP int seg_type_tracks, INP int Fc,
INP t_type_ptr Type, INP bool perturb_switch_pattern,
INP e_directionality directionality);
static struct s_ivec ****alloc_and_load_track_to_pin_lookup(
INP int *****pin_to_track_map, INP int **Fc,
INP int width, INP int height,
INP int num_pins, INP int max_chan_width,
INP int num_seg_types);
static void build_bidir_rr_opins(
INP int i, INP int j,
INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int ******opin_to_track_map, INP int ***Fc_out,
INP bool * L_rr_edge_done, INP t_seg_details * seg_details,
INP struct s_grid_tile **L_grid, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs,
INP int num_seg_types);
static void build_unidir_rr_opins(
INP int i, INP int j,
INP struct s_grid_tile **L_grid, INP int ***Fc_out,
INP int max_chan_width,
INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y,
INOUTP int ***Fc_xofs, INOUTP int ***Fc_yofs,
INOUTP t_rr_node * L_rr_node, INOUTP bool * L_rr_edge_done,
OUTP bool * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs,
INP int num_seg_types);
static int get_opin_direct_connecions(
int x, int y, int opin,
INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices,
INP t_direct_inf *directs, INP int num_directs,
INP t_clb_to_clb_directs *clb_to_clb_directs);
static void alloc_and_load_rr_graph(
INP int num_nodes,
INP t_rr_node * L_rr_node, INP int num_seg_types,
INP t_seg_details * seg_details,
INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y,
INP bool * L_rr_edge_done,
INP struct s_ivec *****track_to_pin_lookup,
INP int ******opin_to_track_map, INP struct s_ivec ***switch_block_conn,
INP t_sb_connection_map *sb_conn_map,
INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs,
INP short ******sblock_pattern, INP int ***Fc_out, INP int ***Fc_xofs,
INP int ***Fc_yofs, INP t_ivec *** L_rr_node_indices,
INP int max_chan_width, INP enum e_switch_block_type sb_type,
INP int delayless_switch, INP enum e_directionality directionality,
INP int wire_to_ipin_switch, OUTP bool * Fc_clipped,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs);
static void load_uniform_switch_pattern(
INP t_type_ptr type,
INOUTP int *****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *width_ordering, INP int *height_ordering,
INP int x_chan_width, INP int y_chan_width, INP int Fc,
INP enum e_directionality directionality);
static void load_perturbed_switch_pattern(
INP t_type_ptr type,
INOUTP int *****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *width_ordering, INP int *height_ordering,
INP int x_chan_width, INP int y_chan_width, INP int Fc,
INP enum e_directionality directionality);
static bool* alloc_and_load_perturb_opins(INP t_type_ptr type, INP int **Fc_out, INP int max_chan_width,
INP int num_seg_types, INP t_segment_inf *segment_inf);
#ifdef ENABLE_CHECK_ALL_TRACKS
static void check_all_tracks_reach_pins(
t_type_ptr type,
int *****tracks_connected_to_pin,
int max_chan_width, int Fc,
enum e_pin_type ipin_or_opin);
#endif
static bool **alloc_and_load_perturb_ipins(
INP int max_chan_width, INP int L_num_types,
INP int num_seg_types,
INP int *sets_per_seg_type,
INP int ***Fc_in, INP int ***Fc_out,
INP enum e_directionality directionality);
static void build_rr_sinks_sources(
INP int i, INP int j,
INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP struct s_grid_tile **L_grid);
static void build_rr_chan(
INP int i, INP int j, INP t_rr_type chan_type,
INP struct s_ivec *****track_to_pin_lookup, t_sb_connection_map *sb_conn_map,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int max_chan_width, INP int tracks_per_chan, INP int *opin_mux_size,
INP short ******sblock_pattern, INP int Fs_per_side,
INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y,
INP t_ivec *** L_rr_node_indices,
INP bool * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality);
static int alloc_and_load_rr_switch_inf(INP int num_arch_switches, INP int wire_to_arch_ipin_switch, OUTP int *wire_to_rr_ipin_switch);
static void remap_rr_node_switch_indices(INP map<int,int> *switch_fanin);
static void load_rr_switch_inf(INP int num_arch_switches, INOUTP map<int,int> *switch_fanin);
static int alloc_rr_switch_inf(INP int num_arch_switches, OUTP map<int,int> *switch_fanin);
static void rr_graph_externals(
t_segment_inf * segment_inf, int num_seg_types, int max_chan_width,
int wire_to_rr_ipin_switch, enum e_base_cost_type base_cost_type);
void alloc_and_load_edges_and_switches(
INP t_rr_node * L_rr_node, INP int inode,
INP int num_edges, INP bool * L_rr_edge_done,
INP t_linked_edge * edge_list_head);
static void alloc_net_rr_terminals(void);
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices);
static t_clb_to_clb_directs *alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs,
INP int delayless_switch);
void watch_edges(int inode, t_linked_edge * edge_list_head);
static void free_type_pin_to_track_map(
int ******ipin_to_track_map,
t_type_ptr types);
static void free_type_track_to_pin_map(
struct s_ivec *****track_to_pin_map,
t_type_ptr types, int max_chan_width);
static t_seg_details *alloc_and_load_global_route_seg_details(
INP int global_route_switch,
OUTP int * num_seg_details = 0);
/* UDSD Modifications by WMF End */
static int ***alloc_and_load_actual_fc(INP int L_num_types, INP t_type_ptr types, INP int max_pins,
INP int num_seg_types, INP const int *sets_per_seg_type,
INP int max_chan_width, INP bool is_Fc_out,
INP enum e_directionality directionality,
OUTP bool *Fc_clipped, INP bool ignore_Fc_0);
/******************* Subroutine definitions *******************************/
void build_rr_graph(
INP t_graph_type graph_type, INP int L_num_types,
INP t_type_ptr types, INP int L_nx, INP int L_ny,
INP struct s_grid_tile **L_grid,
INP struct s_chan_width *nodes_per_chan,
INP struct s_chan_width_dist *chan_capacity_inf,
INP enum e_switch_block_type sb_type, INP int Fs,
INP vector<t_switchblock_inf> switchblocks,
INP int num_seg_types, INP int num_arch_switches,
INP t_segment_inf * segment_inf,
INP int global_route_switch, INP int delayless_switch,
INP t_timing_inf timing_inf, INP int wire_to_arch_ipin_switch,
INP enum e_base_cost_type base_cost_type,
INP bool trim_empty_channels,
INP bool trim_obs_channels,
INP t_direct_inf *directs, INP int num_directs,
INP bool ignore_Fc_0, INP const char *dump_rr_structs_file,
OUTP int *wire_to_rr_ipin_switch,
OUTP int *num_rr_switches,
OUTP int *Warnings) {
/* Reset warning flag */
*Warnings = RR_GRAPH_NO_WARN;
/* Decode the graph_type */
bool is_global_graph = (GRAPH_GLOBAL == graph_type);
bool use_full_seg_groups = (GRAPH_UNIDIR_TILEABLE == graph_type);
enum e_directionality directionality = (GRAPH_BIDIR == graph_type ? BI_DIRECTIONAL : UNI_DIRECTIONAL);
if (is_global_graph) {
directionality = BI_DIRECTIONAL;
}
/* Global routing uses a single longwire track */
int max_chan_width = (is_global_graph ? 1 : nodes_per_chan->max);
assert(max_chan_width > 0);
t_clb_to_clb_directs *clb_to_clb_directs = NULL;
if(num_directs > 0) {
clb_to_clb_directs = alloc_and_load_clb_to_clb_directs(directs, num_directs, delayless_switch);
}
/* START SEG_DETAILS */
int num_seg_details = 0;
t_seg_details *seg_details = NULL;
if (is_global_graph) {
/* Sets up a single unit length segment type for global routing. */
seg_details = alloc_and_load_global_route_seg_details(
global_route_switch, &num_seg_details);
} else {
/* Set up the segments, including distributing tracks and staggering.
* If use_full_seg_groups is specified, max_chan_width may be
* changed. A warning is signaled to the caller if this happens. */
seg_details = alloc_and_load_seg_details(&max_chan_width,
max(L_nx, L_ny), num_seg_types, segment_inf,
use_full_seg_groups, is_global_graph, directionality,
&num_seg_details);
if (nodes_per_chan->max != max_chan_width) {
nodes_per_chan->max = max_chan_width;
*Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED;
}
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SEG_DETAILS)) {
dump_seg_details(seg_details, max_chan_width,
getEchoFileName(E_ECHO_SEG_DETAILS));
}
}
/* END SEG_DETAILS */
/* START CHAN_DETAILS */
t_chan_details *chan_details_x = NULL;
t_chan_details *chan_details_y = NULL;
alloc_and_load_chan_details(L_nx, L_ny, nodes_per_chan,
trim_empty_channels, trim_obs_channels,
num_seg_details, seg_details,
&chan_details_x, &chan_details_y);
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CHAN_DETAILS)) {
dump_chan_details( chan_details_x, chan_details_y, max_chan_width, nx, ny,
getEchoFileName(E_ECHO_CHAN_DETAILS));
}
/* END CHAN_DETAILS */
/* START FC */
/* Determine the actual value of Fc */
int ***Fc_in = NULL; /* [0..num_types-1][0..num_pins-1][0..num_segments-1] */
int ***Fc_out = NULL; /* [0..num_types-1][0..num_pins-1][0..num_segments-1] */
/* get maximum number of pins across all blocks */
int max_pins = types[0].num_pins;
for (int i = 1; i < L_num_types; ++i) {
if (types[i].num_pins > max_pins) {
max_pins = types[i].num_pins;
}
}
/* get the number of 'sets' for each segment type -- unidirectional architectures have two tracks in a set, bidirectional have one */
int total_sets = max_chan_width;
if (directionality == UNI_DIRECTIONAL){
assert(max_chan_width % 2 == 0);
total_sets /= 2;
}
int *sets_per_seg_type = get_seg_track_counts(total_sets, num_seg_types, segment_inf, use_full_seg_groups);
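/* Illustrative example (not from the original source): with max_chan_width = 100
and a unidirectional architecture, total_sets = 50; get_seg_track_counts() then
divides those 50 sets among the segment types, roughly in proportion to each
type's specified frequency. */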
if (is_global_graph) {
/* allocate full [0..num_types-1][0..max_pins-1][0..num_seg_types-1] matrices
so the initialization below stays in bounds */
Fc_in = (int ***) alloc_matrix3(0, L_num_types-1, 0, max_pins-1, 0, num_seg_types-1, sizeof(int));
Fc_out = (int ***) alloc_matrix3(0, L_num_types-1, 0, max_pins-1, 0, num_seg_types-1, sizeof(int));
for (int i = 0; i < L_num_types; ++i) {
for (int j = 0; j < types[i].num_pins; ++j) {
for (int k = 0; k < num_seg_types; k++){
Fc_in[i][j][k] = 1;
Fc_out[i][j][k] = 1;
}
}
}
} else {
bool Fc_clipped = false;
Fc_in = alloc_and_load_actual_fc(L_num_types, types, max_pins, num_seg_types, sets_per_seg_type, max_chan_width,
false, directionality, &Fc_clipped, ignore_Fc_0);
if (Fc_clipped) {
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
Fc_clipped = false;
Fc_out = alloc_and_load_actual_fc(L_num_types, types, max_pins, num_seg_types, sets_per_seg_type, max_chan_width,
true, directionality, &Fc_clipped, ignore_Fc_0);
if (Fc_clipped) {
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
#ifdef VERBOSE
for (int i = 1; i < L_num_types; ++i) { /* Skip "<EMPTY>" */
for (int j = 0; j < type_descriptors[i].num_pins; ++j) {
for (int k = 0; k < num_seg_types; k++){
if (type_descriptors[i].is_Fc_full_flex[j]) {
vpr_printf_info("Fc Actual Values: type = %s, pin = %d, seg_index = %d, Fc_out = full, Fc_in = %d.\n",
type_descriptors[i].name, j, k, Fc_in[i][j][k]);
}
else {
vpr_printf_info("Fc Actual Values: type = %s, pin = %d, seg = %d, Fc_out = %d, Fc_in = %d.\n",
type_descriptors[i].name, j, k, Fc_out[i][j][k], Fc_in[i][j][k]);
}
}
}
}
#endif /* VERBOSE */
}
bool **perturb_ipins = alloc_and_load_perturb_ipins(max_chan_width, L_num_types, num_seg_types,
sets_per_seg_type, Fc_in, Fc_out, directionality);
/* END FC */
/* Alloc node lookups, count nodes, alloc rr nodes */
num_rr_nodes = 0;
rr_node_indices = alloc_and_load_rr_node_indices(max_chan_width, L_nx, L_ny,
&num_rr_nodes, chan_details_x, chan_details_y);
rr_node = (t_rr_node *) my_malloc(sizeof(t_rr_node) * num_rr_nodes);
memset(rr_node, 0, sizeof(t_rr_node) * num_rr_nodes);
// per-node pin side info; SINK sides are derived from their IPINs after the graph is built
g_pin_side = (char *) my_malloc(sizeof(char) * num_rr_nodes);
memset(g_pin_side, 0, sizeof(char) * num_rr_nodes);
bool *L_rr_edge_done = (bool *) my_malloc(sizeof(bool) * num_rr_nodes);
memset(L_rr_edge_done, 0, sizeof(bool) * num_rr_nodes);
/* These are data structures used by the unidir opin mapping. They are used
to spread connections evenly for each segment type among the available
wire start points */
int ***Fc_xofs = NULL; /* [0..ny][0..nx][0..num_seg_types-1] */
int ***Fc_yofs = NULL; /* [0..nx][0..ny][0..num_seg_types-1] */
if (UNI_DIRECTIONAL == directionality) {
Fc_xofs = (int ***) alloc_matrix3(0, L_ny, 0, L_nx, 0, num_seg_types-1, sizeof(int));
Fc_yofs = (int ***) alloc_matrix3(0, L_nx, 0, L_ny, 0, num_seg_types-1, sizeof(int));
for (int i = 0; i <= L_nx; ++i) {
for (int j = 0; j <= L_ny; ++j) {
for (int iseg = 0; iseg < num_seg_types; ++iseg){
Fc_xofs[j][i][iseg] = 0;
Fc_yofs[i][j][iseg] = 0;
}
}
}
}
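/* Illustrative sketch of how these offsets are used (assumed numbers): if a
segment type has 8 wire start points in a channel and three OPINs each request
Fc = 2 connections to it, the running offset in Fc_xofs/Fc_yofs advances
0 -> 2 -> 4, so each OPIN begins at a fresh start point instead of piling
onto start point 0. */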
/* START SB LOOKUP */
/* Alloc and load the switch block lookup */
t_ivec ***switch_block_conn = NULL;
short ******unidir_sb_pattern = NULL;
t_sb_connection_map *sb_conn_map = NULL; //for custom switch blocks
if (is_global_graph) {
switch_block_conn = alloc_and_load_switch_block_conn(1, SUBSET, 3);
} else if (BI_DIRECTIONAL == directionality) {
if (sb_type == CUSTOM){
sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x,
chan_details_y, L_nx, L_ny, switchblocks,
nodes_per_chan, directionality);
} else {
switch_block_conn = alloc_and_load_switch_block_conn(max_chan_width,
sb_type, Fs);
}
} else {
assert(UNI_DIRECTIONAL == directionality);
if (sb_type == CUSTOM){
sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x,
chan_details_y, L_nx, L_ny, switchblocks,
nodes_per_chan, directionality);
} else {
unidir_sb_pattern = alloc_sblock_pattern_lookup(L_nx, L_ny, max_chan_width);
for (int i = 0; i <= L_nx; i++) {
for (int j = 0; j <= L_ny; j++) {
load_sblock_pattern_lookup(i, j, nodes_per_chan,
chan_details_x, chan_details_y,
Fs, sb_type, unidir_sb_pattern);
}
}
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SBLOCK_PATTERN)) {
dump_sblock_pattern( unidir_sb_pattern, max_chan_width, L_nx, L_ny,
getEchoFileName(E_ECHO_SBLOCK_PATTERN));
}
}
}
/* END SB LOOKUP */
/* START IPIN MAP */
/* Create ipin map lookups */
int ******ipin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */
t_ivec *****track_to_pin_lookup = NULL; /* [0..num_types-1][0..max_chan_width-1][0..width][0..height][0..3] */
ipin_to_track_map = (int ******) my_malloc(sizeof(int *****) * L_num_types);
track_to_pin_lookup = (struct s_ivec *****) my_malloc(sizeof(struct s_ivec ****) * L_num_types);
for (int itype = 0; itype < L_num_types; ++itype) {
ipin_to_track_map[itype] = alloc_and_load_pin_to_track_map(RECEIVER,
Fc_in[itype], &types[itype], perturb_ipins[itype], directionality,
num_seg_types, sets_per_seg_type);
track_to_pin_lookup[itype] = alloc_and_load_track_to_pin_lookup(
ipin_to_track_map[itype], Fc_in[itype], types[itype].width, types[itype].height,
types[itype].num_pins, max_chan_width, num_seg_types);
}
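/* The two lookups built above are inverses of each other:
ipin_to_track_map[itype][ipin][width][height][side][k] gives the k-th track a
pin connects to, while track_to_pin_lookup[itype][track][width][height][side]
lists every pin reachable from that track. */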
/* END IPIN MAP */
/* START OPIN MAP */
/* Create opin map lookups */
int ******opin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */
if (BI_DIRECTIONAL == directionality) {
//bool test_metrics_outp = false;
opin_to_track_map = (int ******) my_malloc(sizeof(int *****) * L_num_types);
for (int i = 0; i < L_num_types; ++i) {
bool *perturb_opins = alloc_and_load_perturb_opins(&types[i], Fc_out[i],
max_chan_width, num_seg_types, segment_inf);
opin_to_track_map[i] = alloc_and_load_pin_to_track_map(DRIVER,
Fc_out[i], &types[i], perturb_opins, directionality,
num_seg_types, sets_per_seg_type);
///* adjust CLB connection block, if enabled */
//if (strcmp("clb", types[i].name) == 0 && test_metrics_outp){
// float target_metric_outp;
// target_metric_outp = 0;
// /* during binary search routing we want the connection block (CB) to be deterministic. That is,
// if we calculate a CB, check other CBs for other values of W, and then come back to that
// CB, that CB should be the same as before, otherwise we get connectivity errors. So to achieve this determinism
// we save (and later restore) the current seed, and just use a seed of 1 for generating the connection block */
// unsigned int saved_seed = get_current_random();
// my_srandom(1);
// adjust_cb_metric(WIRE_HOMOGENEITY, target_metric_outp, 0.005, 0.05, &types[i], opin_to_track_map[i], DRIVER, Fc_out[i], nodes_per_chan,
// num_seg_types, segment_inf);
// Conn_Block_Metrics cb_metrics;
// get_conn_block_metrics(&types[i], opin_to_track_map[i], num_seg_types, segment_inf, DRIVER, Fc_out[i], nodes_per_chan, &cb_metrics);
// vpr_printf_info("Block Type: %s Pin Diversity: %f Wire Homogeneity: %f Hamming Distance: %f Hamming Proximity: %f\n",
// types[i].name, cb_metrics.pin_diversity, cb_metrics.wire_homogeneity,
// cb_metrics.lemieux_cost_func, cb_metrics.hamming_proximity);
//
// my_srandom(saved_seed);
//}
free(perturb_opins);
}
}
/* END OPIN MAP */
bool Fc_clipped = false;
alloc_and_load_rr_graph(num_rr_nodes, rr_node, num_seg_types,
seg_details, chan_details_x, chan_details_y,
L_rr_edge_done, track_to_pin_lookup, opin_to_track_map,
switch_block_conn, sb_conn_map, L_grid, L_nx, L_ny, Fs, unidir_sb_pattern,
Fc_out, Fc_xofs, Fc_yofs, rr_node_indices, max_chan_width, sb_type,
delayless_switch, directionality, wire_to_arch_ipin_switch, &Fc_clipped,
directs, num_directs, clb_to_clb_directs);
/* Update rr_nodes capacities if global routing */
if (graph_type == GRAPH_GLOBAL) {
for (int i = 0; i < num_rr_nodes; i++) {
if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
rr_node[i].set_capacity(chan_width.max);
}
}
}
/* Allocate and load routing resource switches, which are derived from the switches from the architecture file,
based on their fanin in the rr graph. This routine also adjusts the rr nodes to point to these new rr switches */
(*num_rr_switches) = alloc_and_load_rr_switch_inf(num_arch_switches, wire_to_arch_ipin_switch, wire_to_rr_ipin_switch);
rr_graph_externals(segment_inf, num_seg_types, max_chan_width,
*wire_to_rr_ipin_switch, base_cost_type);
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_RR_GRAPH)) {
dump_rr_graph(getEchoFileName(E_ECHO_RR_GRAPH));
}
#ifdef INTERPOSER_BASED_ARCHITECTURE
/* Main Entry Point to rr_graph modifications for interposer-based architectures */
if(num_cuts > 0)
{
modify_rr_graph_for_interposer_based_arch(max_chan_width, directionality);
}
#endif
check_rr_graph(graph_type, L_nx, L_ny, *num_rr_switches, Fc_in);
/* dump out rr structs if requested */
if (dump_rr_structs_file){
dump_rr_structs(dump_rr_structs_file);
}
/* Free all temp structs */
if (seg_details) {
free_seg_details(seg_details, max_chan_width);
seg_details = NULL;
}
if (chan_details_x || chan_details_y) {
free_chan_details(chan_details_x, chan_details_y, max_chan_width, L_nx, L_ny);
chan_details_x = NULL;
chan_details_y = NULL;
}
if (Fc_in) {
free_matrix3(Fc_in, 0, L_num_types-1, 0, max_pins-1, 0, sizeof(int));
Fc_in = NULL;
}
if (Fc_out) {
free_matrix3(Fc_out, 0, L_num_types-1, 0, max_pins-1, 0, sizeof(int));
Fc_out = NULL;
}
if (perturb_ipins) {
free_matrix(perturb_ipins, 0, L_num_types-1, 0, sizeof(bool));
perturb_ipins = NULL;
}
if (switch_block_conn) {
free_switch_block_conn(switch_block_conn, max_chan_width);
switch_block_conn = NULL;
}
if (sb_conn_map) {
free_switchblock_permutations(sb_conn_map);
sb_conn_map = NULL;
}
if (L_rr_edge_done) {
free(L_rr_edge_done);
L_rr_edge_done = NULL;
}
if (Fc_xofs) {
free_matrix3(Fc_xofs, 0, L_ny, 0, L_nx, 0, sizeof(int));
Fc_xofs = NULL;
}
if (Fc_yofs) {
free_matrix3(Fc_yofs, 0, L_nx, 0, L_ny, 0, sizeof(int));
Fc_yofs = NULL;
}
if (unidir_sb_pattern) {
free_sblock_pattern_lookup(unidir_sb_pattern);
unidir_sb_pattern = NULL;
}
if (opin_to_track_map) {
for (int i = 0; i < L_num_types; ++i) {
free_matrix5(opin_to_track_map[i], 0, types[i].num_pins - 1,
0, types[i].width - 1, 0, types[i].height - 1,
0, 3, 0, sizeof(int));
}
free(opin_to_track_map);
}
if (sets_per_seg_type){
free(sets_per_seg_type);
sets_per_seg_type = NULL;
}
free_type_pin_to_track_map(ipin_to_track_map, types);
free_type_track_to_pin_map(track_to_pin_lookup, types, max_chan_width);
if(clb_to_clb_directs != NULL) {
free(clb_to_clb_directs);
}
}
/* Allocates and loads the global rr_switch_inf array based on the global
arch_switch_inf array and the fan-ins used by the rr nodes.
Also changes switch indices of rr_nodes to index into rr_switch_inf
instead of arch_switch_inf.
Returns the number of rr switches created.
Also returns, through a pointer, the index of a representative ipin cblock switch.
- Currently we're not allowing a designer to specify an ipin cblock switch with
multiple fan-ins, so there's just one of these switches in the g_rr_switch_inf array.
But in the future if we allow this, we can return an index to a representative switch
The rr_switch_inf switches are derived from the arch_switch_inf switches
(which were read-in from the architecture file) based on fan-in. The delays of
the rr switches depend on their fan-in, so we first go through the rr_nodes
and count how many different fan-ins exist for each arch switch.
Then we create these rr switches and update the switch indices
of rr_nodes to index into the rr_switch_inf array. */
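/* Worked example (illustrative, not from the original source): suppose arch
switch 3 drives muxes with fan-ins of 4 and 8 somewhere in the rr graph, and
arch switch 5 only ever appears with fan-in 2. The expansion below then
creates three rr switches -- (arch 3, fanin 4), (arch 3, fanin 8) and
(arch 5, fanin 2) -- and each rr node edge that referenced arch switch 3 is
remapped to whichever of the first two matches the fan-in of its target node. */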
static int alloc_and_load_rr_switch_inf(INP int num_arch_switches, INP int wire_to_arch_ipin_switch, OUTP int *wire_to_rr_ipin_switch){
/* we will potentially be creating a couple of versions of each arch switch where
each version corresponds to a different fan-in. We will need to fill g_rr_switch_inf
with this expanded list of switches.
To do this we will use an array of maps where each map corresponds to a different arch switch.
So for each arch switch we will use this map to keep track of the different fan-ins that it uses (map key)
and which index in the g_rr_switch_inf array this arch switch / fanin combination will be placed in */
map< int, int > *switch_fanin;
switch_fanin = new map<int,int>[num_arch_switches];
/* Determine what the different fan-ins are for each arch switch, and also
how many entries the rr_switch_inf array should have */
int num_rr_switches = alloc_rr_switch_inf(num_arch_switches, switch_fanin);
/* create the rr switches. also keep track of, for each arch switch, what index of the rr_switch_inf
array each version of its fanin has been mapped to */
load_rr_switch_inf(num_arch_switches, switch_fanin);
/* next, walk through rr nodes again and remap their switch indices to rr_switch_inf */
remap_rr_node_switch_indices(switch_fanin);
/* now we need to set the wire_to_rr_ipin_switch variable, which points the detailed routing architecture
to the representative ipin cblock switch. Currently we do not allow specifying an ipin cblock switch
with multiple fan-ins, so right now there's just one. This may change in the future, in which case we'd need to
return a representative switch */
if (switch_fanin[wire_to_arch_ipin_switch].count(UNDEFINED)){
/* only have one ipin cblock switch. OK. */
(*wire_to_rr_ipin_switch) = switch_fanin[wire_to_arch_ipin_switch][UNDEFINED];
} else if (switch_fanin[wire_to_arch_ipin_switch].size() != 0){
vpr_throw(VPR_ERROR_ARCH, __FILE__, __LINE__,
"Not currently allowing an ipin cblock switch to have multiple fan-ins");
} else {
vpr_throw(VPR_ERROR_ARCH, __FILE__, __LINE__,
"No switch is specified for the ipin cblock, check if there is an error in arch file");
}
delete[] switch_fanin;
return num_rr_switches;
}
/* Allocates space for the global g_rr_switch_inf variable and returns the
number of rr switches that were allocated */
static int alloc_rr_switch_inf(INP int num_arch_switches, OUTP map<int,int> *switch_fanin){
int num_rr_switches = 0;
// map key: switch index specified in arch; map value: fanin for that index
map<int, int> *inward_switch_inf = new map<int, int>[num_rr_nodes];
for (int inode = 0; inode < num_rr_nodes; inode ++) {
t_rr_node &from_node = rr_node[inode]; /* reference, to avoid copying the node */
int num_edges = from_node.get_num_edges();
for (int iedge = 0; iedge < num_edges; iedge++) {
int switch_index = from_node.switches[iedge];
int to_node_index = from_node.edges[iedge];
if (inward_switch_inf[to_node_index].count(switch_index) == 0)
inward_switch_inf[to_node_index][switch_index] = 0;
inward_switch_inf[to_node_index][switch_index] ++;
}
}
// get unique index / fanin combination based on inward_switch_inf
for (int inode = 0; inode < num_rr_nodes; inode ++) {
map<int, int>::iterator itr;
for (itr = inward_switch_inf[inode].begin(); itr != inward_switch_inf[inode].end(); itr++) {
int switch_index = itr->first;
int fanin = itr->second;
if (g_arch_switch_inf[switch_index].Tdel_map.count(UNDEFINED) == 1) {
fanin = UNDEFINED;
}
if (switch_fanin[switch_index].count(fanin) == 0) {
switch_fanin[switch_index][fanin] = 0;
num_rr_switches++;
}
}
}
delete[] inward_switch_inf;
/* allocate space for the rr_switch_inf array (it's freed later in vpr_api.c-->free_arch) */
g_rr_switch_inf = new s_rr_switch_inf[num_rr_switches];
return num_rr_switches;
}
/* debug helper: print the size and first key of a switch fanin map */
void print_map(map<int, int> dbmap) {
printf("size is: %d\n", (int)dbmap.size());
printf("first first: %d\n", (dbmap.begin())->first);
}
/* load the global g_rr_switch_inf variable. also keep track of, for each arch switch, what
index of the rr_switch_inf array each version of its fanin has been mapped to (through switch_fanin map) */
static void load_rr_switch_inf(INP int num_arch_switches, INOUTP map<int,int> *switch_fanin){
int i_rr_switch = 0;
if (g_switch_fanin_remap != NULL) {
// at this stage we are rebuilding the rr_graph (e.g. during the binary
// search over channel width), so the old g_switch_fanin_remap is obsolete
delete [] g_switch_fanin_remap;
}
g_switch_fanin_remap = new map<int, int>[num_arch_switches];
for (int i_arch_switch = 0; i_arch_switch < num_arch_switches; i_arch_switch++){
map<int,int>::iterator it;
for (it = switch_fanin[i_arch_switch].begin(); it != switch_fanin[i_arch_switch].end(); it++){
/* the fanin value is in it->first, and we'll need to set what index this i_arch_switch/fanin
combination maps to (within rr_switch_inf) in it->second */
int fanin = it->first;
it->second = i_rr_switch;
// setup g_switch_fanin_remap, for future switch usage analysis
g_switch_fanin_remap[i_arch_switch][fanin] = i_rr_switch;
/* figure out, by looking at the arch switch's Tdel map, what the delay of the new
rr switch should be */
map<int,double> &Tdel_map = g_arch_switch_inf[i_arch_switch].Tdel_map;
double rr_switch_Tdel;
if (Tdel_map.count(UNDEFINED) == 1){
/* the switch specified a single constant delay. i.e., it did not
specify fanin/delay pairs */
rr_switch_Tdel = Tdel_map[UNDEFINED];
} else {
/* interpolate/extrapolate based on the available (fanin,delay) pairs in the
Tdel_map to get the rr_switch_Tdel at 'fanin' */
rr_switch_Tdel = linear_interpolate_or_extrapolate(&Tdel_map, fanin);
}
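/* Numeric example (illustrative): with Tdel_map = {4 -> 50ps, 16 -> 90ps}
and fanin = 10, linear interpolation gives
rr_switch_Tdel = 50ps + (10 - 4) / (16 - 4) * (90ps - 50ps) = 70ps. */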
/* copy over the arch switch to rr_switch_inf[i_rr_switch], but with the changed Tdel value */
g_rr_switch_inf[i_rr_switch].buffered = g_arch_switch_inf[i_arch_switch].buffered;
g_rr_switch_inf[i_rr_switch].R = g_arch_switch_inf[i_arch_switch].R;
g_rr_switch_inf[i_rr_switch].Cin = g_arch_switch_inf[i_arch_switch].Cin;
g_rr_switch_inf[i_rr_switch].Cout = g_arch_switch_inf[i_arch_switch].Cout;
g_rr_switch_inf[i_rr_switch].Tdel = rr_switch_Tdel;
g_rr_switch_inf[i_rr_switch].mux_trans_size = g_arch_switch_inf[i_arch_switch].mux_trans_size;
g_rr_switch_inf[i_rr_switch].buf_size = g_arch_switch_inf[i_arch_switch].buf_size;
g_rr_switch_inf[i_rr_switch].name = g_arch_switch_inf[i_arch_switch].name;
g_rr_switch_inf[i_rr_switch].power_buffer_type = g_arch_switch_inf[i_arch_switch].power_buffer_type;
g_rr_switch_inf[i_rr_switch].power_buffer_size = g_arch_switch_inf[i_arch_switch].power_buffer_size;
/* have created a switch in the rr_switch_inf array */
i_rr_switch++;
}
}
}
/* The switch indices of each rr_node originally point into the global g_arch_switch_inf array.
Now we remap these indices to point into the global g_rr_switch_inf array,
which contains switch info at different fan-in values */
static void remap_rr_node_switch_indices(INP map<int,int> *switch_fanin){
for (int inode = 0; inode < num_rr_nodes; inode++){
t_rr_node &from_node = rr_node[inode]; /* reference, to avoid copying the node */
int num_edges = from_node.get_num_edges();
for (int iedge = 0; iedge < num_edges; iedge++){
t_rr_node &to_node = rr_node[ from_node.edges[iedge] ];
/* get the switch which this edge uses and its fanin */
int switch_index = from_node.switches[iedge];
int fanin = to_node.get_fan_in();
if (switch_fanin[switch_index].count(UNDEFINED) == 1){
fanin = UNDEFINED;
}
int rr_switch_index = switch_fanin[switch_index][fanin];
from_node.switches[iedge] = rr_switch_index;
}
}
}
static void rr_graph_externals(
t_segment_inf * segment_inf, int num_seg_types, int max_chan_width,
int wire_to_rr_ipin_switch, enum e_base_cost_type base_cost_type) {
add_rr_graph_C_from_switches(g_rr_switch_inf[wire_to_rr_ipin_switch].Cin);
alloc_and_load_rr_indexed_data(segment_inf, num_seg_types, rr_node_indices,
max_chan_width, wire_to_rr_ipin_switch, base_cost_type);
alloc_net_rr_terminals();
load_net_rr_terminals(rr_node_indices);
alloc_and_load_rr_clb_source(rr_node_indices);
}
static bool **alloc_and_load_perturb_ipins(INP int max_chan_width, INP int L_num_types,
INP int num_seg_types,
INP int *sets_per_seg_type,
INP int ***Fc_in, INP int ***Fc_out,
INP enum e_directionality directionality) {
bool **result = (bool **) alloc_matrix(0, L_num_types-1, 0, num_seg_types-1, sizeof(bool));
/* factor to account for unidir vs bidir */
int fac = 1;
if (directionality == UNI_DIRECTIONAL){
fac = 2;
}
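/* Numeric example of the perturbation test below (illustrative): a
bidirectional segment type with 10 tracks, Fc_in = 4 and Fc_out = 8 gives
Fc_ratio = 2.0; since |2.0 - nint(2.0)| = 0 < 0.5 / 10 and Fc_in <= 10 - 2,
the uniform input and output patterns would line up on the same tracks, so
the ipin pattern is perturbed for that type/segment. */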
if (BI_DIRECTIONAL == directionality) {
for (int iseg = 0; iseg < num_seg_types; ++iseg){
result[0][iseg] = false;
int tracks_in_seg_type = sets_per_seg_type[iseg] * fac;
for (int itype = 1; itype < L_num_types; ++itype) {
result[itype][iseg] = false;
float Fc_ratio;
if (Fc_in[itype][0][iseg] > Fc_out[itype][0][iseg]) {
Fc_ratio = (float) Fc_in[itype][0][iseg] / (float) Fc_out[itype][0][iseg];
} else {
Fc_ratio = (float) Fc_out[itype][0][iseg] / (float) Fc_in[itype][0][iseg];
}
if ((Fc_in[itype][0][iseg] <= tracks_in_seg_type - 2)
&& (fabs(Fc_ratio - nint(Fc_ratio))
< (0.5 / (float) tracks_in_seg_type))) {
result[itype][iseg] = true;
}
}
}
} else {
/* Unidirectional routing uses mux balancing patterns and
* thus shouldn't need perturbation. */
assert(UNI_DIRECTIONAL == directionality);
for (int itype = 0; itype < L_num_types; ++itype) {
for (int iseg = 0; iseg < num_seg_types; ++iseg){
result[itype][iseg] = false;
}
}
}
return result;
}
static t_seg_details *alloc_and_load_global_route_seg_details(
INP int global_route_switch,
OUTP int * num_seg_details) {
t_seg_details *seg_details = (t_seg_details *) my_malloc(sizeof(t_seg_details));
seg_details->index = 0;
seg_details->length = 1;
seg_details->arch_wire_switch = global_route_switch;
seg_details->arch_opin_switch = global_route_switch;
seg_details->longline = false;
seg_details->direction = BI_DIRECTION;
seg_details->Cmetal = 0.0;
seg_details->Rmetal = 0.0;
seg_details->start = 1;
seg_details->drivers = MULTI_BUFFERED;
seg_details->cb = (bool *) my_malloc(sizeof(bool) * 1);
seg_details->cb[0] = true;
seg_details->sb = (bool *) my_malloc(sizeof(bool) * 2);
seg_details->sb[0] = true;
seg_details->sb[1] = true;
seg_details->group_size = 1;
seg_details->group_start = 0;
seg_details->seg_start = -1;
seg_details->seg_end = -1;
if (num_seg_details) {
*num_seg_details = 1;
}
return seg_details;
}
/* Calculates the actual Fc values for the given max_chan_width value */
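/* Worked example (illustrative): in a unidirectional architecture (fac = 2),
a fractional Fc of 0.5 on a segment type with sets_per_seg_type = 10 yields
Result = 2 * nint(10 * 0.5) = 10 tracks; an absolute Fc of 6 is used as-is,
then raised to at least fac and clipped to the 10 * 2 = 20 tracks that
actually exist for that segment type. */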
static int ***alloc_and_load_actual_fc(INP int L_num_types, INP t_type_ptr types, INP int max_pins,
INP int num_seg_types, INP const int *sets_per_seg_type,
INP int max_chan_width, INP bool is_Fc_out,
INP enum e_directionality directionality,
OUTP bool *Fc_clipped, INP bool ignore_Fc_0) {
int ***Result = NULL;
int fac;
*Fc_clipped = false;
/* Unidir tracks are formed in pairs; otherwise fac has no effect. */
fac = 1;
if (UNI_DIRECTIONAL == directionality) {
fac = 2;
}
assert((max_chan_width % fac) == 0);
Result = (int ***) alloc_matrix3(0, L_num_types-1, 0, max_pins-1, 0, num_seg_types-1, sizeof(int));
for (int itype = 1; itype < L_num_types; ++itype) {
float **Fc = types[itype].Fc;
for (int ipin = 0; ipin < types[itype].num_pins; ++ipin) {
for (int iseg = 0; iseg < num_seg_types; iseg++){
if(Fc[ipin][iseg] == 0 && ignore_Fc_0 == false) {
/* Special case indicating that this pin does not connect to general-purpose routing */
Result[itype][ipin][iseg] = 0;
} else {
/* General case indicating that this pin connects to general-purpose routing */
if (types[itype].is_Fc_frac[ipin]) {
Result[itype][ipin][iseg] = fac * nint(sets_per_seg_type[iseg] * Fc[ipin][iseg]);
} else {
Result[itype][ipin][iseg] = (int)Fc[ipin][iseg];
}
if (is_Fc_out && types[itype].is_Fc_full_flex[ipin]) {
Result[itype][ipin][iseg] = sets_per_seg_type[iseg] * fac;
}
Result[itype][ipin][iseg] = max(Result[itype][ipin][iseg], fac);
if (Result[itype][ipin][iseg] > sets_per_seg_type[iseg] * fac) {
*Fc_clipped = true;
Result[itype][ipin][iseg] = sets_per_seg_type[iseg] * fac;
}
}
assert(Result[itype][ipin][iseg] % fac == 0);
}
}
}
return Result;
}
/* frees the ipin to track mapping for each physical grid type */
static void free_type_pin_to_track_map(int ******ipin_to_track_map,
t_type_ptr types) {
for (int i = 0; i < num_types; ++i) {
free_matrix5(ipin_to_track_map[i], 0, types[i].num_pins - 1,
0, types[i].width - 1, 0, types[i].height - 1,
0, 3, 0, sizeof(int));
}
free(ipin_to_track_map);
}
/* frees the track to ipin mapping for each physical grid type */
static void free_type_track_to_pin_map(struct s_ivec***** track_to_pin_map,
t_type_ptr types, int max_chan_width) {
for (int i = 0; i < num_types; i++) {
if (track_to_pin_map[i] != NULL) {
for (int track = 0; track < max_chan_width; ++track) {
for (int width = 0; width < types[i].width; ++width) {
for (int height = 0; height < types[i].height; ++height) {
for (int side = 0; side < 4; ++side) {
if (track_to_pin_map[i][track][width][height][side].list != NULL) {
free(track_to_pin_map[i][track][width][height][side].list);
}
}
}
}
}
free_matrix4(track_to_pin_map[i], 0, max_chan_width - 1,
0, types[i].width - 1, 0, types[i].height - 1,
0, sizeof(struct s_ivec));
}
}
free(track_to_pin_map);
}
/* Does the actual work of allocating the rr_graph and filling all the *
* appropriate values. Everything up to this was just a prelude! */
static void alloc_and_load_rr_graph(INP int num_nodes,
INP t_rr_node * L_rr_node, INP int num_seg_types,
INP t_seg_details * seg_details,
INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y,
INP bool * L_rr_edge_done,
INP struct s_ivec *****track_to_pin_lookup,
INP int ******opin_to_track_map, INP struct s_ivec ***switch_block_conn,
INP t_sb_connection_map *sb_conn_map,
INP struct s_grid_tile **L_grid, INP int L_nx, INP int L_ny, INP int Fs,
INP short ******sblock_pattern, INP int ***Fc_out, INP int ***Fc_xofs,
INP int ***Fc_yofs, INP t_ivec *** L_rr_node_indices,
INP int max_chan_width, INP enum e_switch_block_type sb_type,
INP int delayless_switch, INP enum e_directionality directionality,
INP int wire_to_ipin_switch, OUTP bool * Fc_clipped,
INP t_direct_inf *directs, INP int num_directs,
INP t_clb_to_clb_directs *clb_to_clb_directs) {
/* If Fc gets clipped, this will be flagged to true */
*Fc_clipped = false;
/* Connect SINKS and SOURCES to their pins. */
for (int i = 0; i <= (L_nx + 1); ++i) {
for (int j = 0; j <= (L_ny + 1); ++j) {
build_rr_sinks_sources(i, j, L_rr_node, L_rr_node_indices,
delayless_switch, L_grid);
}
}
/* Build opins */
for (int i = 0; i <= (L_nx + 1); ++i) {
for (int j = 0; j <= (L_ny + 1); ++j) {
if (BI_DIRECTIONAL == directionality) {
build_bidir_rr_opins(i, j, L_rr_node, L_rr_node_indices,
opin_to_track_map, Fc_out, L_rr_edge_done, seg_details,
L_grid, delayless_switch,
directs, num_directs, clb_to_clb_directs, num_seg_types);
} else {
assert(UNI_DIRECTIONAL == directionality);
bool clipped;
build_unidir_rr_opins(i, j, L_grid, Fc_out, max_chan_width,
chan_details_x, chan_details_y, Fc_xofs, Fc_yofs, L_rr_node,
L_rr_edge_done, &clipped, L_rr_node_indices, delayless_switch,
directs, num_directs, clb_to_clb_directs, num_seg_types);
if (clipped) {
*Fc_clipped = true;
}
}
}
}
/* We make a copy of the current fanin values for the nodes so we
* know the number of OPINs driving each mux at this point */
int *opin_mux_size = (int *) my_malloc(sizeof(int) * num_nodes);
for (int i = 0; i < num_nodes; ++i) {
opin_mux_size[i] = L_rr_node[i].get_fan_in();
}
/* Build channels */
assert(Fs % 3 == 0);
for (int i = 0; i <= L_nx; ++i) {
for (int j = 0; j <= L_ny; ++j) {
if (i > 0) {
build_rr_chan(i, j, CHANX, track_to_pin_lookup, sb_conn_map, switch_block_conn,
CHANX_COST_INDEX_START,
max_chan_width, chan_width.x_list[j], opin_mux_size,
sblock_pattern, Fs / 3, chan_details_x, chan_details_y,
L_rr_node_indices, L_rr_edge_done, L_rr_node,
wire_to_ipin_switch, directionality);
}
if (j > 0) {
build_rr_chan(i, j, CHANY, track_to_pin_lookup, sb_conn_map, switch_block_conn,
CHANX_COST_INDEX_START + num_seg_types,
max_chan_width, chan_width.y_list[i], opin_mux_size,
sblock_pattern, Fs / 3, chan_details_x, chan_details_y,
L_rr_node_indices, L_rr_edge_done, L_rr_node,
wire_to_ipin_switch, directionality);
}
}
}
free(opin_mux_size);
// set up the side info for SINKs by merging in the sides of their IPINs
map<int, int>::iterator itr;
for (itr = g_ipin_to_sink.begin(); itr != g_ipin_to_sink.end(); itr++) {
int sink_node = itr->second;
int ipin_node = itr->first;
g_pin_side[sink_node] |= g_pin_side[ipin_node];
}
/* XXX currently don't do opins as it is more tricky than ipins
you will need to differentiate unidir and bidir
for (itr = g_opin_by_source.begin(); itr != g_opin_by_source.end(); itr++) {
}
*/
g_opin_by_source.clear();
g_ipin_to_sink.clear();
#if LOOKAHEADBYHISTORY == 1
// alloc_and_load_rr_graph() is first called during placement;
// after that, the size of the chip (nx, ny) is fixed.
if (max_cost_by_relative_pos == NULL) {
assert(min_cost_by_relative_pos == NULL);
assert(max_cost_coord == NULL);
assert(min_cost_coord == NULL);
max_cost_by_relative_pos = (float **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(float));
min_cost_by_relative_pos = (float **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(float));
avg_cost_by_relative_pos = (float **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(float));
dev_cost_by_relative_pos = (float **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(float));
count_by_relative_pos = (int **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(int));
max_cost_coord = (pair<float, float> **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(pair<float, float>));
min_cost_coord = (pair<float, float> **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(pair<float, float>));
}
// every time this function is called the channel width has changed,
// so the old data stored in the 2D arrays is no longer valid. Clear it here.
for (int i = 0; i <= (L_nx + 1); ++i) {
for (int j = 0; j <= (L_ny + 1); ++j) {
max_cost_by_relative_pos[i][j] = -1;
min_cost_by_relative_pos[i][j] = HUGE_POSITIVE_FLOAT;
avg_cost_by_relative_pos[i][j] = -1;
dev_cost_by_relative_pos[i][j] = HUGE_POSITIVE_FLOAT;
count_by_relative_pos[i][j] = 0;
max_cost_coord[i][j].first = -1;
max_cost_coord[i][j].second = -1;
min_cost_coord[i][j].first = -1;
min_cost_coord[i][j].second = -1;
}
}
#endif
#if CONGESTIONBYCHIPAREA == 1
total_cong_tile_count = CONG_MAP_BASE_COST * (nx + 1) * (ny + 1);
if (congestion_map_by_area == NULL) {
congestion_map_by_area = (int **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(int));
congestion_map_relative = (int **)alloc_matrix(0, L_nx+1, 0, L_ny+1, sizeof(int));
}
for (int i = 0; i <= (L_nx + 1); ++i) {
for (int j = 0; j <= (L_ny + 1); ++j) {
congestion_map_by_area[i][j] = CONG_MAP_BASE_COST;
congestion_map_relative[i][j] = 0;
}
}
#endif
#if ONEMORELOOKAHEAD == 1
for (int i = 0; i < num_rr_nodes; i++) {
rr_node[i].is_to_long_wire = false;
if (rr_node[i].type != CHANX && rr_node[i].type != CHANY)
continue;
if (rr_node[i].get_len() != 4)
continue;
int to_num_edges = rr_node[i].get_num_edges();
for (int e = 0; e < to_num_edges; e++) {
int to_node = rr_node[i].edges[e];
if (rr_node[to_node].get_len() == 16)
rr_node[i].is_to_long_wire = true;
}
}
#endif
}
static void build_bidir_rr_opins(INP int i, INP int j,
INOUTP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int ******opin_to_track_map, INP int ***Fc_out,
INP bool * L_rr_edge_done, INP t_seg_details * seg_details,
INP struct s_grid_tile **L_grid, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs,
INP int num_seg_types) {
/* OPIN edges need to be done at once so let the offset 0
* block do the work. */
if (L_grid[i][j].width_offset > 0 || L_grid[i][j].height_offset > 0) {
return;
}
t_type_ptr type = L_grid[i][j].type;
int **Fc = Fc_out[type->index];
for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) {
/* We only are working with opins so skip non-drivers */
if (type->class_inf[type->pin_class[pin_index]].type != DRIVER) {
continue;
}
/* get number of tracks that this pin connects to */
int total_pin_Fc = 0;
for (int iseg = 0; iseg < num_seg_types; iseg++){
total_pin_Fc += Fc[pin_index][iseg];
}
int num_edges = 0;
struct s_linked_edge *edge_list = NULL;
if(total_pin_Fc > 0) {
for (int width = 0; width < type->width; ++width) {
for (int height = 0; height < type->height; ++height) {
num_edges += get_bidir_opin_connections(i + width, j + height, pin_index,
&edge_list, opin_to_track_map, total_pin_Fc, L_rr_edge_done,
L_rr_node_indices, seg_details);
}
}
}
/* Add in direct connections */
num_edges += get_opin_direct_connecions(i, j, pin_index, &edge_list, L_rr_node_indices,
directs, num_directs, clb_to_clb_directs);
int node_index = get_rr_node_index(i, j, OPIN, pin_index, L_rr_node_indices);
alloc_and_load_edges_and_switches(L_rr_node, node_index, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
struct s_linked_edge *next_edge = edge_list->next;
free(edge_list);
edge_list = next_edge;
}
}
}
void free_rr_graph(void) {
int i;
/* Frees all the routing graph data structures, if they have been *
* allocated. I use rr_mem_ch.chunk_ptr_head as a flag to indicate *
* whether or not the graph has been allocated -- if it is not NULL, *
* a routing graph exists and can be freed. Hence, you can call this *
* routine even if you're not sure of whether a rr_graph exists or not. */
if (g_pin_side != NULL) {
free(g_pin_side);
g_pin_side = NULL;
}
if (rr_mem_ch.chunk_ptr_head == NULL) /* Nothing to free. */
return;
free_chunk_memory(&rr_mem_ch); /* Frees ALL "chunked" data */
/* Before adding any more free calls here, be sure the data is NOT chunk *
* allocated, as ALL the chunk allocated data is already free! */
if(net_rr_terminals != NULL) {
free(net_rr_terminals);
}
for (i = 0; i < num_rr_nodes; i++) {
if (rr_node[i].edges != NULL) {
free(rr_node[i].edges);
}
if (rr_node[i].switches != NULL) {
free(rr_node[i].switches);
}
}
assert(rr_node_indices);
free_rr_node_indices(rr_node_indices);
free(rr_node);
free(rr_indexed_data);
for (i = 0; i < num_blocks; i++) {
free(rr_blk_source[i]);
}
free(rr_blk_source);
rr_blk_source = NULL;
net_rr_terminals = NULL;
rr_node = NULL;
rr_node_indices = NULL;
rr_indexed_data = NULL;
num_rr_nodes = 0;
delete[] g_rr_switch_inf;
g_rr_switch_inf = NULL;
#if LOOKAHEADBYHISTORY == 1
assert(max_cost_by_relative_pos != NULL);
assert(min_cost_by_relative_pos != NULL);
assert(max_cost_coord != NULL);
assert(min_cost_coord != NULL);
free_matrix(max_cost_by_relative_pos, 0, nx+1, 0, sizeof(float));
free_matrix(min_cost_by_relative_pos, 0, nx+1, 0, sizeof(float));
free_matrix(avg_cost_by_relative_pos, 0, nx+1, 0, sizeof(float));
free_matrix(dev_cost_by_relative_pos, 0, nx+1, 0, sizeof(float));
free_matrix(count_by_relative_pos, 0, nx+1, 0, sizeof(int));
free_matrix(max_cost_coord, 0, nx+1, 0, sizeof(pair<float, float>));
free_matrix(min_cost_coord, 0, nx+1, 0, sizeof(pair<float, float>));
free_matrix4(bfs_lookahead_info, 0, nx-3, 0, ny-3, 0, g_num_segment-1, 0, sizeof(pair<float, float>));
max_cost_by_relative_pos = NULL;
min_cost_by_relative_pos = NULL;
avg_cost_by_relative_pos = NULL;
dev_cost_by_relative_pos = NULL;
count_by_relative_pos = NULL;
max_cost_coord = NULL;
min_cost_coord = NULL;
bfs_lookahead_info = NULL;
#endif
#if CONGESTIONBYCHIPAREA == 1
assert(congestion_map_by_area != NULL);
free_matrix(congestion_map_by_area, 0, nx+1, 0, sizeof(int));
free_matrix(congestion_map_relative, 0, nx+1, 0, sizeof(int));
congestion_map_by_area = NULL;
congestion_map_relative = NULL;
#endif
}
static void alloc_net_rr_terminals(void) {
unsigned int inet;
net_rr_terminals = (int **) my_malloc(g_clbs_nlist.net.size() * sizeof(int *));
for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
net_rr_terminals[inet] = (int *) my_chunk_malloc(
g_clbs_nlist.net[inet].pins.size() * sizeof(int),
&rr_mem_ch);
}
}
void load_net_rr_terminals(t_ivec *** L_rr_node_indices) {
/* Loads the net_rr_terminals data structure (allocated by *
* alloc_net_rr_terminals). For each net it stores the rr_node index of the *
* SOURCE of the net and all the SINKs of the net. *
* [0..g_clbs_nlist.net.size()-1][0..num_pins-1]. Entry [inet][pnum] stores *
* the rr index corresponding to the SOURCE (opin) or SINK (ipin) of pnum. */
int inode, iblk, i, j, node_block_pin, iclass;
unsigned int ipin, inet;
t_type_ptr type;
for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
for (ipin = 0; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
iblk = g_clbs_nlist.net[inet].pins[ipin].block;
i = block[iblk].x;
j = block[iblk].y;
type = block[iblk].type;
/* In the routing graph, each (x, y) location has unique pins on it
* so when there is capacity, blocks are packed and their pin numbers
* are offset to get their actual rr_node */
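/* Illustrative example (assumed numbers): for an IO-style tile with capacity 2
and 6 pins per instance, pin 3 of the second packed instance is block pin
1 * 6 + 3 = 9, which is the pin number used in the rr graph lookups. */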
node_block_pin = g_clbs_nlist.net[inet].pins[ipin].block_pin;
iclass = type->pin_class[node_block_pin];
inode = get_rr_node_index(i, j, (ipin == 0 ? SOURCE : SINK), /* First pin is driver */
iclass, L_rr_node_indices);
net_rr_terminals[inet][ipin] = inode;
}
}
}
static void alloc_and_load_rr_clb_source(t_ivec *** L_rr_node_indices) {
/* Saves the rr_node corresponding to each SOURCE and SINK in each CLB *
* in the FPGA. Currently only the SOURCE rr_node values are used, and *
* they are used only to reserve pins for locally used OPINs in the router. *
* [0..num_blocks-1][0..num_class-1]. The values for blocks that are pads *
* are NOT valid. */
int iblk, i, j, iclass, inode;
int class_low, class_high;
t_rr_type rr_type;
t_type_ptr type;
rr_blk_source = (int **) my_malloc(num_blocks * sizeof(int *));
for (iblk = 0; iblk < num_blocks; iblk++) {
type = block[iblk].type;
get_class_range_for_block(iblk, &class_low, &class_high);
rr_blk_source[iblk] = (int *) my_malloc(type->num_class * sizeof(int));
for (iclass = 0; iclass < type->num_class; iclass++) {
if (iclass >= class_low && iclass <= class_high) {
i = block[iblk].x;
j = block[iblk].y;
if (type->class_inf[iclass].type == DRIVER)
rr_type = SOURCE;
else
rr_type = SINK;
inode = get_rr_node_index(i, j, rr_type, iclass,
L_rr_node_indices);
rr_blk_source[iblk][iclass] = inode;
} else {
rr_blk_source[iblk][iclass] = OPEN;
}
}
}
}
static void build_rr_sinks_sources(INP int i, INP int j,
INP t_rr_node * L_rr_node, INP t_ivec *** L_rr_node_indices,
INP int delayless_switch, INP struct s_grid_tile **L_grid) {
/* Loads IPIN, SINK, SOURCE, and OPIN.
* Loads IPIN to SINK edges, and SOURCE to OPIN edges */
/* Since we share nodes within a large block, only
* start tile can initialize sinks, sources, and pins */
if (L_grid[i][j].width_offset > 0 || L_grid[i][j].height_offset > 0)
return;
t_type_ptr type = L_grid[i][j].type;
int num_class = type->num_class;
struct s_class *class_inf = type->class_inf;
int num_pins = type->num_pins;
int *pin_class = type->pin_class;
/* SINKS and SOURCE to OPIN edges */
for (int iclass = 0; iclass < num_class; ++iclass) {
int inode = 0;
if (class_inf[iclass].type == DRIVER) { /* SOURCE */
inode = get_rr_node_index(i, j, SOURCE, iclass, L_rr_node_indices);
int num_edges = class_inf[iclass].num_pins;
L_rr_node[inode].set_num_edges(num_edges);
L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
L_rr_node[inode].switches = (short *) my_malloc(num_edges * sizeof(short));
for (int ipin = 0; ipin < class_inf[iclass].num_pins; ++ipin) {
int pin_num = class_inf[iclass].pinlist[ipin];
int to_node = get_rr_node_index(i, j, OPIN, pin_num, L_rr_node_indices);
// XXX: to_node is OPIN, inode is SOURCE
g_opin_by_source[to_node] = inode;
L_rr_node[inode].edges[ipin] = to_node;
L_rr_node[inode].switches[ipin] = delayless_switch;
L_rr_node[to_node].set_fan_in(L_rr_node[to_node].get_fan_in() + 1);
}
L_rr_node[inode].set_cost_index(SOURCE_COST_INDEX);
L_rr_node[inode].type = SOURCE;
} else { /* SINK */
assert(class_inf[iclass].type == RECEIVER);
inode = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);
/* NOTE: To allow route throughs through clbs, change the lines below to *
* make an edge from the input SINK to the output SOURCE. Do for just the *
* special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it *
* leads to. If route throughs are allowed, you may want to increase the *
* base cost of OPINs and/or SOURCES so they aren't used excessively. */
/* Initialize to unconnected to fix values */
L_rr_node[inode].set_num_edges(0);
L_rr_node[inode].edges = NULL;
L_rr_node[inode].switches = NULL;
L_rr_node[inode].set_cost_index(SINK_COST_INDEX);
L_rr_node[inode].type = SINK;
}
/* Things common to both SOURCEs and SINKs. */
L_rr_node[inode].set_capacity(class_inf[iclass].num_pins);
L_rr_node[inode].set_occ(0);
//assuming that type->width is always 1.
//if this needs to change, rr_node.{h,c} need to be modified accordingly
assert(type->width == 1);
L_rr_node[inode].set_coordinates(i, j, i + type->width - 1, j + type->height - 1);
L_rr_node[inode].R = 0;
L_rr_node[inode].C = 0;
L_rr_node[inode].set_ptc_num(iclass);
L_rr_node[inode].set_direction((enum e_direction)OPEN);
//L_rr_node[inode].set_drivers((enum e_drivers)OPEN);
}
int ipb_pin = 0;
int opb_pin = 0;
int iport = 0;
int oport = 0;
int iporttype = 0;
const t_pb_graph_node *pb_graph_node = type->pb_graph_head;
if(pb_graph_node != NULL && pb_graph_node->num_input_ports == 0) {
iporttype = 1;
}
/* Connect IPINS to SINKS and dummy for OPINS */
for (int ipin = 0; ipin < num_pins; ++ipin) {
int inode = 0;
int iclass = pin_class[ipin];
int z = ipin / (type->pb_type->num_clock_pins + type->pb_type->num_output_pins + type->pb_type->num_input_pins);
if (class_inf[iclass].type == RECEIVER) {
inode = get_rr_node_index(i, j, IPIN, ipin, L_rr_node_indices);
int to_node = get_rr_node_index(i, j, SINK, iclass, L_rr_node_indices);
// XXX: inode is IPIN; to_node is SINK
// setup this mapping to setup g_pin_side for sinks in the future
g_ipin_to_sink[inode] = to_node;
L_rr_node[inode].set_num_edges(1);
L_rr_node[inode].edges = (int *) my_malloc(sizeof(int));
L_rr_node[inode].switches = (short *) my_malloc(sizeof(short));
L_rr_node[inode].edges[0] = to_node;
L_rr_node[inode].switches[0] = delayless_switch;
L_rr_node[to_node].set_fan_in(L_rr_node[to_node].get_fan_in() + 1);
L_rr_node[inode].set_cost_index(IPIN_COST_INDEX);
L_rr_node[inode].type = IPIN;
/* Add in information so that I can identify which cluster pin this rr_node connects to later */
L_rr_node[inode].z = z;
if(iporttype == 0) {
L_rr_node[inode].pb_graph_pin = &pb_graph_node->input_pins[iport][ipb_pin];
ipb_pin++;
if(ipb_pin >= pb_graph_node->num_input_pins[iport]) {
iport++;
ipb_pin = 0;
if(iport >= pb_graph_node->num_input_ports) {
iporttype++;
iport = 0;
if(pb_graph_node->num_clock_ports == 0) {
iporttype = 0;
}
}
}
} else {
assert(iporttype == 1);
L_rr_node[inode].pb_graph_pin = &pb_graph_node->clock_pins[iport][ipb_pin];
ipb_pin++;
if(ipb_pin >= pb_graph_node->num_clock_pins[iport]) {
iport++;
ipb_pin = 0;
if(iport >= pb_graph_node->num_clock_ports) {
iporttype = 0;
iport = 0;
if(pb_graph_node->num_input_ports == 0) {
iporttype = 1;
}
}
}
}
} else {
assert(class_inf[iclass].type == DRIVER);
inode = get_rr_node_index(i, j, OPIN, ipin, L_rr_node_indices);
/* Add in information so that I can identify which cluster pin this rr_node connects to later */
L_rr_node[inode].z = z;
L_rr_node[inode].set_num_edges(0);
L_rr_node[inode].edges = NULL;
L_rr_node[inode].switches = NULL;
L_rr_node[inode].set_cost_index(OPIN_COST_INDEX);
L_rr_node[inode].type = OPIN;
L_rr_node[inode].pb_graph_pin = &pb_graph_node->output_pins[oport][opb_pin];
opb_pin++;
if(opb_pin >= pb_graph_node->num_output_pins[oport]) {
oport++;
opb_pin = 0;
if(oport >= pb_graph_node->num_output_ports) {
oport = 0;
}
}
}
/* Common to both DRIVERs and RECEIVERs */
L_rr_node[inode].set_capacity(1);
L_rr_node[inode].set_occ(0);
L_rr_node[inode].set_coordinates(i, j, i + type->width - 1, j + type->height - 1);
L_rr_node[inode].C = 0;
L_rr_node[inode].R = 0;
L_rr_node[inode].set_ptc_num(ipin);
L_rr_node[inode].set_direction((enum e_direction)OPEN);
//L_rr_node[inode].set_drivers((enum e_drivers)OPEN);
}
}
/* Allocates/loads edges for nodes belonging to the specified channel segment and initializes
node properties such as cost, occupancy and capacity */
static void build_rr_chan(INP int x_coord, INP int y_coord, INP t_rr_type chan_type,
INP struct s_ivec *****track_to_pin_lookup, t_sb_connection_map *sb_conn_map,
INP struct s_ivec ***switch_block_conn, INP int cost_index_offset,
INP int max_chan_width, INP int tracks_per_chan, INP int *opin_mux_size,
INP short ******sblock_pattern, INP int Fs_per_side,
INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y,
INP t_ivec *** L_rr_node_indices,
INOUTP bool * L_rr_edge_done, INOUTP t_rr_node * L_rr_node,
INP int wire_to_ipin_switch, INP enum e_directionality directionality) {
/* this function builds both x and y-directed channel segments, so set up our
coordinates based on channel type */
int seg_coord = x_coord;
int chan_coord = y_coord;
int seg_dimension = nx;
int chan_dimension = ny;
t_chan_details *from_chan_details = chan_details_x;
t_chan_details *opposite_chan_details = chan_details_y;
t_rr_type opposite_chan_type = CHANY;
if (chan_type == CHANY){
seg_coord = y_coord;
chan_coord = x_coord;
seg_dimension = ny;
chan_dimension = nx;
from_chan_details = chan_details_y;
opposite_chan_details = chan_details_x;
opposite_chan_type = CHANX;
}
t_seg_details * seg_details = from_chan_details[x_coord][y_coord];
/* figure out if we're generating switch block edges based on a custom switch block
description */
bool custom_switch_block = false;
if (sb_conn_map != NULL){
assert(sblock_pattern == NULL && switch_block_conn == NULL);
custom_switch_block = true;
}
/* Loads up all the routing resource nodes in the current channel segment */
for (int track = 0; track < tracks_per_chan; ++track) {
if (seg_details[track].length == 0)
continue;
int start = get_seg_start(seg_details, track, chan_coord, seg_coord);
int end = get_seg_end(seg_details, track, start, chan_coord, seg_dimension);
if (seg_coord > start)
continue; /* Not the start of this segment. */
struct s_linked_edge *edge_list = NULL;
t_seg_details * from_seg_details = chan_details_x[start][y_coord];
if (chan_type == CHANY){
from_seg_details = chan_details_y[x_coord][start];
}
/* First count number of edges and put the edges in a linked list. */
int num_edges = 0;
num_edges += get_track_to_pins(start, chan_coord, track, tracks_per_chan, &edge_list,
L_rr_node_indices, track_to_pin_lookup, seg_details, chan_type, seg_dimension,
wire_to_ipin_switch, directionality);
/* get edges going from the current track into channel segments which are perpendicular to it */
if (chan_coord > 0) {
t_seg_details * to_seg_details = chan_details_y[start][y_coord];
if (chan_type == CHANY)
to_seg_details = chan_details_x[x_coord][start];
if (to_seg_details->length > 0) {
num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord,
opposite_chan_type, seg_dimension, max_chan_width, opin_mux_size,
Fs_per_side, sblock_pattern, &edge_list,
from_seg_details, to_seg_details, opposite_chan_details,
directionality, L_rr_node_indices, L_rr_edge_done,
switch_block_conn, sb_conn_map);
}
}
if (chan_coord < chan_dimension) {
t_seg_details * to_seg_details = chan_details_y[start][y_coord+1];
if (chan_type == CHANY)
to_seg_details = chan_details_x[x_coord+1][start];
if (to_seg_details->length > 0) {
num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord + 1,
opposite_chan_type, seg_dimension, max_chan_width, opin_mux_size,
Fs_per_side, sblock_pattern, &edge_list,
from_seg_details, to_seg_details, opposite_chan_details,
directionality, L_rr_node_indices, L_rr_edge_done,
switch_block_conn, sb_conn_map);
}
}
/* walk over the switch blocks along the source track and implement edges from this track to other tracks
in the same channel (i.e. straight-through connections) */
for (int target_seg = start-1; target_seg <= end+1; target_seg++){
if (target_seg != start-1 && target_seg != end+1){
/* skip straight-through connections from midpoint if non-custom switch block.
currently non-custom switch blocks don't properly describe connections from the mid-point of a wire segment
to other segments in the same channel (i.e. straight-through connections) */
if (!custom_switch_block){
continue;
}
}
if (target_seg > 0 && target_seg < seg_dimension+1){
t_seg_details * to_seg_details = chan_details_x[target_seg][y_coord];
if (chan_type == CHANY)
to_seg_details = chan_details_y[x_coord][target_seg];
if (to_seg_details->length > 0) {
num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, target_seg,
chan_type, seg_dimension, max_chan_width, opin_mux_size,
Fs_per_side, sblock_pattern, &edge_list,
from_seg_details, to_seg_details, from_chan_details,
directionality, L_rr_node_indices, L_rr_edge_done,
switch_block_conn, sb_conn_map);
}
}
}
int node = get_rr_node_index(x_coord, y_coord, chan_type, track, L_rr_node_indices);
alloc_and_load_edges_and_switches(L_rr_node, node, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
struct s_linked_edge *next_edge = edge_list->next;
free(edge_list);
edge_list = next_edge;
}
/* Edge arrays have now been built up. Do everything else. */
L_rr_node[node].set_cost_index(cost_index_offset + seg_details[track].index);
L_rr_node[node].set_occ( track < tracks_per_chan ? 0 : 1 );
L_rr_node[node].set_capacity(1); /* GLOBAL routing handled elsewhere */
if (chan_type == CHANX){
L_rr_node[node].set_coordinates(start, y_coord, end, y_coord);
} else {
assert(chan_type == CHANY);
L_rr_node[node].set_coordinates(x_coord, start, x_coord, end);
}
int length = end - start + 1;
L_rr_node[node].R = length * seg_details[track].Rmetal;
L_rr_node[node].C = length * seg_details[track].Cmetal;
L_rr_node[node].set_ptc_num(track);
L_rr_node[node].type = chan_type;
L_rr_node[node].set_direction(seg_details[track].direction);
//L_rr_node[node].set_drivers(seg_details[track].drivers);
}
}
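/* Debug aid: prints rr_node inode followed by every node on the given edge
list. Intended to be called from a debugger while an edge list is built. */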
void watch_edges(int inode, t_linked_edge * edge_list_head) {
t_linked_edge *list_ptr;
int i, to_node;
list_ptr = edge_list_head;
i = 0;
print_rr_node(stdout, rr_node, inode);
while (list_ptr != NULL) {
to_node = list_ptr->edge;
print_rr_node(stdout, rr_node, to_node);
list_ptr = list_ptr->next;
i++;
}
}
void alloc_and_load_edges_and_switches(INP t_rr_node * L_rr_node, INP int inode,
INP int num_edges, INOUTP bool * L_rr_edge_done,
INP t_linked_edge * edge_list_head) {
/* Sets up all the edge related information for rr_node inode (num_edges, *
* the edges array and the switches array). The edge_list_head points to *
* a list of the num_edges edges and switches to put in the arrays. This *
* linked list is freed by this routine. This routine also resets the *
* rr_edge_done array for the next rr_node (i.e. set it so that no edges *
* are marked as having been seen before). */
t_linked_edge *list_ptr;
int i;
/* Check we aren't overwriting edges */
assert(L_rr_node[inode].get_num_edges() < 1);
assert(NULL == L_rr_node[inode].edges);
assert(NULL == L_rr_node[inode].switches);
L_rr_node[inode].set_num_edges(num_edges);
L_rr_node[inode].edges = (int *) my_malloc(num_edges * sizeof(int));
L_rr_node[inode].switches = (short *) my_malloc(num_edges * sizeof(short));
i = 0;
list_ptr = edge_list_head;
while (list_ptr && (i < num_edges)) {
L_rr_node[inode].edges[i] = list_ptr->edge;
L_rr_node[inode].switches[i] = list_ptr->iswitch;
L_rr_node[list_ptr->edge].set_fan_in(L_rr_node[list_ptr->edge].get_fan_in() + 1);
/* Unmark the edge since we are done considering fanout from node. */
L_rr_edge_done[list_ptr->edge] = false;
list_ptr = list_ptr->next;
++i;
}
assert(list_ptr == NULL);
assert(i == num_edges);
}
/* allocate a pin-to-track map for each segment type individually, then combine them into a
single map spanning the whole channel */
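/* For example (illustrative numbers): if segment type 0 occupies channel tracks
0..7 and segment type 1 occupies tracks 8..11, then a type-1 connection to its
relative track 2 is stored in the combined map as absolute track 10. */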
static int *****alloc_and_load_pin_to_track_map(INP e_pin_type pin_type,
INP int **Fc, INP t_type_ptr Type, INP bool *perturb_switch_pattern,
INP e_directionality directionality,
INP int num_seg_types, INP int *sets_per_seg_type) {
/* get the maximum number of tracks that any pin can connect to */
int max_pin_tracks = 0;
for (int iseg = 0; iseg < num_seg_types; iseg++){
/* determine the maximum Fc to this segment type across all pins */
int max_Fc = 0;
for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) {
int pin_class = Type->pin_class[pin_index];
if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) {
max_Fc = Fc[pin_index][iseg];
}
}
max_pin_tracks += max_Fc;
}
/* allocate 'result' matrix and initialize entries to OPEN. also allocate and initialize the matrix which will be
used to index into the correct entries when loading up 'result' */
int *****result = NULL; /* [0..num_pins-1][0..width-1][0..height-1][0..3][0..Fc-1] */
int ****next_result_index = NULL; /* [0..num_pins-1][0..width-1][0..height-1][0..3] */
result = (int *****) alloc_matrix5(0, Type->num_pins-1, 0, Type->width-1, 0, Type->height-1, 0, 3, 0, max_pin_tracks-1, sizeof(int));
next_result_index = (int ****) alloc_matrix4(0, Type->num_pins-1, 0, Type->width-1, 0, Type->height-1, 0, 3, sizeof(int));
for (int ipin = 0; ipin < Type->num_pins; ipin++){
for (int iwidth = 0; iwidth < Type->width; iwidth++){
for (int iheight = 0; iheight < Type->height; iheight++){
for (int iside = 0; iside < 4; iside++){
for (int itrack = 0; itrack < max_pin_tracks; itrack++){
result[ipin][iwidth][iheight][iside][itrack] = OPEN;
}
next_result_index[ipin][iwidth][iheight][iside] = 0;
}
}
}
}
/* multiplier for unidirectional vs bidirectional architectures: each unidirectional
track 'set' occupies two channel tracks, one per direction */
int fac = 1;
if (directionality == UNI_DIRECTIONAL){
fac = 2;
}
/* load the pin to track matrix by looking at each segment type in turn */
int seg_type_start_track = 0;
for (int iseg = 0; iseg < num_seg_types; iseg++){
int seg_type_tracks = fac * sets_per_seg_type[iseg];
/* determine the maximum Fc to this segment type across all pins */
int max_Fc = 0;
for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) {
int pin_class = Type->pin_class[pin_index];
if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) {
max_Fc = Fc[pin_index][iseg];
}
}
/* get pin connections to tracks of the current segment type */
int *****pin_to_seg_type_map = NULL; /* [0..num_pins-1][0..width-1][0..height-1][0..3][0..Fc-1] */
pin_to_seg_type_map = alloc_and_load_pin_to_seg_type(pin_type,
seg_type_tracks, max_Fc, Type, perturb_switch_pattern[iseg], directionality);
/* connections in pin_to_seg_type_map are within that seg type -- i.e. in the [0,seg_type_tracks-1] range.
now load up 'result' array with these connections, but offset them so they are relative to the channel
as a whole */
for (int ipin = 0; ipin < Type->num_pins; ipin++){
for (int iwidth = 0; iwidth < Type->width; iwidth++){
for (int iheight = 0; iheight < Type->height; iheight++){
for (int iside = 0; iside < 4; iside++){
for (int iconn = 0; iconn < max_Fc; iconn++){
int relative_track_ind = pin_to_seg_type_map[ipin][iwidth][iheight][iside][iconn];
int absolute_track_ind = relative_track_ind + seg_type_start_track;
int result_index = next_result_index[ipin][iwidth][iheight][iside];
next_result_index[ipin][iwidth][iheight][iside]++;
result[ipin][iwidth][iheight][iside][result_index] = absolute_track_ind;
}
}
}
}
}
/* free temporary loop variables */
free_matrix5(pin_to_seg_type_map, 0, Type->num_pins-1,
0, Type->width-1,
0, Type->height-1,
0, 3,
0, sizeof(int));
pin_to_seg_type_map = NULL;
/* next seg type will start at this track index */
seg_type_start_track += seg_type_tracks;
}
/* free temporary function variables */
free_matrix4(next_result_index, 0, Type->num_pins-1,
0, Type->width-1,
0, Type->height-1,
0, sizeof(int));
next_result_index = NULL;
return result;
}
static int *****alloc_and_load_pin_to_seg_type(INP e_pin_type pin_type,
INP int seg_type_tracks, INP int Fc,
INP t_type_ptr Type, INP bool perturb_switch_pattern,
INP e_directionality directionality) {
/* Note: currently a single Fc value is used for all pins of the given type. The
looping below will have to be modified if we want to support per-pin Fc
values in the future. */
int ***num_dir; /* [0..width][0..height][0..3] - Number of *physical* pins on each side. */
int ****dir_list; /* [0..width][0..height][0..3][0..num_pins-1] - List of pins of correct type on each side. Max possible space alloced for simplicity */
int ***num_done_per_dir; /* [0..width][0..height][0..3] */
int *****tracks_connected_to_pin; /* [0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */
/* NB: This wastes some space. Could set the sub-arrays for sides with no pin
* (or only pins of the wrong type) to NULL. Probably not enough memory to
* worry about, esp. as it's temporary. If pin ipin at (iwidth, iheight) on side
* iside does not exist or is of the wrong type,
* tracks_connected_to_pin[ipin][iwidth][iheight][iside][0] = OPEN. */
if (Type->num_pins < 1) {
return NULL;
}
tracks_connected_to_pin = (int *****) alloc_matrix5(0, Type->num_pins - 1,
0, Type->width - 1, 0, Type->height - 1, 0, 3, 0, Fc, sizeof(int));
for (int pin = 0; pin < Type->num_pins; ++pin) {
for (int width = 0; width < Type->width; ++width) {
for (int height = 0; height < Type->height; ++height) {
for (int side = 0; side < 4; ++side) {
for (int fc = 0; fc < Fc; ++fc) {
tracks_connected_to_pin[pin][width][height][side][fc] = OPEN; /* Unconnected. */
}
}
}
}
}
num_dir = (int ***) alloc_matrix3(0, Type->width - 1, 0, Type->height - 1, 0, 3, sizeof(int));
dir_list = (int ****) alloc_matrix4(0, Type->width - 1, 0, Type->height - 1, 0, 3, 0, Type->num_pins - 1, sizeof(int));
/* Defensive coding. Try to crash hard if I use an unset entry. */
for (int width = 0; width < Type->width; ++width)
for (int height = 0; height < Type->height; ++height)
for (int side = 0; side < 4; ++side)
for (int pin = 0; pin < Type->num_pins; ++pin)
dir_list[width][height][side][pin] = (-1);
for (int width = 0; width < Type->width; ++width)
for (int height = 0; height < Type->height; ++height)
for (int side = 0; side < 4; ++side)
num_dir[width][height][side] = 0;
for (int pin = 0; pin < Type->num_pins; ++pin) {
int pin_class = Type->pin_class[pin];
if (Type->class_inf[pin_class].type != pin_type) /* Doing either ipins OR opins */
continue;
/* Pins connecting only to global resources get no switches -> keeps area model accurate. */
if (Type->is_global_pin[pin])
continue;
for (int width = 0; width < Type->width; ++width) {
for (int height = 0; height < Type->height; ++height) {
for (int side = 0; side < 4; ++side) {
if (Type->pinloc[width][height][side][pin] == 1) {
dir_list[width][height][side][num_dir[width][height][side]] = pin;
num_dir[width][height][side]++;
}
}
}
}
}
int num_phys_pins = 0;
for (int width = 0; width < Type->width; ++width) {
for (int height = 0; height < Type->height; ++height) {
for (int side = 0; side < 4; ++side)
num_phys_pins += num_dir[width][height][side]; /* Num. physical pins per type */
}
}
num_done_per_dir = (int ***) alloc_matrix3(0, Type->width - 1, 0, Type->height - 1, 0, 3, sizeof(int));
for (int width = 0; width < Type->width; ++width) {
for (int height = 0; height < Type->height; ++height) {
for (int side = 0; side < 4; ++side) {
num_done_per_dir[width][height][side] = 0;
}
}
}
int *pin_num_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
int *side_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
int *width_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
int *height_ordering = (int *) my_malloc(num_phys_pins * sizeof(int));
/* Connection block I use distributes pins evenly across the tracks *
* of ALL sides of the clb at once. Ensures that each pin connects *
* to spaced out tracks in its connection block, and that the other *
* pins (potentially in other C blocks) connect to the remaining tracks *
* first. Doesn't matter for large Fc, but should make a fairly *
* good low Fc block that leverages the fact that usually lots of pins *
* are logically equivalent. */
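/* The loop below walks clockwise around the block perimeter (up the LEFT side,
* right along the TOP, down the RIGHT side, left along the BOTTOM), assigning
* at most one pin per (width, height, side) location per visit; pin_index
* advances once per full revolution, spreading successive pins across sides. */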
int side = LEFT;
int width = 0;
int height = Type->height - 1;
int pin = 0;
int pin_index = -1;
while (pin < num_phys_pins) {
if (side == TOP) {
if (width >= Type->width - 1) {
side = RIGHT;
} else {
width++;
}
} else if (side == RIGHT) {
if (height <= 0) {
side = BOTTOM;
} else {
height--;
}
} else if (side == BOTTOM) {
if (width <= 0) {
side = LEFT;
} else {
width--;
}
} else if (side == LEFT) {
if (height >= Type->height - 1) {
pin_index++;
side = TOP;
} else {
height++;
}
}
assert(pin_index < num_phys_pins);
/* Number of physical pins bounds number of logical pins */
if (num_done_per_dir[width][height][side] >= num_dir[width][height][side])
continue;
pin_num_ordering[pin] = dir_list[width][height][side][pin_index];
side_ordering[pin] = side;
width_ordering[pin] = width;
height_ordering[pin] = height;
assert(Type->pinloc[width][height][side][dir_list[width][height][side][pin_index]]);
num_done_per_dir[width][height][side]++;
pin++;
}
if (perturb_switch_pattern) {
load_perturbed_switch_pattern(Type, tracks_connected_to_pin,
num_phys_pins, pin_num_ordering, side_ordering, width_ordering, height_ordering,
seg_type_tracks, seg_type_tracks, Fc, directionality);
} else {
load_uniform_switch_pattern(Type, tracks_connected_to_pin,
num_phys_pins, pin_num_ordering, side_ordering, width_ordering, height_ordering,
seg_type_tracks, seg_type_tracks, Fc, directionality);
}
#ifdef ENABLE_CHECK_ALL_TRACKS
check_all_tracks_reach_pins(Type, tracks_connected_to_pin, seg_type_tracks,
Fc, pin_type);
#endif
/* Free all temporary storage. */
free_matrix3(num_dir, 0, Type->width - 1, 0, Type->height - 1, 0, sizeof(int));
free_matrix4(dir_list, 0, Type->width - 1, 0, Type->height - 1, 0, 3, 0, sizeof(int));
free_matrix3(num_done_per_dir, 0, Type->width - 1, 0, Type->height - 1, 0, sizeof(int));
free(pin_num_ordering);
free(side_ordering);
free(width_ordering);
free(height_ordering);
return tracks_connected_to_pin;
}
static void load_uniform_switch_pattern(INP t_type_ptr type,
INOUTP int *****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *width_ordering, INP int *height_ordering,
INP int x_chan_width, INP int y_chan_width, INP int Fc,
enum e_directionality directionality) {
/* Loads the tracks_connected_to_pin array with an even distribution of *
* switches across the tracks for each pin. For example, each pin connects *
* to every 4.3rd track in a channel, with exactly which tracks a pin *
* connects to staggered from pin to pin. */
/* Uni-directional drive is implemented to ensure no directional bias and this means
* two important comments noted below */
/* 1. Spacing should be (W/2)/(Fc/2), and step_size should be spacing/(num_phys_pins),
* and lay down 2 switches on an adjacent pair of tracks at a time to ensure
* no directional bias. Basically, treat W (even) as W/2 pairs of tracks, and
* assign switches to a pair at a time. Can do this because W is guaranteed to
* be even; however, the same approach cannot be applied to the Fc_out pattern
* when L > 1 and W is not a multiple of 2L.
*
* 2. This generic pattern should be considered the tileable physical layout,
* meaning all track # here are physical #'s,
* so later must use vpr_to_phy conversion to find actual logical #'s to connect.
* This also means I will not use get_output_block_companion_track to ensure
* no bias, since that describes a logical # -> that would confuse people. */
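/* Illustrative example (numbers not from the original comments): W = 12, Fc = 4,
* unidirectional => group_size = 2 and fc_step = W/Fc = 3; pin i = 0 then gets
* ftrack = 0 and 3 for j = 0,1, i.e. track pairs {0,1} and {6,7} after the
* ((int)ftrack) * group_size scaling, and successive pins shift by step_size. */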
int group_size;
if (directionality == BI_DIRECTIONAL) {
group_size = 1;
} else {
assert(directionality == UNI_DIRECTIONAL);
group_size = 2;
}
assert((x_chan_width % group_size == 0) && (y_chan_width % group_size == 0) && (Fc % group_size == 0));
for (int i = 0; i < num_phys_pins; ++i) {
int pin = pin_num_ordering[i];
int side = side_ordering[i];
int width = width_ordering[i];
int height = height_ordering[i];
/* Bi-directional treats each track separately, uni-directional works with pairs of tracks */
for (int j = 0; j < (Fc / group_size); ++j) {
int max_chan_width = (side == 0 || side == 2 ? x_chan_width : y_chan_width);
float step_size = (float) max_chan_width / (float) (Fc * num_phys_pins);
float fc_step = (float) max_chan_width / (float) Fc;
float ftrack = (i * step_size) + (j * fc_step);
int itrack = ((int) ftrack) * group_size;
/* Catch possible floating point round error */
itrack = min(itrack, max_chan_width - group_size);
/* Assign the group of tracks for the Fc pattern */
for (int k = 0; k < group_size; ++k) {
tracks_connected_to_pin[pin][width][height][side][group_size * j + k] = itrack + k;
}
}
}
}
static void load_perturbed_switch_pattern(INP t_type_ptr type,
INOUTP int *****tracks_connected_to_pin, INP int num_phys_pins,
INP int *pin_num_ordering, INP int *side_ordering,
INP int *width_ordering, INP int *height_ordering,
INP int x_chan_width, INP int y_chan_width, INP int Fc,
enum e_directionality directionality) {
/* Loads the tracks_connected_to_pin array with an unevenly distributed *
* set of switches across the channel. This is done for inputs when *
* Fc_input = Fc_output to avoid creating "pin domains" -- certain output *
* pins being able to talk only to certain input pins because their switch *
* patterns exactly line up. Distribute Fc/2 + 1 switches over half the *
* channel and Fc/2 - 1 switches over the other half to make the switch *
* pattern different from the uniform one of the outputs. Also, have half *
* the pins put the "dense" part of their connections in the first half of *
* the channel and the other half put the "dense" part in the second half, *
* to make sure each track can connect to about the same number of ipins. */
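/* Illustrative example (numbers chosen for clarity): Fc = 6 gives Fc_dense = 4
* and Fc_sparse = 2, so with W = 16 the dense half spaces connections
* 16/(2*4) = 2 tracks apart and the sparse half 16/(2*2) = 4 apart; even- and
* odd-numbered pins swap which half of the channel gets the dense pattern. */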
assert(directionality == BI_DIRECTIONAL);
int Fc_dense = (Fc / 2) + 1;
int Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */
int Fc_half[2];
for (int i = 0; i < num_phys_pins; i++) {
int pin = pin_num_ordering[i];
int side = side_ordering[i];
int width = width_ordering[i];
int height = height_ordering[i];
int max_chan_width = (side == 0 || side == 2 ? x_chan_width : y_chan_width);
float step_size = (float) max_chan_width / (float) (Fc * num_phys_pins);
float spacing_dense = (float) max_chan_width / (float) (2 * Fc_dense);
float spacing_sparse = (float) max_chan_width / (float) (2 * Fc_sparse);
float spacing[2];
/* Flip every pin to balance switch density */
spacing[i % 2] = spacing_dense;
Fc_half[i % 2] = Fc_dense;
spacing[(i + 1) % 2] = spacing_sparse;
Fc_half[(i + 1) % 2] = Fc_sparse;
float ftrack = i * step_size; /* Start point. Staggered from pin to pin */
int iconn = 0;
for (int ihalf = 0; ihalf < 2; ihalf++) { /* For both dense and sparse halves. */
for (int j = 0; j < Fc_half[ihalf]; ++j) {
/* Can occasionally get wraparound due to floating point rounding.
This is okay because when it occurs the starting position was > 0,
so the wrapped connection is still valid. */
int itrack = (int) ftrack;
itrack = itrack % max_chan_width;
tracks_connected_to_pin[pin][width][height][side][iconn] = itrack;
ftrack += spacing[ihalf];
iconn++;
}
}
} /* End for all physical pins. */
}
#ifdef ENABLE_CHECK_ALL_TRACKS
static void check_all_tracks_reach_pins(t_type_ptr type,
int *****tracks_connected_to_pin, int max_chan_width, int Fc,
enum e_pin_type ipin_or_opin) {
/* Checks that all tracks can be reached by some pin. */
assert(max_chan_width > 0);
int *num_conns_to_track; /* [0..max_chan_width-1] */
num_conns_to_track = (int *) my_calloc(max_chan_width, sizeof(int));
for (int pin = 0; pin < type->num_pins; ++pin) {
for (int width = 0; width < type->width; ++width) {
for (int height = 0; height < type->height; ++height) {
for (int side = 0; side < 4; ++side) {
if (tracks_connected_to_pin[pin][width][height][side][0] != OPEN) { /* Pin exists */
for (int conn = 0; conn < Fc; ++conn) {
int track = tracks_connected_to_pin[pin][width][height][side][conn];
num_conns_to_track[track]++;
}
}
}
}
}
}
for (int track = 0; track < max_chan_width; ++track) {
if (num_conns_to_track[track] <= 0) {
vpr_printf_error(__FILE__, __LINE__,
"check_all_tracks_reach_pins: Track %d does not connect to any CLB %ss.\n",
track, (ipin_or_opin == DRIVER ? "OPIN" : "IPIN"));
}
}
free(num_conns_to_track);
}
#endif
/* Allocates and loads the track-to-ipin lookup for each physical grid type. This
* is the same information as the pin_to_track map but accessed in a different way. */
static struct s_ivec ****alloc_and_load_track_to_pin_lookup(
INP int *****pin_to_track_map, INP int **Fc,
INP int type_width, INP int type_height,
INP int num_pins, INP int max_chan_width,
INP int num_seg_types) {
struct s_ivec ****track_to_pin_lookup;
/* [0..max_chan_width-1][0..width][0..height][0..3]. For each track number
* it stores a vector for each of the four sides. x-directed channels will
* use the TOP and BOTTOM vectors to figure out what clb input pins they
* connect to above and below them, respectively, while y-directed channels
* use the LEFT and RIGHT vectors. Each vector contains an nelem field
* saying how many ipins it connects to. The list[0..nelem-1] array then
* gives the pin numbers. */
/* Note that a clb pin that connects to a channel on its RIGHT means that *
* that channel connects to a clb pin on its LEFT. The convention used *
* here is always in the perspective of the CLB */
if (num_pins < 1) {
return NULL;
}
/* Alloc and zero the lookup table */
track_to_pin_lookup = (struct s_ivec ****) alloc_matrix4(0, max_chan_width - 1,
0, type_width - 1, 0, type_height - 1, 0, 3, sizeof(struct s_ivec));
for (int track = 0; track < max_chan_width; ++track) {
for (int width = 0; width < type_width; ++width) {
for (int height = 0; height < type_height; ++height) {
for (int side = 0; side < 4; ++side) {
track_to_pin_lookup[track][width][height][side].nelem = 0;
track_to_pin_lookup[track][width][height][side].list = NULL;
}
}
}
}
/* Count number of pins to which each track connects */
for (int pin = 0; pin < num_pins; ++pin) {
for (int width = 0; width < type_width; ++width) {
for (int height = 0; height < type_height; ++height) {
for (int side = 0; side < 4; ++side) {
if (pin_to_track_map[pin][width][height][side][0] == OPEN)
continue;
/* get number of tracks to which this pin connects */
int num_tracks = 0;
for (int iseg = 0; iseg < num_seg_types; iseg++){
num_tracks += Fc[pin][iseg];
}
for (int conn = 0; conn < num_tracks; ++conn) {
int track = pin_to_track_map[pin][width][height][side][conn];
track_to_pin_lookup[track][width][height][side].nelem++;
}
}
}
}
}
/* Allocate space. */
for (int track = 0; track < max_chan_width; ++track) {
for (int width = 0; width < type_width; ++width) {
for (int height = 0; height < type_height; ++height) {
for (int side = 0; side < 4; ++side) {
track_to_pin_lookup[track][width][height][side].list = NULL; /* Defensive code */
if (track_to_pin_lookup[track][width][height][side].nelem != 0) {
track_to_pin_lookup[track][width][height][side].list =
(int *) my_malloc(track_to_pin_lookup[track][width][height][side].nelem * sizeof(int));
track_to_pin_lookup[track][width][height][side].nelem = 0;
}
}
}
}
}
/* Loading pass. */
for (int pin = 0; pin < num_pins; ++pin) {
for (int width = 0; width < type_width; ++width) {
for (int height = 0; height < type_height; ++height) {
for (int side = 0; side < 4; ++side) {
if (pin_to_track_map[pin][width][height][side][0] == OPEN)
continue;
/* get number of tracks to which this pin connects */
int num_tracks = 0;
for (int iseg = 0; iseg < num_seg_types; iseg++){
num_tracks += Fc[pin][iseg];
}
for (int conn = 0; conn < num_tracks; ++conn) {
int track = pin_to_track_map[pin][width][height][side][conn];
int pin_counter = track_to_pin_lookup[track][width][height][side].nelem;
track_to_pin_lookup[track][width][height][side].list[pin_counter] = pin;
track_to_pin_lookup[track][width][height][side].nelem++;
}
}
}
}
}
return track_to_pin_lookup;
}
/* A utility routine to dump the contents of the routing resource graph *
* (everything -- connectivity, occupancy, cost, etc.) into a file. Used *
* only for debugging. */
void dump_rr_graph(INP const char *file_name) {
FILE *fp = my_fopen(file_name, "w", 0);
for (int inode = 0; inode < num_rr_nodes; ++inode) {
print_rr_node(fp, rr_node, inode);
fprintf(fp, "\n");
}
fclose(fp);
}
/* Prints all the data about node inode to file fp. */
void print_rr_node(FILE * fp, t_rr_node * L_rr_node, int inode) {
static const char *direction_name[] = { "OPEN", "INC_DIRECTION", "DEC_DIRECTION", "BI_DIRECTION" };
static const char *drivers_name[] = { "OPEN", "MULTI_BUFFER", "SINGLE" };
t_rr_type rr_type = L_rr_node[inode].type;
/* Make sure we don't overrun const arrays */
assert((L_rr_node[inode].get_direction() + 1) < (int)(sizeof(direction_name) / sizeof(char *)));
assert((L_rr_node[inode].get_drivers() + 1) < (int)(sizeof(drivers_name) / sizeof(char *)));
fprintf(fp, "Node: %d %s ", inode, L_rr_node[inode].rr_get_type_string());
if ((L_rr_node[inode].get_xlow() == L_rr_node[inode].get_xhigh())
&& (L_rr_node[inode].get_ylow() == L_rr_node[inode].get_yhigh())) {
fprintf(fp, "(%d, %d) ",
L_rr_node[inode].get_xlow(), L_rr_node[inode].get_ylow());
} else {
fprintf(fp, "(%d, %d) to (%d, %d) ",
L_rr_node[inode].get_xlow(), L_rr_node[inode].get_ylow(),
L_rr_node[inode].get_xhigh(), L_rr_node[inode].get_yhigh());
}
fprintf(fp, "Ptc_num: %d ", L_rr_node[inode].get_ptc_num());
fprintf(fp, "Direction: %s ", direction_name[L_rr_node[inode].get_direction() + 1]);
fprintf(fp, "Drivers: %s ", drivers_name[L_rr_node[inode].get_drivers() + 1]);
fprintf(fp, "\n");
if (rr_type == IPIN || rr_type == OPIN) {
fprintf(fp, "name %s\n", L_rr_node[inode].pb_graph_pin->port->name);
}
fprintf(fp, "%d edge(s):", L_rr_node[inode].get_num_edges());
for (int iconn = 0; iconn < L_rr_node[inode].get_num_edges(); ++iconn)
fprintf(fp, " %d", L_rr_node[inode].edges[iconn]);
fprintf(fp, "\n");
fprintf(fp, "Switch types:");
for (int iconn = 0; iconn < L_rr_node[inode].get_num_edges(); ++iconn)
fprintf(fp, " %d", L_rr_node[inode].switches[iconn]);
fprintf(fp, "\n");
fprintf(fp, "Occ: %d Capacity: %d\n", L_rr_node[inode].get_occ(),
L_rr_node[inode].get_capacity());
if (rr_type != INTRA_CLUSTER_EDGE) {
fprintf(fp, "R: %g C: %g\n", L_rr_node[inode].R, L_rr_node[inode].C);
}
fprintf(fp, "Cost_index: %d\n", L_rr_node[inode].get_cost_index());
}
/* Prints all the rr_indexed_data of index to file fp. */
void print_rr_indexed_data(FILE * fp, int index) {
fprintf(fp, "Index: %d\n", index);
fprintf(fp, "ortho_cost_index: %d ", rr_indexed_data[index].ortho_cost_index);
fprintf(fp, "base_cost: %g ", rr_indexed_data[index].saved_base_cost);
fprintf(fp, "saved_base_cost: %g\n", rr_indexed_data[index].saved_base_cost);
fprintf(fp, "Seg_index: %d ", rr_indexed_data[index].seg_index);
fprintf(fp, "inv_length: %g\n", rr_indexed_data[index].inv_length);
fprintf(fp, "T_linear: %g ", rr_indexed_data[index].T_linear);
fprintf(fp, "T_quadratic: %g ", rr_indexed_data[index].T_quadratic);
fprintf(fp, "C_load: %g\n", rr_indexed_data[index].C_load);
}
static void build_unidir_rr_opins(INP int i, INP int j,
INP struct s_grid_tile **L_grid, INP int ***Fc_out, INP int max_chan_width,
INP t_chan_details * chan_details_x, INP t_chan_details * chan_details_y,
INOUTP int ***Fc_xofs, INOUTP int ***Fc_yofs,
INOUTP t_rr_node * L_rr_node, INOUTP bool * L_rr_edge_done,
OUTP bool * Fc_clipped, INP t_ivec *** L_rr_node_indices, INP int delayless_switch,
INP t_direct_inf *directs, INP int num_directs, INP t_clb_to_clb_directs *clb_to_clb_directs,
INP int num_seg_types){
/* Builds all the outgoing edges of the OPINs of the block at (i,j) for a
* unidirectional architecture: connections into the adjacent channel segments
* (sized by Fc_out per segment type) plus any direct clb-to-clb connections. */
*Fc_clipped = false;
/* Only the base block of a set should use this function */
if (L_grid[i][j].width_offset > 0 || L_grid[i][j].height_offset > 0) {
return;
}
t_type_ptr type = L_grid[i][j].type;
/* Go through each pin and find its fanout. */
for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) {
/* Skip global pins and pins that are not of DRIVER type */
int class_index = type->pin_class[pin_index];
if (type->class_inf[class_index].type != DRIVER) {
continue;
}
if (type->is_global_pin[pin_index]) {
continue;
}
int num_edges = 0;
t_linked_edge *edge_list = NULL;
for (int iseg = 0; iseg < num_seg_types; iseg++){
/* get Fc for this segment type */
int seg_type_Fc = Fc_out[type->index][pin_index][iseg];
assert( seg_type_Fc >= 0 );
if (seg_type_Fc == 0){
continue;
}
for (int width = 0; width < type->width; ++width) {
for (int height = 0; height < type->height; ++height) {
for (enum e_side side = (enum e_side)0; side < 4; side = (enum e_side)(side + 1)) {
/* Can't do anything if pin isn't at this location */
if (0 == type->pinloc[width][height][side][pin_index]) {
continue;
}
/* Figure out the chan seg at that side.
* side is the side of the logic or io block. */
bool vert = ((side == TOP) || (side == BOTTOM));
bool pos_dir = ((side == TOP) || (side == RIGHT));
t_rr_type chan_type = (vert ? CHANX : CHANY);
int chan = (vert ? (j + height) : (i + width));
int seg = (vert ? (i + width) : (j + height));
int max_len = (vert ? nx : ny);
int ***Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
if (false == pos_dir) {
--chan;
}
/* Skip the location if there is no channel. */
if (chan < 0) {
continue;
}
if (seg < 1) {
continue;
}
if (seg > (vert ? nx : ny)) {
continue;
}
if (chan > (vert ? ny : nx)) {
continue;
}
t_seg_details * seg_details = (chan_type == CHANX ?
chan_details_x[seg][chan] : chan_details_y[chan][seg]);
if (seg_details[0].length == 0)
continue;
/* Get the list of opin to mux connections for that chan seg. */
bool clipped;
num_edges += get_unidir_opin_connections(type, chan, seg,
seg_type_Fc, iseg, chan_type, seg_details, &edge_list,
Fc_ofs, L_rr_edge_done, max_len, max_chan_width,
L_rr_node_indices, &clipped);
if (clipped) {
*Fc_clipped = true;
}
}
}
}
}
/* Add in direct connections */
num_edges += get_opin_direct_connecions(i, j, pin_index, &edge_list, L_rr_node_indices,
directs, num_directs, clb_to_clb_directs);
/* Add the edges */
int opin_node_index = get_rr_node_index(i, j, OPIN, pin_index, L_rr_node_indices);
alloc_and_load_edges_and_switches(rr_node, opin_node_index, num_edges,
L_rr_edge_done, edge_list);
while (edge_list != NULL) {
t_linked_edge *next_edge = edge_list->next;
free(edge_list);
edge_list = next_edge;
}
}
}
/**
* Parse out which CLB pins should connect directly to which other CLB pins, then store that in a clb_to_clb_directs data structure
* This data structure supplements the info in the "directs" data structure
* TODO: The function that does this parsing in placement is poorly done because it lacks generality for heterogeneous architectures; it should be replaced with this one
*/
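/* For illustration only (hypothetical architecture values): a specification such as
<direct name="carry" from_pin="clb.cout[0:0]" to_pin="clb.cin[0:0]" x_offset="0" y_offset="-1"/>
would be resolved here into from/to type pointers, block-pin index ranges and a switch index. */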
static t_clb_to_clb_directs * alloc_and_load_clb_to_clb_directs(INP t_direct_inf *directs, INP int num_directs, int delayless_switch) {
int i, j;
t_clb_to_clb_directs *clb_to_clb_directs;
char *pb_type_name, *port_name;
int start_pin_index, end_pin_index;
t_pb_type *pb_type;
clb_to_clb_directs = (t_clb_to_clb_directs*)my_calloc(num_directs, sizeof(t_clb_to_clb_directs));
pb_type_name = NULL;
port_name = NULL;
for (i = 0; i < num_directs; i++) {
pb_type_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
port_name = (char*)my_malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
// Load from pins
// Parse out the pb_type name, port name, and pin range
parse_direct_pin_name(directs[i].from_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
// Figure out which type, port, and pin is used
for (j = 0; j < num_types; j++) {
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) {
break;
}
}
assert(j < num_types);
clb_to_clb_directs[i].from_clb_type = &type_descriptors[j];
pb_type = clb_to_clb_directs[i].from_clb_type->pb_type;
for (j = 0; j < pb_type->num_ports; j++) {
if(strcmp(pb_type->ports[j].name, port_name) == 0) {
break;
}
}
assert(j < pb_type->num_ports);
if(start_pin_index == OPEN) {
assert(start_pin_index == end_pin_index);
start_pin_index = 0;
end_pin_index = pb_type->ports[j].num_pins - 1;
}
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].from_clb_pin_start_index);
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].from_clb_pin_end_index);
// Load to pins
// Parse out the pb_type name, port name, and pin range
parse_direct_pin_name(directs[i].to_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
// Figure out which type, port, and pin is used
for (j = 0; j < num_types; j++) {
if(strcmp(type_descriptors[j].name, pb_type_name) == 0) {
break;
}
}
assert(j < num_types);
clb_to_clb_directs[i].to_clb_type = &type_descriptors[j];
pb_type = clb_to_clb_directs[i].to_clb_type->pb_type;
for (j = 0; j < pb_type->num_ports; j++) {
if(strcmp(pb_type->ports[j].name, port_name) == 0) {
break;
}
}
assert(j < pb_type->num_ports);
if(start_pin_index == OPEN) {
assert(start_pin_index == end_pin_index);
start_pin_index = 0;
end_pin_index = pb_type->ports[j].num_pins - 1;
}
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].to_clb_pin_start_index);
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].to_clb_pin_end_index);
if(abs(clb_to_clb_directs[i].from_clb_pin_start_index - clb_to_clb_directs[i].from_clb_pin_end_index) != abs(clb_to_clb_directs[i].to_clb_pin_start_index - clb_to_clb_directs[i].to_clb_pin_end_index)) {
vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line,
"Range mismatch from %s to %s.\n", directs[i].from_pin, directs[i].to_pin);
}
//Set the switch index
if(directs[i].switch_type > 0) {
//Use the specified switch
clb_to_clb_directs[i].switch_index = directs[i].switch_type;
} else {
//Use the delayless switch by default
clb_to_clb_directs[i].switch_index = delayless_switch;
}
free(pb_type_name);
free(port_name);
//We must be careful to clean-up anything that we may have incidentally allocated.
//Specifically, we can be called while generating the dummy architecture
//for placer delay estimation. Since the delay estimation occurs on a
//'different' architecture it is almost certain that the f_blk_pin_from_port_pin allocated
//by calling get_blk_pin_from_port_pin() will later be invalid.
//We therefore must free it now.
free_blk_pin_from_port_pin();
}
return clb_to_clb_directs;
}
/* Adds all direct clb-pin-to-clb-pin edges fanning out from the given opin and returns the number of edges added */
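/* Example of the pin-range mapping performed below (hypothetical values): with
a from_clb range [8:11] and a to_clb range [3:0], opin 9 gives
offset = 9 - 8 = 1 and, since the 'to' range runs backwards, ipin = 3 - 1 = 2. */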
static int get_opin_direct_connecions(int x, int y, int opin,
INOUTP t_linked_edge ** edge_list_ptr, INP t_ivec *** L_rr_node_indices,
INP t_direct_inf *directs, INP int num_directs,
INP t_clb_to_clb_directs *clb_to_clb_directs) {
t_type_ptr curr_type, target_type;
int width_offset, height_offset;
int i, ipin, inode;
t_linked_edge *edge_list_head;
int max_index, min_index, offset;
bool swap;
int new_edges;
curr_type = grid[x][y].type;
edge_list_head = *edge_list_ptr;
new_edges = 0;
/* Iterate through all direct connections */
for (i = 0; i < num_directs; i++) {
/* Find matching direct clb-to-clb connections with the same type as current grid location */
if(clb_to_clb_directs[i].from_clb_type == curr_type) { //We are at a valid starting point
//Offset must be in range
if(x + directs[i].x_offset < nx + 1 &&
x + directs[i].x_offset > 0 &&
y + directs[i].y_offset < ny + 1 &&
y + directs[i].y_offset > 0) {
//Only add connections if the target clb type matches the type in the direct specification
target_type = grid[x + directs[i].x_offset][y + directs[i].y_offset].type;
if(clb_to_clb_directs[i].to_clb_type == target_type) {
/* Compute index of opin with regards to given pins */
if(clb_to_clb_directs[i].from_clb_pin_start_index > clb_to_clb_directs[i].from_clb_pin_end_index) {
swap = true;
max_index = clb_to_clb_directs[i].from_clb_pin_start_index;
min_index = clb_to_clb_directs[i].from_clb_pin_end_index;
} else {
swap = false;
min_index = clb_to_clb_directs[i].from_clb_pin_start_index;
max_index = clb_to_clb_directs[i].from_clb_pin_end_index;
}
if(max_index >= opin && min_index <= opin) {
offset = opin - min_index;
/* This opin is specified to connect directly to an ipin, now compute which ipin to connect to */
ipin = OPEN;
if(clb_to_clb_directs[i].to_clb_pin_start_index > clb_to_clb_directs[i].to_clb_pin_end_index) {
if(swap) {
ipin = clb_to_clb_directs[i].to_clb_pin_end_index + offset;
} else {
ipin = clb_to_clb_directs[i].to_clb_pin_start_index - offset;
}
} else {
if(swap) {
ipin = clb_to_clb_directs[i].to_clb_pin_end_index - offset;
} else {
ipin = clb_to_clb_directs[i].to_clb_pin_start_index + offset;
}
}
/* Add new ipin edge to list of edges */
width_offset = grid[x + directs[i].x_offset][y + directs[i].y_offset].width_offset;
height_offset = grid[x + directs[i].x_offset][y + directs[i].y_offset].height_offset;
inode = get_rr_node_index(x + directs[i].x_offset - width_offset, y + directs[i].y_offset - height_offset,
IPIN, ipin, L_rr_node_indices);
edge_list_head = insert_in_edge_list(edge_list_head, inode, clb_to_clb_directs[i].switch_index);
new_edges++;
}
}
}
}
}
*edge_list_ptr = edge_list_head;
return new_edges;
}
/* Determines whether the output pins of the specified block type should be perturbed. *
* This is to prevent pathological cases where the output pin connections are *
* spaced such that the connection pattern always skips some types of wire (w.r.t. *
* starting points) */
static bool* alloc_and_load_perturb_opins(INP t_type_ptr type, INP int **Fc_out,
INP int max_chan_width, INP int num_seg_types, INP t_segment_inf *segment_inf){
int i, Fc_max, iclass, num_wire_types;
int num, max_primes, factor, num_factors;
int *prime_factors;
float step_size = 0;
float n = 0;
float threshold = 0.07;
bool *perturb_opins = (bool *) my_calloc( num_seg_types, sizeof(bool) );
i = Fc_max = iclass = 0;
if (num_seg_types > 1){
/* Segments of one length are grouped together in the channel. *
* In the future we can determine if any of these segments will *
* encounter the pathological step size case, and determine if *
* we need to perturb based on the segment's frequency (if *
* frequency is small we should not perturb - testing has found *
* that perturbing a channel when unnecessary increases needed *
* W to achieve the same delay); but for now we just return. */
return perturb_opins;
} else {
/* There are as many wire start points as the value of L */
num_wire_types = segment_inf[0].length;
}
/* get Fc_max */
for (i = 0; i < type->num_pins; ++i) {
iclass = type->pin_class[i];
if (Fc_out[i][0] > Fc_max && type->class_inf[iclass].type == DRIVER) {
Fc_max = Fc_out[i][0];
}
}
/* Nothing to perturb if Fc = 0; no need to perturb if Fc = W (every pin already reaches every track) */
if (Fc_max == 0 || Fc_max == max_chan_width){
return perturb_opins;
}
/* Pathological cases occur when the step size, W/Fc, is a multiple of *
* the number of wire starting points, L. Specifically, when the step *
* size is a multiple of a prime factor of L, the connection pattern *
* will always skip some wires. Thus, we perturb pins if we detect this *
* case. */
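/* Worked example (illustrative numbers): wires of length L = 4 have the single
* prime factor 2; with W = 32 and Fc_max = 8 the step size is 32/8 = 4, an
* exact multiple of that factor, so the pattern would always skip half of the
* wire starting points and the opins are perturbed. */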
/* get an upper bound on the number of prime factors of num_wire_types */
max_primes = (int)floor(log((float)num_wire_types)/log(2.0));
prime_factors = (int *) my_malloc(max_primes * sizeof(int));
for (i = 0; i < max_primes; i++){
prime_factors[i] = 0;
}
/* Find the prime factors of num_wire_types */
num = num_wire_types;
factor = 2;
num_factors = 0;
while ( pow((float)factor, 2) <= num ){
if ( num % factor == 0 ){
num /= factor;
if ( factor != prime_factors[num_factors] ){
prime_factors[num_factors] = factor;
num_factors++;
}
} else {
factor++;
}
}
if (num > 1 && (num_factors == 0 || num != prime_factors[num_factors - 1])) {
/* record the leftover prime factor; this also covers the case where num_wire_types is itself prime */
prime_factors[num_factors++] = num;
}
/* Now see if step size is an approximate multiple of one of the factors. A *
* threshold is used because step size may not be an integer. */
step_size = (float)max_chan_width / Fc_max;
for (i = 0; i < num_factors; i++){
if ( nint(step_size) < prime_factors[i] ){
perturb_opins[0] = false;
break;
}
n = step_size / prime_factors[i];
n = n - (float)nint(n); /* fractional part */
if ( fabs(n) < threshold ){
perturb_opins[0] = true;
break;
} else {
perturb_opins[0] = false;
}
}
free(prime_factors);
return perturb_opins;
}