#include <stdio.h>
#include <math.h>
#include <assert.h>
#include <string.h>
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph.h"
#include "rr_graph2.h"
#include "rr_graph_sbox.h"
#include "check_rr_graph.h"
#include "rr_graph_timing_params.h"
#include "rr_graph_indexed_data.h"
#include "vpr_utils.h"
/* UDSD Modifications by WMF Begin */
/* WMF: I'm using this macro to reverse the order for wire muxes, so opins
 * connect to the highest track first. This way, the leftover imbalance
 * runs opposite to the leftover imbalance from Fs generation.
 * (May 15: I revamped the imbalance solution, so this feature is no longer
 * useful.) */
#define REVERSE_OPIN_ORDER
/* #define ENABLE_DUMP */
#define MUX_SIZE_DIST_DISPLAY
/* mux size statistic data structures */
typedef struct s_mux
{
int size;
struct s_mux *next;
}
t_mux;
typedef struct s_mux_size_distribution
{
int mux_count;
int max_index;
int *distr;
struct s_mux_size_distribution *next;
}
t_mux_size_distribution;
/* UDSD Modifications by WMF End */
/******************* Variables local to this module. ***********************/
static struct s_linked_vptr *rr_mem_chunk_list_head = NULL;
/* Used to free "chunked" memory. If NULL, no rr_graph exists right now. */
static int chunk_bytes_avail = 0;
static char *chunk_next_avail_mem = NULL;
/* Status of current chunk being dished out by calls to my_chunk_malloc. */
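/* A typical use (see alloc_net_rr_terminals below) passes all three of the
 * variables above to my_chunk_malloc, which carves small allocations out of
 * one large block:
 *    ptr = my_chunk_malloc(size, &rr_mem_chunk_list_head,
 *                          &chunk_bytes_avail, &chunk_next_avail_mem);      */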
/********************* Subroutines local to this module. *******************/
static int ****alloc_and_load_pin_to_track_map(IN enum e_pin_type pin_type,
IN int nodes_per_chan,
IN int Fc,
IN t_type_ptr Type,
IN boolean perturb_switch_pattern,
IN enum e_directionality directionality);
static struct s_ivec ***alloc_and_load_track_to_pin_lookup(IN int ****pin_to_track_map,
IN int Fc,
IN int height,
IN int num_pins,
IN int nodes_per_chan);
static void build_bidir_rr_opins(IN int i,
IN int j,
INOUT t_rr_node * rr_node,
IN t_ivec *** rr_node_indices,
IN int *****opin_to_track_map,
IN int *Fc_out,
IN boolean * rr_edge_done,
IN t_seg_details * seg_details,
IN struct s_grid_tile **grid);
static void build_unidir_rr_opins(IN int i,
IN int j,
IN struct s_grid_tile **grid,
IN int *Fc_out,
IN int nodes_per_chan,
IN t_seg_details * seg_details,
INOUT int **Fc_xofs,
INOUT int **Fc_yofs,
INOUT t_rr_node * rr_node,
INOUT boolean * rr_edge_done,
OUT boolean * Fc_clipped,
IN t_ivec *** rr_node_indices);
static void alloc_and_load_rr_graph(IN int num_nodes,
IN t_rr_node * rr_node,
IN int num_seg_types,
IN t_seg_details * seg_details,
IN boolean * rr_edge_done,
IN struct s_ivec ****track_to_ipin_lookup,
IN int *****opin_to_track_map,
IN struct s_ivec ***switch_block_conn,
IN struct s_grid_tile **grid,
IN int nx,
IN int ny,
IN int Fs,
IN short *****sblock_pattern,
IN int *Fc_out,
IN int **Fc_xofs,
IN int **Fc_yofs,
IN t_ivec *** rr_node_indices,
IN int nodes_per_chan,
IN enum e_switch_block_type sb_type,
IN int delayless_switch,
IN enum e_directionality directionality,
IN int wire_to_ipin_switch,
OUT boolean * Fc_clipped);
static void load_uniform_switch_pattern(IN t_type_ptr type,
INOUT int ****tracks_connected_to_pin,
IN int num_phys_pins,
IN int *pin_num_ordering,
IN int *side_ordering,
IN int *offset_ordering,
IN int nodes_per_chan,
IN int Fc,
IN enum e_directionality directionality);
static void load_perturbed_switch_pattern(IN t_type_ptr type,
INOUT int ****tracks_connected_to_pin,
IN int num_phys_pins,
IN int *pin_num_ordering,
IN int *side_ordering,
IN int *offset_ordering,
IN int nodes_per_chan,
IN int Fc,
IN enum e_directionality directionality);
static void check_all_tracks_reach_pins(t_type_ptr type,
int ****tracks_connected_to_pin,
int nodes_per_chan,
int Fc,
enum e_pin_type ipin_or_opin);
static int track_side(int clb_side);
static boolean *alloc_and_load_perturb_ipins(IN int nodes_per_chan,
IN int num_types,
IN int *Fc_in,
IN int *Fc_out,
IN enum e_directionality directionality);
static void print_rr_node(FILE * fp,
t_rr_node * rr_node,
int inode);
static void build_rr_sinks_sources(IN int i,
IN int j,
IN t_rr_node * rr_node,
IN t_ivec *** rr_node_indices,
IN int delayless_switch,
IN struct s_grid_tile **grid);
static void build_rr_xchan(IN int i,
IN int j,
IN struct s_ivec ****track_to_ipin_lookup,
IN struct s_ivec ***switch_block_conn,
IN int cost_index_offset,
IN int nodes_per_chan,
IN int *opin_mux_size,
IN short *****sblock_pattern,
IN int Fs_per_side,
IN t_seg_details * seg_details,
IN t_ivec *** rr_node_indices,
INOUT boolean * rr_edge_done,
INOUT t_rr_node * rr_node,
IN int wire_to_ipin_switch,
IN enum e_directionality directionality);
static void build_rr_ychan(IN int i,
IN int j,
IN struct s_ivec ****track_to_ipin_lookup,
IN struct s_ivec ***switch_block_conn,
IN int cost_index_offset,
IN int nodes_per_chan,
IN int *opin_mux_size,
IN short *****sblock_pattern,
IN int Fs_per_side,
IN t_seg_details * seg_details,
IN t_ivec *** rr_node_indices,
INOUT boolean * rr_edge_done,
INOUT t_rr_node * rr_node,
IN int wire_to_ipin_switch,
IN enum e_directionality directionality);
static void rr_graph_externals(
t_timing_inf timing_inf,
t_segment_inf * segment_inf,
int num_seg_types,
int nodes_per_chan,
int wire_to_ipin_switch,
enum e_base_cost_type base_cost_type);
void alloc_and_load_edges_and_switches(IN t_rr_node * rr_node,
IN int inode,
IN int num_edges,
IN boolean * rr_edge_done,
IN t_linked_edge * edge_list_head);
static void alloc_net_rr_terminals(void);
static void alloc_and_load_rr_clb_source(t_ivec *** rr_node_indices);
static void load_uniform_opin_switch_pattern_paired(IN int *Fc_out,
IN int num_pins,
IN int *pins_in_chan_seg,
IN int num_wire_inc_muxes,
IN int num_wire_dec_muxes,
IN int *wire_inc_muxes,
IN int *wire_dec_muxes,
INOUT t_rr_node * rr_node,
INOUT boolean * rr_edge_done,
IN t_seg_details * seg_details,
OUT boolean * Fc_clipped);
static int *get_adj_opins(IN int i,
IN int j,
OUT int *num_pins,
IN t_ivec *** rr_node_indices,
IN enum e_rr_type chan_type);
void watch_edges(int inode,
t_linked_edge * edge_list_head);
static void view_mux_size_distribution(t_ivec *** rr_node_indices,
int nodes_per_chan,
t_seg_details * seg_details_x,
t_seg_details * seg_details_y);
static void print_distribution(FILE * fptr,
t_mux_size_distribution * distr_struct);
static void free_type_pin_to_track_map(int***** ipin_to_track_map, t_type_ptr types, int* fc_in);
static void free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map, t_type_ptr types, int nodes_per_chan);
static t_seg_details *alloc_and_load_global_route_seg_details(IN int nodes_per_chan,
IN int global_route_switch);
/* UDSD Modifications by WMF End */
static int *alloc_and_load_actual_fc(IN int num_types,
IN t_type_ptr types,
IN int nodes_per_chan,
IN boolean is_Fc_out,
IN enum e_directionality directionality,
OUT boolean * Fc_clipped);
/******************* Subroutine definitions *******************************/
void
build_rr_graph(IN t_graph_type graph_type,
IN int num_types,
IN t_type_ptr types,
IN int nx,
IN int ny,
IN struct s_grid_tile **grid,
IN int chan_width,
IN struct s_chan_width_dist *chan_capacity_inf,
IN enum e_switch_block_type sb_type,
IN int Fs,
IN int num_seg_types,
IN int num_switches,
IN t_segment_inf * segment_inf,
IN int global_route_switch,
IN int delayless_switch,
IN t_timing_inf timing_inf,
IN int wire_to_ipin_switch,
IN enum e_base_cost_type base_cost_type,
OUT int *Warnings)
{
/* Temp structures used to build graph */
int nodes_per_chan, i, j;
t_seg_details *seg_details = NULL;
int *Fc_in = NULL;
int *Fc_out = NULL;
int *****opin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
int *****ipin_to_track_map = NULL; /* [0..num_types-1][0..num_pins-1][0..height][0..3][0..Fc-1] */
t_ivec ****track_to_ipin_lookup = NULL; /* [0..num_types-1][0..nodes_per_chan-1][0..height][0..3] */
t_ivec ***switch_block_conn = NULL;
short *****unidir_sb_pattern = NULL;
boolean *rr_edge_done = NULL;
boolean is_global_graph;
boolean Fc_clipped;
boolean use_full_seg_groups;
boolean *perturb_ipins = NULL;
enum e_directionality directionality;
int **Fc_xofs = NULL; /* [0..ny][0..nx] */
int **Fc_yofs = NULL; /* [0..nx][0..ny] */
rr_node_indices = NULL;
rr_node = NULL;
num_rr_nodes = 0;
/* Reset warning flag */
*Warnings = RR_GRAPH_NO_WARN;
/* Decode the graph_type */
is_global_graph = FALSE;
if(GRAPH_GLOBAL == graph_type)
{
is_global_graph = TRUE;
}
use_full_seg_groups = FALSE;
if(GRAPH_UNIDIR_TILEABLE == graph_type)
{
use_full_seg_groups = TRUE;
}
directionality = UNI_DIRECTIONAL;
if(GRAPH_BIDIR == graph_type)
{
directionality = BI_DIRECTIONAL;
}
if(is_global_graph)
{
directionality = BI_DIRECTIONAL;
}
/* Global routing uses a single longwire track */
nodes_per_chan = (is_global_graph ? 1 : chan_width);
assert(nodes_per_chan > 0);
/* START SEG_DETAILS */
if(is_global_graph)
{
/* Sets up a single unit length segment type for global routing. */
seg_details =
alloc_and_load_global_route_seg_details(nodes_per_chan,
global_route_switch);
}
else
{
/* Set up the segments, including distributing tracks and staggering.
 * If use_full_seg_groups is specified, nodes_per_chan may be changed;
 * a warning is signaled to the caller if this happens. */
seg_details = alloc_and_load_seg_details(&nodes_per_chan,
max(nx, ny),
num_seg_types,
segment_inf,
use_full_seg_groups,
is_global_graph,
directionality);
if((is_global_graph ? 1 : chan_width) != nodes_per_chan)
{
*Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED;
}
#ifdef CREATE_ECHO_FILES
dump_seg_details(seg_details, nodes_per_chan, "seg_details.txt");
#endif /* CREATE_ECHO_FILES */
}
/* END SEG_DETAILS */
/* START FC */
/* Determine the actual value of Fc */
if(is_global_graph)
{
Fc_in = (int *)my_malloc(sizeof(int) * num_types);
Fc_out = (int *)my_malloc(sizeof(int) * num_types);
for(i = 0; i < num_types; ++i)
{
Fc_in[i] = 1;
Fc_out[i] = 1;
}
}
else
{
Fc_clipped = FALSE;
Fc_in =
alloc_and_load_actual_fc(num_types, types, nodes_per_chan,
FALSE, directionality, &Fc_clipped);
if(Fc_clipped)
{
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
Fc_clipped = FALSE;
Fc_out =
alloc_and_load_actual_fc(num_types, types, nodes_per_chan,
TRUE, directionality, &Fc_clipped);
if(Fc_clipped)
{
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
#ifdef VERBOSE
for(i = 1; i < num_types; ++i)
{ /* Skip "<EMPTY>" */
if(type_descriptors[i].is_Fc_out_full_flex)
{
printf
("Fc Actual Values: Type = %s, Fc_out = full, Fc_in = %d.\n",
type_descriptors[i].name, Fc_in[i]);
}
else
{
printf
("Fc Actual Values: Type = %s, Fc_out = %d, Fc_in = %d.\n",
type_descriptors[i].name, Fc_out[i], Fc_in[i]);
}
}
#endif /* VERBOSE */
}
perturb_ipins =
alloc_and_load_perturb_ipins(nodes_per_chan, num_types, Fc_in,
Fc_out, directionality);
/* END FC */
/* Alloc node lookups, count nodes, alloc rr nodes */
num_rr_nodes = 0;
rr_node_indices =
alloc_and_load_rr_node_indices(nodes_per_chan, nx, ny, &num_rr_nodes,
seg_details);
rr_node = (t_rr_node *) my_malloc(sizeof(t_rr_node) * num_rr_nodes);
memset(rr_node, 0, sizeof(t_rr_node) * num_rr_nodes);
rr_edge_done = (boolean *) my_malloc(sizeof(boolean) * num_rr_nodes);
memset(rr_edge_done, 0, sizeof(boolean) * num_rr_nodes);
/* These are data structures used by the unidir opin mapping. */
if(UNI_DIRECTIONAL == directionality)
{
Fc_xofs = (int **)alloc_matrix(0, ny, 0, nx, sizeof(int));
Fc_yofs = (int **)alloc_matrix(0, nx, 0, ny, sizeof(int));
for(i = 0; i <= nx; ++i)
{
for(j = 0; j <= ny; ++j)
{
Fc_xofs[j][i] = 0;
Fc_yofs[i][j] = 0;
}
}
}
/* START SB LOOKUP */
/* Alloc and load the switch block lookup */
if(is_global_graph)
{
assert(nodes_per_chan == 1);
switch_block_conn =
alloc_and_load_switch_block_conn(1, SUBSET, 3);
}
else if(BI_DIRECTIONAL == directionality)
{
switch_block_conn =
alloc_and_load_switch_block_conn(nodes_per_chan, sb_type, Fs);
}
else
{
assert(UNI_DIRECTIONAL == directionality);
unidir_sb_pattern =
alloc_sblock_pattern_lookup(nx, ny, nodes_per_chan);
for(i = 0; i <= nx; i++)
{
for(j = 0; j <= ny; j++)
{
load_sblock_pattern_lookup(i, j, nodes_per_chan,
seg_details, Fs,
sb_type,
unidir_sb_pattern);
}
}
}
/* END SB LOOKUP */
/* START IPIN MAP */
/* Create ipin map lookups */
ipin_to_track_map = (int *****)my_malloc(sizeof(int ****) * num_types);
track_to_ipin_lookup =
(struct s_ivec ****)my_malloc(sizeof(struct s_ivec ***) * num_types);
for(i = 0; i < num_types; ++i)
{
ipin_to_track_map[i] = alloc_and_load_pin_to_track_map(RECEIVER,
nodes_per_chan,
Fc_in[i],
&types[i],
perturb_ipins[i],
directionality);
track_to_ipin_lookup[i] =
alloc_and_load_track_to_pin_lookup(ipin_to_track_map[i],
Fc_in[i], types[i].height,
types[i].num_pins,
nodes_per_chan);
}
/* END IPIN MAP */
/* START OPIN MAP */
/* Create opin map lookups */
if(BI_DIRECTIONAL == directionality)
{
opin_to_track_map =
(int *****)my_malloc(sizeof(int ****) * num_types);
for(i = 0; i < num_types; ++i)
{
opin_to_track_map[i] =
alloc_and_load_pin_to_track_map(DRIVER,
nodes_per_chan,
Fc_out[i], &types[i],
FALSE,
directionality);
}
}
/* END OPIN MAP */
/* UDSD Modifications by WMF begin */
/* I'm adding 2 new fields to t_rr_node, and I want them initialized to 0. */
for(i = 0; i < num_rr_nodes; i++)
{
rr_node[i].num_wire_drivers = 0;
rr_node[i].num_opin_drivers = 0;
}
alloc_and_load_rr_graph(num_rr_nodes, rr_node, num_seg_types,
seg_details, rr_edge_done,
track_to_ipin_lookup, opin_to_track_map,
switch_block_conn, grid, nx,
ny, Fs, unidir_sb_pattern, Fc_out, Fc_xofs,
Fc_yofs, rr_node_indices, nodes_per_chan,
sb_type, delayless_switch, directionality,
wire_to_ipin_switch, &Fc_clipped);
#if 0
#ifdef MUX_SIZE_DIST_DISPLAY
if(UNI_DIRECTIONAL == directionality)
{
view_mux_size_distribution(rr_node_indices, nodes_per_chan,
seg_details, seg_details);
}
#endif
#endif
/* Update rr_nodes capacities if global routing */
if(is_global_graph) {
for(i = 0; i < num_rr_nodes; i++) {
if(rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
rr_node[i].capacity = chan_width;
}
}
}
rr_graph_externals(timing_inf, segment_inf, num_seg_types,
nodes_per_chan, wire_to_ipin_switch, base_cost_type);
#ifdef CREATE_ECHO_FILES
dump_rr_graph("rr_graph.echo");
#endif /* CREATE_ECHO_FILES */
check_rr_graph(graph_type, num_types, types, nx, ny,
grid, nodes_per_chan, Fs, num_seg_types, num_switches, segment_inf,
global_route_switch, delayless_switch,
wire_to_ipin_switch, seg_details, Fc_in, Fc_out,
rr_node_indices, opin_to_track_map, ipin_to_track_map,
track_to_ipin_lookup, switch_block_conn, perturb_ipins);
/* Free all temp structs */
if(seg_details)
{
free_seg_details(seg_details, nodes_per_chan);
seg_details = NULL;
}
if(Fc_in)
{
free(Fc_in);
Fc_in = NULL;
}
if(Fc_out)
{
free(Fc_out);
Fc_out = NULL;
}
if(perturb_ipins)
{
free(perturb_ipins);
perturb_ipins = NULL;
}
if(switch_block_conn)
{
free_switch_block_conn(switch_block_conn, nodes_per_chan);
switch_block_conn = NULL;
}
if(rr_edge_done)
{
free(rr_edge_done);
rr_edge_done = NULL;
}
if(Fc_xofs)
{
free_matrix(Fc_xofs, 0, ny, 0, sizeof(int));
Fc_xofs = NULL;
}
if(Fc_yofs)
{
free_matrix(Fc_yofs, 0, nx, 0, sizeof(int));
Fc_yofs = NULL;
}
if(unidir_sb_pattern)
{
free_sblock_pattern_lookup(unidir_sb_pattern);
unidir_sb_pattern = NULL;
}
if(opin_to_track_map) {
for(i = 0; i < num_types; ++i)
{
free_matrix4(opin_to_track_map[i], 0, types[i].num_pins - 1,
0, types[i].height - 1, 0, 3, 0, sizeof(int));
}
free(opin_to_track_map);
}
free_type_pin_to_track_map(ipin_to_track_map, types, Fc_in);
free_type_track_to_ipin_map(track_to_ipin_lookup, types, nodes_per_chan);
}
static void
rr_graph_externals( t_timing_inf timing_inf,
t_segment_inf * segment_inf,
int num_seg_types,
int nodes_per_chan,
int wire_to_ipin_switch,
enum e_base_cost_type base_cost_type)
{
add_rr_graph_C_from_switches(timing_inf.C_ipin_cblock);
alloc_and_load_rr_indexed_data(segment_inf, num_seg_types,
rr_node_indices, nodes_per_chan,
wire_to_ipin_switch, base_cost_type);
alloc_net_rr_terminals();
load_net_rr_terminals(rr_node_indices);
alloc_and_load_rr_clb_source(rr_node_indices);
}
static boolean *
alloc_and_load_perturb_ipins(IN int nodes_per_chan,
IN int num_types,
IN int *Fc_in,
IN int *Fc_out,
IN enum e_directionality directionality)
{
int i;
float Fc_ratio;
boolean *result = NULL;
result = (boolean *) my_malloc(num_types * sizeof(boolean));
if(BI_DIRECTIONAL == directionality)
{
for(i = 0; i < num_types; ++i)
{
result[i] = FALSE;
if(Fc_in[i] > Fc_out[i])
{
Fc_ratio = (float)Fc_in[i] / (float)Fc_out[i];
}
else
{
Fc_ratio = (float)Fc_out[i] / (float)Fc_in[i];
}
if((Fc_in[i] <= nodes_per_chan - 2) &&
(fabs(Fc_ratio - nint(Fc_ratio)) <
(0.5 / (float)nodes_per_chan)))
{
result[i] = TRUE;
}
}
}
else
{
/* Unidirectional routing uses mux balancing patterns and
* thus shouldn't need perturbation. */
assert(UNI_DIRECTIONAL == directionality);
for(i = 0; i < num_types; ++i)
{
result[i] = FALSE;
}
}
return result;
}
static t_seg_details *
alloc_and_load_global_route_seg_details(IN int nodes_per_chan,
IN int global_route_switch)
{
t_seg_details *result = NULL;
assert(nodes_per_chan == 1);
result = (t_seg_details *) my_malloc(sizeof(t_seg_details));
result->index = 0;
result->length = 1;
result->wire_switch = global_route_switch;
result->opin_switch = global_route_switch;
result->longline = FALSE;
result->direction = BI_DIRECTION;
result->Cmetal = 0.0;
result->Rmetal = 0.0;
result->start = 1;
result->drivers = MULTI_BUFFERED;
result->cb = (boolean *) my_malloc(sizeof(boolean) * 1);
result->cb[0] = TRUE;
result->sb = (boolean *) my_malloc(sizeof(boolean) * 2);
result->sb[0] = TRUE;
result->sb[1] = TRUE;
result->group_size = 1;
result->group_start = 0;
return result;
}
/* Calculates the actual Fc values for the given nodes_per_chan value */
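/* For example (hypothetical numbers): with W = 10 and uni-directional
 * routing, fac = 2 and num_sets = 5; a fractional Fc of 0.4 then gives
 * Result = 2 * nint(5 * 0.4) = 4, which is then floored at fac and
 * clipped at nodes_per_chan below. */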
static int *
alloc_and_load_actual_fc(IN int num_types,
IN t_type_ptr types,
IN int nodes_per_chan,
IN boolean is_Fc_out,
IN enum e_directionality directionality,
OUT boolean * Fc_clipped)
{
int i;
int *Result = NULL;
int fac, num_sets;
float Fc;
*Fc_clipped = FALSE;
/* Unidir tracks formed in pairs, otherwise no effect. */
fac = 1;
if(UNI_DIRECTIONAL == directionality)
{
fac = 2;
}
assert((nodes_per_chan % fac) == 0);
num_sets = nodes_per_chan / fac;
Result = (int *)my_malloc(sizeof(int) * num_types);
for(i = 0; i < num_types; ++i)
{
Fc = (is_Fc_out ? type_descriptors[i].Fc_out :
type_descriptors[i].Fc_in);
if(type_descriptors[i].is_Fc_frac)
{
Result[i] = fac * nint(num_sets * Fc);
}
else
{
Result[i] = Fc;
}
if(is_Fc_out && type_descriptors[i].is_Fc_out_full_flex)
{
Result[i] = nodes_per_chan;
}
Result[i] = max(Result[i], fac);
if(Result[i] > nodes_per_chan)
{
*Fc_clipped = TRUE;
Result[i] = nodes_per_chan;
}
assert(Result[i] % fac == 0);
}
return Result;
}
/* frees the track to ipin mapping for each physical grid type */
static void
free_type_track_to_ipin_map(struct s_ivec**** track_to_pin_map, t_type_ptr types, int nodes_per_chan)
{
int i, itrack, ioff, iside;
for(i = 0; i < num_types; i++) {
if(track_to_pin_map[i] != NULL) {
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
for(ioff = 0; ioff < types[i].height; ioff++)
{
for(iside = 0; iside < 4; iside++)
{
if(track_to_pin_map[i][itrack][ioff][iside].list != NULL) {
free(track_to_pin_map[i][itrack][ioff][iside].list);
}
}
}
}
free_matrix3(track_to_pin_map[i], 0, nodes_per_chan - 1, 0,
types[i].height - 1, 0,
sizeof(struct s_ivec));
}
}
free(track_to_pin_map);
}
/* frees the ipin to track mapping for each physical grid type */
static void
free_type_pin_to_track_map(int***** ipin_to_track_map, t_type_ptr types, int* fc_in)
{
int i;
for(i = 0; i < num_types; i++) {
free_matrix4(ipin_to_track_map[i], 0, types[i].num_pins - 1,
0, types[i].height - 1, 0, 3, 0, sizeof(int));
}
free(ipin_to_track_map);
}
/* Does the actual work of allocating the rr_graph and filling all the *
* appropriate values. Everything up to this was just a prelude! */
static void
alloc_and_load_rr_graph(IN int num_nodes,
IN t_rr_node * rr_node,
IN int num_seg_types,
IN t_seg_details * seg_details,
IN boolean * rr_edge_done,
IN struct s_ivec ****track_to_ipin_lookup,
IN int *****opin_to_track_map,
IN struct s_ivec ***switch_block_conn,
IN struct s_grid_tile **grid,
IN int nx,
IN int ny,
IN int Fs,
IN short *****sblock_pattern,
IN int *Fc_out,
IN int **Fc_xofs,
IN int **Fc_yofs,
IN t_ivec *** rr_node_indices,
IN int nodes_per_chan,
IN enum e_switch_block_type sb_type,
IN int delayless_switch,
IN enum e_directionality directionality,
IN int wire_to_ipin_switch,
OUT boolean * Fc_clipped)
{
int i, j;
boolean clipped;
int *opin_mux_size = NULL;
/* If Fc gets clipped, this will be flagged to true */
*Fc_clipped = FALSE;
/* Connect the SINKs and SOURCEs to their pins. */
for(i = 0; i <= (nx + 1); i++)
{
for(j = 0; j <= (ny + 1); j++)
{
build_rr_sinks_sources(i, j, rr_node, rr_node_indices,
delayless_switch, grid);
}
}
/* Build opins */
for(i = 0; i <= (nx + 1); ++i)
{
for(j = 0; j <= (ny + 1); ++j)
{
if(BI_DIRECTIONAL == directionality)
{
build_bidir_rr_opins(i, j, rr_node,
rr_node_indices,
opin_to_track_map, Fc_out,
rr_edge_done, seg_details,
grid);
}
else
{
assert(UNI_DIRECTIONAL == directionality);
build_unidir_rr_opins(i, j, grid, Fc_out,
nodes_per_chan, seg_details,
Fc_xofs, Fc_yofs, rr_node,
rr_edge_done, &clipped,
rr_node_indices);
if(clipped)
{
*Fc_clipped = TRUE;
}
}
}
}
/* Take a snapshot of the current fan-in values so we know how many
 * OPINs presently drive each mux. */
opin_mux_size = (int *)my_malloc(sizeof(int) * num_nodes);
for(i = 0; i < num_nodes; ++i)
{
opin_mux_size[i] = rr_node[i].fan_in;
}
/* Build channels */
assert(Fs % 3 == 0);
for(i = 0; i <= nx; i++)
{
for(j = 0; j <= ny; j++)
{
if(i > 0)
{
build_rr_xchan(i, j, track_to_ipin_lookup,
switch_block_conn,
CHANX_COST_INDEX_START,
nodes_per_chan, opin_mux_size,
sblock_pattern, Fs / 3,
seg_details, rr_node_indices,
rr_edge_done, rr_node,
wire_to_ipin_switch,
directionality);
}
if(j > 0)
{
build_rr_ychan(i, j, track_to_ipin_lookup,
switch_block_conn,
CHANX_COST_INDEX_START +
num_seg_types, nodes_per_chan,
opin_mux_size, sblock_pattern,
Fs / 3, seg_details,
rr_node_indices, rr_edge_done,
rr_node, wire_to_ipin_switch,
directionality);
}
}
}
free(opin_mux_size);
}
static void
build_bidir_rr_opins(IN int i,
IN int j,
INOUT t_rr_node * rr_node,
IN t_ivec *** rr_node_indices,
IN int *****opin_to_track_map,
IN int *Fc_out,
IN boolean * rr_edge_done,
IN t_seg_details * seg_details,
IN struct s_grid_tile **grid)
{
int ipin, inode, num_edges, Fc, ofs;
t_type_ptr type;
struct s_linked_edge *edge_list, *next;
/* All OPIN edges for a block must be built at once, so let the
 * offset-0 (base) tile do the work. */
if(grid[i][j].offset > 0)
{
return;
}
type = grid[i][j].type;
Fc = Fc_out[type->index];
for(ipin = 0; ipin < type->num_pins; ++ipin)
{
/* We only are working with opins so skip non-drivers */
if(type->class_inf[type->pin_class[ipin]].type != DRIVER)
{
continue;
}
num_edges = 0;
edge_list = NULL;
for(ofs = 0; ofs < type->height; ++ofs)
{
num_edges +=
get_bidir_opin_connections(i, j + ofs, ipin,
&edge_list,
opin_to_track_map, Fc,
rr_edge_done,
rr_node_indices,
seg_details);
}
inode = get_rr_node_index(i, j, OPIN, ipin, rr_node_indices);
alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
rr_edge_done, edge_list);
while(edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
}
}
void
free_rr_graph(void)
{
int i;
/* Frees all the routing graph data structures, if they have been *
* allocated. I use rr_mem_chunk_list_head as a flag to indicate *
* whether or not the graph has been allocated -- if it is not NULL, *
* a routing graph exists and can be freed. Hence, you can call this *
* routine even if you're not sure of whether a rr_graph exists or not. */
if(rr_mem_chunk_list_head == NULL) /* Nothing to free. */
return;
free_chunk_memory(rr_mem_chunk_list_head); /* Frees ALL "chunked" data */
rr_mem_chunk_list_head = NULL; /* No chunks allocated now. */
chunk_bytes_avail = 0; /* 0 bytes left in current "chunk". */
chunk_next_avail_mem = NULL; /* No current chunk. */
/* Before adding any more free calls here, be sure the data is NOT chunk *
* allocated, as ALL the chunk allocated data is already free! */
free(net_rr_terminals);
for(i = 0; i < num_rr_nodes; i++) {
if(rr_node[i].edges != NULL) {
free(rr_node[i].edges);
}
if(rr_node[i].switches != NULL) {
free(rr_node[i].switches);
}
}
assert(rr_node_indices);
free_rr_node_indices(rr_node_indices);
free(rr_node);
free(rr_indexed_data);
for(i = 0; i < num_blocks; i++)
{
free(rr_blk_source[i]);
}
free(rr_blk_source);
rr_blk_source = NULL;
net_rr_terminals = NULL;
rr_node = NULL;
rr_node_indices = NULL;
rr_indexed_data = NULL;
}
static void
alloc_net_rr_terminals(void)
{
int inet;
net_rr_terminals = (int **)my_malloc(num_nets * sizeof(int *));
for(inet = 0; inet < num_nets; inet++)
{
net_rr_terminals[inet] =
(int *)my_chunk_malloc((net[inet].num_sinks + 1) *
sizeof(int), &rr_mem_chunk_list_head,
&chunk_bytes_avail,
&chunk_next_avail_mem);
}
}
void
load_net_rr_terminals(t_ivec *** rr_node_indices)
{
/* Loads the net_rr_terminals data structure (allocated by *
 * alloc_net_rr_terminals). For each net it stores the rr_node index of *
 * the SOURCE of the net and all the SINKs of the net. *
 * [0..num_nets-1][0..num_pins-1]. Entry [inet][pnum] stores the rr *
 * index corresponding to the SOURCE (opin) or SINK (ipin) of pnum. */
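/* Concretely: net_rr_terminals[inet][0] is the rr_node index of the
 * net's SOURCE, and net_rr_terminals[inet][1..num_sinks] are its SINKs. */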
int inet, ipin, inode, iblk, i, j, k, node_block_pin, iclass;
t_type_ptr type;
for(inet = 0; inet < num_nets; inet++)
{
for(ipin = 0; ipin <= net[inet].num_sinks; ipin++)
{
iblk = net[inet].node_block[ipin];
i = block[iblk].x;
j = block[iblk].y;
k = block[iblk].z;
type = block[iblk].type;
/* In the routing graph, each (x, y) location has unique pins on it
* so when there is capacity, blocks are packed and their pin numbers
* are offset to get their actual rr_node */
node_block_pin = net[inet].node_block_pin[ipin];
iclass = type->pin_class[node_block_pin];
inode = get_rr_node_index(i, j, (ipin == 0 ? SOURCE : SINK), /* First pin is driver */
iclass, rr_node_indices);
net_rr_terminals[inet][ipin] = inode;
}
}
}
static void
alloc_and_load_rr_clb_source(t_ivec *** rr_node_indices)
{
/* Saves the rr_node corresponding to each SOURCE and SINK in each FB *
* in the FPGA. Currently only the SOURCE rr_node values are used, and *
* they are used only to reserve pins for locally used OPINs in the router. *
* [0..num_blocks-1][0..num_class-1]. The values for blocks that are pads *
* are NOT valid. */
int iblk, i, j, iclass, inode;
int class_low, class_high;
t_rr_type rr_type;
t_type_ptr type;
rr_blk_source = (int **)my_malloc(num_blocks * sizeof(int *));
for(iblk = 0; iblk < num_blocks; iblk++)
{
type = block[iblk].type;
get_class_range_for_block(iblk, &class_low, &class_high);
rr_blk_source[iblk] =
(int *)my_malloc(type->num_class * sizeof(int));
for(iclass = 0; iclass < type->num_class; iclass++)
{
if(iclass >= class_low && iclass <= class_high)
{
i = block[iblk].x;
j = block[iblk].y;
if(type->class_inf[iclass].type == DRIVER)
rr_type = SOURCE;
else
rr_type = SINK;
inode =
get_rr_node_index(i, j, rr_type, iclass,
rr_node_indices);
rr_blk_source[iblk][iclass] = inode;
}
else
{
rr_blk_source[iblk][iclass] = OPEN;
}
}
}
}
static void
build_rr_sinks_sources(IN int i,
IN int j,
IN t_rr_node * rr_node,
IN t_ivec *** rr_node_indices,
IN int delayless_switch,
IN struct s_grid_tile **grid)
{
/* Loads IPIN, SINK, SOURCE, and OPIN.
* Loads IPIN to SINK edges, and SOURCE to OPIN edges */
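/* For example (illustrative): an output class containing two logically
 * equivalent pins becomes one SOURCE node with capacity 2 and delayless
 * edges to its two OPIN nodes; see the DRIVER branch below. */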
int ipin, iclass, inode, pin_num, to_node, num_edges;
int num_class, num_pins;
t_type_ptr type;
struct s_class *class_inf;
int *pin_class;
/* Since we share nodes within a large block, only the start (offset-0)
 * tile can initialize sinks, sources, and pins */
if(grid[i][j].offset > 0)
return;
type = grid[i][j].type;
num_class = type->num_class;
class_inf = type->class_inf;
num_pins = type->num_pins;
pin_class = type->pin_class;
/* SINKS and SOURCE to OPIN edges */
for(iclass = 0; iclass < num_class; iclass++)
{
if(class_inf[iclass].type == DRIVER)
{ /* SOURCE */
inode =
get_rr_node_index(i, j, SOURCE, iclass,
rr_node_indices);
num_edges = class_inf[iclass].num_pins;
rr_node[inode].num_edges = num_edges;
rr_node[inode].edges =
(int *)my_malloc(num_edges * sizeof(int));
rr_node[inode].switches =
(short *)my_malloc(num_edges * sizeof(short));
for(ipin = 0; ipin < class_inf[iclass].num_pins; ipin++)
{
pin_num = class_inf[iclass].pinlist[ipin];
to_node =
get_rr_node_index(i, j, OPIN, pin_num,
rr_node_indices);
rr_node[inode].edges[ipin] = to_node;
rr_node[inode].switches[ipin] = delayless_switch;
++rr_node[to_node].fan_in;
}
rr_node[inode].cost_index = SOURCE_COST_INDEX;
rr_node[inode].type = SOURCE;
}
else
{ /* SINK */
assert(class_inf[iclass].type == RECEIVER);
inode =
get_rr_node_index(i, j, SINK, iclass,
rr_node_indices);
/* NOTE: To allow route throughs through clbs, change the lines below to *
* make an edge from the input SINK to the output SOURCE. Do for just the *
* special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it *
* leads to. If route throughs are allowed, you may want to increase the *
* base cost of OPINs and/or SOURCES so they aren't used excessively. */
/* Initialize to unconnected so the fields hold well-defined values */
rr_node[inode].num_edges = 0;
rr_node[inode].edges = NULL;
rr_node[inode].switches = NULL;
rr_node[inode].cost_index = SINK_COST_INDEX;
rr_node[inode].type = SINK;
}
/* Things common to both SOURCEs and SINKs. */
rr_node[inode].capacity = class_inf[iclass].num_pins;
rr_node[inode].occ = 0;
rr_node[inode].xlow = i;
rr_node[inode].xhigh = i;
rr_node[inode].ylow = j;
rr_node[inode].yhigh = j + type->height - 1;
rr_node[inode].R = 0;
rr_node[inode].C = 0;
rr_node[inode].ptc_num = iclass;
rr_node[inode].direction = OPEN;
rr_node[inode].drivers = OPEN;
}
/* Connect IPINs to SINKs, and initialize OPINs as edge-less stubs */
for(ipin = 0; ipin < num_pins; ipin++)
{
iclass = pin_class[ipin];
if(class_inf[iclass].type == RECEIVER)
{
inode =
get_rr_node_index(i, j, IPIN, ipin, rr_node_indices);
to_node =
get_rr_node_index(i, j, SINK, iclass,
rr_node_indices);
rr_node[inode].num_edges = 1;
rr_node[inode].edges = (int *)my_malloc(sizeof(int));
rr_node[inode].switches =
(short *)my_malloc(sizeof(short));
rr_node[inode].edges[0] = to_node;
rr_node[inode].switches[0] = delayless_switch;
++rr_node[to_node].fan_in;
rr_node[inode].cost_index = IPIN_COST_INDEX;
rr_node[inode].type = IPIN;
}
else
{
assert(class_inf[iclass].type == DRIVER);
inode =
get_rr_node_index(i, j, OPIN, ipin, rr_node_indices);
rr_node[inode].num_edges = 0;
rr_node[inode].edges = NULL;
rr_node[inode].switches = NULL;
rr_node[inode].cost_index = OPIN_COST_INDEX;
rr_node[inode].type = OPIN;
}
/* Common to both DRIVERs and RECEIVERs */
rr_node[inode].capacity = 1;
rr_node[inode].occ = 0;
rr_node[inode].xlow = i;
rr_node[inode].xhigh = i;
rr_node[inode].ylow = j;
rr_node[inode].yhigh = j + type->height - 1;
rr_node[inode].C = 0;
rr_node[inode].R = 0;
rr_node[inode].ptc_num = ipin;
rr_node[inode].direction = OPEN;
rr_node[inode].drivers = OPEN;
}
}
static void
build_rr_xchan(IN int i,
IN int j,
IN struct s_ivec ****track_to_ipin_lookup,
IN struct s_ivec ***switch_block_conn,
IN int cost_index_offset,
IN int nodes_per_chan,
IN int *opin_mux_size,
IN short *****sblock_pattern,
IN int Fs_per_side,
IN t_seg_details * seg_details,
IN t_ivec *** rr_node_indices,
INOUT boolean * rr_edge_done,
INOUT t_rr_node * rr_node,
IN int wire_to_ipin_switch,
IN enum e_directionality directionality)
{
/* Loads up all the routing resource nodes in the x-directed channel *
* segments starting at (i,j). */
int itrack, istart, iend, num_edges, inode, length;
struct s_linked_edge *edge_list, *next;
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
istart = get_seg_start(seg_details, itrack, j, i);
iend = get_seg_end(seg_details, itrack, istart, j, nx);
if(i > istart)
continue; /* Not the start of this segment. */
edge_list = NULL;
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
num_edges += get_track_to_ipins(istart, j, itrack,
&edge_list,
rr_node_indices,
track_to_ipin_lookup,
seg_details,
CHANX, nx,
wire_to_ipin_switch,
directionality);
if(j > 0)
{
num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
j, CHANY, nx,
nodes_per_chan,
opin_mux_size,
Fs_per_side,
sblock_pattern,
&edge_list,
seg_details,
directionality,
rr_node_indices,
rr_edge_done,
switch_block_conn);
}
if(j < ny)
{
num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
j + 1, CHANY, nx,
nodes_per_chan,
opin_mux_size,
Fs_per_side,
sblock_pattern,
&edge_list,
seg_details,
directionality,
rr_node_indices,
rr_edge_done,
switch_block_conn);
}
if(istart > 1)
{
num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
istart - 1, CHANX, nx,
nodes_per_chan,
opin_mux_size,
Fs_per_side,
sblock_pattern,
&edge_list,
seg_details,
directionality,
rr_node_indices,
rr_edge_done,
switch_block_conn);
}
if(iend < nx)
{
num_edges += get_track_to_tracks(j, istart, itrack, CHANX,
iend + 1, CHANX, nx,
nodes_per_chan,
opin_mux_size,
Fs_per_side,
sblock_pattern,
&edge_list,
seg_details,
directionality,
rr_node_indices,
rr_edge_done,
switch_block_conn);
}
inode = get_rr_node_index(i, j, CHANX, itrack, rr_node_indices);
alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
rr_edge_done, edge_list);
while(edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
/* Edge arrays have now been built up. Do everything else. */
rr_node[inode].cost_index =
cost_index_offset + seg_details[itrack].index;
rr_node[inode].occ = 0;
rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */
rr_node[inode].xlow = istart;
rr_node[inode].xhigh = iend;
rr_node[inode].ylow = j;
rr_node[inode].yhigh = j;
length = iend - istart + 1;
rr_node[inode].R = length * seg_details[itrack].Rmetal;
rr_node[inode].C = length * seg_details[itrack].Cmetal;
rr_node[inode].ptc_num = itrack;
rr_node[inode].type = CHANX;
rr_node[inode].direction = seg_details[itrack].direction;
rr_node[inode].drivers = seg_details[itrack].drivers;
}
}
static void
build_rr_ychan(IN int i,
IN int j,
IN struct s_ivec ****track_to_ipin_lookup,
IN struct s_ivec ***switch_block_conn,
IN int cost_index_offset,
IN int nodes_per_chan,
IN int *opin_mux_size,
IN short *****sblock_pattern,
IN int Fs_per_side,
IN t_seg_details * seg_details,
IN t_ivec *** rr_node_indices,
INOUT boolean * rr_edge_done,
INOUT t_rr_node * rr_node,
IN int wire_to_ipin_switch,
IN enum e_directionality directionality)
{
/* Loads up all the routing resource nodes in the y-directed channel *
* segments starting at (i,j). */
int itrack, istart, iend, num_edges, inode, length;
struct s_linked_edge *edge_list, *next;
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
istart = get_seg_start(seg_details, itrack, i, j);
iend = get_seg_end(seg_details, itrack, istart, i, ny);
if(j > istart)
continue; /* Not the start of this segment. */
edge_list = NULL;
/* First count number of edges and put the edges in a linked list. */
num_edges = 0;
num_edges += get_track_to_ipins(istart, i, itrack,
&edge_list,
rr_node_indices,
track_to_ipin_lookup,
seg_details,
CHANY, ny,
wire_to_ipin_switch,
directionality);
if(i > 0)
{
num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
i, CHANX, ny,
nodes_per_chan,
opin_mux_size,
Fs_per_side,
sblock_pattern,
&edge_list,
seg_details,
directionality,
rr_node_indices,
rr_edge_done,
switch_block_conn);
}
if(i < nx)
{
num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
i + 1, CHANX, ny,
nodes_per_chan,
opin_mux_size,
Fs_per_side,
sblock_pattern,
&edge_list,
seg_details,
directionality,
rr_node_indices,
rr_edge_done,
switch_block_conn);
}
if(istart > 1)
{
num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
istart - 1, CHANY, ny,
nodes_per_chan,
opin_mux_size,
Fs_per_side,
sblock_pattern,
&edge_list,
seg_details,
directionality,
rr_node_indices,
rr_edge_done,
switch_block_conn);
}
if(iend < ny)
{
num_edges += get_track_to_tracks(i, istart, itrack, CHANY,
iend + 1, CHANY, ny,
nodes_per_chan,
opin_mux_size,
Fs_per_side,
sblock_pattern,
&edge_list,
seg_details,
directionality,
rr_node_indices,
rr_edge_done,
switch_block_conn);
}
inode = get_rr_node_index(i, j, CHANY, itrack, rr_node_indices);
alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
rr_edge_done, edge_list);
while(edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
/* Edge arrays have now been built up. Do everything else. */
rr_node[inode].cost_index =
cost_index_offset + seg_details[itrack].index;
rr_node[inode].occ = 0;
rr_node[inode].capacity = 1; /* GLOBAL routing handled elsewhere */
rr_node[inode].xlow = i;
rr_node[inode].xhigh = i;
rr_node[inode].ylow = istart;
rr_node[inode].yhigh = iend;
length = iend - istart + 1;
rr_node[inode].R = length * seg_details[itrack].Rmetal;
rr_node[inode].C = length * seg_details[itrack].Cmetal;
rr_node[inode].ptc_num = itrack;
rr_node[inode].type = CHANY;
rr_node[inode].direction = seg_details[itrack].direction;
rr_node[inode].drivers = seg_details[itrack].drivers;
}
}
void
watch_edges(int inode,
t_linked_edge * edge_list_head)
{
t_linked_edge *list_ptr;
int i, to_node;
list_ptr = edge_list_head;
i = 0;
printf("!!! Watching Node %d !!!!\n", inode);
print_rr_node(stdout, rr_node, inode);
printf("Currently connects to: \n");
while(list_ptr != NULL)
{
to_node = list_ptr->edge;
print_rr_node(stdout, rr_node, to_node);
list_ptr = list_ptr->next;
i++;
}
}
void
alloc_and_load_edges_and_switches(IN t_rr_node * rr_node,
IN int inode,
IN int num_edges,
INOUT boolean * rr_edge_done,
IN t_linked_edge * edge_list_head)
{
/* Sets up all the edge related information for rr_node inode (num_edges, *
 * the edges array and the switches array). The edge_list_head points to *
 * a list of the num_edges edges and switches to put in the arrays; the *
 * caller is responsible for freeing that list. This routine also resets *
 * the rr_edge_done array for the next rr_node (i.e. sets it so that no *
 * edges are marked as having been seen before). */
t_linked_edge *list_ptr;
int i;
/* Check we aren't overwriting edges */
assert(rr_node[inode].num_edges < 1);
assert(NULL == rr_node[inode].edges);
assert(NULL == rr_node[inode].switches);
rr_node[inode].num_edges = num_edges;
rr_node[inode].edges = (int *)my_malloc(num_edges * sizeof(int));
rr_node[inode].switches = (short *)my_malloc(num_edges * sizeof(short));
i = 0;
list_ptr = edge_list_head;
while(list_ptr && (i < num_edges))
{
rr_node[inode].edges[i] = list_ptr->edge;
rr_node[inode].switches[i] = list_ptr->iswitch;
++rr_node[list_ptr->edge].fan_in;
/* Unmark the edge since we are done considering fanout from node. */
rr_edge_done[list_ptr->edge] = FALSE;
list_ptr = list_ptr->next;
++i;
}
assert(list_ptr == NULL);
assert(i == num_edges);
}
static int ****
alloc_and_load_pin_to_track_map(IN enum e_pin_type pin_type,
IN int nodes_per_chan,
IN int Fc,
IN t_type_ptr Type,
IN boolean perturb_switch_pattern,
IN enum e_directionality directionality)
{
int **num_dir; /* [0..height][0..3] Number of *physical* pins on each side. */
int ***dir_list; /* [0..height][0..3][0..num_pins-1] list of pins of the *
 * correct type on each side. Max possible space allocated for simplicity */
int i, j, k, iside, ipin, iclass, num_phys_pins, pindex, ioff;
int *pin_num_ordering, *side_ordering, *offset_ordering;
int **num_done_per_dir; /* [0..height][0..3] */
int ****tracks_connected_to_pin; /* [0..num_pins-1][0..height][0..3][0..Fc-1] */
/* NB: This wastes some space. Could set tracks_..._pin[ipin][ioff][iside] =
 * NULL if there is no pin on that side, or that pin is of the wrong type.
 * Probably not enough memory to worry about, esp. as it's temporary.
 * If pin ipin at offset ioff on side iside does not exist or is of the
 * wrong type, tracks_connected_to_pin[ipin][ioff][iside][0] = OPEN. */
if(Type->num_pins < 1)
{
return NULL;
}
tracks_connected_to_pin = (int ****)
alloc_matrix4(0, Type->num_pins - 1,
0, Type->height - 1, 0, 3, 0, Fc - 1, sizeof(int));
for(ipin = 0; ipin < Type->num_pins; ipin++)
{
for(ioff = 0; ioff < Type->height; ioff++)
{
for(iside = 0; iside < 4; iside++)
{
for(i = 0; i < Fc; ++i)
{
tracks_connected_to_pin[ipin][ioff][iside][i] = OPEN; /* Unconnected. */
}
}
}
}
num_dir = (int **)alloc_matrix(0, Type->height - 1, 0, 3, sizeof(int));
dir_list =
(int ***)alloc_matrix3(0, Type->height - 1, 0, 3, 0,
Type->num_pins - 1, sizeof(int));
/* Defensive coding. Try to crash hard if I use an unset entry. */
for(i = 0; i < Type->height; i++)
for(j = 0; j < 4; j++)
for(k = 0; k < Type->num_pins; k++)
dir_list[i][j][k] = (-1);
for(i = 0; i < Type->height; i++)
for(j = 0; j < 4; j++)
num_dir[i][j] = 0;
for(ipin = 0; ipin < Type->num_pins; ipin++)
{
iclass = Type->pin_class[ipin];
if(Type->class_inf[iclass].type != pin_type) /* Doing either ipins OR opins */
continue;
/* Pins connecting only to global resources get no switches -> keeps the *
* area model accurate. */
if(Type->is_global_pin[ipin])
continue;
for(ioff = 0; ioff < Type->height; ioff++)
{
for(iside = 0; iside < 4; iside++)
{
if(Type->pinloc[ioff][iside][ipin] == 1)
{
dir_list[ioff][iside][num_dir[ioff][iside]] = ipin;
num_dir[ioff][iside]++;
}
}
}
}
num_phys_pins = 0;
for(ioff = 0; ioff < Type->height; ioff++)
{
for(iside = 0; iside < 4; iside++)
num_phys_pins += num_dir[ioff][iside]; /* Num. physical pins per type */
}
num_done_per_dir =
(int **)alloc_matrix(0, Type->height - 1, 0, 3, sizeof(int));
for(ioff = 0; ioff < Type->height; ioff++)
{
for(iside = 0; iside < 4; iside++)
{
num_done_per_dir[ioff][iside] = 0;
}
}
pin_num_ordering = (int *)my_malloc(num_phys_pins * sizeof(int));
side_ordering = (int *)my_malloc(num_phys_pins * sizeof(int));
offset_ordering = (int *)my_malloc(num_phys_pins * sizeof(int));
/* The connection block pattern used here distributes pins evenly across *
 * the tracks of ALL sides of the clb at once. It ensures that each pin *
 * connects to spaced-out tracks in its connection block, and that the *
 * other pins (potentially in other C blocks) connect to the remaining *
 * tracks first. This doesn't matter for large Fc, but it should make a *
 * fairly good low-Fc block that leverages the fact that usually lots of *
 * pins are logically equivalent. */
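/* Concretely, the walk below circles the block perimeter: for a height-1
 * block the sides are visited in the order TOP, RIGHT, BOTTOM, LEFT for
 * pin index 0, then again for pin index 1, and so on; taller blocks
 * additionally snake down the RIGHT side and back up the LEFT side
 * through each height offset. */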
iside = LEFT;
ioff = Type->height - 1;
ipin = 0;
pindex = -1;
while(ipin < num_phys_pins)
{
if(iside == TOP)
{
iside = RIGHT;
}
else if(iside == RIGHT)
{
if(ioff <= 0)
{
iside = BOTTOM;
}
else
{
ioff--;
}
}
else if(iside == BOTTOM)
{
iside = LEFT;
}
else
{
assert(iside == LEFT);
if(ioff >= Type->height - 1)
{
pindex++;
iside = TOP;
}
else
{
ioff++;
}
}
assert(pindex < num_phys_pins); /* Number of physical pins bounds number of logical pins */
if(num_done_per_dir[ioff][iside] >= num_dir[ioff][iside])
continue;
pin_num_ordering[ipin] = dir_list[ioff][iside][pindex];
side_ordering[ipin] = iside;
offset_ordering[ipin] = ioff;
assert(Type->pinloc[ioff][iside][dir_list[ioff][iside][pindex]]);
num_done_per_dir[ioff][iside]++;
ipin++;
}
if(perturb_switch_pattern)
{
load_perturbed_switch_pattern(Type,
tracks_connected_to_pin,
num_phys_pins,
pin_num_ordering,
side_ordering,
offset_ordering,
nodes_per_chan, Fc, directionality);
}
else
{
load_uniform_switch_pattern(Type,
tracks_connected_to_pin,
num_phys_pins,
pin_num_ordering,
side_ordering,
offset_ordering,
nodes_per_chan, Fc, directionality);
}
check_all_tracks_reach_pins(Type, tracks_connected_to_pin,
nodes_per_chan, Fc, pin_type);
/* Free all temporary storage. */
free_matrix(num_dir, 0, Type->height - 1, 0, sizeof(int));
free_matrix3(dir_list, 0, Type->height - 1, 0, 3, 0, sizeof(int));
free_matrix(num_done_per_dir, 0, Type->height - 1, 0, sizeof(int));
free(pin_num_ordering);
free(side_ordering);
free(offset_ordering);
return tracks_connected_to_pin;
}
static void
load_uniform_switch_pattern(IN t_type_ptr type,
INOUT int ****tracks_connected_to_pin,
IN int num_phys_pins,
IN int *pin_num_ordering,
IN int *side_ordering,
IN int *offset_ordering,
IN int nodes_per_chan,
IN int Fc,
enum e_directionality directionality)
{
/* Loads the tracks_connected_to_pin array with an even distribution of *
* switches across the tracks for each pin. For example, each pin connects *
* to every 4.3rd track in a channel, with exactly which tracks a pin *
* connects to staggered from pin to pin. */
int i, j, ipin, iside, ioff, itrack, k;
float f_track, fc_step;
int group_size;
float step_size;
/* Uni-directional drive is implemented to ensure no directional bias,
 * which leads to the two important points noted below. */
/* 1. Spacing should be (W/2)/(Fc/2), and step_size should be
 * spacing/(num_phys_pins); lay down 2 switches on an adjacent pair of
 * tracks at a time to ensure no directional bias. Basically, treat W
 * (even) as W/2 pairs of tracks, and assign switches to a pair at a
 * time. We can do this because W is guaranteed to be even; however, the
 * same approach cannot be applied to the Fc_out pattern when L > 1 and
 * W is not a multiple of 2L.
 *
 * 2. This generic pattern should be considered the tileable physical
 * layout, meaning all track #s here are physical #s, so later we must
 * use the vpr_to_phy conversion to find the actual logical #s to
 * connect. This also means I will not use get_output_block_companion_track
 * to ensure no bias, since that describes a logical # -- that would
 * confuse people. */
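/* Worked example (hypothetical numbers): W = 8, Fc = 4, uni-directional,
 * 8 physical pins -> group_size = 2, fc_step = 2.0, step_size = 0.25.
 * Pin 0 then connects to track pairs {0,1} and {4,5}; pin 4 (f_track
 * offset 1.0) connects to {2,3} and {6,7}. */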
step_size = (float)nodes_per_chan / (float)(Fc * num_phys_pins);
if(directionality == BI_DIRECTIONAL)
{
group_size = 1;
}
else
{
assert(directionality == UNI_DIRECTIONAL);
group_size = 2;
}
assert((nodes_per_chan % group_size == 0) && (Fc % group_size == 0));
fc_step = (float)nodes_per_chan / (float)Fc;
for(i = 0; i < num_phys_pins; i++)
{
ipin = pin_num_ordering[i];
iside = side_ordering[i];
ioff = offset_ordering[i];
/* Bi-directional treats each track separately, uni-directional works with pairs of tracks */
for(j = 0; j < (Fc / group_size); j++)
{
f_track = (i * step_size) + (j * fc_step);
itrack = ((int)f_track) * group_size;
/* Catch possible floating point rounding error */
itrack = min(itrack, nodes_per_chan - group_size);
/* Assign the group of tracks for the Fc pattern */
for(k = 0; k < group_size; ++k)
{
tracks_connected_to_pin[ipin][ioff][iside][group_size * j + k] =
itrack + k;
}
}
}
}
static void
load_perturbed_switch_pattern(IN t_type_ptr type,
INOUT int ****tracks_connected_to_pin,
IN int num_phys_pins,
IN int *pin_num_ordering,
IN int *side_ordering,
IN int *offset_ordering,
IN int nodes_per_chan,
IN int Fc,
enum e_directionality directionality)
{
/* Loads the tracks_connected_to_pin array with an unevenly distributed *
* set of switches across the channel. This is done for inputs when *
* Fc_input = Fc_output to avoid creating "pin domains" -- certain output *
* pins being able to talk only to certain input pins because their switch *
* patterns exactly line up. Distribute Fc/2 + 1 switches over half the *
* channel and Fc/2 - 1 switches over the other half to make the switch *
* pattern different from the uniform one of the outputs. Also, have half *
* the pins put the "dense" part of their connections in the first half of *
* the channel and the other half put the "dense" part in the second half, *
* to make sure each track can connect to about the same number of ipins. */
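/* Worked example (hypothetical numbers): W = 12 and Fc = 6 give
 * Fc_dense = 4, Fc_sparse = 2, spacing_dense = 1.5, spacing_sparse = 3.0;
 * a pin starting at f_track = 0 with the dense half first then hits
 * tracks {0, 1, 3, 4} followed by {6, 9}. */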
int i, j, ipin, iside, itrack, ihalf, iconn, ioff;
int Fc_dense, Fc_sparse, Fc_half[2];
float f_track, spacing_dense, spacing_sparse, spacing[2];
float step_size;
assert(directionality == BI_DIRECTIONAL);
step_size = (float)nodes_per_chan / (float)(Fc * num_phys_pins);
Fc_dense = (Fc / 2) + 1;
Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */
spacing_dense = (float)nodes_per_chan / (float)(2 * Fc_dense);
spacing_sparse = (float)nodes_per_chan / (float)(2 * Fc_sparse);
for(i = 0; i < num_phys_pins; i++)
{
ipin = pin_num_ordering[i];
iside = side_ordering[i];
ioff = offset_ordering[i];
/* Flip every pin to balance switch density */
spacing[i % 2] = spacing_dense;
Fc_half[i % 2] = Fc_dense;
spacing[(i + 1) % 2] = spacing_sparse;
Fc_half[(i + 1) % 2] = Fc_sparse;
f_track = i * step_size; /* Start point. Staggered from pin to pin */
iconn = 0;
for(ihalf = 0; ihalf < 2; ihalf++)
{ /* For both dense and sparse halves. */
for(j = 0; j < Fc_half[ihalf]; ++j)
{
itrack = (int)f_track;
/* Can occasionally get wraparound due to floating point rounding.
This is okay because the starting position > 0 when this occurs,
so the connection is valid and fine */
itrack = itrack % nodes_per_chan;
tracks_connected_to_pin[ipin][ioff][iside][iconn]
= itrack;
f_track += spacing[ihalf];
iconn++;
}
}
} /* End for all physical pins. */
}
static void
check_all_tracks_reach_pins(t_type_ptr type,
int ****tracks_connected_to_pin,
int nodes_per_chan,
int Fc,
enum e_pin_type ipin_or_opin)
{
/* Checks that all tracks can be reached by some pin. */
int iconn, iside, itrack, ipin, ioff;
int *num_conns_to_track; /* [0..nodes_per_chan-1] */
assert(nodes_per_chan > 0);
num_conns_to_track = (int *)my_calloc(nodes_per_chan, sizeof(int));
for(ipin = 0; ipin < type->num_pins; ipin++)
{
for(ioff = 0; ioff < type->height; ioff++)
{
for(iside = 0; iside < 4; iside++)
{
if(tracks_connected_to_pin[ipin][ioff][iside][0]
!= OPEN)
{ /* Pin exists */
for(iconn = 0; iconn < Fc; iconn++)
{
itrack = tracks_connected_to_pin[ipin][ioff][iside][iconn];
num_conns_to_track[itrack]++;
}
}
}
}
}
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
if(num_conns_to_track[itrack] <= 0)
{
printf
("Warning (check_all_tracks_reach_pins): track %d does not \n"
"\tconnect to any FB ", itrack);
if(ipin_or_opin == DRIVER)
printf("OPINs.\n");
else
printf("IPINs.\n");
}
}
free(num_conns_to_track);
}
/* Allocates and loads the track to ipin lookup for each physical grid type. This
* is the same information as the ipin_to_track map but accessed in a different way. */
static struct s_ivec ***
alloc_and_load_track_to_pin_lookup(IN int ****pin_to_track_map,
IN int Fc,
IN int height,
IN int num_pins,
IN int nodes_per_chan)
{
int ipin, iside, itrack, iconn, ioff, pin_counter;
struct s_ivec ***track_to_pin_lookup;
/* [0..nodes_per_chan-1][0..height][0..3]. For each track number it stores a vector
* for each of the four sides. x-directed channels will use the TOP and
* BOTTOM vectors to figure out what clb input pins they connect to above
* and below them, respectively, while y-directed channels use the LEFT
* and RIGHT vectors. Each vector contains an nelem field saying how many
* ipins it connects to. The list[0..nelem-1] array then gives the pin
* numbers. */
/* Note that a clb pin that connects to a channel on its RIGHT means that *
* that channel connects to a clb pin on its LEFT. The convention used *
* here is always in the perspective of the CLB */
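/* Example (illustrative only): to walk the ipins reachable from track 3
 * at height offset 0 on a block's TOP side, a consumer would do:
 *
 *     struct s_ivec conn = track_to_pin_lookup[3][0][TOP];
 *     for(k = 0; k < conn.nelem; k++)
 *         use_pin(conn.list[k]);   -- use_pin() is a hypothetical consumer
 */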
if(num_pins < 1)
{
return NULL;
}
/* Alloc and zero the lookup table */
track_to_pin_lookup =
(struct s_ivec ***)alloc_matrix3(0, nodes_per_chan - 1, 0,
height - 1, 0, 3,
sizeof(struct s_ivec));
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
for(ioff = 0; ioff < height; ioff++)
{
for(iside = 0; iside < 4; iside++)
{
track_to_pin_lookup[itrack][ioff][iside].nelem = 0;
track_to_pin_lookup[itrack][ioff][iside].list = NULL;
}
}
}
/* Counting pass. */
for(ipin = 0; ipin < num_pins; ipin++)
{
for(ioff = 0; ioff < height; ioff++)
{
for(iside = 0; iside < 4; iside++)
{
if(pin_to_track_map[ipin][ioff][iside][0] == OPEN)
continue;
for(iconn = 0; iconn < Fc; iconn++)
{
itrack = pin_to_track_map[ipin][ioff][iside][iconn];
track_to_pin_lookup[itrack][ioff][iside].nelem++;
}
}
}
}
/* Allocate space. */
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
for(ioff = 0; ioff < height; ioff++)
{
for(iside = 0; iside < 4; iside++)
{
track_to_pin_lookup[itrack][ioff][iside].list = NULL; /* Defensive code */
if(track_to_pin_lookup[itrack][ioff][iside].nelem != 0)
{
track_to_pin_lookup[itrack][ioff][iside].list = (int *)
my_malloc(track_to_pin_lookup[itrack][ioff][iside].nelem *
sizeof(int));
track_to_pin_lookup[itrack][ioff][iside].nelem = 0;
}
}
}
}
/* Loading pass. */
for(ipin = 0; ipin < num_pins; ipin++)
{
for(ioff = 0; ioff < height; ioff++)
{
for(iside = 0; iside < 4; iside++)
{
if(pin_to_track_map[ipin][ioff][iside][0] == OPEN)
continue;
for(iconn = 0; iconn < Fc; iconn++)
{
itrack = pin_to_track_map[ipin][ioff][iside][iconn];
pin_counter = track_to_pin_lookup[itrack][ioff][iside].nelem;
track_to_pin_lookup[itrack][ioff][iside].list[pin_counter] = ipin;
track_to_pin_lookup[itrack][ioff][iside].nelem++;
}
}
}
}
return track_to_pin_lookup;
}
/* A utility routine to dump the contents of the routing resource graph *
* (everything -- connectivity, occupancy, cost, etc.) into a file. Used *
* only for debugging. */
void
dump_rr_graph(IN const char *file_name)
{
int inode;
FILE *fp;
fp = my_fopen(file_name, "w");
for(inode = 0; inode < num_rr_nodes; inode++)
{
print_rr_node(fp, rr_node, inode);
fprintf(fp, "\n");
}
#if 0
fprintf(fp, "\n\n%d rr_indexed_data entries.\n\n", num_rr_indexed_data);
for(index = 0; index < num_rr_indexed_data; index++)
{
print_rr_indexed_data(fp, index);
fprintf(fp, "\n");
}
#endif
fclose(fp);
}
/* Prints all the data about node inode to file fp. */
static void
print_rr_node(FILE * fp,
t_rr_node * rr_node,
int inode)
{
static const char *name_type[] = {
"SOURCE", "SINK", "IPIN", "OPIN", "CHANX", "CHANY"
};
static const char *direction_name[] = {
"OPEN", "INC_DIRECTION", "DEC_DIRECTION", "BI_DIRECTION"
};
static const char *drivers_name[] = {
"OPEN", "MULTI_BUFFER", "MULTI_MUXED", "MULTI_MERGED", "SINGLE"
};
t_rr_type rr_type;
int iconn;
rr_type = rr_node[inode].type;
/* Make sure we don't overrun const arrays */
assert(rr_type < (sizeof(name_type) / sizeof(char *)));
assert((rr_node[inode].direction + 1) <
(sizeof(direction_name) / sizeof(char *)));
assert((rr_node[inode].drivers + 1) <
(sizeof(drivers_name) / sizeof(char *)));
fprintf(fp, "Node: %d %s ", inode, name_type[rr_type]);
if((rr_node[inode].xlow == rr_node[inode].xhigh) &&
(rr_node[inode].ylow == rr_node[inode].yhigh))
{
fprintf(fp, "(%d, %d) ", rr_node[inode].xlow,
rr_node[inode].ylow);
}
else
{
fprintf(fp, "(%d, %d) to (%d, %d) ", rr_node[inode].xlow,
rr_node[inode].ylow, rr_node[inode].xhigh,
rr_node[inode].yhigh);
}
fprintf(fp, "Ptc_num: %d ", rr_node[inode].ptc_num);
fprintf(fp, "Direction: %s ",
direction_name[rr_node[inode].direction + 1]);
fprintf(fp, "Drivers: %s ", drivers_name[rr_node[inode].drivers + 1]);
fprintf(fp, "\n");
fprintf(fp, "%d edge(s):", rr_node[inode].num_edges);
for(iconn = 0; iconn < rr_node[inode].num_edges; iconn++)
fprintf(fp, " %d", rr_node[inode].edges[iconn]);
fprintf(fp, "\n");
fprintf(fp, "Switch types:");
for(iconn = 0; iconn < rr_node[inode].num_edges; iconn++)
fprintf(fp, " %d", rr_node[inode].switches[iconn]);
fprintf(fp, "\n");
fprintf(fp, "Occ: %d Capacity: %d\n", rr_node[inode].occ,
rr_node[inode].capacity);
fprintf(fp, "R: %g C: %g\n", rr_node[inode].R, rr_node[inode].C);
fprintf(fp, "Cost_index: %d\n", rr_node[inode].cost_index);
}
/* Prints all the rr_indexed_data of index to file fp. */
void
print_rr_indexed_data(FILE * fp,
int index)
{
fprintf(fp, "Index: %d\n", index);
fprintf(fp, "ortho_cost_index: %d ",
rr_indexed_data[index].ortho_cost_index);
fprintf(fp, "base_cost: %g ", rr_indexed_data[index].saved_base_cost);
fprintf(fp, "saved_base_cost: %g\n",
rr_indexed_data[index].saved_base_cost);
fprintf(fp, "Seg_index: %d ", rr_indexed_data[index].seg_index);
fprintf(fp, "inv_length: %g\n", rr_indexed_data[index].inv_length);
fprintf(fp, "T_linear: %g ", rr_indexed_data[index].T_linear);
fprintf(fp, "T_quadratic: %g ", rr_indexed_data[index].T_quadratic);
fprintf(fp, "C_load: %g\n", rr_indexed_data[index].C_load);
}
static void
build_unidir_rr_opins(IN int i,
IN int j,
IN struct s_grid_tile **grid,
IN int *Fc_out,
IN int nodes_per_chan,
IN t_seg_details * seg_details,
INOUT int **Fc_xofs,
INOUT int **Fc_yofs,
INOUT t_rr_node * rr_node,
INOUT boolean * rr_edge_done,
OUT boolean * Fc_clipped,
IN t_ivec *** rr_node_indices)
{
/* Builds the OPIN edges for a uni-directional architecture: for each
 * DRIVER pin of the block at (i,j), finds the channel wire muxes the
 * pin should connect to and loads those edges. */
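/* For example, a pin on the TOP side at height offset ofs drives the
 * CHANX segment in row j + ofs (pos_dir), while a pin on the LEFT side
 * drives the CHANY segment in column i - 1 (pos_dir is FALSE, so chan
 * is decremented); see the side decoding below. */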
t_type_ptr type;
int ipin, iclass, ofs, chan, seg, max_len, inode;
enum e_side side;
t_rr_type chan_type;
t_linked_edge *edge_list = NULL, *next;
boolean clipped, vert, pos_dir;
int num_edges;
int **Fc_ofs;
*Fc_clipped = FALSE;
/* Only the base block of a set should use this function */
if(grid[i][j].offset > 0)
{
return;
}
type = grid[i][j].type;
/* Go through each pin and find its fanout. */
for(ipin = 0; ipin < type->num_pins; ++ipin)
{
/* Skip global pins and ipins */
iclass = type->pin_class[ipin];
if(type->class_inf[iclass].type != DRIVER)
{
continue;
}
if(type->is_global_pin[ipin])
{
continue;
}
num_edges = 0;
edge_list = NULL;
for(ofs = 0; ofs < type->height; ++ofs)
{
for(side = 0; side < 4; ++side)
{
/* Can't do anything if pin isn't at this location */
if(0 == type->pinloc[ofs][side][ipin])
{
continue;
}
/* Figure out the chan seg at that side.
* side is the side of the logic or io block. */
vert = ((side == TOP) || (side == BOTTOM));
pos_dir = ((side == TOP) || (side == RIGHT));
chan_type = (vert ? CHANX : CHANY);
chan = (vert ? (j + ofs) : i);
seg = (vert ? i : (j + ofs));
max_len = (vert ? nx : ny);
Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
if(FALSE == pos_dir)
{
--chan;
}
/* Skip the location if there is no channel. */
if(chan < 0)
{
continue;
}
if(seg < 1)
{
continue;
}
if(seg > (vert ? nx : ny))
{
continue;
}
if(chan > (vert ? ny : nx))
{
continue;
}
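/* Worked example of the mapping above: a height-1 block at (i, j) with a
 * DRIVER pin on its TOP side gets vert = TRUE and pos_dir = TRUE, so it
 * drives CHANX channel j (the horizontal channel above the block) at
 * segment i. The same pin on the BOTTOM side has pos_dir = FALSE, so chan
 * is decremented to j - 1, the horizontal channel below the block. */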
/* Get the list of opin to mux connections for that chan seg. */
num_edges += get_unidir_opin_connections(chan, seg,
Fc_out[type->index], chan_type, seg_details,
&edge_list, Fc_ofs, rr_edge_done, max_len,
nodes_per_chan, rr_node_indices, &clipped);
if(clipped)
{
*Fc_clipped = TRUE;
}
}
}
/* Add the edges */
inode = get_rr_node_index(i, j, OPIN, ipin, rr_node_indices);
alloc_and_load_edges_and_switches(rr_node, inode, num_edges,
rr_edge_done, edge_list);
while(edge_list != NULL) {
next = edge_list->next;
free(edge_list);
edge_list = next;
}
}
}
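/* A sketch of how build_unidir_rr_opins above is typically driven (it mirrors
 * the per-tile walk in alloc_and_load_rr_graph; the exact loop bounds here are
 * an assumption, not lifted from that routine):
 *
 * for(i = 0; i < (nx + 2); ++i)
 * for(j = 0; j < (ny + 2); ++j)
 * build_unidir_rr_opins(i, j, grid, Fc_out, nodes_per_chan, seg_details,
 * Fc_xofs, Fc_yofs, rr_node, rr_edge_done, &clipped, rr_node_indices);
 */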
static void
load_uniform_opin_switch_pattern_paired(IN int *Fc_out,
IN int num_pins,
IN int *pins_in_chan_seg,
IN int num_wire_inc_muxes,
IN int num_wire_dec_muxes,
IN int *wire_inc_muxes,
IN int *wire_dec_muxes,
INOUT t_rr_node * rr_node,
INOUT boolean * rr_edge_done,
IN t_seg_details * seg_details,
OUT boolean * Fc_clipped)
{
/* Directionality is assumed to be unidirectional. */
/* Make a turn-based assignment to avoid overlap when Fc_output is low. This is
 * a bipartite matching problem: out of "num_wire_muxes" muxes, "Fc_output" of
 * them are assigned to each opin (there are "num_pins" of them in total); the
 * assignment is uniform (evenly spaced and spread out) and staggered to avoid
 * overlap when Fc_output is low. */
/* The natural order in which wire muxes are stored in wire_muxes already
 * alternates in directionality (by my implementation), so nothing extra is
 * needed to avoid directional bias. */
/* TODO: Due to spacing, it is possible to have directional bias: all Fc_out
 * wires connected to one opin go in either INC or DEC, whereas I want a mix of
 * both. SOLUTION: Use quantization of 2 to ensure that if an opin connects to
 * one wire, it must also connect to its companion wire, which runs in the
 * opposite direction. This means instead of having num_wire_muxes as the
 * matching set, pick out the INC wires in num_wire_muxes as the matching set
 * (the DEC wires are their companions). April 17, 2007
 * NEWS: That solution does not work, as treating wires in groups leads to
 * seriously abnormal patterns (connections crossing multiple blocks) when W is
 * not quantized to multiples of 2L. So I'm changing that approach to a new one
 * that avoids directional bias: I separate the INC muxes and DEC muxes into two
 * sets. Each set is uniformly assigned to opins with Fc_output/2; this is
 * identical to before for normal cases and keeps all connections in the same
 * chan segment for the nonquantized cases. */
/* Finally, the two approaches were separated: 1. Take all wire muxes and assign
 * them to opins one at a time (load_uniform_opin_switch_pattern). 2. Take pairs
 * (by companion) of wire muxes and assign them to opins a pair at a time
 * (load_uniform_opin_switch_pattern_paired). The first is used for fringe
 * channel segments (ends of channels, where there are many muxes due to partial
 * wire segments) and the second is used in the core. */
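/* Worked example of the staggered assignment below: with Fc_out = 4
 * (Fc_output_per_side = 2) and num_wire_inc_muxes = 8, opin 0 takes INC
 * muxes {0, 1} (count_inc advances to 2), opin 1 takes {2, 3}, and so on,
 * wrapping modulo 8; the DEC mux set is walked independently in the same
 * fashion. */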
int ipin, iconn, num_edges, init_mux;
int from_node, to_node, to_track;
int xlow, ylow;
t_linked_edge *edge_list;
int *wire_muxes;
int k, num_wire_muxes, Fc_output_per_side, CurFc;
int count_inc, count_dec;
t_type_ptr type;
*Fc_clipped = FALSE;
count_inc = count_dec = 0;
for(ipin = 0; ipin < num_pins; ipin++)
{
from_node = pins_in_chan_seg[ipin];
xlow = rr_node[from_node].xlow;
ylow = rr_node[from_node].ylow;
type = grid[xlow][ylow].type;
edge_list = NULL;
num_edges = 0;
/* Assigning the INC muxes first, then DEC muxes */
for(k = 0; k < 2; ++k)
{
if(k == 0)
{
num_wire_muxes = num_wire_inc_muxes;
wire_muxes = wire_inc_muxes;
}
else
{
num_wire_muxes = num_wire_dec_muxes;
wire_muxes = wire_dec_muxes;
}
/* Half the Fc will be assigned for each direction. */
assert(Fc_out[type->index] % 2 == 0);
Fc_output_per_side = Fc_out[type->index] / 2;
/* Clip the demand. Make sure to use a new variable so
* on the second pass it is not clipped. */
CurFc = Fc_output_per_side;
if(Fc_output_per_side > num_wire_muxes)
{
*Fc_clipped = TRUE;
CurFc = num_wire_muxes;
}
if(k == 0)
{
init_mux = (count_inc) % num_wire_muxes;
count_inc += CurFc;
}
else
{
init_mux = (count_dec) % num_wire_muxes;
count_dec += CurFc;
}
for(iconn = 0; iconn < CurFc; iconn++)
{
/* FINALLY, make the outpin to mux connection */
/* Latest update: I'm not using Uniform Pattern, but a similarly staggered pattern */
to_node = wire_muxes[(init_mux + iconn) % num_wire_muxes];
rr_node[to_node].num_opin_drivers++; /* keep track of mux size */
to_track = rr_node[to_node].ptc_num;
if(FALSE == rr_edge_done[to_node])
{
/* Use of alloc_and_load_edges_and_switches
* must be accompanied by rr_edge_done check. */
rr_edge_done[to_node] = TRUE;
edge_list = insert_in_edge_list(edge_list, to_node,
seg_details[to_track].wire_switch);
num_edges++;
}
}
}
if(num_edges < 1)
{
printf
("Error: opin %d at (%d,%d) does not connect to any "
"tracks.\n", rr_node[from_node].ptc_num,
rr_node[from_node].xlow, rr_node[from_node].ylow);
exit(1);
}
alloc_and_load_edges_and_switches(rr_node, from_node, num_edges,
rr_edge_done, edge_list);
}
}
/* This routine prints and dumps statistics on mux sizes on a per-sblock
 * basis over the entire chip. Mux sizes should be balanced (off by at most 1)
 * for all muxes in the same sblock, both in the core and in corner sblocks.
 * Fringe sblocks will show imbalance because one side is missing and there are
 * constraints on where wires must connect. Comparing two core sblocks, muxes
 * need not be balanced if W is not quantized to multiples of 2L, again because
 * there can be sblocks with different numbers of muxes but the same number of
 * incoming wires that need to connect to those muxes (we don't want to
 * under-connect the user-specified Fc and Fs). */
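/* (For example, with unit-length wires, L = 1, a channel width W that is a
 * multiple of 2L keeps every core sblock identical, so a single distribution
 * is reported below; a nonquantized W can produce several sblock flavours and
 * hence several distributions.) */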
static void
view_mux_size_distribution(t_ivec *** rr_node_indices,
int nodes_per_chan,
t_seg_details * seg_details_x,
t_seg_details * seg_details_y)
{
int i, j, itrack, seg_num, chan_num, max_len;
int start, end, inode, max_value, min_value;
int array_count, k, num_muxes;
short direction, side;
float *percent_range_array;
float percent_range, percent_range_sum, avg_percent_range;
float std_dev_percent_range, deviation_f;
int range, *range_array, global_max_range;
float avg_range, range_sum, std_dev_range;
t_seg_details *seg_details;
t_mux *new_mux, *sblock_mux_list_head, *current, *next;
#ifdef ENABLE_DUMP
FILE *dump_file_per_sblock, *dump_file;
#endif /* ENABLE_DUMP */
t_mux_size_distribution *distr_list, *distr_current, *new_distribution,
*distr_next;
#ifdef ENABLE_DUMP
dump_file = my_fopen("mux_size_dump.txt", "w");
dump_file_per_sblock = my_fopen("mux_size_per_sblock_dump.txt", "w");
#endif /* ENABLE_DUMP */
sblock_mux_list_head = NULL;
percent_range_array =
(float *)my_malloc((nx - 1) * (ny - 1) * sizeof(float));
range_array = (int *)my_malloc((nx - 1) * (ny - 1) * sizeof(int));
array_count = 0;
percent_range_sum = 0.0;
range_sum = 0.0;
global_max_range = 0;
min_value = 0;
max_value = 0;
seg_num = 0;
chan_num = 0;
direction = 0;
seg_details = NULL;
max_len = 0;
distr_list = NULL;
/* With the specified range, I'm only looking at core sblocks */
for(j = (ny - 1); j > 0; j--)
{
for(i = 1; i < nx; i++)
{
num_muxes = 0;
for(side = 0; side < 4; side++)
{
switch (side)
{
case LEFT:
seg_num = i;
chan_num = j;
direction = DEC_DIRECTION; /* only DEC wires have muxes on this side of the sblock */
seg_details = seg_details_x;
max_len = nx;
break;
case RIGHT:
seg_num = i + 1;
chan_num = j;
direction = INC_DIRECTION;
seg_details = seg_details_x;
max_len = nx;
break;
case TOP:
seg_num = j + 1;
chan_num = i;
direction = INC_DIRECTION;
seg_details = seg_details_y;
max_len = ny;
break;
case BOTTOM:
seg_num = j;
chan_num = i;
direction = DEC_DIRECTION;
seg_details = seg_details_y;
max_len = ny;
break;
default:
assert(FALSE);
}
assert(nodes_per_chan > 0);
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
start = get_seg_start(seg_details, itrack, seg_num, chan_num);
end = get_seg_end(seg_details, itrack, start, chan_num, max_len);
if((seg_details[itrack].direction == direction) &&
(((start == seg_num) && (direction == INC_DIRECTION)) ||
((end == seg_num) && (direction == DEC_DIRECTION))))
{ /* mux found */
num_muxes++;
if(side == LEFT || side == RIGHT)
{ /* CHANX */
inode = get_rr_node_index(seg_num, chan_num,
CHANX, itrack, rr_node_indices);
}
else
{
assert((side == TOP) || (side == BOTTOM)); /* CHANY */
inode = get_rr_node_index(chan_num, seg_num,
CHANY, itrack, rr_node_indices);
}
new_mux = (t_mux *)my_malloc(sizeof(t_mux));
new_mux->size = rr_node[inode].num_wire_drivers +
rr_node[inode].num_opin_drivers;
new_mux->next = NULL;
/* insert in linked list, descending */
if(sblock_mux_list_head == NULL)
{
/* first entry */
sblock_mux_list_head = new_mux;
}
else if(sblock_mux_list_head->size < new_mux->size)
{
/* insert before head */
new_mux->next = sblock_mux_list_head;
sblock_mux_list_head = new_mux;
}
else
{
/* insert after head, keeping the list sorted in descending size */
current = sblock_mux_list_head;
next = current->next;
while((next != NULL) && (next->size > new_mux->size))
{
current = next;
next = current->next;
}
if(next == NULL)
{
current->next = new_mux;
}
else
{
new_mux->next = current->next;
current->next = new_mux;
}
}
/* end of insert in linked list */
}
}
} /* end of mux searching over all four sides of sblock */
/* now sblock_mux_list_head holds a linked list of all muxes in this sblock */
current = sblock_mux_list_head;
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock,
"sblock at (%d, %d) has mux sizes: {", i, j);
#endif /* ENABLE_DUMP */
if(current != NULL)
{
max_value = min_value = current->size;
}
while(current != NULL)
{
if(max_value < current->size)
max_value = current->size;
if(min_value > current->size)
min_value = current->size;
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock, "%d ",
current->size);
fprintf(dump_file, "%d\n", current->size);
#endif /* ENABLE_DUMP */
current = current->next;
}
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock, "}\n\tmax: %d\tmin:%d",
max_value, min_value);
#endif /* ENABLE_DUMP */
range = max_value - min_value;
percent_range = (min_value > 0) ?
((float)range / (float)min_value) : 0.0; /* guard divide-by-zero */
if(global_max_range < range)
global_max_range = range;
#ifdef ENABLE_DUMP
fprintf(dump_file_per_sblock,
"\t\trange: %d\t\tpercent range:%.2f\n",
range, percent_range);
#endif /* ENABLE_DUMP */
percent_range_array[array_count] = percent_range;
range_array[array_count] = range;
percent_range_sum += percent_range;
range_sum += range;
array_count++;
/* I use one distribution per (core) sblock type. There is more than
 * one type of sblock when W is not quantized to multiples of 2L. */
distr_current = distr_list;
while(distr_current != NULL
&& distr_current->mux_count != num_muxes)
{
distr_current = distr_current->next;
}
if(distr_current == NULL)
{
/* Create a distribution for the new sblock type,
* and put it as head of linked list by convention */
new_distribution = (t_mux_size_distribution *)
my_malloc(sizeof(t_mux_size_distribution));
new_distribution->mux_count = num_muxes;
new_distribution->max_index = max_value;
new_distribution->distr = (int *)my_calloc(max_value + 1, sizeof(int));
/* filling in the distribution */
current = sblock_mux_list_head;
while(current != NULL)
{
assert(current->size <= new_distribution->max_index);
new_distribution->distr[current->size]++;
current = current->next;
}
/* add it to head */
new_distribution->next = distr_list;
distr_list = new_distribution;
}
else
{
/* distr_current->mux_count == num_muxes so add this sblock's mux sizes in this distribution */
current = sblock_mux_list_head;
while(current != NULL)
{
if(current->size > distr_current->max_index)
{
/* realloc to expand the distribution array so it can hold the new, larger size */
distr_current->distr = my_realloc(distr_current->distr,
(current->size + 1) * sizeof(int));
/* initialize the newly allocated elements */
for(k = (distr_current->max_index + 1); k <= current->size; k++)
distr_current->distr[k] = 0;
distr_current->max_index = current->size;
distr_current->distr[current->size]++;
}
else
{
distr_current->distr[current->size]++;
}
current = current->next;
}
}
/* done - now free memory */
current = sblock_mux_list_head;
while(current != NULL)
{
next = current->next;
free(current);
current = next;
}
sblock_mux_list_head = NULL;
}
}
avg_percent_range = (float)percent_range_sum / array_count;
avg_range = (float)range_sum / array_count;
percent_range_sum = 0.0;
range_sum = 0.0;
for(k = 0; k < array_count; k++)
{
deviation_f = (percent_range_array[k] - avg_percent_range);
percent_range_sum += deviation_f * deviation_f;
deviation_f = ((float)range_array[k] - avg_range);
range_sum += deviation_f * deviation_f;
}
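/* The divisor (array_count - 1) below applies Bessel's correction: the mean
 * used above was estimated from the same samples, so the sample (not
 * population) standard deviation is reported. */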
std_dev_percent_range =
sqrt(percent_range_sum / ((float)array_count - 1.0));
std_dev_range = sqrt(range_sum / ((float)array_count - 1.0));
printf("==== MUX size statistics ====\n");
printf("max range of mux size within a sblock is; %d\n",
global_max_range);
printf("average range of mux size within a sblock is; %.2f\n", avg_range);
printf("std dev of range of mux size within a sblock is; %.2f\n",
std_dev_range);
printf
("average percent range of mux size within a sblock is; %.2f%%\n",
avg_percent_range * 100.0);
printf
("std dev of percent range of mux size within a sblock is; %.2f%%\n",
std_dev_percent_range * 100.0);
printf(" -- Detailed MUX size distribution by sblock type -- \n");
distr_current = distr_list;
while(distr_current != NULL)
{
print_distribution(stdout, distr_current);
/* free */
distr_next = distr_current->next;
free(distr_current->distr);
free(distr_current);
distr_current = distr_next;
}
free(percent_range_array);
free(range_array);
#ifdef ENABLE_DUMP
fclose(dump_file_per_sblock);
fclose(dump_file);
#endif /* ENABLE_DUMP */
}
static void
print_distribution(FILE * fptr,
t_mux_size_distribution * distr_struct)
{
int *distr;
int k;
float sum;
boolean zeros;
distr = distr_struct->distr;
fprintf(fptr,
"For Sblocks containing %d MUXes, the MUX size distribution is:\n",
distr_struct->mux_count);
fprintf(fptr, "\t\t\tSize\t\t\tFrequency (percent)\n");
sum = 0.0;
for(k = 0; k <= distr_struct->max_index; k++)
sum += distr[k];
zeros = TRUE;
for(k = 0; k <= distr_struct->max_index; k++)
{
if(zeros && (distr[k] == 0))
{
/* do nothing for leading string of zeros */
}
else
{
zeros = FALSE; /* leading string of zeros ended */
fprintf(fptr, "\t\t\t%d\t\t\t%d (%.2f%%)\n", k, distr[k],
(float)distr[k] / sum * 100.0);
}
}
fprintf(fptr, "\nEnd of this Sblock MUX size distribution.\n");
}
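/* Example: for distr = {0, 0, 4, 2} (max_index = 3, sum = 6) the routine
 * above suppresses the leading zero-frequency sizes and prints:
 * Size 2, frequency 4 (66.67%)
 * Size 3, frequency 2 (33.33%) */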