blob: 8edddf3a3fd249767ab2cd3e361c8c5635253228 [file] [log] [blame]
#include <math.h> /* Needed only for sqrt call (remove if sqrt removed) */
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph2.h"
#include "rr_graph_indexed_data.h"
/******************* Subroutines local to this module ************************/
static void load_rr_indexed_data_base_costs(int nodes_per_chan,
t_ivec *** rr_node_indices,
enum e_base_cost_type
base_cost_type,
int wire_to_ipin_switch);
static float get_delay_normalization_fac(int nodes_per_chan,
t_ivec *** rr_node_indices);
static float get_average_opin_delay(t_ivec *** rr_node_indices,
int nodes_per_chan);
static void load_rr_indexed_data_T_values(int index_start,
int num_indices_to_load,
t_rr_type rr_type,
int nodes_per_chan,
t_ivec *** rr_node_indices,
t_segment_inf * segment_inf);
/******************** Subroutine definitions *********************************/
/* Allocates the rr_indexed_data array and loads it with appropriate values. *
* It currently stores the segment type (or OPEN if the index doesn't *
* correspond to an CHANX or CHANY type), the base cost of nodes of that *
* type, and some info to allow rapid estimates of time to get to a target *
* to be computed by the router. *
*
* Right now all SOURCES have the same base cost; and similarly there's only *
* one base cost for each of SINKs, OPINs, and IPINs (four total). This can *
* be changed just by allocating more space in the array below and changing *
* the cost_index values for these rr_nodes, if you want to make some pins *
* etc. more expensive than others. I give each segment type in an *
* x-channel its own cost_index, and each segment type in a y-channel its *
* own cost_index. */
void
alloc_and_load_rr_indexed_data(IN t_segment_inf * segment_inf,
IN int num_segment,
IN t_ivec *** rr_node_indices,
IN int nodes_per_chan,
int wire_to_ipin_switch,
enum e_base_cost_type base_cost_type)
{
int iseg, length, i, index;
num_rr_indexed_data = CHANX_COST_INDEX_START + (2 * num_segment);
rr_indexed_data = (t_rr_indexed_data *) my_malloc(num_rr_indexed_data *
sizeof
(t_rr_indexed_data));
/* For rr_types that aren't CHANX or CHANY, base_cost is valid, but most *
* * other fields are invalid. For IPINs, the T_linear field is also valid; *
* * all other fields are invalid. For SOURCES, SINKs and OPINs, all fields *
* * other than base_cost are invalid. Mark invalid fields as OPEN for safety. */
for(i = SOURCE_COST_INDEX; i <= IPIN_COST_INDEX; i++)
{
rr_indexed_data[i].ortho_cost_index = OPEN;
rr_indexed_data[i].seg_index = OPEN;
rr_indexed_data[i].inv_length = OPEN;
rr_indexed_data[i].T_linear = OPEN;
rr_indexed_data[i].T_quadratic = OPEN;
rr_indexed_data[i].C_load = OPEN;
}
rr_indexed_data[IPIN_COST_INDEX].T_linear =
switch_inf[wire_to_ipin_switch].Tdel;
/* X-directed segments. */
for(iseg = 0; iseg < num_segment; iseg++)
{
index = CHANX_COST_INDEX_START + iseg;
rr_indexed_data[index].ortho_cost_index = index + num_segment;
if(segment_inf[iseg].longline)
length = nx;
else
length = min(segment_inf[iseg].length, nx);
rr_indexed_data[index].inv_length = 1. / length;
rr_indexed_data[index].seg_index = iseg;
}
load_rr_indexed_data_T_values(CHANX_COST_INDEX_START, num_segment,
CHANX, nodes_per_chan, rr_node_indices,
segment_inf);
/* Y-directed segments. */
for(iseg = 0; iseg < num_segment; iseg++)
{
index = CHANX_COST_INDEX_START + num_segment + iseg;
rr_indexed_data[index].ortho_cost_index = index - num_segment;
if(segment_inf[iseg].longline)
length = ny;
else
length = min(segment_inf[iseg].length, ny);
rr_indexed_data[index].inv_length = 1. / length;
rr_indexed_data[index].seg_index = iseg;
}
load_rr_indexed_data_T_values((CHANX_COST_INDEX_START + num_segment),
num_segment, CHANY, nodes_per_chan,
rr_node_indices, segment_inf);
load_rr_indexed_data_base_costs(nodes_per_chan, rr_node_indices,
base_cost_type, wire_to_ipin_switch);
}
static void
load_rr_indexed_data_base_costs(int nodes_per_chan,
t_ivec *** rr_node_indices,
enum e_base_cost_type base_cost_type,
int wire_to_ipin_switch)
{
/* Loads the base_cost member of rr_indexed_data according to the specified *
* base_cost_type. */
float delay_normalization_fac;
int index;
if(base_cost_type == DELAY_NORMALIZED)
{
delay_normalization_fac =
get_delay_normalization_fac(nodes_per_chan, rr_node_indices);
}
else
{
delay_normalization_fac = 1.;
}
if(base_cost_type == DEMAND_ONLY || base_cost_type == DELAY_NORMALIZED)
{
rr_indexed_data[SOURCE_COST_INDEX].base_cost =
delay_normalization_fac;
rr_indexed_data[SINK_COST_INDEX].base_cost = 0.;
rr_indexed_data[OPIN_COST_INDEX].base_cost =
delay_normalization_fac;
#ifndef SPEC
rr_indexed_data[IPIN_COST_INDEX].base_cost =
0.95 * delay_normalization_fac;
#else /* Avoid roundoff for SPEC */
rr_indexed_data[IPIN_COST_INDEX].base_cost =
delay_normalization_fac;
#endif
}
else if(base_cost_type == INTRINSIC_DELAY)
{
rr_indexed_data[SOURCE_COST_INDEX].base_cost = 0.;
rr_indexed_data[SINK_COST_INDEX].base_cost = 0.;
rr_indexed_data[OPIN_COST_INDEX].base_cost =
get_average_opin_delay(rr_node_indices, nodes_per_chan);
rr_indexed_data[IPIN_COST_INDEX].base_cost =
switch_inf[wire_to_ipin_switch].Tdel;
}
/* Load base costs for CHANX and CHANY segments */
for(index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++)
{
if(base_cost_type == INTRINSIC_DELAY)
rr_indexed_data[index].base_cost =
rr_indexed_data[index].T_linear +
rr_indexed_data[index].T_quadratic;
else
/* rr_indexed_data[index].base_cost = delay_normalization_fac /
rr_indexed_data[index].inv_length; */
rr_indexed_data[index].base_cost = delay_normalization_fac;
/* rr_indexed_data[index].base_cost = delay_normalization_fac *
sqrt (1. / rr_indexed_data[index].inv_length); */
/* rr_indexed_data[index].base_cost = delay_normalization_fac *
(1. + 1. / rr_indexed_data[index].inv_length); */
}
/* Save a copy of the base costs -- if dynamic costing is used by the *
* router, the base_cost values will get changed all the time and being *
* able to restore them from a saved version is useful. */
for(index = 0; index < num_rr_indexed_data; index++)
{
rr_indexed_data[index].saved_base_cost =
rr_indexed_data[index].base_cost;
}
}
static float
get_delay_normalization_fac(int nodes_per_chan,
t_ivec *** rr_node_indices)
{
/* Returns the average delay to go 1 FB distance along a wire. */
const int clb_dist = 3; /* Number of CLBs I think the average conn. goes. */
int inode, itrack, cost_index;
float Tdel, Tdel_sum, frac_num_seg;
Tdel_sum = 0.;
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
inode =
get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANX, itrack,
rr_node_indices);
cost_index = rr_node[inode].cost_index;
frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length;
Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear +
frac_num_seg * frac_num_seg *
rr_indexed_data[cost_index].T_quadratic;
Tdel_sum += Tdel / (float)clb_dist;
}
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
inode =
get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANY, itrack,
rr_node_indices);
cost_index = rr_node[inode].cost_index;
frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length;
Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear +
frac_num_seg * frac_num_seg *
rr_indexed_data[cost_index].T_quadratic;
Tdel_sum += Tdel / (float)clb_dist;
}
return (Tdel_sum / (2. * nodes_per_chan));
}
static float
get_average_opin_delay(t_ivec *** rr_node_indices,
int nodes_per_chan)
{
/* Returns the average delay from an OPIN to a wire in an adjacent channel. */
/* RESEARCH TODO: Got to think if this heuristic needs to change for hetero, right now, I'll calculate
* the average delay of non-IO blocks */
int inode, ipin, iclass, iedge, itype, num_edges, to_switch, to_node,
num_conn;
float Cload, Tdel;
Tdel = 0.;
num_conn = 0;
for(itype = 0;
itype < num_types && &type_descriptors[itype] != IO_TYPE; itype++)
{
for(ipin = 0; ipin < type_descriptors[itype].num_pins; ipin++)
{
iclass = type_descriptors[itype].pin_class[ipin];
if(type_descriptors[itype].class_inf[iclass].type ==
DRIVER)
{ /* OPIN */
inode =
get_rr_node_index((nx + 1) / 2, (ny + 1) / 2,
OPIN, ipin,
rr_node_indices);
num_edges = rr_node[inode].num_edges;
for(iedge = 0; iedge < num_edges; iedge++)
{
to_node = rr_node[inode].edges[iedge];
to_switch =
rr_node[inode].switches[iedge];
Cload = rr_node[to_node].C;
Tdel +=
Cload * switch_inf[to_switch].R +
switch_inf[to_switch].Tdel;
num_conn++;
}
}
}
}
Tdel /= (float)num_conn;
return (Tdel);
}
static void
load_rr_indexed_data_T_values(int index_start,
int num_indices_to_load,
t_rr_type rr_type,
int nodes_per_chan,
t_ivec *** rr_node_indices,
t_segment_inf * segment_inf)
{
/* Loads the average propagation times through segments of each index type *
* for either all CHANX segment types or all CHANY segment types. It does *
* this by looking at all the segments in one channel in the middle of the *
* array and averaging the R and C values of all segments of the same type *
* and using them to compute average delay values for this type of segment. */
int itrack, iseg, inode, cost_index, iswitch;
float *C_total, *R_total; /* [0..num_rr_indexed_data - 1] */
int *num_nodes_of_index; /* [0..num_rr_indexed_data - 1] */
float Rnode, Cnode, Rsw, Tsw;
num_nodes_of_index = (int *)my_calloc(num_rr_indexed_data, sizeof(int));
C_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float));
R_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float));
/* Get average C and R values for all the segments of this type in one *
* channel segment, near the middle of the array. */
for(itrack = 0; itrack < nodes_per_chan; itrack++)
{
inode =
get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, rr_type, itrack,
rr_node_indices);
cost_index = rr_node[inode].cost_index;
num_nodes_of_index[cost_index]++;
C_total[cost_index] += rr_node[inode].C;
R_total[cost_index] += rr_node[inode].R;
}
for(cost_index = index_start;
cost_index < index_start + num_indices_to_load; cost_index++)
{
if(num_nodes_of_index[cost_index] == 0)
{ /* Segments don't exist. */
rr_indexed_data[cost_index].T_linear = OPEN;
rr_indexed_data[cost_index].T_quadratic = OPEN;
rr_indexed_data[cost_index].C_load = OPEN;
}
else
{
Rnode =
R_total[cost_index] / num_nodes_of_index[cost_index];
Cnode =
C_total[cost_index] / num_nodes_of_index[cost_index];
iseg = rr_indexed_data[cost_index].seg_index;
iswitch = segment_inf[iseg].wire_switch;
Rsw = switch_inf[iswitch].R;
Tsw = switch_inf[iswitch].Tdel;
if(switch_inf[iswitch].buffered)
{
rr_indexed_data[cost_index].T_linear =
Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode;
rr_indexed_data[cost_index].T_quadratic = 0.;
rr_indexed_data[cost_index].C_load = 0.;
}
else
{ /* Pass transistor */
rr_indexed_data[cost_index].C_load = Cnode;
/* See Dec. 23, 1997 notes for deriviation of formulae. */
rr_indexed_data[cost_index].T_linear =
Tsw + 0.5 * Rsw * Cnode;
rr_indexed_data[cost_index].T_quadratic =
(Rsw + Rnode) * 0.5 * Cnode;
}
}
}
free(num_nodes_of_index);
free(C_total);
free(R_total);
}