| #include <math.h> /* Needed only for sqrt call (remove if sqrt removed) */ |
| #include "util.h" |
| #include "vpr_types.h" |
| #include "globals.h" |
| #include "rr_graph_util.h" |
| #include "rr_graph2.h" |
| #include "rr_graph_indexed_data.h" |
| |
| |
| /******************* Subroutines local to this module ************************/ |
| |
| static void load_rr_indexed_data_base_costs(int nodes_per_chan, |
| t_ivec *** rr_node_indices, |
| enum e_base_cost_type |
| base_cost_type, |
| int wire_to_ipin_switch); |
| |
| static float get_delay_normalization_fac(int nodes_per_chan, |
| t_ivec *** rr_node_indices); |
| |
| static float get_average_opin_delay(t_ivec *** rr_node_indices, |
| int nodes_per_chan); |
| |
| static void load_rr_indexed_data_T_values(int index_start, |
| int num_indices_to_load, |
| t_rr_type rr_type, |
| int nodes_per_chan, |
| t_ivec *** rr_node_indices, |
| t_segment_inf * segment_inf); |
| |
| |
| |
| /******************** Subroutine definitions *********************************/ |
| |
| /* Allocates the rr_indexed_data array and loads it with appropriate values. * |
| * It currently stores the segment type (or OPEN if the index doesn't * |
| * correspond to an CHANX or CHANY type), the base cost of nodes of that * |
| * type, and some info to allow rapid estimates of time to get to a target * |
| * to be computed by the router. * |
| * |
| * Right now all SOURCES have the same base cost; and similarly there's only * |
| * one base cost for each of SINKs, OPINs, and IPINs (four total). This can * |
| * be changed just by allocating more space in the array below and changing * |
| * the cost_index values for these rr_nodes, if you want to make some pins * |
| * etc. more expensive than others. I give each segment type in an * |
| * x-channel its own cost_index, and each segment type in a y-channel its * |
| * own cost_index. */ |
| void |
| alloc_and_load_rr_indexed_data(IN t_segment_inf * segment_inf, |
| IN int num_segment, |
| IN t_ivec *** rr_node_indices, |
| IN int nodes_per_chan, |
| int wire_to_ipin_switch, |
| enum e_base_cost_type base_cost_type) |
| { |
| |
| int iseg, length, i, index; |
| |
| num_rr_indexed_data = CHANX_COST_INDEX_START + (2 * num_segment); |
| rr_indexed_data = (t_rr_indexed_data *) my_malloc(num_rr_indexed_data * |
| sizeof |
| (t_rr_indexed_data)); |
| |
| /* For rr_types that aren't CHANX or CHANY, base_cost is valid, but most * |
| * * other fields are invalid. For IPINs, the T_linear field is also valid; * |
| * * all other fields are invalid. For SOURCES, SINKs and OPINs, all fields * |
| * * other than base_cost are invalid. Mark invalid fields as OPEN for safety. */ |
| |
| for(i = SOURCE_COST_INDEX; i <= IPIN_COST_INDEX; i++) |
| { |
| rr_indexed_data[i].ortho_cost_index = OPEN; |
| rr_indexed_data[i].seg_index = OPEN; |
| rr_indexed_data[i].inv_length = OPEN; |
| rr_indexed_data[i].T_linear = OPEN; |
| rr_indexed_data[i].T_quadratic = OPEN; |
| rr_indexed_data[i].C_load = OPEN; |
| } |
| |
| rr_indexed_data[IPIN_COST_INDEX].T_linear = |
| switch_inf[wire_to_ipin_switch].Tdel; |
| |
| /* X-directed segments. */ |
| |
| for(iseg = 0; iseg < num_segment; iseg++) |
| { |
| index = CHANX_COST_INDEX_START + iseg; |
| |
| rr_indexed_data[index].ortho_cost_index = index + num_segment; |
| |
| if(segment_inf[iseg].longline) |
| length = nx; |
| else |
| length = min(segment_inf[iseg].length, nx); |
| |
| |
| rr_indexed_data[index].inv_length = 1. / length; |
| rr_indexed_data[index].seg_index = iseg; |
| } |
| |
| load_rr_indexed_data_T_values(CHANX_COST_INDEX_START, num_segment, |
| CHANX, nodes_per_chan, rr_node_indices, |
| segment_inf); |
| |
| /* Y-directed segments. */ |
| |
| for(iseg = 0; iseg < num_segment; iseg++) |
| { |
| index = CHANX_COST_INDEX_START + num_segment + iseg; |
| |
| rr_indexed_data[index].ortho_cost_index = index - num_segment; |
| |
| if(segment_inf[iseg].longline) |
| length = ny; |
| else |
| length = min(segment_inf[iseg].length, ny); |
| |
| rr_indexed_data[index].inv_length = 1. / length; |
| rr_indexed_data[index].seg_index = iseg; |
| } |
| |
| load_rr_indexed_data_T_values((CHANX_COST_INDEX_START + num_segment), |
| num_segment, CHANY, nodes_per_chan, |
| rr_node_indices, segment_inf); |
| |
| load_rr_indexed_data_base_costs(nodes_per_chan, rr_node_indices, |
| base_cost_type, wire_to_ipin_switch); |
| |
| } |
| |
| |
| static void |
| load_rr_indexed_data_base_costs(int nodes_per_chan, |
| t_ivec *** rr_node_indices, |
| enum e_base_cost_type base_cost_type, |
| int wire_to_ipin_switch) |
| { |
| |
| /* Loads the base_cost member of rr_indexed_data according to the specified * |
| * base_cost_type. */ |
| |
| float delay_normalization_fac; |
| int index; |
| |
| if(base_cost_type == DELAY_NORMALIZED) |
| { |
| delay_normalization_fac = |
| get_delay_normalization_fac(nodes_per_chan, rr_node_indices); |
| } |
| else |
| { |
| delay_normalization_fac = 1.; |
| } |
| |
| if(base_cost_type == DEMAND_ONLY || base_cost_type == DELAY_NORMALIZED) |
| { |
| rr_indexed_data[SOURCE_COST_INDEX].base_cost = |
| delay_normalization_fac; |
| rr_indexed_data[SINK_COST_INDEX].base_cost = 0.; |
| rr_indexed_data[OPIN_COST_INDEX].base_cost = |
| delay_normalization_fac; |
| |
| #ifndef SPEC |
| rr_indexed_data[IPIN_COST_INDEX].base_cost = |
| 0.95 * delay_normalization_fac; |
| #else /* Avoid roundoff for SPEC */ |
| rr_indexed_data[IPIN_COST_INDEX].base_cost = |
| delay_normalization_fac; |
| #endif |
| } |
| |
| else if(base_cost_type == INTRINSIC_DELAY) |
| { |
| rr_indexed_data[SOURCE_COST_INDEX].base_cost = 0.; |
| rr_indexed_data[SINK_COST_INDEX].base_cost = 0.; |
| rr_indexed_data[OPIN_COST_INDEX].base_cost = |
| get_average_opin_delay(rr_node_indices, nodes_per_chan); |
| rr_indexed_data[IPIN_COST_INDEX].base_cost = |
| switch_inf[wire_to_ipin_switch].Tdel; |
| } |
| |
| /* Load base costs for CHANX and CHANY segments */ |
| |
| for(index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) |
| { |
| if(base_cost_type == INTRINSIC_DELAY) |
| rr_indexed_data[index].base_cost = |
| rr_indexed_data[index].T_linear + |
| rr_indexed_data[index].T_quadratic; |
| else |
| /* rr_indexed_data[index].base_cost = delay_normalization_fac / |
| rr_indexed_data[index].inv_length; */ |
| |
| rr_indexed_data[index].base_cost = delay_normalization_fac; |
| /* rr_indexed_data[index].base_cost = delay_normalization_fac * |
| sqrt (1. / rr_indexed_data[index].inv_length); */ |
| /* rr_indexed_data[index].base_cost = delay_normalization_fac * |
| (1. + 1. / rr_indexed_data[index].inv_length); */ |
| } |
| |
| /* Save a copy of the base costs -- if dynamic costing is used by the * |
| * router, the base_cost values will get changed all the time and being * |
| * able to restore them from a saved version is useful. */ |
| |
| for(index = 0; index < num_rr_indexed_data; index++) |
| { |
| rr_indexed_data[index].saved_base_cost = |
| rr_indexed_data[index].base_cost; |
| } |
| } |
| |
| |
| static float |
| get_delay_normalization_fac(int nodes_per_chan, |
| t_ivec *** rr_node_indices) |
| { |
| |
| /* Returns the average delay to go 1 FB distance along a wire. */ |
| |
| const int clb_dist = 3; /* Number of CLBs I think the average conn. goes. */ |
| |
| int inode, itrack, cost_index; |
| float Tdel, Tdel_sum, frac_num_seg; |
| |
| Tdel_sum = 0.; |
| |
| for(itrack = 0; itrack < nodes_per_chan; itrack++) |
| { |
| inode = |
| get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANX, itrack, |
| rr_node_indices); |
| cost_index = rr_node[inode].cost_index; |
| frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length; |
| Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear + |
| frac_num_seg * frac_num_seg * |
| rr_indexed_data[cost_index].T_quadratic; |
| Tdel_sum += Tdel / (float)clb_dist; |
| } |
| |
| for(itrack = 0; itrack < nodes_per_chan; itrack++) |
| { |
| inode = |
| get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, CHANY, itrack, |
| rr_node_indices); |
| cost_index = rr_node[inode].cost_index; |
| frac_num_seg = clb_dist * rr_indexed_data[cost_index].inv_length; |
| Tdel = frac_num_seg * rr_indexed_data[cost_index].T_linear + |
| frac_num_seg * frac_num_seg * |
| rr_indexed_data[cost_index].T_quadratic; |
| Tdel_sum += Tdel / (float)clb_dist; |
| } |
| |
| return (Tdel_sum / (2. * nodes_per_chan)); |
| } |
| |
| |
| static float |
| get_average_opin_delay(t_ivec *** rr_node_indices, |
| int nodes_per_chan) |
| { |
| |
| /* Returns the average delay from an OPIN to a wire in an adjacent channel. */ |
| /* RESEARCH TODO: Got to think if this heuristic needs to change for hetero, right now, I'll calculate |
| * the average delay of non-IO blocks */ |
| int inode, ipin, iclass, iedge, itype, num_edges, to_switch, to_node, |
| num_conn; |
| float Cload, Tdel; |
| |
| Tdel = 0.; |
| num_conn = 0; |
| for(itype = 0; |
| itype < num_types && &type_descriptors[itype] != IO_TYPE; itype++) |
| { |
| for(ipin = 0; ipin < type_descriptors[itype].num_pins; ipin++) |
| { |
| iclass = type_descriptors[itype].pin_class[ipin]; |
| if(type_descriptors[itype].class_inf[iclass].type == |
| DRIVER) |
| { /* OPIN */ |
| inode = |
| get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, |
| OPIN, ipin, |
| rr_node_indices); |
| num_edges = rr_node[inode].num_edges; |
| |
| for(iedge = 0; iedge < num_edges; iedge++) |
| { |
| to_node = rr_node[inode].edges[iedge]; |
| to_switch = |
| rr_node[inode].switches[iedge]; |
| Cload = rr_node[to_node].C; |
| Tdel += |
| Cload * switch_inf[to_switch].R + |
| switch_inf[to_switch].Tdel; |
| num_conn++; |
| } |
| } |
| } |
| } |
| |
| Tdel /= (float)num_conn; |
| return (Tdel); |
| } |
| |
| |
| static void |
| load_rr_indexed_data_T_values(int index_start, |
| int num_indices_to_load, |
| t_rr_type rr_type, |
| int nodes_per_chan, |
| t_ivec *** rr_node_indices, |
| t_segment_inf * segment_inf) |
| { |
| |
| /* Loads the average propagation times through segments of each index type * |
| * for either all CHANX segment types or all CHANY segment types. It does * |
| * this by looking at all the segments in one channel in the middle of the * |
| * array and averaging the R and C values of all segments of the same type * |
| * and using them to compute average delay values for this type of segment. */ |
| |
| int itrack, iseg, inode, cost_index, iswitch; |
| float *C_total, *R_total; /* [0..num_rr_indexed_data - 1] */ |
| int *num_nodes_of_index; /* [0..num_rr_indexed_data - 1] */ |
| float Rnode, Cnode, Rsw, Tsw; |
| |
| num_nodes_of_index = (int *)my_calloc(num_rr_indexed_data, sizeof(int)); |
| C_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float)); |
| R_total = (float *)my_calloc(num_rr_indexed_data, sizeof(float)); |
| |
| /* Get average C and R values for all the segments of this type in one * |
| * channel segment, near the middle of the array. */ |
| |
| for(itrack = 0; itrack < nodes_per_chan; itrack++) |
| { |
| inode = |
| get_rr_node_index((nx + 1) / 2, (ny + 1) / 2, rr_type, itrack, |
| rr_node_indices); |
| cost_index = rr_node[inode].cost_index; |
| num_nodes_of_index[cost_index]++; |
| C_total[cost_index] += rr_node[inode].C; |
| R_total[cost_index] += rr_node[inode].R; |
| } |
| |
| |
| for(cost_index = index_start; |
| cost_index < index_start + num_indices_to_load; cost_index++) |
| { |
| |
| if(num_nodes_of_index[cost_index] == 0) |
| { /* Segments don't exist. */ |
| rr_indexed_data[cost_index].T_linear = OPEN; |
| rr_indexed_data[cost_index].T_quadratic = OPEN; |
| rr_indexed_data[cost_index].C_load = OPEN; |
| } |
| else |
| { |
| Rnode = |
| R_total[cost_index] / num_nodes_of_index[cost_index]; |
| Cnode = |
| C_total[cost_index] / num_nodes_of_index[cost_index]; |
| iseg = rr_indexed_data[cost_index].seg_index; |
| iswitch = segment_inf[iseg].wire_switch; |
| Rsw = switch_inf[iswitch].R; |
| Tsw = switch_inf[iswitch].Tdel; |
| |
| if(switch_inf[iswitch].buffered) |
| { |
| rr_indexed_data[cost_index].T_linear = |
| Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode; |
| rr_indexed_data[cost_index].T_quadratic = 0.; |
| rr_indexed_data[cost_index].C_load = 0.; |
| } |
| else |
| { /* Pass transistor */ |
| rr_indexed_data[cost_index].C_load = Cnode; |
| |
| /* See Dec. 23, 1997 notes for deriviation of formulae. */ |
| |
| rr_indexed_data[cost_index].T_linear = |
| Tsw + 0.5 * Rsw * Cnode; |
| rr_indexed_data[cost_index].T_quadratic = |
| (Rsw + Rnode) * 0.5 * Cnode; |
| } |
| } |
| } |
| |
| free(num_nodes_of_index); |
| free(C_total); |
| free(R_total); |
| } |