blob: a7f71d2308bb3cf493b6703c986384eeebc4a13b [file] [log] [blame]
#include <cstdio>
#include <cstring>
#include <ctime>
#include <cmath>
#include <assert.h>
#include <map>
#include <vector>
#include <algorithm>
#include "vpr_utils.h"
#include "util.h"
#include "vpr_types.h"
#include "globals.h"
#include "route_export.h"
#include "route_common.h"
#include "route_tree_timing.h"
#include "route_timing.h"
#include "heapsort.h"
#include "path_delay.h"
#include "net_delay.h"
#include "stats.h"
#include "look_ahead_bfs_precal.h"
using namespace std;
#define DEBUG_BFS_PRECAL
#define DEBUG_SEG_TYPE 1
#define DEBUG_CHAN_TYPE 0
#define DEBUG_OFFSET_X 19
#define DEBUG_OFFSET_Y 3
// TODO: change name of bfs_lookahead_info to g_precal_lookahead
// clearup the #include
// rename look_ahead_bfs to precal_look_ahead
// BIDIR wire for fill_cost_map
/*
* This file is for experimenting a new way for A* look ahead
* Purpose:
* Precalculate the timing delay & upstream C & base cost from one channel to another with offset dx, dy
* In the actual routing, lookahead will look up the map built up here to get a lower bound for future cost.
* General procedure:
* 1. start with each type of wire: pick the wire node near the corner (at location (2,2))
* 2. start point & end point are in terms of chanx / chany at (x,y)
* 3. SB & CB connection follow the exact specification in architecture
* 4. use breadth fist search to get cost at every location on chip
*/
/*********************** subroutines local to this file ********************************/
/*
* The heap in this BFS pre-cal map is called "heap2" to differentiate from
* the original heap adopted in timing driven router;
* However, heap2 is quite similar to that used in timing driven router.
* The main difference:
* 1. this heap will *remove* node after it found its ranking
* 2. the heap struct is modified to store more information
*/
static void initialize_structs(int num_segment);
static void initialize_heap2(int start_inode, float cost_start_inode,
float R_upstream, float C_upstream, float basecost_upstream);
static void node_to_heap2(int prev_node, int inode, float cost, float R_upstream,
float C_upstream, float basecost_upstream);
static void remove_heap2_node(int position);
static void setup_one_level_to_seg(int inode);
static t_heap2 * pop_heap2_head();
static void setup_seg_conn_inf(int num_segment);
static void expand_neighbours_to_heap2(t_heap2 *cheapest, int start_seg_index, t_segment_inf *segment_inf);
static void get_leave_pt_by_cb (bool is_conn_ipin, int offset, int inode, int *term_x, int *term_y);
static void fill_cost_map(t_segment_inf *segment_inf, int seg_type, int chan_type);
static void print_seg_conn_matrix(t_segment_inf *segment_inf, int num_segment);
typedef struct s_precal_fut_cost {
int prev_node; // only for debugging
int rank; // range: 1..total_num_of_wires
float cost; // cost here is equivalent to Tdel
float C_upstream; // upstream here is downstream in router look ahead
float basecost_upstream;
} t_precal_fut_cost;
/*********************** globals local to this file **********************************/
int g_debug_num_seg = 0; // for debugging: print out the matrix
t_heap2 **hptr_map = NULL; // [0..num_rr_nodes-1]: to efficiently locate the hptr to be removed from heap2, given inode for hptr
t_precal_fut_cost *rr_node_ranking = NULL;
int num_nodes_ranked;
int heap2_size;
int heap2_tail;
t_heap2 **heap2 = NULL;
int **seg_conn_inf = NULL; // if [i][j] = 1, then seg_type i can connect to seg j through the normal (core) switch block
t_segment_inf *ptr_segment_inf = NULL;
/************************ function definition *******************************/
static void initialize_structs(int num_segment) {
g_debug_num_seg = num_segment;
hptr_map = (t_heap2**) my_malloc(sizeof(t_heap2 *) * num_rr_nodes);
rr_node_ranking = (t_precal_fut_cost *) my_malloc(sizeof(t_precal_fut_cost) * num_rr_nodes);
num_nodes_ranked = 0;
heap2_size = 0;
heap2_tail = 1;
// heap2 can point to anything at the start
heap2 = NULL;
if (bfs_lookahead_info == NULL) {
bfs_lookahead_info = (t_fut_cost_inf ****)alloc_matrix4(0, nx-3, 0, ny-3, 0, num_segment-1, 0, 1, sizeof(t_fut_cost_inf));
}
for (int ix = 0; ix < nx-2; ix ++) {
for (int iy = 0; iy < ny-2; iy ++) {
for (int iseg = 0; iseg < num_segment; iseg ++) {
for (int ichan = 0; ichan < 2; ichan ++) {
bfs_lookahead_info[ix][iy][iseg][ichan].Tdel_x = HUGE_POSITIVE_FLOAT;
bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x = HUGE_POSITIVE_FLOAT;
bfs_lookahead_info[ix][iy][iseg][ichan].acc_C_x = HUGE_POSITIVE_FLOAT;
bfs_lookahead_info[ix][iy][iseg][ichan].Tdel_y = HUGE_POSITIVE_FLOAT;
bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_y = HUGE_POSITIVE_FLOAT;
bfs_lookahead_info[ix][iy][iseg][ichan].acc_C_y = HUGE_POSITIVE_FLOAT;
}
}
}
}
//memset(bfs_lookahead_info, );
if (seg_conn_inf == NULL)
seg_conn_inf = (int **)alloc_matrix(0, num_segment - 1, 0, num_segment - 1, sizeof(int));
for (int i = 0; i < num_segment; i ++) {
for (int j = 0; j < num_segment; j ++) {
seg_conn_inf[i][j] = 0;
}
}
}
void pre_cal_look_ahead(t_segment_inf *segment_inf, int num_segment) {
/*
* high level function to do the pre calculation of future cost using BFS
* to be called in try_route() after rr_graph is built, before actual router is called
*/
ptr_segment_inf = segment_inf;
initialize_structs(num_segment);
setup_seg_conn_inf(num_segment);
printf("\n\nfinish seg conn setup\n\n");
print_seg_conn_matrix(segment_inf, num_segment);
/*
* range of coord on chip: [2..nx-1][2..ny-1]
* start at (2, 2); calculate for each wire type, and CHANX / CHANY
*/
for (int iseg = 0; iseg < num_segment; iseg ++) {
int chan_type[2] = {CHANX, CHANY};
for (int ichan = 0; ichan < 2; ichan ++) {
//int length = segment_inf[i].length;
int start_inode = -1;
// first try to find wire start at the left bottom corner
for (int inode = 0; inode < num_rr_nodes; inode ++) {
// start wire is either INC_DIR or BI_DIR
if (rr_node[inode].get_direction() == DEC_DIRECTION)
continue;
int i_seg_index = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
if (i_seg_index != iseg)
continue;
if (rr_node[inode].type == chan_type[ichan]
&& rr_node[inode].get_xlow() == 2
&& rr_node[inode].get_ylow() == 2) {
/*
if (i_seg_index == 0) {
int num_edges = rr_node[inode].get_num_edges();
for (int iconn = 0; iconn < num_edges; iconn++) {
int to_node = rr_node[inode].edges[iconn];
int to_node_seg_index = rr_indexed_data[rr_node[to_node].get_cost_index()].seg_index;
if (to_node_seg_index == 1 || to_node_seg_index == 2) {
start_inode = inode;
inode = num_rr_nodes;
break;
}
}
} else {
*/
start_inode = inode;
inode = num_rr_nodes;
//}
}
}
// if not found such wire (maybe due to a too small chip size / too long wire)
// then try the right bottom corner
if (start_inode == -1) {
for (int inode = 0; inode < num_rr_nodes; inode ++) {
if (rr_node[inode].get_direction() == INC_DIRECTION)
continue;
if (rr_indexed_data[rr_node[inode].get_cost_index()].seg_index != iseg)
continue;
if (rr_node[inode].type == chan_type[ichan]
&& rr_node[inode].get_xhigh() == nx - 1
&& rr_node[inode].get_yhigh() == ny - 1) {
start_inode = inode;
inode = num_rr_nodes;
}
}
}
// found a valid starting node, next do the calculation using BFS
if (start_inode != -1) {
#ifdef DEBUG_BFS_PRECAL
if (iseg == DEBUG_SEG_TYPE && ichan == DEBUG_CHAN_TYPE) {
printf("== DEBUG ==: start node %d\n", start_inode);
}
#endif
vpr_printf_info("**** START BFS for segment %s\tchan type %d\n", segment_inf[iseg].name, ichan);
clock_t begin = clock();
num_nodes_ranked = 0;
look_ahead_bfs_precal(start_inode, segment_inf);
clock_t end = clock();
vpr_printf_info("\tThis BFS took %g seconds.\n", (float)(end - begin) / CLOCKS_PER_SEC);
vpr_printf_info("**** END BFS for segment %s\n", segment_inf[iseg].name);
// setup the global map for future look ahead lookup
fill_cost_map(segment_inf, iseg, ichan);
start_inode = -1;
} else {
vpr_printf_error(__FILE__, __LINE__,
"segment %s has no wire starting at corner\n", segment_inf[iseg].name);
}
}
}
print_cost_map("init_bfs_map");
free(hptr_map);
free(rr_node_ranking);
free(heap2+1);
free_matrix(seg_conn_inf, 0, num_segment - 1, 0, sizeof(int));
hptr_map = NULL;
rr_node_ranking = NULL;
heap2 = NULL;
seg_conn_inf = NULL;
}
void look_ahead_bfs_precal(int start_inode, t_segment_inf *segment_inf) {
/*
* routine to actually do the bfs calculation
* general procedure similar to that in timing driven route
*/
for (int i = 0; i < num_rr_nodes; i ++) {
hptr_map[i] = NULL;
rr_node_ranking[i].prev_node = -1;
rr_node_ranking[i].rank = -1;
rr_node_ranking[i].cost = -1;
rr_node_ranking[i].C_upstream = -1;
rr_node_ranking[i].basecost_upstream = -1;
}
// heap2 for this precal is modified to facilitate removal of replicate nodes
// cost & C & basecost is excluding the start node, so set them to be all 0.
node_to_heap2(-1, start_inode, 0.0, 0.0, 0.0, 0.0);
t_heap2* cheapest = pop_heap2_head();
int start_seg_index = rr_indexed_data[rr_node[start_inode].get_cost_index()].seg_index;
while (cheapest != NULL) {
expand_neighbours_to_heap2(cheapest, start_seg_index, segment_inf);
free(cheapest);
cheapest = pop_heap2_head();
}
}
static void setup_seg_conn_inf(int num_segment) {
/*
* to test how many other type of segments a certain type of wire can
* connect to, via the standard switch block (sb which are at the core place of chip)
*
* the reason for doing so:
* for some weird arch, for example, a certain wire can only connect to wires of its
* same type, via sb in core area on chip. but it may be able to connect to other wire
* types via corner / fringe sb. --> this will cause inaccuracy of bfs, as we pick the
* starting point to be near the corner
*/
vector<int> seg_type_to_check;
for (int iseg = 0; iseg < num_segment; iseg ++) {
seg_type_to_check.push_back(iseg);
}
vector< pair<int, int> > nearest_to_core_pos;
for (int iseg = 0; iseg < num_segment; iseg ++) {
nearest_to_core_pos.push_back(make_pair(-1, -1));
}
// we have to go through this step, cuz on some small chips, there are just no
// long wires that have their mid-point going through exactly (nx / 2) & (ny / 2)
// find the smallest distance between the wire mid point and the chip mid point
for (int inode = 0; inode < num_rr_nodes; inode ++) {
int cur_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
if (cur_seg_type < 0 || cur_seg_type >= num_segment) {
// this check is to eliminate non-wire seg types (e.g.: seg_type = -1)
continue;
}
int cur_x_mid, cur_y_mid;
cur_x_mid = (rr_node[inode].get_xhigh() + rr_node[inode].get_xlow()) / 2;
cur_y_mid = (rr_node[inode].get_yhigh() + rr_node[inode].get_ylow()) / 2;
if (abs(nearest_to_core_pos[cur_seg_type].first - nx / 2) > abs(cur_x_mid - nx / 2)
&& abs(nearest_to_core_pos[cur_seg_type].second - ny / 2) > abs(cur_y_mid - ny / 2)) {
nearest_to_core_pos[cur_seg_type].first = cur_x_mid;
nearest_to_core_pos[cur_seg_type].second = cur_y_mid;
}
}
// find the inode that has the smallest distance (i.e.: wire that is closest to center of chip)
for (int inode = 0; inode < num_rr_nodes; inode ++) {
int cur_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
if (cur_seg_type < 0 || cur_seg_type >= num_segment)
continue;
int cur_x_mid, cur_y_mid;
cur_x_mid = (rr_node[inode].get_xhigh() + rr_node[inode].get_xlow()) / 2;
cur_y_mid = (rr_node[inode].get_yhigh() + rr_node[inode].get_ylow()) / 2;
vector<int>::iterator seg_pos = find(seg_type_to_check.begin(), seg_type_to_check.end(), cur_seg_type);
if (seg_pos != seg_type_to_check.end()
&& cur_x_mid == nearest_to_core_pos[cur_seg_type].first
&& cur_y_mid == nearest_to_core_pos[cur_seg_type].second) {
seg_type_to_check.erase(seg_pos);
// find the wire types that are driven by inode via *one* switch block
// i.e.: find the wire types that are connected directly by inode
setup_one_level_to_seg(inode);
}
if (seg_type_to_check.empty())
break;
}
// next, test convergence for seg_conn_inf
// seg_conn_inf is setup here
bool new_conn_added = false;
do {
new_conn_added = false;
for (int from_seg = 0; from_seg < num_segment; from_seg ++) {
for (int to_seg = 0; to_seg < num_segment; to_seg ++) {
if (seg_conn_inf[from_seg][to_seg] == 0)
continue;
for (int to_to_seg = 0; to_to_seg < num_segment; to_to_seg ++) {
if (seg_conn_inf[to_seg][to_to_seg] == 0)
continue;
if (seg_conn_inf[from_seg][to_to_seg] == 0) {
seg_conn_inf[from_seg][to_to_seg] = 1;
new_conn_added = true;
}
}
}
}
} while (new_conn_added);
}
static void setup_one_level_to_seg(int inode) {
/*
* find the wire type that are driven directly by inode
* setup the init value for seg_conn_inf
*/
assert(rr_node[inode].type == CHANX || rr_node[inode].type == CHANY);
int num_edges = rr_node[inode].get_num_edges();
int from_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
if (from_seg_type >= g_num_segment || from_seg_type < 0)
return;
for (int iconn = 0; iconn < num_edges; iconn ++) {
int to_node = rr_node[inode].edges[iconn];
int to_seg_type = rr_indexed_data[rr_node[to_node].get_cost_index()].seg_index;
if (to_seg_type >= g_num_segment || to_seg_type < 0)
continue;
seg_conn_inf[from_seg_type][to_seg_type] = 1;
}
}
static void expand_neighbours_to_heap2(t_heap2 *cheapest, int start_seg_index, t_segment_inf *segment_inf) {
/*
* expand neighbours for cheapest (standard BFS procedure)
*/
int inode = cheapest->inode;
int num_edges = rr_node[inode].get_num_edges();
int iswitch;
float cur_R_upstream = cheapest->R_upstream;
float cur_C_upstream = cheapest->C_upstream;
float cur_basecost_upstream = cheapest->basecost_upstream;
float new_R_upstream, new_C_upstream, new_basecost_upstream;
for (int iconn = 0; iconn < num_edges; iconn++) {
int to_node = rr_node[inode].edges[iconn];
if (rr_node[to_node].type != CHANX && rr_node[to_node].type != CHANY)
continue;
if (rr_node_ranking[to_node].rank != -1)
continue;
int to_seg_index = rr_indexed_data[rr_node[to_node].get_cost_index()].seg_index;
if (seg_conn_inf[start_seg_index][to_seg_index] == 0)
continue;
// calculate the backward delay of this node
// referring to route_timing.c: timing_driven_expand_neighbours()
// it is not accurate to use the R & C stored in rr_node
// think about the wire that is cut short at fringe:
// if a L4 wire is cut to be of length 2 only, then its R will be half of the normal value
// the solution is to use the R per length / C per length in segment_inf
iswitch = rr_node[inode].switches[iconn];
float cur_wire_R = segment_inf[to_seg_index].Rmetal * segment_inf[to_seg_index].length;
float cur_wire_C = segment_inf[to_seg_index].Cmetal * segment_inf[to_seg_index].length;
float cur_wire_basecost = rr_indexed_data[rr_node[to_node].get_cost_index()].base_cost;
if (g_rr_switch_inf[iswitch].buffered) {
new_R_upstream = g_rr_switch_inf[iswitch].R;
// if switch is buffered, don't add C.
// but this will still produce an overestimation of C
new_C_upstream = cur_C_upstream;
} else {
new_R_upstream = cur_R_upstream + g_rr_switch_inf[iswitch].R;
new_C_upstream = cur_C_upstream + cur_wire_C;
}
float to_node_cost = cur_wire_C * (new_R_upstream + 0.5 * cur_wire_R);
to_node_cost += g_rr_switch_inf[iswitch].Tdel;
to_node_cost += cheapest->cost;
new_R_upstream += cur_wire_R;
new_basecost_upstream = cur_basecost_upstream + cur_wire_basecost;
if (hptr_map[to_node] != NULL) {
// if to_node is already on the heap, then either
// replace the outdated node (by first remove it) or do nothing
if (hptr_map[to_node]->cost <= to_node_cost) {
continue;
} else {
remove_heap2_node(hptr_map[to_node]->position);
}
}
node_to_heap2(inode, to_node, to_node_cost, new_R_upstream, new_C_upstream, new_basecost_upstream);
}
}
static void initialize_heap2(int start_inode, float cost_start_inode, float R_start_inode,
float C_start_inode, float basecost_start_inode) {
assert(heap2 == NULL && heap2_size == 0 && heap2_tail == 1);
heap2_size = 1;
heap2_tail = 2;
t_heap2 *head = (t_heap2 *) my_malloc(sizeof(t_heap2));
head->prev_node = -1;
head->inode = start_inode;
head->position = 1;
head->cost = cost_start_inode;
head->R_upstream = R_start_inode;
head->C_upstream = C_start_inode;
head->basecost_upstream = basecost_start_inode;
heap2 = (t_heap2 **) my_malloc(sizeof(t_heap2 *) * heap2_size);
heap2 --;
heap2[1] = head;
}
static void node_to_heap2(int prev_node, int inode, float cost,
float R_upstream, float C_upstream, float basecost_upstream) {
/*
* when you call this function, it has already been made sure that
* heap node for inode is removed (hptr_map[inode] = NULL)
*/
if (heap2_size == 0) {
initialize_heap2(inode, cost, R_upstream, C_upstream, basecost_upstream);
return;
}
t_heap2 *hptr = (t_heap2 *) my_malloc(sizeof(t_heap2));
// setup all fields
assert(hptr_map[inode] == NULL);
hptr_map[inode] = hptr;
hptr->prev_node = prev_node;
hptr->inode = inode;
hptr->position = -1;
hptr->cost = cost;
hptr->R_upstream = R_upstream;
hptr->C_upstream = C_upstream;
hptr->basecost_upstream = basecost_upstream;
// dynamic expansion of heap size
// (referring to old heap method)
if (heap2_tail > heap2_size) {
heap2_size *= 2;
heap2 = (t_heap2 **)my_realloc((void *)(heap2 + 1), heap2_size * sizeof(t_heap2 *));
heap2 --;
}
heap2[heap2_tail] = hptr;
// sifting up
hptr->position = heap2_tail;
int ifrom = heap2_tail;
int ito = ifrom / 2;
heap2_tail ++;
t_heap2 *temp_ptr = NULL;
while ((ito >= 1) && (heap2[ifrom]->cost < heap2[ito]->cost)) {
temp_ptr = heap2[ito];
// update position to facilitate future removal
heap2[ifrom]->position = ito;
heap2[ito]->position = ifrom;
heap2[ito] = heap2[ifrom];
heap2[ifrom] = temp_ptr;
ifrom = ito;
ito = ifrom / 2;
}
}
static void remove_heap2_node(int position) {
/*
* remove node of [position] from heap
* sifting to maintain the heap property
*/
hptr_map[heap2[position]->inode] = NULL;
free(heap2[position]);
heap2_tail --;
heap2[position] = heap2[heap2_tail];
heap2[heap2_tail] = NULL;
if (heap2[position] != NULL)
heap2[position]->position = position;
// Sifting:
int ifrom = position;
int ito = 2 * ifrom;
t_heap2 *temp;
while (ito < heap2_tail) {
if (ito != heap2_tail - 1 && heap2[ito + 1]->cost < heap2[ito]->cost)
ito++;
if (heap2[ito]->cost > heap2[ifrom]->cost)
break;
temp = heap2[ito];
heap2[ito] = heap2[ifrom];
heap2[ifrom] = temp;
heap2[ito]->position = ito;
heap2[ifrom]->position = ifrom;
ifrom = ito;
ito = 2 * ifrom;
}
}
static t_heap2 * pop_heap2_head(void) {
if (heap2_tail == 1)
return NULL;
t_heap2 * poped_head = (t_heap2 *)my_malloc(sizeof (t_heap2));
memcpy(poped_head, heap2[1], sizeof(t_heap2));
remove_heap2_node(1);
rr_node_ranking[poped_head->inode].prev_node = poped_head->prev_node;
rr_node_ranking[poped_head->inode].rank = num_nodes_ranked;
rr_node_ranking[poped_head->inode].cost = poped_head->cost;
rr_node_ranking[poped_head->inode].C_upstream = poped_head->C_upstream;
rr_node_ranking[poped_head->inode].basecost_upstream = poped_head->basecost_upstream;
num_nodes_ranked ++;
return poped_head;
}
static void fill_cost_map(t_segment_inf *segment_inf, int wire_type, int chan_type) {
/*
* setup the global cost map to enable future lookup by timing driven router
*/
int print_inode_x = -1, print_inode_y = -1;
int term_x = -1;
int term_y = -1;
for (int inode = 0; inode < num_rr_nodes; inode++) {
if (rr_node_ranking[inode].rank == -1)
continue;
// TODO: what about BI_DIR????
int segment_index = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
bool *cb_pattern = segment_inf[segment_index].cb;
int target_chan_type = rr_node[inode].type;
for (int offset = 0; offset < rr_node[inode].get_len(); offset ++) {
get_leave_pt_by_cb(cb_pattern[offset], offset, inode, &term_x, &term_y);
if (term_x == -1 || term_y == -1)
continue;
if (target_chan_type == CHANX) {
if (bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_x == UNDEFINED
|| bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_x > rr_node_ranking[inode].cost) {
bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_x = rr_node_ranking[inode].cost;
bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_C_x = rr_node_ranking[inode].C_upstream;
bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_basecost_x = rr_node_ranking[inode].basecost_upstream;
#ifdef DEBUG_BFS_PRECAL
if (term_x == DEBUG_OFFSET_X && term_y == DEBUG_OFFSET_Y
&& wire_type == DEBUG_SEG_TYPE && chan_type == DEBUG_CHAN_TYPE) {
print_inode_x = inode;
}
#endif
}
} else {
if (bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_y == UNDEFINED
|| bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_y > rr_node_ranking[inode].cost) {
bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_y = rr_node_ranking[inode].cost;
bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_C_y = rr_node_ranking[inode].C_upstream;
bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_basecost_y = rr_node_ranking[inode].basecost_upstream;
#ifdef DEBUG_BFS_PRECAL
if (term_x == DEBUG_OFFSET_X && term_y == DEBUG_OFFSET_Y
&& wire_type == DEBUG_SEG_TYPE && chan_type == DEBUG_CHAN_TYPE) {
print_inode_y = inode;
}
#endif
}
}
}
}
#ifdef DEBUG_BFS_PRECAL
if (print_inode_x != -1) {
printf("BFS SELECTED PATH (CHANX): [%d][%d][%d][%d]\n", DEBUG_OFFSET_X, DEBUG_OFFSET_Y, DEBUG_SEG_TYPE, DEBUG_CHAN_TYPE);
print_bfs_optimal_path(print_inode_x);
}
if (print_inode_y != -1) {
printf("BFS SELECTED PATH (CHANY): [%d][%d][%d][%d]\n", DEBUG_OFFSET_X, DEBUG_OFFSET_Y, DEBUG_SEG_TYPE, DEBUG_CHAN_TYPE);
print_bfs_optimal_path(print_inode_y);
}
#endif
}
static void get_leave_pt_by_cb (bool is_conn_ipin, int offset, int inode, int *term_x, int *term_y) {
/*
* return the end point of a wire (term_x, term_y), calculated by inode start point + offset
* if it can drive a clb at (term_x, term_y) (i.e.: is_conn_ipin = 1)
*/
if (!is_conn_ipin) {
*term_x = -1;
*term_y = -1;
return;
}
int start_x = -1, start_y = -1;
get_unidir_seg_start(inode, &start_x, &start_y);
int wire_type = rr_node[inode].type;
assert(wire_type == CHANX || wire_type == CHANY);
if (wire_type == CHANX) {
*term_y = start_y - 2;
if (rr_node[inode].get_direction() == INC_DIRECTION)
*term_x = start_x + offset - 2;
else
*term_x = start_x - offset - 2;
} else {
*term_x = start_x - 2;
if (rr_node[inode].get_direction() == INC_DIRECTION)
*term_y = start_y + offset - 2;
else
*term_y = start_y - offset - 2;
}
// exclude end points falling outside the range of cost map
if (*term_x < 0 || *term_x > nx-3)
*term_x = -1;
if (*term_y < 0 || *term_y > ny-3)
*term_y = -1;
}
/**************************** util functions for debugging & profiling ************************************/
void print_cost_map(char *fname) {
/*
* export to file the map of future cost
*/
FILE *fp;
fp = fopen(fname, "w+");
if (fp == NULL) {
vpr_printf_warning(__FILE__, __LINE__, "cannot open file\n");
}
float max_basecost = -1;
fprintf(fp, "START: COST RANKING MAP\n");
fprintf(fp, "x\ty\tcost_x\tcost_y\n");
for (int ix = 0; ix < nx-2; ix ++) {
for (int iy = 0; iy < ny-2; iy ++) {
for (int iseg = 0; iseg < g_debug_num_seg; iseg ++) {
for (int ichan = 0; ichan < 2; ichan ++) {
fprintf(fp, "%d\t%d\t%.1f e-10\t%.1f e-10\n", ix, iy,
bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x * pow(10, 10),
bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_y * pow(10, 10));
if (bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x > max_basecost)
max_basecost = bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x;
if (bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_y > max_basecost)
max_basecost = bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_y;
}
}
}
}
//printf("MAX basecost\t%e\n", max_basecost);
fprintf(fp, "END MAP PRINTING\n");
fclose(fp);
}
static void print_seg_conn_matrix(t_segment_inf *segment_inf, int num_segment) {
vpr_printf_info(">> SEGMENT CONN INFO\n");
for (int iseg = 0; iseg < num_segment; iseg ++) {
printf("\tSEG %s --> ", segment_inf[iseg].name);
for (int i_to_seg = 0; i_to_seg < num_segment; i_to_seg ++) {
if (seg_conn_inf[iseg][i_to_seg] == 1) {
printf("%s ", segment_inf[i_to_seg].name);
}
}
printf("\n");
}
}
void print_bfs_optimal_path(int term_node) {
assert(term_node >= 0 && term_node < num_rr_nodes);
int cur_node = term_node;
vpr_printf_info("*** PRINT BFS OPTIMAL PATH\n");
while (rr_node_ranking[cur_node].prev_node != -1) {
int x_s, x_e, y_s, y_e;
get_unidir_seg_end(cur_node, &x_s, &y_s);
get_unidir_seg_start(cur_node, &x_e, &y_e);
int len = rr_node[cur_node].get_len();
vpr_printf_info("\tinode %d\tL%d\tstart(%d,%d)\tend(%d,%d)\tbackTdel %.3f\t0_basecost %.3f\tacc_basecost %.3f\n",
cur_node, len, x_s, y_s, x_e, y_e, rr_node_ranking[cur_node].cost * pow(10,10),
rr_indexed_data[rr_node[cur_node].get_cost_index()].base_cost * pow(10, 10),
rr_node_ranking[cur_node].basecost_upstream * pow(10, 10));
cur_node = rr_node_ranking[cur_node].prev_node;
}
}