| #include <cstdio> |
| #include <cstring> |
| #include <ctime> |
| #include <cmath> |
| |
| #include <assert.h> |
| |
| #include <map> |
| #include <vector> |
| #include <algorithm> |
| |
| #include "vpr_utils.h" |
| #include "util.h" |
| #include "vpr_types.h" |
| #include "globals.h" |
| #include "route_export.h" |
| #include "route_common.h" |
| #include "route_tree_timing.h" |
| #include "route_timing.h" |
| #include "heapsort.h" |
| #include "path_delay.h" |
| #include "net_delay.h" |
| #include "stats.h" |
| #include "look_ahead_bfs_precal.h" |
| |
| using namespace std; |
| |
| #define DEBUG_BFS_PRECAL |
| #define DEBUG_SEG_TYPE 1 |
| #define DEBUG_CHAN_TYPE 0 |
| #define DEBUG_OFFSET_X 19 |
| #define DEBUG_OFFSET_Y 3 |
| |
| // TODO: change name of bfs_lookahead_info to g_precal_lookahead |
| // clearup the #include |
| // rename look_ahead_bfs to precal_look_ahead |
| // BIDIR wire for fill_cost_map |
| |
| /* |
| * This file is for experimenting a new way for A* look ahead |
| * Purpose: |
| * Precalculate the timing delay & upstream C & base cost from one channel to another with offset dx, dy |
| * In the actual routing, lookahead will look up the map built up here to get a lower bound for future cost. |
| * General procedure: |
| * 1. start with each type of wire: pick the wire node near the corner (at location (2,2)) |
| * 2. start point & end point are in terms of chanx / chany at (x,y) |
| * 3. SB & CB connection follow the exact specification in architecture |
| * 4. use breadth fist search to get cost at every location on chip |
| */ |
| |
| |
| /*********************** subroutines local to this file ********************************/ |
| /* |
| * The heap in this BFS pre-cal map is called "heap2" to differentiate from |
| * the original heap adopted in timing driven router; |
| * However, heap2 is quite similar to that used in timing driven router. |
| * The main difference: |
| * 1. this heap will *remove* node after it found its ranking |
| * 2. the heap struct is modified to store more information |
| */ |
| static void initialize_structs(int num_segment); |
| static void initialize_heap2(int start_inode, float cost_start_inode, |
| float R_upstream, float C_upstream, float basecost_upstream); |
| static void node_to_heap2(int prev_node, int inode, float cost, float R_upstream, |
| float C_upstream, float basecost_upstream); |
| static void remove_heap2_node(int position); |
| static void setup_one_level_to_seg(int inode); |
| static t_heap2 * pop_heap2_head(); |
| static void setup_seg_conn_inf(int num_segment); |
| static void expand_neighbours_to_heap2(t_heap2 *cheapest, int start_seg_index, t_segment_inf *segment_inf); |
| static void get_leave_pt_by_cb (bool is_conn_ipin, int offset, int inode, int *term_x, int *term_y); |
| static void fill_cost_map(t_segment_inf *segment_inf, int seg_type, int chan_type); |
| static void print_seg_conn_matrix(t_segment_inf *segment_inf, int num_segment); |
| |
| typedef struct s_precal_fut_cost { |
| int prev_node; // only for debugging |
| int rank; // range: 1..total_num_of_wires |
| float cost; // cost here is equivalent to Tdel |
| float C_upstream; // upstream here is downstream in router look ahead |
| float basecost_upstream; |
| } t_precal_fut_cost; |
| |
| |
| /*********************** globals local to this file **********************************/ |
| int g_debug_num_seg = 0; // for debugging: print out the matrix |
| t_heap2 **hptr_map = NULL; // [0..num_rr_nodes-1]: to efficiently locate the hptr to be removed from heap2, given inode for hptr |
| t_precal_fut_cost *rr_node_ranking = NULL; |
| int num_nodes_ranked; |
| int heap2_size; |
| int heap2_tail; |
| t_heap2 **heap2 = NULL; |
| int **seg_conn_inf = NULL; // if [i][j] = 1, then seg_type i can connect to seg j through the normal (core) switch block |
| t_segment_inf *ptr_segment_inf = NULL; |
| |
| |
| /************************ function definition *******************************/ |
| static void initialize_structs(int num_segment) { |
| g_debug_num_seg = num_segment; |
| hptr_map = (t_heap2**) my_malloc(sizeof(t_heap2 *) * num_rr_nodes); |
| rr_node_ranking = (t_precal_fut_cost *) my_malloc(sizeof(t_precal_fut_cost) * num_rr_nodes); |
| num_nodes_ranked = 0; |
| heap2_size = 0; |
| heap2_tail = 1; |
| // heap2 can point to anything at the start |
| heap2 = NULL; |
| |
| if (bfs_lookahead_info == NULL) { |
| bfs_lookahead_info = (t_fut_cost_inf ****)alloc_matrix4(0, nx-3, 0, ny-3, 0, num_segment-1, 0, 1, sizeof(t_fut_cost_inf)); |
| } |
| for (int ix = 0; ix < nx-2; ix ++) { |
| for (int iy = 0; iy < ny-2; iy ++) { |
| for (int iseg = 0; iseg < num_segment; iseg ++) { |
| for (int ichan = 0; ichan < 2; ichan ++) { |
| bfs_lookahead_info[ix][iy][iseg][ichan].Tdel_x = HUGE_POSITIVE_FLOAT; |
| bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x = HUGE_POSITIVE_FLOAT; |
| bfs_lookahead_info[ix][iy][iseg][ichan].acc_C_x = HUGE_POSITIVE_FLOAT; |
| bfs_lookahead_info[ix][iy][iseg][ichan].Tdel_y = HUGE_POSITIVE_FLOAT; |
| bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_y = HUGE_POSITIVE_FLOAT; |
| bfs_lookahead_info[ix][iy][iseg][ichan].acc_C_y = HUGE_POSITIVE_FLOAT; |
| } |
| } |
| } |
| } |
| //memset(bfs_lookahead_info, ); |
| if (seg_conn_inf == NULL) |
| seg_conn_inf = (int **)alloc_matrix(0, num_segment - 1, 0, num_segment - 1, sizeof(int)); |
| for (int i = 0; i < num_segment; i ++) { |
| for (int j = 0; j < num_segment; j ++) { |
| seg_conn_inf[i][j] = 0; |
| } |
| } |
| } |
| |
| |
| void pre_cal_look_ahead(t_segment_inf *segment_inf, int num_segment) { |
| /* |
| * high level function to do the pre calculation of future cost using BFS |
| * to be called in try_route() after rr_graph is built, before actual router is called |
| */ |
| ptr_segment_inf = segment_inf; |
| initialize_structs(num_segment); |
| setup_seg_conn_inf(num_segment); |
| printf("\n\nfinish seg conn setup\n\n"); |
| print_seg_conn_matrix(segment_inf, num_segment); |
| /* |
| * range of coord on chip: [2..nx-1][2..ny-1] |
| * start at (2, 2); calculate for each wire type, and CHANX / CHANY |
| */ |
| for (int iseg = 0; iseg < num_segment; iseg ++) { |
| int chan_type[2] = {CHANX, CHANY}; |
| for (int ichan = 0; ichan < 2; ichan ++) { |
| //int length = segment_inf[i].length; |
| int start_inode = -1; |
| // first try to find wire start at the left bottom corner |
| for (int inode = 0; inode < num_rr_nodes; inode ++) { |
| // start wire is either INC_DIR or BI_DIR |
| if (rr_node[inode].get_direction() == DEC_DIRECTION) |
| continue; |
| int i_seg_index = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index; |
| if (i_seg_index != iseg) |
| continue; |
| if (rr_node[inode].type == chan_type[ichan] |
| && rr_node[inode].get_xlow() == 2 |
| && rr_node[inode].get_ylow() == 2) { |
| /* |
| if (i_seg_index == 0) { |
| int num_edges = rr_node[inode].get_num_edges(); |
| for (int iconn = 0; iconn < num_edges; iconn++) { |
| int to_node = rr_node[inode].edges[iconn]; |
| int to_node_seg_index = rr_indexed_data[rr_node[to_node].get_cost_index()].seg_index; |
| if (to_node_seg_index == 1 || to_node_seg_index == 2) { |
| start_inode = inode; |
| inode = num_rr_nodes; |
| break; |
| } |
| } |
| } else { |
| */ |
| start_inode = inode; |
| inode = num_rr_nodes; |
| //} |
| } |
| } |
| // if not found such wire (maybe due to a too small chip size / too long wire) |
| // then try the right bottom corner |
| if (start_inode == -1) { |
| for (int inode = 0; inode < num_rr_nodes; inode ++) { |
| if (rr_node[inode].get_direction() == INC_DIRECTION) |
| continue; |
| if (rr_indexed_data[rr_node[inode].get_cost_index()].seg_index != iseg) |
| continue; |
| if (rr_node[inode].type == chan_type[ichan] |
| && rr_node[inode].get_xhigh() == nx - 1 |
| && rr_node[inode].get_yhigh() == ny - 1) { |
| start_inode = inode; |
| inode = num_rr_nodes; |
| } |
| } |
| } |
| // found a valid starting node, next do the calculation using BFS |
| if (start_inode != -1) { |
| #ifdef DEBUG_BFS_PRECAL |
| if (iseg == DEBUG_SEG_TYPE && ichan == DEBUG_CHAN_TYPE) { |
| printf("== DEBUG ==: start node %d\n", start_inode); |
| } |
| #endif |
| vpr_printf_info("**** START BFS for segment %s\tchan type %d\n", segment_inf[iseg].name, ichan); |
| clock_t begin = clock(); |
| num_nodes_ranked = 0; |
| look_ahead_bfs_precal(start_inode, segment_inf); |
| clock_t end = clock(); |
| vpr_printf_info("\tThis BFS took %g seconds.\n", (float)(end - begin) / CLOCKS_PER_SEC); |
| vpr_printf_info("**** END BFS for segment %s\n", segment_inf[iseg].name); |
| // setup the global map for future look ahead lookup |
| fill_cost_map(segment_inf, iseg, ichan); |
| start_inode = -1; |
| } else { |
| vpr_printf_error(__FILE__, __LINE__, |
| "segment %s has no wire starting at corner\n", segment_inf[iseg].name); |
| } |
| } |
| } |
| print_cost_map("init_bfs_map"); |
| |
| free(hptr_map); |
| free(rr_node_ranking); |
| free(heap2+1); |
| free_matrix(seg_conn_inf, 0, num_segment - 1, 0, sizeof(int)); |
| hptr_map = NULL; |
| rr_node_ranking = NULL; |
| heap2 = NULL; |
| seg_conn_inf = NULL; |
| } |
| |
| |
| void look_ahead_bfs_precal(int start_inode, t_segment_inf *segment_inf) { |
| /* |
| * routine to actually do the bfs calculation |
| * general procedure similar to that in timing driven route |
| */ |
| for (int i = 0; i < num_rr_nodes; i ++) { |
| hptr_map[i] = NULL; |
| rr_node_ranking[i].prev_node = -1; |
| rr_node_ranking[i].rank = -1; |
| rr_node_ranking[i].cost = -1; |
| rr_node_ranking[i].C_upstream = -1; |
| rr_node_ranking[i].basecost_upstream = -1; |
| } |
| // heap2 for this precal is modified to facilitate removal of replicate nodes |
| // cost & C & basecost is excluding the start node, so set them to be all 0. |
| node_to_heap2(-1, start_inode, 0.0, 0.0, 0.0, 0.0); |
| t_heap2* cheapest = pop_heap2_head(); |
| int start_seg_index = rr_indexed_data[rr_node[start_inode].get_cost_index()].seg_index; |
| while (cheapest != NULL) { |
| expand_neighbours_to_heap2(cheapest, start_seg_index, segment_inf); |
| free(cheapest); |
| cheapest = pop_heap2_head(); |
| } |
| } |
| |
| |
| static void setup_seg_conn_inf(int num_segment) { |
| /* |
| * to test how many other type of segments a certain type of wire can |
| * connect to, via the standard switch block (sb which are at the core place of chip) |
| * |
| * the reason for doing so: |
| * for some weird arch, for example, a certain wire can only connect to wires of its |
| * same type, via sb in core area on chip. but it may be able to connect to other wire |
| * types via corner / fringe sb. --> this will cause inaccuracy of bfs, as we pick the |
| * starting point to be near the corner |
| */ |
| |
| vector<int> seg_type_to_check; |
| for (int iseg = 0; iseg < num_segment; iseg ++) { |
| seg_type_to_check.push_back(iseg); |
| } |
| vector< pair<int, int> > nearest_to_core_pos; |
| for (int iseg = 0; iseg < num_segment; iseg ++) { |
| nearest_to_core_pos.push_back(make_pair(-1, -1)); |
| } |
| // we have to go through this step, cuz on some small chips, there are just no |
| // long wires that have their mid-point going through exactly (nx / 2) & (ny / 2) |
| |
| // find the smallest distance between the wire mid point and the chip mid point |
| for (int inode = 0; inode < num_rr_nodes; inode ++) { |
| int cur_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index; |
| if (cur_seg_type < 0 || cur_seg_type >= num_segment) { |
| // this check is to eliminate non-wire seg types (e.g.: seg_type = -1) |
| continue; |
| } |
| int cur_x_mid, cur_y_mid; |
| cur_x_mid = (rr_node[inode].get_xhigh() + rr_node[inode].get_xlow()) / 2; |
| cur_y_mid = (rr_node[inode].get_yhigh() + rr_node[inode].get_ylow()) / 2; |
| if (abs(nearest_to_core_pos[cur_seg_type].first - nx / 2) > abs(cur_x_mid - nx / 2) |
| && abs(nearest_to_core_pos[cur_seg_type].second - ny / 2) > abs(cur_y_mid - ny / 2)) { |
| nearest_to_core_pos[cur_seg_type].first = cur_x_mid; |
| nearest_to_core_pos[cur_seg_type].second = cur_y_mid; |
| } |
| } |
| // find the inode that has the smallest distance (i.e.: wire that is closest to center of chip) |
| for (int inode = 0; inode < num_rr_nodes; inode ++) { |
| int cur_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index; |
| if (cur_seg_type < 0 || cur_seg_type >= num_segment) |
| continue; |
| int cur_x_mid, cur_y_mid; |
| cur_x_mid = (rr_node[inode].get_xhigh() + rr_node[inode].get_xlow()) / 2; |
| cur_y_mid = (rr_node[inode].get_yhigh() + rr_node[inode].get_ylow()) / 2; |
| vector<int>::iterator seg_pos = find(seg_type_to_check.begin(), seg_type_to_check.end(), cur_seg_type); |
| if (seg_pos != seg_type_to_check.end() |
| && cur_x_mid == nearest_to_core_pos[cur_seg_type].first |
| && cur_y_mid == nearest_to_core_pos[cur_seg_type].second) { |
| seg_type_to_check.erase(seg_pos); |
| // find the wire types that are driven by inode via *one* switch block |
| // i.e.: find the wire types that are connected directly by inode |
| setup_one_level_to_seg(inode); |
| } |
| if (seg_type_to_check.empty()) |
| break; |
| } |
| // next, test convergence for seg_conn_inf |
| // seg_conn_inf is setup here |
| bool new_conn_added = false; |
| do { |
| new_conn_added = false; |
| for (int from_seg = 0; from_seg < num_segment; from_seg ++) { |
| for (int to_seg = 0; to_seg < num_segment; to_seg ++) { |
| if (seg_conn_inf[from_seg][to_seg] == 0) |
| continue; |
| for (int to_to_seg = 0; to_to_seg < num_segment; to_to_seg ++) { |
| if (seg_conn_inf[to_seg][to_to_seg] == 0) |
| continue; |
| if (seg_conn_inf[from_seg][to_to_seg] == 0) { |
| seg_conn_inf[from_seg][to_to_seg] = 1; |
| new_conn_added = true; |
| } |
| } |
| } |
| } |
| } while (new_conn_added); |
| } |
| |
| |
| static void setup_one_level_to_seg(int inode) { |
| /* |
| * find the wire type that are driven directly by inode |
| * setup the init value for seg_conn_inf |
| */ |
| assert(rr_node[inode].type == CHANX || rr_node[inode].type == CHANY); |
| int num_edges = rr_node[inode].get_num_edges(); |
| int from_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index; |
| if (from_seg_type >= g_num_segment || from_seg_type < 0) |
| return; |
| for (int iconn = 0; iconn < num_edges; iconn ++) { |
| int to_node = rr_node[inode].edges[iconn]; |
| int to_seg_type = rr_indexed_data[rr_node[to_node].get_cost_index()].seg_index; |
| if (to_seg_type >= g_num_segment || to_seg_type < 0) |
| continue; |
| seg_conn_inf[from_seg_type][to_seg_type] = 1; |
| } |
| } |
| |
| |
| static void expand_neighbours_to_heap2(t_heap2 *cheapest, int start_seg_index, t_segment_inf *segment_inf) { |
| /* |
| * expand neighbours for cheapest (standard BFS procedure) |
| */ |
| int inode = cheapest->inode; |
| int num_edges = rr_node[inode].get_num_edges(); |
| int iswitch; |
| |
| float cur_R_upstream = cheapest->R_upstream; |
| float cur_C_upstream = cheapest->C_upstream; |
| float cur_basecost_upstream = cheapest->basecost_upstream; |
| |
| float new_R_upstream, new_C_upstream, new_basecost_upstream; |
| |
| for (int iconn = 0; iconn < num_edges; iconn++) { |
| int to_node = rr_node[inode].edges[iconn]; |
| if (rr_node[to_node].type != CHANX && rr_node[to_node].type != CHANY) |
| continue; |
| if (rr_node_ranking[to_node].rank != -1) |
| continue; |
| |
| int to_seg_index = rr_indexed_data[rr_node[to_node].get_cost_index()].seg_index; |
| if (seg_conn_inf[start_seg_index][to_seg_index] == 0) |
| continue; |
| // calculate the backward delay of this node |
| // referring to route_timing.c: timing_driven_expand_neighbours() |
| // it is not accurate to use the R & C stored in rr_node |
| // think about the wire that is cut short at fringe: |
| // if a L4 wire is cut to be of length 2 only, then its R will be half of the normal value |
| // the solution is to use the R per length / C per length in segment_inf |
| iswitch = rr_node[inode].switches[iconn]; |
| float cur_wire_R = segment_inf[to_seg_index].Rmetal * segment_inf[to_seg_index].length; |
| float cur_wire_C = segment_inf[to_seg_index].Cmetal * segment_inf[to_seg_index].length; |
| float cur_wire_basecost = rr_indexed_data[rr_node[to_node].get_cost_index()].base_cost; |
| if (g_rr_switch_inf[iswitch].buffered) { |
| new_R_upstream = g_rr_switch_inf[iswitch].R; |
| // if switch is buffered, don't add C. |
| // but this will still produce an overestimation of C |
| new_C_upstream = cur_C_upstream; |
| } else { |
| new_R_upstream = cur_R_upstream + g_rr_switch_inf[iswitch].R; |
| new_C_upstream = cur_C_upstream + cur_wire_C; |
| } |
| float to_node_cost = cur_wire_C * (new_R_upstream + 0.5 * cur_wire_R); |
| to_node_cost += g_rr_switch_inf[iswitch].Tdel; |
| to_node_cost += cheapest->cost; |
| new_R_upstream += cur_wire_R; |
| new_basecost_upstream = cur_basecost_upstream + cur_wire_basecost; |
| |
| if (hptr_map[to_node] != NULL) { |
| // if to_node is already on the heap, then either |
| // replace the outdated node (by first remove it) or do nothing |
| if (hptr_map[to_node]->cost <= to_node_cost) { |
| continue; |
| } else { |
| remove_heap2_node(hptr_map[to_node]->position); |
| } |
| } |
| node_to_heap2(inode, to_node, to_node_cost, new_R_upstream, new_C_upstream, new_basecost_upstream); |
| } |
| } |
| |
| |
| static void initialize_heap2(int start_inode, float cost_start_inode, float R_start_inode, |
| float C_start_inode, float basecost_start_inode) { |
| assert(heap2 == NULL && heap2_size == 0 && heap2_tail == 1); |
| heap2_size = 1; |
| heap2_tail = 2; |
| t_heap2 *head = (t_heap2 *) my_malloc(sizeof(t_heap2)); |
| head->prev_node = -1; |
| head->inode = start_inode; |
| head->position = 1; |
| head->cost = cost_start_inode; |
| head->R_upstream = R_start_inode; |
| head->C_upstream = C_start_inode; |
| head->basecost_upstream = basecost_start_inode; |
| heap2 = (t_heap2 **) my_malloc(sizeof(t_heap2 *) * heap2_size); |
| heap2 --; |
| heap2[1] = head; |
| } |
| |
| |
| static void node_to_heap2(int prev_node, int inode, float cost, |
| float R_upstream, float C_upstream, float basecost_upstream) { |
| /* |
| * when you call this function, it has already been made sure that |
| * heap node for inode is removed (hptr_map[inode] = NULL) |
| */ |
| if (heap2_size == 0) { |
| initialize_heap2(inode, cost, R_upstream, C_upstream, basecost_upstream); |
| return; |
| } |
| t_heap2 *hptr = (t_heap2 *) my_malloc(sizeof(t_heap2)); |
| // setup all fields |
| assert(hptr_map[inode] == NULL); |
| hptr_map[inode] = hptr; |
| hptr->prev_node = prev_node; |
| hptr->inode = inode; |
| hptr->position = -1; |
| hptr->cost = cost; |
| hptr->R_upstream = R_upstream; |
| hptr->C_upstream = C_upstream; |
| hptr->basecost_upstream = basecost_upstream; |
| // dynamic expansion of heap size |
| // (referring to old heap method) |
| if (heap2_tail > heap2_size) { |
| heap2_size *= 2; |
| heap2 = (t_heap2 **)my_realloc((void *)(heap2 + 1), heap2_size * sizeof(t_heap2 *)); |
| heap2 --; |
| } |
| heap2[heap2_tail] = hptr; |
| // sifting up |
| hptr->position = heap2_tail; |
| int ifrom = heap2_tail; |
| int ito = ifrom / 2; |
| heap2_tail ++; |
| |
| t_heap2 *temp_ptr = NULL; |
| while ((ito >= 1) && (heap2[ifrom]->cost < heap2[ito]->cost)) { |
| temp_ptr = heap2[ito]; |
| // update position to facilitate future removal |
| heap2[ifrom]->position = ito; |
| heap2[ito]->position = ifrom; |
| heap2[ito] = heap2[ifrom]; |
| heap2[ifrom] = temp_ptr; |
| ifrom = ito; |
| ito = ifrom / 2; |
| } |
| } |
| |
| |
| static void remove_heap2_node(int position) { |
| /* |
| * remove node of [position] from heap |
| * sifting to maintain the heap property |
| */ |
| |
| hptr_map[heap2[position]->inode] = NULL; |
| |
| free(heap2[position]); |
| heap2_tail --; |
| heap2[position] = heap2[heap2_tail]; |
| heap2[heap2_tail] = NULL; |
| if (heap2[position] != NULL) |
| heap2[position]->position = position; |
| // Sifting: |
| int ifrom = position; |
| int ito = 2 * ifrom; |
| t_heap2 *temp; |
| while (ito < heap2_tail) { |
| if (ito != heap2_tail - 1 && heap2[ito + 1]->cost < heap2[ito]->cost) |
| ito++; |
| if (heap2[ito]->cost > heap2[ifrom]->cost) |
| break; |
| temp = heap2[ito]; |
| heap2[ito] = heap2[ifrom]; |
| heap2[ifrom] = temp; |
| heap2[ito]->position = ito; |
| heap2[ifrom]->position = ifrom; |
| ifrom = ito; |
| ito = 2 * ifrom; |
| } |
| } |
| |
| |
| static t_heap2 * pop_heap2_head(void) { |
| if (heap2_tail == 1) |
| return NULL; |
| t_heap2 * poped_head = (t_heap2 *)my_malloc(sizeof (t_heap2)); |
| memcpy(poped_head, heap2[1], sizeof(t_heap2)); |
| |
| remove_heap2_node(1); |
| |
| rr_node_ranking[poped_head->inode].prev_node = poped_head->prev_node; |
| rr_node_ranking[poped_head->inode].rank = num_nodes_ranked; |
| rr_node_ranking[poped_head->inode].cost = poped_head->cost; |
| rr_node_ranking[poped_head->inode].C_upstream = poped_head->C_upstream; |
| rr_node_ranking[poped_head->inode].basecost_upstream = poped_head->basecost_upstream; |
| num_nodes_ranked ++; |
| |
| return poped_head; |
| } |
| |
| |
| static void fill_cost_map(t_segment_inf *segment_inf, int wire_type, int chan_type) { |
| /* |
| * setup the global cost map to enable future lookup by timing driven router |
| */ |
| int print_inode_x = -1, print_inode_y = -1; |
| int term_x = -1; |
| int term_y = -1; |
| for (int inode = 0; inode < num_rr_nodes; inode++) { |
| if (rr_node_ranking[inode].rank == -1) |
| continue; |
| // TODO: what about BI_DIR???? |
| int segment_index = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index; |
| bool *cb_pattern = segment_inf[segment_index].cb; |
| int target_chan_type = rr_node[inode].type; |
| for (int offset = 0; offset < rr_node[inode].get_len(); offset ++) { |
| get_leave_pt_by_cb(cb_pattern[offset], offset, inode, &term_x, &term_y); |
| if (term_x == -1 || term_y == -1) |
| continue; |
| if (target_chan_type == CHANX) { |
| if (bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_x == UNDEFINED |
| || bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_x > rr_node_ranking[inode].cost) { |
| bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_x = rr_node_ranking[inode].cost; |
| bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_C_x = rr_node_ranking[inode].C_upstream; |
| bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_basecost_x = rr_node_ranking[inode].basecost_upstream; |
| #ifdef DEBUG_BFS_PRECAL |
| if (term_x == DEBUG_OFFSET_X && term_y == DEBUG_OFFSET_Y |
| && wire_type == DEBUG_SEG_TYPE && chan_type == DEBUG_CHAN_TYPE) { |
| print_inode_x = inode; |
| } |
| #endif |
| } |
| } else { |
| if (bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_y == UNDEFINED |
| || bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_y > rr_node_ranking[inode].cost) { |
| bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_y = rr_node_ranking[inode].cost; |
| bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_C_y = rr_node_ranking[inode].C_upstream; |
| bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_basecost_y = rr_node_ranking[inode].basecost_upstream; |
| #ifdef DEBUG_BFS_PRECAL |
| if (term_x == DEBUG_OFFSET_X && term_y == DEBUG_OFFSET_Y |
| && wire_type == DEBUG_SEG_TYPE && chan_type == DEBUG_CHAN_TYPE) { |
| print_inode_y = inode; |
| } |
| #endif |
| } |
| } |
| } |
| } |
| #ifdef DEBUG_BFS_PRECAL |
| if (print_inode_x != -1) { |
| printf("BFS SELECTED PATH (CHANX): [%d][%d][%d][%d]\n", DEBUG_OFFSET_X, DEBUG_OFFSET_Y, DEBUG_SEG_TYPE, DEBUG_CHAN_TYPE); |
| print_bfs_optimal_path(print_inode_x); |
| } |
| if (print_inode_y != -1) { |
| printf("BFS SELECTED PATH (CHANY): [%d][%d][%d][%d]\n", DEBUG_OFFSET_X, DEBUG_OFFSET_Y, DEBUG_SEG_TYPE, DEBUG_CHAN_TYPE); |
| print_bfs_optimal_path(print_inode_y); |
| } |
| #endif |
| } |
| |
| |
| static void get_leave_pt_by_cb (bool is_conn_ipin, int offset, int inode, int *term_x, int *term_y) { |
| /* |
| * return the end point of a wire (term_x, term_y), calculated by inode start point + offset |
| * if it can drive a clb at (term_x, term_y) (i.e.: is_conn_ipin = 1) |
| */ |
| if (!is_conn_ipin) { |
| *term_x = -1; |
| *term_y = -1; |
| return; |
| } |
| int start_x = -1, start_y = -1; |
| get_unidir_seg_start(inode, &start_x, &start_y); |
| int wire_type = rr_node[inode].type; |
| assert(wire_type == CHANX || wire_type == CHANY); |
| if (wire_type == CHANX) { |
| *term_y = start_y - 2; |
| if (rr_node[inode].get_direction() == INC_DIRECTION) |
| *term_x = start_x + offset - 2; |
| else |
| *term_x = start_x - offset - 2; |
| } else { |
| *term_x = start_x - 2; |
| if (rr_node[inode].get_direction() == INC_DIRECTION) |
| *term_y = start_y + offset - 2; |
| else |
| *term_y = start_y - offset - 2; |
| } |
| // exclude end points falling outside the range of cost map |
| if (*term_x < 0 || *term_x > nx-3) |
| *term_x = -1; |
| if (*term_y < 0 || *term_y > ny-3) |
| *term_y = -1; |
| } |
| |
| |
| /**************************** util functions for debugging & profiling ************************************/ |
| void print_cost_map(char *fname) { |
| /* |
| * export to file the map of future cost |
| */ |
| FILE *fp; |
| fp = fopen(fname, "w+"); |
| if (fp == NULL) { |
| vpr_printf_warning(__FILE__, __LINE__, "cannot open file\n"); |
| } |
| float max_basecost = -1; |
| fprintf(fp, "START: COST RANKING MAP\n"); |
| fprintf(fp, "x\ty\tcost_x\tcost_y\n"); |
| for (int ix = 0; ix < nx-2; ix ++) { |
| for (int iy = 0; iy < ny-2; iy ++) { |
| for (int iseg = 0; iseg < g_debug_num_seg; iseg ++) { |
| for (int ichan = 0; ichan < 2; ichan ++) { |
| fprintf(fp, "%d\t%d\t%.1f e-10\t%.1f e-10\n", ix, iy, |
| bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x * pow(10, 10), |
| bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_y * pow(10, 10)); |
| if (bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x > max_basecost) |
| max_basecost = bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x; |
| if (bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_y > max_basecost) |
| max_basecost = bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_y; |
| } |
| } |
| } |
| } |
| //printf("MAX basecost\t%e\n", max_basecost); |
| fprintf(fp, "END MAP PRINTING\n"); |
| fclose(fp); |
| } |
| |
| |
| static void print_seg_conn_matrix(t_segment_inf *segment_inf, int num_segment) { |
| vpr_printf_info(">> SEGMENT CONN INFO\n"); |
| for (int iseg = 0; iseg < num_segment; iseg ++) { |
| printf("\tSEG %s --> ", segment_inf[iseg].name); |
| for (int i_to_seg = 0; i_to_seg < num_segment; i_to_seg ++) { |
| if (seg_conn_inf[iseg][i_to_seg] == 1) { |
| printf("%s ", segment_inf[i_to_seg].name); |
| } |
| } |
| printf("\n"); |
| } |
| } |
| |
| |
| void print_bfs_optimal_path(int term_node) { |
| assert(term_node >= 0 && term_node < num_rr_nodes); |
| int cur_node = term_node; |
| vpr_printf_info("*** PRINT BFS OPTIMAL PATH\n"); |
| while (rr_node_ranking[cur_node].prev_node != -1) { |
| int x_s, x_e, y_s, y_e; |
| get_unidir_seg_end(cur_node, &x_s, &y_s); |
| get_unidir_seg_start(cur_node, &x_e, &y_e); |
| int len = rr_node[cur_node].get_len(); |
| vpr_printf_info("\tinode %d\tL%d\tstart(%d,%d)\tend(%d,%d)\tbackTdel %.3f\t0_basecost %.3f\tacc_basecost %.3f\n", |
| cur_node, len, x_s, y_s, x_e, y_e, rr_node_ranking[cur_node].cost * pow(10,10), |
| rr_indexed_data[rr_node[cur_node].get_cost_index()].base_cost * pow(10, 10), |
| rr_node_ranking[cur_node].basecost_upstream * pow(10, 10)); |
| cur_node = rr_node_ranking[cur_node].prev_node; |
| } |
| } |