added documentation for new lookahead
diff --git a/vpr/SRC/base/globals.c b/vpr/SRC/base/globals.c
index 9f2af88..0f74167 100644
--- a/vpr/SRC/base/globals.c
+++ b/vpr/SRC/base/globals.c
@@ -105,7 +105,7 @@
 //map< pair<int, int>, float ** > bfs_cost_map;

 //map< pair<int, int>, float ** > bfs_C_downstream_map;

 // dimensions: x, y, segment_type, chan_type

-struct s_bfs_cost_inf **** bfs_lookahead_info = NULL;

+t_fut_cost_inf **** bfs_lookahead_info = NULL;

 

 /********** Structures representing timing graph information */

 float pb_max_internal_delay = UNDEFINED; /* biggest internal delay of physical block */

diff --git a/vpr/SRC/base/globals.h b/vpr/SRC/base/globals.h
index 8c8f8b7..f6cf434 100644
--- a/vpr/SRC/base/globals.h
+++ b/vpr/SRC/base/globals.h
@@ -140,7 +140,7 @@
 extern int **rr_blk_source; /* [0..num_blocks-1][0..num_class-1] */

 //extern map< pair<int, int>, float ** > bfs_cost_map;

 //extern map< pair<int, int>, float ** > bfs_C_downstream_map;

-struct s_bfs_cost_inf {

+typedef struct s_fut_cost_inf {

     float Tdel_x;

     float Tdel_y;

     float acc_basecost_x;

@@ -148,8 +148,8 @@
     float acc_C_x;

     float acc_C_y;

     //int is_arrive_chanx;

-};

-extern struct s_bfs_cost_inf **** bfs_lookahead_info;

+} t_fut_cost_inf;

+extern t_fut_cost_inf **** bfs_lookahead_info;

 /* the head pointers of structures that are "freed" and used constantly */

 /*struct s_heap *g_heap_free_head;

 struct s_trace *g_trace_free_head;

diff --git a/vpr/SRC/route/look_ahead_bfs.c b/vpr/SRC/route/look_ahead_bfs_precal.c
similarity index 64%
rename from vpr/SRC/route/look_ahead_bfs.c
rename to vpr/SRC/route/look_ahead_bfs_precal.c
index 6270d4d..a7f71d2 100644
--- a/vpr/SRC/route/look_ahead_bfs.c
+++ b/vpr/SRC/route/look_ahead_bfs_precal.c
@@ -21,86 +21,91 @@
 #include "path_delay.h"
 #include "net_delay.h"
 #include "stats.h"
-#include "ReadOptions.h"
-#include "look_ahead_bfs.h"
+#include "look_ahead_bfs_precal.h"
 
 using namespace std;
 
+#define DEBUG_BFS_PRECAL
 #define DEBUG_SEG_TYPE 1
 #define DEBUG_CHAN_TYPE 0
 #define DEBUG_OFFSET_X 19
 #define DEBUG_OFFSET_Y 3
 
+// TODO: change name of bfs_lookahead_info to g_precal_lookahead
+//       clearup the #include
+//       rename look_ahead_bfs to precal_look_ahead
+//       BIDIR wire for fill_cost_map
+
 /*
- * This file is just for experimenting a new look ahead
+ * This file is for experimenting a new way for A* look ahead
+ * Purpose:
+ *  Precalculate the timing delay & upstream C & base cost from one channel to another with offset dx, dy
+ *  In the actual routing, lookahead will look up the map built up here to get a lower bound for future cost.
  * General procedure:
- *  1. start from each type of wire: pick the inode at the corner (not the real corner(1,1), but a little bit inside (2,2))??
- *  2. BFS to every chip location (in terms of clb) -- wire connection and wire cb pattern follows the arch specification
- *  3. calculate the min cost of (x, y) by comparing cost of all wires ending there
- *     -- also differentiate the ending side (whether ends at chany or chanx wire)
+ *  1. start with each type of wire: pick the wire node near the corner (at location (2,2))
+ *  2. start point & end point are in terms of chanx / chany at (x,y)
+ *  3. SB & CB connection follow the exact specification in architecture
+ *  4. use breadth fist search to get cost at every location on chip
  */
+
+
+/*********************** subroutines local to this file ********************************/
 /*
- * a revised version of s_heap:
- *    inode: rr_node index
- *    position: index on heap (useful when you try to remove a node from heap)
- *    cost & R_upstream: the same as s_heap
+ * The heap in this BFS pre-cal map is called "heap2" to differentiate from 
+ * the original heap adopted in timing driven router;
+ * However, heap2 is quite similar to that used in timing driven router.
+ * The main difference:
+ *  1. this heap will *remove* node after it found its ranking
+ *  2. the heap struct is modified to store more information
  */
-/**************************************************************************/
-// some heap functions revised
 static void initialize_structs(int num_segment);
-static void initialize_heap_2(int start_inode, float cost_start_inode, 
+static void initialize_heap2(int start_inode, float cost_start_inode, 
                                 float R_upstream, float C_upstream, float basecost_upstream);
-static void node_to_heap_2(int prev_node, int inode, float cost, float R_upstream, 
+static void node_to_heap2(int prev_node, int inode, float cost, float R_upstream, 
                             float C_upstream, float basecost_upstream);
-static void remove_heap_2_node(int position);
+static void remove_heap2_node(int position);
 static void setup_one_level_to_seg(int inode);
-static struct s_heap_2 * pop_heap_2_head();
+static t_heap2 * pop_heap2_head();
 static void setup_seg_conn_inf(int num_segment);
-static void expand_neighbours_2(struct s_heap_2 *cheapest, int start_seg_index, t_segment_inf *segment_inf);
+static void expand_neighbours_to_heap2(t_heap2 *cheapest, int start_seg_index, t_segment_inf *segment_inf);
 static void get_leave_pt_by_cb (bool is_conn_ipin, int offset, int inode, int *term_x, int *term_y);
 static void fill_cost_map(t_segment_inf *segment_inf, int seg_type, int chan_type);
 static void print_seg_conn_matrix(t_segment_inf *segment_inf, int num_segment);
-// TODO: rr_indexed_data
-//       segment_inf
-struct s_bfs_cost {
-    // XXX: prev_node is only added for debugging bfs
-    int prev_node;
-    int rank;
-    float cost;
-    // C_upstream is used as C_downstream in router look ahead
-    float C_upstream;
-    float basecost_upstream;
-};
-/***************************************************************************/
-// globals
-// this array is to efficiently identify the hptr to be removed from heap
-int g_bfs_num_seg = 0;
-s_heap_2 **hptr_map =  NULL;
-struct s_bfs_cost *rr_node_ranking = NULL;
+
+typedef struct s_precal_fut_cost {
+    int prev_node;  // only for debugging
+    int rank;       // range: 1..total_num_of_wires
+    float cost;     // cost here is equivalent to Tdel
+    float C_upstream;   // upstream here is downstream in router look ahead
+    float basecost_upstream;    
+} t_precal_fut_cost;
+
+
+/*********************** globals local to this file **********************************/
+int g_debug_num_seg = 0;    // for debugging: print out the matrix
+t_heap2 **hptr_map =  NULL; // [0..num_rr_nodes-1]: to efficiently locate the hptr to be removed from heap2, given inode for hptr
+t_precal_fut_cost *rr_node_ranking = NULL;
 int num_nodes_ranked;
-int heap_2_size;
-int heap_2_tail;
-// heap_2 can point to anything at the start
-struct s_heap_2 **heap_2 = NULL;
-// if [i][j] = 1, then seg_type i can connect to seg j through the normal (core) switch block
-int **seg_conn_inf = NULL;
+int heap2_size;
+int heap2_tail;
+t_heap2 **heap2 = NULL;
+int **seg_conn_inf = NULL; // if [i][j] = 1, then seg_type i can connect to seg j through the normal (core) switch block
 t_segment_inf *ptr_segment_inf = NULL;
-/****************************************************************************/
-// function definition
-// to be called in try_route()
+
+
+/************************ function definition *******************************/
 static void initialize_structs(int num_segment) {
-    g_bfs_num_seg = num_segment;
-    hptr_map = (s_heap_2**) my_malloc(sizeof(struct s_heap_2 *) * num_rr_nodes);
-    rr_node_ranking = (struct s_bfs_cost *) my_malloc(sizeof(struct s_bfs_cost) * num_rr_nodes);
+    g_debug_num_seg = num_segment;
+    hptr_map = (t_heap2**) my_malloc(sizeof(t_heap2 *) * num_rr_nodes);
+    rr_node_ranking = (t_precal_fut_cost *) my_malloc(sizeof(t_precal_fut_cost) * num_rr_nodes);
     num_nodes_ranked = 0;
-    heap_2_size = 0;
-    heap_2_tail = 1;
-    // heap_2 can point to anything at the start
-    heap_2 = NULL;
-    //cost_ranking_map = (int **)alloc_matrix(0, nx-3, 0, ny-3, sizeof(int));
-    //cost_map = (float **)alloc_matrix(0, nx-3, 0, ny-3, sizeof(float));
+    heap2_size = 0;
+    heap2_tail = 1;
+    // heap2 can point to anything at the start
+    heap2 = NULL;
+
     if (bfs_lookahead_info == NULL) {
-        bfs_lookahead_info = (struct s_bfs_cost_inf ****)alloc_matrix4(0, nx-3, 0, ny-3, 0, num_segment-1, 0, 1, sizeof(struct s_bfs_cost_inf));
+        bfs_lookahead_info = (t_fut_cost_inf ****)alloc_matrix4(0, nx-3, 0, ny-3, 0, num_segment-1, 0, 1, sizeof(t_fut_cost_inf));
     }
     for (int ix = 0; ix < nx-2; ix ++) {
         for (int iy = 0; iy < ny-2; iy ++) {
@@ -125,26 +130,23 @@
         }
     }
 }
+
+
 void pre_cal_look_ahead(t_segment_inf *segment_inf, int num_segment) {
+    /*
+     * high level function to do the pre calculation of future cost using BFS
+     * to be called in try_route() after rr_graph is built, before actual router is called
+     */
     ptr_segment_inf = segment_inf;
     initialize_structs(num_segment);
     setup_seg_conn_inf(num_segment);
     printf("\n\nfinish seg conn setup\n\n");
     print_seg_conn_matrix(segment_inf, num_segment);
     /*
-     * should be [2..nx-1][2..ny-1]
-     * +-------------------------+
-     * | +---------------------+ | 
-     * | |                     | |
-     * | |                     | |
-     * | ^                     | |
-     * | |                     | |
-     * | +-->------------------+ |
-     * +-------------------------+
-     * get rid of the fringe
-     * start at (2, 2); calculate for each type, and CHANX and CHANY
+     * range of coord on chip: [2..nx-1][2..ny-1]
+     * start at (2, 2); calculate for each wire type, and CHANX / CHANY
      */
-    for (int i = 0; i < num_segment; i ++) {
+    for (int iseg = 0; iseg < num_segment; iseg ++) {
         int chan_type[2] = {CHANX, CHANY};
         for (int ichan = 0; ichan < 2; ichan ++) {
             //int length = segment_inf[i].length;
@@ -155,7 +157,7 @@
                 if (rr_node[inode].get_direction() == DEC_DIRECTION)
                     continue;
                 int i_seg_index = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
-                if (i_seg_index != i)
+                if (i_seg_index != iseg)
                     continue;
                 if (rr_node[inode].type == chan_type[ichan]
                  && rr_node[inode].get_xlow() == 2
@@ -185,7 +187,7 @@
                 for (int inode = 0; inode < num_rr_nodes; inode ++) {
                     if (rr_node[inode].get_direction() == INC_DIRECTION)
                         continue;
-                    if (rr_indexed_data[rr_node[inode].get_cost_index()].seg_index != i)
+                    if (rr_indexed_data[rr_node[inode].get_cost_index()].seg_index != iseg)
                         continue;
                     if (rr_node[inode].type == chan_type[ichan]
                      && rr_node[inode].get_xhigh() == nx - 1
@@ -195,58 +197,47 @@
                     }
                 }
             }
+            // found a valid starting node, next do the calculation using BFS
             if (start_inode != -1) {
-                if (i == DEBUG_SEG_TYPE && ichan == DEBUG_CHAN_TYPE) {
+#ifdef DEBUG_BFS_PRECAL
+                if (iseg == DEBUG_SEG_TYPE && ichan == DEBUG_CHAN_TYPE) {
                     printf("== DEBUG ==: start node %d\n", start_inode);
                 }
-                printf("**** START BFS for segment %s\tchan type %d\n", segment_inf[i].name, ichan);
+#endif
+                vpr_printf_info("**** START BFS for segment %s\tchan type %d\n", segment_inf[iseg].name, ichan);
                 clock_t begin = clock();
                 num_nodes_ranked = 0;
-                look_ahead_bfs(start_inode, segment_inf);
+                look_ahead_bfs_precal(start_inode, segment_inf);
                 clock_t end = clock();
                 vpr_printf_info("\tThis BFS took %g seconds.\n", (float)(end - begin) / CLOCKS_PER_SEC);
-                printf("**** END BFS for segment %s\n", segment_inf[i].name);
-                fill_cost_map(segment_inf, i, ichan);
-                //print_C_map(bfs_C_downstream_map[make_pair(i, chan_type[ichan])]);
+                vpr_printf_info("**** END BFS for segment %s\n", segment_inf[iseg].name);
+                // setup the global map for future look ahead lookup
+                fill_cost_map(segment_inf, iseg, ichan);
                 start_inode = -1;
             } else {
                 vpr_printf_error(__FILE__, __LINE__, 
-                    "segment %s has no wire starting at corner\n", segment_inf[i].name);
+                    "segment %s has no wire starting at corner\n", segment_inf[iseg].name);
             }
         }
     }
     print_cost_map("init_bfs_map");
-    /*
-    for (int ix = 0; ix < nx - 2; ix ++) {
-        for (int iy = 0; iy < ny - 2; iy ++) {
-            for (int iseg = 0; iseg < num_segment; iseg ++) {
-                for (int ichan = 0; ichan < 2; ichan ++) {
-                    assert(bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x < 2 &&
-                        bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_y < 2);
-                }
-            }
-        }
-    }
-    */
+
     free(hptr_map);
     free(rr_node_ranking);
-    free(heap_2+1);
+    free(heap2+1);
     free_matrix(seg_conn_inf, 0, num_segment - 1, 0, sizeof(int));
     hptr_map = NULL;
     rr_node_ranking = NULL;
-    heap_2 = NULL;
+    heap2 = NULL;
     seg_conn_inf = NULL;
 }
 
-void look_ahead_bfs(int start_inode, t_segment_inf *segment_inf) {
-    // 1. node to heap (start_inode)
-    // 2. get_heap_head
-    //    while (head != null) {
-    //        remove head from heap;
-    //        set cost ranking of head (1 ~ num_rr_nodes)
-    //        expand neighbours & put them onto heap
-    //        get head for next itr
-    //    }
+
+void look_ahead_bfs_precal(int start_inode, t_segment_inf *segment_inf) {
+    /*
+     * routine to actually do the bfs calculation
+     * general procedure similar to that in timing driven route
+     */
     for (int i = 0; i < num_rr_nodes; i ++) {
         hptr_map[i] = NULL;
         rr_node_ranking[i].prev_node = -1;
@@ -255,32 +246,31 @@
         rr_node_ranking[i].C_upstream = -1;
         rr_node_ranking[i].basecost_upstream = -1;
     }
-    float R_start_inode = rr_node[start_inode].R;
-    float cost_start_inode = rr_node[start_inode].C * (0.5 * R_start_inode);
-    R_start_inode += 0.;
-    cost_start_inode += 0.;
-    // I would like to change the heap to facilitate removal of replicate nodes
-    //node_to_heap_2(start_inode, cost_start_inode, R_start_inode);
-    node_to_heap_2(-1, start_inode, 0.0, 0.0, 0.0, 0.0);
-    struct s_heap_2* cheapest = pop_heap_2_head();
+    // heap2 for this precal is modified to facilitate removal of replicate nodes
+    // cost & C & basecost is excluding the start node, so set them to be all 0.
+    node_to_heap2(-1, start_inode, 0.0, 0.0, 0.0, 0.0);
+    t_heap2* cheapest = pop_heap2_head();
     int start_seg_index = rr_indexed_data[rr_node[start_inode].get_cost_index()].seg_index;
     while (cheapest != NULL) {
-        expand_neighbours_2(cheapest, start_seg_index, segment_inf);
+        expand_neighbours_to_heap2(cheapest, start_seg_index, segment_inf);
         free(cheapest);
-        cheapest = pop_heap_2_head();
+        cheapest = pop_heap2_head();
     }
 }
-/*
- * to test how many other type of segments a certain type of wire can 
- * connect to, via the standard switch block (sb which are at the core place of chip)
- * 
- * the reason for doing so:
- *     for some weird arch, for example, a certain wire can only connect to wires of its
- *     same type, via sb in core area on chip. but it may be able to connect to other wire
- *     types via corner / fringe sb. --> this will cause inaccuracy of bfs, as we pick the 
- *     starting point to be near the corner
- */
+
+
 static void setup_seg_conn_inf(int num_segment) {
+    /*
+     * to test how many other type of segments a certain type of wire can 
+     * connect to, via the standard switch block (sb which are at the core place of chip)
+     * 
+     * the reason for doing so:
+     *     for some weird arch, for example, a certain wire can only connect to wires of its
+     *     same type, via sb in core area on chip. but it may be able to connect to other wire
+     *     types via corner / fringe sb. --> this will cause inaccuracy of bfs, as we pick the 
+     *     starting point to be near the corner
+     */
+
     vector<int> seg_type_to_check;
     for (int iseg = 0; iseg < num_segment; iseg ++) {
         seg_type_to_check.push_back(iseg);
@@ -291,10 +281,14 @@
     }
     // we have to go through this step, cuz on some small chips, there are just no 
     // long wires that have their mid-point going through exactly (nx / 2) & (ny / 2)
+
+    // find the smallest distance between the wire mid point and the chip mid point
     for (int inode = 0; inode < num_rr_nodes; inode ++) {
         int cur_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
-        if (cur_seg_type < 0 || cur_seg_type >= num_segment)
+        if (cur_seg_type < 0 || cur_seg_type >= num_segment) {
+            // this check is to eliminate non-wire seg types (e.g.: seg_type = -1)
             continue;
+        }
         int cur_x_mid, cur_y_mid;
         cur_x_mid = (rr_node[inode].get_xhigh() + rr_node[inode].get_xlow()) / 2;
         cur_y_mid = (rr_node[inode].get_yhigh() + rr_node[inode].get_ylow()) / 2;
@@ -304,7 +298,7 @@
             nearest_to_core_pos[cur_seg_type].second = cur_y_mid;
         }
     }
-
+    // find the inode that has the smallest distance (i.e.: wire that is closest to center of chip)
     for (int inode = 0; inode < num_rr_nodes; inode ++) {
         int cur_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
         if (cur_seg_type < 0 || cur_seg_type >= num_segment)
@@ -317,12 +311,15 @@
          && cur_x_mid == nearest_to_core_pos[cur_seg_type].first 
          && cur_y_mid == nearest_to_core_pos[cur_seg_type].second) {
             seg_type_to_check.erase(seg_pos);
+            // find the wire types that are driven by inode via *one* switch block
+            // i.e.: find the wire types that are connected directly by inode
             setup_one_level_to_seg(inode);
         }
         if (seg_type_to_check.empty())
             break;
     }
     // next, test convergence for seg_conn_inf
+    // seg_conn_inf is setup here
     bool new_conn_added = false;
     do {
         new_conn_added = false;
@@ -342,7 +339,13 @@
         }
     } while (new_conn_added);
 }
+
+
 static void setup_one_level_to_seg(int inode) {
+    /*
+     * find the wire type that are driven directly by inode
+     * setup the init value for seg_conn_inf
+     */
     assert(rr_node[inode].type == CHANX || rr_node[inode].type == CHANY);
     int num_edges = rr_node[inode].get_num_edges();
     int from_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
@@ -356,7 +359,12 @@
         seg_conn_inf[from_seg_type][to_seg_type] = 1;
     }
 }
-static void expand_neighbours_2(struct s_heap_2 *cheapest, int start_seg_index, t_segment_inf *segment_inf) {
+
+
+static void expand_neighbours_to_heap2(t_heap2 *cheapest, int start_seg_index, t_segment_inf *segment_inf) {
+    /*
+     * expand neighbours for cheapest (standard BFS procedure)
+     */
     int inode = cheapest->inode;
     int num_edges = rr_node[inode].get_num_edges();
     int iswitch;
@@ -364,7 +372,9 @@
     float cur_R_upstream = cheapest->R_upstream;
     float cur_C_upstream = cheapest->C_upstream;
     float cur_basecost_upstream = cheapest->basecost_upstream;
+
     float new_R_upstream, new_C_upstream, new_basecost_upstream;
+
     for (int iconn = 0; iconn < num_edges; iconn++) {
         int to_node = rr_node[inode].edges[iconn];
         if (rr_node[to_node].type != CHANX && rr_node[to_node].type != CHANY)
@@ -375,14 +385,12 @@
         int to_seg_index = rr_indexed_data[rr_node[to_node].get_cost_index()].seg_index;
         if (seg_conn_inf[start_seg_index][to_seg_index] == 0)
             continue;
-        /*
-        // eliminate the wires that lie completely on the fringe track
-        if ((rr_node[to_node].type == CHANX && rr_node[to_node].get_xlow() == 0)
-         || (rr_node[to_node].type == CHANY && rr_node[to_node].get_ylow() == 0))
-            continue;
-        */
         // calculate the backward delay of this node
         // referring to route_timing.c: timing_driven_expand_neighbours()
+        // it is not accurate to use the R & C stored in rr_node
+        // think about the wire that is cut short at fringe:
+        //    if a L4 wire is cut to be of length 2 only, then its R will be half of the normal value
+        // the solution is to use the R per length / C per length in segment_inf
         iswitch = rr_node[inode].switches[iconn];
         float cur_wire_R = segment_inf[to_seg_index].Rmetal * segment_inf[to_seg_index].length;
         float cur_wire_C = segment_inf[to_seg_index].Cmetal * segment_inf[to_seg_index].length;
@@ -393,35 +401,35 @@
             // but this will still produce an overestimation of C
             new_C_upstream = cur_C_upstream;
 		} else {
-            printf("\nUN BUFFERED SWITCH\n");
 			new_R_upstream = cur_R_upstream + g_rr_switch_inf[iswitch].R;
-            //new_C_upstream += rr_node[to_node].C;
             new_C_upstream = cur_C_upstream + cur_wire_C;
 		}
-        // we cannot use the R stored in rr_node directly, as it may be an underestimated value
-        // if wire is cut short at the fringe
         float to_node_cost = cur_wire_C * (new_R_upstream + 0.5 * cur_wire_R);
         to_node_cost += g_rr_switch_inf[iswitch].Tdel;
         to_node_cost += cheapest->cost;
         new_R_upstream += cur_wire_R;
         new_basecost_upstream = cur_basecost_upstream + cur_wire_basecost;
+   
         if (hptr_map[to_node] != NULL) {
+            // if to_node is already on the heap, then either 
+            // replace the outdated node (by first remove it) or do nothing
             if (hptr_map[to_node]->cost <= to_node_cost) {
                 continue;
             } else {
-                remove_heap_2_node(hptr_map[to_node]->position);
+                remove_heap2_node(hptr_map[to_node]->position);
             }
         }
-        node_to_heap_2(inode, to_node, to_node_cost, new_R_upstream, new_C_upstream, new_basecost_upstream);
+        node_to_heap2(inode, to_node, to_node_cost, new_R_upstream, new_C_upstream, new_basecost_upstream);
     }
 }
 
-static void initialize_heap_2(int start_inode, float cost_start_inode, float R_start_inode, 
+
+static void initialize_heap2(int start_inode, float cost_start_inode, float R_start_inode, 
                               float C_start_inode, float basecost_start_inode) {
-    assert(heap_2 == NULL && heap_2_size == 0 && heap_2_tail == 1);
-    heap_2_size = 1;
-    heap_2_tail = 2;
-    struct s_heap_2 *head = (struct s_heap_2 *) my_malloc(sizeof(struct s_heap_2));
+    assert(heap2 == NULL && heap2_size == 0 && heap2_tail == 1);
+    heap2_size = 1;
+    heap2_tail = 2;
+    t_heap2 *head = (t_heap2 *) my_malloc(sizeof(t_heap2));
     head->prev_node = -1;
     head->inode = start_inode;
     head->position = 1;
@@ -429,23 +437,24 @@
     head->R_upstream = R_start_inode;
     head->C_upstream = C_start_inode;
     head->basecost_upstream = basecost_start_inode;
-    heap_2 = (struct s_heap_2 **) my_malloc(sizeof(struct s_heap_2 *) * heap_2_size);
-    heap_2 --;
-    heap_2[1] = head;
+    heap2 = (t_heap2 **) my_malloc(sizeof(t_heap2 *) * heap2_size);
+    heap2 --;
+    heap2[1] = head;
 }   
 
-/*
- * when you call this function, it has already been made sure that
- * heap node for inode is removed
- */
-static void node_to_heap_2(int prev_node, int inode, float cost, 
+
+static void node_to_heap2(int prev_node, int inode, float cost, 
         float R_upstream, float C_upstream, float basecost_upstream) {
-    if (heap_2_size == 0) {
-        initialize_heap_2(inode, cost, R_upstream, C_upstream, basecost_upstream);
+    /*
+     * when you call this function, it has already been made sure that
+     * heap node for inode is removed (hptr_map[inode] = NULL)
+     */
+    if (heap2_size == 0) {
+        initialize_heap2(inode, cost, R_upstream, C_upstream, basecost_upstream);
         return;
     }
-    struct s_heap_2 *hptr = (struct s_heap_2 *) my_malloc(sizeof(s_heap_2));
-    // XXX
+    t_heap2 *hptr = (t_heap2 *) my_malloc(sizeof(t_heap2));
+    // setup all fields
     assert(hptr_map[inode] == NULL);
     hptr_map[inode] = hptr;
     hptr->prev_node = prev_node;
@@ -455,68 +464,75 @@
     hptr->R_upstream = R_upstream;
     hptr->C_upstream = C_upstream;
     hptr->basecost_upstream = basecost_upstream;
-    if (heap_2_tail > heap_2_size) {
-        heap_2_size *= 2;
-        heap_2 = (struct s_heap_2 **)my_realloc((void *)(heap_2 + 1), heap_2_size * sizeof(struct s_heap_2 *));
-        heap_2 --;
+    // dynamic expansion of heap size
+    // (referring to old heap method)
+    if (heap2_tail > heap2_size) {
+        heap2_size *= 2;
+        heap2 = (t_heap2 **)my_realloc((void *)(heap2 + 1), heap2_size * sizeof(t_heap2 *));
+        heap2 --;
     }
-    heap_2[heap_2_tail] = hptr;
-    // XXX
-    hptr->position = heap_2_tail;
-    int ifrom = heap_2_tail;
+    heap2[heap2_tail] = hptr;
+    // sifting up
+    hptr->position = heap2_tail;
+    int ifrom = heap2_tail;
     int ito = ifrom / 2;
-    heap_2_tail ++;
+    heap2_tail ++;
 
-    struct s_heap_2 *temp_ptr = NULL;
-    while ((ito >= 1) && (heap_2[ifrom]->cost < heap_2[ito]->cost)) {
-        temp_ptr = heap_2[ito];
-        // XXX
-        heap_2[ifrom]->position = ito;
-        heap_2[ito]->position = ifrom;
-        heap_2[ito] = heap_2[ifrom];
-        heap_2[ifrom] = temp_ptr;
+    t_heap2 *temp_ptr = NULL;
+    while ((ito >= 1) && (heap2[ifrom]->cost < heap2[ito]->cost)) {
+        temp_ptr = heap2[ito];
+        // update position to facilitate future removal
+        heap2[ifrom]->position = ito;
+        heap2[ito]->position = ifrom;
+        heap2[ito] = heap2[ifrom];
+        heap2[ifrom] = temp_ptr;
         ifrom = ito;
         ito = ifrom / 2;
     }
 }
-/*
- * remove node of [position] from heap
- * sifting to maintain the heap property
- */
-static void remove_heap_2_node(int position) {
-    // XXX
-    hptr_map[heap_2[position]->inode] = NULL;
-    free(heap_2[position]);
-    heap_2_tail --;
-    heap_2[position] = heap_2[heap_2_tail];
-    heap_2[heap_2_tail] = NULL;
-    if (heap_2[position] != NULL)
-        heap_2[position]->position = position;
+
+
+static void remove_heap2_node(int position) {
+    /*
+     * remove node of [position] from heap
+     * sifting to maintain the heap property
+     */
+
+    hptr_map[heap2[position]->inode] = NULL;
+
+    free(heap2[position]);
+    heap2_tail --;
+    heap2[position] = heap2[heap2_tail];
+    heap2[heap2_tail] = NULL;
+    if (heap2[position] != NULL)
+        heap2[position]->position = position;
     // Sifting:
     int ifrom = position;
     int ito = 2 * ifrom;
-    struct s_heap_2 *temp;
-    while (ito < heap_2_tail) {
-        if (ito != heap_2_tail - 1 && heap_2[ito + 1]->cost < heap_2[ito]->cost)
+    t_heap2 *temp;
+    while (ito < heap2_tail) {
+        if (ito != heap2_tail - 1 && heap2[ito + 1]->cost < heap2[ito]->cost)
             ito++;
-        if (heap_2[ito]->cost > heap_2[ifrom]->cost)
+        if (heap2[ito]->cost > heap2[ifrom]->cost)
             break;
-        temp = heap_2[ito];
-        heap_2[ito] = heap_2[ifrom];
-        heap_2[ifrom] = temp;
-        heap_2[ito]->position = ito;
-        heap_2[ifrom]->position = ifrom;
+        temp = heap2[ito];
+        heap2[ito] = heap2[ifrom];
+        heap2[ifrom] = temp;
+        heap2[ito]->position = ito;
+        heap2[ifrom]->position = ifrom;
         ifrom = ito;
         ito = 2 * ifrom;
     }
 }
-static struct s_heap_2 * pop_heap_2_head(void) {
-    if (heap_2_tail == 1)
-        return NULL;
-    struct s_heap_2 * poped_head = (struct s_heap_2 *)my_malloc(sizeof (struct s_heap_2));
-    memcpy(poped_head, heap_2[1], sizeof(struct s_heap_2));
 
-    remove_heap_2_node(1);
+
+static t_heap2 * pop_heap2_head(void) {
+    if (heap2_tail == 1)
+        return NULL;
+    t_heap2 * poped_head = (t_heap2 *)my_malloc(sizeof (t_heap2));
+    memcpy(poped_head, heap2[1], sizeof(t_heap2));
+
+    remove_heap2_node(1);
     
     rr_node_ranking[poped_head->inode].prev_node = poped_head->prev_node;
     rr_node_ranking[poped_head->inode].rank = num_nodes_ranked;
@@ -530,6 +546,9 @@
 
 
 static void fill_cost_map(t_segment_inf *segment_inf, int wire_type, int chan_type) {
+    /*
+     * setup the global cost map to enable future lookup by timing driven router
+     */
     int print_inode_x = -1, print_inode_y = -1;
     int term_x = -1;
     int term_y = -1;
@@ -550,12 +569,12 @@
                     bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_x = rr_node_ranking[inode].cost;
                     bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_C_x = rr_node_ranking[inode].C_upstream;
                     bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_basecost_x = rr_node_ranking[inode].basecost_upstream;
-                    
+#ifdef DEBUG_BFS_PRECAL 
                     if (term_x == DEBUG_OFFSET_X && term_y == DEBUG_OFFSET_Y
                      && wire_type == DEBUG_SEG_TYPE && chan_type == DEBUG_CHAN_TYPE) {
                         print_inode_x = inode;
                     }
-                    
+#endif              
                 }
             } else {
                 if (bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_y == UNDEFINED
@@ -563,15 +582,17 @@
                     bfs_lookahead_info[term_x][term_y][wire_type][chan_type].Tdel_y = rr_node_ranking[inode].cost;
                     bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_C_y = rr_node_ranking[inode].C_upstream;
                     bfs_lookahead_info[term_x][term_y][wire_type][chan_type].acc_basecost_y = rr_node_ranking[inode].basecost_upstream;
-                    
+#ifdef DEBUG_BFS_PRECAL              
                     if (term_x == DEBUG_OFFSET_X && term_y == DEBUG_OFFSET_Y
                      && wire_type == DEBUG_SEG_TYPE && chan_type == DEBUG_CHAN_TYPE) {
                         print_inode_y = inode;
                     }
+#endif
                 }
             }
         }
     }
+#ifdef DEBUG_BFS_PRECAL
     if (print_inode_x != -1) {
         printf("BFS SELECTED PATH (CHANX): [%d][%d][%d][%d]\n", DEBUG_OFFSET_X, DEBUG_OFFSET_Y, DEBUG_SEG_TYPE, DEBUG_CHAN_TYPE);
         print_bfs_optimal_path(print_inode_x);
@@ -580,9 +601,15 @@
         printf("BFS SELECTED PATH (CHANY): [%d][%d][%d][%d]\n", DEBUG_OFFSET_X, DEBUG_OFFSET_Y, DEBUG_SEG_TYPE, DEBUG_CHAN_TYPE);
         print_bfs_optimal_path(print_inode_y);
     }
+#endif
 }
 
+
 static void get_leave_pt_by_cb (bool is_conn_ipin, int offset, int inode, int *term_x, int *term_y) {
+    /*
+     * return the end point of a wire (term_x, term_y), calculated by inode start point + offset
+     * if it can drive a clb at (term_x, term_y) (i.e.: is_conn_ipin = 1)
+     */
     if (!is_conn_ipin) {
         *term_x = -1;
         *term_y = -1;
@@ -605,24 +632,30 @@
         else 
             *term_y = start_y - offset - 2;
     }
+    // exclude end points falling outside the range of cost map
     if (*term_x < 0 || *term_x > nx-3)
         *term_x = -1;
     if (*term_y < 0 || *term_y > ny-3)
         *term_y = -1;
 }
 
+
+/**************************** util functions for debugging & profiling ************************************/
 void print_cost_map(char *fname) {
+    /*
+     * export to file the map of future cost
+     */
     FILE *fp;
     fp = fopen(fname, "w+");
     if (fp == NULL) {
-        printf("cannot open file\n");
+        vpr_printf_warning(__FILE__, __LINE__, "cannot open file\n");
     }
     float max_basecost = -1;
     fprintf(fp, "START: COST RANKING MAP\n");
     fprintf(fp, "x\ty\tcost_x\tcost_y\n");
     for (int ix = 0; ix < nx-2; ix ++) {
         for (int iy = 0; iy < ny-2; iy ++) {
-            for (int iseg = 0; iseg < g_bfs_num_seg; iseg ++) {
+            for (int iseg = 0; iseg < g_debug_num_seg; iseg ++) {
                 for (int ichan = 0; ichan < 2; ichan ++) {
                     fprintf(fp, "%d\t%d\t%.1f e-10\t%.1f e-10\n", ix, iy, 
                         bfs_lookahead_info[ix][iy][iseg][ichan].acc_basecost_x * pow(10, 10), 
@@ -635,44 +668,39 @@
             }
         }
     }
-    printf("MAX basecost\t%e\n", max_basecost);
+    //printf("MAX basecost\t%e\n", max_basecost);
+    fprintf(fp, "END MAP PRINTING\n");
     fclose(fp);
-    printf("END MAP PRINTING\n");
 }
 
-void print_C_map(float **c_map) {
-    printf("START: COST RANKING MAP\n");
-    printf("x\ty\tC_upstream\n");
-    for (int ix = 0; ix < nx-2; ix++){
-        for (int iy = 0; iy < ny-2; iy++){
-            printf("%d\t%d\t%e\n", ix, iy, c_map[ix][iy]);
-        }
-    }
-}
 
 static void print_seg_conn_matrix(t_segment_inf *segment_inf, int num_segment) {
-    printf(">> SEGMENT CONN INFO\n");
+    vpr_printf_info(">> SEGMENT CONN INFO\n");
     for (int iseg = 0; iseg < num_segment; iseg ++) {
-        printf("\tSEG %s\t-->\t", segment_inf[iseg].name);
+        printf("\tSEG %s  -->  ", segment_inf[iseg].name);
         for (int i_to_seg = 0; i_to_seg < num_segment; i_to_seg ++) {
             if (seg_conn_inf[iseg][i_to_seg] == 1) {
-                printf("%s\t", segment_inf[i_to_seg].name);
+                printf("%s  ", segment_inf[i_to_seg].name);
             }
         }
         printf("\n");
     }
 }
 
+
 void print_bfs_optimal_path(int term_node) {
     assert(term_node >= 0 && term_node < num_rr_nodes);
     int cur_node = term_node;
-    printf("*** PRINT BFS OPTIMAL PATH\n");
+    vpr_printf_info("*** PRINT BFS OPTIMAL PATH\n");
     while (rr_node_ranking[cur_node].prev_node != -1) {
         int x_s, x_e, y_s, y_e;
         get_unidir_seg_end(cur_node, &x_s, &y_s);
         get_unidir_seg_start(cur_node, &x_e, &y_e);
         int len = rr_node[cur_node].get_len();
-        printf("\tinode %d\tL%d\tstart(%d,%d)\tend(%d,%d)\tbackTdel %.3f\t0_basecost %.3f\tacc_basecost %.3f\n", cur_node, len, x_s, y_s, x_e, y_e, rr_node_ranking[cur_node].cost * pow(10,10), rr_indexed_data[rr_node[cur_node].get_cost_index()].base_cost * pow(10, 10), rr_node_ranking[cur_node].basecost_upstream * pow(10, 10));
+        vpr_printf_info("\tinode %d\tL%d\tstart(%d,%d)\tend(%d,%d)\tbackTdel %.3f\t0_basecost %.3f\tacc_basecost %.3f\n", 
+                cur_node, len, x_s, y_s, x_e, y_e, rr_node_ranking[cur_node].cost * pow(10,10), 
+                rr_indexed_data[rr_node[cur_node].get_cost_index()].base_cost * pow(10, 10), 
+                rr_node_ranking[cur_node].basecost_upstream * pow(10, 10));
         cur_node = rr_node_ranking[cur_node].prev_node;
     }
 }
diff --git a/vpr/SRC/route/look_ahead_bfs.h b/vpr/SRC/route/look_ahead_bfs_precal.h
similarity index 77%
rename from vpr/SRC/route/look_ahead_bfs.h
rename to vpr/SRC/route/look_ahead_bfs_precal.h
index 8383291..f856bf4 100644
--- a/vpr/SRC/route/look_ahead_bfs.h
+++ b/vpr/SRC/route/look_ahead_bfs_precal.h
@@ -1,7 +1,7 @@
 #pragma once
 #include "physical_types.h"
 // new data structure
-struct s_heap_2 {
+typedef struct s_heap2 {
     int prev_node;
     int inode;
     int position;
@@ -9,8 +9,8 @@
     float R_upstream;
     float C_upstream;
     float basecost_upstream;
-};
-void look_ahead_bfs(int start_inode, t_segment_inf *segment_inf);
+} t_heap2;
+void look_ahead_bfs_precal(int start_inode, t_segment_inf *segment_inf);
 void pre_cal_look_ahead(t_segment_inf *segment_inf, int num_segment);
 void print_cost_map(char *fname);
 void print_C_map(float **c_map);
diff --git a/vpr/SRC/route/profile_lookahead.c b/vpr/SRC/route/profile_lookahead.c
index e887c0b..38e3651 100644
--- a/vpr/SRC/route/profile_lookahead.c
+++ b/vpr/SRC/route/profile_lookahead.c
@@ -6,10 +6,12 @@
 #if DEBUGEXPANSIONRATIO == 1
     if (itry == DB_ITRY && target_node == DB_TARGET_NODE){
         if (is_start) {
-            printf("\n**** itry %d\tSTART ROUTE TO net %d\ttarget %d\tinode %d\n", itry, inet, itarget, target_node);
+            printf("\n**** itry %d\tSTART ROUTE TO net %d\ttarget %d\tinode %d\n", 
+                    itry, inet, itarget, target_node);
             printf("COORD: (%d,%d)\n", rr_node[target_node].get_xlow(), rr_node[target_node].get_ylow());
         } else {
-            printf("\n**** itry %d\tFINISH ROUTE TO net %d\ttarget %d\tinode %d\n\n", itry, inet, itarget, target_node);
+            printf("\n**** itry %d\tFINISH ROUTE TO net %d\ttarget %d\tinode %d\n\n", 
+                    itry, inet, itarget, target_node);
         }
     } 
 #endif
@@ -19,7 +21,6 @@
     int target_chan_type, int target_pin_x, int target_pin_y, 
     float expected_cost, float future_Tdel, float new_tot_cost, 
     float cur_basecost, float cong_cost) {
-#if DEBUGEXPANSIONRATIO == 1
     if (itry_share == DB_ITRY && target_node == DB_TARGET_NODE) {
         const char *dir_name[] = {"INC", "DEC", NULL};
         const char *node_seg_type[] = {"source", "sink", "ipin", "opin", "chanX", "chanY", NULL};   
@@ -48,10 +49,59 @@
                 inode, len, dir_name[dir], node_seg_type[inode_type], 
                 inode_seg_index, s_x, s_y, e_x, e_y);
             printf("exp cost %.3f\tb-Tdel %.3f\ttot cost %.3f\tbCost %.3fcCost %.3f\n",
-                expected_cost * pow(10, 10), future_Tdel * pow(10, 10), 
+                expected_cost * pow(10, 10), future_Tdel * pow(10, 10),
                 new_tot_cost * pow(10, 10), cur_basecost * pow(10, 10),
                 cong_cost * pow(10, 10));
         }
     }
-#endif
+}
+
+void setup_max_min_criticality(float &max_crit, float &min_crit, 
+        int &max_inet, int &min_inet, int &max_ipin, int &min_ipin, t_slack * slacks) {
+    /*
+     *  set up max/min criticality && where the max/min happens
+     *  called in try_timing_driven_route()
+     */
+    float max_crit_len_product = -1;
+    float min_crit_len_product = pow(10, 20);
+    max_crit = -1;
+    min_crit = pow(10, 20);
+    max_inet = -1;
+    max_ipin = -1;
+    min_inet = -1;
+    min_ipin = -1;
+    for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
+        if (should_route_net(inet) == false)
+            continue; 
+        if (g_clbs_nlist.net[inet].is_global == true)
+            continue;
+        if (g_clbs_nlist.net[inet].is_fixed == true)
+            continue;
+        int source_inode = net_rr_terminals[inet][0];
+        int x_source_mid = (rr_node[source_inode].get_xhigh() + rr_node[source_inode].get_xlow()) / 2;
+        int y_source_mid = (rr_node[source_inode].get_yhigh() + rr_node[source_inode].get_ylow()) / 2;
+        unsigned int pin_upbound = (g_clbs_nlist.net[inet].pins.size() > 2) ? 2 : g_clbs_nlist.net[inet].pins.size();
+        for (unsigned int ipin = 1; ipin < pin_upbound; ipin++) {
+            float pin_criticality = slacks->timing_criticality[inet][ipin];
+            int target_inode = net_rr_terminals[inet][ipin];
+            int x_target_mid = (rr_node[target_inode].get_xhigh() + rr_node[target_inode].get_xlow()) / 2;
+            int y_target_mid = (rr_node[target_inode].get_yhigh() + rr_node[target_inode].get_ylow()) / 2;
+            // print long nets only
+            int bb_len = abs(x_source_mid - x_target_mid + 1) + abs(y_source_mid - y_target_mid + 1);
+            float len_crit_product = bb_len * pin_criticality;
+            float len_inv_crit_product = (1./bb_len) * pin_criticality;
+            max_inet = (len_crit_product > max_crit_len_product) ? inet : max_inet;
+            max_ipin = (len_crit_product > max_crit_len_product) ? ipin : max_ipin;
+            min_inet = (len_inv_crit_product < min_crit_len_product) ? inet : min_inet;
+            min_ipin = (len_inv_crit_product < min_crit_len_product) ? ipin : min_ipin;
+            max_crit = (len_crit_product > max_crit_len_product) ? pin_criticality : max_crit;
+            min_crit = (len_inv_crit_product < min_crit_len_product) ? pin_criticality : min_crit;
+            max_crit_len_product = (len_crit_product > max_crit_len_product) ? len_crit_product : max_crit_len_product;
+            min_crit_len_product = (len_inv_crit_product < min_crit_len_product) ? len_inv_crit_product : min_crit_len_product;
+        }
+    }
+    //printf("max_inet %d\tmax_ipin %d\tmin_inet %d\tmin_ipin %d\n", max_inet, max_ipin, min_inet, min_ipin);
+    //printf("max_crit_len_product %f\tmin_crit %f\n", max_crit_len_product, min_crit);
+    //printf("\n");
+
 }
diff --git a/vpr/SRC/route/profile_lookahead.h b/vpr/SRC/route/profile_lookahead.h
index 374e86f..2d9c3b3 100644
--- a/vpr/SRC/route/profile_lookahead.h
+++ b/vpr/SRC/route/profile_lookahead.h
@@ -29,3 +29,5 @@
     int target_chan_type, int target_pin_x, int target_pin_y,
     float expected_cost, float future_Tdel, float new_tot_cost,
     float cur_basecost, float cong_cost);
+void setup_max_min_criticality(float &max_crit, float &min_crit, 
+        int &max_inet, int &min_inet, int &max_ipin, int &min_ipin, t_slack * slacks);
diff --git a/vpr/SRC/route/route_common.c b/vpr/SRC/route/route_common.c
index f4acd58..c9c5631 100755
--- a/vpr/SRC/route/route_common.c
+++ b/vpr/SRC/route/route_common.c
@@ -21,7 +21,7 @@
 #include "rr_graph.h"

 #include "read_xml_arch_file.h"

 #include "ReadOptions.h"

-#include "look_ahead_bfs.h"

+#include "look_ahead_bfs_precal.h"

 

 // Disable the routing predictor for circuits with less that this number of nets.

 // This was experimentally determined, by Matthew Walker, to be the most useful

diff --git a/vpr/SRC/route/route_common.h b/vpr/SRC/route/route_common.h
index b5fc42c..e2bbbc0 100755
--- a/vpr/SRC/route/route_common.h
+++ b/vpr/SRC/route/route_common.h
@@ -19,6 +19,8 @@
 #define INPRECISE_GET_HEAP_HEAD 0
 #define SPACEDRIVENHEAP 0
 
+#define CRIT_THRES_INIT 0.8     /* criticality threshold for switching to new look ahead */
+#define CRIT_THRES_INC_RATE 1.01
 #define OPIN_INIT_PENALTY 2.2
 #define OPIN_DECAY_RATE 0.9
 #define ALLOWED_HEAP_ERR 0.02  /* for the new get heap head method (take into account (x,y)) */
diff --git a/vpr/SRC/route/route_timing.c b/vpr/SRC/route/route_timing.c
index 312d5d8..268cc58 100755
--- a/vpr/SRC/route/route_timing.c
+++ b/vpr/SRC/route/route_timing.c
@@ -21,22 +21,41 @@
 #include "stats.h"

 #include "ReadOptions.h"

 #include "profile_lookahead.h"

-#include "look_ahead_bfs.h"

+#include "look_ahead_bfs_precal.h"

 

 using namespace std;

-int g_target_node;

-float g_basecost;

-#define DEBUGSMITHWATERMAN

-#ifdef DEBUGSMITHWATERMAN

-bool debug_bt_begin = false;

-bool debug_itr_begin = false;

-bool heap_head_begin = false;

-int target_count = 0;

-#endif

+

+/************************* variables used by new look ahead *****************************/

+typedef struct s_opin_cost_inf {

+    float Tdel;

+    float acc_basecost;

+    float acc_C;

+} t_opin_cost_inf;

+// map key: seg_index; map value: cost of that seg_index from source to target

+map<int, t_opin_cost_inf> opin_cost_by_seg_type;

+typedef struct s_target_pin_side {

+    int chan_type;

+    int x;

+    int y;

+} t_target_pin_side;

+// map key: currently expanded inode; map value: ipin side, x, y that will produce a min cost to target

+map<int, t_target_pin_side> expanded_node_min_target_side;

+int target_chosen_chan_type = UNDEFINED;

+int target_pin_x = UNDEFINED;

+int target_pin_y = UNDEFINED;

+

+float crit_threshold;   // for switch between new & old lookahead: only use new lookahead for high critical path

+

+

+/******************* variables for profiling & debugging *******************/

+float basecost_for_printing;    // for printing in expand_neighbours()

+float acc_route_time = 0.;      // accumulated route time: sum of router runtime over all iterations

+

+float time_pre_itr;

+float time_1st_itr;

+

 #if PRINTCRITICALPATH == 1

 bool is_first_itr = false;

-float max_crit_len_product = -1;

-float min_crit_len_product = pow(10, 20);

 float max_crit = -1;

 float min_crit = 10;

 int max_inet = -1;

@@ -44,31 +63,8 @@
 int min_inet = -1;

 int min_ipin = -1;

 #endif

-float acc_route_time = 0.;

-struct s_opin_cost_inf {

-    float Tdel;

-    float acc_basecost;

-    float acc_C;

-};

-// map key: seg_index; map value: cost of that seg_index from source to target

-map<int, struct s_opin_cost_inf> opin_cost_by_seg_type;

-struct s_target_pin_side {

-    int chan_type;

-    int x;

-    int y;

-};

-// map key: currently expanded inode; map value: ipin side, x, y that will produce a min cost to target

-map<int, struct s_target_pin_side> expanded_node_min_target_side;

-int target_chan_type = UNDEFINED;

-int target_pin_x = UNDEFINED;

-int target_pin_y = UNDEFINED;

 

-float crit_threshold_init = 0.8;

-float crit_threshold_inc_rate = 1.01;

-float crit_threshold;

 

-float time_pre_itr;

-float time_1st_itr;

 /******************** Subroutines local to route_timing.c ********************/

 

 static int get_max_pins_per_net(void);

@@ -100,21 +96,22 @@
 

 static double get_overused_ratio();

 

-static bool should_route_net(int inet);

+// should_route_net is used in setup_max_min_criticality() in profile_lookahead.c

+//static bool should_route_net(int inet);

 

 static bool turn_on_bfs_map_lookup(float criticality);

 

-static void setup_min_side_reaching_target(int inode, int t_chan_type, int t_x, int t_y, 

+static void setup_min_side_reaching_target(int inode, int target_chan_type, int target_x, int target_y, 

         float *min_Tdel, float cur_Tdel, float *C_downstream, float *future_base_cost, 

         float cur_C_downstream, float cur_basecost);

 

 float bfs_direct_lookup(int offset_x, int offset_y, int seg_type, int chan_type, 

                 int to_chan_type, float *acc_C, float *acc_basecost);

 

-static void adjust_coord_for_wrong_dir_wire(int *s_chan_type, int *s_dir, int start_len, 

+static void adjust_coord_for_wrong_dir_wire(int *start_chan_type, int *start_dir, int start_len, 

                                             int *x_start, int *y_start, int x_target, int y_target);

  

-static float get_bfs_lookup_Tdel(int s_inode, int s_chan_type, int t_chan_type, int s_seg_type, int s_dir,

+static float get_precal_Tdel(int start_node, int start_chan_type, int target_chan_type, int start_seg_type, int start_dir,

                                  int x_start, int y_start, int x_target, int y_target, int len_start,

                                  float *acc_C, float *acc_basecost);

    

@@ -204,9 +201,12 @@
 #endif

     acc_route_time = 0.;

 	for (int itry = 1; itry <= router_opts.max_router_iterations; ++itry) {

-        if (itry == 1) crit_threshold = crit_threshold_init;

-        else crit_threshold *= crit_threshold_inc_rate;

-        if (crit_threshold > 0.988) crit_threshold = 0.988;

+        if (itry == 1) 

+            crit_threshold = CRIT_THRES_INIT;

+        else 

+            crit_threshold *= CRIT_THRES_INC_RATE;

+        if (crit_threshold > 0.988) 

+            crit_threshold = 0.988;

         if (itry == 2) {

             nodes_expanded_1st_itr = nodes_expanded_cur_itr;

             nodes_expanded_max_itr = nodes_expanded_cur_itr;

@@ -228,17 +228,6 @@
 #if OPINPENALTY == 1

         opin_penalty *= OPIN_DECAY_RATE;

 #endif

-#if LOOKAHEADCONGPENALTY == 1

-        if (itry > 5 && itry <= 15)

-            future_cong_penalty *= 1.1;

-#endif

-#if PRINTCRITICALPATH == 1

-        if (itry == 1) {

-            is_first_itr = true;

-        } else {

-            is_first_itr = false;

-        }

-#endif

 #if DEPTHWISELOOKAHEADEVAL == 1

         if (router_opts.lookahead_eval) {

             subtree_count.clear();

@@ -254,51 +243,13 @@
 			g_clbs_nlist.net[inet].is_fixed = false;

 		}

 #if PRINTCRITICALPATH == 1

-        // XXX: set up max/min criticality && where the max/min happens

-        max_crit_len_product = -1;

-        min_crit_len_product = pow(10, 20);

-        max_crit = -1;

-        min_crit = pow(10, 20);

-        max_inet = -1;

-        max_ipin = -1;

-        min_inet = -1;

-        min_ipin = -1;

-		for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {

-            if (should_route_net(inet) == false)

-                continue; 

-    		if (g_clbs_nlist.net[inet].is_global == true)

-                continue;

-            if (g_clbs_nlist.net[inet].is_fixed == true)

-                continue;

-            int source_inode = net_rr_terminals[inet][0];

-            int x_source_mid = (rr_node[source_inode].get_xhigh() + rr_node[source_inode].get_xlow()) / 2;

-            int y_source_mid = (rr_node[source_inode].get_yhigh() + rr_node[source_inode].get_ylow()) / 2;

-            unsigned int pin_upbound = (g_clbs_nlist.net[inet].pins.size() > 2) ? 2 : g_clbs_nlist.net[inet].pins.size();

-            for (unsigned int ipin = 1; ipin < pin_upbound; ipin++) {

-			    float pin_criticality = slacks->timing_criticality[inet][ipin];

-                //printf("%f  ", pin_criticality);

-                int target_inode = net_rr_terminals[inet][ipin];

-                int x_target_mid = (rr_node[target_inode].get_xhigh() + rr_node[target_inode].get_xlow()) / 2;

-                int y_target_mid = (rr_node[target_inode].get_yhigh() + rr_node[target_inode].get_ylow()) / 2;

-                // print long nets only

-                int bb_len = abs(x_source_mid - x_target_mid + 1) + abs(y_source_mid - y_target_mid + 1);

-                //if (abs(x_source_mid - x_target_mid) + abs(y_source_mid - y_target_mid) > 0.3 * (nx + ny)) {

-                float len_crit_product = bb_len * pin_criticality;

-                float len_inv_crit_product = (1./bb_len) * pin_criticality;

-                max_inet = (len_crit_product > max_crit_len_product) ? inet : max_inet;

-                max_ipin = (len_crit_product > max_crit_len_product) ? ipin : max_ipin;

-                min_inet = (len_inv_crit_product < min_crit_len_product) ? inet : min_inet;

-                min_ipin = (len_inv_crit_product < min_crit_len_product) ? ipin : min_ipin;

-                max_crit = (len_crit_product > max_crit_len_product) ? pin_criticality : max_crit;

-                min_crit = (len_inv_crit_product < min_crit_len_product) ? pin_criticality : min_crit;

-                max_crit_len_product = (len_crit_product > max_crit_len_product) ? len_crit_product : max_crit_len_product;

-                min_crit_len_product = (len_inv_crit_product < min_crit_len_product) ? len_inv_crit_product : min_crit_len_product;

-                //}

-            }

+        if (itry == 1) {

+            is_first_itr = true;

+        } else {

+            is_first_itr = false;

         }

-        //printf("max_inet %d\tmax_ipin %d\tmin_inet %d\tmin_ipin %d\n", max_inet, max_ipin, min_inet, min_ipin);

-        //printf("max_crit_len_product %f\tmin_crit %f\n", max_crit_len_product, min_crit);

-        //printf("\n");

+        setup_max_min_criticality(max_crit, min_crit, 

+                max_inet, min_inet, max_ipin, min_ipin, slacks);

 #endif

 		for (unsigned int i = 0; i < g_clbs_nlist.net.size(); ++i) {

 			bool is_routable = try_timing_driven_route_net(

@@ -755,13 +706,12 @@
             printf("INET %d\n", inet);

 		target_criticality = pin_criticality[target_pin];

 #if PRINTCRITICALPATH == 1

-        if ((int)inet == max_inet && itarget == max_ipin) {

+        if ((int)inet == max_inet && itarget == max_ipin)

             is_print_cpath = true;

-        } else {

+        else

             is_print_cpath = false;

-        }

 #endif

-        target_chan_type = UNDEFINED;

+        target_chosen_chan_type = UNDEFINED;

         target_pin_x = UNDEFINED;

         target_pin_y = UNDEFINED;

         expanded_node_min_target_side.clear();

@@ -826,15 +776,12 @@
 						target_criticality, target_node, astar_fac,

 						highfanout_rlim, lookahead_eval, target_criticality);

 			}

-            else if (debug_bt_begin && debug_itr_begin) {

-                printf("\tfree %d\tTdel %e\n", inode, cheapest->back_Tdel);

-            }

 			free_heap_data(cheapest);

 			cheapest = get_heap_head();

-            if (target_chan_type == UNDEFINED) {

+            if (target_chosen_chan_type == UNDEFINED) {

                 int cheap_inode = cheapest->index;

                 if (expanded_node_min_target_side.count(cheap_inode) > 0) {

-                    target_chan_type = expanded_node_min_target_side[cheap_inode].chan_type;

+                    target_chosen_chan_type = expanded_node_min_target_side[cheap_inode].chan_type;

                     target_pin_x = expanded_node_min_target_side[cheap_inode].x;

                     target_pin_y = expanded_node_min_target_side[cheap_inode].y;

                     expanded_node_min_target_side.clear();

@@ -847,8 +794,6 @@
 				free_route_tree(rt_root);

 				return (false);

 			}

-            if (heap_head_begin) 

-                printf("HEAPHEAD: %d\n", cheapest->index);

 			inode = cheapest->index;

 		}

 

@@ -955,9 +900,9 @@
 #if DEBUGEXPANSIONRATIO == 1

     if (itry_share == DB_ITRY && target_node == DB_TARGET_NODE) 

         printf("CRITICALITY %f\n", target_criticality);

-#endif

     print_expand_neighbours(true, inode, target_node, 

-        target_chan_type, target_pin_x, target_pin_y, 0., 0., 0., 0., 0.);

+        target_chosen_chan_type, target_pin_x, target_pin_y, 0., 0., 0., 0., 0.);

+#endif

 

 	for (iconn = 0; iconn < num_edges; iconn++) {

 		to_node = rr_node[inode].edges[iconn];

@@ -1035,34 +980,22 @@
                 node_expanded_per_sink ++;

             }

         }

+#if DEBUGEXPANSIONRATIO == 1

         if (new_tot_cost < rr_node_route_inf[to_node].path_cost) {

             print_expand_neighbours(false, to_node, target_node, 

-                target_chan_type, target_pin_x, target_pin_y, 

+                target_chosen_chan_type, target_pin_x, target_pin_y, 

                 expected_cost, Tdel + current->back_Tdel, new_tot_cost, 

-                g_basecost, get_rr_cong_cost(to_node));

+                basecost_for_printing, get_rr_cong_cost(to_node));

         }

+#endif

 	} /* End for all neighbours */

 }

-static bool turn_on_bfs_map_lookup(float criticality) {

-    // Not currently add the bfs map look up in placement

-    if (!route_start)

-        return false;

-    if (itry_share == 1)

-        return true;

-    //if (nodes_expanded_pre_itr > 2 * nodes_expanded_1st_itr)

-    //    return false;

-    if (criticality > crit_threshold) return true;

-    else return false;

-    //if (nodes_expanded_pre_itr < 2 * nodes_expanded_1st_itr)

-    //    return true;

-    //return false;

-}

+

 static float get_timing_driven_expected_cost(int inode, int target_node,

 		float criticality_fac, float R_upstream) {

 	/* Determines the expected cost (due to both delay and resouce cost) to reach *

 	 * the target node from inode.  It doesn't include the cost of inode --       *

 	 * that's already in the "known" path_cost.                                   */

-    g_target_node = target_node;

 

 	t_rr_type rr_type;

 	int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;

@@ -1076,18 +1009,17 @@
 #endif

         float Tdel_future = 0.;

 #if LOOKAHEADBFS == 1

-        if (turn_on_bfs_map_lookup(criticality_fac)) {// && criticality_fac > 0.15) {

+        if (turn_on_bfs_map_lookup(criticality_fac)) {

             // calculate Tdel, not just Tdel_future

             float C_downstream = -1.0;

-            cong_cost = -1.0;

             Tdel_future = get_timing_driven_future_Tdel(inode, target_node, &C_downstream, &cong_cost);

-            g_basecost = cong_cost;

+            basecost_for_printing = cong_cost;

             Tdel = Tdel_future + R_upstream * C_downstream;

             cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost

                        + rr_indexed_data[SINK_COST_INDEX].base_cost;

 

         } else {

- #endif

+#endif

             if (rr_type == OPIN) {

                 // keep the same as the original router

                 return 0;

@@ -1133,22 +1065,43 @@
 	}

 }

 

-/*

- * the whole motivation of this version of bfs look ahead:

- *    we don't evaluate the Tdel cost from (x1, y1) to (x2, y2)

- *    instead we evaluate by channel base. --> for example: 

- *    give the cost from chanx at (x1, y1) to chany at (x2, y2)

- *    prerequisite of reaching this level of precision:

- *         given a sink at (x,y), can it be reached from 

- *         TOP / RIGHT / BOTTOM / LEFT side of the clb?

- *    this prerequisite requires extra backward information about the sink

- *    which is store in g_pin_side, and setup when you build rr_graph (in rr_graph.c / rr_graph2.c)

- *         

- *

- *

- */

+

+/******************************* START function definition for new look ahead ***************************************/

+static bool turn_on_bfs_map_lookup(float criticality) {

+    /*

+     * check function to decide whether we should use the

+     * new lookahead or the old one

+     * crit_threshold is updated after each iteration

+     */

+    // Not currently add the bfs map look up in placement

+    if (!route_start)

+        return false;

+    if (itry_share == 1)

+        return true;

+    if (criticality > crit_threshold) 

+        return true;

+    else 

+        return false;

+}

+

+

 float get_timing_driven_future_Tdel (int inode, int target_node, 

         float *C_downstream, float *future_base_cost) {

+    /*

+     * the whole motivation of this version of bfs look ahead:

+     *    we don't evaluate the Tdel cost from (x1, y1) to (x2, y2)

+     *    instead we evaluate by channel. --> for example: 

+     *    give the cost from chanx at (x1, y1) to chany at (x2, y2)

+     *    prerequisite of supporting this level of precision:

+     *         given a sink at (x,y), we should know if it can be

+     *         reached from TOP / RIGHT / BOTTOM / LEFT side of the clb

+     *    this prerequisite requires extra backward information about the sink

+     *    which is store in g_pin_side, and setup when building rr_graph 

+     *    (in rr_graph.c / rr_graph2.c)     

+     *

+     *

+     */

+

     *future_base_cost = 0;

     *C_downstream = 0;

 

@@ -1161,47 +1114,48 @@
     get_unidir_seg_start(inode, &x_start, &y_start);

     // s_* stands for information related to start node (inode)

     // t_* is for target node

-    int s_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;

-    int s_dir = rr_node[inode].get_direction();

+    int start_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;

+    int start_dir = rr_node[inode].get_direction();

     int s_len = rr_node[inode].get_len();

-    int s_chan_type = rr_node[inode].type;

-    // if we haven't decided which side is the cheapest to reach the target node,

-    // then we expand and get the estimated cost of all possible sides.

-    if (target_chan_type == UNDEFINED && rr_node[inode].type != OPIN) {

-        int t_chan_type;

+    int start_chan_type = rr_node[inode].type;

+

+    if (target_chosen_chan_type == UNDEFINED && rr_node[inode].type != OPIN) {

+        // if we haven't decided which side is the cheapest to reach the target node,

+        // then we expand and get the estimated cost of all possible sides.

+        int target_chan_type;

         int x_target, y_target;

         get_unidir_seg_end(target_node, &x_target, &y_target);

         float cur_C_downstream, cur_basecost;

         float min_Tdel = HUGE_POSITIVE_FLOAT, cur_Tdel = HUGE_POSITIVE_FLOAT;

         if ((g_pin_side[target_node] & static_cast<char>(Pin_side::TOP)) == static_cast<char>(Pin_side::TOP)) {

-            t_chan_type = CHANX;

-            cur_Tdel = get_bfs_lookup_Tdel(inode, s_chan_type, t_chan_type, s_seg_type, s_dir, 

+            target_chan_type = CHANX;

+            cur_Tdel = get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir, 

                         x_start, y_start, x_target, y_target, s_len, &cur_C_downstream, &cur_basecost);

-            setup_min_side_reaching_target(inode, t_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,

+            setup_min_side_reaching_target(inode, target_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,

                                     C_downstream, future_base_cost, cur_C_downstream, cur_basecost);

         }

         if ((g_pin_side[target_node] & static_cast<char>(Pin_side::RIGHT)) == static_cast<char>(Pin_side::RIGHT)) {

-            t_chan_type = CHANY;

-            cur_Tdel = get_bfs_lookup_Tdel(inode, s_chan_type, t_chan_type, s_seg_type, s_dir, 

+            target_chan_type = CHANY;

+            cur_Tdel = get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir, 

                         x_start, y_start, x_target, y_target, s_len, &cur_C_downstream, &cur_basecost);

-            setup_min_side_reaching_target(inode, t_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,

+            setup_min_side_reaching_target(inode, target_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,

                                     C_downstream, future_base_cost, cur_C_downstream, cur_basecost);

         }

         if ((g_pin_side[target_node] & static_cast<char>(Pin_side::BOTTOM)) == static_cast<char>(Pin_side::BOTTOM)) {

-            t_chan_type = CHANX;

+            target_chan_type = CHANX;

             y_target --;

-            cur_Tdel = get_bfs_lookup_Tdel(inode, s_chan_type, t_chan_type, s_seg_type, s_dir, 

+            cur_Tdel = get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir, 

                         x_start, y_start, x_target, y_target, s_len, &cur_C_downstream, &cur_basecost);

-            setup_min_side_reaching_target(inode, t_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,

+            setup_min_side_reaching_target(inode, target_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,

                                     C_downstream, future_base_cost, cur_C_downstream, cur_basecost);

             y_target ++;

         }

         if ((g_pin_side[target_node] & static_cast<char>(Pin_side::LEFT)) == static_cast<char>(Pin_side::LEFT)) {

-            t_chan_type = CHANY;

+            target_chan_type = CHANY;

             x_target --;

-            cur_Tdel = get_bfs_lookup_Tdel(inode, s_chan_type, t_chan_type, s_seg_type, s_dir, 

+            cur_Tdel = get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir, 

                         x_start, y_start, x_target, y_target, s_len, &cur_C_downstream, &cur_basecost);

-            setup_min_side_reaching_target(inode, t_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,

+            setup_min_side_reaching_target(inode, target_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,

                                     C_downstream, future_base_cost, cur_C_downstream, cur_basecost);

             x_target ++;

         }

@@ -1210,38 +1164,49 @@
         assert(min_Tdel < 0.98 * HUGE_POSITIVE_FLOAT);

         return min_Tdel;

     } else {

-        int t_chan_type, x_target, y_target;

-        if (target_chan_type == UNDEFINED) {

+        int target_chan_type, x_target, y_target;

+        if (target_chosen_chan_type == UNDEFINED) {

             // always do coarse estimation for OPIN

             // OPIN

             get_unidir_seg_end(target_node, &x_target, &y_target);

-            t_chan_type = CHANX;

+            target_chan_type = CHANX;

         } else {

-            // let get_bfs_lookup_Tdel() directly look up correct value

-            // from target_chan_type, target_pin_x, target_pin_y

-            t_chan_type = target_chan_type;

+            // let get_precal_Tdel() directly look up correct value

+            // from target_chosen_chan_type, target_pin_x, target_pin_y

+            target_chan_type = target_chosen_chan_type;

             x_target = target_pin_x;

             y_target = target_pin_y;

         }

-        return get_bfs_lookup_Tdel(inode, s_chan_type, t_chan_type, s_seg_type, s_dir, 

+        return get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir, 

                     x_start, y_start, x_target, y_target, s_len, C_downstream, future_base_cost);

     }

 }

-static float get_bfs_lookup_Tdel(int s_inode, int s_chan_type, int t_chan_type, int s_seg_type, int s_dir,

+

+

+static float get_precal_Tdel(int start_node, int start_chan_type, int target_chan_type, int start_seg_type, int start_dir,

                                  int x_start, int y_start, int x_target, int y_target, int len_start,

                                  float *acc_C, float *acc_basecost) {

+    /*

+     * lookup the precalculated Tdel from start node to target node

+     * The real target node is SINK, and the target node in this function 

+     * is wire that can drive the SINK. So it has chan / seg type

+     * I currently adopt different policies for OPIN & wire as start node:

+     *    use a coarse Tdel for OPIN, and accurate Tdel for wire

+     *    TODO: may want to change OPIN lookup to be accurate as well, since

+     *    I am using OPIN penalty to restrict OPIN expansion

+     */

     // dealing with CHANX / CHANY / OPIN

     float Tdel = 0.0;

     int offset_x, offset_y;

-    if (rr_node[s_inode].type == OPIN) {

+    if (rr_node[start_node].type == OPIN) {

 #if OPINPENALTY == 0

         opin_penalty = 1;

 #endif

         float opin_del = HUGE_POSITIVE_FLOAT;

         int to_seg_type = -1;

-        int num_edges = rr_node[s_inode].get_num_edges();

+        int num_edges = rr_node[start_node].get_num_edges();

         for (int iconn = 0; iconn < num_edges; iconn ++) {

-            int to_node = rr_node[s_inode].edges[iconn];

+            int to_node = rr_node[start_node].edges[iconn];

             int seg_type = rr_indexed_data[rr_node[to_node].get_cost_index()].seg_index;

             float cur_opin_del = 0.5 * rr_node[to_node].R * rr_node[to_node].C;

             if (opin_cost_by_seg_type.count(seg_type) > 0) {

@@ -1268,19 +1233,21 @@
         // CHANX or CHANY

         bool is_heading_opposite;

         // first check if the direction is correct

-        if (s_dir == INC_DIRECTION) { 

-            if (s_chan_type == CHANX)

+        // if wire is heading away from target, we cannot directly lookup the cost map,

+        // instead we transform this case to the normal case in adjust_coord_for_wrong_dir_wire()

+        if (start_dir == INC_DIRECTION) { 

+            if (start_chan_type == CHANX)

                 is_heading_opposite = (x_start <= x_target) ? false : true; 

             else

                 is_heading_opposite = (y_start <= y_target) ? false : true;

         } else {

-            if (s_chan_type == CHANX) {

-                if (t_chan_type == CHANX)

+            if (start_chan_type == CHANX) {

+                if (target_chan_type == CHANX)

                     is_heading_opposite = (x_start >= x_target) ? false : true;

                 else

                     is_heading_opposite = (x_start >= x_target + 1) ? false : true;

             } else {

-                if (t_chan_type == CHANY)

+                if (target_chan_type == CHANY)

                     is_heading_opposite = (y_start >= y_target) ? false : true;

                 else 

                     is_heading_opposite = (y_start >= y_target + 1) ? false : true;

@@ -1290,28 +1257,32 @@
         if (is_heading_opposite) {

             // we make assumption how wires heading for wrong direction 

             // will eventually get to the right track

-            adjust_coord_for_wrong_dir_wire(&s_chan_type, &s_dir, len_start, &x_start, &y_start, x_target, y_target);

-            int guessed_iswitch = rr_node[s_inode].switches[0];

+            adjust_coord_for_wrong_dir_wire(&start_chan_type, &start_dir, len_start, &x_start, &y_start, x_target, y_target);

+            int guessed_iswitch = rr_node[start_node].switches[0];

             // XXX not updating basecost and C now

-            Tdel += (0.5 * rr_node[s_inode].R + g_rr_switch_inf[guessed_iswitch].R) * rr_node[s_inode].C;

+            Tdel += (0.5 * rr_node[start_node].R + g_rr_switch_inf[guessed_iswitch].R) * rr_node[start_node].C;

             Tdel += g_rr_switch_inf[guessed_iswitch].Tdel;

         }

         offset_x = abs(x_start - x_target);

         offset_y = abs(y_start - y_target);

-        if (s_chan_type == CHANX && t_chan_type == CHANY) {

+        if (start_chan_type == CHANX && target_chan_type == CHANY) {

             offset_y += (y_start > y_target) ? 1 : 0;

-            offset_x += (s_dir == DEC_DIRECTION) ? -1 : 0;

-        } else if (s_chan_type == CHANY && t_chan_type == CHANX) {

+            offset_x += (start_dir == DEC_DIRECTION) ? -1 : 0;

+        } else if (start_chan_type == CHANY && target_chan_type == CHANX) {

             offset_x += (x_start > x_target) ? 1 : 0;

-            offset_y += (s_dir == DEC_DIRECTION) ? -1 : 0;

+            offset_y += (start_dir == DEC_DIRECTION) ? -1 : 0;

         }

-        Tdel += bfs_direct_lookup(offset_x, offset_y, s_seg_type, s_chan_type, t_chan_type, acc_C, acc_basecost);

+        Tdel += bfs_direct_lookup(offset_x, offset_y, start_seg_type, start_chan_type, target_chan_type, acc_C, acc_basecost);

         return Tdel;

     }

 }

 

+

 float bfs_direct_lookup(int offset_x, int offset_y, int seg_type, int chan_type, 

                 int to_chan_type, float *acc_C, float *acc_basecost) {

+    /*

+     * directly lookup the global cost map

+     */

     offset_x = (offset_x > (nx-3)) ? nx-3 : offset_x;

     offset_y = (offset_y > (ny-3)) ? ny-3 : offset_y;

 

@@ -1329,31 +1300,48 @@
     return Tdel;

 }

 

-static void setup_min_side_reaching_target(int inode, int t_chan_type, int t_x, int t_y, 

+

+static void setup_min_side_reaching_target(int inode, int target_chan_type, int target_x, int target_y, 

         float *min_Tdel, float cur_Tdel, float *C_downstream, float *future_base_cost, 

         float cur_C_downstream, float cur_basecost) {

+    /*

+     * setup expanded_node_min_target_side after we determine which 

+     * side of target clb is the cheapest

+     */

     if (cur_Tdel < *min_Tdel) {

         *min_Tdel = cur_Tdel;

         *C_downstream = cur_C_downstream;

         *future_base_cost = cur_basecost;

-        expanded_node_min_target_side[inode].chan_type = t_chan_type;

-        expanded_node_min_target_side[inode].x = t_x;

-        expanded_node_min_target_side[inode].y = t_y;

+        expanded_node_min_target_side[inode].chan_type = target_chan_type;

+        expanded_node_min_target_side[inode].x = target_x;

+        expanded_node_min_target_side[inode].y = target_y;

     }

 }

 

-static void adjust_coord_for_wrong_dir_wire(int *s_chan_type, int *s_dir, int start_len, 

+

+static void adjust_coord_for_wrong_dir_wire(int *start_chan_type, int *start_dir, int start_len, 

                                             int *x_start, int *y_start, int x_target, int y_target) {

-    int orig_dir = *s_dir;

-    if (*s_chan_type == CHANX) {

-        *s_dir = (*y_start <= y_target) ? INC_DIRECTION : DEC_DIRECTION;

+    /*

+     * for nodes heading away from target:

+     *    we cannot directly lookup the entry in the global cost map.

+     *    I transform the case to the normal case where wire is heading towards

+     *    the target by making the following assumption:

+     *        the wire heading for wrong direction will choose an othogonal wire

+     *        of the same seg type at the next hop, thus switching to the correct

+     *        direction. 

+     *    This assumption may not be accurate in some cases (e.g.: L16 heading for

+     *    wrong dir may likely take a turn using L4 rather than L16 at next hop).

+     */

+    int orig_dir = *start_dir;

+    if (*start_chan_type == CHANX) {

+        *start_dir = (*y_start <= y_target) ? INC_DIRECTION : DEC_DIRECTION;

     } else {

-        *s_dir = (*x_start <= x_target) ? INC_DIRECTION : DEC_DIRECTION;

+        *start_dir = (*x_start <= x_target) ? INC_DIRECTION : DEC_DIRECTION;

     }

     int new_x_start = *x_start;

     int new_y_start = *y_start;

     int len = start_len - 1;

-    if (*s_chan_type == CHANX) {

+    if (*start_chan_type == CHANX) {

         if (orig_dir == INC_DIRECTION) new_x_start += len;

         else new_x_start -= len;

     } else {

@@ -1362,10 +1350,16 @@
     }

     *x_start = new_x_start;

     *y_start = new_y_start;

-    *s_chan_type = (*s_chan_type == CHANX) ? CHANY : CHANX;

+    *start_chan_type = (*start_chan_type == CHANX) ? CHANY : CHANX;

 }

 

+

 static void setup_opin_cost_by_seg_type(int source_node, int target_node) {

+    /*

+     * precalculate the cost from wires that are driven by OPINs of source_node

+     * to target_node. Setup the opin cost map: opin_cost_by_seg_type. 

+     * Later when expanding OPINs, just lookup opin_cost_by_seg_type

+     */

     assert(rr_node[source_node].type == SOURCE);

     int num_to_opin_edges = rr_node[source_node].get_num_edges();

     for (int iopin = 0; iopin < num_to_opin_edges; iopin++) {

@@ -1391,6 +1385,9 @@
         }

     }

 }

+

+/****************************** END function definition for new look ahead *************************************/

+

 float get_timing_driven_cong_penalty (int inode, int target_node) {

     float penalty = 1.0;

 #if CONGESTIONBYCHIPAREA == 1

@@ -1797,7 +1794,7 @@
 

 

 /* Detect if net should be routed or not */

-static bool should_route_net(int inet) {

+bool should_route_net(int inet) {

 	t_trace * tptr = trace_head[inet];

 

 	if (tptr == NULL) {

diff --git a/vpr/SRC/route/route_timing.h b/vpr/SRC/route/route_timing.h
index f402265..1e031f2 100755
--- a/vpr/SRC/route/route_timing.h
+++ b/vpr/SRC/route/route_timing.h
@@ -7,6 +7,9 @@
 		float* pin_criticality, int* sink_order,
 		t_rt_node** rt_node_of_sink, float** net_delay, t_slack* slacks);
 
+// this is used in setup_max_min_criticality() in profile_lookahead.c
+bool should_route_net(int inet);
+
 bool timing_driven_route_net(int inet, int itry, float pres_fac, float max_criticality,
 		float criticality_exp, float astar_fac, float bend_cost,
 		float *pin_criticality, int *sink_order, t_rt_node ** rt_node_of_sink,