vpr/SRC/route/route_timing.c - third_party/vtr-verilog-to-routing - Git at Google

 #include <cstdio>
 #include <ctime>
 #include <cmath>

 #include <assert.h>

 #include <vector>
 #include <algorithm>

 #include "vpr_utils.h"
 #include "util.h"
 #include "vpr_types.h"
 #include "globals.h"
 #include "route_export.h"
 #include "route_common.h"
 #include "route_tree_timing.h"
 #include "route_timing.h"
 #include "heapsort.h"
 #include "path_delay.h"
 #include "net_delay.h"
 #include "stats.h"
 #include "ReadOptions.h"
 #include "profile_lookahead.h"
 #include "look_ahead_bfs_precal.h"

 using namespace std;

 /************************* variables used by new look ahead *****************************/
 typedef struct s_opin_cost_inf {
     float Tdel;
     float acc_basecost;
     float acc_C;
 } t_opin_cost_inf;
 // map key: seg_index; map value: cost of that seg_index from source to target
 map<int, t_opin_cost_inf> opin_cost_by_seg_type;
 typedef struct s_target_pin_side {
     int chan_type;
     int x;
     int y;
 } t_target_pin_side;
 // map key: currently expanded inode; map value: ipin side, x, y that will produce a min cost to target
 map<int, t_target_pin_side> expanded_node_min_target_side;
 int target_chosen_chan_type = UNDEFINED;
 int target_pin_x = UNDEFINED;
 int target_pin_y = UNDEFINED;

 float crit_threshold;   // for switch between new & old lookahead: only use new lookahead for high critical path


 /******************* variables for profiling & debugging *******************/
 float basecost_for_printing;    // for printing in expand_neighbours()
 float acc_route_time = 0.;      // accumulated route time: sum of router runtime over all iterations

 float time_pre_itr;
 float time_1st_itr;

 #if PRINTCRITICALPATH == 1
 bool is_first_itr = false;
 float max_crit = -1;
 float min_crit = 10;
 int max_inet = -1;
 int max_ipin = -1;
 int min_inet = -1;
 int min_ipin = -1;
 #endif


 /******************** Subroutines local to route_timing.c ********************/

 static int get_max_pins_per_net(void);

 static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
 		float target_criticality, float astar_fac);

 static void timing_driven_expand_neighbours(struct s_heap *current,
 		int inet, int itry,
 		float bend_cost, float criticality_fac, int target_node,
 		float astar_fac, int highfanout_rlim, bool lookahead_eval, float crit);

 static float get_timing_driven_expected_cost(int inode, int target_node,
 		float criticality_fac, float R_upstream);

 #ifdef INTERPOSER_BASED_ARCHITECTURE
 static int get_num_expected_interposer_hops_to_target(int inode, int target_node);
 #endif

 static int get_expected_segs_to_target(int inode, int target_node,
 		int *num_segs_ortho_dir_ptr);

 static void update_rr_base_costs(int inet, float largest_criticality);

 static void timing_driven_check_net_delays(float **net_delay);

 static int mark_node_expansion_by_bin(int inet, int target_node,
 		t_rt_node * rt_node);

 static double get_overused_ratio();

 // should_route_net is used in setup_max_min_criticality() in profile_lookahead.c
 //static bool should_route_net(int inet);

 static bool turn_on_bfs_map_lookup(float criticality);

 static void setup_min_side_reaching_target(int inode, int target_chan_type, int target_x, int target_y,
         float *min_Tdel, float cur_Tdel, float *C_downstream, float *future_base_cost,
         float cur_C_downstream, float cur_basecost);

 float bfs_direct_lookup(int offset_x, int offset_y, int seg_type, int chan_type,
                 int to_chan_type, float *acc_C, float *acc_basecost);

 static void adjust_coord_for_wrong_dir_wire(int *start_chan_type, int *start_dir, int start_len,
                                             int *x_start, int *y_start, int x_target, int y_target);

 static float get_precal_Tdel(int start_node, int start_chan_type, int target_chan_type, int start_seg_type, int start_dir,
                                  int x_start, int y_start, int x_target, int y_target, int len_start,
                                  float *acc_C, float *acc_basecost);

 static void setup_opin_cost_by_seg_type(int source_node, int target_node);

 struct more_sinks_than {
 	inline bool operator() (const int& net_index1, const int& net_index2) {
 		return g_clbs_nlist.net[net_index1].num_sinks() > g_clbs_nlist.net[net_index2].num_sinks();
 	}
 };


 static void congestion_analysis();
 static void time_on_fanout_analysis();
 constexpr int fanout_per_bin = 5;
 static vector<float> time_on_fanout;
 static vector<int> itry_on_fanout;

 /************************ Subroutine definitions *****************************/
 bool try_timing_driven_route(struct s_router_opts router_opts,
 		float **net_delay, t_slack * slacks, t_ivec ** clb_opins_used_locally,
 		bool timing_analysis_enabled, const t_timing_inf &timing_inf) {

 	/* Timing-driven routing algorithm.  The timing graph (includes slack)   *
 	 * must have already been allocated, and net_delay must have been allocated. *
 	 * Returns true if the routing succeeds, false otherwise.                    */

 	const int max_fanout {get_max_pins_per_net()};
 	time_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);
 	itry_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);

 	auto sorted_nets = vector<int>(g_clbs_nlist.net.size());

 	for (size_t i = 0; i < g_clbs_nlist.net.size(); ++i) {
 		sorted_nets[i] = i;
 	}

 	// sort so net with most sinks is first.
 	std::sort(sorted_nets.begin(), sorted_nets.end(), more_sinks_than());

 	timing_driven_route_structs route_structs{}; // allocs route strucs (calls default constructor)
 	// contains:
 	// float *pin_criticality; /* [1..max_pins_per_net-1] */
 	// int *sink_order; /* [1..max_pins_per_net-1] */;
 	// t_rt_node **rt_node_of_sink; /* [1..max_pins_per_net-1] */

 	/* First do one routing iteration ignoring congestion to get reasonable net
 	   delay estimates. Set criticalities to 1 when timing analysis is on to
 	   optimize timing, and to 0 when timing analysis is off to optimize routability. */

 	float init_timing_criticality_val = (timing_analysis_enabled ? 1.0 : 0.0);

 	/* Variables used to do the optimization of the routing, aborting visibly
 	 * impossible Ws */
 	double overused_ratio;
 	vector<double> historical_overuse_ratio;
 	historical_overuse_ratio.reserve(router_opts.max_router_iterations + 1);

 	// for unroutable large circuits, the time per iteration seems to increase dramatically
 	vector<float> time_per_iteration;
 	time_per_iteration.reserve(router_opts.max_router_iterations + 1);

 	for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
 		if (g_clbs_nlist.net[inet].is_global == false) {
 			for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
 				slacks->timing_criticality[inet][ipin] = init_timing_criticality_val;
 			}
 #ifdef PATH_COUNTING
 				slacks->path_criticality[inet][ipin] = init_timing_criticality_val;
 #endif
 		} else {
 			/* Set delay of global signals to zero. Non-global net delays are set by
 			   update_net_delays_from_route_tree() inside timing_driven_route_net(),
 			   which is only called for non-global nets. */
 			for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
 				net_delay[inet][ipin] = 0.;
 			}
 		}
 	}

 	float pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */
 #if OPINPENALTY == 1
     opin_penalty = OPIN_INIT_PENALTY;
 #else
     opin_penalty = 1;
 #endif
     acc_route_time = 0.;
 	for (int itry = 1; itry <= router_opts.max_router_iterations; ++itry) {
         if (itry == 1)
             crit_threshold = CRIT_THRES_INIT;
         else
             crit_threshold *= CRIT_THRES_INC_RATE;
         if (crit_threshold > 0.988)
             crit_threshold = 0.988;
         if (itry == 2) {
             nodes_expanded_1st_itr = nodes_expanded_cur_itr;
             nodes_expanded_max_itr = nodes_expanded_cur_itr;
         }
         if (itry > 1) {
             nodes_expanded_pre_itr = nodes_expanded_cur_itr;
             nodes_expanded_max_itr = (nodes_expanded_cur_itr > nodes_expanded_max_itr) ? nodes_expanded_cur_itr:nodes_expanded_max_itr;
             if (nodes_expanded_pre_itr > 2 * nodes_expanded_1st_itr) {
                 crit_threshold = (crit_threshold > 0.98) ? crit_threshold : 0.98;
                 printf("BFS RAISE THRESHOLD to 0.98 for %d\n", itry);
             }
         }
         nodes_expanded_cur_itr = 0;
 		clock_t begin = clock();
         total_nodes_expanded = 0;
         itry_share = itry;
         //if (itry <= 20)
             //opin_penalty *= 0.93;
 #if OPINPENALTY == 1
         opin_penalty *= OPIN_DECAY_RATE;
 #endif
 #if DEPTHWISELOOKAHEADEVAL == 1
         if (router_opts.lookahead_eval) {
             subtree_count.clear();
             subtree_size_avg.clear();
             subtree_est_dev_avg.clear();
             subtree_est_dev_abs_avg.clear();
             subtree_size_avg[0] = 0;
         }
 #endif
 		/* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */
 		for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
 			g_clbs_nlist.net[inet].is_routed = false;
 			g_clbs_nlist.net[inet].is_fixed = false;
 		}
 #if PRINTCRITICALPATH == 1
         if (itry == 1) {
             is_first_itr = true;
         } else {
             is_first_itr = false;
         }
         setup_max_min_criticality(max_crit, min_crit,
                 max_inet, min_inet, max_ipin, min_ipin, slacks);
 #endif
 		for (unsigned int i = 0; i < g_clbs_nlist.net.size(); ++i) {
 			bool is_routable = try_timing_driven_route_net(
 				sorted_nets[i],
 				itry,
 				pres_fac,
 				router_opts,
 				route_structs.pin_criticality,
 				route_structs.sink_order,
 				route_structs.rt_node_of_sink,
 				net_delay,
 				slacks
 			);

 			if (!is_routable) {
 				return (false);
 			}
 		}
 #if PRINTCRITICALPATH == 1
         printf("MAX crit: %f\tMIN crit: %f\tmaxpin: %d\tminpin: %d\n", max_crit, min_crit, max_ipin, min_ipin);
 #endif
 		clock_t end = clock();

 		float time = static_cast<float>(end - begin) / CLOCKS_PER_SEC;
         if (itry == 1) time_1st_itr = time;
         time_pre_itr = time;

         acc_route_time += time;
 		time_per_iteration.push_back(time);
         printf("\tTOTAL NODES EXPANDED FOR ITERATION %d is %d\n", itry, total_nodes_expanded);
 		if (itry == 1) {
 			/* Early exit code for cases where it is obvious that a successful route will not be found
 			   Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit */
 			int total_wirelength = 0;
 			int available_wirelength = 0;

 			for (int i = 0; i < num_rr_nodes; ++i) {
 				if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
 					available_wirelength += 1 +
 							rr_node[i].get_xhigh() - rr_node[i].get_xlow() +
 							rr_node[i].get_yhigh() - rr_node[i].get_ylow();
 				}
 			}

 			for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
 				if (g_clbs_nlist.net[inet].is_global == false
 						&& g_clbs_nlist.net[inet].num_sinks() != 0) { /* Globals don't count. */
 					int bends, wirelength, segments;
 					get_num_bends_and_length(inet, &bends, &wirelength, &segments);

 					total_wirelength += wirelength;
 				}
 			}
 			vpr_printf_info("Wire length after first iteration %d, total available wire length %d, ratio %g\n",
 					total_wirelength, available_wirelength,
 					(float) (total_wirelength) / (float) (available_wirelength));
 			if ((float) (total_wirelength) / (float) (available_wirelength)> FIRST_ITER_WIRELENTH_LIMIT) {
 				vpr_printf_info("Wire length usage ratio exceeds limit of %g, fail routing.\n",
 						FIRST_ITER_WIRELENTH_LIMIT);

 				time_on_fanout_analysis();
 				return false;
 			}
 			vpr_printf_info("--------- ---------- ----------- ---------------------\n");
 			vpr_printf_info("Iteration       Time   Crit Path     Overused RR Nodes\n");
 			vpr_printf_info("--------- ---------- ----------- ---------------------\n");
 		}

 		/* Make sure any CLB OPINs used up by subblocks being hooked directly
 		   to them are reserved for that purpose. */

 		bool rip_up_local_opins = (itry == 1 ? false : true);
 		reserve_locally_used_opins(pres_fac, router_opts.acc_fac, rip_up_local_opins, clb_opins_used_locally);

 		/* Pathfinder guys quit after finding a feasible route. I may want to keep
 		   going longer, trying to improve timing.  Think about this some. */

 		bool success = feasible_routing();

 		/* Verification to check the ratio of overused nodes, depending on the configuration
 		 * may abort the routing if the ratio is too high. */
 		overused_ratio = get_overused_ratio();
 		historical_overuse_ratio.push_back(overused_ratio);

 		/* Determine when routing is impossible hence should be aborted */
 		if (itry > 5){

 			int expected_success_route_iter = predict_success_route_iter(historical_overuse_ratio, router_opts);
 			if (expected_success_route_iter == UNDEFINED) {
 				time_on_fanout_analysis();
                 vpr_printf_info("\tTOTAL ROUTE TIME (SUM OF ALL ITR)\t%f\n", acc_route_time);
 				return false;
 			}

 			if (itry > 15) {
 				// compare their slopes over the last 5 iterations
 				double time_per_iteration_slope = linear_regression_vector(time_per_iteration, itry-5);
 				double congestion_per_iteration_slope = linear_regression_vector(historical_overuse_ratio, itry-5);
 				if (router_opts.congestion_analysis)
 					vpr_printf_info("%f s/iteration %f %/iteration\n", time_per_iteration_slope, congestion_per_iteration_slope);
 				// time is increasing and congestion is non-decreasing (grows faster than 10% per iteration)
 				if (congestion_per_iteration_slope > 0 && time_per_iteration_slope > 0.1*time_per_iteration.back()
 					&& time_per_iteration_slope > 1) {	// filter out noise
 					vpr_printf_info("Time per iteration growing too fast at slope %f s/iteration \n\
 									 while congestion grows at %f %/iteration, unlikely to finish.\n",
 						time_per_iteration_slope, congestion_per_iteration_slope);

 					time_on_fanout_analysis();
                     vpr_printf_info("\tTOTAL ROUTE TIME (SUM OF ALL ITR)\t%f\n", acc_route_time);
 					return false;
 				}
 			}
 		}
         if (router_opts.lookahead_eval)
             print_lookahead_eval();
         clear_lookahead_history_array();
         // I don't clear the congestion map: it is the accumulated version across all itry
         //clear_congestion_map();
 #if CONGESTIONBYCHIPAREA
         if (itry % 4 == 0)
             print_congestion_map();
         clear_congestion_map_relative();
 #endif
 		if (success) {
             vpr_printf_info("\tTOTAL ROUTE TIME (SUM OF ALL ITR)\t%f\n", acc_route_time);
 			if (timing_analysis_enabled) {
 				load_timing_graph_net_delays(net_delay);
 				do_timing_analysis(slacks, timing_inf, false, false);
 				float critical_path_delay = get_critical_path_delay();
                 vpr_printf_info("%9d %6.2f sec %8.5f ns   %3.2e (%3.4f %)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
 				vpr_printf_info("Critical path: %g ns\n", critical_path_delay);
 			} else {
                 vpr_printf_info("%9d %6.2f sec         N/A   %3.2e (%3.4f %)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
 			}

 			vpr_printf_info("Successfully routed after %d routing iterations.\n", itry);
 #ifdef DEBUG
 			if (timing_analysis_enabled)
 				timing_driven_check_net_delays(net_delay);
 #endif
 			time_on_fanout_analysis();
 			return (true);
 		}

 		if (itry == 1) {
 			pres_fac = router_opts.initial_pres_fac;
 			pathfinder_update_cost(pres_fac, 0.); /* Acc_fac=0 for first iter. */
 		} else {
 			pres_fac *= router_opts.pres_fac_mult;

 			/* Avoid overflow for high iteration counts, even if acc_cost is big */
 			pres_fac = min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5));

 			pathfinder_update_cost(pres_fac, router_opts.acc_fac);
 		}

 		if (timing_analysis_enabled) {
 			/* Update slack values by doing another timing analysis.
 			   Timing_driven_route_net updated the net delay values. */

 			load_timing_graph_net_delays(net_delay);

 			do_timing_analysis(slacks, timing_inf, false, false);

 		} else {
 			/* If timing analysis is not enabled, make sure that the criticalities and the
 			   net_delays stay as 0 so that wirelength can be optimized. */

 			for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
 				for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
 					slacks->timing_criticality[inet][ipin] = 0.;
 #ifdef PATH_COUNTING
 					slacks->path_criticality[inet][ipin] = 0.;
 #endif
 					net_delay[inet][ipin] = 0.;
 				}
 			}
 		}


 		if (timing_analysis_enabled) {
 			float critical_path_delay = get_critical_path_delay();
             vpr_printf_info("%9d %6.2f sec %8.5f ns   %3.2e (%3.4f %)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
 		} else {
             vpr_printf_info("%9d %6.2f sec         N/A   %3.2e (%3.4f %)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
 		}

         if (router_opts.congestion_analysis) {
         	congestion_analysis();
         }
 		fflush(stdout);
 	}
     vpr_printf_info("\tTOTAL ROUTE TIME (SUM OF ALL ITR)\t%f\n", acc_route_time);
 	vpr_printf_info("Routing failed.\n");
 	time_on_fanout_analysis();
 	return (false);
 }


 void time_on_fanout_analysis() {
 	// using the global time_on_fanout and itry_on_fanout
 	vpr_printf_info("fanout low           time (s)        attemps\n");
 	for (size_t bin = 0; bin < time_on_fanout.size(); ++bin) {
 		if (itry_on_fanout[bin]) {	// avoid printing the many 0 bins
 			vpr_printf_info("%4d           %14.3f   %12d\n",bin*fanout_per_bin, time_on_fanout[bin], itry_on_fanout[bin]);
 		}
 	}
 }

 // at the end of a routing iteration, profile how much congestion is taken up by each type of rr_node
 // efficient bit array for checking against congested type
 struct Congested_node_types {
 	uint32_t mask;
 	Congested_node_types() : mask{0} {}
 	void set_congested(int rr_node_type) {mask |= (1 << rr_node_type);}
 	void clear_congested(int rr_node_type) {mask &= ~(1 << rr_node_type);}
 	bool is_congested(int rr_node_type) const {return mask & (1 << rr_node_type);}
 	bool empty() const {return mask == 0;}
 };
 void congestion_analysis() {
 	static const std::vector<const char*> node_typename {
 		"SOURCE",
 		"SINK",
 		"IPIN",
 		"OPIN",
 		"CHANX",
 		"CHANY",
 		"ICE"
 	};
 	// each type indexes into array which holds the congestion for that type
 	std::vector<int> congestion_per_type((size_t) NUM_RR_TYPES, 0);
 	// print out specific node information if congestion for type is low enough

 	int total_congestion = 0;
 	for (int inode = 0; inode < num_rr_nodes; ++inode) {
 		const t_rr_node& node = rr_node[inode];
 		int congestion = node.get_occ() - node.get_capacity();

 		if (congestion > 0) {
 			total_congestion += congestion;
 			congestion_per_type[node.type] += congestion;
 		}
 	}

 	constexpr int specific_node_print_threshold = 5;
 	Congested_node_types congested;
 	for (int type = SOURCE; type < NUM_RR_TYPES; ++type) {
 		float congestion_percentage = (float)congestion_per_type[type] / (float) total_congestion * 100;
 		vpr_printf_info(" %6s: %10.6f %\n", node_typename[type], congestion_percentage);
 		// nodes of that type need specific printing
 		if (congestion_per_type[type] > 0 &&
 			congestion_per_type[type] < specific_node_print_threshold) congested.set_congested(type);
 	}

 	// specific print out each congested node
 	if (!congested.empty()) {
 		vpr_printf_info("Specific congested nodes\nxlow ylow   type\n");
 		for (int inode = 0; inode < num_rr_nodes; ++inode) {
 			const t_rr_node& node = rr_node[inode];
 			if (congested.is_congested(node.type) && (node.get_occ() - node.get_capacity()) > 0) {
 				vpr_printf_info("(%3d,%3d) %6s\n", node.get_xlow(), node.get_ylow(), node_typename[node.type]);
 			}
 		}
 	}
 }

 bool try_timing_driven_route_net(int inet, int itry, float pres_fac,
 		struct s_router_opts router_opts,
 		float* pin_criticality, int* sink_order,
 		t_rt_node** rt_node_of_sink, float** net_delay, t_slack* slacks) {

 	bool is_routed = false;

 	if (g_clbs_nlist.net[inet].is_fixed) { /* Skip pre-routed nets. */
 		is_routed = true;
 	} else if (g_clbs_nlist.net[inet].is_global) { /* Skip global nets. */
 		is_routed = true;
 	} else if (should_route_net(inet) == false) {
 		is_routed = true;
 	} else{
 		// track time spent vs fanout
 		clock_t net_begin = clock();

 		is_routed = timing_driven_route_net(inet, itry, pres_fac,
 				router_opts.max_criticality, router_opts.criticality_exp,
 				router_opts.astar_fac, router_opts.bend_cost,
 				pin_criticality, sink_order,
 				rt_node_of_sink, net_delay[inet], slacks, router_opts.lookahead_eval);

 		float time_for_net = static_cast<float>(clock() - net_begin) / CLOCKS_PER_SEC;
 		int net_fanout = g_clbs_nlist.net[inet].num_sinks();
 		time_on_fanout[net_fanout / fanout_per_bin] += time_for_net;
 		itry_on_fanout[net_fanout / fanout_per_bin] += 1;

 		/* Impossible to route? (disconnected rr_graph) */
 		if (is_routed) {
 			g_clbs_nlist.net[inet].is_routed = true;
 			g_atoms_nlist.net[clb_to_vpack_net_mapping[inet]].is_routed = true;
 		} else {
 			vpr_printf_info("Routing failed.\n");
 		}
 	}
 	return (is_routed);
 }

 /*
  * NOTE:
  * Suggest using a timing_driven_route_structs struct. Memory is managed for you
  */
 void alloc_timing_driven_route_structs(float **pin_criticality_ptr,
 		int **sink_order_ptr, t_rt_node *** rt_node_of_sink_ptr) {

 	/* Allocates all the structures needed only by the timing-driven router.   */

 	int max_pins_per_net = get_max_pins_per_net();

 	float *pin_criticality = (float *) my_malloc((max_pins_per_net - 1) * sizeof(float));
 	*pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */

 	int *sink_order = (int *) my_malloc((max_pins_per_net - 1) * sizeof(int));
 	*sink_order_ptr = sink_order - 1;

 	t_rt_node **rt_node_of_sink = (t_rt_node **) my_malloc((max_pins_per_net - 1) * sizeof(t_rt_node *));
 	*rt_node_of_sink_ptr = rt_node_of_sink - 1;

 	alloc_route_tree_timing_structs();
 }

 /* This function gets ratio of overused nodes (overused_nodes / num_rr_nodes) */
 static double get_overused_ratio(){
 	double overused_nodes = 0.0;
 	int inode;
 	for(inode = 0; inode < num_rr_nodes; inode++){
 		if(rr_node[inode].get_occ() > rr_node[inode].get_capacity())
 			overused_nodes += (rr_node[inode].get_occ() - rr_node[inode].get_capacity());
 	}
 	overused_nodes /= (double)num_rr_nodes;
 	return overused_nodes;
 }

 /*
  * NOTE:
  * Suggest using a timing_driven_route_structs struct. Memory is managed for you
  */
 void free_timing_driven_route_structs(float *pin_criticality, int *sink_order,
 		t_rt_node ** rt_node_of_sink) {

 	/* Frees all the stuctures needed only by the timing-driven router.        */

 	free(pin_criticality + 1); /* Starts at index 1. */
 	free(sink_order + 1);
 	free(rt_node_of_sink + 1);
 	free_route_tree_timing_structs();
 }

 timing_driven_route_structs::timing_driven_route_structs() {
 	alloc_timing_driven_route_structs(
 		&pin_criticality,
 		&sink_order,
 		&rt_node_of_sink
 	);
 }

 timing_driven_route_structs::~timing_driven_route_structs() {
 	free_timing_driven_route_structs(
 		pin_criticality,
 		sink_order,
 		rt_node_of_sink
 	);
 }

 static int get_max_pins_per_net(void) {

 	/* Returns the largest number of pins on any non-global net.    */

 	unsigned int inet;
 	int max_pins_per_net;

 	max_pins_per_net = 0;
 	for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
 		if (g_clbs_nlist.net[inet].is_global == false) {
 			max_pins_per_net = max(max_pins_per_net,
 					(int) g_clbs_nlist.net[inet].pins.size());
 		}
 	}

 	return (max_pins_per_net);
 }

 bool timing_driven_route_net(int inet, int itry, float pres_fac, float max_criticality,
 		float criticality_exp, float astar_fac, float bend_cost,
 		float *pin_criticality, int *sink_order,
 		t_rt_node ** rt_node_of_sink, float *net_delay, t_slack * slacks, bool lookahead_eval) {

 	/* Returns true as long is found some way to hook up this net, even if that *
 	 * way resulted in overuse of resources (congestion).  If there is no way   *
 	 * to route this net, even ignoring congestion, it returns false.  In this  *
 	 * case the rr_graph is disconnected and you can give up. If slacks = NULL, *
 	 * give each net a dummy criticality of 0.									*/
 	unsigned int ipin;
 	int num_sinks, itarget, target_pin, target_node, inode;
 	float target_criticality, old_total_cost, new_total_cost, largest_criticality,
 		old_back_cost, new_back_cost;
 	t_rt_node *rt_root;

 	struct s_trace *new_route_start_tptr;
 	int highfanout_rlim;

 	/* Rip-up any old routing. */

 	pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);
 	free_traceback(inet);
 	for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
 		if (!slacks) {
 			/* Use criticality of 1. This makes all nets critical.  Note: There is a big difference between setting pin criticality to 0
 			compared to 1.  If pin criticality is set to 0, then the current path delay is completely ignored during routing.  By setting
 			pin criticality to 1, the current path delay to the pin will always be considered and optimized for */
 			pin_criticality[ipin] = 1.0;
 		} else {
 #ifdef PATH_COUNTING
 			/* Pin criticality is based on a weighted sum of timing and path criticalities. */
 			pin_criticality[ipin] =		 ROUTE_PATH_WEIGHT	* slacks->path_criticality[inet][ipin]
 								  + (1 - ROUTE_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
 #else
 			/* Pin criticality is based on only timing criticality. */
 			pin_criticality[ipin] = slacks->timing_criticality[inet][ipin];
 #endif
             pin_criticality[ipin] *= 0.98;
             pin_criticality[ipin] += 0.01;
 		}
 	}

 	num_sinks = g_clbs_nlist.net[inet].num_sinks();
 	heapsort(sink_order, pin_criticality, num_sinks, 0);

 	/* Update base costs according to fanout and criticality rules */

 	largest_criticality = pin_criticality[sink_order[1]];
 	update_rr_base_costs(inet, largest_criticality);

 	mark_ends(inet); /* Only needed to check for multiply-connected SINKs */

 	rt_root = init_route_tree_to_source(inet);

     node_expanded_per_net = 0;
     node_on_path = 0;
     estimated_cost_deviation = 0;
     estimated_cost_deviation_abs = 0;
 	// explore in order of decreasing criticality
 	for (itarget = 1; itarget <= num_sinks; itarget++) {
 		target_pin = sink_order[itarget];
 		target_node = net_rr_terminals[inet][target_pin];
         if (target_node == 52356)
             printf("INET %d\n", inet);
 		target_criticality = pin_criticality[target_pin];
 #if PRINTCRITICALPATH == 1
         if ((int)inet == max_inet && itarget == max_ipin)
             is_print_cpath = true;
         else
             is_print_cpath = false;
 #endif
         target_chosen_chan_type = UNDEFINED;
         target_pin_x = UNDEFINED;
         target_pin_y = UNDEFINED;
         expanded_node_min_target_side.clear();

 		highfanout_rlim = mark_node_expansion_by_bin(inet, target_node,
 				rt_root);

 		if (itarget > 1 && itry > 5) {
 			/* Enough iterations given to determine opin, to speed up legal solution, do not let net use two opins */
 			assert(rr_node[rt_root->inode].type == SOURCE);
 			rt_root->re_expand = false;
 		}

 		// reexplore route tree from root to add any new nodes
 		add_route_tree_to_heap(rt_root, target_node, target_criticality,
 				astar_fac);

 		// cheapest s_heap (gives index to rr_node) in current route tree to be expanded on
 		struct s_heap* cheapest = get_heap_head();

 		if (cheapest == NULL) { /* Infeasible routing.  No possible path for net. */
 			vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
 					   inet, g_clbs_nlist.net[inet].name, itarget);
 			reset_path_costs();
 			free_route_tree(rt_root);
 			return (false);
 		}

 		inode = cheapest->index;
         node_expanded_per_sink = 0;

         print_start_end_to_sink(true, itry, inet, target_node, itarget);

 		while (inode != target_node) {
 			old_total_cost = rr_node_route_inf[inode].path_cost;
 			new_total_cost = cheapest->cost;
 			if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
 				old_back_cost = HUGE_POSITIVE_FLOAT;
 			else
 				old_back_cost = rr_node_route_inf[inode].backward_path_cost;

 			new_back_cost = cheapest->backward_path_cost;

 			/* I only re-expand a node if both the "known" backward cost is lower  *
 			 * in the new expansion (this is necessary to prevent loops from       *
 			 * forming in the routing and causing havoc) *and* the expected total  *
 			 * cost to the sink is lower than the old value.  Different R_upstream *
 			 * values could make a path with lower back_path_cost less desirable   *
 			 * than one with higher cost.  Test whether or not I should disallow   *
 			 * re-expansion based on a higher total cost.                          */

 			if (old_total_cost > new_total_cost && old_back_cost > new_back_cost) {
 				rr_node_route_inf[inode].prev_node = cheapest->u.prev_node;
 				rr_node_route_inf[inode].prev_edge = cheapest->prev_edge;
 				rr_node_route_inf[inode].path_cost = new_total_cost;
 				rr_node_route_inf[inode].backward_path_cost = new_back_cost;
                 rr_node_route_inf[inode].back_Tdel = cheapest->back_Tdel;
 				if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
 					add_to_mod_list(&rr_node_route_inf[inode].path_cost);

 				timing_driven_expand_neighbours(cheapest, inet, itry, bend_cost,
 						target_criticality, target_node, astar_fac,
 						highfanout_rlim, lookahead_eval, target_criticality);
 			}
 			free_heap_data(cheapest);
 			cheapest = get_heap_head();
             if (target_chosen_chan_type == UNDEFINED) {
                 int cheap_inode = cheapest->index;
                 if (expanded_node_min_target_side.count(cheap_inode) > 0) {
                     target_chosen_chan_type = expanded_node_min_target_side[cheap_inode].chan_type;
                     target_pin_x = expanded_node_min_target_side[cheap_inode].x;
                     target_pin_y = expanded_node_min_target_side[cheap_inode].y;
                     expanded_node_min_target_side.clear();
                 }
             }
 			if (cheapest == NULL) { /* Impossible routing.  No path for net. */
 				vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
 						 inet, g_clbs_nlist.net[inet].name, itarget);
 				reset_path_costs();
 				free_route_tree(rt_root);
 				return (false);
 			}
 			inode = cheapest->index;
 		}

 		/* NB:  In the code below I keep two records of the partial routing:  the   *
 		 * traceback and the route_tree.  The route_tree enables fast recomputation *
 		 * of the Elmore delay to each node in the partial routing.  The traceback  *
 		 * lets me reuse all the routines written for breadth-first routing, which  *
 		 * all take a traceback structure as input.  Before this routine exits the  *
 		 * route_tree structure is destroyed; only the traceback is needed at that  *
 		 * point.                                                                   */
 #if DEPTHWISELOOKAHEADEVAL == 1
         subtree_size_avg[0] = node_expanded_per_sink;
 #endif
 		rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */
 		new_route_start_tptr = update_traceback(cheapest, inet);
 		rt_node_of_sink[target_pin] = update_route_tree(cheapest, lookahead_eval, target_criticality);
 		free_heap_data(cheapest);
 		pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac);
         print_start_end_to_sink(false, itry, inet, target_node, itarget);
 		empty_heap();
 		reset_path_costs();
 	}

 	/* For later timing analysis. */

 	update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet);
 	free_route_tree(rt_root);
 	return (true);
 }

 static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
 		float target_criticality, float astar_fac) {

 	/* Puts the entire partial routing below and including rt_node onto the heap *
 	 * (except for those parts marked as not to be expanded) by calling itself   *
 	 * recursively.                                                              */
     opin_cost_by_seg_type.clear();

 	int inode;
 	t_rt_node *child_node;
 	t_linked_rt_edge *linked_rt_edge;
 	float tot_cost, backward_path_cost, R_upstream;

 	/* Pre-order depth-first traversal */

 	if (rt_node->re_expand) {
 		inode = rt_node->inode;
 		backward_path_cost = target_criticality * rt_node->Tdel;
 		R_upstream = rt_node->R_upstream;
 		tot_cost = backward_path_cost
 				+ astar_fac
 						* get_timing_driven_expected_cost(inode, target_node,
 								target_criticality, R_upstream);
 #if INPRECISE_GET_HEAP_HEAD == 1 || SPACEDRIVENHEAP == 1
         int target_x = rr_node[target_node].get_xlow();
         int target_y = rr_node[target_node].get_ylow();
         int offset_x = (rr_node[inode].get_xlow() + rr_node[inode].get_xhigh()) / 2;
         int offset_y = (rr_node[inode].get_ylow() + rr_node[inode].get_yhigh()) / 2;
         offset_x = abs(target_x - offset_x);
         offset_y = abs(target_y - offset_y);
         node_to_heap(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS,
                 backward_path_cost, R_upstream, rt_node->Tdel, offset_x, offset_y);
 #else
 		node_to_heap(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS,
 				backward_path_cost, R_upstream, rt_node->Tdel);
 #endif
 	}

 	linked_rt_edge = rt_node->u.child_list;

 	while (linked_rt_edge != NULL) {
 		child_node = linked_rt_edge->child;
 		add_route_tree_to_heap(child_node, target_node, target_criticality,
 				astar_fac);
 		linked_rt_edge = linked_rt_edge->next;
 	}
 }

 static void timing_driven_expand_neighbours(struct s_heap *current,
 		int inet, int itry,
 		float bend_cost, float criticality_fac, int target_node,
 		float astar_fac, int highfanout_rlim, bool lookahead_eval, float target_criticality) {

 	/* Puts all the rr_nodes adjacent to current on the heap.  rr_nodes outside *
 	 * the expanded bounding box specified in route_bb are not added to the     *
 	 * heap.                                                                    */

 	int iconn, to_node, num_edges, inode, iswitch, target_x, target_y;
 	t_rr_type from_type, to_type;
 	float new_tot_cost, old_back_pcost, new_back_pcost, R_upstream;
 	float new_R_upstream, Tdel;

 	inode = current->index;
 	old_back_pcost = current->backward_path_cost;
 	R_upstream = current->R_upstream;
 	num_edges = rr_node[inode].get_num_edges();

 	target_x = rr_node[target_node].get_xhigh();
 	target_y = rr_node[target_node].get_yhigh();
     if (rr_node[inode].type == SOURCE && route_start) {
         opin_cost_by_seg_type.clear();
         setup_opin_cost_by_seg_type(inode, target_node);
     }
 #if DEBUGEXPANSIONRATIO == 1
     if (itry_share == DB_ITRY && target_node == DB_TARGET_NODE)
         printf("CRITICALITY %f\n", target_criticality);
     print_expand_neighbours(true, inode, target_node,
         target_chosen_chan_type, target_pin_x, target_pin_y, 0., 0., 0., 0., 0.);
 #endif

 	for (iconn = 0; iconn < num_edges; iconn++) {
 		to_node = rr_node[inode].edges[iconn];
 		if (g_clbs_nlist.net[inet].num_sinks() >= HIGH_FANOUT_NET_LIM) {
 			if (rr_node[to_node].get_xhigh() < target_x - highfanout_rlim
 					|| rr_node[to_node].get_xlow() > target_x + highfanout_rlim
 					|| rr_node[to_node].get_yhigh() < target_y - highfanout_rlim
 					|| rr_node[to_node].get_ylow() > target_y + highfanout_rlim){
 				continue; /* Node is outside high fanout bin. */
 			}
 		}
 		else if (rr_node[to_node].get_xhigh() < route_bb[inet].xmin
 				|| rr_node[to_node].get_xlow() > route_bb[inet].xmax
 				|| rr_node[to_node].get_yhigh() < route_bb[inet].ymin
 				|| rr_node[to_node].get_ylow() > route_bb[inet].ymax)
 			continue; /* Node is outside (expanded) bounding box. */


 		/* Prune away IPINs that lead to blocks other than the target one.  Avoids  *
 		 * the issue of how to cost them properly so they don't get expanded before *
 		 * more promising routes, but makes route-throughs (via CLBs) impossible.   *
 		 * Change this if you want to investigate route-throughs.                   */

 		to_type = rr_node[to_node].type;
 		if (to_type == IPIN
 				&& (rr_node[to_node].get_xhigh() != target_x
 						|| rr_node[to_node].get_yhigh() != target_y))
 			continue;

 		/* new_back_pcost stores the "known" part of the cost to this node -- the   *
 		 * congestion cost of all the routing resources back to the existing route  *
 		 * plus the known delay of the total path back to the source.  new_tot_cost *
 		 * is this "known" backward cost + an expected cost to get to the target.   */
 #if INPRECISE_GET_HEAP_HEAD == 1 || SPACEDRIVENHEAP == 1
         int offset_x = (rr_node[to_node].get_xlow() + rr_node[to_node].get_xhigh()) / 2;
         int offset_y = (rr_node[to_node].get_ylow() + rr_node[to_node].get_yhigh()) / 2;
         offset_x = abs(offset_x - target_x);
         offset_y = abs(offset_y - target_y);
 #endif
 		iswitch = rr_node[inode].switches[iconn];
 		if (g_rr_switch_inf[iswitch].buffered) {
 			new_R_upstream = g_rr_switch_inf[iswitch].R;
 		} else {
 			new_R_upstream = R_upstream + g_rr_switch_inf[iswitch].R;
 		}
         // Tdel is the delay of current node
 		Tdel = rr_node[to_node].C * (new_R_upstream + 0.5 * rr_node[to_node].R);
 		Tdel += g_rr_switch_inf[iswitch].Tdel;
 		new_R_upstream += rr_node[to_node].R;
         new_back_pcost = old_back_pcost;
 		new_back_pcost += criticality_fac * Tdel
                         + (1. - criticality_fac) * get_rr_cong_cost(to_node);

 		if (bend_cost != 0.) {
 			from_type = rr_node[inode].type;
 			to_type = rr_node[to_node].type;
 			if ((from_type == CHANX && to_type == CHANY)
 					|| (from_type == CHANY && to_type == CHANX))
 				new_back_pcost += bend_cost;
 		}
 		float expected_cost = get_timing_driven_expected_cost(to_node, target_node,
 		    		criticality_fac, new_R_upstream);

 		new_tot_cost = new_back_pcost + astar_fac * expected_cost;
 #if INPRECISE_GET_HEAP_HEAD == 1 || SPACEDRIVENHEAP == 1
  		node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost,
 				new_R_upstream, Tdel + current->back_Tdel, offset_x, offset_y);
 #else
         node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost,
                 new_R_upstream, Tdel + current->back_Tdel);
 #endif
         if (lookahead_eval) {
             if (new_tot_cost < rr_node_route_inf[to_node].path_cost) {
                 node_expanded_per_net ++;
                 node_expanded_per_sink ++;
             }
         }
 #if DEBUGEXPANSIONRATIO == 1
         if (new_tot_cost < rr_node_route_inf[to_node].path_cost) {
             print_expand_neighbours(false, to_node, target_node,
                 target_chosen_chan_type, target_pin_x, target_pin_y,
                 expected_cost, Tdel + current->back_Tdel, new_tot_cost,
                 basecost_for_printing, get_rr_cong_cost(to_node));
         }
 #endif
 	} /* End for all neighbours */
 }

 static float get_timing_driven_expected_cost(int inode, int target_node,
 		float criticality_fac, float R_upstream) {
 	/* Determines the expected cost (due to both delay and resouce cost) to reach *
 	 * the target node from inode.  It doesn't include the cost of inode --       *
 	 * that's already in the "known" path_cost.                                   */

 	t_rr_type rr_type;
 	int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;
 	float expected_cost, cong_cost, Tdel;

 	rr_type = rr_node[inode].type;

 	if (rr_type == CHANX || rr_type == CHANY || rr_type == OPIN) {
 #ifdef INTERPOSER_BASED_ARCHITECTURE
 		int num_interposer_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
 #endif
         float Tdel_future = 0.;
 #if LOOKAHEADBFS == 1
         if (turn_on_bfs_map_lookup(criticality_fac)) {
             // calculate Tdel, not just Tdel_future
             float C_downstream = -1.0;
             Tdel_future = get_timing_driven_future_Tdel(inode, target_node, &C_downstream, &cong_cost);
             basecost_for_printing = cong_cost;
             Tdel = Tdel_future + R_upstream * C_downstream;
             cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost
                        + rr_indexed_data[SINK_COST_INDEX].base_cost;

         } else {
 #endif
             if (rr_type == OPIN) {
                 // keep the same as the original router
                 return 0;
             }
             num_segs_same_dir = get_expected_segs_to_target(inode, target_node,
                     &num_segs_ortho_dir);
             cost_index = rr_node[inode].get_cost_index();
             ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
             cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost
                       + num_segs_ortho_dir * rr_indexed_data[ortho_cost_index].base_cost;
             cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost
                        + rr_indexed_data[SINK_COST_INDEX].base_cost;

             // future without R_upstream
             Tdel_future = num_segs_same_dir * rr_indexed_data[cost_index].T_linear
                          + num_segs_ortho_dir * rr_indexed_data[ortho_cost_index].T_linear
                          + num_segs_same_dir * num_segs_same_dir * rr_indexed_data[cost_index].T_quadratic
                          + num_segs_ortho_dir * num_segs_ortho_dir * rr_indexed_data[ortho_cost_index].T_quadratic;
             // add R_upstream factor
             Tdel = Tdel_future + R_upstream * (num_segs_same_dir * rr_indexed_data[cost_index].C_load
                                 + num_segs_ortho_dir * rr_indexed_data[ortho_cost_index].C_load);
 #if LOOKAHEADBFS == 1
         }
 #endif
 		Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;

 #ifdef INTERPOSER_BASED_ARCHITECTURE
 		float interposer_hop_delay = (float)delay_increase * 1e-12;
 		Tdel += num_interposer_hops * interposer_hop_delay;
 #endif

 		expected_cost = criticality_fac * Tdel
 				+ (1. - criticality_fac) * cong_cost;
 		return (expected_cost);
 	}

 	else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
 		return (rr_indexed_data[SINK_COST_INDEX].base_cost);
 	}

 	else { /* Change this if you want to investigate route-throughs */
 		return (0.);
 	}
 }


 /******************************* START function definition for new look ahead ***************************************/
 static bool turn_on_bfs_map_lookup(float criticality) {
     /*
      * check function to decide whether we should use the
      * new lookahead or the old one
      * crit_threshold is updated after each iteration
      */
     // Not currently add the bfs map look up in placement
     if (!route_start)
         return false;
     if (itry_share == 1)
         return true;
     if (criticality > crit_threshold)
         return true;
     else
         return false;
 }


 float get_timing_driven_future_Tdel (int inode, int target_node,
         float *C_downstream, float *future_base_cost) {
     /*
      * the whole motivation of this version of bfs look ahead:
      *    we don't evaluate the Tdel cost from (x1, y1) to (x2, y2)
      *    instead we evaluate by channel. --> for example:
      *    give the cost from chanx at (x1, y1) to chany at (x2, y2)
      *    prerequisite of supporting this level of precision:
      *         given a sink at (x,y), we should know if it can be
      *         reached from TOP / RIGHT / BOTTOM / LEFT side of the clb
      *    this prerequisite requires extra backward information about the sink
      *    which is store in g_pin_side, and setup when building rr_graph
      *    (in rr_graph.c / rr_graph2.c)
      *
      *
      */

     *future_base_cost = 0;
     *C_downstream = 0;

     if (rr_node[inode].type != CHANX
      && rr_node[inode].type != CHANY
      && rr_node[inode].type != OPIN) {
         return 0;
     }
     int x_start, y_start;
     get_unidir_seg_start(inode, &x_start, &y_start);
     // s_* stands for information related to start node (inode)
     // t_* is for target node
     int start_seg_type = rr_indexed_data[rr_node[inode].get_cost_index()].seg_index;
     int start_dir = rr_node[inode].get_direction();
     int s_len = rr_node[inode].get_len();
     int start_chan_type = rr_node[inode].type;

     if (target_chosen_chan_type == UNDEFINED && rr_node[inode].type != OPIN) {
         // if we haven't decided which side is the cheapest to reach the target node,
         // then we expand and get the estimated cost of all possible sides.
         int target_chan_type;
         int x_target, y_target;
         get_unidir_seg_end(target_node, &x_target, &y_target);
         float cur_C_downstream, cur_basecost;
         float min_Tdel = HUGE_POSITIVE_FLOAT, cur_Tdel = HUGE_POSITIVE_FLOAT;
         if ((g_pin_side[target_node] & static_cast<char>(Pin_side::TOP)) == static_cast<char>(Pin_side::TOP)) {
             target_chan_type = CHANX;
             cur_Tdel = get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir,
                         x_start, y_start, x_target, y_target, s_len, &cur_C_downstream, &cur_basecost);
             setup_min_side_reaching_target(inode, target_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,
                                     C_downstream, future_base_cost, cur_C_downstream, cur_basecost);
         }
         if ((g_pin_side[target_node] & static_cast<char>(Pin_side::RIGHT)) == static_cast<char>(Pin_side::RIGHT)) {
             target_chan_type = CHANY;
             cur_Tdel = get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir,
                         x_start, y_start, x_target, y_target, s_len, &cur_C_downstream, &cur_basecost);
             setup_min_side_reaching_target(inode, target_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,
                                     C_downstream, future_base_cost, cur_C_downstream, cur_basecost);
         }
         if ((g_pin_side[target_node] & static_cast<char>(Pin_side::BOTTOM)) == static_cast<char>(Pin_side::BOTTOM)) {
             target_chan_type = CHANX;
             y_target --;
             cur_Tdel = get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir,
                         x_start, y_start, x_target, y_target, s_len, &cur_C_downstream, &cur_basecost);
             setup_min_side_reaching_target(inode, target_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,
                                     C_downstream, future_base_cost, cur_C_downstream, cur_basecost);
             y_target ++;
         }
         if ((g_pin_side[target_node] & static_cast<char>(Pin_side::LEFT)) == static_cast<char>(Pin_side::LEFT)) {
             target_chan_type = CHANY;
             x_target --;
             cur_Tdel = get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir,
                         x_start, y_start, x_target, y_target, s_len, &cur_C_downstream, &cur_basecost);
             setup_min_side_reaching_target(inode, target_chan_type, x_target, y_target, &min_Tdel, cur_Tdel,
                                     C_downstream, future_base_cost, cur_C_downstream, cur_basecost);
             x_target ++;
         }
         // C_downstream & basecost is set by setup_min_side_reaching_target
         assert(cur_Tdel < 0.98 * HUGE_POSITIVE_FLOAT);
         assert(min_Tdel < 0.98 * HUGE_POSITIVE_FLOAT);
         return min_Tdel;
     } else {
         int target_chan_type, x_target, y_target;
         if (target_chosen_chan_type == UNDEFINED) {
             // always do coarse estimation for OPIN
             // OPIN
             get_unidir_seg_end(target_node, &x_target, &y_target);
             target_chan_type = CHANX;
         } else {
             // let get_precal_Tdel() directly look up correct value
             // from target_chosen_chan_type, target_pin_x, target_pin_y
             target_chan_type = target_chosen_chan_type;
             x_target = target_pin_x;
             y_target = target_pin_y;
         }
         return get_precal_Tdel(inode, start_chan_type, target_chan_type, start_seg_type, start_dir,
                     x_start, y_start, x_target, y_target, s_len, C_downstream, future_base_cost);
     }
 }


 static float get_precal_Tdel(int start_node, int start_chan_type, int target_chan_type, int start_seg_type, int start_dir,
                                  int x_start, int y_start, int x_target, int y_target, int len_start,
                                  float *acc_C, float *acc_basecost) {
     /*
      * lookup the precalculated Tdel from start node to target node
      * The real target node is SINK, and the target node in this function
      * is wire that can drive the SINK. So it has chan / seg type
      * I currently adopt different policies for OPIN & wire as start node:
      *    use a coarse Tdel for OPIN, and accurate Tdel for wire
      *    TODO: may want to change OPIN lookup to be accurate as well, since
      *    I am using OPIN penalty to restrict OPIN expansion
      */
     // dealing with CHANX / CHANY / OPIN
     float Tdel = 0.0;
     int offset_x, offset_y;
     if (rr_node[start_node].type == OPIN) {
 #if OPINPENALTY == 0
         opin_penalty = 1;
 #endif
         float opin_del = HUGE_POSITIVE_FLOAT;
         int to_seg_type = -1;
         int num_edges = rr_node[start_node].get_num_edges();
         for (int iconn = 0; iconn < num_edges; iconn ++) {
             int to_node = rr_node[start_node].edges[iconn];
             int seg_type = rr_indexed_data[rr_node[to_node].get_cost_index()].seg_index;
             float cur_opin_del = 0.5 * rr_node[to_node].R * rr_node[to_node].C;
             if (opin_cost_by_seg_type.count(seg_type) > 0) {
                 cur_opin_del += opin_cost_by_seg_type[seg_type].Tdel;
                 opin_del = (opin_del > cur_opin_del) ? cur_opin_del : opin_del;
                 to_seg_type = seg_type;
             }
         }
         if (opin_del > 0.98 * HUGE_POSITIVE_FLOAT) {
             offset_x = abs(x_start - x_target);
             offset_y = abs(y_start - y_target);
             offset_x = (offset_x > nx-3) ? nx-3 : offset_x;
             offset_y = (offset_y > ny-3) ? ny-3 : offset_y;
             *acc_C = bfs_lookahead_info[offset_x][offset_y][0][0].acc_C_x;
             *acc_basecost = bfs_lookahead_info[offset_x][offset_y][0][0].acc_basecost_x;
             return opin_penalty * bfs_lookahead_info[offset_x][offset_y][0][0].Tdel_x;
         } else {
             *acc_C = opin_cost_by_seg_type[to_seg_type].acc_C;
             *acc_basecost = opin_cost_by_seg_type[to_seg_type].acc_basecost;
             return opin_penalty * opin_del;
         }

     } else {
         // CHANX or CHANY
         bool is_heading_opposite;
         // first check if the direction is correct
         // if wire is heading away from target, we cannot directly lookup the cost map,
         // instead we transform this case to the normal case in adjust_coord_for_wrong_dir_wire()
         if (start_dir == INC_DIRECTION) {
             if (start_chan_type == CHANX)
                 is_heading_opposite = (x_start <= x_target) ? false : true;
             else
                 is_heading_opposite = (y_start <= y_target) ? false : true;
         } else {
             if (start_chan_type == CHANX) {
                 if (target_chan_type == CHANX)
                     is_heading_opposite = (x_start >= x_target) ? false : true;
                 else
                     is_heading_opposite = (x_start >= x_target + 1) ? false : true;
             } else {
                 if (target_chan_type == CHANY)
                     is_heading_opposite = (y_start >= y_target) ? false : true;
                 else
                     is_heading_opposite = (y_start >= y_target + 1) ? false : true;
             }
         }
         // get Tdel: carefully calculate offset x and y
         if (is_heading_opposite) {
             // we make assumption how wires heading for wrong direction
             // will eventually get to the right track
             adjust_coord_for_wrong_dir_wire(&start_chan_type, &start_dir, len_start, &x_start, &y_start, x_target, y_target);
             int guessed_iswitch = rr_node[start_node].switches[0];
             // XXX not updating basecost and C now
             Tdel += (0.5 * rr_node[start_node].R + g_rr_switch_inf[guessed_iswitch].R) * rr_node[start_node].C;
             Tdel += g_rr_switch_inf[guessed_iswitch].Tdel;
         }
         offset_x = abs(x_start - x_target);
         offset_y = abs(y_start - y_target);
         if (start_chan_type == CHANX && target_chan_type == CHANY) {
             offset_y += (y_start > y_target) ? 1 : 0;
             offset_x += (start_dir == DEC_DIRECTION) ? -1 : 0;
         } else if (start_chan_type == CHANY && target_chan_type == CHANX) {
             offset_x += (x_start > x_target) ? 1 : 0;
             offset_y += (start_dir == DEC_DIRECTION) ? -1 : 0;
         }
         Tdel += bfs_direct_lookup(offset_x, offset_y, start_seg_type, start_chan_type, target_chan_type, acc_C, acc_basecost);
         return Tdel;
     }
 }


 float bfs_direct_lookup(int offset_x, int offset_y, int seg_type, int chan_type,
                 int to_chan_type, float *acc_C, float *acc_basecost) {
     /*
      * directly lookup the global cost map
      */
     offset_x = (offset_x > (nx-3)) ? nx-3 : offset_x;
     offset_y = (offset_y > (ny-3)) ? ny-3 : offset_y;

     chan_type = (chan_type == CHANX) ? 0 : 1;
     float Tdel = 0.;
     if (to_chan_type == CHANX) {
         *acc_C = bfs_lookahead_info[offset_x][offset_y][seg_type][chan_type].acc_C_x;
         *acc_basecost = bfs_lookahead_info[offset_x][offset_y][seg_type][chan_type].acc_basecost_x;
         Tdel = bfs_lookahead_info[offset_x][offset_y][seg_type][chan_type].Tdel_x;
     } else {
         *acc_C = bfs_lookahead_info[offset_x][offset_y][seg_type][chan_type].acc_C_y;
         *acc_basecost = bfs_lookahead_info[offset_x][offset_y][seg_type][chan_type].acc_basecost_y;
         Tdel = bfs_lookahead_info[offset_x][offset_y][seg_type][chan_type].Tdel_y;
     }
     return Tdel;
 }


 static void setup_min_side_reaching_target(int inode, int target_chan_type, int target_x, int target_y,
         float *min_Tdel, float cur_Tdel, float *C_downstream, float *future_base_cost,
         float cur_C_downstream, float cur_basecost) {
     /*
      * setup expanded_node_min_target_side after we determine which
      * side of target clb is the cheapest
      */
     if (cur_Tdel < *min_Tdel) {
         *min_Tdel = cur_Tdel;
         *C_downstream = cur_C_downstream;
         *future_base_cost = cur_basecost;
         expanded_node_min_target_side[inode].chan_type = target_chan_type;
         expanded_node_min_target_side[inode].x = target_x;
         expanded_node_min_target_side[inode].y = target_y;
     }
 }


 static void adjust_coord_for_wrong_dir_wire(int *start_chan_type, int *start_dir, int start_len,
                                             int *x_start, int *y_start, int x_target, int y_target) {
     /*
      * for nodes heading away from target:
      *    we cannot directly lookup the entry in the global cost map.
      *    I transform the case to the normal case where wire is heading towards
      *    the target by making the following assumption:
      *        the wire heading for wrong direction will choose an othogonal wire
      *        of the same seg type at the next hop, thus switching to the correct
      *        direction.
      *    This assumption may not be accurate in some cases (e.g.: L16 heading for
      *    wrong dir may likely take a turn using L4 rather than L16 at next hop).
      */
     int orig_dir = *start_dir;
     if (*start_chan_type == CHANX) {
         *start_dir = (*y_start <= y_target) ? INC_DIRECTION : DEC_DIRECTION;
     } else {
         *start_dir = (*x_start <= x_target) ? INC_DIRECTION : DEC_DIRECTION;
     }
     int new_x_start = *x_start;
     int new_y_start = *y_start;
     int len = start_len - 1;
     if (*start_chan_type == CHANX) {
         if (orig_dir == INC_DIRECTION) new_x_start += len;
         else new_x_start -= len;
     } else {
         if (orig_dir == INC_DIRECTION) new_y_start += len;
         else new_y_start -= len;
     }
     *x_start = new_x_start;
     *y_start = new_y_start;
     *start_chan_type = (*start_chan_type == CHANX) ? CHANY : CHANX;
 }


 static void setup_opin_cost_by_seg_type(int source_node, int target_node) {
     /*
      * precalculate the cost from wires that are driven by OPINs of source_node
      * to target_node. Setup the opin cost map: opin_cost_by_seg_type.
      * Later when expanding OPINs, just lookup opin_cost_by_seg_type
      */
     assert(rr_node[source_node].type == SOURCE);
     int num_to_opin_edges = rr_node[source_node].get_num_edges();
     for (int iopin = 0; iopin < num_to_opin_edges; iopin++) {
         int opin_node = rr_node[source_node].edges[iopin];
         int num_to_wire_edges = rr_node[opin_node].get_num_edges();
         for (int iwire = 0; iwire < num_to_wire_edges; iwire ++) {
             int wire_node = rr_node[opin_node].edges[iwire];
             int seg_type = rr_indexed_data[rr_node[wire_node].get_cost_index()].seg_index;
             if (seg_type >= g_num_segment || seg_type < 0)
                 continue;
             int chan_type = (rr_node[wire_node].type == CHANX) ? 0 : 1;
             // quick, coarse calculation
             int offset_x = abs(rr_node[opin_node].get_xlow() - rr_node[target_node].get_xlow());
             int offset_y = abs(rr_node[opin_node].get_ylow() - rr_node[target_node].get_ylow());
             offset_x = (offset_x > (nx-3)) ? nx-3 : offset_x;
             offset_y = (offset_y > (ny-3)) ? ny-3 : offset_y;
             opin_cost_by_seg_type[seg_type].Tdel = bfs_lookahead_info[offset_x][offset_y][seg_type][chan_type].Tdel_x;
             opin_cost_by_seg_type[seg_type].acc_C = bfs_lookahead_info[offset_x][offset_y][seg_type][chan_type].acc_C_x;
             opin_cost_by_seg_type[seg_type].acc_basecost = bfs_lookahead_info[offset_x][offset_y][seg_type][chan_type].acc_basecost_x;
             if ((int)opin_cost_by_seg_type.size() == g_num_segment) {
                 return;
             }
         }
     }
 }

 /****************************** END function definition for new look ahead *************************************/

 float get_timing_driven_cong_penalty (int inode, int target_node) {
     float penalty = 1.0;
 #if CONGESTIONBYCHIPAREA == 1
     int to_x_mid = (rr_node[inode].get_xhigh() + rr_node[inode].get_xlow()) / 2;
     int to_y_mid = (rr_node[inode].get_yhigh() + rr_node[inode].get_ylow()) / 2;
     int target_x_mid = (rr_node[target_node].get_xhigh() + rr_node[target_node].get_xlow()) / 2;
     int target_y_mid = (rr_node[target_node].get_yhigh() + rr_node[target_node].get_ylow()) / 2;
     //float avg_cong_on_chip = total_cong_tile_count / ((nx + 1.)*(ny+1.));
     float avg_cong_bb = 0;
     int x_start = (to_x_mid > target_x_mid) ? target_x_mid : to_x_mid;
     int x_end = (to_x_mid <= target_x_mid) ? target_x_mid : to_x_mid;
     int y_start = (to_y_mid > target_y_mid) ? target_y_mid : to_y_mid;
     int y_end = (to_y_mid <= target_y_mid) ? target_y_mid : to_y_mid;
     for (int i = x_start; i <= x_end; i++) {
         for (int j = y_start; j <= y_end; j++) {
             avg_cong_bb += congestion_map_by_area[i][j];
         }
     }
     avg_cong_bb /= ((x_end - x_start + 1) * (y_end - y_start + 1));
     //float penalty = (avg_cong_bb > avg_cong_on_chip) ? (avg_cong_bb / avg_cong_on_chip) : 1;
     float avg_by_chan_width = (200+200);
     float penalty = (avg_cong_bb > avg_by_chan_width) ? avg_cong_bb / avg_by_chan_width : 1;
     penalty = (penalty > 2) ? 2 : penalty;
     //penalty = 1 + pow(penalty - 1, 1.5+target_criticality);
     //penalty = (penalty > 1.5) ? 1.5 : penalty;
     //if (penalty > 1.2)
     //    printf("penalty %f\n", penalty);
 #endif
     return penalty;
 }

 #ifdef INTERPOSER_BASED_ARCHITECTURE
 static int get_num_expected_interposer_hops_to_target(int inode, int target_node)
 {
 	/*
 	Description:
 		returns the expected number of times you have to cross the
 		interposer in order to go from inode to target_node.
 		This does not include inode itself.

 	Assumptions:
 		1- Cuts are horizontal
 		2- target_node is a terminal pin (hence its ylow==yhigh)
 		3- wires that go through the interposer are uni-directional

 		---------	y=150
 		|		|
 		---------	y=100
 		|		|
 		---------	y=50
 		|		|
 		---------	y=0

 		num_cuts = 2, cut_step = 50, cut_locations = {50, 100}
 	*/
 	int y_start; /* start point y-coordinate. (y-coordinate at the *end* of wire 'i') */
 	int y_end;   /* destination (target) y-coordinate */
 	int cut_i, y_cut_location, num_expected_hops;
 	t_rr_type rr_type;

 	num_expected_hops = 0;
 	y_end   = rr_node[target_node].get_ylow();
 	rr_type = rr_node[inode].type;

 	if(rr_type == CHANX)
 	{	/* inode is a horizontal wire */
 		/* the ylow and yhigh are the same */
 		assert(rr_node[inode].get_ylow() == rr_node[inode].get_yhigh());
 		y_start = rr_node[inode].get_ylow();
 	}
 	else
 	{	/* inode is a CHANY */
 		if(rr_node[inode].direction == INC_DIRECTION)
 		{
 			y_start = rr_node[inode].get_yhigh();
 		}
 		else if(rr_node[inode].direction == DEC_DIRECTION)
 		{
 			y_start = rr_node[inode].get_ylow();
 		}
 		else
 		{
 			/*	this means direction is BIDIRECTIONAL! Error out. */
 			y_start = -1;
 			assert(rr_node[inode].direction!=BI_DIRECTION);
 		}
 	}

 	/* for every cut, is it between 'i' and 'target'? */
 	for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
 	{
 		y_cut_location = arch_cut_locations[cut_i];
 		if( (y_start < y_cut_location &&  y_cut_location < y_end) ||
 			(y_end   < y_cut_location &&  y_cut_location < y_start))
 		{
 			++num_expected_hops;
 		}
 	}

 	/* Make there is no off-by-1 error.For current node i:
 	   if it's a vertical wire, node 'i' itself may be crossing the interposer.
 	*/
 	if(rr_type == CHANY)
 	{
 		/* for every cut, does it cut wire 'i'? */
 		for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
 		{
 			y_cut_location = arch_cut_locations[cut_i];
 			if(rr_node[inode].get_ylow() < y_cut_location && y_cut_location < rr_node[inode].get_yhigh())
 			{
 				++num_expected_hops;
 			}
 		}
 	}

 	return num_expected_hops;
 }
 #endif

 /* Macro used below to ensure that fractions are rounded up, but floating   *
  * point values very close to an integer are rounded to that integer.       */

 #define ROUND_UP(x) (ceil (x - 0.001))

 static int get_expected_segs_to_target(int inode, int target_node,
 		int *num_segs_ortho_dir_ptr) {

 	/* Returns the number of segments the same type as inode that will be needed *
 	 * to reach target_node (not including inode) in each direction (the same    *
 	 * direction (horizontal or vertical) as inode and the orthogonal direction).*/

 	t_rr_type rr_type;
 	int target_x, target_y, num_segs_same_dir, cost_index, ortho_cost_index;
 	int no_need_to_pass_by_clb;
 	float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh;

 	#ifdef INTERPOSER_BASED_ARCHITECTURE
 	int num_expected_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
 	#endif

 	target_x = rr_node[target_node].get_xlow();
 	target_y = rr_node[target_node].get_ylow();
 	cost_index = rr_node[inode].get_cost_index();
 	inv_length = rr_indexed_data[cost_index].inv_length;
 	ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
 	ortho_inv_length = rr_indexed_data[ortho_cost_index].inv_length;
 	rr_type = rr_node[inode].type;

 	if (rr_type == CHANX) {
 		ylow = rr_node[inode].get_ylow();
 		xhigh = rr_node[inode].get_xhigh();
 		xlow = rr_node[inode].get_xlow();

 		/* Count vertical (orthogonal to inode) segs first. */

 		if (ylow > target_y) { /* Coming from a row above target? */
 			*num_segs_ortho_dir_ptr =
 					(int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length));
 			no_need_to_pass_by_clb = 1;
 		} else if (ylow < target_y - 1) { /* Below the CLB bottom? */
 			*num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) *
 					ortho_inv_length));
 			no_need_to_pass_by_clb = 1;
 		} else { /* In a row that passes by target CLB */
 			*num_segs_ortho_dir_ptr = 0;
 			no_need_to_pass_by_clb = 0;
 		}

 #ifdef INTERPOSER_BASED_ARCHITECTURE
 		(*num_segs_ortho_dir_ptr) = (*num_segs_ortho_dir_ptr) + 2*num_expected_hops;
 #endif

 		/* Now count horizontal (same dir. as inode) segs. */
         // TODO: must take into account direction
         // TODO: if distance is less than wire length, could enforce the restriction that end point to target distance should not be larger than start point to target distance
         int x = (rr_node[inode].get_direction() == INC_DIRECTION) ? xhigh : xlow;
         x += 0;
 		if (xlow > target_x + no_need_to_pass_by_clb) {
 			//num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb -
 			//				target_x) * inv_length));
             num_segs_same_dir = (int)(ROUND_UP((x - no_need_to_pass_by_clb - target_x) * inv_length));
 		} else if (xhigh < target_x - no_need_to_pass_by_clb) {
 			//num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb -
 			//				xhigh) * inv_length));
             num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb - x) * inv_length));
 		} else {
 			num_segs_same_dir = 0;
 		}
         /*
         //XXX: discourage wires going in the wrong dir
         int x_start = (rr_node[inode].get_direction() == INC_DIRECTION) ? xlow : xhigh;
         if (abs(x_start - target_x) < abs(x - target_x))
             num_segs_same_dir ++;
         */
 	}

 	else { /* inode is a CHANY */
 		ylow = rr_node[inode].get_ylow();
 		yhigh = rr_node[inode].get_yhigh();
 		xlow = rr_node[inode].get_xlow();

 		/* Count horizontal (orthogonal to inode) segs first. */

 		if (xlow > target_x) { /* Coming from a column right of target? */
 			*num_segs_ortho_dir_ptr = (int)(
 					ROUND_UP((xlow - target_x + 1.) * ortho_inv_length));
 			no_need_to_pass_by_clb = 1;
 		} else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */
 			*num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) *
 					ortho_inv_length));
 			no_need_to_pass_by_clb = 1;
 		} else { /* In a column that passes by target CLB */
 			*num_segs_ortho_dir_ptr = 0;
 			no_need_to_pass_by_clb = 0;
 		}

 		/* Now count vertical (same dir. as inode) segs. */
         // TODO: changed the way num seg is calculated
         int y = (rr_node[inode].get_direction() == INC_DIRECTION) ? yhigh : ylow;
         y += 0;
 		if (ylow > target_y + no_need_to_pass_by_clb) {
 			//num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb -
 			//				target_y) * inv_length));
             num_segs_same_dir = (int)(ROUND_UP((y - no_need_to_pass_by_clb - target_y) * inv_length));
 		} else if (yhigh < target_y - no_need_to_pass_by_clb) {
 			//num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb -
 			//				yhigh) * inv_length));
             num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb - y) * inv_length));
 		} else {
 			num_segs_same_dir = 0;
 		}
         /*
         //XXX: discourage wires going in the wrong dir
         int y_start = (rr_node[inode].get_direction() == INC_DIRECTION) ? ylow : yhigh;
         if (abs(y_start - target_y) < abs(y - target_y))
             num_segs_same_dir ++;
         */
 #ifdef INTERPOSER_BASED_ARCHITECTURE
 		num_segs_same_dir = num_segs_same_dir + 2*num_expected_hops;
 #endif
 	}

 	return (num_segs_same_dir);
 }

 static void update_rr_base_costs(int inet, float largest_criticality) {

 	/* Changes the base costs of different types of rr_nodes according to the  *
 	 * criticality, fanout, etc. of the current net being routed (inet).       */

 	float fanout, factor;
 	int index;

 	fanout = g_clbs_nlist.net[inet].num_sinks();

 	/* Other reasonable values for factor include fanout and 1 */
 	factor = sqrt(fanout);

 	for (index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) {
 		if (rr_indexed_data[index].T_quadratic > 0.) { /* pass transistor */
 			rr_indexed_data[index].base_cost =
 					rr_indexed_data[index].saved_base_cost * factor;
 		} else {
 			rr_indexed_data[index].base_cost =
 					rr_indexed_data[index].saved_base_cost;
 		}
 	}
 }

 /* Nets that have high fanout can take a very long time to route. Routing for sinks that are part of high-fanout nets should be
    done within a rectangular 'bin' centered around a target (as opposed to the entire net bounding box) the size of which is returned by this function. */
 static int mark_node_expansion_by_bin(int inet, int target_node,
 		t_rt_node * rt_node) {
 	int target_xlow, target_ylow, target_xhigh, target_yhigh;
 	int rlim = 1;
 	int inode;
 	float area;
 	bool success;
 	t_linked_rt_edge *linked_rt_edge;
 	t_rt_node * child_node;

 	target_xlow = rr_node[target_node].get_xlow();
 	target_ylow = rr_node[target_node].get_ylow();
 	target_xhigh = rr_node[target_node].get_xhigh();
 	target_yhigh = rr_node[target_node].get_yhigh();

 	if (g_clbs_nlist.net[inet].num_sinks() < HIGH_FANOUT_NET_LIM) {
 		/* This algorithm only applies to high fanout nets */
 		return 1;
 	}
 	if (rt_node == NULL || rt_node->u.child_list == NULL) {
 		/* If unknown traceback, set radius of bin to be size of chip */
 		rlim = max(nx + 2, ny + 2);
 		return rlim;
 	}

 	area = (route_bb[inet].xmax - route_bb[inet].xmin)
 			* (route_bb[inet].ymax - route_bb[inet].ymin);
 	if (area <= 0) {
 		area = 1;
 	}

 	rlim = (int)(ceil(sqrt((float) area / (float) g_clbs_nlist.net[inet].num_sinks())));

 	success = false;
 	/* determine quickly a feasible bin radius to route sink for high fanout nets
 	 this is necessary to prevent super long runtimes for high fanout nets; in best case, a reduction in complexity from O(N^2logN) to O(NlogN) (Swartz fast router)
 	 */
 	linked_rt_edge = rt_node->u.child_list;
 	while (success == false && linked_rt_edge != NULL) {
 		while (linked_rt_edge != NULL && success == false) {
 			child_node = linked_rt_edge->child;
 			inode = child_node->inode;
 			if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
 				if (rr_node[inode].get_xlow() <= target_xhigh + rlim
 						&& rr_node[inode].get_xhigh() >= target_xhigh - rlim
 						&& rr_node[inode].get_ylow() <= target_yhigh + rlim
 						&& rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
 					success = true;
 				}
 			}
 			linked_rt_edge = linked_rt_edge->next;
 		}

 		if (success == false) {
 			if (rlim > max(nx + 2, ny + 2)) {
 				vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
 					 "VPR internal error, net %s has paths that are not found in traceback.\n", g_clbs_nlist.net[inet].name);
 			}
 			/* if sink not in bin, increase bin size until fit */
 			rlim *= 2;
 		} else {
 			/* Sometimes might just catch a wire in the end segment, need to give it some channel space to explore */
 			rlim += 4;
 		}
 		linked_rt_edge = rt_node->u.child_list;
 	}

 	/* adjust rlim to account for width/height of block containing the target sink */
 	int target_span = max( target_xhigh - target_xlow, target_yhigh - target_ylow );
 	rlim += target_span;

 	/* redetermine expansion based on rlim */
 	linked_rt_edge = rt_node->u.child_list;
 	while (linked_rt_edge != NULL) {
 		child_node = linked_rt_edge->child;
 		inode = child_node->inode;
 		if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
 			if (rr_node[inode].get_xlow() <= target_xhigh + rlim
 					&& rr_node[inode].get_xhigh() >= target_xhigh - rlim
 					&& rr_node[inode].get_ylow() <= target_yhigh + rlim
 					&& rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
 				child_node->re_expand = true;
 			} else {
 				child_node->re_expand = false;
 			}
 		}
 		linked_rt_edge = linked_rt_edge->next;
 	}
 	return rlim;
 }

 #define ERROR_TOL 0.0001

 static void timing_driven_check_net_delays(float **net_delay) {

 	/* Checks that the net delays computed incrementally during timing driven    *
 	 * routing match those computed from scratch by the net_delay.c module.      */

 	unsigned int inet, ipin;
 	float **net_delay_check;

 	t_chunk list_head_net_delay_check_ch = {NULL, 0, NULL};

 	/*struct s_linked_vptr *ch_list_head_net_delay_check;*/

 	net_delay_check = alloc_net_delay(&list_head_net_delay_check_ch, g_clbs_nlist.net,
 		g_clbs_nlist.net.size());
 	load_net_delay_from_routing(net_delay_check, g_clbs_nlist.net, g_clbs_nlist.net.size());

 	for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
 		for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
 			if (net_delay_check[inet][ipin] == 0.) { /* Should be only GLOBAL nets */
 				if (fabs(net_delay[inet][ipin]) > ERROR_TOL) {
 					vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
 						"in timing_driven_check_net_delays: net %d pin %d.\n"
 						"\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
 						inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
 				}
 			} else {
 				if (fabs(1.0 - net_delay[inet][ipin] / net_delay_check[inet][ipin]) > ERROR_TOL) {
 					vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
 						"in timing_driven_check_net_delays: net %d pin %d.\n"
 						"\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
 						inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
 				}
 			}
 		}
 	}

 	free_net_delay(net_delay_check, &list_head_net_delay_check_ch);
 	vpr_printf_info("Completed net delay value cross check successfully.\n");
 }


 /* Detect if net should be routed or not */
 bool should_route_net(int inet) {
 	t_trace * tptr = trace_head[inet];

 	if (tptr == NULL) {
 		/* No routing yet. */
 		return true;
 	}

 	for (;;) {
 		int inode = tptr->index;
 		int occ = rr_node[inode].get_occ();
 		int capacity = rr_node[inode].get_capacity();

 		if (occ > capacity) {
 			return true; /* overuse detected */
 		}

 		if (rr_node[inode].type == SINK) {
 			tptr = tptr->next; /* Skip next segment. */
 			if (tptr == NULL)
 				break;
 		}

 		tptr = tptr->next;

 	} /* End while loop -- did an entire traceback. */

 	return false; /* Current route has no overuse */
 }