Adding 'strong' look-ahead
diff --git a/vpr/SRC/route/route_timing.c b/vpr/SRC/route/route_timing.c
index a86b1c1..8004761 100755
--- a/vpr/SRC/route/route_timing.c
+++ b/vpr/SRC/route/route_timing.c
@@ -1,1306 +1,1509 @@
-#include <cstdio>
-#include <ctime>
-#include <cmath>
-#include <assert.h>
-#include <vector>
-#include <algorithm>
-#include "vpr_utils.h"
-#include "util.h"
-#include "vpr_types.h"
-#include "globals.h"
-#include "route_export.h"
-#include "route_common.h"
-#include "route_tree_timing.h"
-#include "route_timing.h"
-#include "heapsort.h"
-#include "path_delay.h"
-#include "net_delay.h"
-#include "stats.h"
-#include "ReadOptions.h"
-
-
-using namespace std;
-
-
-
-/******************** Subroutines local to route_timing.c ********************/
-
-static int get_max_pins_per_net(void);
-
-static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
- float target_criticality, float astar_fac);
-
-static void timing_driven_expand_neighbours(struct s_heap *current,
- int inet, int itry,
- float bend_cost, float criticality_fac, int target_node,
- float astar_fac, int highfanout_rlim);
-
-static float get_timing_driven_expected_cost(int inode, int target_node,
- float criticality_fac, float R_upstream);
-#ifdef INTERPOSER_BASED_ARCHITECTURE
-static int get_num_expected_interposer_hops_to_target(int inode, int target_node);
-#endif
-
-static int get_expected_segs_to_target(int inode, int target_node,
- int *num_segs_ortho_dir_ptr);
-
-static void update_rr_base_costs(int inet, float largest_criticality);
-
-static void timing_driven_check_net_delays(float **net_delay);
-
-static int mark_node_expansion_by_bin(int inet, int target_node,
- t_rt_node * rt_node);
-
-static double get_overused_ratio();
-
-static bool should_route_net(int inet);
-static bool early_exit_heuristic(const t_router_opts& router_opts);
-struct more_sinks_than {
- inline bool operator() (const int& net_index1, const int& net_index2) {
- return g_clbs_nlist.net[net_index1].num_sinks() > g_clbs_nlist.net[net_index2].num_sinks();
- }
-};
-
-
-#ifdef PROFILE
-static void congestion_analysis();
-static void time_on_criticality_analysis();
-constexpr int fanout_per_bin = 4;
-constexpr float criticality_per_bin = 0.05;
-static vector<float> time_on_fanout;
-static vector<float> time_to_build_heap;
-static vector<int> itry_on_fanout;
-static vector<float> time_on_criticality;
-static vector<int> itry_on_criticality;
-#endif
-
-/************************ Subroutine definitions *****************************/
-bool try_timing_driven_route(struct s_router_opts router_opts,
- float **net_delay, t_slack * slacks, t_ivec ** clb_opins_used_locally,
- bool timing_analysis_enabled, const t_timing_inf &timing_inf) {
-
- /* Timing-driven routing algorithm. The timing graph (includes slack) *
- * must have already been allocated, and net_delay must have been allocated. *
- * Returns true if the routing succeeds, false otherwise. */
-#ifdef PROFILE
- const int max_fanout {get_max_pins_per_net()};
- time_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);
- time_to_build_heap.resize((max_fanout / fanout_per_bin) + 1, 0);
- itry_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);
- time_on_criticality.resize((1 / criticality_per_bin) + 1, 0);
- itry_on_criticality.resize((1 / criticality_per_bin) + 1, 0);
-#endif
-
- auto sorted_nets = vector<int>(g_clbs_nlist.net.size());
-
- for (size_t i = 0; i < g_clbs_nlist.net.size(); ++i) {
- sorted_nets[i] = i;
- }
-
- // sort so net with most sinks is first.
- std::sort(sorted_nets.begin(), sorted_nets.end(), more_sinks_than());
-
-	timing_driven_route_structs route_structs{}; // allocates route structs (calls default constructor)
- // contains:
- // float *pin_criticality; /* [1..max_pins_per_net-1] */
- // int *sink_order; /* [1..max_pins_per_net-1] */;
- // t_rt_node **rt_node_of_sink; /* [1..max_pins_per_net-1] */
-
- /* First do one routing iteration ignoring congestion to get reasonable net
- delay estimates. Set criticalities to 1 when timing analysis is on to
- optimize timing, and to 0 when timing analysis is off to optimize routability. */
-
- float init_timing_criticality_val = (timing_analysis_enabled ? 1.0 : 0.0);
-
-	/* Variables used to monitor the routing optimization and to abort early
-	 * when the current channel width (W) is clearly unroutable */
- double overused_ratio;
- vector<double> historical_overuse_ratio;
- historical_overuse_ratio.reserve(router_opts.max_router_iterations + 1);
-
- // for unroutable large circuits, the time per iteration seems to increase dramatically
- vector<float> time_per_iteration;
- time_per_iteration.reserve(router_opts.max_router_iterations + 1);
-
- for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
- if (g_clbs_nlist.net[inet].is_global == false) {
-			for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
-				slacks->timing_criticality[inet][ipin] = init_timing_criticality_val;
-#ifdef PATH_COUNTING
-				slacks->path_criticality[inet][ipin] = init_timing_criticality_val;
-#endif
-			}
- } else {
- /* Set delay of global signals to zero. Non-global net delays are set by
- update_net_delays_from_route_tree() inside timing_driven_route_net(),
- which is only called for non-global nets. */
- for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
- net_delay[inet][ipin] = 0.;
- }
- }
- }
-
- float pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */
-
- for (int itry = 1; itry <= router_opts.max_router_iterations; ++itry) {
- clock_t begin = clock();
-
- /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */
- for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
- g_clbs_nlist.net[inet].is_routed = false;
- g_clbs_nlist.net[inet].is_fixed = false;
- }
-
- for (unsigned int i = 0; i < g_clbs_nlist.net.size(); ++i) {
- bool is_routable = try_timing_driven_route_net(
- sorted_nets[i],
- itry,
- pres_fac,
- router_opts,
- route_structs.pin_criticality,
- route_structs.sink_order,
- route_structs.rt_node_of_sink,
- net_delay,
- slacks
- );
-
- if (!is_routable) {
- return (false);
- }
- }
-
- clock_t end = clock();
- float time = static_cast<float>(end - begin) / CLOCKS_PER_SEC;
- time_per_iteration.push_back(time);
-
- if (itry == 1 && early_exit_heuristic(router_opts)) return false;
-
- /* Make sure any CLB OPINs used up by subblocks being hooked directly
- to them are reserved for that purpose. */
-
- bool rip_up_local_opins = (itry == 1 ? false : true);
- reserve_locally_used_opins(pres_fac, router_opts.acc_fac, rip_up_local_opins, clb_opins_used_locally);
-
- /* Pathfinder guys quit after finding a feasible route. I may want to keep
- going longer, trying to improve timing. Think about this some. */
-
- bool success = feasible_routing();
-
- /* Verification to check the ratio of overused nodes, depending on the configuration
- * may abort the routing if the ratio is too high. */
- overused_ratio = get_overused_ratio();
- historical_overuse_ratio.push_back(overused_ratio);
-
- /* Determine when routing is impossible hence should be aborted */
- if (itry > 5){
-
- int expected_success_route_iter = predict_success_route_iter(historical_overuse_ratio, router_opts);
- if (expected_success_route_iter == UNDEFINED) {
- return false;
- }
-
-#ifdef REGRESSION_EXIT
- if (itry > 15) {
- // compare their slopes over the last 5 iterations
- double time_per_iteration_slope = linear_regression_vector(time_per_iteration, itry-5);
- double congestion_per_iteration_slope = linear_regression_vector(historical_overuse_ratio, itry-5);
- if (router_opts.congestion_analysis)
-				vpr_printf_info("%f s/iteration %f %%/iteration\n", time_per_iteration_slope, congestion_per_iteration_slope);
- // time is increasing and congestion is non-decreasing (grows faster than 10% per iteration)
- if (congestion_per_iteration_slope > 0 && time_per_iteration_slope > 0.1*time_per_iteration.back()
- && time_per_iteration_slope > 1) { // filter out noise
-				vpr_printf_info("Time per iteration growing too fast at slope %f s/iteration\n"
-					"while congestion grows at %f %%/iteration, unlikely to finish.\n",
-					time_per_iteration_slope, congestion_per_iteration_slope);
-
- return false;
- }
- }
-#endif
- }
-
- //print_usage_by_wire_length();
-
-
- if (success) {
-
- if (timing_analysis_enabled) {
- load_timing_graph_net_delays(net_delay);
- do_timing_analysis(slacks, timing_inf, false, false);
- float critical_path_delay = get_critical_path_delay();
-				vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %%)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
- vpr_printf_info("Critical path: %g ns\n", critical_path_delay);
- } else {
-				vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %%)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
- }
-
- vpr_printf_info("Successfully routed after %d routing iterations.\n", itry);
-#ifdef DEBUG
- if (timing_analysis_enabled)
- timing_driven_check_net_delays(net_delay);
-#endif
- return (true);
- }
-
- if (itry == 1) {
- pres_fac = router_opts.initial_pres_fac;
- pathfinder_update_cost(pres_fac, 0.); /* Acc_fac=0 for first iter. */
- } else {
- pres_fac *= router_opts.pres_fac_mult;
-
- /* Avoid overflow for high iteration counts, even if acc_cost is big */
- pres_fac = min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5));
-
- pathfinder_update_cost(pres_fac, router_opts.acc_fac);
- }
-
- if (timing_analysis_enabled) {
- /* Update slack values by doing another timing analysis.
- Timing_driven_route_net updated the net delay values. */
-
- load_timing_graph_net_delays(net_delay);
-
- do_timing_analysis(slacks, timing_inf, false, false);
-
- } else {
- /* If timing analysis is not enabled, make sure that the criticalities and the
- net_delays stay as 0 so that wirelength can be optimized. */
-
- for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
- for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
- slacks->timing_criticality[inet][ipin] = 0.;
-#ifdef PATH_COUNTING
- slacks->path_criticality[inet][ipin] = 0.;
-#endif
- net_delay[inet][ipin] = 0.;
- }
- }
- }
-
-
- if (timing_analysis_enabled) {
- float critical_path_delay = get_critical_path_delay();
-			vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %%)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
- } else {
-			vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %%)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
- }
-
-#ifdef PROFILE
- if (router_opts.congestion_analysis) congestion_analysis();
- if (router_opts.fanout_analysis) time_on_fanout_analysis();
- time_on_criticality_analysis();
-#endif
- fflush(stdout);
- }
- vpr_printf_info("Routing failed.\n");
-
- return (false);
-}
-
-// profiling per iteration functions
-#ifdef PROFILE
-void time_on_fanout_analysis() {
- // using the global time_on_fanout and itry_on_fanout
-	vpr_printf_info("fanout low      time (s)        attempts    heap build time (s)\n");
- for (size_t bin = 0; bin < time_on_fanout.size(); ++bin) {
- if (itry_on_fanout[bin]) { // avoid printing the many 0 bins
- vpr_printf_info("%4d %14.3f %12d %14.3f\n",bin*fanout_per_bin, time_on_fanout[bin], itry_on_fanout[bin], time_to_build_heap[bin]);
- }
- }
-}
-
-void time_on_criticality_analysis() {
-	vpr_printf_info("criticality low      time (s)        attempts\n");
- for (size_t bin = 0; bin < time_on_criticality.size(); ++bin) {
- if (itry_on_criticality[bin]) { // avoid printing the many 0 bins
-			vpr_printf_info("%4.2f %14.3f %12d\n", bin*criticality_per_bin, time_on_criticality[bin], itry_on_criticality[bin]);
- }
- }
-}
-// at the end of a routing iteration, profile how much congestion is taken up by each type of rr_node
-// efficient bit array for checking against congested type
-struct Congested_node_types {
- uint32_t mask;
- Congested_node_types() : mask{0} {}
- void set_congested(int rr_node_type) {mask |= (1 << rr_node_type);}
- void clear_congested(int rr_node_type) {mask &= ~(1 << rr_node_type);}
- bool is_congested(int rr_node_type) const {return mask & (1 << rr_node_type);}
- bool empty() const {return mask == 0;}
-};
-
-void congestion_analysis() {
- static const std::vector<const char*> node_typename {
- "SOURCE",
- "SINK",
- "IPIN",
- "OPIN",
- "CHANX",
- "CHANY",
- "ICE"
- };
- // each type indexes into array which holds the congestion for that type
- std::vector<int> congestion_per_type((size_t) NUM_RR_TYPES, 0);
- // print out specific node information if congestion for type is low enough
-
- int total_congestion = 0;
- for (int inode = 0; inode < num_rr_nodes; ++inode) {
- const t_rr_node& node = rr_node[inode];
- int congestion = node.get_occ() - node.get_capacity();
-
- if (congestion > 0) {
- total_congestion += congestion;
- congestion_per_type[node.type] += congestion;
- }
- }
-
- constexpr int specific_node_print_threshold = 5;
- Congested_node_types congested;
- for (int type = SOURCE; type < NUM_RR_TYPES; ++type) {
- float congestion_percentage = (float)congestion_per_type[type] / (float) total_congestion * 100;
-		vpr_printf_info(" %6s: %10.6f %%\n", node_typename[type], congestion_percentage);
- // nodes of that type need specific printing
- if (congestion_per_type[type] > 0 &&
- congestion_per_type[type] < specific_node_print_threshold) congested.set_congested(type);
- }
-
- // specific print out each congested node
- if (!congested.empty()) {
- vpr_printf_info("Specific congested nodes\nxlow ylow type\n");
- for (int inode = 0; inode < num_rr_nodes; ++inode) {
- const t_rr_node& node = rr_node[inode];
- if (congested.is_congested(node.type) && (node.get_occ() - node.get_capacity()) > 0) {
- vpr_printf_info("(%3d,%3d) %6s\n", node.get_xlow(), node.get_ylow(), node_typename[node.type]);
- }
- }
- }
-}
-#endif
-
-bool try_timing_driven_route_net(int inet, int itry, float pres_fac,
- struct s_router_opts router_opts,
- float* pin_criticality, int* sink_order,
- t_rt_node** rt_node_of_sink, float** net_delay, t_slack* slacks) {
-
- bool is_routed = false;
-
- if (g_clbs_nlist.net[inet].is_fixed) { /* Skip pre-routed nets. */
- is_routed = true;
- } else if (g_clbs_nlist.net[inet].is_global) { /* Skip global nets. */
- is_routed = true;
- } else if (should_route_net(inet) == false) {
- is_routed = true;
- } else{
- // track time spent vs fanout
-#ifdef PROFILE
- clock_t net_begin = clock();
-#endif
-
- is_routed = timing_driven_route_net(inet, itry, pres_fac,
- router_opts.max_criticality, router_opts.criticality_exp,
- router_opts.astar_fac, router_opts.bend_cost,
- pin_criticality, sink_order,
- rt_node_of_sink, net_delay[inet], slacks);
-
-#ifdef PROFILE
- float time_for_net = static_cast<float>(clock() - net_begin) / CLOCKS_PER_SEC;
- int net_fanout = g_clbs_nlist.net[inet].num_sinks();
- time_on_fanout[net_fanout / fanout_per_bin] += time_for_net;
- itry_on_fanout[net_fanout / fanout_per_bin] += 1;
-#endif
-
- /* Impossible to route? (disconnected rr_graph) */
- if (is_routed) {
- g_clbs_nlist.net[inet].is_routed = true;
- g_atoms_nlist.net[clb_to_vpack_net_mapping[inet]].is_routed = true;
- } else {
- vpr_printf_info("Routing failed.\n");
- }
- }
- return (is_routed);
-}
-
-/*
- * NOTE:
- * Suggest using a timing_driven_route_structs struct. Memory is managed for you
- */
-void alloc_timing_driven_route_structs(float **pin_criticality_ptr,
- int **sink_order_ptr, t_rt_node *** rt_node_of_sink_ptr) {
-
- /* Allocates all the structures needed only by the timing-driven router. */
-
- int max_pins_per_net = get_max_pins_per_net();
-
- float *pin_criticality = (float *) my_malloc((max_pins_per_net - 1) * sizeof(float));
- *pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */
-
- int *sink_order = (int *) my_malloc((max_pins_per_net - 1) * sizeof(int));
- *sink_order_ptr = sink_order - 1;
-
- t_rt_node **rt_node_of_sink = (t_rt_node **) my_malloc((max_pins_per_net - 1) * sizeof(t_rt_node *));
- *rt_node_of_sink_ptr = rt_node_of_sink - 1;
-
- alloc_route_tree_timing_structs();
-}
-
-/* This function gets ratio of overused nodes (overused_nodes / num_rr_nodes) */
-static double get_overused_ratio(){
- double overused_nodes = 0.0;
- int inode;
- for(inode = 0; inode < num_rr_nodes; inode++){
- if(rr_node[inode].get_occ() > rr_node[inode].get_capacity())
- overused_nodes += (rr_node[inode].get_occ() - rr_node[inode].get_capacity());
- }
- overused_nodes /= (double)num_rr_nodes;
- return overused_nodes;
-}
-
-/*
- * NOTE:
- * Suggest using a timing_driven_route_structs struct. Memory is managed for you
- */
-void free_timing_driven_route_structs(float *pin_criticality, int *sink_order,
- t_rt_node ** rt_node_of_sink) {
-
-	/* Frees all the structures needed only by the timing-driven router. */
-
- free(pin_criticality + 1); /* Starts at index 1. */
- free(sink_order + 1);
- free(rt_node_of_sink + 1);
- free_route_tree_timing_structs();
-}
-
-timing_driven_route_structs::timing_driven_route_structs() {
- alloc_timing_driven_route_structs(
- &pin_criticality,
- &sink_order,
- &rt_node_of_sink
- );
-}
-
-timing_driven_route_structs::~timing_driven_route_structs() {
- free_timing_driven_route_structs(
- pin_criticality,
- sink_order,
- rt_node_of_sink
- );
-}
-
-static int get_max_pins_per_net(void) {
-
- /* Returns the largest number of pins on any non-global net. */
-
- unsigned int inet;
- int max_pins_per_net;
-
- max_pins_per_net = 0;
- for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
- if (g_clbs_nlist.net[inet].is_global == false) {
- max_pins_per_net = max(max_pins_per_net,
- (int) g_clbs_nlist.net[inet].pins.size());
- }
- }
-
- return (max_pins_per_net);
-}
-
-bool timing_driven_route_net(int inet, int itry, float pres_fac, float max_criticality,
- float criticality_exp, float astar_fac, float bend_cost,
- float *pin_criticality, int *sink_order,
- t_rt_node ** rt_node_of_sink, float *net_delay, t_slack * slacks) {
-
-	/* Returns true as long as it found some way to hook up this net, even if that  *
- * way resulted in overuse of resources (congestion). If there is no way *
- * to route this net, even ignoring congestion, it returns false. In this *
- * case the rr_graph is disconnected and you can give up. If slacks = NULL, *
- * give each net a dummy criticality of 0. */
-
- unsigned int ipin;
- int num_sinks, itarget, target_pin, target_node, inode;
- float target_criticality, old_total_cost, new_total_cost, largest_criticality,
- old_back_cost, new_back_cost;
- t_rt_node *rt_root;
-
- struct s_trace *new_route_start_tptr;
- int highfanout_rlim;
-
- /* Rip-up any old routing. */
-
- pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);
- free_traceback(inet);
-
- for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
- if (!slacks) {
- /* Use criticality of 1. This makes all nets critical. Note: There is a big difference between setting pin criticality to 0
- compared to 1. If pin criticality is set to 0, then the current path delay is completely ignored during routing. By setting
- pin criticality to 1, the current path delay to the pin will always be considered and optimized for */
- pin_criticality[ipin] = 1.0;
- } else {
-#ifdef PATH_COUNTING
- /* Pin criticality is based on a weighted sum of timing and path criticalities. */
- pin_criticality[ipin] = ROUTE_PATH_WEIGHT * slacks->path_criticality[inet][ipin]
- + (1 - ROUTE_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
-#else
- /* Pin criticality is based on only timing criticality. */
- pin_criticality[ipin] = slacks->timing_criticality[inet][ipin];
-#endif
- /* Currently, pin criticality is between 0 and 1. Now shift it downwards
- by 1 - max_criticality (max_criticality is 0.99 by default, so shift down
- by 0.01) and cut off at 0. This means that all pins with small criticalities
- (<0.01) get criticality 0 and are ignored entirely, and everything
- else becomes a bit less critical. This effect becomes more pronounced if
- max_criticality is set lower. */
- // assert(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01);
- pin_criticality[ipin] = max(pin_criticality[ipin] - (1.0 - max_criticality), 0.0);
-
- /* Take pin criticality to some power (1 by default). */
- pin_criticality[ipin] = pow(pin_criticality[ipin], criticality_exp);
-
- /* Cut off pin criticality at max_criticality. */
- pin_criticality[ipin] = min(pin_criticality[ipin], max_criticality);
- }
- }
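-
-	/* Worked example of the shaping above, using the default values mentioned
-	   in the comments (max_criticality = 0.99, criticality_exp = 1; values are
-	   illustrative only): a raw criticality of 0.30 becomes
-	       min(pow(max(0.30 - 0.01, 0.0), 1), 0.99) = 0.29,
-	   while anything at or below 0.01 is clipped to 0 and ignored entirely. */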
-
- num_sinks = g_clbs_nlist.net[inet].num_sinks();
- heapsort(sink_order, pin_criticality, num_sinks, 0);
-
- /* Update base costs according to fanout and criticality rules */
-
- largest_criticality = pin_criticality[sink_order[1]];
- update_rr_base_costs(inet, largest_criticality);
-
- mark_ends(inet); /* Only needed to check for multiply-connected SINKs */
-
- rt_root = init_route_tree_to_source(inet);
-#ifdef PROFILE
- float build_heap_time = 0;
-#endif
- // explore in order of decreasing criticality
- for (itarget = 1; itarget <= num_sinks; itarget++) {
- target_pin = sink_order[itarget];
- target_node = net_rr_terminals[inet][target_pin];
-
- target_criticality = pin_criticality[target_pin];
-#ifdef PROFILE
- clock_t sink_criticality_start = clock();
-#endif
-
- highfanout_rlim = mark_node_expansion_by_bin(inet, target_node,
- rt_root);
-
- if (itarget > 1 && itry > 5) {
-			/* Enough iterations have been given to determine the best OPIN; to speed up finding a legal solution, do not let the net use two OPINs */
- assert(rr_node[rt_root->inode].type == SOURCE);
- rt_root->re_expand = false;
- }
-
- // reexplore route tree from root to add any new nodes (buildheap afterwards)
-#ifdef PROFILE
- clock_t route_tree_start = clock();
-#endif
- add_route_tree_to_heap(rt_root, target_node, target_criticality,
- astar_fac);
- heap_::build_heap(); // via sifting down everything
- // if (itarget == num_sinks && itarget > 800) {
- // vpr_printf_info("heap for target %d: ", itarget);
- // heap_::verify_extract_top();
- // }
-#ifdef PROFILE
- build_heap_time += static_cast<float>(clock() - route_tree_start) / CLOCKS_PER_SEC;
-#endif
-
-
- // cheapest s_heap (gives index to rr_node) in current route tree to be expanded on
- struct s_heap* cheapest = get_heap_head();
-
- if (cheapest == NULL) { /* Infeasible routing. No possible path for net. */
- vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
- inet, g_clbs_nlist.net[inet].name, itarget);
- reset_path_costs();
- free_route_tree(rt_root);
- return (false);
- }
-
- inode = cheapest->index;
- while (inode != target_node) {
- old_total_cost = rr_node_route_inf[inode].path_cost;
- new_total_cost = cheapest->cost;
-
- if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
- old_back_cost = HUGE_POSITIVE_FLOAT;
- else
- old_back_cost = rr_node_route_inf[inode].backward_path_cost;
-
- new_back_cost = cheapest->backward_path_cost;
-
- /* I only re-expand a node if both the "known" backward cost is lower *
- * in the new expansion (this is necessary to prevent loops from *
- * forming in the routing and causing havoc) *and* the expected total *
- * cost to the sink is lower than the old value. Different R_upstream *
- * values could make a path with lower back_path_cost less desirable *
- * than one with higher cost. Test whether or not I should disallow *
- * re-expansion based on a higher total cost. */
-
- if (old_total_cost > new_total_cost && old_back_cost > new_back_cost) {
- rr_node_route_inf[inode].prev_node = cheapest->u.prev_node;
- rr_node_route_inf[inode].prev_edge = cheapest->prev_edge;
- rr_node_route_inf[inode].path_cost = new_total_cost;
- rr_node_route_inf[inode].backward_path_cost = new_back_cost;
-
- if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
- add_to_mod_list(&rr_node_route_inf[inode].path_cost);
-
- timing_driven_expand_neighbours(cheapest, inet, itry, bend_cost,
- target_criticality, target_node, astar_fac,
- highfanout_rlim);
- }
-
- free_heap_data(cheapest);
- cheapest = get_heap_head();
- // test heap property
- // if (!heap_::is_valid()) {vpr_printf_info("invalid heap: "); heap_::pop_heap();}
-
- if (cheapest == NULL) { /* Impossible routing. No path for net. */
- vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
- inet, g_clbs_nlist.net[inet].name, itarget);
- reset_path_costs();
- free_route_tree(rt_root);
- return (false);
- }
-
- inode = cheapest->index;
- }
-#ifdef PROFILE
- time_on_criticality[target_criticality / criticality_per_bin] += static_cast<float>(clock() - sink_criticality_start) / CLOCKS_PER_SEC;
- ++itry_on_criticality[target_criticality / criticality_per_bin];
-#endif
- /* NB: In the code below I keep two records of the partial routing: the *
- * traceback and the route_tree. The route_tree enables fast recomputation *
- * of the Elmore delay to each node in the partial routing. The traceback *
- * lets me reuse all the routines written for breadth-first routing, which *
- * all take a traceback structure as input. Before this routine exits the *
- * route_tree structure is destroyed; only the traceback is needed at that *
- * point. */
-
- rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */
- new_route_start_tptr = update_traceback(cheapest, inet);
- rt_node_of_sink[target_pin] = update_route_tree(cheapest);
- free_heap_data(cheapest);
- pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac);
-
- empty_heap();
- reset_path_costs();
- }
-#ifdef PROFILE
- if (!time_to_build_heap.empty()) time_to_build_heap[num_sinks / fanout_per_bin] += build_heap_time;
-#endif
- /* For later timing analysis. */
-
- update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet);
- free_route_tree(rt_root);
- return (true);
-}
-
-static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
- float target_criticality, float astar_fac) {
-
- /* Puts the entire partial routing below and including rt_node onto the heap *
- * (except for those parts marked as not to be expanded) by calling itself *
- * recursively. */
-
- int inode;
- t_rt_node *child_node;
- t_linked_rt_edge *linked_rt_edge;
- float tot_cost, backward_path_cost, R_upstream;
-
- /* Pre-order depth-first traversal */
-
- if (rt_node->re_expand) {
- inode = rt_node->inode;
- backward_path_cost = target_criticality * rt_node->Tdel;
- R_upstream = rt_node->R_upstream;
- tot_cost = backward_path_cost
- + astar_fac
- * get_timing_driven_expected_cost(inode, target_node,
- target_criticality, R_upstream);
- heap_::push_back_node(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS,
- backward_path_cost, R_upstream);
-
- }
-
- linked_rt_edge = rt_node->u.child_list;
-
- while (linked_rt_edge != NULL) {
- child_node = linked_rt_edge->child;
- add_route_tree_to_heap(child_node, target_node, target_criticality,
- astar_fac);
- linked_rt_edge = linked_rt_edge->next;
- }
-}
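-
-/* Note on astar_fac: since tot_cost = backward_path_cost + astar_fac *
-   expected_cost, an astar_fac of 0 degenerates to a pure Dijkstra expansion
-   driven only by known cost, while larger values trust the look-ahead
-   estimate more and prune the search harder. */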
-
-static void timing_driven_expand_neighbours(struct s_heap *current,
- int inet, int itry,
- float bend_cost, float criticality_fac, int target_node,
- float astar_fac, int highfanout_rlim) {
-
- /* Puts all the rr_nodes adjacent to current on the heap. rr_nodes outside *
- * the expanded bounding box specified in route_bb are not added to the *
- * heap. */
-
- float new_R_upstream;
-
- int inode = current->index;
- float old_back_pcost = current->backward_path_cost;
- float R_upstream = current->R_upstream;
- int num_edges = rr_node[inode].get_num_edges();
-
- int target_x = rr_node[target_node].get_xhigh();
- int target_y = rr_node[target_node].get_yhigh();
- bool high_fanout = g_clbs_nlist.net[inet].num_sinks() >= HIGH_FANOUT_NET_LIM;
-
- for (int iconn = 0; iconn < num_edges; iconn++) {
- int to_node = rr_node[inode].edges[iconn];
-
- if (high_fanout) {
- // since target node is an IPIN, xhigh and xlow are the same (same for y values)
- if (rr_node[to_node].get_xhigh() < target_x - highfanout_rlim
- || rr_node[to_node].get_xlow() > target_x + highfanout_rlim
- || rr_node[to_node].get_yhigh() < target_y - highfanout_rlim
- || rr_node[to_node].get_ylow() > target_y + highfanout_rlim){
- continue; /* Node is outside high fanout bin. */
- }
- }
- else if (rr_node[to_node].get_xhigh() < route_bb[inet].xmin
- || rr_node[to_node].get_xlow() > route_bb[inet].xmax
- || rr_node[to_node].get_yhigh() < route_bb[inet].ymin
- || rr_node[to_node].get_ylow() > route_bb[inet].ymax)
- continue; /* Node is outside (expanded) bounding box. */
-
-
- /* Prune away IPINs that lead to blocks other than the target one. Avoids *
- * the issue of how to cost them properly so they don't get expanded before *
- * more promising routes, but makes route-throughs (via CLBs) impossible. *
- * Change this if you want to investigate route-throughs. */
-
- t_rr_type to_type = rr_node[to_node].type;
- if (to_type == IPIN
- && (rr_node[to_node].get_xhigh() != target_x
- || rr_node[to_node].get_yhigh() != target_y))
- continue;
-
- /* new_back_pcost stores the "known" part of the cost to this node -- the *
- * congestion cost of all the routing resources back to the existing route *
- * plus the known delay of the total path back to the source. new_tot_cost *
- * is this "known" backward cost + an expected cost to get to the target. */
-
- float new_back_pcost = old_back_pcost
- + (1. - criticality_fac) * get_rr_cong_cost(to_node);
-
- int iswitch = rr_node[inode].switches[iconn];
- if (g_rr_switch_inf[iswitch].buffered) {
- new_R_upstream = g_rr_switch_inf[iswitch].R;
- } else {
- new_R_upstream = R_upstream + g_rr_switch_inf[iswitch].R;
- }
-
- float Tdel = rr_node[to_node].C * (new_R_upstream + 0.5 * rr_node[to_node].R);
- Tdel += g_rr_switch_inf[iswitch].Tdel;
- new_R_upstream += rr_node[to_node].R;
- new_back_pcost += criticality_fac * Tdel;
-
- if (bend_cost != 0.) {
- t_rr_type from_type = rr_node[inode].type;
- to_type = rr_node[to_node].type;
- if ((from_type == CHANX && to_type == CHANY)
- || (from_type == CHANY && to_type == CHANX))
- new_back_pcost += bend_cost;
- }
-
- float expected_cost = get_timing_driven_expected_cost(to_node, target_node,
- criticality_fac, new_R_upstream);
- float new_tot_cost = new_back_pcost + astar_fac * expected_cost;
-
- node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost,
- new_R_upstream);
-
- } /* End for all neighbours */
-}
-
-static float get_timing_driven_expected_cost(int inode, int target_node,
- float criticality_fac, float R_upstream) {
-
-	/* Determines the expected cost (due to both delay and resource cost) to reach *
- * the target node from inode. It doesn't include the cost of inode -- *
- * that's already in the "known" path_cost. */
-
- t_rr_type rr_type;
- int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;
- float expected_cost, cong_cost, Tdel;
-
- rr_type = rr_node[inode].type;
-
- if (rr_type == CHANX || rr_type == CHANY) {
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- int num_interposer_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
-#endif
-
- num_segs_same_dir = get_expected_segs_to_target(inode, target_node,
- &num_segs_ortho_dir);
- cost_index = rr_node[inode].get_cost_index();
- ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
-
- cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost
- + num_segs_ortho_dir
- * rr_indexed_data[ortho_cost_index].base_cost;
- cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost
- + rr_indexed_data[SINK_COST_INDEX].base_cost;
-
- Tdel =
- num_segs_same_dir * rr_indexed_data[cost_index].T_linear
- + num_segs_ortho_dir
- * rr_indexed_data[ortho_cost_index].T_linear
- + num_segs_same_dir * num_segs_same_dir
- * rr_indexed_data[cost_index].T_quadratic
- + num_segs_ortho_dir * num_segs_ortho_dir
- * rr_indexed_data[ortho_cost_index].T_quadratic
- + R_upstream
- * (num_segs_same_dir
- * rr_indexed_data[cost_index].C_load
- + num_segs_ortho_dir
- * rr_indexed_data[ortho_cost_index].C_load);
-
- Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- float interposer_hop_delay = (float)delay_increase * 1e-12;
- Tdel += num_interposer_hops * interposer_hop_delay;
-#endif
-
- expected_cost = criticality_fac * Tdel
- + (1. - criticality_fac) * cong_cost;
- return (expected_cost);
- }
-
- else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
- return (rr_indexed_data[SINK_COST_INDEX].base_cost);
- }
-
- else { /* Change this if you want to investigate route-throughs */
- return (0.);
- }
-}
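-
-/* Note: this estimate charges the whole remaining distance through the
-   cost/delay data of inode's own segment type (its cost_index). The
-   OP_STRONG_LOOKAHEAD variant declared in this change appears, from
-   get_timing_driven_multiple_wirelengths_expected_cost() and the extra
-   via_cost_index parameter, to generalize this by considering the other
-   available wire lengths as well. */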
-#ifdef INTERPOSER_BASED_ARCHITECTURE
-static int get_num_expected_interposer_hops_to_target(int inode, int target_node)
-{
- /*
- Description:
- returns the expected number of times you have to cross the
- interposer in order to go from inode to target_node.
- This does not include inode itself.
-
- Assumptions:
- 1- Cuts are horizontal
- 2- target_node is a terminal pin (hence its ylow==yhigh)
- 3- wires that go through the interposer are uni-directional
-
- --------- y=150
- | |
- --------- y=100
- | |
- --------- y=50
- | |
- --------- y=0
-
- num_cuts = 2, cut_step = 50, cut_locations = {50, 100}
- */
- int y_start; /* start point y-coordinate. (y-coordinate at the *end* of wire 'i') */
- int y_end; /* destination (target) y-coordinate */
- int cut_i, y_cut_location, num_expected_hops;
- t_rr_type rr_type;
-
- num_expected_hops = 0;
- y_end = rr_node[target_node].get_ylow();
- rr_type = rr_node[inode].type;
-
- if(rr_type == CHANX)
- { /* inode is a horizontal wire */
- /* the ylow and yhigh are the same */
- assert(rr_node[inode].get_ylow() == rr_node[inode].get_yhigh());
- y_start = rr_node[inode].get_ylow();
- }
- else
- { /* inode is a CHANY */
- if(rr_node[inode].direction == INC_DIRECTION)
- {
- y_start = rr_node[inode].get_yhigh();
- }
- else if(rr_node[inode].direction == DEC_DIRECTION)
- {
- y_start = rr_node[inode].get_ylow();
- }
- else
- {
- /* this means direction is BIDIRECTIONAL! Error out. */
- y_start = -1;
- assert(rr_node[inode].direction!=BI_DIRECTION);
- }
- }
-
- /* for every cut, is it between 'i' and 'target'? */
- for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
- {
- y_cut_location = arch_cut_locations[cut_i];
- if( (y_start < y_cut_location && y_cut_location < y_end) ||
- (y_end < y_cut_location && y_cut_location < y_start))
- {
- ++num_expected_hops;
- }
- }
-
-	/* Make sure there is no off-by-one error. For the current node i:
- if it's a vertical wire, node 'i' itself may be crossing the interposer.
- */
- if(rr_type == CHANY)
- {
- /* for every cut, does it cut wire 'i'? */
- for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
- {
- y_cut_location = arch_cut_locations[cut_i];
- if(rr_node[inode].get_ylow() < y_cut_location && y_cut_location < rr_node[inode].get_yhigh())
- {
- ++num_expected_hops;
- }
- }
- }
-
- return num_expected_hops;
-}
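-
-/* Worked example for the figure above: a CHANX wire at y_start = 75 routing
-   to a target at y_end = 125, with cuts at {50, 100}: only the cut at y = 100
-   lies strictly between 75 and 125, so one interposer hop is expected. */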
-#endif
-
-/* Macro used below to ensure that fractions are rounded up, but floating *
- * point values very close to an integer are rounded to that integer. */
-#define ROUND_UP(x) (ceil (x - 0.001))
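-/* e.g. ROUND_UP(1.5) == 2, but ROUND_UP(2.0001) == 2: a value within 0.001 of
-   an integer is treated as that integer rather than being rounded up. */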
-
-static int get_expected_segs_to_target(int inode, int target_node,
- int *num_segs_ortho_dir_ptr) {
-
- /* Returns the number of segments the same type as inode that will be needed *
- * to reach target_node (not including inode) in each direction (the same *
- * direction (horizontal or vertical) as inode and the orthogonal direction).*/
-
- t_rr_type rr_type;
- int target_x, target_y, num_segs_same_dir, cost_index, ortho_cost_index;
- int no_need_to_pass_by_clb;
- float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh;
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- int num_expected_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
-#endif
-
- target_x = rr_node[target_node].get_xlow();
- target_y = rr_node[target_node].get_ylow();
- cost_index = rr_node[inode].get_cost_index();
- inv_length = rr_indexed_data[cost_index].inv_length;
- ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
- ortho_inv_length = rr_indexed_data[ortho_cost_index].inv_length;
- rr_type = rr_node[inode].type;
-
- if (rr_type == CHANX) {
- ylow = rr_node[inode].get_ylow();
- xhigh = rr_node[inode].get_xhigh();
- xlow = rr_node[inode].get_xlow();
-
- /* Count vertical (orthogonal to inode) segs first. */
-
- if (ylow > target_y) { /* Coming from a row above target? */
- *num_segs_ortho_dir_ptr =
- (int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length));
- no_need_to_pass_by_clb = 1;
- } else if (ylow < target_y - 1) { /* Below the CLB bottom? */
- *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) *
- ortho_inv_length));
- no_need_to_pass_by_clb = 1;
- } else { /* In a row that passes by target CLB */
- *num_segs_ortho_dir_ptr = 0;
- no_need_to_pass_by_clb = 0;
- }
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- (*num_segs_ortho_dir_ptr) = (*num_segs_ortho_dir_ptr) + 2*num_expected_hops;
-#endif
-
- /* Now count horizontal (same dir. as inode) segs. */
-
- if (xlow > target_x + no_need_to_pass_by_clb) {
- num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb -
- target_x) * inv_length));
- } else if (xhigh < target_x - no_need_to_pass_by_clb) {
- num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb -
- xhigh) * inv_length));
- } else {
- num_segs_same_dir = 0;
- }
- }
-
- else { /* inode is a CHANY */
- ylow = rr_node[inode].get_ylow();
- yhigh = rr_node[inode].get_yhigh();
- xlow = rr_node[inode].get_xlow();
-
- /* Count horizontal (orthogonal to inode) segs first. */
-
- if (xlow > target_x) { /* Coming from a column right of target? */
- *num_segs_ortho_dir_ptr = (int)(
- ROUND_UP((xlow - target_x + 1.) * ortho_inv_length));
- no_need_to_pass_by_clb = 1;
- } else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */
- *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) *
- ortho_inv_length));
- no_need_to_pass_by_clb = 1;
- } else { /* In a column that passes by target CLB */
- *num_segs_ortho_dir_ptr = 0;
- no_need_to_pass_by_clb = 0;
- }
-
- /* Now count vertical (same dir. as inode) segs. */
-
- if (ylow > target_y + no_need_to_pass_by_clb) {
- num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb -
- target_y) * inv_length));
- } else if (yhigh < target_y - no_need_to_pass_by_clb) {
- num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb -
- yhigh) * inv_length));
- } else {
- num_segs_same_dir = 0;
- }
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- num_segs_same_dir = num_segs_same_dir + 2*num_expected_hops;
-#endif
- }
-
- return (num_segs_same_dir);
-}
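-
-/* Illustrative example (assuming inv_length is the reciprocal of the segment
-   length): a CHANX node on length-4 wires (inv_length = 0.25) whose xhigh ends
-   10 columns left of target_x, in the same row as the target, needs
-   ROUND_UP(10 * 0.25) = 3 same-direction segments and 0 orthogonal ones. */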
-
-static void update_rr_base_costs(int inet, float largest_criticality) {
-
- /* Changes the base costs of different types of rr_nodes according to the *
- * criticality, fanout, etc. of the current net being routed (inet). */
-
- float fanout, factor;
- int index;
-
- fanout = g_clbs_nlist.net[inet].num_sinks();
-
- /* Other reasonable values for factor include fanout and 1 */
- factor = sqrt(fanout);
-
- for (index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) {
- if (rr_indexed_data[index].T_quadratic > 0.) { /* pass transistor */
- rr_indexed_data[index].base_cost =
- rr_indexed_data[index].saved_base_cost * factor;
- } else {
- rr_indexed_data[index].base_cost =
- rr_indexed_data[index].saved_base_cost;
- }
- }
-}
-
-/* Nets that have high fanout can take a very long time to route. Routing for sinks that are part of high-fanout nets should be
- done within a rectangular 'bin' centered around a target (as opposed to the entire net bounding box) the size of which is returned by this function. */
-static int mark_node_expansion_by_bin(int inet, int target_node,
- t_rt_node * rt_node) {
- int target_xlow, target_ylow, target_xhigh, target_yhigh;
- int rlim = 1;
- int inode;
- float area;
- bool success;
- t_linked_rt_edge *linked_rt_edge;
- t_rt_node * child_node;
-
- target_xlow = rr_node[target_node].get_xlow();
- target_ylow = rr_node[target_node].get_ylow();
- target_xhigh = rr_node[target_node].get_xhigh();
- target_yhigh = rr_node[target_node].get_yhigh();
-
- if (g_clbs_nlist.net[inet].num_sinks() < HIGH_FANOUT_NET_LIM) {
- /* This algorithm only applies to high fanout nets */
- return 1;
- }
- if (rt_node == NULL || rt_node->u.child_list == NULL) {
- /* If unknown traceback, set radius of bin to be size of chip */
- rlim = max(nx + 2, ny + 2);
- return rlim;
- }
-
- area = (route_bb[inet].xmax - route_bb[inet].xmin)
- * (route_bb[inet].ymax - route_bb[inet].ymin);
- if (area <= 0) {
- area = 1;
- }
-
- rlim = (int)(ceil(sqrt((float) area / (float) g_clbs_nlist.net[inet].num_sinks())));
-
- success = false;
-	/* Quickly determine a feasible bin radius in which to route the sink for high fanout nets.
-	   This is necessary to prevent very long runtimes; in the best case it reduces the complexity
-	   from O(N^2 log N) to O(N log N) (Swartz fast router).
-	 */
- linked_rt_edge = rt_node->u.child_list;
- while (success == false && linked_rt_edge != NULL) {
- while (linked_rt_edge != NULL && success == false) {
- child_node = linked_rt_edge->child;
- inode = child_node->inode;
- if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
- if (rr_node[inode].get_xlow() <= target_xhigh + rlim
- && rr_node[inode].get_xhigh() >= target_xhigh - rlim
- && rr_node[inode].get_ylow() <= target_yhigh + rlim
- && rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
- success = true;
- }
- }
- linked_rt_edge = linked_rt_edge->next;
- }
-
- if (success == false) {
- if (rlim > max(nx + 2, ny + 2)) {
- vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
- "VPR internal error, net %s has paths that are not found in traceback.\n", g_clbs_nlist.net[inet].name);
- }
- /* if sink not in bin, increase bin size until fit */
- rlim *= 2;
- } else {
- /* Sometimes might just catch a wire in the end segment, need to give it some channel space to explore */
- rlim += 4;
- }
- linked_rt_edge = rt_node->u.child_list;
- }
-
- /* adjust rlim to account for width/height of block containing the target sink */
- int target_span = max( target_xhigh - target_xlow, target_yhigh - target_ylow );
- rlim += target_span;
-
- /* redetermine expansion based on rlim */
- linked_rt_edge = rt_node->u.child_list;
- while (linked_rt_edge != NULL) {
- child_node = linked_rt_edge->child;
- inode = child_node->inode;
- if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
- if (rr_node[inode].get_xlow() <= target_xhigh + rlim
- && rr_node[inode].get_xhigh() >= target_xhigh - rlim
- && rr_node[inode].get_ylow() <= target_yhigh + rlim
- && rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
- child_node->re_expand = true;
- } else {
- child_node->re_expand = false;
- }
- }
- linked_rt_edge = linked_rt_edge->next;
- }
- return rlim;
-}
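-
-/* Example of the initial bin radius above: a 400-sink net with a 40 x 40
-   bounding box starts at rlim = ceil(sqrt(1600 / 400)) = 2; the radius then
-   doubles until part of the existing route tree falls inside the bin. */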
-#define ERROR_TOL 0.0001
-
-static void timing_driven_check_net_delays(float **net_delay) {
-
- /* Checks that the net delays computed incrementally during timing driven *
- * routing match those computed from scratch by the net_delay.c module. */
-
- unsigned int inet, ipin;
- float **net_delay_check;
-
- t_chunk list_head_net_delay_check_ch = {NULL, 0, NULL};
-
- /*struct s_linked_vptr *ch_list_head_net_delay_check;*/
-
- net_delay_check = alloc_net_delay(&list_head_net_delay_check_ch, g_clbs_nlist.net,
- g_clbs_nlist.net.size());
- load_net_delay_from_routing(net_delay_check, g_clbs_nlist.net, g_clbs_nlist.net.size());
-
- for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
- for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
- if (net_delay_check[inet][ipin] == 0.) { /* Should be only GLOBAL nets */
- if (fabs(net_delay[inet][ipin]) > ERROR_TOL) {
- vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
- "in timing_driven_check_net_delays: net %d pin %d.\n"
- "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
- inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
- }
- } else {
- if (fabs(1.0 - net_delay[inet][ipin] / net_delay_check[inet][ipin]) > ERROR_TOL) {
- vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
- "in timing_driven_check_net_delays: net %d pin %d.\n"
- "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
- inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
- }
- }
- }
- }
-
- free_net_delay(net_delay_check, &list_head_net_delay_check_ch);
- vpr_printf_info("Completed net delay value cross check successfully.\n");
-}
-
-
-/* Detect if net should be routed or not */
-static bool should_route_net(int inet) {
- t_trace * tptr = trace_head[inet];
-
- if (tptr == NULL) {
- /* No routing yet. */
- return true;
- }
-
- for (;;) {
- int inode = tptr->index;
- int occ = rr_node[inode].get_occ();
- int capacity = rr_node[inode].get_capacity();
-
- if (occ > capacity) {
- return true; /* overuse detected */
- }
-
- if (rr_node[inode].type == SINK) {
- tptr = tptr->next; /* Skip next segment. */
- if (tptr == NULL)
- break;
- }
-
- tptr = tptr->next;
-
- } /* End while loop -- did an entire traceback. */
-
- return false; /* Current route has no overuse */
-}
-
-static bool early_exit_heuristic(const t_router_opts& router_opts) {
-	/* Early exit code for cases where it is obvious that a successful route will not be found.
-	   Heuristic: if the total wirelength used in the first routing iteration exceeds the fraction
-	   FIRST_ITER_WIRELENTH_LIMIT of the total available wirelength, exit. */
- int total_wirelength = 0;
- int available_wirelength = 0;
-
- for (int i = 0; i < num_rr_nodes; ++i) {
- if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
- available_wirelength += 1 +
- rr_node[i].get_xhigh() - rr_node[i].get_xlow() +
- rr_node[i].get_yhigh() - rr_node[i].get_ylow();
- }
- }
-
- for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
- if (g_clbs_nlist.net[inet].is_global == false
- && g_clbs_nlist.net[inet].num_sinks() != 0) { /* Globals don't count. */
- int bends, wirelength, segments;
- get_num_bends_and_length(inet, &bends, &wirelength, &segments);
-
- total_wirelength += wirelength;
- }
- }
- vpr_printf_info("Wire length after first iteration %d, total available wire length %d, ratio %g\n",
- total_wirelength, available_wirelength,
- (float) (total_wirelength) / (float) (available_wirelength));
- if ((float) (total_wirelength) / (float) (available_wirelength)> FIRST_ITER_WIRELENTH_LIMIT) {
- vpr_printf_info("Wire length usage ratio exceeds limit of %g, fail routing.\n",
- FIRST_ITER_WIRELENTH_LIMIT);
- return true;
- }
- vpr_printf_info("--------- ---------- ----------- ---------------------\n");
- vpr_printf_info("Iteration Time Crit Path Overused RR Nodes\n");
- vpr_printf_info("--------- ---------- ----------- ---------------------\n");
- return false;
-}
-
+#include <cstdio>
+#include <ctime>
+#include <cmath>
+#include <assert.h>
+#include <vector>
+#include <algorithm>
+#include "vpr_utils.h"
+#include "util.h"
+#include "vpr_types.h"
+#include "globals.h"
+#include "route_export.h"
+#include "route_common.h"
+#include "route_tree_timing.h"
+#include "route_timing.h"
+#include "heapsort.h"
+#include "path_delay.h"
+#include "net_delay.h"
+#include "stats.h"
+#include "ReadOptions.h"
+
+
+using namespace std;
+
+
+/* Oleg's test code. This is an experimental/test measure to give the timing-driven router
+ look-ahead greater awareness of the different wire lengths in the FPGA.
+ Enabling this significantly increases routing time, but the router will
+ still be much faster than running it with astar_fac = 0. */
+//#define OP_STRONG_LOOKAHEAD
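+
+/* Rough sketch of the intended strong look-ahead, inferred from the
+   declarations below (illustrative pseudo-code, not the committed
+   implementation): rather than costing the remaining distance with the
+   current node's own segment type only, try each candidate wire type and
+   keep the cheapest estimate:
+
+       best = HUGE_POSITIVE_FLOAT;
+       for (each candidate wire type, identified by a via_cost_index)
+           best = min(best, expected cost of reaching the target charged
+                            through that type's base_cost/T_linear/T_quadratic);
+
+   get_timing_driven_multiple_wirelengths_expected_cost() and the extra
+   via_cost_index parameter of get_expected_segs_to_target() suggest this
+   shape. */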
+
+
+/******************** Subroutines local to route_timing.c ********************/
+
+static int get_max_pins_per_net(void);
+
+static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
+ float target_criticality, float astar_fac);
+
+static void timing_driven_expand_neighbours(struct s_heap *current,
+ int inet, int itry,
+ float bend_cost, float criticality_fac, int target_node,
+ float astar_fac, int highfanout_rlim);
+
+#ifndef OP_STRONG_LOOKAHEAD
+static float get_timing_driven_expected_cost(int inode, int target_node,
+ float criticality_fac, float R_upstream);
+#else
+static float get_timing_driven_multiple_wirelengths_expected_cost(int inode, int target_node,
+ float criticality_fac, float R_upstream);
+#endif
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+static int get_num_expected_interposer_hops_to_target(int inode, int target_node);
+#endif
+
+#ifndef OP_STRONG_LOOKAHEAD
+static int get_expected_segs_to_target(int inode, int target_node,
+ int *num_segs_ortho_dir_ptr);
+#else
+static int get_expected_segs_to_target(int inode, int via_cost_index, int target_node,
+ int *num_segs_ortho_dir_ptr);
+#endif
+
+static void update_rr_base_costs(int inet, float largest_criticality);
+
+static void timing_driven_check_net_delays(float **net_delay);
+
+static int mark_node_expansion_by_bin(int inet, int target_node,
+ t_rt_node * rt_node);
+
+static double get_overused_ratio();
+
+static bool should_route_net(int inet);
+static bool early_exit_heuristic(const t_router_opts& router_opts);
+struct more_sinks_than {
+ inline bool operator() (const int& net_index1, const int& net_index2) {
+ return g_clbs_nlist.net[net_index1].num_sinks() > g_clbs_nlist.net[net_index2].num_sinks();
+ }
+};
+
+
+#ifdef PROFILE
+static void congestion_analysis();
+static void time_on_criticality_analysis();
+constexpr int fanout_per_bin = 4;
+constexpr float criticality_per_bin = 0.05;
+static vector<float> time_on_fanout;
+static vector<float> time_to_build_heap;
+static vector<int> itry_on_fanout;
+static vector<float> time_on_criticality;
+static vector<int> itry_on_criticality;
+#endif
+
+/************************ Subroutine definitions *****************************/
+bool try_timing_driven_route(struct s_router_opts router_opts,
+ float **net_delay, t_slack * slacks, t_ivec ** clb_opins_used_locally,
+ bool timing_analysis_enabled, const t_timing_inf &timing_inf) {
+
+ /* Timing-driven routing algorithm. The timing graph (includes slack) *
+ * must have already been allocated, and net_delay must have been allocated. *
+ * Returns true if the routing succeeds, false otherwise. */
+#ifdef PROFILE
+ const int max_fanout {get_max_pins_per_net()};
+ time_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);
+ time_to_build_heap.resize((max_fanout / fanout_per_bin) + 1, 0);
+ itry_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);
+ time_on_criticality.resize((1 / criticality_per_bin) + 1, 0);
+ itry_on_criticality.resize((1 / criticality_per_bin) + 1, 0);
+#endif
+
+ auto sorted_nets = vector<int>(g_clbs_nlist.net.size());
+
+ for (size_t i = 0; i < g_clbs_nlist.net.size(); ++i) {
+ sorted_nets[i] = i;
+ }
+
+ // sort so net with most sinks is first.
+ std::sort(sorted_nets.begin(), sorted_nets.end(), more_sinks_than());
+
+	timing_driven_route_structs route_structs{}; // allocates route structs (calls default constructor)
+ // contains:
+ // float *pin_criticality; /* [1..max_pins_per_net-1] */
+ // int *sink_order; /* [1..max_pins_per_net-1] */;
+ // t_rt_node **rt_node_of_sink; /* [1..max_pins_per_net-1] */
+
+ /* First do one routing iteration ignoring congestion to get reasonable net
+ delay estimates. Set criticalities to 1 when timing analysis is on to
+ optimize timing, and to 0 when timing analysis is off to optimize routability. */
+
+ float init_timing_criticality_val = (timing_analysis_enabled ? 1.0 : 0.0);
+
+	/* Variables used to monitor the routing optimization and to abort early
+	 * when the current channel width (W) is clearly unroutable */
+ double overused_ratio;
+ vector<double> historical_overuse_ratio;
+ historical_overuse_ratio.reserve(router_opts.max_router_iterations + 1);
+
+ // for unroutable large circuits, the time per iteration seems to increase dramatically
+ vector<float> time_per_iteration;
+ time_per_iteration.reserve(router_opts.max_router_iterations + 1);
+
+ for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
+ if (g_clbs_nlist.net[inet].is_global == false) {
+			for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
+				slacks->timing_criticality[inet][ipin] = init_timing_criticality_val;
+#ifdef PATH_COUNTING
+				slacks->path_criticality[inet][ipin] = init_timing_criticality_val;
+#endif
+			}
+ } else {
+ /* Set delay of global signals to zero. Non-global net delays are set by
+ update_net_delays_from_route_tree() inside timing_driven_route_net(),
+ which is only called for non-global nets. */
+ for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
+ net_delay[inet][ipin] = 0.;
+ }
+ }
+ }
+
+ float pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */
+
+ for (int itry = 1; itry <= router_opts.max_router_iterations; ++itry) {
+ clock_t begin = clock();
+
+ /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */
+ for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
+ g_clbs_nlist.net[inet].is_routed = false;
+ g_clbs_nlist.net[inet].is_fixed = false;
+ }
+
+ for (unsigned int i = 0; i < g_clbs_nlist.net.size(); ++i) {
+ bool is_routable = try_timing_driven_route_net(
+ sorted_nets[i],
+ itry,
+ pres_fac,
+ router_opts,
+ route_structs.pin_criticality,
+ route_structs.sink_order,
+ route_structs.rt_node_of_sink,
+ net_delay,
+ slacks
+ );
+
+ if (!is_routable) {
+ return (false);
+ }
+ }
+
+ clock_t end = clock();
+ float time = static_cast<float>(end - begin) / CLOCKS_PER_SEC;
+ time_per_iteration.push_back(time);
+
+ if (itry == 1 && early_exit_heuristic(router_opts)) return false;
+
+ /* Make sure any CLB OPINs used up by subblocks being hooked directly
+ to them are reserved for that purpose. */
+
+ bool rip_up_local_opins = (itry == 1 ? false : true);
+ reserve_locally_used_opins(pres_fac, router_opts.acc_fac, rip_up_local_opins, clb_opins_used_locally);
+
+ /* Pathfinder guys quit after finding a feasible route. I may want to keep
+ going longer, trying to improve timing. Think about this some. */
+
+ bool success = feasible_routing();
+
+ /* Verification to check the ratio of overused nodes, depending on the configuration
+ * may abort the routing if the ratio is too high. */
+ overused_ratio = get_overused_ratio();
+ historical_overuse_ratio.push_back(overused_ratio);
+
+ /* Determine when routing is impossible hence should be aborted */
+ if (itry > 5){
+
+ int expected_success_route_iter = predict_success_route_iter(historical_overuse_ratio, router_opts);
+ if (expected_success_route_iter == UNDEFINED) {
+ return false;
+ }
+
+#ifdef REGRESSION_EXIT
+ if (itry > 15) {
+ // compare their slopes over the last 5 iterations
+ double time_per_iteration_slope = linear_regression_vector(time_per_iteration, itry-5);
+ double congestion_per_iteration_slope = linear_regression_vector(historical_overuse_ratio, itry-5);
+ if (router_opts.congestion_analysis)
+				vpr_printf_info("%f s/iteration %f %%/iteration\n", time_per_iteration_slope, congestion_per_iteration_slope);
+ // time is increasing and congestion is non-decreasing (grows faster than 10% per iteration)
+ if (congestion_per_iteration_slope > 0 && time_per_iteration_slope > 0.1*time_per_iteration.back()
+ && time_per_iteration_slope > 1) { // filter out noise
+				vpr_printf_info("Time per iteration growing too fast at slope %f s/iteration\n"
+					"while congestion grows at %f %%/iteration, unlikely to finish.\n",
+					time_per_iteration_slope, congestion_per_iteration_slope);
+
+ return false;
+ }
+ }
+#endif
+ }
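+
+		/* Example of the regression-exit test above: with iterations taking
+		   ~100 s each, a fitted slope of +15 s/iteration (> 1 and > 0.1 * 100)
+		   combined with a non-decreasing overuse ratio triggers the abort. */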
+
+ //print_usage_by_wire_length();
+
+
+ if (success) {
+
+ if (timing_analysis_enabled) {
+ load_timing_graph_net_delays(net_delay);
+ do_timing_analysis(slacks, timing_inf, false, false);
+ float critical_path_delay = get_critical_path_delay();
+				vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %%)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
+ vpr_printf_info("Critical path: %g ns\n", critical_path_delay);
+ } else {
+				vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %%)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
+ }
+
+ vpr_printf_info("Successfully routed after %d routing iterations.\n", itry);
+#ifdef DEBUG
+ if (timing_analysis_enabled)
+ timing_driven_check_net_delays(net_delay);
+#endif
+ return (true);
+ }
+
+ if (itry == 1) {
+ pres_fac = router_opts.initial_pres_fac;
+ pathfinder_update_cost(pres_fac, 0.); /* Acc_fac=0 for first iter. */
+ } else {
+ pres_fac *= router_opts.pres_fac_mult;
+
+ /* Avoid overflow for high iteration counts, even if acc_cost is big */
+ pres_fac = min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5));
+
+ pathfinder_update_cost(pres_fac, router_opts.acc_fac);
+ }
+
+ if (timing_analysis_enabled) {
+ /* Update slack values by doing another timing analysis.
+ Timing_driven_route_net updated the net delay values. */
+
+ load_timing_graph_net_delays(net_delay);
+
+ do_timing_analysis(slacks, timing_inf, false, false);
+
+ } else {
+ /* If timing analysis is not enabled, make sure that the criticalities and the
+ net_delays stay as 0 so that wirelength can be optimized. */
+
+ for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
+ for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
+ slacks->timing_criticality[inet][ipin] = 0.;
+#ifdef PATH_COUNTING
+ slacks->path_criticality[inet][ipin] = 0.;
+#endif
+ net_delay[inet][ipin] = 0.;
+ }
+ }
+ }
+
+
+ if (timing_analysis_enabled) {
+ float critical_path_delay = get_critical_path_delay();
+ vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
+ } else {
+ vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
+ }
+
+#ifdef PROFILE
+ if (router_opts.congestion_analysis) congestion_analysis();
+ if (router_opts.fanout_analysis) time_on_fanout_analysis();
+ time_on_criticality_analysis();
+#endif
+ fflush(stdout);
+ }
+ vpr_printf_info("Routing failed.\n");
+
+ return (false);
+}
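+
+/* The REGRESSION_EXIT check above assumes linear_regression_vector() returns
+ * the least-squares slope of its vector argument over the tail that starts at
+ * the given index. A minimal sketch of such a helper is given below; the name
+ * and element type here are illustrative, not the actual implementation. */
+static double example_tail_slope(const std::vector<double>& y, size_t start) {
+	size_t n = y.size() - start;
+	if (n < 2) return 0.;
+	double sx = 0., sy = 0., sxy = 0., sxx = 0.;
+	for (size_t i = start; i < y.size(); ++i) {
+		double x = static_cast<double>(i);	/* iteration number is the x-axis */
+		sx += x; sy += y[i]; sxy += x * y[i]; sxx += x * x;
+	}
+	/* slope = (n*Sxy - Sx*Sy) / (n*Sxx - Sx*Sx) */
+	return (n * sxy - sx * sy) / (n * sxx - sx * sx);
+}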
+
+// profiling per iteration functions
+#ifdef PROFILE
+void time_on_fanout_analysis() {
+ // using the global time_on_fanout and itry_on_fanout
+ vpr_printf_info("fanout low time (s) attemps heap build time (s)\n");
+ for (size_t bin = 0; bin < time_on_fanout.size(); ++bin) {
+ if (itry_on_fanout[bin]) { // avoid printing the many 0 bins
+ vpr_printf_info("%4d %14.3f %12d %14.3f\n",bin*fanout_per_bin, time_on_fanout[bin], itry_on_fanout[bin], time_to_build_heap[bin]);
+ }
+ }
+}
+
+void time_on_criticality_analysis() {
+ vpr_printf_info("congestion low time (s) attemps\n");
+ for (size_t bin = 0; bin < time_on_criticality.size(); ++bin) {
+ if (itry_on_criticality[bin]) { // avoid printing the many 0 bins
+ vpr_printf_info("%4d %14.3f %12d\n",bin*criticality_per_bin, time_on_criticality[bin], itry_on_criticality[bin]);
+ }
+ }
+}
+// at the end of a routing iteration, profile how much congestion is taken up by each type of rr_node
+// efficient bit array for checking against congested type
+struct Congested_node_types {
+ uint32_t mask;
+ Congested_node_types() : mask{0} {}
+ void set_congested(int rr_node_type) {mask |= (1 << rr_node_type);}
+ void clear_congested(int rr_node_type) {mask &= ~(1 << rr_node_type);}
+ bool is_congested(int rr_node_type) const {return mask & (1 << rr_node_type);}
+ bool empty() const {return mask == 0;}
+};
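+
+/* Usage sketch: set the types found to be congested, then test each node
+ * against the mask in O(1), e.g.
+ *     Congested_node_types congested;
+ *     congested.set_congested(CHANX);
+ *     if (congested.is_congested(node.type)) { ... print the node ... }
+ */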
+
+void congestion_analysis() {
+ static const std::vector<const char*> node_typename {
+ "SOURCE",
+ "SINK",
+ "IPIN",
+ "OPIN",
+ "CHANX",
+ "CHANY",
+ "ICE"
+ };
+ // each type indexes into array which holds the congestion for that type
+ std::vector<int> congestion_per_type((size_t) NUM_RR_TYPES, 0);
+ // print out specific node information if congestion for type is low enough
+
+ int total_congestion = 0;
+ for (int inode = 0; inode < num_rr_nodes; ++inode) {
+ const t_rr_node& node = rr_node[inode];
+ int congestion = node.get_occ() - node.get_capacity();
+
+ if (congestion > 0) {
+ total_congestion += congestion;
+ congestion_per_type[node.type] += congestion;
+ }
+ }
+
+ constexpr int specific_node_print_threshold = 5;
+ Congested_node_types congested;
+ for (int type = SOURCE; type < NUM_RR_TYPES; ++type) {
+ float congestion_percentage = (float)congestion_per_type[type] / (float) total_congestion * 100;
+ vpr_printf_info(" %6s: %10.6f %\n", node_typename[type], congestion_percentage);
+ // nodes of that type need specific printing
+ if (congestion_per_type[type] > 0 &&
+ congestion_per_type[type] < specific_node_print_threshold) congested.set_congested(type);
+ }
+
+ // print out each congested node specifically
+ if (!congested.empty()) {
+ vpr_printf_info("Specific congested nodes\nxlow ylow type\n");
+ for (int inode = 0; inode < num_rr_nodes; ++inode) {
+ const t_rr_node& node = rr_node[inode];
+ if (congested.is_congested(node.type) && (node.get_occ() - node.get_capacity()) > 0) {
+ vpr_printf_info("(%3d,%3d) %6s\n", node.get_xlow(), node.get_ylow(), node_typename[node.type]);
+ }
+ }
+ }
+}
+#endif
+
+bool try_timing_driven_route_net(int inet, int itry, float pres_fac,
+ struct s_router_opts router_opts,
+ float* pin_criticality, int* sink_order,
+ t_rt_node** rt_node_of_sink, float** net_delay, t_slack* slacks) {
+
+ bool is_routed = false;
+
+ if (g_clbs_nlist.net[inet].is_fixed) { /* Skip pre-routed nets. */
+ is_routed = true;
+ } else if (g_clbs_nlist.net[inet].is_global) { /* Skip global nets. */
+ is_routed = true;
+ } else if (should_route_net(inet) == false) {
+ is_routed = true;
+ } else{
+ // track time spent vs fanout
+#ifdef PROFILE
+ clock_t net_begin = clock();
+#endif
+
+ is_routed = timing_driven_route_net(inet, itry, pres_fac,
+ router_opts.max_criticality, router_opts.criticality_exp,
+ router_opts.astar_fac, router_opts.bend_cost,
+ pin_criticality, sink_order,
+ rt_node_of_sink, net_delay[inet], slacks);
+
+#ifdef PROFILE
+ float time_for_net = static_cast<float>(clock() - net_begin) / CLOCKS_PER_SEC;
+ int net_fanout = g_clbs_nlist.net[inet].num_sinks();
+ time_on_fanout[net_fanout / fanout_per_bin] += time_for_net;
+ itry_on_fanout[net_fanout / fanout_per_bin] += 1;
+#endif
+
+ /* If the net still failed to route, the rr_graph itself must be disconnected. */
+ if (is_routed) {
+ g_clbs_nlist.net[inet].is_routed = true;
+ g_atoms_nlist.net[clb_to_vpack_net_mapping[inet]].is_routed = true;
+ } else {
+ vpr_printf_info("Routing failed.\n");
+ }
+ }
+ return (is_routed);
+}
+
+/*
+ * NOTE:
+ * Suggest using a timing_driven_route_structs struct. Memory is managed for you
+ */
+void alloc_timing_driven_route_structs(float **pin_criticality_ptr,
+ int **sink_order_ptr, t_rt_node *** rt_node_of_sink_ptr) {
+
+ /* Allocates all the structures needed only by the timing-driven router. */
+
+ int max_pins_per_net = get_max_pins_per_net();
+
+ float *pin_criticality = (float *) my_malloc((max_pins_per_net - 1) * sizeof(float));
+ *pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */
+
+ int *sink_order = (int *) my_malloc((max_pins_per_net - 1) * sizeof(int));
+ *sink_order_ptr = sink_order - 1;
+
+ t_rt_node **rt_node_of_sink = (t_rt_node **) my_malloc((max_pins_per_net - 1) * sizeof(t_rt_node *));
+ *rt_node_of_sink_ptr = rt_node_of_sink - 1;
+
+ alloc_route_tree_timing_structs();
+}
+
+/* This function gets ratio of overused nodes (overused_nodes / num_rr_nodes) */
+static double get_overused_ratio(){
+ double overused_nodes = 0.0;
+ int inode;
+ for(inode = 0; inode < num_rr_nodes; inode++){
+ if(rr_node[inode].get_occ() > rr_node[inode].get_capacity())
+ overused_nodes += (rr_node[inode].get_occ() - rr_node[inode].get_capacity());
+ }
+ overused_nodes /= (double)num_rr_nodes;
+ return overused_nodes;
+}
+
+/*
+ * NOTE:
+ * Suggest using a timing_driven_route_structs struct. Memory is managed for you
+ */
+void free_timing_driven_route_structs(float *pin_criticality, int *sink_order,
+ t_rt_node ** rt_node_of_sink) {
+
+ /* Frees all the structures needed only by the timing-driven router. */
+
+ free(pin_criticality + 1); /* Starts at index 1. */
+ free(sink_order + 1);
+ free(rt_node_of_sink + 1);
+ free_route_tree_timing_structs();
+}
+
+timing_driven_route_structs::timing_driven_route_structs() {
+ alloc_timing_driven_route_structs(
+ &pin_criticality,
+ &sink_order,
+ &rt_node_of_sink
+ );
+}
+
+timing_driven_route_structs::~timing_driven_route_structs() {
+ free_timing_driven_route_structs(
+ pin_criticality,
+ sink_order,
+ rt_node_of_sink
+ );
+}
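+
+/* Usage sketch for the RAII wrapper above: since the constructor and
+ * destructor call alloc_timing_driven_route_structs() and
+ * free_timing_driven_route_structs(), a plain local suffices:
+ *
+ *     {
+ *         timing_driven_route_structs rs;    // allocates
+ *         ... use rs.pin_criticality, rs.sink_order, rs.rt_node_of_sink ...
+ *     }                                      // frees automatically
+ */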
+
+static int get_max_pins_per_net(void) {
+
+ /* Returns the largest number of pins on any non-global net. */
+
+ unsigned int inet;
+ int max_pins_per_net;
+
+ max_pins_per_net = 0;
+ for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
+ if (g_clbs_nlist.net[inet].is_global == false) {
+ max_pins_per_net = max(max_pins_per_net,
+ (int) g_clbs_nlist.net[inet].pins.size());
+ }
+ }
+
+ return (max_pins_per_net);
+}
+
+bool timing_driven_route_net(int inet, int itry, float pres_fac, float max_criticality,
+ float criticality_exp, float astar_fac, float bend_cost,
+ float *pin_criticality, int *sink_order,
+ t_rt_node ** rt_node_of_sink, float *net_delay, t_slack * slacks) {
+
+ /* Returns true as long as it found some way to hook up this net, even if that *
+ * way resulted in overuse of resources (congestion). If there is no way *
+ * to route this net, even ignoring congestion, it returns false. In this *
+ * case the rr_graph is disconnected and you can give up. If slacks = NULL, *
+ * give each sink a criticality of 1 (see below). */
+
+ unsigned int ipin;
+ int num_sinks, itarget, target_pin, target_node, inode;
+ float target_criticality, old_total_cost, new_total_cost, largest_criticality,
+ old_back_cost, new_back_cost;
+ t_rt_node *rt_root;
+
+ struct s_trace *new_route_start_tptr;
+ int highfanout_rlim;
+
+ /* Rip-up any old routing. */
+
+ pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);
+ free_traceback(inet);
+
+ for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
+ if (!slacks) {
+ /* Use criticality of 1. This makes all nets critical. Note: There is a big difference between setting pin criticality to 0
+ compared to 1. If pin criticality is set to 0, then the current path delay is completely ignored during routing. By setting
+ pin criticality to 1, the current path delay to the pin will always be considered and optimized for */
+ pin_criticality[ipin] = 1.0;
+ } else {
+#ifdef PATH_COUNTING
+ /* Pin criticality is based on a weighted sum of timing and path criticalities. */
+ pin_criticality[ipin] = ROUTE_PATH_WEIGHT * slacks->path_criticality[inet][ipin]
+ + (1 - ROUTE_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
+#else
+ /* Pin criticality is based on only timing criticality. */
+ pin_criticality[ipin] = slacks->timing_criticality[inet][ipin];
+#endif
+ /* Currently, pin criticality is between 0 and 1. Now shift it downwards
+ by 1 - max_criticality (max_criticality is 0.99 by default, so shift down
+ by 0.01) and cut off at 0. This means that all pins with small criticalities
+ (<0.01) get criticality 0 and are ignored entirely, and everything
+ else becomes a bit less critical. This effect becomes more pronounced if
+ max_criticality is set lower. */
+ // assert(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01);
+ pin_criticality[ipin] = max(pin_criticality[ipin] - (1.0 - max_criticality), 0.0);
+
+ /* Take pin criticality to some power (1 by default). */
+ pin_criticality[ipin] = pow(pin_criticality[ipin], criticality_exp);
+
+ /* Cut off pin criticality at max_criticality. */
+ pin_criticality[ipin] = min(pin_criticality[ipin], max_criticality);
+ }
+ }
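+
+ /* Worked example of the transform above: a timing criticality of 0.50 with
+ the default max_criticality = 0.99 and criticality_exp = 1 becomes
+ max(0.50 - 0.01, 0) = 0.49, then 0.49^1 = 0.49, then min(0.49, 0.99) = 0.49,
+ while a criticality below 0.01 is cut off to 0 and that sink is routed
+ purely for congestion. */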
+
+ num_sinks = g_clbs_nlist.net[inet].num_sinks();
+ heapsort(sink_order, pin_criticality, num_sinks, 0);
+
+ /* Update base costs according to fanout and criticality rules */
+
+ largest_criticality = pin_criticality[sink_order[1]];
+ update_rr_base_costs(inet, largest_criticality);
+
+ mark_ends(inet); /* Only needed to check for multiply-connected SINKs */
+
+ rt_root = init_route_tree_to_source(inet);
+#ifdef PROFILE
+ float build_heap_time = 0;
+#endif
+ // explore in order of decreasing criticality
+ for (itarget = 1; itarget <= num_sinks; itarget++) {
+ target_pin = sink_order[itarget];
+ target_node = net_rr_terminals[inet][target_pin];
+
+ target_criticality = pin_criticality[target_pin];
+#ifdef PROFILE
+ clock_t sink_criticality_start = clock();
+#endif
+
+ highfanout_rlim = mark_node_expansion_by_bin(inet, target_node,
+ rt_root);
+
+ if (itarget > 1 && itry > 5) {
+ /* Once enough iterations have passed to settle on an OPIN, stop the net from using two OPINs so a legal solution is reached sooner */
+ assert(rr_node[rt_root->inode].type == SOURCE);
+ rt_root->re_expand = false;
+ }
+
+ // reexplore route tree from root to add any new nodes (buildheap afterwards)
+#ifdef PROFILE
+ clock_t route_tree_start = clock();
+#endif
+ add_route_tree_to_heap(rt_root, target_node, target_criticality,
+ astar_fac);
+ heap_::build_heap(); // via sifting down everything
+ // if (itarget == num_sinks && itarget > 800) {
+ // vpr_printf_info("heap for target %d: ", itarget);
+ // heap_::verify_extract_top();
+ // }
+#ifdef PROFILE
+ build_heap_time += static_cast<float>(clock() - route_tree_start) / CLOCKS_PER_SEC;
+#endif
+
+
+ // cheapest s_heap (gives index to rr_node) in current route tree to be expanded on
+ struct s_heap* cheapest = get_heap_head();
+
+ if (cheapest == NULL) { /* Infeasible routing. No possible path for net. */
+ vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
+ inet, g_clbs_nlist.net[inet].name, itarget);
+ reset_path_costs();
+ free_route_tree(rt_root);
+ return (false);
+ }
+
+ inode = cheapest->index;
+ while (inode != target_node) {
+ old_total_cost = rr_node_route_inf[inode].path_cost;
+ new_total_cost = cheapest->cost;
+
+ if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
+ old_back_cost = HUGE_POSITIVE_FLOAT;
+ else
+ old_back_cost = rr_node_route_inf[inode].backward_path_cost;
+
+ new_back_cost = cheapest->backward_path_cost;
+
+ /* I only re-expand a node if both the "known" backward cost is lower *
+ * in the new expansion (this is necessary to prevent loops from *
+ * forming in the routing and causing havoc) *and* the expected total *
+ * cost to the sink is lower than the old value. Different R_upstream *
+ * values could make a path with lower back_path_cost less desirable *
+ * than one with higher cost. Test whether or not I should disallow *
+ * re-expansion based on a higher total cost. */
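+
+ /* Illustrative numbers: if inode was first reached with
+ back_cost = 3.0 and total cost = 5.0, and is now reached with
+ back_cost = 2.5 but total cost = 5.5 (a different R_upstream makes
+ the lookahead expect more downstream delay), the node is NOT
+ re-expanded: both comparisons in the condition below must pass. */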
+
+ if (old_total_cost > new_total_cost && old_back_cost > new_back_cost) {
+ rr_node_route_inf[inode].prev_node = cheapest->u.prev_node;
+ rr_node_route_inf[inode].prev_edge = cheapest->prev_edge;
+ rr_node_route_inf[inode].path_cost = new_total_cost;
+ rr_node_route_inf[inode].backward_path_cost = new_back_cost;
+
+ if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
+ add_to_mod_list(&rr_node_route_inf[inode].path_cost);
+
+ timing_driven_expand_neighbours(cheapest, inet, itry, bend_cost,
+ target_criticality, target_node, astar_fac,
+ highfanout_rlim);
+ }
+
+ free_heap_data(cheapest);
+ cheapest = get_heap_head();
+ // test heap property
+ // if (!heap_::is_valid()) {vpr_printf_info("invalid heap: "); heap_::pop_heap();}
+
+ if (cheapest == NULL) { /* Impossible routing. No path for net. */
+ vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
+ inet, g_clbs_nlist.net[inet].name, itarget);
+ reset_path_costs();
+ free_route_tree(rt_root);
+ return (false);
+ }
+
+ inode = cheapest->index;
+ }
+#ifdef PROFILE
+ time_on_criticality[target_criticality / criticality_per_bin] += static_cast<float>(clock() - sink_criticality_start) / CLOCKS_PER_SEC;
+ ++itry_on_criticality[target_criticality / criticality_per_bin];
+#endif
+ /* NB: In the code below I keep two records of the partial routing: the *
+ * traceback and the route_tree. The route_tree enables fast recomputation *
+ * of the Elmore delay to each node in the partial routing. The traceback *
+ * lets me reuse all the routines written for breadth-first routing, which *
+ * all take a traceback structure as input. Before this routine exits the *
+ * route_tree structure is destroyed; only the traceback is needed at that *
+ * point. */
+
+ rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */
+ new_route_start_tptr = update_traceback(cheapest, inet);
+ rt_node_of_sink[target_pin] = update_route_tree(cheapest);
+ free_heap_data(cheapest);
+ pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac);
+
+ empty_heap();
+ reset_path_costs();
+ }
+#ifdef PROFILE
+ if (!time_to_build_heap.empty()) time_to_build_heap[num_sinks / fanout_per_bin] += build_heap_time;
+#endif
+ /* For later timing analysis. */
+
+ update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet);
+ free_route_tree(rt_root);
+ return (true);
+}
+
+static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
+ float target_criticality, float astar_fac) {
+
+ /* Puts the entire partial routing below and including rt_node onto the heap *
+ * (except for those parts marked as not to be expanded) by calling itself *
+ * recursively. */
+
+ int inode;
+ t_rt_node *child_node;
+ t_linked_rt_edge *linked_rt_edge;
+ float tot_cost, backward_path_cost, R_upstream;
+
+ /* Pre-order depth-first traversal */
+
+ if (rt_node->re_expand) {
+ inode = rt_node->inode;
+ backward_path_cost = target_criticality * rt_node->Tdel;
+ R_upstream = rt_node->R_upstream;
+#ifndef OP_STRONG_LOOKAHEAD
+ tot_cost = backward_path_cost
+ + astar_fac
+ * get_timing_driven_expected_cost(inode, target_node,
+ target_criticality, R_upstream);
+#else
+ tot_cost = backward_path_cost
+ + astar_fac
+ * get_timing_driven_multiple_wirelengths_expected_cost(inode, target_node,
+ target_criticality, R_upstream);
+#endif
+ heap_::push_back_node(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS,
+ backward_path_cost, R_upstream);
+
+ }
+
+ linked_rt_edge = rt_node->u.child_list;
+
+ while (linked_rt_edge != NULL) {
+ child_node = linked_rt_edge->child;
+ add_route_tree_to_heap(child_node, target_node, target_criticality,
+ astar_fac);
+ linked_rt_edge = linked_rt_edge->next;
+ }
+}
+
+static void timing_driven_expand_neighbours(struct s_heap *current,
+ int inet, int itry,
+ float bend_cost, float criticality_fac, int target_node,
+ float astar_fac, int highfanout_rlim) {
+
+ /* Puts all the rr_nodes adjacent to current on the heap. rr_nodes outside *
+ * the expanded bounding box specified in route_bb are not added to the *
+ * heap. */
+
+ float new_R_upstream;
+
+ int inode = current->index;
+ float old_back_pcost = current->backward_path_cost;
+ float R_upstream = current->R_upstream;
+ int num_edges = rr_node[inode].get_num_edges();
+
+ int target_x = rr_node[target_node].get_xhigh();
+ int target_y = rr_node[target_node].get_yhigh();
+ bool high_fanout = g_clbs_nlist.net[inet].num_sinks() >= HIGH_FANOUT_NET_LIM;
+
+ for (int iconn = 0; iconn < num_edges; iconn++) {
+ int to_node = rr_node[inode].edges[iconn];
+
+ if (high_fanout) {
+ // since target node is an IPIN, xhigh and xlow are the same (same for y values)
+ if (rr_node[to_node].get_xhigh() < target_x - highfanout_rlim
+ || rr_node[to_node].get_xlow() > target_x + highfanout_rlim
+ || rr_node[to_node].get_yhigh() < target_y - highfanout_rlim
+ || rr_node[to_node].get_ylow() > target_y + highfanout_rlim){
+ continue; /* Node is outside high fanout bin. */
+ }
+ }
+ else if (rr_node[to_node].get_xhigh() < route_bb[inet].xmin
+ || rr_node[to_node].get_xlow() > route_bb[inet].xmax
+ || rr_node[to_node].get_yhigh() < route_bb[inet].ymin
+ || rr_node[to_node].get_ylow() > route_bb[inet].ymax)
+ continue; /* Node is outside (expanded) bounding box. */
+
+
+ /* Prune away IPINs that lead to blocks other than the target one. Avoids *
+ * the issue of how to cost them properly so they don't get expanded before *
+ * more promising routes, but makes route-throughs (via CLBs) impossible. *
+ * Change this if you want to investigate route-throughs. */
+
+ t_rr_type to_type = rr_node[to_node].type;
+ if (to_type == IPIN
+ && (rr_node[to_node].get_xhigh() != target_x
+ || rr_node[to_node].get_yhigh() != target_y))
+ continue;
+
+ /* new_back_pcost stores the "known" part of the cost to this node -- the *
+ * congestion cost of all the routing resources back to the existing route *
+ * plus the known delay of the total path back to the source. new_tot_cost *
+ * is this "known" backward cost + an expected cost to get to the target. */
+
+ float new_back_pcost = old_back_pcost
+ + (1. - criticality_fac) * get_rr_cong_cost(to_node);
+
+ int iswitch = rr_node[inode].switches[iconn];
+ if (g_rr_switch_inf[iswitch].buffered) {
+ new_R_upstream = g_rr_switch_inf[iswitch].R;
+ } else {
+ new_R_upstream = R_upstream + g_rr_switch_inf[iswitch].R;
+ }
+
+ float Tdel = rr_node[to_node].C * (new_R_upstream + 0.5 * rr_node[to_node].R);
+ Tdel += g_rr_switch_inf[iswitch].Tdel;
+ new_R_upstream += rr_node[to_node].R;
+ new_back_pcost += criticality_fac * Tdel;
+
+ if (bend_cost != 0.) {
+ t_rr_type from_type = rr_node[inode].type;
+ to_type = rr_node[to_node].type;
+ if ((from_type == CHANX && to_type == CHANY)
+ || (from_type == CHANY && to_type == CHANX))
+ new_back_pcost += bend_cost;
+ }
+
+#ifndef OP_STRONG_LOOKAHEAD
+ float expected_cost = get_timing_driven_expected_cost(to_node, target_node,
+ criticality_fac, new_R_upstream);
+#else
+ float expected_cost = get_timing_driven_multiple_wirelengths_expected_cost(to_node, target_node,
+ criticality_fac, new_R_upstream);
+#endif
+ float new_tot_cost = new_back_pcost + astar_fac * expected_cost;
+
+ node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost,
+ new_R_upstream);
+
+ } /* End for all neighbours */
+}
+
+#ifndef OP_STRONG_LOOKAHEAD
+static float get_timing_driven_expected_cost(int inode, int target_node,
+ float criticality_fac, float R_upstream) {
+
+ /* Determines the expected cost (due to both delay and resource cost) to reach *
+ * the target node from inode. It doesn't include the cost of inode -- *
+ * that's already in the "known" path_cost. */
+
+ t_rr_type rr_type;
+ int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;
+ float expected_cost, cong_cost, Tdel;
+
+ rr_type = rr_node[inode].type;
+
+ if (rr_type == CHANX || rr_type == CHANY) {
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+ int num_interposer_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
+#endif
+
+ num_segs_same_dir = get_expected_segs_to_target(inode, target_node,
+ &num_segs_ortho_dir);
+ cost_index = rr_node[inode].get_cost_index();
+ ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
+
+ cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].base_cost;
+ cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost
+ + rr_indexed_data[SINK_COST_INDEX].base_cost;
+
+ Tdel =
+ num_segs_same_dir * rr_indexed_data[cost_index].T_linear
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].T_linear
+ + num_segs_same_dir * num_segs_same_dir
+ * rr_indexed_data[cost_index].T_quadratic
+ + num_segs_ortho_dir * num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].T_quadratic
+ + R_upstream
+ * (num_segs_same_dir
+ * rr_indexed_data[cost_index].C_load
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].C_load);
+
+ Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+ float interposer_hop_delay = (float)delay_increase * 1e-12;
+ Tdel += num_interposer_hops * interposer_hop_delay;
+#endif
+
+ expected_cost = criticality_fac * Tdel
+ + (1. - criticality_fac) * cong_cost;
+ return (expected_cost);
+ }
+
+ else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
+ return (rr_indexed_data[SINK_COST_INDEX].base_cost);
+ }
+
+ else { /* Change this if you want to investigate route-throughs */
+ return (0.);
+ }
+}
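+
+/* In equation form, the lookahead above estimates, with n_s same-direction and
+ * n_o orthogonal segments to the target:
+ *
+ *     Tdel = n_s*T_linear + n_o*T_linear_ortho
+ *          + n_s^2*T_quadratic + n_o^2*T_quadratic_ortho
+ *          + R_upstream*(n_s*C_load + n_o*C_load_ortho) + T_linear(IPIN)
+ *
+ *     expected_cost = criticality_fac*Tdel + (1 - criticality_fac)*cong_cost
+ *
+ * where cong_cost sums the base costs of those segments plus one IPIN and one
+ * SINK. */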
+
+#else
+static float get_timing_driven_multiple_wirelengths_expected_cost(int inode, int target_node,
+ float criticality_fac, float R_upstream) {
+
+ /* Determines the expected cost (due to both delay and resource cost) to reach *
+ * the target node from inode. It doesn't include the cost of inode -- *
+ * that's already in the "known" path_cost. */
+
+ t_rr_type rr_type;
+ int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;
+ float expected_cost, cong_cost, Tdel;
+
+ rr_type = rr_node[inode].type;
+
+ if (rr_type == CHANX || rr_type == CHANY) {
+
+ /* OP: Want to account for multiple wirelengths by breaking the assumption that
+ a route will stay on the same kind of node that it's currently on. So here I
+ get the minimum expected cost across all CHANX cost indices. This significantly
+ drives up routing time, but is still much faster than running VPR with
+ astar_fac = 0.
+ */
+
+ float minimum_cost = -1.0;
+ for (cost_index = CHANX_COST_INDEX_START; cost_index < CHANX_COST_INDEX_START + (num_rr_indexed_data - CHANX_COST_INDEX_START)/2; cost_index++){ /* first half of the channel cost indices are the CHANX segment types */
+
+ num_segs_same_dir = get_expected_segs_to_target(inode, cost_index, target_node,
+ &num_segs_ortho_dir);
+ ortho_cost_index = cost_index; /* assume the orthogonal (CHANY) segment type of the same wirelength costs the same */
+
+ cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].base_cost;
+ cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost
+ + rr_indexed_data[SINK_COST_INDEX].base_cost;
+
+ Tdel =
+ num_segs_same_dir * rr_indexed_data[cost_index].T_linear
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].T_linear
+ + num_segs_same_dir * num_segs_same_dir
+ * rr_indexed_data[cost_index].T_quadratic
+ + num_segs_ortho_dir * num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].T_quadratic
+ + R_upstream
+ * (num_segs_same_dir
+ * rr_indexed_data[cost_index].C_load
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].C_load);
+
+ Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;
+
+ expected_cost = criticality_fac * Tdel
+ + (1. - criticality_fac) * cong_cost;
+
+ if (expected_cost < minimum_cost || minimum_cost < 0){
+ minimum_cost = expected_cost;
+ }
+ }
+ //assert( minimum_cost >= 0 );
+ return (minimum_cost);
+ }
+
+ else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
+ return (rr_indexed_data[SINK_COST_INDEX].base_cost);
+ }
+
+ else { /* Change this if you want to investigate route-throughs */
+ return (0.);
+ }
+}
+#endif
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+static int get_num_expected_interposer_hops_to_target(int inode, int target_node)
+{
+ /*
+ Description:
+ returns the expected number of times you have to cross the
+ interposer in order to go from inode to target_node.
+ This does not include inode itself.
+
+ Assumptions:
+ 1- Cuts are horizontal
+ 2- target_node is a terminal pin (hence its ylow==yhigh)
+ 3- wires that go through the interposer are uni-directional
+
+ --------- y=150
+ | |
+ --------- y=100
+ | |
+ --------- y=50
+ | |
+ --------- y=0
+
+ num_cuts = 2, cut_step = 50, cut_locations = {50, 100}
+ */
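+
+ /* Example for the figure above (cuts at y = 50 and y = 100): a wire whose
+ far end is at y_start = 75 routing to a sink at y_end = 125 crosses only
+ the cut at y = 100, so num_expected_hops = 1; from y = 25 to y = 125 it
+ crosses both cuts, giving 2. */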
+ int y_start; /* start point y-coordinate. (y-coordinate at the *end* of wire 'i') */
+ int y_end; /* destination (target) y-coordinate */
+ int cut_i, y_cut_location, num_expected_hops;
+ t_rr_type rr_type;
+
+ num_expected_hops = 0;
+ y_end = rr_node[target_node].get_ylow();
+ rr_type = rr_node[inode].type;
+
+ if(rr_type == CHANX)
+ { /* inode is a horizontal wire */
+ /* the ylow and yhigh are the same */
+ assert(rr_node[inode].get_ylow() == rr_node[inode].get_yhigh());
+ y_start = rr_node[inode].get_ylow();
+ }
+ else
+ { /* inode is a CHANY */
+ if(rr_node[inode].direction == INC_DIRECTION)
+ {
+ y_start = rr_node[inode].get_yhigh();
+ }
+ else if(rr_node[inode].direction == DEC_DIRECTION)
+ {
+ y_start = rr_node[inode].get_ylow();
+ }
+ else
+ {
+ /* this means direction is BIDIRECTIONAL! Error out. */
+ y_start = -1;
+ assert(rr_node[inode].direction!=BI_DIRECTION);
+ }
+ }
+
+ /* for every cut, is it between 'i' and 'target'? */
+ for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
+ {
+ y_cut_location = arch_cut_locations[cut_i];
+ if( (y_start < y_cut_location && y_cut_location < y_end) ||
+ (y_end < y_cut_location && y_cut_location < y_start))
+ {
+ ++num_expected_hops;
+ }
+ }
+
+ /* Make sure there is no off-by-one error. For the current node i:
+ if it's a vertical wire, node 'i' itself may be crossing the interposer.
+ */
+ if(rr_type == CHANY)
+ {
+ /* for every cut, does it cut wire 'i'? */
+ for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
+ {
+ y_cut_location = arch_cut_locations[cut_i];
+ if(rr_node[inode].get_ylow() < y_cut_location && y_cut_location < rr_node[inode].get_yhigh())
+ {
+ ++num_expected_hops;
+ }
+ }
+ }
+
+ return num_expected_hops;
+}
+#endif
+
+/* Macro used below to ensure that fractions are rounded up, but floating *
+ * point values very close to an integer are rounded to that integer *
+ * (e.g. ROUND_UP(1.5) == 2, while ROUND_UP(2.0005) == 2, not 3). */
+#define ROUND_UP(x) (ceil ((x) - 0.001))
+
+#ifndef OP_STRONG_LOOKAHEAD
+static int get_expected_segs_to_target(int inode, int target_node,
+ int *num_segs_ortho_dir_ptr) {
+
+ /* Returns the number of segments the same type as inode that will be needed *
+ * to reach target_node (not including inode) in each direction (the same *
+ * direction (horizontal or vertical) as inode and the orthogonal direction).*/
+
+ t_rr_type rr_type;
+ int target_x, target_y, num_segs_same_dir, cost_index, ortho_cost_index;
+ int no_need_to_pass_by_clb;
+ float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh;
+
+ #ifdef INTERPOSER_BASED_ARCHITECTURE
+ int num_expected_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
+ #endif
+
+ target_x = rr_node[target_node].get_xlow();
+ target_y = rr_node[target_node].get_ylow();
+ cost_index = rr_node[inode].get_cost_index();
+ inv_length = rr_indexed_data[cost_index].inv_length;
+ ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
+ ortho_inv_length = rr_indexed_data[ortho_cost_index].inv_length;
+ rr_type = rr_node[inode].type;
+
+ if (rr_type == CHANX) {
+ ylow = rr_node[inode].get_ylow();
+ xhigh = rr_node[inode].get_xhigh();
+ xlow = rr_node[inode].get_xlow();
+
+ /* Count vertical (orthogonal to inode) segs first. */
+
+ if (ylow > target_y) { /* Coming from a row above target? */
+ *num_segs_ortho_dir_ptr =
+ (int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else if (ylow < target_y - 1) { /* Below the CLB bottom? */
+ *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) *
+ ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else { /* In a row that passes by target CLB */
+ *num_segs_ortho_dir_ptr = 0;
+ no_need_to_pass_by_clb = 0;
+ }
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+ *num_segs_ortho_dir_ptr += 2 * num_expected_hops;
+#endif
+
+ /* Now count horizontal (same dir. as inode) segs. */
+
+ if (xlow > target_x + no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb -
+ target_x) * inv_length));
+ } else if (xhigh < target_x - no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb -
+ xhigh) * inv_length));
+ } else {
+ num_segs_same_dir = 0;
+ }
+ }
+
+ else { /* inode is a CHANY */
+ ylow = rr_node[inode].get_ylow();
+ yhigh = rr_node[inode].get_yhigh();
+ xlow = rr_node[inode].get_xlow();
+
+ /* Count horizontal (orthogonal to inode) segs first. */
+
+ if (xlow > target_x) { /* Coming from a column right of target? */
+ *num_segs_ortho_dir_ptr = (int)(
+ ROUND_UP((xlow - target_x + 1.) * ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */
+ *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) *
+ ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else { /* In a column that passes by target CLB */
+ *num_segs_ortho_dir_ptr = 0;
+ no_need_to_pass_by_clb = 0;
+ }
+
+ /* Now count vertical (same dir. as inode) segs. */
+
+ if (ylow > target_y + no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb -
+ target_y) * inv_length));
+ } else if (yhigh < target_y - no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb -
+ yhigh) * inv_length));
+ } else {
+ num_segs_same_dir = 0;
+ }
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+ num_segs_same_dir += 2 * num_expected_hops;
+#endif
+ }
+
+ return (num_segs_same_dir);
+}
+
+#else
+static int get_expected_segs_to_target(int inode, int via_cost_index, int target_node,
+ int *num_segs_ortho_dir_ptr) {
+
+ /* Returns the number of segments the same type as inode that will be needed *
+ * to reach target_node (not including inode) in each direction (the same *
+ * direction (horizontal or vertical) as inode and the orthogonal direction).*/
+
+ /* OP: 'via_cost_index' is the cost index representing the type of node that we
+ will stay on for the remainder of the routing */
+
+ t_rr_type rr_type;
+ int target_x, target_y, num_segs_same_dir, cost_index;
+ int no_need_to_pass_by_clb;
+ float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh;
+
+ target_x = rr_node[target_node].get_xlow();
+ target_y = rr_node[target_node].get_ylow();
+ cost_index = via_cost_index;
+ inv_length = rr_indexed_data[cost_index].inv_length;
+ ortho_inv_length = inv_length;
+ rr_type = rr_node[inode].type;
+
+ if (rr_type == CHANX) {
+ ylow = rr_node[inode].get_ylow();
+ xhigh = rr_node[inode].get_xhigh();
+ xlow = rr_node[inode].get_xlow();
+
+ /* Count vertical (orthogonal to inode) segs first. */
+
+ if (ylow > target_y) { /* Coming from a row above target? */
+ *num_segs_ortho_dir_ptr =
+ (int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else if (ylow < target_y - 1) { /* Below the CLB bottom? */
+ *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) *
+ ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else { /* In a row that passes by target CLB */
+ *num_segs_ortho_dir_ptr = 0;
+ no_need_to_pass_by_clb = 0;
+ }
+
+ /* Now count horizontal (same dir. as inode) segs. */
+
+ if (xlow > target_x + no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb -
+ target_x) * inv_length));
+ } else if (xhigh < target_x - no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb -
+ xhigh) * inv_length));
+ } else {
+ num_segs_same_dir = 0;
+ }
+ }
+
+ else { /* inode is a CHANY */
+ ylow = rr_node[inode].get_ylow();
+ yhigh = rr_node[inode].get_yhigh();
+ xlow = rr_node[inode].get_xlow();
+
+ /* Count horizontal (orthogonal to inode) segs first. */
+
+ if (xlow > target_x) { /* Coming from a column right of target? */
+ *num_segs_ortho_dir_ptr = (int)(
+ ROUND_UP((xlow - target_x + 1.) * ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */
+ *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) *
+ ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else { /* In a column that passes by target CLB */
+ *num_segs_ortho_dir_ptr = 0;
+ no_need_to_pass_by_clb = 0;
+ }
+
+ /* Now count vertical (same dir. as inode) segs. */
+
+ if (ylow > target_y + no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb -
+ target_y) * inv_length));
+ } else if (yhigh < target_y - no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb -
+ yhigh) * inv_length));
+ } else {
+ num_segs_same_dir = 0;
+ }
+ }
+
+ return (num_segs_same_dir);
+}
+#endif
+
+static void update_rr_base_costs(int inet, float largest_criticality) {
+
+ /* Changes the base costs of different types of rr_nodes according to the *
+ * criticality, fanout, etc. of the current net being routed (inet). */
+
+ float fanout, factor;
+ int index;
+
+ fanout = g_clbs_nlist.net[inet].num_sinks();
+
+ /* Other reasonable values for factor include fanout and 1 */
+ factor = sqrt(fanout);
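+ /* e.g. a 16-sink net scales the base cost of pass-transistor segment types
+ (those with T_quadratic > 0) by sqrt(16) = 4 in the loop below */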
+
+ for (index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) {
+ if (rr_indexed_data[index].T_quadratic > 0.) { /* pass transistor */
+ rr_indexed_data[index].base_cost =
+ rr_indexed_data[index].saved_base_cost * factor;
+ } else {
+ rr_indexed_data[index].base_cost =
+ rr_indexed_data[index].saved_base_cost;
+ }
+ }
+}
+
+/* Nets that have high fanout can take a very long time to route. Routing for sinks that are part of high-fanout nets should be
+ done within a rectangular 'bin' centered around a target (as opposed to the entire net bounding box), the size of which is returned by this function. */
+static int mark_node_expansion_by_bin(int inet, int target_node,
+ t_rt_node * rt_node) {
+ int target_xlow, target_ylow, target_xhigh, target_yhigh;
+ int rlim = 1;
+ int inode;
+ float area;
+ bool success;
+ t_linked_rt_edge *linked_rt_edge;
+ t_rt_node * child_node;
+
+ target_xlow = rr_node[target_node].get_xlow();
+ target_ylow = rr_node[target_node].get_ylow();
+ target_xhigh = rr_node[target_node].get_xhigh();
+ target_yhigh = rr_node[target_node].get_yhigh();
+
+ if (g_clbs_nlist.net[inet].num_sinks() < HIGH_FANOUT_NET_LIM) {
+ /* This algorithm only applies to high fanout nets */
+ return 1;
+ }
+ if (rt_node == NULL || rt_node->u.child_list == NULL) {
+ /* If unknown traceback, set radius of bin to be size of chip */
+ rlim = max(nx + 2, ny + 2);
+ return rlim;
+ }
+
+ area = (route_bb[inet].xmax - route_bb[inet].xmin)
+ * (route_bb[inet].ymax - route_bb[inet].ymin);
+ if (area <= 0) {
+ area = 1;
+ }
+
+ rlim = (int)(ceil(sqrt((float) area / (float) g_clbs_nlist.net[inet].num_sinks())));
+
+ success = false;
+ /* Quickly determine a feasible bin radius in which to route this sink of a high-fanout net.
+ This is necessary to prevent very long runtimes; in the best case it reduces complexity from O(N^2 logN) to O(NlogN) (Swartz fast router)
+ */
+ linked_rt_edge = rt_node->u.child_list;
+ while (success == false && linked_rt_edge != NULL) {
+ while (linked_rt_edge != NULL && success == false) {
+ child_node = linked_rt_edge->child;
+ inode = child_node->inode;
+ if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
+ if (rr_node[inode].get_xlow() <= target_xhigh + rlim
+ && rr_node[inode].get_xhigh() >= target_xhigh - rlim
+ && rr_node[inode].get_ylow() <= target_yhigh + rlim
+ && rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
+ success = true;
+ }
+ }
+ linked_rt_edge = linked_rt_edge->next;
+ }
+
+ if (success == false) {
+ if (rlim > max(nx + 2, ny + 2)) {
+ vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
+ "VPR internal error, net %s has paths that are not found in traceback.\n", g_clbs_nlist.net[inet].name);
+ }
+ /* if sink not in bin, increase bin size until fit */
+ rlim *= 2;
+ } else {
+ /* We may have just caught a wire at the end of a segment; give it some channel space to explore */
+ rlim += 4;
+ }
+ linked_rt_edge = rt_node->u.child_list;
+ }
+
+ /* adjust rlim to account for width/height of block containing the target sink */
+ int target_span = max( target_xhigh - target_xlow, target_yhigh - target_ylow );
+ rlim += target_span;
+
+ /* redetermine expansion based on rlim */
+ linked_rt_edge = rt_node->u.child_list;
+ while (linked_rt_edge != NULL) {
+ child_node = linked_rt_edge->child;
+ inode = child_node->inode;
+ if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
+ if (rr_node[inode].get_xlow() <= target_xhigh + rlim
+ && rr_node[inode].get_xhigh() >= target_xhigh - rlim
+ && rr_node[inode].get_ylow() <= target_yhigh + rlim
+ && rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
+ child_node->re_expand = true;
+ } else {
+ child_node->re_expand = false;
+ }
+ }
+ linked_rt_edge = linked_rt_edge->next;
+ }
+ return rlim;
+}
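+
+/* Worked example of the bin search above: a net with a 20x20 bounding box
+ * (area = 400) and 100 sinks starts with rlim = ceil(sqrt(400/100)) = 2. If no
+ * existing route-tree node lies within 2 of the target, the radius doubles
+ * (4, 8, ...); once one is found, 4 channels of slack plus the span of the
+ * target block are added before deciding which nodes to re-expand. */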
+#define ERROR_TOL 0.0001
+
+static void timing_driven_check_net_delays(float **net_delay) {
+
+ /* Checks that the net delays computed incrementally during timing driven *
+ * routing match those computed from scratch by the net_delay.c module. */
+
+ unsigned int inet, ipin;
+ float **net_delay_check;
+
+ t_chunk list_head_net_delay_check_ch = {NULL, 0, NULL};
+
+ /*struct s_linked_vptr *ch_list_head_net_delay_check;*/
+
+ net_delay_check = alloc_net_delay(&list_head_net_delay_check_ch, g_clbs_nlist.net,
+ g_clbs_nlist.net.size());
+ load_net_delay_from_routing(net_delay_check, g_clbs_nlist.net, g_clbs_nlist.net.size());
+
+ for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
+ for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
+ if (net_delay_check[inet][ipin] == 0.) { /* Should be only GLOBAL nets */
+ if (fabs(net_delay[inet][ipin]) > ERROR_TOL) {
+ vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
+ "in timing_driven_check_net_delays: net %d pin %d.\n"
+ "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
+ inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
+ }
+ } else {
+ if (fabs(1.0 - net_delay[inet][ipin] / net_delay_check[inet][ipin]) > ERROR_TOL) {
+ vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
+ "in timing_driven_check_net_delays: net %d pin %d.\n"
+ "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
+ inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
+ }
+ }
+ }
+ }
+
+ free_net_delay(net_delay_check, &list_head_net_delay_check_ch);
+ vpr_printf_info("Completed net delay value cross check successfully.\n");
+}
+
+
+/* Detect if net should be routed or not */
+static bool should_route_net(int inet) {
+ t_trace * tptr = trace_head[inet];
+
+ if (tptr == NULL) {
+ /* No routing yet. */
+ return true;
+ }
+
+ for (;;) {
+ int inode = tptr->index;
+ int occ = rr_node[inode].get_occ();
+ int capacity = rr_node[inode].get_capacity();
+
+ if (occ > capacity) {
+ return true; /* overuse detected */
+ }
+
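+ /* The traceback entry that follows a SINK re-states the branch point
+ from which the next branch leaves -- a node already examined earlier
+ in this walk -- which is presumably why it is skipped here. */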
+ if (rr_node[inode].type == SINK) {
+ tptr = tptr->next; /* Skip next segment. */
+ if (tptr == NULL)
+ break;
+ }
+
+ tptr = tptr->next;
+
+ } /* End while loop -- did an entire traceback. */
+
+ return false; /* Current route has no overuse */
+}
+
+static bool early_exit_heuristic(const t_router_opts& router_opts) {
+ /* Early exit code for cases where it is obvious that a successful route will not be found.
+ Heuristic: if the total wirelength used in the first routing iteration exceeds FIRST_ITER_WIRELENTH_LIMIT (a fraction of the total available wirelength), exit. */
+ int total_wirelength = 0;
+ int available_wirelength = 0;
+
+ for (int i = 0; i < num_rr_nodes; ++i) {
+ if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
+ available_wirelength += 1 +
+ rr_node[i].get_xhigh() - rr_node[i].get_xlow() +
+ rr_node[i].get_yhigh() - rr_node[i].get_ylow();
+ }
+ }
+
+ for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
+ if (g_clbs_nlist.net[inet].is_global == false
+ && g_clbs_nlist.net[inet].num_sinks() != 0) { /* Globals don't count. */
+ int bends, wirelength, segments;
+ get_num_bends_and_length(inet, &bends, &wirelength, &segments);
+
+ total_wirelength += wirelength;
+ }
+ }
+ vpr_printf_info("Wire length after first iteration %d, total available wire length %d, ratio %g\n",
+ total_wirelength, available_wirelength,
+ (float) (total_wirelength) / (float) (available_wirelength));
+ if ((float) (total_wirelength) / (float) (available_wirelength)> FIRST_ITER_WIRELENTH_LIMIT) {
+ vpr_printf_info("Wire length usage ratio exceeds limit of %g, fail routing.\n",
+ FIRST_ITER_WIRELENTH_LIMIT);
+ return true;
+ }
+ vpr_printf_info("--------- ---------- ----------- ---------------------\n");
+ vpr_printf_info("Iteration Time Crit Path Overused RR Nodes\n");
+ vpr_printf_info("--------- ---------- ----------- ---------------------\n");
+ return false;
+}
+