Adding 'strong' look-ahead
diff --git a/vpr/SRC/route/route_timing.c b/vpr/SRC/route/route_timing.c
index a86b1c1..8004761 100755
--- a/vpr/SRC/route/route_timing.c
+++ b/vpr/SRC/route/route_timing.c
@@ -1,1306 +1,1509 @@
-#include <cstdio>
-#include <ctime>
-#include <cmath>
-#include <assert.h>
-#include <vector>
-#include <algorithm>
-#include "vpr_utils.h"
-#include "util.h"
-#include "vpr_types.h"
-#include "globals.h"
-#include "route_export.h"
-#include "route_common.h"
-#include "route_tree_timing.h"
-#include "route_timing.h"
-#include "heapsort.h"
-#include "path_delay.h"
-#include "net_delay.h"
-#include "stats.h"
-#include "ReadOptions.h"
-
-
-using namespace std;
-
-
-
-/******************** Subroutines local to route_timing.c ********************/
-
-static int get_max_pins_per_net(void);
-
-static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
- float target_criticality, float astar_fac);
-
-static void timing_driven_expand_neighbours(struct s_heap *current,
- int inet, int itry,
- float bend_cost, float criticality_fac, int target_node,
- float astar_fac, int highfanout_rlim);
-
-static float get_timing_driven_expected_cost(int inode, int target_node,
- float criticality_fac, float R_upstream);
-#ifdef INTERPOSER_BASED_ARCHITECTURE
-static int get_num_expected_interposer_hops_to_target(int inode, int target_node);
-#endif
-
-static int get_expected_segs_to_target(int inode, int target_node,
- int *num_segs_ortho_dir_ptr);
-
-static void update_rr_base_costs(int inet, float largest_criticality);
-
-static void timing_driven_check_net_delays(float **net_delay);
-
-static int mark_node_expansion_by_bin(int inet, int target_node,
- t_rt_node * rt_node);
-
-static double get_overused_ratio();
-
-static bool should_route_net(int inet);
-static bool early_exit_heuristic(const t_router_opts& router_opts);
-struct more_sinks_than {
- inline bool operator() (const int& net_index1, const int& net_index2) {
- return g_clbs_nlist.net[net_index1].num_sinks() > g_clbs_nlist.net[net_index2].num_sinks();
- }
-};
-
-
-#ifdef PROFILE
-static void congestion_analysis();
-static void time_on_criticality_analysis();
-constexpr int fanout_per_bin = 4;
-constexpr float criticality_per_bin = 0.05;
-static vector<float> time_on_fanout;
-static vector<float> time_to_build_heap;
-static vector<int> itry_on_fanout;
-static vector<float> time_on_criticality;
-static vector<int> itry_on_criticality;
-#endif
-
-/************************ Subroutine definitions *****************************/
-bool try_timing_driven_route(struct s_router_opts router_opts,
- float **net_delay, t_slack * slacks, t_ivec ** clb_opins_used_locally,
- bool timing_analysis_enabled, const t_timing_inf &timing_inf) {
-
- /* Timing-driven routing algorithm. The timing graph (includes slack) *
- * must have already been allocated, and net_delay must have been allocated. *
- * Returns true if the routing succeeds, false otherwise. */
-#ifdef PROFILE
- const int max_fanout {get_max_pins_per_net()};
- time_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);
- time_to_build_heap.resize((max_fanout / fanout_per_bin) + 1, 0);
- itry_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);
- time_on_criticality.resize((1 / criticality_per_bin) + 1, 0);
- itry_on_criticality.resize((1 / criticality_per_bin) + 1, 0);
-#endif
-
- auto sorted_nets = vector<int>(g_clbs_nlist.net.size());
-
- for (size_t i = 0; i < g_clbs_nlist.net.size(); ++i) {
- sorted_nets[i] = i;
- }
-
- // sort so net with most sinks is first.
- std::sort(sorted_nets.begin(), sorted_nets.end(), more_sinks_than());
-
-	timing_driven_route_structs route_structs{}; // allocates route structs (calls default constructor)
- // contains:
- // float *pin_criticality; /* [1..max_pins_per_net-1] */
- // int *sink_order; /* [1..max_pins_per_net-1] */;
- // t_rt_node **rt_node_of_sink; /* [1..max_pins_per_net-1] */
-
- /* First do one routing iteration ignoring congestion to get reasonable net
- delay estimates. Set criticalities to 1 when timing analysis is on to
- optimize timing, and to 0 when timing analysis is off to optimize routability. */
-
- float init_timing_criticality_val = (timing_analysis_enabled ? 1.0 : 0.0);
-
-	/* Variables used to monitor the routing optimization and to abort early
-	 * when the current channel width (W) is clearly unroutable */
- double overused_ratio;
- vector<double> historical_overuse_ratio;
- historical_overuse_ratio.reserve(router_opts.max_router_iterations + 1);
-
- // for unroutable large circuits, the time per iteration seems to increase dramatically
- vector<float> time_per_iteration;
- time_per_iteration.reserve(router_opts.max_router_iterations + 1);
-
- for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
- if (g_clbs_nlist.net[inet].is_global == false) {
-			for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
-				slacks->timing_criticality[inet][ipin] = init_timing_criticality_val;
-#ifdef PATH_COUNTING
-				slacks->path_criticality[inet][ipin] = init_timing_criticality_val;
-#endif
-			}
- } else {
- /* Set delay of global signals to zero. Non-global net delays are set by
- update_net_delays_from_route_tree() inside timing_driven_route_net(),
- which is only called for non-global nets. */
- for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
- net_delay[inet][ipin] = 0.;
- }
- }
- }
-
- float pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */
-
- for (int itry = 1; itry <= router_opts.max_router_iterations; ++itry) {
- clock_t begin = clock();
-
- /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */
- for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
- g_clbs_nlist.net[inet].is_routed = false;
- g_clbs_nlist.net[inet].is_fixed = false;
- }
-
- for (unsigned int i = 0; i < g_clbs_nlist.net.size(); ++i) {
- bool is_routable = try_timing_driven_route_net(
- sorted_nets[i],
- itry,
- pres_fac,
- router_opts,
- route_structs.pin_criticality,
- route_structs.sink_order,
- route_structs.rt_node_of_sink,
- net_delay,
- slacks
- );
-
- if (!is_routable) {
- return (false);
- }
- }
-
- clock_t end = clock();
- float time = static_cast<float>(end - begin) / CLOCKS_PER_SEC;
- time_per_iteration.push_back(time);
-
- if (itry == 1 && early_exit_heuristic(router_opts)) return false;
-
- /* Make sure any CLB OPINs used up by subblocks being hooked directly
- to them are reserved for that purpose. */
-
- bool rip_up_local_opins = (itry == 1 ? false : true);
- reserve_locally_used_opins(pres_fac, router_opts.acc_fac, rip_up_local_opins, clb_opins_used_locally);
-
- /* Pathfinder guys quit after finding a feasible route. I may want to keep
- going longer, trying to improve timing. Think about this some. */
-
- bool success = feasible_routing();
-
- /* Verification to check the ratio of overused nodes, depending on the configuration
- * may abort the routing if the ratio is too high. */
- overused_ratio = get_overused_ratio();
- historical_overuse_ratio.push_back(overused_ratio);
-
- /* Determine when routing is impossible hence should be aborted */
- if (itry > 5){
-
- int expected_success_route_iter = predict_success_route_iter(historical_overuse_ratio, router_opts);
- if (expected_success_route_iter == UNDEFINED) {
- return false;
- }
-
-#ifdef REGRESSION_EXIT
- if (itry > 15) {
- // compare their slopes over the last 5 iterations
- double time_per_iteration_slope = linear_regression_vector(time_per_iteration, itry-5);
- double congestion_per_iteration_slope = linear_regression_vector(historical_overuse_ratio, itry-5);
- if (router_opts.congestion_analysis)
-				vpr_printf_info("%f s/iteration %f %%/iteration\n", time_per_iteration_slope, congestion_per_iteration_slope);
- // time is increasing and congestion is non-decreasing (grows faster than 10% per iteration)
- if (congestion_per_iteration_slope > 0 && time_per_iteration_slope > 0.1*time_per_iteration.back()
- && time_per_iteration_slope > 1) { // filter out noise
-				vpr_printf_info("Time per iteration growing too fast at slope %f s/iteration\n"
-					"while congestion grows at %f %%/iteration, unlikely to finish.\n",
-					time_per_iteration_slope, congestion_per_iteration_slope);
-
- return false;
- }
- }
-#endif
- }
-
- //print_usage_by_wire_length();
-
-
- if (success) {
-
- if (timing_analysis_enabled) {
- load_timing_graph_net_delays(net_delay);
- do_timing_analysis(slacks, timing_inf, false, false);
- float critical_path_delay = get_critical_path_delay();
-				vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %%)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
- vpr_printf_info("Critical path: %g ns\n", critical_path_delay);
- } else {
-				vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %%)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
- }
-
- vpr_printf_info("Successfully routed after %d routing iterations.\n", itry);
-#ifdef DEBUG
- if (timing_analysis_enabled)
- timing_driven_check_net_delays(net_delay);
-#endif
- return (true);
- }
-
- if (itry == 1) {
- pres_fac = router_opts.initial_pres_fac;
- pathfinder_update_cost(pres_fac, 0.); /* Acc_fac=0 for first iter. */
- } else {
- pres_fac *= router_opts.pres_fac_mult;
-
- /* Avoid overflow for high iteration counts, even if acc_cost is big */
- pres_fac = min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5));
-
- pathfinder_update_cost(pres_fac, router_opts.acc_fac);
- }
-
- if (timing_analysis_enabled) {
- /* Update slack values by doing another timing analysis.
- Timing_driven_route_net updated the net delay values. */
-
- load_timing_graph_net_delays(net_delay);
-
- do_timing_analysis(slacks, timing_inf, false, false);
-
- } else {
- /* If timing analysis is not enabled, make sure that the criticalities and the
- net_delays stay as 0 so that wirelength can be optimized. */
-
- for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
- for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
- slacks->timing_criticality[inet][ipin] = 0.;
-#ifdef PATH_COUNTING
- slacks->path_criticality[inet][ipin] = 0.;
-#endif
- net_delay[inet][ipin] = 0.;
- }
- }
- }
-
-
- if (timing_analysis_enabled) {
- float critical_path_delay = get_critical_path_delay();
-			vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %%)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
- } else {
-			vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %%)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
- }
-
-#ifdef PROFILE
- if (router_opts.congestion_analysis) congestion_analysis();
- if (router_opts.fanout_analysis) time_on_fanout_analysis();
- time_on_criticality_analysis();
-#endif
- fflush(stdout);
- }
- vpr_printf_info("Routing failed.\n");
-
- return (false);
-}
-
-// profiling per iteration functions
-#ifdef PROFILE
-void time_on_fanout_analysis() {
- // using the global time_on_fanout and itry_on_fanout
-	vpr_printf_info("fanout low      time (s)        attempts    heap build time (s)\n");
- for (size_t bin = 0; bin < time_on_fanout.size(); ++bin) {
- if (itry_on_fanout[bin]) { // avoid printing the many 0 bins
- vpr_printf_info("%4d %14.3f %12d %14.3f\n",bin*fanout_per_bin, time_on_fanout[bin], itry_on_fanout[bin], time_to_build_heap[bin]);
- }
- }
-}
-
-void time_on_criticality_analysis() {
-	vpr_printf_info("criticality low      time (s)        attempts\n");
- for (size_t bin = 0; bin < time_on_criticality.size(); ++bin) {
- if (itry_on_criticality[bin]) { // avoid printing the many 0 bins
-			vpr_printf_info("%4.2f %14.3f %12d\n", bin*criticality_per_bin, time_on_criticality[bin], itry_on_criticality[bin]);
- }
- }
-}
-// at the end of a routing iteration, profile how much congestion is taken up by each type of rr_node
-// efficient bit array for checking against congested type
-struct Congested_node_types {
- uint32_t mask;
- Congested_node_types() : mask{0} {}
- void set_congested(int rr_node_type) {mask |= (1 << rr_node_type);}
- void clear_congested(int rr_node_type) {mask &= ~(1 << rr_node_type);}
- bool is_congested(int rr_node_type) const {return mask & (1 << rr_node_type);}
- bool empty() const {return mask == 0;}
-};
-
-void congestion_analysis() {
- static const std::vector<const char*> node_typename {
- "SOURCE",
- "SINK",
- "IPIN",
- "OPIN",
- "CHANX",
- "CHANY",
- "ICE"
- };
- // each type indexes into array which holds the congestion for that type
- std::vector<int> congestion_per_type((size_t) NUM_RR_TYPES, 0);
- // print out specific node information if congestion for type is low enough
-
- int total_congestion = 0;
- for (int inode = 0; inode < num_rr_nodes; ++inode) {
- const t_rr_node& node = rr_node[inode];
- int congestion = node.get_occ() - node.get_capacity();
-
- if (congestion > 0) {
- total_congestion += congestion;
- congestion_per_type[node.type] += congestion;
- }
- }
-
- constexpr int specific_node_print_threshold = 5;
- Congested_node_types congested;
- for (int type = SOURCE; type < NUM_RR_TYPES; ++type) {
- float congestion_percentage = (float)congestion_per_type[type] / (float) total_congestion * 100;
-		vpr_printf_info(" %6s: %10.6f %%\n", node_typename[type], congestion_percentage);
- // nodes of that type need specific printing
- if (congestion_per_type[type] > 0 &&
- congestion_per_type[type] < specific_node_print_threshold) congested.set_congested(type);
- }
-
- // specific print out each congested node
- if (!congested.empty()) {
- vpr_printf_info("Specific congested nodes\nxlow ylow type\n");
- for (int inode = 0; inode < num_rr_nodes; ++inode) {
- const t_rr_node& node = rr_node[inode];
- if (congested.is_congested(node.type) && (node.get_occ() - node.get_capacity()) > 0) {
- vpr_printf_info("(%3d,%3d) %6s\n", node.get_xlow(), node.get_ylow(), node_typename[node.type]);
- }
- }
- }
-}
-#endif
-
-bool try_timing_driven_route_net(int inet, int itry, float pres_fac,
- struct s_router_opts router_opts,
- float* pin_criticality, int* sink_order,
- t_rt_node** rt_node_of_sink, float** net_delay, t_slack* slacks) {
-
- bool is_routed = false;
-
- if (g_clbs_nlist.net[inet].is_fixed) { /* Skip pre-routed nets. */
- is_routed = true;
- } else if (g_clbs_nlist.net[inet].is_global) { /* Skip global nets. */
- is_routed = true;
- } else if (should_route_net(inet) == false) {
- is_routed = true;
- } else{
- // track time spent vs fanout
-#ifdef PROFILE
- clock_t net_begin = clock();
-#endif
-
- is_routed = timing_driven_route_net(inet, itry, pres_fac,
- router_opts.max_criticality, router_opts.criticality_exp,
- router_opts.astar_fac, router_opts.bend_cost,
- pin_criticality, sink_order,
- rt_node_of_sink, net_delay[inet], slacks);
-
-#ifdef PROFILE
- float time_for_net = static_cast<float>(clock() - net_begin) / CLOCKS_PER_SEC;
- int net_fanout = g_clbs_nlist.net[inet].num_sinks();
- time_on_fanout[net_fanout / fanout_per_bin] += time_for_net;
- itry_on_fanout[net_fanout / fanout_per_bin] += 1;
-#endif
-
- /* Impossible to route? (disconnected rr_graph) */
- if (is_routed) {
- g_clbs_nlist.net[inet].is_routed = true;
- g_atoms_nlist.net[clb_to_vpack_net_mapping[inet]].is_routed = true;
- } else {
- vpr_printf_info("Routing failed.\n");
- }
- }
- return (is_routed);
-}
-
-/*
- * NOTE:
- * Suggest using a timing_driven_route_structs struct. Memory is managed for you
- */
-void alloc_timing_driven_route_structs(float **pin_criticality_ptr,
- int **sink_order_ptr, t_rt_node *** rt_node_of_sink_ptr) {
-
- /* Allocates all the structures needed only by the timing-driven router. */
-
- int max_pins_per_net = get_max_pins_per_net();
-
- float *pin_criticality = (float *) my_malloc((max_pins_per_net - 1) * sizeof(float));
- *pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */
-
- int *sink_order = (int *) my_malloc((max_pins_per_net - 1) * sizeof(int));
- *sink_order_ptr = sink_order - 1;
-
- t_rt_node **rt_node_of_sink = (t_rt_node **) my_malloc((max_pins_per_net - 1) * sizeof(t_rt_node *));
- *rt_node_of_sink_ptr = rt_node_of_sink - 1;
-
- alloc_route_tree_timing_structs();
-}
-
-/* This function gets ratio of overused nodes (overused_nodes / num_rr_nodes) */
-static double get_overused_ratio(){
- double overused_nodes = 0.0;
- int inode;
- for(inode = 0; inode < num_rr_nodes; inode++){
- if(rr_node[inode].get_occ() > rr_node[inode].get_capacity())
- overused_nodes += (rr_node[inode].get_occ() - rr_node[inode].get_capacity());
- }
- overused_nodes /= (double)num_rr_nodes;
- return overused_nodes;
-}
-
-/*
- * NOTE:
- * Suggest using a timing_driven_route_structs struct. Memory is managed for you
- */
-void free_timing_driven_route_structs(float *pin_criticality, int *sink_order,
- t_rt_node ** rt_node_of_sink) {
-
-	/* Frees all the structures needed only by the timing-driven router. */
-
- free(pin_criticality + 1); /* Starts at index 1. */
- free(sink_order + 1);
- free(rt_node_of_sink + 1);
- free_route_tree_timing_structs();
-}
-
-timing_driven_route_structs::timing_driven_route_structs() {
- alloc_timing_driven_route_structs(
- &pin_criticality,
- &sink_order,
- &rt_node_of_sink
- );
-}
-
-timing_driven_route_structs::~timing_driven_route_structs() {
- free_timing_driven_route_structs(
- pin_criticality,
- sink_order,
- rt_node_of_sink
- );
-}
-
-static int get_max_pins_per_net(void) {
-
- /* Returns the largest number of pins on any non-global net. */
-
- unsigned int inet;
- int max_pins_per_net;
-
- max_pins_per_net = 0;
- for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
- if (g_clbs_nlist.net[inet].is_global == false) {
- max_pins_per_net = max(max_pins_per_net,
- (int) g_clbs_nlist.net[inet].pins.size());
- }
- }
-
- return (max_pins_per_net);
-}
-
-bool timing_driven_route_net(int inet, int itry, float pres_fac, float max_criticality,
- float criticality_exp, float astar_fac, float bend_cost,
- float *pin_criticality, int *sink_order,
- t_rt_node ** rt_node_of_sink, float *net_delay, t_slack * slacks) {
-
-	/* Returns true as long as it found some way to hook up this net, even if that  *
- * way resulted in overuse of resources (congestion). If there is no way *
- * to route this net, even ignoring congestion, it returns false. In this *
- * case the rr_graph is disconnected and you can give up. If slacks = NULL, *
- * give each net a dummy criticality of 0. */
-
- unsigned int ipin;
- int num_sinks, itarget, target_pin, target_node, inode;
- float target_criticality, old_total_cost, new_total_cost, largest_criticality,
- old_back_cost, new_back_cost;
- t_rt_node *rt_root;
-
- struct s_trace *new_route_start_tptr;
- int highfanout_rlim;
-
- /* Rip-up any old routing. */
-
- pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);
- free_traceback(inet);
-
- for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
- if (!slacks) {
- /* Use criticality of 1. This makes all nets critical. Note: There is a big difference between setting pin criticality to 0
- compared to 1. If pin criticality is set to 0, then the current path delay is completely ignored during routing. By setting
- pin criticality to 1, the current path delay to the pin will always be considered and optimized for */
- pin_criticality[ipin] = 1.0;
- } else {
-#ifdef PATH_COUNTING
- /* Pin criticality is based on a weighted sum of timing and path criticalities. */
- pin_criticality[ipin] = ROUTE_PATH_WEIGHT * slacks->path_criticality[inet][ipin]
- + (1 - ROUTE_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
-#else
- /* Pin criticality is based on only timing criticality. */
- pin_criticality[ipin] = slacks->timing_criticality[inet][ipin];
-#endif
- /* Currently, pin criticality is between 0 and 1. Now shift it downwards
- by 1 - max_criticality (max_criticality is 0.99 by default, so shift down
- by 0.01) and cut off at 0. This means that all pins with small criticalities
- (<0.01) get criticality 0 and are ignored entirely, and everything
- else becomes a bit less critical. This effect becomes more pronounced if
- max_criticality is set lower. */
- // assert(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01);
- pin_criticality[ipin] = max(pin_criticality[ipin] - (1.0 - max_criticality), 0.0);
-
- /* Take pin criticality to some power (1 by default). */
- pin_criticality[ipin] = pow(pin_criticality[ipin], criticality_exp);
-
- /* Cut off pin criticality at max_criticality. */
- pin_criticality[ipin] = min(pin_criticality[ipin], max_criticality);
- }
- }
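-
-	/* Worked example of the shaping above, using the default values mentioned
-	   in the comments (max_criticality = 0.99, criticality_exp = 1; values are
-	   illustrative only): a raw criticality of 0.30 becomes
-	       min(pow(max(0.30 - 0.01, 0.0), 1), 0.99) = 0.29,
-	   while anything at or below 0.01 is clipped to 0 and ignored entirely. */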
-
- num_sinks = g_clbs_nlist.net[inet].num_sinks();
- heapsort(sink_order, pin_criticality, num_sinks, 0);
-
- /* Update base costs according to fanout and criticality rules */
-
- largest_criticality = pin_criticality[sink_order[1]];
- update_rr_base_costs(inet, largest_criticality);
-
- mark_ends(inet); /* Only needed to check for multiply-connected SINKs */
-
- rt_root = init_route_tree_to_source(inet);
-#ifdef PROFILE
- float build_heap_time = 0;
-#endif
- // explore in order of decreasing criticality
- for (itarget = 1; itarget <= num_sinks; itarget++) {
- target_pin = sink_order[itarget];
- target_node = net_rr_terminals[inet][target_pin];
-
- target_criticality = pin_criticality[target_pin];
-#ifdef PROFILE
- clock_t sink_criticality_start = clock();
-#endif
-
- highfanout_rlim = mark_node_expansion_by_bin(inet, target_node,
- rt_root);
-
- if (itarget > 1 && itry > 5) {
-			/* Enough iterations have been given to determine the best OPIN; to speed up finding a legal solution, do not let the net use two OPINs */
- assert(rr_node[rt_root->inode].type == SOURCE);
- rt_root->re_expand = false;
- }
-
- // reexplore route tree from root to add any new nodes (buildheap afterwards)
-#ifdef PROFILE
- clock_t route_tree_start = clock();
-#endif
- add_route_tree_to_heap(rt_root, target_node, target_criticality,
- astar_fac);
- heap_::build_heap(); // via sifting down everything
- // if (itarget == num_sinks && itarget > 800) {
- // vpr_printf_info("heap for target %d: ", itarget);
- // heap_::verify_extract_top();
- // }
-#ifdef PROFILE
- build_heap_time += static_cast<float>(clock() - route_tree_start) / CLOCKS_PER_SEC;
-#endif
-
-
- // cheapest s_heap (gives index to rr_node) in current route tree to be expanded on
- struct s_heap* cheapest = get_heap_head();
-
- if (cheapest == NULL) { /* Infeasible routing. No possible path for net. */
- vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
- inet, g_clbs_nlist.net[inet].name, itarget);
- reset_path_costs();
- free_route_tree(rt_root);
- return (false);
- }
-
- inode = cheapest->index;
- while (inode != target_node) {
- old_total_cost = rr_node_route_inf[inode].path_cost;
- new_total_cost = cheapest->cost;
-
- if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
- old_back_cost = HUGE_POSITIVE_FLOAT;
- else
- old_back_cost = rr_node_route_inf[inode].backward_path_cost;
-
- new_back_cost = cheapest->backward_path_cost;
-
- /* I only re-expand a node if both the "known" backward cost is lower *
- * in the new expansion (this is necessary to prevent loops from *
- * forming in the routing and causing havoc) *and* the expected total *
- * cost to the sink is lower than the old value. Different R_upstream *
- * values could make a path with lower back_path_cost less desirable *
- * than one with higher cost. Test whether or not I should disallow *
- * re-expansion based on a higher total cost. */
-
- if (old_total_cost > new_total_cost && old_back_cost > new_back_cost) {
- rr_node_route_inf[inode].prev_node = cheapest->u.prev_node;
- rr_node_route_inf[inode].prev_edge = cheapest->prev_edge;
- rr_node_route_inf[inode].path_cost = new_total_cost;
- rr_node_route_inf[inode].backward_path_cost = new_back_cost;
-
- if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
- add_to_mod_list(&rr_node_route_inf[inode].path_cost);
-
- timing_driven_expand_neighbours(cheapest, inet, itry, bend_cost,
- target_criticality, target_node, astar_fac,
- highfanout_rlim);
- }
-
- free_heap_data(cheapest);
- cheapest = get_heap_head();
- // test heap property
- // if (!heap_::is_valid()) {vpr_printf_info("invalid heap: "); heap_::pop_heap();}
-
- if (cheapest == NULL) { /* Impossible routing. No path for net. */
- vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
- inet, g_clbs_nlist.net[inet].name, itarget);
- reset_path_costs();
- free_route_tree(rt_root);
- return (false);
- }
-
- inode = cheapest->index;
- }
-#ifdef PROFILE
- time_on_criticality[target_criticality / criticality_per_bin] += static_cast<float>(clock() - sink_criticality_start) / CLOCKS_PER_SEC;
- ++itry_on_criticality[target_criticality / criticality_per_bin];
-#endif
- /* NB: In the code below I keep two records of the partial routing: the *
- * traceback and the route_tree. The route_tree enables fast recomputation *
- * of the Elmore delay to each node in the partial routing. The traceback *
- * lets me reuse all the routines written for breadth-first routing, which *
- * all take a traceback structure as input. Before this routine exits the *
- * route_tree structure is destroyed; only the traceback is needed at that *
- * point. */
-
- rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */
- new_route_start_tptr = update_traceback(cheapest, inet);
- rt_node_of_sink[target_pin] = update_route_tree(cheapest);
- free_heap_data(cheapest);
- pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac);
-
- empty_heap();
- reset_path_costs();
- }
-#ifdef PROFILE
- if (!time_to_build_heap.empty()) time_to_build_heap[num_sinks / fanout_per_bin] += build_heap_time;
-#endif
- /* For later timing analysis. */
-
- update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet);
- free_route_tree(rt_root);
- return (true);
-}
-
-static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
- float target_criticality, float astar_fac) {
-
- /* Puts the entire partial routing below and including rt_node onto the heap *
- * (except for those parts marked as not to be expanded) by calling itself *
- * recursively. */
-
- int inode;
- t_rt_node *child_node;
- t_linked_rt_edge *linked_rt_edge;
- float tot_cost, backward_path_cost, R_upstream;
-
- /* Pre-order depth-first traversal */
-
- if (rt_node->re_expand) {
- inode = rt_node->inode;
- backward_path_cost = target_criticality * rt_node->Tdel;
- R_upstream = rt_node->R_upstream;
- tot_cost = backward_path_cost
- + astar_fac
- * get_timing_driven_expected_cost(inode, target_node,
- target_criticality, R_upstream);
- heap_::push_back_node(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS,
- backward_path_cost, R_upstream);
-
- }
-
- linked_rt_edge = rt_node->u.child_list;
-
- while (linked_rt_edge != NULL) {
- child_node = linked_rt_edge->child;
- add_route_tree_to_heap(child_node, target_node, target_criticality,
- astar_fac);
- linked_rt_edge = linked_rt_edge->next;
- }
-}
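-
-/* Note on astar_fac: since tot_cost = backward_path_cost + astar_fac *
-   expected_cost, an astar_fac of 0 degenerates to a pure Dijkstra expansion
-   driven only by known cost, while larger values trust the look-ahead
-   estimate more and prune the search harder. */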
-
-static void timing_driven_expand_neighbours(struct s_heap *current,
- int inet, int itry,
- float bend_cost, float criticality_fac, int target_node,
- float astar_fac, int highfanout_rlim) {
-
- /* Puts all the rr_nodes adjacent to current on the heap. rr_nodes outside *
- * the expanded bounding box specified in route_bb are not added to the *
- * heap. */
-
- float new_R_upstream;
-
- int inode = current->index;
- float old_back_pcost = current->backward_path_cost;
- float R_upstream = current->R_upstream;
- int num_edges = rr_node[inode].get_num_edges();
-
- int target_x = rr_node[target_node].get_xhigh();
- int target_y = rr_node[target_node].get_yhigh();
- bool high_fanout = g_clbs_nlist.net[inet].num_sinks() >= HIGH_FANOUT_NET_LIM;
-
- for (int iconn = 0; iconn < num_edges; iconn++) {
- int to_node = rr_node[inode].edges[iconn];
-
- if (high_fanout) {
- // since target node is an IPIN, xhigh and xlow are the same (same for y values)
- if (rr_node[to_node].get_xhigh() < target_x - highfanout_rlim
- || rr_node[to_node].get_xlow() > target_x + highfanout_rlim
- || rr_node[to_node].get_yhigh() < target_y - highfanout_rlim
- || rr_node[to_node].get_ylow() > target_y + highfanout_rlim){
- continue; /* Node is outside high fanout bin. */
- }
- }
- else if (rr_node[to_node].get_xhigh() < route_bb[inet].xmin
- || rr_node[to_node].get_xlow() > route_bb[inet].xmax
- || rr_node[to_node].get_yhigh() < route_bb[inet].ymin
- || rr_node[to_node].get_ylow() > route_bb[inet].ymax)
- continue; /* Node is outside (expanded) bounding box. */
-
-
- /* Prune away IPINs that lead to blocks other than the target one. Avoids *
- * the issue of how to cost them properly so they don't get expanded before *
- * more promising routes, but makes route-throughs (via CLBs) impossible. *
- * Change this if you want to investigate route-throughs. */
-
- t_rr_type to_type = rr_node[to_node].type;
- if (to_type == IPIN
- && (rr_node[to_node].get_xhigh() != target_x
- || rr_node[to_node].get_yhigh() != target_y))
- continue;
-
- /* new_back_pcost stores the "known" part of the cost to this node -- the *
- * congestion cost of all the routing resources back to the existing route *
- * plus the known delay of the total path back to the source. new_tot_cost *
- * is this "known" backward cost + an expected cost to get to the target. */
-
- float new_back_pcost = old_back_pcost
- + (1. - criticality_fac) * get_rr_cong_cost(to_node);
-
- int iswitch = rr_node[inode].switches[iconn];
- if (g_rr_switch_inf[iswitch].buffered) {
- new_R_upstream = g_rr_switch_inf[iswitch].R;
- } else {
- new_R_upstream = R_upstream + g_rr_switch_inf[iswitch].R;
- }
-
- float Tdel = rr_node[to_node].C * (new_R_upstream + 0.5 * rr_node[to_node].R);
- Tdel += g_rr_switch_inf[iswitch].Tdel;
- new_R_upstream += rr_node[to_node].R;
- new_back_pcost += criticality_fac * Tdel;
-
- if (bend_cost != 0.) {
- t_rr_type from_type = rr_node[inode].type;
- to_type = rr_node[to_node].type;
- if ((from_type == CHANX && to_type == CHANY)
- || (from_type == CHANY && to_type == CHANX))
- new_back_pcost += bend_cost;
- }
-
- float expected_cost = get_timing_driven_expected_cost(to_node, target_node,
- criticality_fac, new_R_upstream);
- float new_tot_cost = new_back_pcost + astar_fac * expected_cost;
-
- node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost,
- new_R_upstream);
-
- } /* End for all neighbours */
-}
-
-static float get_timing_driven_expected_cost(int inode, int target_node,
- float criticality_fac, float R_upstream) {
-
-	/* Determines the expected cost (due to both delay and resource cost) to reach *
- * the target node from inode. It doesn't include the cost of inode -- *
- * that's already in the "known" path_cost. */
-
- t_rr_type rr_type;
- int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;
- float expected_cost, cong_cost, Tdel;
-
- rr_type = rr_node[inode].type;
-
- if (rr_type == CHANX || rr_type == CHANY) {
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- int num_interposer_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
-#endif
-
- num_segs_same_dir = get_expected_segs_to_target(inode, target_node,
- &num_segs_ortho_dir);
- cost_index = rr_node[inode].get_cost_index();
- ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
-
- cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost
- + num_segs_ortho_dir
- * rr_indexed_data[ortho_cost_index].base_cost;
- cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost
- + rr_indexed_data[SINK_COST_INDEX].base_cost;
-
- Tdel =
- num_segs_same_dir * rr_indexed_data[cost_index].T_linear
- + num_segs_ortho_dir
- * rr_indexed_data[ortho_cost_index].T_linear
- + num_segs_same_dir * num_segs_same_dir
- * rr_indexed_data[cost_index].T_quadratic
- + num_segs_ortho_dir * num_segs_ortho_dir
- * rr_indexed_data[ortho_cost_index].T_quadratic
- + R_upstream
- * (num_segs_same_dir
- * rr_indexed_data[cost_index].C_load
- + num_segs_ortho_dir
- * rr_indexed_data[ortho_cost_index].C_load);
-
- Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- float interposer_hop_delay = (float)delay_increase * 1e-12;
- Tdel += num_interposer_hops * interposer_hop_delay;
-#endif
-
- expected_cost = criticality_fac * Tdel
- + (1. - criticality_fac) * cong_cost;
- return (expected_cost);
- }
-
- else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
- return (rr_indexed_data[SINK_COST_INDEX].base_cost);
- }
-
- else { /* Change this if you want to investigate route-throughs */
- return (0.);
- }
-}
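-
-/* Note: this estimate charges the whole remaining distance through the
-   cost/delay data of inode's own segment type (its cost_index). The
-   OP_STRONG_LOOKAHEAD variant declared in this change appears, from
-   get_timing_driven_multiple_wirelengths_expected_cost() and the extra
-   via_cost_index parameter, to generalize this by considering the other
-   available wire lengths as well. */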
-#ifdef INTERPOSER_BASED_ARCHITECTURE
-static int get_num_expected_interposer_hops_to_target(int inode, int target_node)
-{
- /*
- Description:
- returns the expected number of times you have to cross the
- interposer in order to go from inode to target_node.
- This does not include inode itself.
-
- Assumptions:
- 1- Cuts are horizontal
- 2- target_node is a terminal pin (hence its ylow==yhigh)
- 3- wires that go through the interposer are uni-directional
-
- --------- y=150
- | |
- --------- y=100
- | |
- --------- y=50
- | |
- --------- y=0
-
- num_cuts = 2, cut_step = 50, cut_locations = {50, 100}
- */
- int y_start; /* start point y-coordinate. (y-coordinate at the *end* of wire 'i') */
- int y_end; /* destination (target) y-coordinate */
- int cut_i, y_cut_location, num_expected_hops;
- t_rr_type rr_type;
-
- num_expected_hops = 0;
- y_end = rr_node[target_node].get_ylow();
- rr_type = rr_node[inode].type;
-
- if(rr_type == CHANX)
- { /* inode is a horizontal wire */
- /* the ylow and yhigh are the same */
- assert(rr_node[inode].get_ylow() == rr_node[inode].get_yhigh());
- y_start = rr_node[inode].get_ylow();
- }
- else
- { /* inode is a CHANY */
- if(rr_node[inode].direction == INC_DIRECTION)
- {
- y_start = rr_node[inode].get_yhigh();
- }
- else if(rr_node[inode].direction == DEC_DIRECTION)
- {
- y_start = rr_node[inode].get_ylow();
- }
- else
- {
- /* this means direction is BIDIRECTIONAL! Error out. */
- y_start = -1;
- assert(rr_node[inode].direction!=BI_DIRECTION);
- }
- }
-
- /* for every cut, is it between 'i' and 'target'? */
- for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
- {
- y_cut_location = arch_cut_locations[cut_i];
- if( (y_start < y_cut_location && y_cut_location < y_end) ||
- (y_end < y_cut_location && y_cut_location < y_start))
- {
- ++num_expected_hops;
- }
- }
-
-	/* Make sure there is no off-by-one error. For the current node i:
- if it's a vertical wire, node 'i' itself may be crossing the interposer.
- */
- if(rr_type == CHANY)
- {
- /* for every cut, does it cut wire 'i'? */
- for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
- {
- y_cut_location = arch_cut_locations[cut_i];
- if(rr_node[inode].get_ylow() < y_cut_location && y_cut_location < rr_node[inode].get_yhigh())
- {
- ++num_expected_hops;
- }
- }
- }
-
- return num_expected_hops;
-}
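-
-/* Worked example for the figure above: a CHANX wire at y_start = 75 routing
-   to a target at y_end = 125, with cuts at {50, 100}: only the cut at y = 100
-   lies strictly between 75 and 125, so one interposer hop is expected. */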
-#endif
-
-/* Macro used below to ensure that fractions are rounded up, but floating *
- * point values very close to an integer are rounded to that integer. */
-#define ROUND_UP(x) (ceil (x - 0.001))
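-/* e.g. ROUND_UP(1.5) == 2, but ROUND_UP(2.0001) == 2: a value within 0.001 of
-   an integer is treated as that integer rather than being rounded up. */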
-
-static int get_expected_segs_to_target(int inode, int target_node,
- int *num_segs_ortho_dir_ptr) {
-
- /* Returns the number of segments the same type as inode that will be needed *
- * to reach target_node (not including inode) in each direction (the same *
- * direction (horizontal or vertical) as inode and the orthogonal direction).*/
-
- t_rr_type rr_type;
- int target_x, target_y, num_segs_same_dir, cost_index, ortho_cost_index;
- int no_need_to_pass_by_clb;
- float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh;
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- int num_expected_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
-#endif
-
- target_x = rr_node[target_node].get_xlow();
- target_y = rr_node[target_node].get_ylow();
- cost_index = rr_node[inode].get_cost_index();
- inv_length = rr_indexed_data[cost_index].inv_length;
- ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
- ortho_inv_length = rr_indexed_data[ortho_cost_index].inv_length;
- rr_type = rr_node[inode].type;
-
- if (rr_type == CHANX) {
- ylow = rr_node[inode].get_ylow();
- xhigh = rr_node[inode].get_xhigh();
- xlow = rr_node[inode].get_xlow();
-
- /* Count vertical (orthogonal to inode) segs first. */
-
- if (ylow > target_y) { /* Coming from a row above target? */
- *num_segs_ortho_dir_ptr =
- (int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length));
- no_need_to_pass_by_clb = 1;
- } else if (ylow < target_y - 1) { /* Below the CLB bottom? */
- *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) *
- ortho_inv_length));
- no_need_to_pass_by_clb = 1;
- } else { /* In a row that passes by target CLB */
- *num_segs_ortho_dir_ptr = 0;
- no_need_to_pass_by_clb = 0;
- }
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- (*num_segs_ortho_dir_ptr) = (*num_segs_ortho_dir_ptr) + 2*num_expected_hops;
-#endif
-
- /* Now count horizontal (same dir. as inode) segs. */
-
- if (xlow > target_x + no_need_to_pass_by_clb) {
- num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb -
- target_x) * inv_length));
- } else if (xhigh < target_x - no_need_to_pass_by_clb) {
- num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb -
- xhigh) * inv_length));
- } else {
- num_segs_same_dir = 0;
- }
- }
-
- else { /* inode is a CHANY */
- ylow = rr_node[inode].get_ylow();
- yhigh = rr_node[inode].get_yhigh();
- xlow = rr_node[inode].get_xlow();
-
- /* Count horizontal (orthogonal to inode) segs first. */
-
- if (xlow > target_x) { /* Coming from a column right of target? */
- *num_segs_ortho_dir_ptr = (int)(
- ROUND_UP((xlow - target_x + 1.) * ortho_inv_length));
- no_need_to_pass_by_clb = 1;
- } else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */
- *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) *
- ortho_inv_length));
- no_need_to_pass_by_clb = 1;
- } else { /* In a column that passes by target CLB */
- *num_segs_ortho_dir_ptr = 0;
- no_need_to_pass_by_clb = 0;
- }
-
- /* Now count vertical (same dir. as inode) segs. */
-
- if (ylow > target_y + no_need_to_pass_by_clb) {
- num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb -
- target_y) * inv_length));
- } else if (yhigh < target_y - no_need_to_pass_by_clb) {
- num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb -
- yhigh) * inv_length));
- } else {
- num_segs_same_dir = 0;
- }
-#ifdef INTERPOSER_BASED_ARCHITECTURE
- num_segs_same_dir = num_segs_same_dir + 2*num_expected_hops;
-#endif
- }
-
- return (num_segs_same_dir);
-}
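-
-/* Illustrative example (assuming inv_length is the reciprocal of the segment
-   length): a CHANX node on length-4 wires (inv_length = 0.25) whose xhigh ends
-   10 columns left of target_x, in the same row as the target, needs
-   ROUND_UP(10 * 0.25) = 3 same-direction segments and 0 orthogonal ones. */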
-
-static void update_rr_base_costs(int inet, float largest_criticality) {
-
- /* Changes the base costs of different types of rr_nodes according to the *
- * criticality, fanout, etc. of the current net being routed (inet). */
-
- float fanout, factor;
- int index;
-
- fanout = g_clbs_nlist.net[inet].num_sinks();
-
- /* Other reasonable values for factor include fanout and 1 */
- factor = sqrt(fanout);
-
- for (index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) {
- if (rr_indexed_data[index].T_quadratic > 0.) { /* pass transistor */
- rr_indexed_data[index].base_cost =
- rr_indexed_data[index].saved_base_cost * factor;
- } else {
- rr_indexed_data[index].base_cost =
- rr_indexed_data[index].saved_base_cost;
- }
- }
-}
-
-/* Nets that have high fanout can take a very long time to route. Routing for sinks that are part of high-fanout nets should be
- done within a rectangular 'bin' centered around a target (as opposed to the entire net bounding box) the size of which is returned by this function. */
-static int mark_node_expansion_by_bin(int inet, int target_node,
- t_rt_node * rt_node) {
- int target_xlow, target_ylow, target_xhigh, target_yhigh;
- int rlim = 1;
- int inode;
- float area;
- bool success;
- t_linked_rt_edge *linked_rt_edge;
- t_rt_node * child_node;
-
- target_xlow = rr_node[target_node].get_xlow();
- target_ylow = rr_node[target_node].get_ylow();
- target_xhigh = rr_node[target_node].get_xhigh();
- target_yhigh = rr_node[target_node].get_yhigh();
-
- if (g_clbs_nlist.net[inet].num_sinks() < HIGH_FANOUT_NET_LIM) {
- /* This algorithm only applies to high fanout nets */
- return 1;
- }
- if (rt_node == NULL || rt_node->u.child_list == NULL) {
- /* If unknown traceback, set radius of bin to be size of chip */
- rlim = max(nx + 2, ny + 2);
- return rlim;
- }
-
- area = (route_bb[inet].xmax - route_bb[inet].xmin)
- * (route_bb[inet].ymax - route_bb[inet].ymin);
- if (area <= 0) {
- area = 1;
- }
-
- rlim = (int)(ceil(sqrt((float) area / (float) g_clbs_nlist.net[inet].num_sinks())));
-
- success = false;
-	/* Quickly determine a feasible bin radius in which to route the sink for high fanout nets.
-	   This is necessary to prevent very long runtimes; in the best case it reduces the complexity
-	   from O(N^2 log N) to O(N log N) (Swartz fast router).
-	 */
- linked_rt_edge = rt_node->u.child_list;
- while (success == false && linked_rt_edge != NULL) {
- while (linked_rt_edge != NULL && success == false) {
- child_node = linked_rt_edge->child;
- inode = child_node->inode;
- if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
- if (rr_node[inode].get_xlow() <= target_xhigh + rlim
- && rr_node[inode].get_xhigh() >= target_xhigh - rlim
- && rr_node[inode].get_ylow() <= target_yhigh + rlim
- && rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
- success = true;
- }
- }
- linked_rt_edge = linked_rt_edge->next;
- }
-
- if (success == false) {
- if (rlim > max(nx + 2, ny + 2)) {
- vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
- "VPR internal error, net %s has paths that are not found in traceback.\n", g_clbs_nlist.net[inet].name);
- }
- /* if sink not in bin, increase bin size until fit */
- rlim *= 2;
- } else {
- /* Sometimes might just catch a wire in the end segment, need to give it some channel space to explore */
- rlim += 4;
- }
- linked_rt_edge = rt_node->u.child_list;
- }
-
- /* adjust rlim to account for width/height of block containing the target sink */
- int target_span = max( target_xhigh - target_xlow, target_yhigh - target_ylow );
- rlim += target_span;
-
- /* redetermine expansion based on rlim */
- linked_rt_edge = rt_node->u.child_list;
- while (linked_rt_edge != NULL) {
- child_node = linked_rt_edge->child;
- inode = child_node->inode;
- if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
- if (rr_node[inode].get_xlow() <= target_xhigh + rlim
- && rr_node[inode].get_xhigh() >= target_xhigh - rlim
- && rr_node[inode].get_ylow() <= target_yhigh + rlim
- && rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
- child_node->re_expand = true;
- } else {
- child_node->re_expand = false;
- }
- }
- linked_rt_edge = linked_rt_edge->next;
- }
- return rlim;
-}
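-
-/* Example of the initial bin radius above: a 400-sink net with a 40 x 40
-   bounding box starts at rlim = ceil(sqrt(1600 / 400)) = 2; the radius then
-   doubles until part of the existing route tree falls inside the bin. */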
-#define ERROR_TOL 0.0001
-
-static void timing_driven_check_net_delays(float **net_delay) {
-
- /* Checks that the net delays computed incrementally during timing driven *
- * routing match those computed from scratch by the net_delay.c module. */
-
- unsigned int inet, ipin;
- float **net_delay_check;
-
- t_chunk list_head_net_delay_check_ch = {NULL, 0, NULL};
-
- /*struct s_linked_vptr *ch_list_head_net_delay_check;*/
-
- net_delay_check = alloc_net_delay(&list_head_net_delay_check_ch, g_clbs_nlist.net,
- g_clbs_nlist.net.size());
- load_net_delay_from_routing(net_delay_check, g_clbs_nlist.net, g_clbs_nlist.net.size());
-
- for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
- for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
- if (net_delay_check[inet][ipin] == 0.) { /* Should be only GLOBAL nets */
- if (fabs(net_delay[inet][ipin]) > ERROR_TOL) {
- vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
- "in timing_driven_check_net_delays: net %d pin %d.\n"
- "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
- inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
- }
- } else {
- if (fabs(1.0 - net_delay[inet][ipin] / net_delay_check[inet][ipin]) > ERROR_TOL) {
- vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
- "in timing_driven_check_net_delays: net %d pin %d.\n"
- "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
- inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
- }
- }
- }
- }
-
- free_net_delay(net_delay_check, &list_head_net_delay_check_ch);
- vpr_printf_info("Completed net delay value cross check successfully.\n");
-}
-
-
-/* Detect if net should be routed or not */
-static bool should_route_net(int inet) {
- t_trace * tptr = trace_head[inet];
-
- if (tptr == NULL) {
- /* No routing yet. */
- return true;
- }
-
- for (;;) {
- int inode = tptr->index;
- int occ = rr_node[inode].get_occ();
- int capacity = rr_node[inode].get_capacity();
-
- if (occ > capacity) {
- return true; /* overuse detected */
- }
-
- if (rr_node[inode].type == SINK) {
- tptr = tptr->next; /* Skip next segment. */
- if (tptr == NULL)
- break;
- }
-
- tptr = tptr->next;
-
- } /* End while loop -- did an entire traceback. */
-
- return false; /* Current route has no overuse */
-}
-
-static bool early_exit_heuristic(const t_router_opts& router_opts) {
-	/* Early exit code for cases where it is obvious that a successful route will not be found.
-	   Heuristic: if the total wirelength used in the first routing iteration exceeds the fraction
-	   FIRST_ITER_WIRELENTH_LIMIT of the total available wirelength, exit. */
- int total_wirelength = 0;
- int available_wirelength = 0;
-
- for (int i = 0; i < num_rr_nodes; ++i) {
- if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
- available_wirelength += 1 +
- rr_node[i].get_xhigh() - rr_node[i].get_xlow() +
- rr_node[i].get_yhigh() - rr_node[i].get_ylow();
- }
- }
-
- for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
- if (g_clbs_nlist.net[inet].is_global == false
- && g_clbs_nlist.net[inet].num_sinks() != 0) { /* Globals don't count. */
- int bends, wirelength, segments;
- get_num_bends_and_length(inet, &bends, &wirelength, &segments);
-
- total_wirelength += wirelength;
- }
- }
- vpr_printf_info("Wire length after first iteration %d, total available wire length %d, ratio %g\n",
- total_wirelength, available_wirelength,
- (float) (total_wirelength) / (float) (available_wirelength));
- if ((float) (total_wirelength) / (float) (available_wirelength)> FIRST_ITER_WIRELENTH_LIMIT) {
- vpr_printf_info("Wire length usage ratio exceeds limit of %g, fail routing.\n",
- FIRST_ITER_WIRELENTH_LIMIT);
- return true;
- }
- vpr_printf_info("--------- ---------- ----------- ---------------------\n");
- vpr_printf_info("Iteration Time Crit Path Overused RR Nodes\n");
- vpr_printf_info("--------- ---------- ----------- ---------------------\n");
- return false;
-}
-
+#include <cstdio>
+#include <ctime>
+#include <cmath>
+#include <assert.h>
+#include <vector>
+#include <algorithm>
+#include "vpr_utils.h"
+#include "util.h"
+#include "vpr_types.h"
+#include "globals.h"
+#include "route_export.h"
+#include "route_common.h"
+#include "route_tree_timing.h"
+#include "route_timing.h"
+#include "heapsort.h"
+#include "path_delay.h"
+#include "net_delay.h"
+#include "stats.h"
+#include "ReadOptions.h"
+
+
+using namespace std;
+
+
+/* Oleg's test code. This is an experimental/test measure to give the timing-driven router
+ look-ahead greater awareness of the different wire lengths in the FPGA.
+ Enabling this significantly increases routing time, but the router will
+ still be much faster than running it with astar_fac = 0. */
+//#define OP_STRONG_LOOKAHEAD
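+
+/* Rough sketch of the intended strong look-ahead, inferred from the
+   declarations below (illustrative pseudo-code, not the committed
+   implementation): rather than costing the remaining distance with the
+   current node's own segment type only, try each candidate wire type and
+   keep the cheapest estimate:
+
+       best = HUGE_POSITIVE_FLOAT;
+       for (each candidate wire type, identified by a via_cost_index)
+           best = min(best, expected cost of reaching the target charged
+                            through that type's base_cost/T_linear/T_quadratic);
+
+   get_timing_driven_multiple_wirelengths_expected_cost() and the extra
+   via_cost_index parameter of get_expected_segs_to_target() suggest this
+   shape. */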
+
+
+/******************** Subroutines local to route_timing.c ********************/
+
+static int get_max_pins_per_net(void);
+
+static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
+ float target_criticality, float astar_fac);
+
+static void timing_driven_expand_neighbours(struct s_heap *current,
+ int inet, int itry,
+ float bend_cost, float criticality_fac, int target_node,
+ float astar_fac, int highfanout_rlim);
+
+#ifndef OP_STRONG_LOOKAHEAD
+static float get_timing_driven_expected_cost(int inode, int target_node,
+ float criticality_fac, float R_upstream);
+#else
+static float get_timing_driven_multiple_wirelengths_expected_cost(int inode, int target_node,
+ float criticality_fac, float R_upstream);
+#endif
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+static int get_num_expected_interposer_hops_to_target(int inode, int target_node);
+#endif
+
+#ifndef OP_STRONG_LOOKAHEAD
+static int get_expected_segs_to_target(int inode, int target_node,
+ int *num_segs_ortho_dir_ptr);
+#else
+static int get_expected_segs_to_target(int inode, int via_cost_index, int target_node,
+ int *num_segs_ortho_dir_ptr);
+#endif
+
+static void update_rr_base_costs(int inet, float largest_criticality);
+
+static void timing_driven_check_net_delays(float **net_delay);
+
+static int mark_node_expansion_by_bin(int inet, int target_node,
+ t_rt_node * rt_node);
+
+static double get_overused_ratio();
+
+static bool should_route_net(int inet);
+static bool early_exit_heuristic(const t_router_opts& router_opts);
+struct more_sinks_than {
+ inline bool operator() (const int& net_index1, const int& net_index2) {
+ return g_clbs_nlist.net[net_index1].num_sinks() > g_clbs_nlist.net[net_index2].num_sinks();
+ }
+};
+
+
+#ifdef PROFILE
+static void congestion_analysis();
+static void time_on_criticality_analysis();
+constexpr int fanout_per_bin = 4;
+constexpr float criticality_per_bin = 0.05;
+static vector<float> time_on_fanout;
+static vector<float> time_to_build_heap;
+static vector<int> itry_on_fanout;
+static vector<float> time_on_criticality;
+static vector<int> itry_on_criticality;
+#endif
+
+/************************ Subroutine definitions *****************************/
+bool try_timing_driven_route(struct s_router_opts router_opts,
+ float **net_delay, t_slack * slacks, t_ivec ** clb_opins_used_locally,
+ bool timing_analysis_enabled, const t_timing_inf &timing_inf) {
+
+ /* Timing-driven routing algorithm. The timing graph (includes slack) *
+ * must have already been allocated, and net_delay must have been allocated. *
+ * Returns true if the routing succeeds, false otherwise. */
+#ifdef PROFILE
+ const int max_fanout {get_max_pins_per_net()};
+ time_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);
+ time_to_build_heap.resize((max_fanout / fanout_per_bin) + 1, 0);
+ itry_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0);
+ time_on_criticality.resize((1 / criticality_per_bin) + 1, 0);
+ itry_on_criticality.resize((1 / criticality_per_bin) + 1, 0);
+#endif
+
+ auto sorted_nets = vector<int>(g_clbs_nlist.net.size());
+
+ for (size_t i = 0; i < g_clbs_nlist.net.size(); ++i) {
+ sorted_nets[i] = i;
+ }
+
+ // sort so net with most sinks is first.
+ std::sort(sorted_nets.begin(), sorted_nets.end(), more_sinks_than());
+
+	timing_driven_route_structs route_structs{}; // allocates route structs (calls default constructor)
+ // contains:
+ // float *pin_criticality; /* [1..max_pins_per_net-1] */
+ // int *sink_order; /* [1..max_pins_per_net-1] */;
+ // t_rt_node **rt_node_of_sink; /* [1..max_pins_per_net-1] */
+
+ /* First do one routing iteration ignoring congestion to get reasonable net
+ delay estimates. Set criticalities to 1 when timing analysis is on to
+ optimize timing, and to 0 when timing analysis is off to optimize routability. */
+
+ float init_timing_criticality_val = (timing_analysis_enabled ? 1.0 : 0.0);
+
+	/* Variables used to monitor the routing optimization and to abort early
+	 * when the current channel width (W) is clearly unroutable */
+ double overused_ratio;
+ vector<double> historical_overuse_ratio;
+ historical_overuse_ratio.reserve(router_opts.max_router_iterations + 1);
+
+ // for unroutable large circuits, the time per iteration seems to increase dramatically
+ vector<float> time_per_iteration;
+ time_per_iteration.reserve(router_opts.max_router_iterations + 1);
+
+ for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
+ if (g_clbs_nlist.net[inet].is_global == false) {
+			for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
+				slacks->timing_criticality[inet][ipin] = init_timing_criticality_val;
+#ifdef PATH_COUNTING
+				slacks->path_criticality[inet][ipin] = init_timing_criticality_val;
+#endif
+			}
+ } else {
+ /* Set delay of global signals to zero. Non-global net delays are set by
+ update_net_delays_from_route_tree() inside timing_driven_route_net(),
+ which is only called for non-global nets. */
+ for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
+ net_delay[inet][ipin] = 0.;
+ }
+ }
+ }
+
+ float pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */
+
+ for (int itry = 1; itry <= router_opts.max_router_iterations; ++itry) {
+ clock_t begin = clock();
+
+ /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */
+ for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
+ g_clbs_nlist.net[inet].is_routed = false;
+ g_clbs_nlist.net[inet].is_fixed = false;
+ }
+
+ for (unsigned int i = 0; i < g_clbs_nlist.net.size(); ++i) {
+ bool is_routable = try_timing_driven_route_net(
+ sorted_nets[i],
+ itry,
+ pres_fac,
+ router_opts,
+ route_structs.pin_criticality,
+ route_structs.sink_order,
+ route_structs.rt_node_of_sink,
+ net_delay,
+ slacks
+ );
+
+ if (!is_routable) {
+ return (false);
+ }
+ }
+
+ clock_t end = clock();
+ float time = static_cast<float>(end - begin) / CLOCKS_PER_SEC;
+ time_per_iteration.push_back(time);
+
+ if (itry == 1 && early_exit_heuristic(router_opts)) return false;
+
+ /* Make sure any CLB OPINs used up by subblocks being hooked directly
+ to them are reserved for that purpose. */
+
+ bool rip_up_local_opins = (itry == 1 ? false : true);
+ reserve_locally_used_opins(pres_fac, router_opts.acc_fac, rip_up_local_opins, clb_opins_used_locally);
+
+ /* Pathfinder guys quit after finding a feasible route. I may want to keep
+ going longer, trying to improve timing. Think about this some. */
+
+ bool success = feasible_routing();
+
+ /* Verification to check the ratio of overused nodes, depending on the configuration
+ * may abort the routing if the ratio is too high. */
+ overused_ratio = get_overused_ratio();
+ historical_overuse_ratio.push_back(overused_ratio);
+
+ /* Determine when routing is impossible hence should be aborted */
+ if (itry > 5){
+
+ int expected_success_route_iter = predict_success_route_iter(historical_overuse_ratio, router_opts);
+ if (expected_success_route_iter == UNDEFINED) {
+ return false;
+ }
+
+#ifdef REGRESSION_EXIT
+ if (itry > 15) {
+ // compare their slopes over the last 5 iterations
+ double time_per_iteration_slope = linear_regression_vector(time_per_iteration, itry-5);
+ double congestion_per_iteration_slope = linear_regression_vector(historical_overuse_ratio, itry-5);
+ if (router_opts.congestion_analysis)
+				vpr_printf_info("%f s/iteration %f %%/iteration\n", time_per_iteration_slope, congestion_per_iteration_slope);
+ // time is increasing and congestion is non-decreasing (grows faster than 10% per iteration)
+ if (congestion_per_iteration_slope > 0 && time_per_iteration_slope > 0.1*time_per_iteration.back()
+ && time_per_iteration_slope > 1) { // filter out noise
+				vpr_printf_info("Time per iteration growing too fast at slope %f s/iteration\n"
+					"while congestion grows at %f %%/iteration, unlikely to finish.\n",
+					time_per_iteration_slope, congestion_per_iteration_slope);
+
+ return false;
+ }
+ }
+#endif
+ }
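+
+		/* Example of the regression-exit test above: with iterations taking
+		   ~100 s each, a fitted slope of +15 s/iteration (> 1 and > 0.1 * 100)
+		   combined with a non-decreasing overuse ratio triggers the abort. */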
+
+ //print_usage_by_wire_length();
+
+
+ if (success) {
+
+ if (timing_analysis_enabled) {
+ load_timing_graph_net_delays(net_delay);
+ do_timing_analysis(slacks, timing_inf, false, false);
+ float critical_path_delay = get_critical_path_delay();
+				vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %%)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
+ vpr_printf_info("Critical path: %g ns\n", critical_path_delay);
+ } else {
+				vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %%)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
+ }
+
+ vpr_printf_info("Successfully routed after %d routing iterations.\n", itry);
+#ifdef DEBUG
+ if (timing_analysis_enabled)
+ timing_driven_check_net_delays(net_delay);
+#endif
+ return (true);
+ }
+
+ if (itry == 1) {
+ pres_fac = router_opts.initial_pres_fac;
+ pathfinder_update_cost(pres_fac, 0.); /* Acc_fac=0 for first iter. */
+ } else {
+ pres_fac *= router_opts.pres_fac_mult;
+
+ /* Avoid overflow for high iteration counts, even if acc_cost is big */
+ pres_fac = min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5));
+
+ pathfinder_update_cost(pres_fac, router_opts.acc_fac);
+ }
+
+ if (timing_analysis_enabled) {
+ /* Update slack values by doing another timing analysis.
+ Timing_driven_route_net updated the net delay values. */
+
+ load_timing_graph_net_delays(net_delay);
+
+ do_timing_analysis(slacks, timing_inf, false, false);
+
+ } else {
+ /* If timing analysis is not enabled, make sure that the criticalities and the
+ net_delays stay as 0 so that wirelength can be optimized. */
+
+ for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
+ for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
+ slacks->timing_criticality[inet][ipin] = 0.;
+#ifdef PATH_COUNTING
+ slacks->path_criticality[inet][ipin] = 0.;
+#endif
+ net_delay[inet][ipin] = 0.;
+ }
+ }
+ }
+
+
+ if (timing_analysis_enabled) {
+ float critical_path_delay = get_critical_path_delay();
+ vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100);
+ } else {
+ vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100);
+ }
+
+#ifdef PROFILE
+ if (router_opts.congestion_analysis) congestion_analysis();
+ if (router_opts.fanout_analysis) time_on_fanout_analysis();
+ time_on_criticality_analysis();
+#endif
+ fflush(stdout);
+ }
+ vpr_printf_info("Routing failed.\n");
+
+ return (false);
+}
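+
+/* The REGRESSION_EXIT check above assumes linear_regression_vector() returns
+ * the least-squares slope of its vector argument over the tail that starts at
+ * the given index. A minimal sketch of such a helper is given below; the name
+ * and element type here are illustrative, not the actual implementation. */
+static double example_tail_slope(const std::vector<double>& y, size_t start) {
+	size_t n = y.size() - start;
+	if (n < 2) return 0.;
+	double sx = 0., sy = 0., sxy = 0., sxx = 0.;
+	for (size_t i = start; i < y.size(); ++i) {
+		double x = static_cast<double>(i);	/* iteration number is the x-axis */
+		sx += x; sy += y[i]; sxy += x * y[i]; sxx += x * x;
+	}
+	/* slope = (n*Sxy - Sx*Sy) / (n*Sxx - Sx*Sx) */
+	return (n * sxy - sx * sy) / (n * sxx - sx * sx);
+}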
+
+// profiling per iteration functions
+#ifdef PROFILE
+void time_on_fanout_analysis() {
+ // using the global time_on_fanout and itry_on_fanout
+ vpr_printf_info("fanout low time (s) attemps heap build time (s)\n");
+ for (size_t bin = 0; bin < time_on_fanout.size(); ++bin) {
+ if (itry_on_fanout[bin]) { // avoid printing the many 0 bins
+ vpr_printf_info("%4d %14.3f %12d %14.3f\n",bin*fanout_per_bin, time_on_fanout[bin], itry_on_fanout[bin], time_to_build_heap[bin]);
+ }
+ }
+}
+
+void time_on_criticality_analysis() {
+ vpr_printf_info("congestion low time (s) attemps\n");
+ for (size_t bin = 0; bin < time_on_criticality.size(); ++bin) {
+ if (itry_on_criticality[bin]) { // avoid printing the many 0 bins
+ vpr_printf_info("%4d %14.3f %12d\n",bin*criticality_per_bin, time_on_criticality[bin], itry_on_criticality[bin]);
+ }
+ }
+}
+// at the end of a routing iteration, profile how much congestion is taken up by each type of rr_node
+// efficient bit array for checking against congested type
+struct Congested_node_types {
+ uint32_t mask;
+ Congested_node_types() : mask{0} {}
+ void set_congested(int rr_node_type) {mask |= (1 << rr_node_type);}
+ void clear_congested(int rr_node_type) {mask &= ~(1 << rr_node_type);}
+ bool is_congested(int rr_node_type) const {return mask & (1 << rr_node_type);}
+ bool empty() const {return mask == 0;}
+};
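+
+/* Usage sketch: set the types found to be congested, then test each node
+ * against the mask in O(1), e.g.
+ *     Congested_node_types congested;
+ *     congested.set_congested(CHANX);
+ *     if (congested.is_congested(node.type)) { ... print the node ... }
+ */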
+
+void congestion_analysis() {
+ static const std::vector<const char*> node_typename {
+ "SOURCE",
+ "SINK",
+ "IPIN",
+ "OPIN",
+ "CHANX",
+ "CHANY",
+ "ICE"
+ };
+ // each type indexes into array which holds the congestion for that type
+ std::vector<int> congestion_per_type((size_t) NUM_RR_TYPES, 0);
+ // print out specific node information if congestion for type is low enough
+
+ int total_congestion = 0;
+ for (int inode = 0; inode < num_rr_nodes; ++inode) {
+ const t_rr_node& node = rr_node[inode];
+ int congestion = node.get_occ() - node.get_capacity();
+
+ if (congestion > 0) {
+ total_congestion += congestion;
+ congestion_per_type[node.type] += congestion;
+ }
+ }
+
+ constexpr int specific_node_print_threshold = 5;
+ Congested_node_types congested;
+ for (int type = SOURCE; type < NUM_RR_TYPES; ++type) {
+ float congestion_percentage = (float)congestion_per_type[type] / (float) total_congestion * 100;
+ vpr_printf_info(" %6s: %10.6f %\n", node_typename[type], congestion_percentage);
+ // nodes of that type need specific printing
+ if (congestion_per_type[type] > 0 &&
+ congestion_per_type[type] < specific_node_print_threshold) congested.set_congested(type);
+ }
+
+ // print out each congested node specifically
+ if (!congested.empty()) {
+ vpr_printf_info("Specific congested nodes\nxlow ylow type\n");
+ for (int inode = 0; inode < num_rr_nodes; ++inode) {
+ const t_rr_node& node = rr_node[inode];
+ if (congested.is_congested(node.type) && (node.get_occ() - node.get_capacity()) > 0) {
+ vpr_printf_info("(%3d,%3d) %6s\n", node.get_xlow(), node.get_ylow(), node_typename[node.type]);
+ }
+ }
+ }
+}
+#endif
+
+bool try_timing_driven_route_net(int inet, int itry, float pres_fac,
+ struct s_router_opts router_opts,
+ float* pin_criticality, int* sink_order,
+ t_rt_node** rt_node_of_sink, float** net_delay, t_slack* slacks) {
+
+ bool is_routed = false;
+
+ if (g_clbs_nlist.net[inet].is_fixed) { /* Skip pre-routed nets. */
+ is_routed = true;
+ } else if (g_clbs_nlist.net[inet].is_global) { /* Skip global nets. */
+ is_routed = true;
+ } else if (should_route_net(inet) == false) {
+ is_routed = true;
+ } else{
+ // track time spent vs fanout
+#ifdef PROFILE
+ clock_t net_begin = clock();
+#endif
+
+ is_routed = timing_driven_route_net(inet, itry, pres_fac,
+ router_opts.max_criticality, router_opts.criticality_exp,
+ router_opts.astar_fac, router_opts.bend_cost,
+ pin_criticality, sink_order,
+ rt_node_of_sink, net_delay[inet], slacks);
+
+#ifdef PROFILE
+ float time_for_net = static_cast<float>(clock() - net_begin) / CLOCKS_PER_SEC;
+ int net_fanout = g_clbs_nlist.net[inet].num_sinks();
+ time_on_fanout[net_fanout / fanout_per_bin] += time_for_net;
+ itry_on_fanout[net_fanout / fanout_per_bin] += 1;
+#endif
+
+ /* If the net still failed to route, the rr_graph itself must be disconnected. */
+ if (is_routed) {
+ g_clbs_nlist.net[inet].is_routed = true;
+ g_atoms_nlist.net[clb_to_vpack_net_mapping[inet]].is_routed = true;
+ } else {
+ vpr_printf_info("Routing failed.\n");
+ }
+ }
+ return (is_routed);
+}
+
+/*
+ * NOTE:
+ * Suggest using a timing_driven_route_structs struct. Memory is managed for you
+ */
+void alloc_timing_driven_route_structs(float **pin_criticality_ptr,
+ int **sink_order_ptr, t_rt_node *** rt_node_of_sink_ptr) {
+
+ /* Allocates all the structures needed only by the timing-driven router. */
+
+ int max_pins_per_net = get_max_pins_per_net();
+
+ float *pin_criticality = (float *) my_malloc((max_pins_per_net - 1) * sizeof(float));
+ *pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */
+
+ int *sink_order = (int *) my_malloc((max_pins_per_net - 1) * sizeof(int));
+ *sink_order_ptr = sink_order - 1;
+
+ t_rt_node **rt_node_of_sink = (t_rt_node **) my_malloc((max_pins_per_net - 1) * sizeof(t_rt_node *));
+ *rt_node_of_sink_ptr = rt_node_of_sink - 1;
+
+ alloc_route_tree_timing_structs();
+}
+
+/* This function gets ratio of overused nodes (overused_nodes / num_rr_nodes) */
+static double get_overused_ratio(){
+ double overused_nodes = 0.0;
+ int inode;
+ for(inode = 0; inode < num_rr_nodes; inode++){
+ if(rr_node[inode].get_occ() > rr_node[inode].get_capacity())
+ overused_nodes += (rr_node[inode].get_occ() - rr_node[inode].get_capacity());
+ }
+ overused_nodes /= (double)num_rr_nodes;
+ return overused_nodes;
+}
+
+/*
+ * NOTE:
+ * Suggest using a timing_driven_route_structs struct. Memory is managed for you
+ */
+void free_timing_driven_route_structs(float *pin_criticality, int *sink_order,
+ t_rt_node ** rt_node_of_sink) {
+
+ /* Frees all the structures needed only by the timing-driven router. */
+
+ free(pin_criticality + 1); /* Starts at index 1. */
+ free(sink_order + 1);
+ free(rt_node_of_sink + 1);
+ free_route_tree_timing_structs();
+}
+
+timing_driven_route_structs::timing_driven_route_structs() {
+ alloc_timing_driven_route_structs(
+ &pin_criticality,
+ &sink_order,
+ &rt_node_of_sink
+ );
+}
+
+timing_driven_route_structs::~timing_driven_route_structs() {
+ free_timing_driven_route_structs(
+ pin_criticality,
+ sink_order,
+ rt_node_of_sink
+ );
+}
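+
+/* Usage sketch for the RAII wrapper above: since the constructor and
+ * destructor call alloc_timing_driven_route_structs() and
+ * free_timing_driven_route_structs(), a plain local suffices:
+ *
+ *     {
+ *         timing_driven_route_structs rs;    // allocates
+ *         ... use rs.pin_criticality, rs.sink_order, rs.rt_node_of_sink ...
+ *     }                                      // frees automatically
+ */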
+
+static int get_max_pins_per_net(void) {
+
+ /* Returns the largest number of pins on any non-global net. */
+
+ unsigned int inet;
+ int max_pins_per_net;
+
+ max_pins_per_net = 0;
+ for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
+ if (g_clbs_nlist.net[inet].is_global == false) {
+ max_pins_per_net = max(max_pins_per_net,
+ (int) g_clbs_nlist.net[inet].pins.size());
+ }
+ }
+
+ return (max_pins_per_net);
+}
+
+bool timing_driven_route_net(int inet, int itry, float pres_fac, float max_criticality,
+ float criticality_exp, float astar_fac, float bend_cost,
+ float *pin_criticality, int *sink_order,
+ t_rt_node ** rt_node_of_sink, float *net_delay, t_slack * slacks) {
+
+ /* Returns true as long as it found some way to hook up this net, even if that *
+ * way resulted in overuse of resources (congestion). If there is no way *
+ * to route this net, even ignoring congestion, it returns false. In this *
+ * case the rr_graph is disconnected and you can give up. If slacks = NULL, *
+ * give each sink a criticality of 1 (see below). */
+
+ unsigned int ipin;
+ int num_sinks, itarget, target_pin, target_node, inode;
+ float target_criticality, old_total_cost, new_total_cost, largest_criticality,
+ old_back_cost, new_back_cost;
+ t_rt_node *rt_root;
+
+ struct s_trace *new_route_start_tptr;
+ int highfanout_rlim;
+
+ /* Rip-up any old routing. */
+
+ pathfinder_update_one_cost(trace_head[inet], -1, pres_fac);
+ free_traceback(inet);
+
+ for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
+ if (!slacks) {
+ /* Use criticality of 1. This makes all nets critical. Note: There is a big difference between setting pin criticality to 0
+ compared to 1. If pin criticality is set to 0, then the current path delay is completely ignored during routing. By setting
+ pin criticality to 1, the current path delay to the pin will always be considered and optimized for */
+ pin_criticality[ipin] = 1.0;
+ } else {
+#ifdef PATH_COUNTING
+ /* Pin criticality is based on a weighted sum of timing and path criticalities. */
+ pin_criticality[ipin] = ROUTE_PATH_WEIGHT * slacks->path_criticality[inet][ipin]
+ + (1 - ROUTE_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin];
+#else
+ /* Pin criticality is based on only timing criticality. */
+ pin_criticality[ipin] = slacks->timing_criticality[inet][ipin];
+#endif
+ /* Currently, pin criticality is between 0 and 1. Now shift it downwards
+ by 1 - max_criticality (max_criticality is 0.99 by default, so shift down
+ by 0.01) and cut off at 0. This means that all pins with small criticalities
+ (<0.01) get criticality 0 and are ignored entirely, and everything
+ else becomes a bit less critical. This effect becomes more pronounced if
+ max_criticality is set lower. */
+ // assert(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01);
+ pin_criticality[ipin] = max(pin_criticality[ipin] - (1.0 - max_criticality), 0.0);
+
+ /* Take pin criticality to some power (1 by default). */
+ pin_criticality[ipin] = pow(pin_criticality[ipin], criticality_exp);
+
+ /* Cut off pin criticality at max_criticality. */
+ pin_criticality[ipin] = min(pin_criticality[ipin], max_criticality);
+ }
+ }
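+
+ /* Worked example of the transform above: a timing criticality of 0.50 with
+ the default max_criticality = 0.99 and criticality_exp = 1 becomes
+ max(0.50 - 0.01, 0) = 0.49, then 0.49^1 = 0.49, then min(0.49, 0.99) = 0.49,
+ while a criticality below 0.01 is cut off to 0 and that sink is routed
+ purely for congestion. */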
+
+ num_sinks = g_clbs_nlist.net[inet].num_sinks();
+ heapsort(sink_order, pin_criticality, num_sinks, 0);
+
+ /* Update base costs according to fanout and criticality rules */
+
+ largest_criticality = pin_criticality[sink_order[1]];
+ update_rr_base_costs(inet, largest_criticality);
+
+ mark_ends(inet); /* Only needed to check for multiply-connected SINKs */
+
+ rt_root = init_route_tree_to_source(inet);
+#ifdef PROFILE
+ float build_heap_time = 0;
+#endif
+ // explore in order of decreasing criticality
+ for (itarget = 1; itarget <= num_sinks; itarget++) {
+ target_pin = sink_order[itarget];
+ target_node = net_rr_terminals[inet][target_pin];
+
+ target_criticality = pin_criticality[target_pin];
+#ifdef PROFILE
+ clock_t sink_criticality_start = clock();
+#endif
+
+ highfanout_rlim = mark_node_expansion_by_bin(inet, target_node,
+ rt_root);
+
+ if (itarget > 1 && itry > 5) {
+ /* Once enough iterations have passed to settle on an OPIN, stop the net from using two OPINs so a legal solution is reached sooner */
+ assert(rr_node[rt_root->inode].type == SOURCE);
+ rt_root->re_expand = false;
+ }
+
+ // reexplore route tree from root to add any new nodes (buildheap afterwards)
+#ifdef PROFILE
+ clock_t route_tree_start = clock();
+#endif
+ add_route_tree_to_heap(rt_root, target_node, target_criticality,
+ astar_fac);
+ heap_::build_heap(); // via sifting down everything
+ // if (itarget == num_sinks && itarget > 800) {
+ // vpr_printf_info("heap for target %d: ", itarget);
+ // heap_::verify_extract_top();
+ // }
+#ifdef PROFILE
+ build_heap_time += static_cast<float>(clock() - route_tree_start) / CLOCKS_PER_SEC;
+#endif
+
+
+ // cheapest s_heap (gives index to rr_node) in current route tree to be expanded on
+ struct s_heap* cheapest = get_heap_head();
+
+ if (cheapest == NULL) { /* Infeasible routing. No possible path for net. */
+ vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
+ inet, g_clbs_nlist.net[inet].name, itarget);
+ reset_path_costs();
+ free_route_tree(rt_root);
+ return (false);
+ }
+
+ inode = cheapest->index;
+ while (inode != target_node) {
+ old_total_cost = rr_node_route_inf[inode].path_cost;
+ new_total_cost = cheapest->cost;
+
+ if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
+ old_back_cost = HUGE_POSITIVE_FLOAT;
+ else
+ old_back_cost = rr_node_route_inf[inode].backward_path_cost;
+
+ new_back_cost = cheapest->backward_path_cost;
+
+ /* I only re-expand a node if both the "known" backward cost is lower *
+ * in the new expansion (this is necessary to prevent loops from *
+ * forming in the routing and causing havoc) *and* the expected total *
+ * cost to the sink is lower than the old value. Different R_upstream *
+ * values could make a path with lower back_path_cost less desirable *
+ * than one with higher cost. Test whether or not I should disallow *
+ * re-expansion based on a higher total cost. */
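+
+ /* Illustrative numbers: if inode was first reached with
+ back_cost = 3.0 and total cost = 5.0, and is now reached with
+ back_cost = 2.5 but total cost = 5.5 (a different R_upstream makes
+ the lookahead expect more downstream delay), the node is NOT
+ re-expanded: both comparisons in the condition below must pass. */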
+
+ if (old_total_cost > new_total_cost && old_back_cost > new_back_cost) {
+ rr_node_route_inf[inode].prev_node = cheapest->u.prev_node;
+ rr_node_route_inf[inode].prev_edge = cheapest->prev_edge;
+ rr_node_route_inf[inode].path_cost = new_total_cost;
+ rr_node_route_inf[inode].backward_path_cost = new_back_cost;
+
+ if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */
+ add_to_mod_list(&rr_node_route_inf[inode].path_cost);
+
+ timing_driven_expand_neighbours(cheapest, inet, itry, bend_cost,
+ target_criticality, target_node, astar_fac,
+ highfanout_rlim);
+ }
+
+ free_heap_data(cheapest);
+ cheapest = get_heap_head();
+ // test heap property
+ // if (!heap_::is_valid()) {vpr_printf_info("invalid heap: "); heap_::pop_heap();}
+
+ if (cheapest == NULL) { /* Impossible routing. No path for net. */
+ vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n",
+ inet, g_clbs_nlist.net[inet].name, itarget);
+ reset_path_costs();
+ free_route_tree(rt_root);
+ return (false);
+ }
+
+ inode = cheapest->index;
+ }
+#ifdef PROFILE
+ time_on_criticality[target_criticality / criticality_per_bin] += static_cast<float>(clock() - sink_criticality_start) / CLOCKS_PER_SEC;
+ ++itry_on_criticality[target_criticality / criticality_per_bin];
+#endif
+ /* NB: In the code below I keep two records of the partial routing: the *
+ * traceback and the route_tree. The route_tree enables fast recomputation *
+ * of the Elmore delay to each node in the partial routing. The traceback *
+ * lets me reuse all the routines written for breadth-first routing, which *
+ * all take a traceback structure as input. Before this routine exits the *
+ * route_tree structure is destroyed; only the traceback is needed at that *
+ * point. */
+
+ rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */
+ new_route_start_tptr = update_traceback(cheapest, inet);
+ rt_node_of_sink[target_pin] = update_route_tree(cheapest);
+ free_heap_data(cheapest);
+ pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac);
+
+ empty_heap();
+ reset_path_costs();
+ }
+#ifdef PROFILE
+ if (!time_to_build_heap.empty()) time_to_build_heap[num_sinks / fanout_per_bin] += build_heap_time;
+#endif
+ /* For later timing analysis. */
+
+ update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet);
+ free_route_tree(rt_root);
+ return (true);
+}
+
+static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node,
+ float target_criticality, float astar_fac) {
+
+ /* Puts the entire partial routing below and including rt_node onto the heap *
+ * (except for those parts marked as not to be expanded) by calling itself *
+ * recursively. */
+
+ int inode;
+ t_rt_node *child_node;
+ t_linked_rt_edge *linked_rt_edge;
+ float tot_cost, backward_path_cost, R_upstream;
+
+ /* Pre-order depth-first traversal */
+
+ if (rt_node->re_expand) {
+ inode = rt_node->inode;
+ backward_path_cost = target_criticality * rt_node->Tdel;
+ R_upstream = rt_node->R_upstream;
+#ifndef OP_STRONG_LOOKAHEAD
+ tot_cost = backward_path_cost
+ + astar_fac
+ * get_timing_driven_expected_cost(inode, target_node,
+ target_criticality, R_upstream);
+#else
+ tot_cost = backward_path_cost
+ + astar_fac
+ * get_timing_driven_multiple_wirelengths_expected_cost(inode, target_node,
+ target_criticality, R_upstream);
+#endif
+ heap_::push_back_node(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS,
+ backward_path_cost, R_upstream);
+
+ }
+
+ linked_rt_edge = rt_node->u.child_list;
+
+ while (linked_rt_edge != NULL) {
+ child_node = linked_rt_edge->child;
+ add_route_tree_to_heap(child_node, target_node, target_criticality,
+ astar_fac);
+ linked_rt_edge = linked_rt_edge->next;
+ }
+}
+
+static void timing_driven_expand_neighbours(struct s_heap *current,
+ int inet, int itry,
+ float bend_cost, float criticality_fac, int target_node,
+ float astar_fac, int highfanout_rlim) {
+
+ /* Puts all the rr_nodes adjacent to current on the heap. rr_nodes outside *
+ * the expanded bounding box specified in route_bb are not added to the *
+ * heap. */
+
+ float new_R_upstream;
+
+ int inode = current->index;
+ float old_back_pcost = current->backward_path_cost;
+ float R_upstream = current->R_upstream;
+ int num_edges = rr_node[inode].get_num_edges();
+
+ int target_x = rr_node[target_node].get_xhigh();
+ int target_y = rr_node[target_node].get_yhigh();
+ bool high_fanout = g_clbs_nlist.net[inet].num_sinks() >= HIGH_FANOUT_NET_LIM;
+
+ for (int iconn = 0; iconn < num_edges; iconn++) {
+ int to_node = rr_node[inode].edges[iconn];
+
+ if (high_fanout) {
+ // since target node is an IPIN, xhigh and xlow are the same (same for y values)
+ if (rr_node[to_node].get_xhigh() < target_x - highfanout_rlim
+ || rr_node[to_node].get_xlow() > target_x + highfanout_rlim
+ || rr_node[to_node].get_yhigh() < target_y - highfanout_rlim
+ || rr_node[to_node].get_ylow() > target_y + highfanout_rlim){
+ continue; /* Node is outside high fanout bin. */
+ }
+ }
+ else if (rr_node[to_node].get_xhigh() < route_bb[inet].xmin
+ || rr_node[to_node].get_xlow() > route_bb[inet].xmax
+ || rr_node[to_node].get_yhigh() < route_bb[inet].ymin
+ || rr_node[to_node].get_ylow() > route_bb[inet].ymax)
+ continue; /* Node is outside (expanded) bounding box. */
+
+
+ /* Prune away IPINs that lead to blocks other than the target one. Avoids *
+ * the issue of how to cost them properly so they don't get expanded before *
+ * more promising routes, but makes route-throughs (via CLBs) impossible. *
+ * Change this if you want to investigate route-throughs. */
+
+ t_rr_type to_type = rr_node[to_node].type;
+ if (to_type == IPIN
+ && (rr_node[to_node].get_xhigh() != target_x
+ || rr_node[to_node].get_yhigh() != target_y))
+ continue;
+
+ /* new_back_pcost stores the "known" part of the cost to this node -- the *
+ * congestion cost of all the routing resources back to the existing route *
+ * plus the known delay of the total path back to the source. new_tot_cost *
+ * is this "known" backward cost + an expected cost to get to the target. */
+
+ float new_back_pcost = old_back_pcost
+ + (1. - criticality_fac) * get_rr_cong_cost(to_node);
+
+ int iswitch = rr_node[inode].switches[iconn];
+ if (g_rr_switch_inf[iswitch].buffered) {
+ new_R_upstream = g_rr_switch_inf[iswitch].R;
+ } else {
+ new_R_upstream = R_upstream + g_rr_switch_inf[iswitch].R;
+ }
+
+ float Tdel = rr_node[to_node].C * (new_R_upstream + 0.5 * rr_node[to_node].R);
+ Tdel += g_rr_switch_inf[iswitch].Tdel;
+ new_R_upstream += rr_node[to_node].R;
+ new_back_pcost += criticality_fac * Tdel;
+
+ if (bend_cost != 0.) {
+ t_rr_type from_type = rr_node[inode].type;
+ to_type = rr_node[to_node].type;
+ if ((from_type == CHANX && to_type == CHANY)
+ || (from_type == CHANY && to_type == CHANX))
+ new_back_pcost += bend_cost;
+ }
+
+#ifndef OP_STRONG_LOOKAHEAD
+ float expected_cost = get_timing_driven_expected_cost(to_node, target_node,
+ criticality_fac, new_R_upstream);
+#else
+ float expected_cost = get_timing_driven_multiple_wirelengths_expected_cost(to_node, target_node,
+ criticality_fac, new_R_upstream);
+#endif
+ float new_tot_cost = new_back_pcost + astar_fac * expected_cost;
+
+ node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost,
+ new_R_upstream);
+
+ } /* End for all neighbours */
+}
+
+#ifndef OP_STRONG_LOOKAHEAD
+static float get_timing_driven_expected_cost(int inode, int target_node,
+ float criticality_fac, float R_upstream) {
+
+ /* Determines the expected cost (due to both delay and resource cost) to reach *
+ * the target node from inode. It doesn't include the cost of inode -- *
+ * that's already in the "known" path_cost. */
+
+ t_rr_type rr_type;
+ int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;
+ float expected_cost, cong_cost, Tdel;
+
+ rr_type = rr_node[inode].type;
+
+ if (rr_type == CHANX || rr_type == CHANY) {
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+ int num_interposer_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
+#endif
+
+ num_segs_same_dir = get_expected_segs_to_target(inode, target_node,
+ &num_segs_ortho_dir);
+ cost_index = rr_node[inode].get_cost_index();
+ ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
+
+ cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].base_cost;
+ cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost
+ + rr_indexed_data[SINK_COST_INDEX].base_cost;
+
+ Tdel =
+ num_segs_same_dir * rr_indexed_data[cost_index].T_linear
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].T_linear
+ + num_segs_same_dir * num_segs_same_dir
+ * rr_indexed_data[cost_index].T_quadratic
+ + num_segs_ortho_dir * num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].T_quadratic
+ + R_upstream
+ * (num_segs_same_dir
+ * rr_indexed_data[cost_index].C_load
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].C_load);
+
+ Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+ float interposer_hop_delay = (float)delay_increase * 1e-12;
+ Tdel += num_interposer_hops * interposer_hop_delay;
+#endif
+
+ expected_cost = criticality_fac * Tdel
+ + (1. - criticality_fac) * cong_cost;
+ return (expected_cost);
+ }
+
+ else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
+ return (rr_indexed_data[SINK_COST_INDEX].base_cost);
+ }
+
+ else { /* Change this if you want to investigate route-throughs */
+ return (0.);
+ }
+}
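+
+/* In equation form, the lookahead above estimates, with n_s same-direction and
+ * n_o orthogonal segments to the target:
+ *
+ *     Tdel = n_s*T_linear + n_o*T_linear_ortho
+ *          + n_s^2*T_quadratic + n_o^2*T_quadratic_ortho
+ *          + R_upstream*(n_s*C_load + n_o*C_load_ortho) + T_linear(IPIN)
+ *
+ *     expected_cost = criticality_fac*Tdel + (1 - criticality_fac)*cong_cost
+ *
+ * where cong_cost sums the base costs of those segments plus one IPIN and one
+ * SINK. */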
+
+#else
+static float get_timing_driven_multiple_wirelengths_expected_cost(int inode, int target_node,
+ float criticality_fac, float R_upstream) {
+
+ /* Determines the expected cost (due to both delay and resource cost) to reach *
+ * the target node from inode. It doesn't include the cost of inode -- *
+ * that's already in the "known" path_cost. */
+
+ t_rr_type rr_type;
+ int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir;
+ float expected_cost, cong_cost, Tdel;
+
+ rr_type = rr_node[inode].type;
+
+ if (rr_type == CHANX || rr_type == CHANY) {
+
+ /* OP: Want to account for multiple wirelengths by breaking the assumption that
+ a route will stay on the same kind of node that it's currently on. So here I
+ get the minimum expected cost across all CHANX cost indices. This significantly
+ drives up routing time, but is still much faster than running VPR with
+ astar_fac = 0.
+ */
+
+ float minimum_cost = -1.0;
+ for (cost_index = CHANX_COST_INDEX_START; cost_index < CHANX_COST_INDEX_START + (num_rr_indexed_data - CHANX_COST_INDEX_START)/2; cost_index++){ /* first half of the channel cost indices are the CHANX segment types */
+
+ num_segs_same_dir = get_expected_segs_to_target(inode, cost_index, target_node,
+ &num_segs_ortho_dir);
+ ortho_cost_index = cost_index; /* assume the orthogonal (CHANY) segment type of the same wirelength costs the same */
+
+ cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].base_cost;
+ cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost
+ + rr_indexed_data[SINK_COST_INDEX].base_cost;
+
+ Tdel =
+ num_segs_same_dir * rr_indexed_data[cost_index].T_linear
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].T_linear
+ + num_segs_same_dir * num_segs_same_dir
+ * rr_indexed_data[cost_index].T_quadratic
+ + num_segs_ortho_dir * num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].T_quadratic
+ + R_upstream
+ * (num_segs_same_dir
+ * rr_indexed_data[cost_index].C_load
+ + num_segs_ortho_dir
+ * rr_indexed_data[ortho_cost_index].C_load);
+
+ Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear;
+
+ expected_cost = criticality_fac * Tdel
+ + (1. - criticality_fac) * cong_cost;
+
+ if (expected_cost < minimum_cost || minimum_cost < 0){
+ minimum_cost = expected_cost;
+ }
+ }
+ //assert( minimum_cost >= 0 );
+ return (minimum_cost);
+ }
+
+ else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
+ return (rr_indexed_data[SINK_COST_INDEX].base_cost);
+ }
+
+ else { /* Change this if you want to investigate route-throughs */
+ return (0.);
+ }
+}
+#endif
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+static int get_num_expected_interposer_hops_to_target(int inode, int target_node)
+{
+ /*
+ Description:
+ returns the expected number of times you have to cross the
+ interposer in order to go from inode to target_node.
+ This does not include inode itself.
+
+ Assumptions:
+ 1- Cuts are horizontal
+ 2- target_node is a terminal pin (hence its ylow==yhigh)
+ 3- wires that go through the interposer are uni-directional
+
+ --------- y=150
+ | |
+ --------- y=100
+ | |
+ --------- y=50
+ | |
+ --------- y=0
+
+ num_cuts = 2, cut_step = 50, cut_locations = {50, 100}
+ */
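+
+ /* Example for the figure above (cuts at y = 50 and y = 100): a wire whose
+ far end is at y_start = 75 routing to a sink at y_end = 125 crosses only
+ the cut at y = 100, so num_expected_hops = 1; from y = 25 to y = 125 it
+ crosses both cuts, giving 2. */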
+ int y_start; /* start point y-coordinate. (y-coordinate at the *end* of wire 'i') */
+ int y_end; /* destination (target) y-coordinate */
+ int cut_i, y_cut_location, num_expected_hops;
+ t_rr_type rr_type;
+
+ num_expected_hops = 0;
+ y_end = rr_node[target_node].get_ylow();
+ rr_type = rr_node[inode].type;
+
+ if(rr_type == CHANX)
+ { /* inode is a horizontal wire */
+ /* the ylow and yhigh are the same */
+ assert(rr_node[inode].get_ylow() == rr_node[inode].get_yhigh());
+ y_start = rr_node[inode].get_ylow();
+ }
+ else
+ { /* inode is a CHANY */
+ if(rr_node[inode].direction == INC_DIRECTION)
+ {
+ y_start = rr_node[inode].get_yhigh();
+ }
+ else if(rr_node[inode].direction == DEC_DIRECTION)
+ {
+ y_start = rr_node[inode].get_ylow();
+ }
+ else
+ {
+ /* this means direction is BIDIRECTIONAL! Error out. */
+ y_start = -1;
+ assert(rr_node[inode].direction!=BI_DIRECTION);
+ }
+ }
+
+ /* for every cut, is it between 'i' and 'target'? */
+ for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
+ {
+ y_cut_location = arch_cut_locations[cut_i];
+ if( (y_start < y_cut_location && y_cut_location < y_end) ||
+ (y_end < y_cut_location && y_cut_location < y_start))
+ {
+ ++num_expected_hops;
+ }
+ }
+
+ /* Make sure there is no off-by-one error. For the current node i:
+ if it's a vertical wire, node 'i' itself may be crossing the interposer.
+ */
+ if(rr_type == CHANY)
+ {
+ /* for every cut, does it cut wire 'i'? */
+ for(cut_i=0 ; cut_i<num_cuts; ++cut_i)
+ {
+ y_cut_location = arch_cut_locations[cut_i];
+ if(rr_node[inode].get_ylow() < y_cut_location && y_cut_location < rr_node[inode].get_yhigh())
+ {
+ ++num_expected_hops;
+ }
+ }
+ }
+
+ return num_expected_hops;
+}
+#endif
+
+/* Macro used below to ensure that fractions are rounded up, but floating *
+ * point values very close to an integer are rounded to that integer *
+ * (e.g. ROUND_UP(1.5) == 2, while ROUND_UP(2.0005) == 2, not 3). */
+#define ROUND_UP(x) (ceil ((x) - 0.001))
+
+#ifndef OP_STRONG_LOOKAHEAD
+static int get_expected_segs_to_target(int inode, int target_node,
+ int *num_segs_ortho_dir_ptr) {
+
+ /* Returns the number of segments the same type as inode that will be needed *
+ * to reach target_node (not including inode) in each direction (the same *
+ * direction (horizontal or vertical) as inode and the orthogonal direction).*/
+
+ t_rr_type rr_type;
+ int target_x, target_y, num_segs_same_dir, cost_index, ortho_cost_index;
+ int no_need_to_pass_by_clb;
+ float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh;
+
+ #ifdef INTERPOSER_BASED_ARCHITECTURE
+ int num_expected_hops = get_num_expected_interposer_hops_to_target(inode, target_node);
+ #endif
+
+ target_x = rr_node[target_node].get_xlow();
+ target_y = rr_node[target_node].get_ylow();
+ cost_index = rr_node[inode].get_cost_index();
+ inv_length = rr_indexed_data[cost_index].inv_length;
+ ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index;
+ ortho_inv_length = rr_indexed_data[ortho_cost_index].inv_length;
+ rr_type = rr_node[inode].type;
+
+ if (rr_type == CHANX) {
+ ylow = rr_node[inode].get_ylow();
+ xhigh = rr_node[inode].get_xhigh();
+ xlow = rr_node[inode].get_xlow();
+
+ /* Count vertical (orthogonal to inode) segs first. */
+
+ if (ylow > target_y) { /* Coming from a row above target? */
+ *num_segs_ortho_dir_ptr =
+ (int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else if (ylow < target_y - 1) { /* Below the CLB bottom? */
+ *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) *
+ ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else { /* In a row that passes by target CLB */
+ *num_segs_ortho_dir_ptr = 0;
+ no_need_to_pass_by_clb = 0;
+ }
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+ *num_segs_ortho_dir_ptr += 2 * num_expected_hops;
+#endif
+
+ /* Now count horizontal (same dir. as inode) segs. */
+
+ if (xlow > target_x + no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb -
+ target_x) * inv_length));
+ } else if (xhigh < target_x - no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb -
+ xhigh) * inv_length));
+ } else {
+ num_segs_same_dir = 0;
+ }
+ }
+
+ else { /* inode is a CHANY */
+ ylow = rr_node[inode].get_ylow();
+ yhigh = rr_node[inode].get_yhigh();
+ xlow = rr_node[inode].get_xlow();
+
+ /* Count horizontal (orthogonal to inode) segs first. */
+
+ if (xlow > target_x) { /* Coming from a column right of target? */
+ *num_segs_ortho_dir_ptr = (int)(
+ ROUND_UP((xlow - target_x + 1.) * ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */
+ *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) *
+ ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else { /* In a column that passes by target CLB */
+ *num_segs_ortho_dir_ptr = 0;
+ no_need_to_pass_by_clb = 0;
+ }
+
+ /* Now count vertical (same dir. as inode) segs. */
+
+ if (ylow > target_y + no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb -
+ target_y) * inv_length));
+ } else if (yhigh < target_y - no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb -
+ yhigh) * inv_length));
+ } else {
+ num_segs_same_dir = 0;
+ }
+
+#ifdef INTERPOSER_BASED_ARCHITECTURE
+ num_segs_same_dir += 2 * num_expected_hops;
+#endif
+ }
+
+ return (num_segs_same_dir);
+}
+
+#else
+static int get_expected_segs_to_target(int inode, int via_cost_index, int target_node,
+ int *num_segs_ortho_dir_ptr) {
+
+ /* Returns the number of segments the same type as inode that will be needed *
+ * to reach target_node (not including inode) in each direction (the same *
+ * direction (horizontal or vertical) as inode and the orthogonal direction).*/
+
+ /* OP: 'via_cost_index' is the cost index representing the type of node that we
+ will stay on for the remainder of the routing */
+
+ t_rr_type rr_type;
+ int target_x, target_y, num_segs_same_dir, cost_index;
+ int no_need_to_pass_by_clb;
+ float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh;
+
+ target_x = rr_node[target_node].get_xlow();
+ target_y = rr_node[target_node].get_ylow();
+ cost_index = via_cost_index;
+ inv_length = rr_indexed_data[cost_index].inv_length;
+ ortho_inv_length = inv_length;
+ rr_type = rr_node[inode].type;
+
+ if (rr_type == CHANX) {
+ ylow = rr_node[inode].get_ylow();
+ xhigh = rr_node[inode].get_xhigh();
+ xlow = rr_node[inode].get_xlow();
+
+ /* Count vertical (orthogonal to inode) segs first. */
+
+ if (ylow > target_y) { /* Coming from a row above target? */
+ *num_segs_ortho_dir_ptr =
+ (int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else if (ylow < target_y - 1) { /* Below the CLB bottom? */
+ *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) *
+ ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else { /* In a row that passes by target CLB */
+ *num_segs_ortho_dir_ptr = 0;
+ no_need_to_pass_by_clb = 0;
+ }
+
+ /* Now count horizontal (same dir. as inode) segs. */
+
+ if (xlow > target_x + no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb -
+ target_x) * inv_length));
+ } else if (xhigh < target_x - no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb -
+ xhigh) * inv_length));
+ } else {
+ num_segs_same_dir = 0;
+ }
+ }
+
+ else { /* inode is a CHANY */
+ ylow = rr_node[inode].get_ylow();
+ yhigh = rr_node[inode].get_yhigh();
+ xlow = rr_node[inode].get_xlow();
+
+ /* Count horizontal (orthogonal to inode) segs first. */
+
+ if (xlow > target_x) { /* Coming from a column right of target? */
+ *num_segs_ortho_dir_ptr = (int)(
+ ROUND_UP((xlow - target_x + 1.) * ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */
+ *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) *
+ ortho_inv_length));
+ no_need_to_pass_by_clb = 1;
+ } else { /* In a column that passes by target CLB */
+ *num_segs_ortho_dir_ptr = 0;
+ no_need_to_pass_by_clb = 0;
+ }
+
+ /* Now count vertical (same dir. as inode) segs. */
+
+ if (ylow > target_y + no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb -
+ target_y) * inv_length));
+ } else if (yhigh < target_y - no_need_to_pass_by_clb) {
+ num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb -
+ yhigh) * inv_length));
+ } else {
+ num_segs_same_dir = 0;
+ }
+ }
+
+ return (num_segs_same_dir);
+}
+#endif
+
+static void update_rr_base_costs(int inet, float largest_criticality) {
+
+ /* Changes the base costs of different types of rr_nodes according to the *
+ * criticality, fanout, etc. of the current net being routed (inet). */
+
+ float fanout, factor;
+ int index;
+
+ fanout = g_clbs_nlist.net[inet].num_sinks();
+
+ /* Other reasonable values for factor include fanout and 1 */
+ factor = sqrt(fanout);
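+ /* e.g. a 16-sink net scales the base cost of pass-transistor segment types
+ (those with T_quadratic > 0) by sqrt(16) = 4 in the loop below */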
+
+ for (index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) {
+ if (rr_indexed_data[index].T_quadratic > 0.) { /* pass transistor */
+ rr_indexed_data[index].base_cost =
+ rr_indexed_data[index].saved_base_cost * factor;
+ } else {
+ rr_indexed_data[index].base_cost =
+ rr_indexed_data[index].saved_base_cost;
+ }
+ }
+}
+
+/* Nets that have high fanout can take a very long time to route. Routing for sinks that are part of high-fanout nets should be
+ done within a rectangular 'bin' centered around a target (as opposed to the entire net bounding box), the size of which is returned by this function. */
+static int mark_node_expansion_by_bin(int inet, int target_node,
+ t_rt_node * rt_node) {
+ int target_xlow, target_ylow, target_xhigh, target_yhigh;
+ int rlim = 1;
+ int inode;
+ float area;
+ bool success;
+ t_linked_rt_edge *linked_rt_edge;
+ t_rt_node * child_node;
+
+ target_xlow = rr_node[target_node].get_xlow();
+ target_ylow = rr_node[target_node].get_ylow();
+ target_xhigh = rr_node[target_node].get_xhigh();
+ target_yhigh = rr_node[target_node].get_yhigh();
+
+ if (g_clbs_nlist.net[inet].num_sinks() < HIGH_FANOUT_NET_LIM) {
+ /* This algorithm only applies to high fanout nets */
+ return 1;
+ }
+ if (rt_node == NULL || rt_node->u.child_list == NULL) {
+ /* If unknown traceback, set radius of bin to be size of chip */
+ rlim = max(nx + 2, ny + 2);
+ return rlim;
+ }
+
+ area = (route_bb[inet].xmax - route_bb[inet].xmin)
+ * (route_bb[inet].ymax - route_bb[inet].ymin);
+ if (area <= 0) {
+ area = 1;
+ }
+
+ rlim = (int)(ceil(sqrt((float) area / (float) g_clbs_nlist.net[inet].num_sinks())));
+
+ success = false;
+ /* Quickly determine a feasible bin radius in which to route this sink of a high-fanout net.
+ This is necessary to prevent very long runtimes; in the best case it reduces complexity from O(N^2 logN) to O(NlogN) (Swartz fast router)
+ */
+ linked_rt_edge = rt_node->u.child_list;
+ while (success == false && linked_rt_edge != NULL) {
+ while (linked_rt_edge != NULL && success == false) {
+ child_node = linked_rt_edge->child;
+ inode = child_node->inode;
+ if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
+ if (rr_node[inode].get_xlow() <= target_xhigh + rlim
+ && rr_node[inode].get_xhigh() >= target_xhigh - rlim
+ && rr_node[inode].get_ylow() <= target_yhigh + rlim
+ && rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
+ success = true;
+ }
+ }
+ linked_rt_edge = linked_rt_edge->next;
+ }
+
+ if (success == false) {
+ if (rlim > max(nx + 2, ny + 2)) {
+ vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
+ "VPR internal error, net %s has paths that are not found in traceback.\n", g_clbs_nlist.net[inet].name);
+ }
+ /* if sink not in bin, increase bin size until fit */
+ rlim *= 2;
+ } else {
+ /* We may have just caught a wire at the end of a segment; give it some channel space to explore */
+ rlim += 4;
+ }
+ linked_rt_edge = rt_node->u.child_list;
+ }
+
+ /* adjust rlim to account for width/height of block containing the target sink */
+ int target_span = max( target_xhigh - target_xlow, target_yhigh - target_ylow );
+ rlim += target_span;
+
+ /* redetermine expansion based on rlim */
+ linked_rt_edge = rt_node->u.child_list;
+ while (linked_rt_edge != NULL) {
+ child_node = linked_rt_edge->child;
+ inode = child_node->inode;
+ if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) {
+ if (rr_node[inode].get_xlow() <= target_xhigh + rlim
+ && rr_node[inode].get_xhigh() >= target_xhigh - rlim
+ && rr_node[inode].get_ylow() <= target_yhigh + rlim
+ && rr_node[inode].get_yhigh() >= target_yhigh - rlim) {
+ child_node->re_expand = true;
+ } else {
+ child_node->re_expand = false;
+ }
+ }
+ linked_rt_edge = linked_rt_edge->next;
+ }
+ return rlim;
+}
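+
+/* Worked example of the bin search above: a net with a 20x20 bounding box
+ * (area = 400) and 100 sinks starts with rlim = ceil(sqrt(400/100)) = 2. If no
+ * existing route-tree node lies within 2 of the target, the radius doubles
+ * (4, 8, ...); once one is found, 4 channels of slack plus the span of the
+ * target block are added before deciding which nodes to re-expand. */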
+#define ERROR_TOL 0.0001
+
+static void timing_driven_check_net_delays(float **net_delay) {
+
+ /* Checks that the net delays computed incrementally during timing driven *
+ * routing match those computed from scratch by the net_delay.c module. */
+
+ unsigned int inet, ipin;
+ float **net_delay_check;
+
+ t_chunk list_head_net_delay_check_ch = {NULL, 0, NULL};
+
+ /*struct s_linked_vptr *ch_list_head_net_delay_check;*/
+
+ net_delay_check = alloc_net_delay(&list_head_net_delay_check_ch, g_clbs_nlist.net,
+ g_clbs_nlist.net.size());
+ load_net_delay_from_routing(net_delay_check, g_clbs_nlist.net, g_clbs_nlist.net.size());
+
+ for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) {
+ for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) {
+ if (net_delay_check[inet][ipin] == 0.) { /* Should be only GLOBAL nets */
+ if (fabs(net_delay[inet][ipin]) > ERROR_TOL) {
+ vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
+ "in timing_driven_check_net_delays: net %d pin %d.\n"
+ "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
+ inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
+ }
+ } else {
+ if (fabs(1.0 - net_delay[inet][ipin] / net_delay_check[inet][ipin]) > ERROR_TOL) {
+ vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
+ "in timing_driven_check_net_delays: net %d pin %d.\n"
+ "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n",
+ inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]);
+ }
+ }
+ }
+ }
+
+ free_net_delay(net_delay_check, &list_head_net_delay_check_ch);
+ vpr_printf_info("Completed net delay value cross check successfully.\n");
+}
+
+
+/* Detect if net should be routed or not */
+static bool should_route_net(int inet) {
+ t_trace * tptr = trace_head[inet];
+
+ if (tptr == NULL) {
+ /* No routing yet. */
+ return true;
+ }
+
+ for (;;) {
+ int inode = tptr->index;
+ int occ = rr_node[inode].get_occ();
+ int capacity = rr_node[inode].get_capacity();
+
+ if (occ > capacity) {
+ return true; /* overuse detected */
+ }
+
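+ /* The traceback entry that follows a SINK re-states the branch point
+ from which the next branch leaves -- a node already examined earlier
+ in this walk -- which is presumably why it is skipped here. */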
+ if (rr_node[inode].type == SINK) {
+ tptr = tptr->next; /* Skip next segment. */
+ if (tptr == NULL)
+ break;
+ }
+
+ tptr = tptr->next;
+
+ } /* End while loop -- did an entire traceback. */
+
+ return false; /* Current route has no overuse */
+}
+
+static bool early_exit_heuristic(const t_router_opts& router_opts) {
+ /* Early exit code for cases where it is obvious that a successful route will not be found.
+ Heuristic: if the total wirelength used in the first routing iteration exceeds FIRST_ITER_WIRELENTH_LIMIT (a fraction of the total available wirelength), exit. */
+ int total_wirelength = 0;
+ int available_wirelength = 0;
+
+ for (int i = 0; i < num_rr_nodes; ++i) {
+ if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) {
+ available_wirelength += 1 +
+ rr_node[i].get_xhigh() - rr_node[i].get_xlow() +
+ rr_node[i].get_yhigh() - rr_node[i].get_ylow();
+ }
+ }
+
+ for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) {
+ if (g_clbs_nlist.net[inet].is_global == false
+ && g_clbs_nlist.net[inet].num_sinks() != 0) { /* Globals don't count. */
+ int bends, wirelength, segments;
+ get_num_bends_and_length(inet, &bends, &wirelength, &segments);
+
+ total_wirelength += wirelength;
+ }
+ }
+ vpr_printf_info("Wire length after first iteration %d, total available wire length %d, ratio %g\n",
+ total_wirelength, available_wirelength,
+ (float) (total_wirelength) / (float) (available_wirelength));
+ if ((float) (total_wirelength) / (float) (available_wirelength)> FIRST_ITER_WIRELENTH_LIMIT) {
+ vpr_printf_info("Wire length usage ratio exceeds limit of %g, fail routing.\n",
+ FIRST_ITER_WIRELENTH_LIMIT);
+ return true;
+ }
+ vpr_printf_info("--------- ---------- ----------- ---------------------\n");
+ vpr_printf_info("Iteration Time Crit Path Overused RR Nodes\n");
+ vpr_printf_info("--------- ---------- ----------- ---------------------\n");
+ return false;
+}
+