| #include <cstdio> |
| #include <ctime> |
| #include <cmath> |
| #include <assert.h> |
| #include <vector> |
| #include <algorithm> |
| #include "vpr_utils.h" |
| #include "util.h" |
| #include "vpr_types.h" |
| #include "globals.h" |
| #include "route_export.h" |
| #include "route_common.h" |
| #include "route_tree_timing.h" |
| #include "route_timing.h" |
| #include "heapsort.h" |
| #include "path_delay.h" |
| #include "net_delay.h" |
| #include "stats.h" |
| #include "ReadOptions.h" |
| |
| |
| using namespace std; |
| |
| |
| /* Oleg's test code. This is an experimental/test measure to give the timing-driven router |
| look-ahead greater awareness of the different wire lengths in the FPGA. |
| Enabling this significantly increases routing time, but the router will |
| still be much faster than running it with astar_fac = 0. */ |
| //#define OP_STRONG_LOOKAHEAD |
| |
| |
| /******************** Subroutines local to route_timing.c ********************/ |
| |
| static int get_max_pins_per_net(void); |
| |
| static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node, |
| float target_criticality, float astar_fac); |
| |
| static void timing_driven_expand_neighbours(struct s_heap *current, |
| int inet, int itry, |
| float bend_cost, float criticality_fac, int target_node, |
| float astar_fac, int highfanout_rlim); |
| |
| #ifndef OP_STRONG_LOOKAHEAD |
| static float get_timing_driven_expected_cost(int inode, int target_node, |
| float criticality_fac, float R_upstream); |
| #else |
| static float get_timing_driven_multiple_wirelengths_expected_cost(int inode, int target_node, |
| float criticality_fac, float R_upstream); |
| #endif |
| |
| #ifdef INTERPOSER_BASED_ARCHITECTURE |
| static int get_num_expected_interposer_hops_to_target(int inode, int target_node); |
| #endif |
| |
| #ifndef OP_STRONG_LOOKAHEAD |
| static int get_expected_segs_to_target(int inode, int target_node, |
| int *num_segs_ortho_dir_ptr); |
| #else |
| static int get_expected_segs_to_target(int inode, int via_cost_index, int target_node, |
| int *num_segs_ortho_dir_ptr); |
| #endif |
| |
| static void update_rr_base_costs(int inet, float largest_criticality); |
| |
| static void timing_driven_check_net_delays(float **net_delay); |
| |
| static int mark_node_expansion_by_bin(int inet, int target_node, |
| t_rt_node * rt_node); |
| |
| static double get_overused_ratio(); |
| |
| static bool should_route_net(int inet); |
| static bool early_exit_heuristic(const t_router_opts& router_opts); |
| struct more_sinks_than { |
| inline bool operator() (const int& net_index1, const int& net_index2) { |
| return g_clbs_nlist.net[net_index1].num_sinks() > g_clbs_nlist.net[net_index2].num_sinks(); |
| } |
| }; |
| |
| |
| #ifdef PROFILE |
| static void congestion_analysis(); |
| static void time_on_criticality_analysis(); |
| constexpr int fanout_per_bin = 4; |
| constexpr float criticality_per_bin = 0.05; |
| static vector<float> time_on_fanout; |
| static vector<float> time_to_build_heap; |
| static vector<int> itry_on_fanout; |
| static vector<float> time_on_criticality; |
| static vector<int> itry_on_criticality; |
| #endif |
| |
| /************************ Subroutine definitions *****************************/ |
| bool try_timing_driven_route(struct s_router_opts router_opts, |
| float **net_delay, t_slack * slacks, t_ivec ** clb_opins_used_locally, |
| bool timing_analysis_enabled, const t_timing_inf &timing_inf) { |
| |
| /* Timing-driven routing algorithm. The timing graph (includes slack) * |
| * must have already been allocated, and net_delay must have been allocated. * |
| * Returns true if the routing succeeds, false otherwise. */ |
| #ifdef PROFILE |
| const int max_fanout {get_max_pins_per_net()}; |
| time_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0); |
| time_to_build_heap.resize((max_fanout / fanout_per_bin) + 1, 0); |
| itry_on_fanout.resize((max_fanout / fanout_per_bin) + 1, 0); |
| time_on_criticality.resize((1 / criticality_per_bin) + 1, 0); |
| itry_on_criticality.resize((1 / criticality_per_bin) + 1, 0); |
| #endif |
| |
| auto sorted_nets = vector<int>(g_clbs_nlist.net.size()); |
| |
| for (size_t i = 0; i < g_clbs_nlist.net.size(); ++i) { |
| sorted_nets[i] = i; |
| } |
| |
| // sort so net with most sinks is first. |
| std::sort(sorted_nets.begin(), sorted_nets.end(), more_sinks_than()); |
| |
	timing_driven_route_structs route_structs{}; // allocs route structs (calls default constructor)
| // contains: |
| // float *pin_criticality; /* [1..max_pins_per_net-1] */ |
| // int *sink_order; /* [1..max_pins_per_net-1] */; |
| // t_rt_node **rt_node_of_sink; /* [1..max_pins_per_net-1] */ |
| |
| /* First do one routing iteration ignoring congestion to get reasonable net |
| delay estimates. Set criticalities to 1 when timing analysis is on to |
| optimize timing, and to 0 when timing analysis is off to optimize routability. */ |
| |
| float init_timing_criticality_val = (timing_analysis_enabled ? 1.0 : 0.0); |
| |
	/* Variables used to monitor routing progress so that runs which are visibly
	 * impossible (e.g. too small a channel width W) can be aborted early. */
| double overused_ratio; |
| vector<double> historical_overuse_ratio; |
| historical_overuse_ratio.reserve(router_opts.max_router_iterations + 1); |
| |
| // for unroutable large circuits, the time per iteration seems to increase dramatically |
| vector<float> time_per_iteration; |
| time_per_iteration.reserve(router_opts.max_router_iterations + 1); |
| |
| for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) { |
| if (g_clbs_nlist.net[inet].is_global == false) { |
			for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) {
				slacks->timing_criticality[inet][ipin] = init_timing_criticality_val;
#ifdef PATH_COUNTING
				slacks->path_criticality[inet][ipin] = init_timing_criticality_val;
#endif
			}
| } else { |
| /* Set delay of global signals to zero. Non-global net delays are set by |
| update_net_delays_from_route_tree() inside timing_driven_route_net(), |
| which is only called for non-global nets. */ |
| for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) { |
| net_delay[inet][ipin] = 0.; |
| } |
| } |
| } |
| |
| float pres_fac = router_opts.first_iter_pres_fac; /* Typically 0 -> ignore cong. */ |
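	/* pres_fac schedule (applied at the bottom of the loop below): iteration 1 routes with
	 * first_iter_pres_fac (typically 0, so congestion is ignored), iteration 2 switches to
	 * initial_pres_fac, and every later iteration multiplies pres_fac by pres_fac_mult,
	 * capped at HUGE_POSITIVE_FLOAT / 1e5 to avoid overflow. */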
| |
| for (int itry = 1; itry <= router_opts.max_router_iterations; ++itry) { |
| clock_t begin = clock(); |
| |
| /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */ |
| for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) { |
| g_clbs_nlist.net[inet].is_routed = false; |
| g_clbs_nlist.net[inet].is_fixed = false; |
| } |
| |
| for (unsigned int i = 0; i < g_clbs_nlist.net.size(); ++i) { |
| bool is_routable = try_timing_driven_route_net( |
| sorted_nets[i], |
| itry, |
| pres_fac, |
| router_opts, |
| route_structs.pin_criticality, |
| route_structs.sink_order, |
| route_structs.rt_node_of_sink, |
| net_delay, |
| slacks |
| ); |
| |
| if (!is_routable) { |
| return (false); |
| } |
| } |
| |
| clock_t end = clock(); |
| float time = static_cast<float>(end - begin) / CLOCKS_PER_SEC; |
| time_per_iteration.push_back(time); |
| |
| if (itry == 1 && early_exit_heuristic(router_opts)) return false; |
| |
| /* Make sure any CLB OPINs used up by subblocks being hooked directly |
| to them are reserved for that purpose. */ |
| |
| bool rip_up_local_opins = (itry == 1 ? false : true); |
| reserve_locally_used_opins(pres_fac, router_opts.acc_fac, rip_up_local_opins, clb_opins_used_locally); |
| |
| /* Pathfinder guys quit after finding a feasible route. I may want to keep |
| going longer, trying to improve timing. Think about this some. */ |
| |
| bool success = feasible_routing(); |
| |
		/* Check the ratio of overused nodes; depending on the configuration,
		 * routing may be aborted if the ratio is too high. */
| overused_ratio = get_overused_ratio(); |
| historical_overuse_ratio.push_back(overused_ratio); |
| |
		/* Determine whether routing is impossible and hence should be aborted */
| if (itry > 5){ |
| |
| int expected_success_route_iter = predict_success_route_iter(historical_overuse_ratio, router_opts); |
| if (expected_success_route_iter == UNDEFINED) { |
| return false; |
| } |
| |
| #ifdef REGRESSION_EXIT |
| if (itry > 15) { |
| // compare their slopes over the last 5 iterations |
| double time_per_iteration_slope = linear_regression_vector(time_per_iteration, itry-5); |
| double congestion_per_iteration_slope = linear_regression_vector(historical_overuse_ratio, itry-5); |
				if (router_opts.congestion_analysis)
					vpr_printf_info("%f s/iteration %f %%/iteration\n", time_per_iteration_slope, congestion_per_iteration_slope);
				// time per iteration is growing (by more than 10% of the last iteration's time) while congestion is not decreasing
				if (congestion_per_iteration_slope > 0 && time_per_iteration_slope > 0.1*time_per_iteration.back()
					&& time_per_iteration_slope > 1) {	// filter out noise
					vpr_printf_info("Time per iteration growing too fast at slope %f s/iteration\n"
							"while congestion grows at %f %%/iteration, unlikely to finish.\n",
							time_per_iteration_slope, congestion_per_iteration_slope);
| |
| return false; |
| } |
| } |
| #endif |
| } |
| |
| //print_usage_by_wire_length(); |
| |
| |
| if (success) { |
| |
| if (timing_analysis_enabled) { |
| load_timing_graph_net_delays(net_delay); |
| do_timing_analysis(slacks, timing_inf, false, false); |
| float critical_path_delay = get_critical_path_delay(); |
| vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100); |
| vpr_printf_info("Critical path: %g ns\n", critical_path_delay); |
| } else { |
| vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100); |
| } |
| |
| vpr_printf_info("Successfully routed after %d routing iterations.\n", itry); |
| #ifdef DEBUG |
| if (timing_analysis_enabled) |
| timing_driven_check_net_delays(net_delay); |
| #endif |
| return (true); |
| } |
| |
| if (itry == 1) { |
| pres_fac = router_opts.initial_pres_fac; |
| pathfinder_update_cost(pres_fac, 0.); /* Acc_fac=0 for first iter. */ |
| } else { |
| pres_fac *= router_opts.pres_fac_mult; |
| |
| /* Avoid overflow for high iteration counts, even if acc_cost is big */ |
| pres_fac = min(pres_fac, static_cast<float>(HUGE_POSITIVE_FLOAT / 1e5)); |
| |
| pathfinder_update_cost(pres_fac, router_opts.acc_fac); |
| } |
| |
| if (timing_analysis_enabled) { |
| /* Update slack values by doing another timing analysis. |
| Timing_driven_route_net updated the net delay values. */ |
| |
| load_timing_graph_net_delays(net_delay); |
| |
| do_timing_analysis(slacks, timing_inf, false, false); |
| |
| } else { |
| /* If timing analysis is not enabled, make sure that the criticalities and the |
| net_delays stay as 0 so that wirelength can be optimized. */ |
| |
| for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) { |
| for (unsigned int ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ++ipin) { |
| slacks->timing_criticality[inet][ipin] = 0.; |
| #ifdef PATH_COUNTING |
| slacks->path_criticality[inet][ipin] = 0.; |
| #endif |
| net_delay[inet][ipin] = 0.; |
| } |
| } |
| } |
| |
| |
| if (timing_analysis_enabled) { |
| float critical_path_delay = get_critical_path_delay(); |
| vpr_printf_info("%9d %6.2f sec %8.5f ns %3.2e (%3.4f %)\n", itry, time, critical_path_delay, overused_ratio*num_rr_nodes, overused_ratio*100); |
| } else { |
| vpr_printf_info("%9d %6.2f sec N/A %3.2e (%3.4f %)\n", itry, time, overused_ratio*num_rr_nodes, overused_ratio*100); |
| } |
| |
| #ifdef PROFILE |
| if (router_opts.congestion_analysis) congestion_analysis(); |
| if (router_opts.fanout_analysis) time_on_fanout_analysis(); |
| time_on_criticality_analysis(); |
| #endif |
| fflush(stdout); |
| } |
| vpr_printf_info("Routing failed.\n"); |
| |
| return (false); |
| } |
| |
| // profiling per iteration functions |
| #ifdef PROFILE |
| void time_on_fanout_analysis() { |
| // using the global time_on_fanout and itry_on_fanout |
| vpr_printf_info("fanout low time (s) attemps heap build time (s)\n"); |
| for (size_t bin = 0; bin < time_on_fanout.size(); ++bin) { |
| if (itry_on_fanout[bin]) { // avoid printing the many 0 bins |
| vpr_printf_info("%4d %14.3f %12d %14.3f\n",bin*fanout_per_bin, time_on_fanout[bin], itry_on_fanout[bin], time_to_build_heap[bin]); |
| } |
| } |
| } |
| |
| void time_on_criticality_analysis() { |
| vpr_printf_info("congestion low time (s) attemps\n"); |
| for (size_t bin = 0; bin < time_on_criticality.size(); ++bin) { |
| if (itry_on_criticality[bin]) { // avoid printing the many 0 bins |
| vpr_printf_info("%4d %14.3f %12d\n",bin*criticality_per_bin, time_on_criticality[bin], itry_on_criticality[bin]); |
| } |
| } |
| } |
| // at the end of a routing iteration, profile how much congestion is taken up by each type of rr_node |
| // efficient bit array for checking against congested type |
| struct Congested_node_types { |
| uint32_t mask; |
| Congested_node_types() : mask{0} {} |
| void set_congested(int rr_node_type) {mask |= (1 << rr_node_type);} |
| void clear_congested(int rr_node_type) {mask &= ~(1 << rr_node_type);} |
| bool is_congested(int rr_node_type) const {return mask & (1 << rr_node_type);} |
| bool empty() const {return mask == 0;} |
| }; |
| |
| void congestion_analysis() { |
| static const std::vector<const char*> node_typename { |
| "SOURCE", |
| "SINK", |
| "IPIN", |
| "OPIN", |
| "CHANX", |
| "CHANY", |
| "ICE" |
| }; |
| // each type indexes into array which holds the congestion for that type |
| std::vector<int> congestion_per_type((size_t) NUM_RR_TYPES, 0); |
| // print out specific node information if congestion for type is low enough |
| |
| int total_congestion = 0; |
| for (int inode = 0; inode < num_rr_nodes; ++inode) { |
| const t_rr_node& node = rr_node[inode]; |
| int congestion = node.get_occ() - node.get_capacity(); |
| |
| if (congestion > 0) { |
| total_congestion += congestion; |
| congestion_per_type[node.type] += congestion; |
| } |
| } |
| |
| constexpr int specific_node_print_threshold = 5; |
| Congested_node_types congested; |
| for (int type = SOURCE; type < NUM_RR_TYPES; ++type) { |
| float congestion_percentage = (float)congestion_per_type[type] / (float) total_congestion * 100; |
| vpr_printf_info(" %6s: %10.6f %\n", node_typename[type], congestion_percentage); |
| // nodes of that type need specific printing |
| if (congestion_per_type[type] > 0 && |
| congestion_per_type[type] < specific_node_print_threshold) congested.set_congested(type); |
| } |
| |
| // specific print out each congested node |
| if (!congested.empty()) { |
| vpr_printf_info("Specific congested nodes\nxlow ylow type\n"); |
| for (int inode = 0; inode < num_rr_nodes; ++inode) { |
| const t_rr_node& node = rr_node[inode]; |
| if (congested.is_congested(node.type) && (node.get_occ() - node.get_capacity()) > 0) { |
| vpr_printf_info("(%3d,%3d) %6s\n", node.get_xlow(), node.get_ylow(), node_typename[node.type]); |
| } |
| } |
| } |
| } |
| #endif |
| |
| bool try_timing_driven_route_net(int inet, int itry, float pres_fac, |
| struct s_router_opts router_opts, |
| float* pin_criticality, int* sink_order, |
| t_rt_node** rt_node_of_sink, float** net_delay, t_slack* slacks) { |
| |
| bool is_routed = false; |
| |
| if (g_clbs_nlist.net[inet].is_fixed) { /* Skip pre-routed nets. */ |
| is_routed = true; |
| } else if (g_clbs_nlist.net[inet].is_global) { /* Skip global nets. */ |
| is_routed = true; |
| } else if (should_route_net(inet) == false) { |
| is_routed = true; |
| } else{ |
| // track time spent vs fanout |
| #ifdef PROFILE |
| clock_t net_begin = clock(); |
| #endif |
| |
| is_routed = timing_driven_route_net(inet, itry, pres_fac, |
| router_opts.max_criticality, router_opts.criticality_exp, |
| router_opts.astar_fac, router_opts.bend_cost, |
| pin_criticality, sink_order, |
| rt_node_of_sink, net_delay[inet], slacks); |
| |
| #ifdef PROFILE |
| float time_for_net = static_cast<float>(clock() - net_begin) / CLOCKS_PER_SEC; |
| int net_fanout = g_clbs_nlist.net[inet].num_sinks(); |
| time_on_fanout[net_fanout / fanout_per_bin] += time_for_net; |
| itry_on_fanout[net_fanout / fanout_per_bin] += 1; |
| #endif |
| |
| /* Impossible to route? (disconnected rr_graph) */ |
| if (is_routed) { |
| g_clbs_nlist.net[inet].is_routed = true; |
| g_atoms_nlist.net[clb_to_vpack_net_mapping[inet]].is_routed = true; |
| } else { |
| vpr_printf_info("Routing failed.\n"); |
| } |
| } |
| return (is_routed); |
| } |
| |
| /* |
| * NOTE: |
| * Suggest using a timing_driven_route_structs struct. Memory is managed for you |
| */ |
| void alloc_timing_driven_route_structs(float **pin_criticality_ptr, |
| int **sink_order_ptr, t_rt_node *** rt_node_of_sink_ptr) { |
| |
| /* Allocates all the structures needed only by the timing-driven router. */ |
| |
| int max_pins_per_net = get_max_pins_per_net(); |
| |
| float *pin_criticality = (float *) my_malloc((max_pins_per_net - 1) * sizeof(float)); |
| *pin_criticality_ptr = pin_criticality - 1; /* First sink is pin #1. */ |
| |
| int *sink_order = (int *) my_malloc((max_pins_per_net - 1) * sizeof(int)); |
| *sink_order_ptr = sink_order - 1; |
| |
| t_rt_node **rt_node_of_sink = (t_rt_node **) my_malloc((max_pins_per_net - 1) * sizeof(t_rt_node *)); |
| *rt_node_of_sink_ptr = rt_node_of_sink - 1; |
| |
| alloc_route_tree_timing_structs(); |
| } |
| |
/* Returns the overuse ratio: the sum of (occupancy - capacity) over all overused rr_nodes, divided by num_rr_nodes. */
| static double get_overused_ratio(){ |
| double overused_nodes = 0.0; |
| int inode; |
| for(inode = 0; inode < num_rr_nodes; inode++){ |
| if(rr_node[inode].get_occ() > rr_node[inode].get_capacity()) |
| overused_nodes += (rr_node[inode].get_occ() - rr_node[inode].get_capacity()); |
| } |
| overused_nodes /= (double)num_rr_nodes; |
| return overused_nodes; |
| } |
| |
| /* |
| * NOTE: |
| * Suggest using a timing_driven_route_structs struct. Memory is managed for you |
| */ |
| void free_timing_driven_route_structs(float *pin_criticality, int *sink_order, |
| t_rt_node ** rt_node_of_sink) { |
| |
	/* Frees all the structures needed only by the timing-driven router. */
| |
| free(pin_criticality + 1); /* Starts at index 1. */ |
| free(sink_order + 1); |
| free(rt_node_of_sink + 1); |
| free_route_tree_timing_structs(); |
| } |
| |
| timing_driven_route_structs::timing_driven_route_structs() { |
| alloc_timing_driven_route_structs( |
| &pin_criticality, |
| &sink_order, |
| &rt_node_of_sink |
| ); |
| } |
| |
| timing_driven_route_structs::~timing_driven_route_structs() { |
| free_timing_driven_route_structs( |
| pin_criticality, |
| sink_order, |
| rt_node_of_sink |
| ); |
| } |
| |
| static int get_max_pins_per_net(void) { |
| |
| /* Returns the largest number of pins on any non-global net. */ |
| |
| unsigned int inet; |
| int max_pins_per_net; |
| |
| max_pins_per_net = 0; |
| for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) { |
| if (g_clbs_nlist.net[inet].is_global == false) { |
| max_pins_per_net = max(max_pins_per_net, |
| (int) g_clbs_nlist.net[inet].pins.size()); |
| } |
| } |
| |
| return (max_pins_per_net); |
| } |
| |
| bool timing_driven_route_net(int inet, int itry, float pres_fac, float max_criticality, |
| float criticality_exp, float astar_fac, float bend_cost, |
| float *pin_criticality, int *sink_order, |
| t_rt_node ** rt_node_of_sink, float *net_delay, t_slack * slacks) { |
| |
	/* Returns true as long as it found some way to hook up this net, even if that *
	 * way resulted in overuse of resources (congestion). If there is no way       *
	 * to route this net, even ignoring congestion, it returns false. In this      *
	 * case the rr_graph is disconnected and you can give up. If slacks == NULL,   *
	 * each sink is given a criticality of 1 (see below).                          */
| |
| unsigned int ipin; |
| int num_sinks, itarget, target_pin, target_node, inode; |
| float target_criticality, old_total_cost, new_total_cost, largest_criticality, |
| old_back_cost, new_back_cost; |
| t_rt_node *rt_root; |
| |
| struct s_trace *new_route_start_tptr; |
| int highfanout_rlim; |
| |
| /* Rip-up any old routing. */ |
| |
| pathfinder_update_one_cost(trace_head[inet], -1, pres_fac); |
| free_traceback(inet); |
| |
| for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) { |
| if (!slacks) { |
| /* Use criticality of 1. This makes all nets critical. Note: There is a big difference between setting pin criticality to 0 |
| compared to 1. If pin criticality is set to 0, then the current path delay is completely ignored during routing. By setting |
| pin criticality to 1, the current path delay to the pin will always be considered and optimized for */ |
| pin_criticality[ipin] = 1.0; |
| } else { |
| #ifdef PATH_COUNTING |
| /* Pin criticality is based on a weighted sum of timing and path criticalities. */ |
| pin_criticality[ipin] = ROUTE_PATH_WEIGHT * slacks->path_criticality[inet][ipin] |
| + (1 - ROUTE_PATH_WEIGHT) * slacks->timing_criticality[inet][ipin]; |
| #else |
| /* Pin criticality is based on only timing criticality. */ |
| pin_criticality[ipin] = slacks->timing_criticality[inet][ipin]; |
| #endif |
| /* Currently, pin criticality is between 0 and 1. Now shift it downwards |
| by 1 - max_criticality (max_criticality is 0.99 by default, so shift down |
| by 0.01) and cut off at 0. This means that all pins with small criticalities |
| (<0.01) get criticality 0 and are ignored entirely, and everything |
| else becomes a bit less critical. This effect becomes more pronounced if |
| max_criticality is set lower. */ |
| // assert(pin_criticality[ipin] > -0.01 && pin_criticality[ipin] < 1.01); |
| pin_criticality[ipin] = max(pin_criticality[ipin] - (1.0 - max_criticality), 0.0); |
| |
| /* Take pin criticality to some power (1 by default). */ |
| pin_criticality[ipin] = pow(pin_criticality[ipin], criticality_exp); |
| |
| /* Cut off pin criticality at max_criticality. */ |
| pin_criticality[ipin] = min(pin_criticality[ipin], max_criticality); |
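			/* Worked example (with the default max_criticality = 0.99 and criticality_exp = 1):
			   a raw timing criticality of 0.50 becomes min(pow(0.50 - 0.01, 1), 0.99) = 0.49,
			   while anything below 0.01 is clamped to 0 and routed purely for wirelength. */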
| } |
| } |
| |
| num_sinks = g_clbs_nlist.net[inet].num_sinks(); |
| heapsort(sink_order, pin_criticality, num_sinks, 0); |
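	/* sink_order[1..num_sinks] now lists this net's sinks in order of decreasing
	   pin_criticality, so sink_order[1] is the most critical sink. */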
| |
| /* Update base costs according to fanout and criticality rules */ |
| |
| largest_criticality = pin_criticality[sink_order[1]]; |
| update_rr_base_costs(inet, largest_criticality); |
| |
| mark_ends(inet); /* Only needed to check for multiply-connected SINKs */ |
| |
| rt_root = init_route_tree_to_source(inet); |
| #ifdef PROFILE |
| float build_heap_time = 0; |
| #endif |
| // explore in order of decreasing criticality |
| for (itarget = 1; itarget <= num_sinks; itarget++) { |
| target_pin = sink_order[itarget]; |
| target_node = net_rr_terminals[inet][target_pin]; |
| |
| target_criticality = pin_criticality[target_pin]; |
| #ifdef PROFILE |
| clock_t sink_criticality_start = clock(); |
| #endif |
| |
| highfanout_rlim = mark_node_expansion_by_bin(inet, target_node, |
| rt_root); |
| |
| if (itarget > 1 && itry > 5) { |
			/* Enough iterations have been given to settle on an OPIN; to speed convergence to a legal solution, do not let the net use two OPINs. */
| assert(rr_node[rt_root->inode].type == SOURCE); |
| rt_root->re_expand = false; |
| } |
| |
| // reexplore route tree from root to add any new nodes (buildheap afterwards) |
| #ifdef PROFILE |
| clock_t route_tree_start = clock(); |
| #endif |
| add_route_tree_to_heap(rt_root, target_node, target_criticality, |
| astar_fac); |
| heap_::build_heap(); // via sifting down everything |
| // if (itarget == num_sinks && itarget > 800) { |
| // vpr_printf_info("heap for target %d: ", itarget); |
| // heap_::verify_extract_top(); |
| // } |
| #ifdef PROFILE |
| build_heap_time += static_cast<float>(clock() - route_tree_start) / CLOCKS_PER_SEC; |
| #endif |
| |
| |
| // cheapest s_heap (gives index to rr_node) in current route tree to be expanded on |
| struct s_heap* cheapest = get_heap_head(); |
| |
| if (cheapest == NULL) { /* Infeasible routing. No possible path for net. */ |
| vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n", |
| inet, g_clbs_nlist.net[inet].name, itarget); |
| reset_path_costs(); |
| free_route_tree(rt_root); |
| return (false); |
| } |
| |
| inode = cheapest->index; |
| while (inode != target_node) { |
| old_total_cost = rr_node_route_inf[inode].path_cost; |
| new_total_cost = cheapest->cost; |
| |
| if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */ |
| old_back_cost = HUGE_POSITIVE_FLOAT; |
| else |
| old_back_cost = rr_node_route_inf[inode].backward_path_cost; |
| |
| new_back_cost = cheapest->backward_path_cost; |
| |
| /* I only re-expand a node if both the "known" backward cost is lower * |
| * in the new expansion (this is necessary to prevent loops from * |
| * forming in the routing and causing havoc) *and* the expected total * |
| * cost to the sink is lower than the old value. Different R_upstream * |
| * values could make a path with lower back_path_cost less desirable * |
| * than one with higher cost. Test whether or not I should disallow * |
| * re-expansion based on a higher total cost. */ |
| |
| if (old_total_cost > new_total_cost && old_back_cost > new_back_cost) { |
| rr_node_route_inf[inode].prev_node = cheapest->u.prev_node; |
| rr_node_route_inf[inode].prev_edge = cheapest->prev_edge; |
| rr_node_route_inf[inode].path_cost = new_total_cost; |
| rr_node_route_inf[inode].backward_path_cost = new_back_cost; |
| |
| if (old_total_cost > 0.99 * HUGE_POSITIVE_FLOAT) /* First time touched. */ |
| add_to_mod_list(&rr_node_route_inf[inode].path_cost); |
| |
| timing_driven_expand_neighbours(cheapest, inet, itry, bend_cost, |
| target_criticality, target_node, astar_fac, |
| highfanout_rlim); |
| } |
| |
| free_heap_data(cheapest); |
| cheapest = get_heap_head(); |
| // test heap property |
| // if (!heap_::is_valid()) {vpr_printf_info("invalid heap: "); heap_::pop_heap();} |
| |
| if (cheapest == NULL) { /* Impossible routing. No path for net. */ |
| vpr_printf_info("Cannot route net #%d (%s) to sink #%d -- no possible path.\n", |
| inet, g_clbs_nlist.net[inet].name, itarget); |
| reset_path_costs(); |
| free_route_tree(rt_root); |
| return (false); |
| } |
| |
| inode = cheapest->index; |
| } |
| #ifdef PROFILE |
| time_on_criticality[target_criticality / criticality_per_bin] += static_cast<float>(clock() - sink_criticality_start) / CLOCKS_PER_SEC; |
| ++itry_on_criticality[target_criticality / criticality_per_bin]; |
| #endif |
| /* NB: In the code below I keep two records of the partial routing: the * |
| * traceback and the route_tree. The route_tree enables fast recomputation * |
| * of the Elmore delay to each node in the partial routing. The traceback * |
| * lets me reuse all the routines written for breadth-first routing, which * |
| * all take a traceback structure as input. Before this routine exits the * |
| * route_tree structure is destroyed; only the traceback is needed at that * |
| * point. */ |
| |
| rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. */ |
| new_route_start_tptr = update_traceback(cheapest, inet); |
| rt_node_of_sink[target_pin] = update_route_tree(cheapest); |
| free_heap_data(cheapest); |
| pathfinder_update_one_cost(new_route_start_tptr, 1, pres_fac); |
| |
| empty_heap(); |
| reset_path_costs(); |
| } |
| #ifdef PROFILE |
| if (!time_to_build_heap.empty()) time_to_build_heap[num_sinks / fanout_per_bin] += build_heap_time; |
| #endif |
| /* For later timing analysis. */ |
| |
| update_net_delays_from_route_tree(net_delay, rt_node_of_sink, inet); |
| free_route_tree(rt_root); |
| return (true); |
| } |
| |
| static void add_route_tree_to_heap(t_rt_node * rt_node, int target_node, |
| float target_criticality, float astar_fac) { |
| |
| /* Puts the entire partial routing below and including rt_node onto the heap * |
| * (except for those parts marked as not to be expanded) by calling itself * |
| * recursively. */ |
| |
| int inode; |
| t_rt_node *child_node; |
| t_linked_rt_edge *linked_rt_edge; |
| float tot_cost, backward_path_cost, R_upstream; |
| |
| /* Pre-order depth-first traversal */ |
| |
| if (rt_node->re_expand) { |
| inode = rt_node->inode; |
| backward_path_cost = target_criticality * rt_node->Tdel; |
| R_upstream = rt_node->R_upstream; |
| #ifndef OP_STRONG_LOOKAHEAD |
| tot_cost = backward_path_cost |
| + astar_fac |
| * get_timing_driven_expected_cost(inode, target_node, |
| target_criticality, R_upstream); |
| #else |
| tot_cost = backward_path_cost |
| + astar_fac |
| * get_timing_driven_multiple_wirelengths_expected_cost(inode, target_node, |
| target_criticality, R_upstream); |
| #endif |
| heap_::push_back_node(inode, tot_cost, NO_PREVIOUS, NO_PREVIOUS, |
| backward_path_cost, R_upstream); |
| |
| } |
| |
| linked_rt_edge = rt_node->u.child_list; |
| |
| while (linked_rt_edge != NULL) { |
| child_node = linked_rt_edge->child; |
| add_route_tree_to_heap(child_node, target_node, target_criticality, |
| astar_fac); |
| linked_rt_edge = linked_rt_edge->next; |
| } |
| } |
| |
| static void timing_driven_expand_neighbours(struct s_heap *current, |
| int inet, int itry, |
| float bend_cost, float criticality_fac, int target_node, |
| float astar_fac, int highfanout_rlim) { |
| |
| /* Puts all the rr_nodes adjacent to current on the heap. rr_nodes outside * |
| * the expanded bounding box specified in route_bb are not added to the * |
| * heap. */ |
| |
| float new_R_upstream; |
| |
| int inode = current->index; |
| float old_back_pcost = current->backward_path_cost; |
| float R_upstream = current->R_upstream; |
| int num_edges = rr_node[inode].get_num_edges(); |
| |
| int target_x = rr_node[target_node].get_xhigh(); |
| int target_y = rr_node[target_node].get_yhigh(); |
| bool high_fanout = g_clbs_nlist.net[inet].num_sinks() >= HIGH_FANOUT_NET_LIM; |
| |
| for (int iconn = 0; iconn < num_edges; iconn++) { |
| int to_node = rr_node[inode].edges[iconn]; |
| |
| if (high_fanout) { |
| // since target node is an IPIN, xhigh and xlow are the same (same for y values) |
| if (rr_node[to_node].get_xhigh() < target_x - highfanout_rlim |
| || rr_node[to_node].get_xlow() > target_x + highfanout_rlim |
| || rr_node[to_node].get_yhigh() < target_y - highfanout_rlim |
| || rr_node[to_node].get_ylow() > target_y + highfanout_rlim){ |
| continue; /* Node is outside high fanout bin. */ |
| } |
| } |
| else if (rr_node[to_node].get_xhigh() < route_bb[inet].xmin |
| || rr_node[to_node].get_xlow() > route_bb[inet].xmax |
| || rr_node[to_node].get_yhigh() < route_bb[inet].ymin |
| || rr_node[to_node].get_ylow() > route_bb[inet].ymax) |
| continue; /* Node is outside (expanded) bounding box. */ |
| |
| |
| /* Prune away IPINs that lead to blocks other than the target one. Avoids * |
| * the issue of how to cost them properly so they don't get expanded before * |
| * more promising routes, but makes route-throughs (via CLBs) impossible. * |
| * Change this if you want to investigate route-throughs. */ |
| |
| t_rr_type to_type = rr_node[to_node].type; |
| if (to_type == IPIN |
| && (rr_node[to_node].get_xhigh() != target_x |
| || rr_node[to_node].get_yhigh() != target_y)) |
| continue; |
| |
| /* new_back_pcost stores the "known" part of the cost to this node -- the * |
| * congestion cost of all the routing resources back to the existing route * |
| * plus the known delay of the total path back to the source. new_tot_cost * |
| * is this "known" backward cost + an expected cost to get to the target. */ |
| |
| float new_back_pcost = old_back_pcost |
| + (1. - criticality_fac) * get_rr_cong_cost(to_node); |
| |
| int iswitch = rr_node[inode].switches[iconn]; |
| if (g_rr_switch_inf[iswitch].buffered) { |
| new_R_upstream = g_rr_switch_inf[iswitch].R; |
| } else { |
| new_R_upstream = R_upstream + g_rr_switch_inf[iswitch].R; |
| } |
| |
| float Tdel = rr_node[to_node].C * (new_R_upstream + 0.5 * rr_node[to_node].R); |
| Tdel += g_rr_switch_inf[iswitch].Tdel; |
| new_R_upstream += rr_node[to_node].R; |
| new_back_pcost += criticality_fac * Tdel; |
| |
| if (bend_cost != 0.) { |
| t_rr_type from_type = rr_node[inode].type; |
| to_type = rr_node[to_node].type; |
| if ((from_type == CHANX && to_type == CHANY) |
| || (from_type == CHANY && to_type == CHANX)) |
| new_back_pcost += bend_cost; |
| } |
| |
| #ifndef OP_STRONG_LOOKAHEAD |
| float expected_cost = get_timing_driven_expected_cost(to_node, target_node, |
| criticality_fac, new_R_upstream); |
| #else |
| float expected_cost = get_timing_driven_multiple_wirelengths_expected_cost(to_node, target_node, |
| criticality_fac, new_R_upstream); |
| #endif |
| float new_tot_cost = new_back_pcost + astar_fac * expected_cost; |
| |
| node_to_heap(to_node, new_tot_cost, inode, iconn, new_back_pcost, |
| new_R_upstream); |
| |
| } /* End for all neighbours */ |
| } |
| |
| #ifndef OP_STRONG_LOOKAHEAD |
| static float get_timing_driven_expected_cost(int inode, int target_node, |
| float criticality_fac, float R_upstream) { |
| |
	/* Determines the expected cost (due to both delay and resource cost) to reach *
| * the target node from inode. It doesn't include the cost of inode -- * |
| * that's already in the "known" path_cost. */ |
| |
| t_rr_type rr_type; |
| int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir; |
| float expected_cost, cong_cost, Tdel; |
| |
| rr_type = rr_node[inode].type; |
| |
| if (rr_type == CHANX || rr_type == CHANY) { |
| |
| #ifdef INTERPOSER_BASED_ARCHITECTURE |
| int num_interposer_hops = get_num_expected_interposer_hops_to_target(inode, target_node); |
| #endif |
| |
| num_segs_same_dir = get_expected_segs_to_target(inode, target_node, |
| &num_segs_ortho_dir); |
| cost_index = rr_node[inode].get_cost_index(); |
| ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index; |
| |
| cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost |
| + num_segs_ortho_dir |
| * rr_indexed_data[ortho_cost_index].base_cost; |
| cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost |
| + rr_indexed_data[SINK_COST_INDEX].base_cost; |
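		/* Expected remaining delay: a linear term per segment, a quadratic term for chains of
		   unbuffered (pass-transistor) segments (T_quadratic; see update_rr_base_costs()), and
		   the loading of the expected downstream wire capacitance (C_load) on R_upstream. */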
| |
| Tdel = |
| num_segs_same_dir * rr_indexed_data[cost_index].T_linear |
| + num_segs_ortho_dir |
| * rr_indexed_data[ortho_cost_index].T_linear |
| + num_segs_same_dir * num_segs_same_dir |
| * rr_indexed_data[cost_index].T_quadratic |
| + num_segs_ortho_dir * num_segs_ortho_dir |
| * rr_indexed_data[ortho_cost_index].T_quadratic |
| + R_upstream |
| * (num_segs_same_dir |
| * rr_indexed_data[cost_index].C_load |
| + num_segs_ortho_dir |
| * rr_indexed_data[ortho_cost_index].C_load); |
| |
| Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear; |
| |
| #ifdef INTERPOSER_BASED_ARCHITECTURE |
| float interposer_hop_delay = (float)delay_increase * 1e-12; |
| Tdel += num_interposer_hops * interposer_hop_delay; |
| #endif |
| |
| expected_cost = criticality_fac * Tdel |
| + (1. - criticality_fac) * cong_cost; |
| return (expected_cost); |
| } |
| |
| else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ |
| return (rr_indexed_data[SINK_COST_INDEX].base_cost); |
| } |
| |
| else { /* Change this if you want to investigate route-throughs */ |
| return (0.); |
| } |
| } |
| |
| #else |
| static float get_timing_driven_multiple_wirelengths_expected_cost(int inode, int target_node, |
| float criticality_fac, float R_upstream) { |
| |
	/* Determines the expected cost (due to both delay and resource cost) to reach *
| * the target node from inode. It doesn't include the cost of inode -- * |
| * that's already in the "known" path_cost. */ |
| |
| t_rr_type rr_type; |
| int cost_index, ortho_cost_index, num_segs_same_dir, num_segs_ortho_dir; |
| float expected_cost, cong_cost, Tdel; |
| |
| rr_type = rr_node[inode].type; |
| |
| if (rr_type == CHANX || rr_type == CHANY) { |
| |
| /* OP: Want to account for multiple wirelengths by breaking the assumption that |
| a route will stay on the same kind of node that it's currently on. So here I |
| get the minimum expected cost across all CHANX cost indices. This significantly |
| drives up routing time, but is still much faster than running VPR with |
| astar_fac = 0. |
| */ |
| |
| float minimum_cost = -1.0; |
| for (cost_index = CHANX_COST_INDEX_START; cost_index < CHANX_COST_INDEX_START + (num_rr_indexed_data - CHANX_COST_INDEX_START)/2; cost_index++){ |
| |
| num_segs_same_dir = get_expected_segs_to_target(inode, cost_index, target_node, |
| &num_segs_ortho_dir); |
| ortho_cost_index = cost_index; |
| |
| cong_cost = num_segs_same_dir * rr_indexed_data[cost_index].base_cost |
| + num_segs_ortho_dir |
| * rr_indexed_data[ortho_cost_index].base_cost; |
| cong_cost += rr_indexed_data[IPIN_COST_INDEX].base_cost |
| + rr_indexed_data[SINK_COST_INDEX].base_cost; |
| |
| Tdel = |
| num_segs_same_dir * rr_indexed_data[cost_index].T_linear |
| + num_segs_ortho_dir |
| * rr_indexed_data[ortho_cost_index].T_linear |
| + num_segs_same_dir * num_segs_same_dir |
| * rr_indexed_data[cost_index].T_quadratic |
| + num_segs_ortho_dir * num_segs_ortho_dir |
| * rr_indexed_data[ortho_cost_index].T_quadratic |
| + R_upstream |
| * (num_segs_same_dir |
| * rr_indexed_data[cost_index].C_load |
| + num_segs_ortho_dir |
| * rr_indexed_data[ortho_cost_index].C_load); |
| |
| Tdel += rr_indexed_data[IPIN_COST_INDEX].T_linear; |
| |
| expected_cost = criticality_fac * Tdel |
| + (1. - criticality_fac) * cong_cost; |
| |
| if (expected_cost < minimum_cost || minimum_cost < 0){ |
| minimum_cost = expected_cost; |
| } |
| } |
| //assert( minimum_cost >= 0 ); |
| return (minimum_cost); |
| } |
| |
| else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ |
| return (rr_indexed_data[SINK_COST_INDEX].base_cost); |
| } |
| |
| else { /* Change this if you want to investigate route-throughs */ |
| return (0.); |
| } |
| } |
| #endif |
| |
| #ifdef INTERPOSER_BASED_ARCHITECTURE |
| static int get_num_expected_interposer_hops_to_target(int inode, int target_node) |
| { |
| /* |
| Description: |
| returns the expected number of times you have to cross the |
| interposer in order to go from inode to target_node. |
| This does not include inode itself. |
| |
| Assumptions: |
| 1- Cuts are horizontal |
| 2- target_node is a terminal pin (hence its ylow==yhigh) |
| 3- wires that go through the interposer are uni-directional |
| |
| --------- y=150 |
| | | |
| --------- y=100 |
| | | |
| --------- y=50 |
| | | |
| --------- y=0 |
| |
| num_cuts = 2, cut_step = 50, cut_locations = {50, 100} |
| */ |
| int y_start; /* start point y-coordinate. (y-coordinate at the *end* of wire 'i') */ |
| int y_end; /* destination (target) y-coordinate */ |
| int cut_i, y_cut_location, num_expected_hops; |
| t_rr_type rr_type; |
| |
| num_expected_hops = 0; |
| y_end = rr_node[target_node].get_ylow(); |
| rr_type = rr_node[inode].type; |
| |
| if(rr_type == CHANX) |
| { /* inode is a horizontal wire */ |
| /* the ylow and yhigh are the same */ |
| assert(rr_node[inode].get_ylow() == rr_node[inode].get_yhigh()); |
| y_start = rr_node[inode].get_ylow(); |
| } |
| else |
| { /* inode is a CHANY */ |
| if(rr_node[inode].direction == INC_DIRECTION) |
| { |
| y_start = rr_node[inode].get_yhigh(); |
| } |
| else if(rr_node[inode].direction == DEC_DIRECTION) |
| { |
| y_start = rr_node[inode].get_ylow(); |
| } |
| else |
| { |
| /* this means direction is BIDIRECTIONAL! Error out. */ |
| y_start = -1; |
| assert(rr_node[inode].direction!=BI_DIRECTION); |
| } |
| } |
| |
| /* for every cut, is it between 'i' and 'target'? */ |
| for(cut_i=0 ; cut_i<num_cuts; ++cut_i) |
| { |
| y_cut_location = arch_cut_locations[cut_i]; |
| if( (y_start < y_cut_location && y_cut_location < y_end) || |
| (y_end < y_cut_location && y_cut_location < y_start)) |
| { |
| ++num_expected_hops; |
| } |
| } |
| |
	/* Make sure there is no off-by-1 error. For the current node i:
	   if it's a vertical wire, node 'i' itself may be crossing the interposer.
	*/
| if(rr_type == CHANY) |
| { |
| /* for every cut, does it cut wire 'i'? */ |
| for(cut_i=0 ; cut_i<num_cuts; ++cut_i) |
| { |
| y_cut_location = arch_cut_locations[cut_i]; |
| if(rr_node[inode].get_ylow() < y_cut_location && y_cut_location < rr_node[inode].get_yhigh()) |
| { |
| ++num_expected_hops; |
| } |
| } |
| } |
| |
| return num_expected_hops; |
| } |
| #endif |
| |
| /* Macro used below to ensure that fractions are rounded up, but floating * |
| * point values very close to an integer are rounded to that integer. */ |
| #define ROUND_UP(x) (ceil (x - 0.001)) |
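/* e.g. ROUND_UP(3.0005) = ceil(2.9995) = 3, while ROUND_UP(3.2) = ceil(3.199) = 4. */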
| |
| #ifndef OP_STRONG_LOOKAHEAD |
| static int get_expected_segs_to_target(int inode, int target_node, |
| int *num_segs_ortho_dir_ptr) { |
| |
| /* Returns the number of segments the same type as inode that will be needed * |
| * to reach target_node (not including inode) in each direction (the same * |
| * direction (horizontal or vertical) as inode and the orthogonal direction).*/ |
| |
| t_rr_type rr_type; |
| int target_x, target_y, num_segs_same_dir, cost_index, ortho_cost_index; |
| int no_need_to_pass_by_clb; |
| float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh; |
| |
| #ifdef INTERPOSER_BASED_ARCHITECTURE |
| int num_expected_hops = get_num_expected_interposer_hops_to_target(inode, target_node); |
| #endif |
| |
| target_x = rr_node[target_node].get_xlow(); |
| target_y = rr_node[target_node].get_ylow(); |
| cost_index = rr_node[inode].get_cost_index(); |
| inv_length = rr_indexed_data[cost_index].inv_length; |
| ortho_cost_index = rr_indexed_data[cost_index].ortho_cost_index; |
| ortho_inv_length = rr_indexed_data[ortho_cost_index].inv_length; |
| rr_type = rr_node[inode].type; |
| |
| if (rr_type == CHANX) { |
| ylow = rr_node[inode].get_ylow(); |
| xhigh = rr_node[inode].get_xhigh(); |
| xlow = rr_node[inode].get_xlow(); |
| |
| /* Count vertical (orthogonal to inode) segs first. */ |
| |
| if (ylow > target_y) { /* Coming from a row above target? */ |
| *num_segs_ortho_dir_ptr = |
| (int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length)); |
| no_need_to_pass_by_clb = 1; |
| } else if (ylow < target_y - 1) { /* Below the CLB bottom? */ |
| *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) * |
| ortho_inv_length)); |
| no_need_to_pass_by_clb = 1; |
| } else { /* In a row that passes by target CLB */ |
| *num_segs_ortho_dir_ptr = 0; |
| no_need_to_pass_by_clb = 0; |
| } |
| |
| #ifdef INTERPOSER_BASED_ARCHITECTURE |
| (*num_segs_ortho_dir_ptr) = (*num_segs_ortho_dir_ptr) + 2*num_expected_hops; |
| #endif |
| |
| /* Now count horizontal (same dir. as inode) segs. */ |
| |
| if (xlow > target_x + no_need_to_pass_by_clb) { |
| num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb - |
| target_x) * inv_length)); |
| } else if (xhigh < target_x - no_need_to_pass_by_clb) { |
| num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb - |
| xhigh) * inv_length)); |
| } else { |
| num_segs_same_dir = 0; |
| } |
| } |
| |
| else { /* inode is a CHANY */ |
| ylow = rr_node[inode].get_ylow(); |
| yhigh = rr_node[inode].get_yhigh(); |
| xlow = rr_node[inode].get_xlow(); |
| |
| /* Count horizontal (orthogonal to inode) segs first. */ |
| |
| if (xlow > target_x) { /* Coming from a column right of target? */ |
| *num_segs_ortho_dir_ptr = (int)( |
| ROUND_UP((xlow - target_x + 1.) * ortho_inv_length)); |
| no_need_to_pass_by_clb = 1; |
| } else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */ |
| *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) * |
| ortho_inv_length)); |
| no_need_to_pass_by_clb = 1; |
| } else { /* In a column that passes by target CLB */ |
| *num_segs_ortho_dir_ptr = 0; |
| no_need_to_pass_by_clb = 0; |
| } |
| |
| /* Now count vertical (same dir. as inode) segs. */ |
| |
| if (ylow > target_y + no_need_to_pass_by_clb) { |
| num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb - |
| target_y) * inv_length)); |
| } else if (yhigh < target_y - no_need_to_pass_by_clb) { |
| num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb - |
| yhigh) * inv_length)); |
| } else { |
| num_segs_same_dir = 0; |
| } |
| |
| #ifdef INTERPOSER_BASED_ARCHITECTURE |
| num_segs_same_dir = num_segs_same_dir + 2*num_expected_hops; |
| #endif |
| } |
| |
| return (num_segs_same_dir); |
| } |
| |
| #else |
| static int get_expected_segs_to_target(int inode, int via_cost_index, int target_node, |
| int *num_segs_ortho_dir_ptr) { |
| |
| /* Returns the number of segments the same type as inode that will be needed * |
| * to reach target_node (not including inode) in each direction (the same * |
| * direction (horizontal or vertical) as inode and the orthogonal direction).*/ |
| |
	/* OP: 'via_cost_index' is the cost index representing the type of node that we
	will stay on for the remainder of the routing */
| |
| t_rr_type rr_type; |
| int target_x, target_y, num_segs_same_dir, cost_index; |
| int no_need_to_pass_by_clb; |
| float inv_length, ortho_inv_length, ylow, yhigh, xlow, xhigh; |
| |
| target_x = rr_node[target_node].get_xlow(); |
| target_y = rr_node[target_node].get_ylow(); |
| cost_index = via_cost_index; |
| inv_length = rr_indexed_data[cost_index].inv_length; |
| ortho_inv_length = inv_length; |
| rr_type = rr_node[inode].type; |
| |
| if (rr_type == CHANX) { |
| ylow = rr_node[inode].get_ylow(); |
| xhigh = rr_node[inode].get_xhigh(); |
| xlow = rr_node[inode].get_xlow(); |
| |
| /* Count vertical (orthogonal to inode) segs first. */ |
| |
| if (ylow > target_y) { /* Coming from a row above target? */ |
| *num_segs_ortho_dir_ptr = |
| (int)(ROUND_UP((ylow - target_y + 1.) * ortho_inv_length)); |
| no_need_to_pass_by_clb = 1; |
| } else if (ylow < target_y - 1) { /* Below the CLB bottom? */ |
| *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_y - ylow) * |
| ortho_inv_length)); |
| no_need_to_pass_by_clb = 1; |
| } else { /* In a row that passes by target CLB */ |
| *num_segs_ortho_dir_ptr = 0; |
| no_need_to_pass_by_clb = 0; |
| } |
| |
| /* Now count horizontal (same dir. as inode) segs. */ |
| |
| if (xlow > target_x + no_need_to_pass_by_clb) { |
| num_segs_same_dir = (int)(ROUND_UP((xlow - no_need_to_pass_by_clb - |
| target_x) * inv_length)); |
| } else if (xhigh < target_x - no_need_to_pass_by_clb) { |
| num_segs_same_dir = (int)(ROUND_UP((target_x - no_need_to_pass_by_clb - |
| xhigh) * inv_length)); |
| } else { |
| num_segs_same_dir = 0; |
| } |
| } |
| |
| else { /* inode is a CHANY */ |
| ylow = rr_node[inode].get_ylow(); |
| yhigh = rr_node[inode].get_yhigh(); |
| xlow = rr_node[inode].get_xlow(); |
| |
| /* Count horizontal (orthogonal to inode) segs first. */ |
| |
| if (xlow > target_x) { /* Coming from a column right of target? */ |
| *num_segs_ortho_dir_ptr = (int)( |
| ROUND_UP((xlow - target_x + 1.) * ortho_inv_length)); |
| no_need_to_pass_by_clb = 1; |
| } else if (xlow < target_x - 1) { /* Left of and not adjacent to the CLB? */ |
| *num_segs_ortho_dir_ptr = (int)(ROUND_UP((target_x - xlow) * |
| ortho_inv_length)); |
| no_need_to_pass_by_clb = 1; |
| } else { /* In a column that passes by target CLB */ |
| *num_segs_ortho_dir_ptr = 0; |
| no_need_to_pass_by_clb = 0; |
| } |
| |
| /* Now count vertical (same dir. as inode) segs. */ |
| |
| if (ylow > target_y + no_need_to_pass_by_clb) { |
| num_segs_same_dir = (int)(ROUND_UP((ylow - no_need_to_pass_by_clb - |
| target_y) * inv_length)); |
| } else if (yhigh < target_y - no_need_to_pass_by_clb) { |
| num_segs_same_dir = (int)(ROUND_UP((target_y - no_need_to_pass_by_clb - |
| yhigh) * inv_length)); |
| } else { |
| num_segs_same_dir = 0; |
| } |
| } |
| |
| return (num_segs_same_dir); |
| } |
| #endif |
| |
| static void update_rr_base_costs(int inet, float largest_criticality) { |
| |
| /* Changes the base costs of different types of rr_nodes according to the * |
| * criticality, fanout, etc. of the current net being routed (inet). */ |
| |
| float fanout, factor; |
| int index; |
| |
| fanout = g_clbs_nlist.net[inet].num_sinks(); |
| |
| /* Other reasonable values for factor include fanout and 1 */ |
| factor = sqrt(fanout); |
| |
| for (index = CHANX_COST_INDEX_START; index < num_rr_indexed_data; index++) { |
| if (rr_indexed_data[index].T_quadratic > 0.) { /* pass transistor */ |
| rr_indexed_data[index].base_cost = |
| rr_indexed_data[index].saved_base_cost * factor; |
| } else { |
| rr_indexed_data[index].base_cost = |
| rr_indexed_data[index].saved_base_cost; |
| } |
| } |
| } |
| |
/* Nets that have high fanout can take a very long time to route. Routing for sinks that are part of high-fanout nets should be
   done within a rectangular 'bin' centered around a target (as opposed to the entire net bounding box), the size of which is returned by this function. */
| static int mark_node_expansion_by_bin(int inet, int target_node, |
| t_rt_node * rt_node) { |
| int target_xlow, target_ylow, target_xhigh, target_yhigh; |
| int rlim = 1; |
| int inode; |
| float area; |
| bool success; |
| t_linked_rt_edge *linked_rt_edge; |
| t_rt_node * child_node; |
| |
| target_xlow = rr_node[target_node].get_xlow(); |
| target_ylow = rr_node[target_node].get_ylow(); |
| target_xhigh = rr_node[target_node].get_xhigh(); |
| target_yhigh = rr_node[target_node].get_yhigh(); |
| |
| if (g_clbs_nlist.net[inet].num_sinks() < HIGH_FANOUT_NET_LIM) { |
| /* This algorithm only applies to high fanout nets */ |
| return 1; |
| } |
| if (rt_node == NULL || rt_node->u.child_list == NULL) { |
| /* If unknown traceback, set radius of bin to be size of chip */ |
| rlim = max(nx + 2, ny + 2); |
| return rlim; |
| } |
| |
| area = (route_bb[inet].xmax - route_bb[inet].xmin) |
| * (route_bb[inet].ymax - route_bb[inet].ymin); |
| if (area <= 0) { |
| area = 1; |
| } |
| |
| rlim = (int)(ceil(sqrt((float) area / (float) g_clbs_nlist.net[inet].num_sinks()))); |
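	/* Initial bin radius: roughly the average spacing between sinks if they were spread
	   uniformly over the net's bounding box. */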
| |
| success = false; |
	/* Quickly determine a feasible bin radius for routing a sink of a high-fanout net.
	   This is necessary to prevent very long runtimes for high-fanout nets; in the best case it
	   reduces complexity from O(N^2 logN) to O(N logN) (Swartz fast router).
	 */
| linked_rt_edge = rt_node->u.child_list; |
| while (success == false && linked_rt_edge != NULL) { |
| while (linked_rt_edge != NULL && success == false) { |
| child_node = linked_rt_edge->child; |
| inode = child_node->inode; |
| if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) { |
| if (rr_node[inode].get_xlow() <= target_xhigh + rlim |
| && rr_node[inode].get_xhigh() >= target_xhigh - rlim |
| && rr_node[inode].get_ylow() <= target_yhigh + rlim |
| && rr_node[inode].get_yhigh() >= target_yhigh - rlim) { |
| success = true; |
| } |
| } |
| linked_rt_edge = linked_rt_edge->next; |
| } |
| |
| if (success == false) { |
| if (rlim > max(nx + 2, ny + 2)) { |
| vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__, |
| "VPR internal error, net %s has paths that are not found in traceback.\n", g_clbs_nlist.net[inet].name); |
| } |
| /* if sink not in bin, increase bin size until fit */ |
| rlim *= 2; |
| } else { |
			/* The bin may have only just caught the end of a wire segment; give it some extra channel space to explore. */
| rlim += 4; |
| } |
| linked_rt_edge = rt_node->u.child_list; |
| } |
| |
| /* adjust rlim to account for width/height of block containing the target sink */ |
| int target_span = max( target_xhigh - target_xlow, target_yhigh - target_ylow ); |
| rlim += target_span; |
| |
| /* redetermine expansion based on rlim */ |
| linked_rt_edge = rt_node->u.child_list; |
| while (linked_rt_edge != NULL) { |
| child_node = linked_rt_edge->child; |
| inode = child_node->inode; |
| if (!(rr_node[inode].type == IPIN || rr_node[inode].type == SINK)) { |
| if (rr_node[inode].get_xlow() <= target_xhigh + rlim |
| && rr_node[inode].get_xhigh() >= target_xhigh - rlim |
| && rr_node[inode].get_ylow() <= target_yhigh + rlim |
| && rr_node[inode].get_yhigh() >= target_yhigh - rlim) { |
| child_node->re_expand = true; |
| } else { |
| child_node->re_expand = false; |
| } |
| } |
| linked_rt_edge = linked_rt_edge->next; |
| } |
| return rlim; |
| } |
| #define ERROR_TOL 0.0001 |
| |
| static void timing_driven_check_net_delays(float **net_delay) { |
| |
| /* Checks that the net delays computed incrementally during timing driven * |
| * routing match those computed from scratch by the net_delay.c module. */ |
| |
| unsigned int inet, ipin; |
| float **net_delay_check; |
| |
| t_chunk list_head_net_delay_check_ch = {NULL, 0, NULL}; |
| |
| /*struct s_linked_vptr *ch_list_head_net_delay_check;*/ |
| |
| net_delay_check = alloc_net_delay(&list_head_net_delay_check_ch, g_clbs_nlist.net, |
| g_clbs_nlist.net.size()); |
| load_net_delay_from_routing(net_delay_check, g_clbs_nlist.net, g_clbs_nlist.net.size()); |
| |
| for (inet = 0; inet < g_clbs_nlist.net.size(); inet++) { |
| for (ipin = 1; ipin < g_clbs_nlist.net[inet].pins.size(); ipin++) { |
| if (net_delay_check[inet][ipin] == 0.) { /* Should be only GLOBAL nets */ |
| if (fabs(net_delay[inet][ipin]) > ERROR_TOL) { |
| vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__, |
| "in timing_driven_check_net_delays: net %d pin %d.\n" |
| "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n", |
| inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]); |
| } |
| } else { |
| if (fabs(1.0 - net_delay[inet][ipin] / net_delay_check[inet][ipin]) > ERROR_TOL) { |
| vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__, |
| "in timing_driven_check_net_delays: net %d pin %d.\n" |
| "\tIncremental calc. net_delay is %g, but from scratch net delay is %g.\n", |
| inet, ipin, net_delay[inet][ipin], net_delay_check[inet][ipin]); |
| } |
| } |
| } |
| } |
| |
| free_net_delay(net_delay_check, &list_head_net_delay_check_ch); |
| vpr_printf_info("Completed net delay value cross check successfully.\n"); |
| } |
| |
| |
| /* Detect if net should be routed or not */ |
| static bool should_route_net(int inet) { |
| t_trace * tptr = trace_head[inet]; |
| |
| if (tptr == NULL) { |
| /* No routing yet. */ |
| return true; |
| } |
| |
| for (;;) { |
| int inode = tptr->index; |
| int occ = rr_node[inode].get_occ(); |
| int capacity = rr_node[inode].get_capacity(); |
| |
| if (occ > capacity) { |
| return true; /* overuse detected */ |
| } |
| |
| if (rr_node[inode].type == SINK) { |
| tptr = tptr->next; /* Skip next segment. */ |
| if (tptr == NULL) |
| break; |
| } |
| |
| tptr = tptr->next; |
| |
| } /* End while loop -- did an entire traceback. */ |
| |
| return false; /* Current route has no overuse */ |
| } |
| |
| static bool early_exit_heuristic(const t_router_opts& router_opts) { |
	/* Early-exit check for cases where it is obvious that a successful route will not be found:
	   if the total wirelength used in the first routing iteration exceeds FIRST_ITER_WIRELENTH_LIMIT
	   (as a fraction of the total available wirelength), give up. */
| int total_wirelength = 0; |
| int available_wirelength = 0; |
| |
| for (int i = 0; i < num_rr_nodes; ++i) { |
| if (rr_node[i].type == CHANX || rr_node[i].type == CHANY) { |
| available_wirelength += 1 + |
| rr_node[i].get_xhigh() - rr_node[i].get_xlow() + |
| rr_node[i].get_yhigh() - rr_node[i].get_ylow(); |
| } |
| } |
| |
| for (unsigned int inet = 0; inet < g_clbs_nlist.net.size(); ++inet) { |
| if (g_clbs_nlist.net[inet].is_global == false |
| && g_clbs_nlist.net[inet].num_sinks() != 0) { /* Globals don't count. */ |
| int bends, wirelength, segments; |
| get_num_bends_and_length(inet, &bends, &wirelength, &segments); |
| |
| total_wirelength += wirelength; |
| } |
| } |
| vpr_printf_info("Wire length after first iteration %d, total available wire length %d, ratio %g\n", |
| total_wirelength, available_wirelength, |
| (float) (total_wirelength) / (float) (available_wirelength)); |
| if ((float) (total_wirelength) / (float) (available_wirelength)> FIRST_ITER_WIRELENTH_LIMIT) { |
| vpr_printf_info("Wire length usage ratio exceeds limit of %g, fail routing.\n", |
| FIRST_ITER_WIRELENTH_LIMIT); |
| return true; |
| } |
| vpr_printf_info("--------- ---------- ----------- ---------------------\n"); |
| vpr_printf_info("Iteration Time Crit Path Overused RR Nodes\n"); |
| vpr_printf_info("--------- ---------- ----------- ---------------------\n"); |
| return false; |
| } |
| |