Fine tuned last commit. One additional change we're now calculating orig_timing_info with net_delay instead of max_delay_budget
diff --git a/vpr/src/route/route_budgets.cpp b/vpr/src/route/route_budgets.cpp index dafda27..3ab6224 100644 --- a/vpr/src/route/route_budgets.cpp +++ b/vpr/src/route/route_budgets.cpp
@@ -152,11 +152,8 @@ auto& cluster_ctx = g_vpr_ctx.clustering(); int ipin = cluster_ctx.clb_nlist.pin_net_index(pin_id); - /*Target delay is calculated using equation in the RCV algorithm*/ - //delay_target[net_id][ipin] = min(0.5 * (delay_min_budget[net_id][ipin] + delay_max_budget[net_id][ipin]), delay_min_budget[net_id][ipin] + 0.1e-9); - //set min budget delay to be delay_target for now. Guardband can be added later - delay_target[net_id][ipin] = delay_min_budget[net_id][ipin]; + delay_target[net_id][ipin] = delay_min_budget[net_id][ipin]; } void route_budgets::allocate_slack_using_weights(vtr::vector_map<ClusterNetId, float *> &net_delay, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) { @@ -180,15 +177,16 @@ max_budget_change = 900e-12; //pass in original timing_info to use for total path delay calculations. This ensures the same total path delay is used throughout the RCV iterations. - orig_timing_info = perform_sta (delay_max_budget); + orig_timing_info = perform_sta (net_delay); /*This allocates long path slack and increases the budgets. Stopping conditions are set differently from RCV paper because they showed better results*/ - while ((iteration > 3 && max_budget_change > 1e-12) || iteration <= 3) { + while (max_budget_change > 1e-12) { + //use initial routing delay to perform_sta for the first iteration, since delay_max_budget is initialized to 0 timing_info = perform_sta(delay_max_budget); max_budget_change = minimax_PERT(orig_timing_info, timing_info, delay_max_budget, net_delay, netlist_pin_lookup, SETUP, true); iteration++; - if (iteration > 10) + if (iteration > 20) break; } @@ -199,12 +197,12 @@ max_budget_change = 900e-12; /*Allocate the short path slack to decrease the budgets accordingly*/ - while ((iteration > 3 && max_budget_change > 1e-12) || iteration <= 3) { + while (max_budget_change > 1e-12) { timing_info = perform_sta(delay_min_budget); max_budget_change = minimax_PERT(orig_timing_info, timing_info, delay_min_budget, net_delay, netlist_pin_lookup, HOLD, true); iteration++; - if (iteration > 10) + if (iteration > 20) break; } @@ -341,17 +339,26 @@ continue; } + /*Current: + Slack is allocated to a connection (i) as slack(i) * [numerator(i) / denominator(i)]. + Denominator: largest total_path_delay through a connection (including both routing + delays we can change and cell/intra-block delays we can't. + Numerator: is the original net_delay (routing path found for that connection on the first routing iteration, which is generally the lowest-delay one.) + TODO:Future approach that would likely result in faster slack allocation convergence: + Change the denominator to be total_mutable_path_delay(i) -- that is, the largest total routing delay path through this connection.*/ + + float budget_increment = net_delay[net_id][ipin] * path_slack / total_path_delay; /*During hold analysis, increase the budgets when there is negative slack. During setup analysis, decrease the budgets when there is negative slack*/ if ((slack_type == NEGATIVE && path_slack < 0) || (slack_type == POSITIVE && path_slack > 0) || slack_type == BOTH) { if (analysis_type == HOLD) { - temp_budgets[net_id][ipin] += -1 * net_delay[net_id][ipin] * path_slack / total_path_delay; + temp_budgets[net_id][ipin] += -1 * budget_increment; } else { - temp_budgets[net_id][ipin] += net_delay[net_id][ipin] * path_slack / total_path_delay; + temp_budgets[net_id][ipin] += budget_increment; } - max_budget_change = max(max_budget_change, abs(net_delay[net_id][ipin] * path_slack / total_path_delay)); + max_budget_change = max(max_budget_change, abs(budget_increment)); } /*Budgets need to be between maximum and minimum budgets*/
diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h index fed7ebb..3f1bef9 100755 --- a/vpr/src/route/route_common.h +++ b/vpr/src/route/route_common.h
@@ -30,7 +30,12 @@ * to the target. * * R_upstream: Used only by the timing-driven router. Stores the upstream * * resistance to ground from this node, including the * - * resistance of the node itself (device_ctx.rr_nodes[index].R).*/ + * resistance of the node itself (device_ctx.rr_nodes[index].R).* + * T_upstream: Used by timing_driven_evaluate_cost. It stores the upstream * + * delay from this node to the source. It is necessary because * + * the router needs this value to calculate the total delay of * + * the path, which is used to evaluate cost of expanding a * + * particular node during router exploration */ struct t_heap { t_heap *next = nullptr;
diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index a0f9e6a..a49df5b 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp
@@ -163,7 +163,10 @@ const int from_node, const int to_node, const int iconn, const int target_node, const float T_upstream); static float get_timing_driven_expected_cost(int inode, int target_node, - float criticality_fac, float R_upstream, int get_delay); + float criticality_fac, float R_upstream); + +static float get_timing_driven_expected_delay(int inode, int target_node, + float R_upstream); static int get_expected_segs_to_target(int inode, int target_node, int *num_segs_ortho_dir_ptr); @@ -1119,7 +1122,7 @@ tot_cost = backward_path_cost + astar_fac * get_timing_driven_expected_cost(inode, target_node, - target_criticality, R_upstream, 0); + target_criticality, R_upstream); float zero = 0.0; //after budgets are loaded, calculate delay cost as described by RCV paper @@ -1381,7 +1384,7 @@ new_costs.T_incremental = Tdel; total_delay_on_connection += T_upstream; total_delay_on_connection += Tdel; - total_delay_on_connection += get_timing_driven_expected_cost(to_node, target_node, criticality_fac, new_costs.R_upstream, 1); + total_delay_on_connection += get_timing_driven_expected_delay(to_node, target_node, new_costs.R_upstream); //Update the backward cost new_costs.backward_cost = old_costs.backward_cost; //Back cost to 'from_node' new_costs.backward_cost += (1. - criticality_fac) * get_rr_cong_cost(to_node); //Congestion cost @@ -1393,21 +1396,19 @@ new_costs.backward_cost += bend_cost; //Bend cost } } - float cost_from_budgets = 0; if (budgeting_inf.if_set()) { //If budgets specified calculate cost as described by RCV paper: // R. Fung, V. Betz and W. Chow, "Slack Allocation and Routing to Improve FPGA Timing While // Repairing Short-Path Violations," in IEEE Transactions on Computer-Aided Design of // Integrated Circuits and Systems, vol. 27, no. 4, pp. 686-697, April 2008. - //TODO: Since these targets are delays, shouldn't we be using Tdel instead of new_costs.total_cost on RHS? new_costs.total_cost += (short_path_crit + criticality_fac) * max(0.f, target_delay - total_delay_on_connection); new_costs.total_cost += pow(max(0.f, total_delay_on_connection - max_delay), 2) / 100e-12; new_costs.total_cost += pow(max(0.f, min_delay - total_delay_on_connection), 2) / 100e-12; } //Update total cost - float expected_cost = get_timing_driven_expected_cost(to_node, target_node, criticality_fac, new_costs.R_upstream, 0); + float expected_cost = get_timing_driven_expected_cost(to_node, target_node, criticality_fac, new_costs.R_upstream); new_costs.total_cost += new_costs.backward_cost + astar_fac * expected_cost; @@ -1415,7 +1416,7 @@ return new_costs; } -static float get_timing_driven_expected_cost(int inode, int target_node, float criticality_fac, float R_upstream, int get_delay) { +static float get_timing_driven_expected_cost(int inode, int target_node, float criticality_fac, float R_upstream) { /* Determines the expected cost (due to both delay and resource cost) to reach * * the target node from inode. It doesn't include the cost of inode -- * @@ -1446,18 +1447,15 @@ + ipin_data.base_cost + sink_data.base_cost; - float Tdel = num_segs_same_dir * same_data.T_linear + float Tdel = get_timing_driven_expected_delay(inode, target_node, R_upstream); + /* float Tdel = num_segs_same_dir * same_data.T_linear + num_segs_ortho_dir * ortho_data.T_linear + num_segs_same_dir * num_segs_same_dir * same_data.T_quadratic + num_segs_ortho_dir * num_segs_ortho_dir * ortho_data.T_quadratic + R_upstream * ( num_segs_same_dir * same_data.C_load + num_segs_ortho_dir * ortho_data.C_load) - + ipin_data.T_linear; + + ipin_data.T_linear;*/ - if (get_delay){ - return Tdel; - } - float expected_cost = criticality_fac * Tdel + (1. - criticality_fac) * cong_cost; return (expected_cost); #endif @@ -1468,6 +1466,34 @@ } } +static float get_timing_driven_expected_delay(int inode, int target_node, float R_upstream){ + + /* Determines the expected delay (due to both delay and resource cost) to reach * + * the target node from inode.*/ + + auto& device_ctx = g_vpr_ctx.device(); + + int num_segs_ortho_dir = 0; + int num_segs_same_dir = get_expected_segs_to_target(inode, target_node, &num_segs_ortho_dir); + + int cost_index = device_ctx.rr_nodes[inode].cost_index(); + int ortho_cost_index = device_ctx.rr_indexed_data[cost_index].ortho_cost_index; + + const auto& same_data = device_ctx.rr_indexed_data[cost_index]; + const auto& ortho_data = device_ctx.rr_indexed_data[ortho_cost_index]; + const auto& ipin_data = device_ctx.rr_indexed_data[IPIN_COST_INDEX]; + + float Tdel = num_segs_same_dir * same_data.T_linear + + num_segs_ortho_dir * ortho_data.T_linear + + num_segs_same_dir * num_segs_same_dir * same_data.T_quadratic + + num_segs_ortho_dir * num_segs_ortho_dir * ortho_data.T_quadratic + + R_upstream * ( num_segs_same_dir * same_data.C_load + + num_segs_ortho_dir * ortho_data.C_load) + + ipin_data.T_linear; + return Tdel; +} + + /* Used below to ensure that fractions are rounded up, but floating * * point values very close to an integer are rounded to that integer. */ static int round_up(float x) {