Fine tuned last commit. One additional change we're now calculating orig_timing_info with net_delay instead of max_delay_budget
diff --git a/vpr/src/route/route_budgets.cpp b/vpr/src/route/route_budgets.cpp
index dafda27..3ab6224 100644
--- a/vpr/src/route/route_budgets.cpp
+++ b/vpr/src/route/route_budgets.cpp
@@ -152,11 +152,8 @@
auto& cluster_ctx = g_vpr_ctx.clustering();
int ipin = cluster_ctx.clb_nlist.pin_net_index(pin_id);
- /*Target delay is calculated using equation in the RCV algorithm*/
- //delay_target[net_id][ipin] = min(0.5 * (delay_min_budget[net_id][ipin] + delay_max_budget[net_id][ipin]), delay_min_budget[net_id][ipin] + 0.1e-9);
-
//set min budget delay to be delay_target for now. Guardband can be added later
- delay_target[net_id][ipin] = delay_min_budget[net_id][ipin];
+ delay_target[net_id][ipin] = delay_min_budget[net_id][ipin];
}
void route_budgets::allocate_slack_using_weights(vtr::vector_map<ClusterNetId, float *> &net_delay, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) {
@@ -180,15 +177,16 @@
max_budget_change = 900e-12;
//pass in original timing_info to use for total path delay calculations. This ensures the same total path delay is used throughout the RCV iterations.
- orig_timing_info = perform_sta (delay_max_budget);
+ orig_timing_info = perform_sta (net_delay);
/*This allocates long path slack and increases the budgets. Stopping conditions are set differently from RCV paper because they showed better results*/
- while ((iteration > 3 && max_budget_change > 1e-12) || iteration <= 3) {
+ while (max_budget_change > 1e-12) {
+ //use initial routing delay to perform_sta for the first iteration, since delay_max_budget is initialized to 0
timing_info = perform_sta(delay_max_budget);
max_budget_change = minimax_PERT(orig_timing_info, timing_info, delay_max_budget, net_delay, netlist_pin_lookup, SETUP, true);
iteration++;
- if (iteration > 10)
+ if (iteration > 20)
break;
}
@@ -199,12 +197,12 @@
max_budget_change = 900e-12;
/*Allocate the short path slack to decrease the budgets accordingly*/
- while ((iteration > 3 && max_budget_change > 1e-12) || iteration <= 3) {
+ while (max_budget_change > 1e-12) {
timing_info = perform_sta(delay_min_budget);
max_budget_change = minimax_PERT(orig_timing_info, timing_info, delay_min_budget, net_delay, netlist_pin_lookup, HOLD, true);
iteration++;
- if (iteration > 10)
+ if (iteration > 20)
break;
}
@@ -341,17 +339,26 @@
continue;
}
+ /*Current:
+ Slack is allocated to a connection (i) as slack(i) * [numerator(i) / denominator(i)].
+ Denominator: largest total_path_delay through a connection (including both routing
+ delays we can change and cell/intra-block delays we can't.
+ Numerator: is the original net_delay (routing path found for that connection on the first routing iteration, which is generally the lowest-delay one.)
+ TODO:Future approach that would likely result in faster slack allocation convergence:
+ Change the denominator to be total_mutable_path_delay(i) -- that is, the largest total routing delay path through this connection.*/
+
+ float budget_increment = net_delay[net_id][ipin] * path_slack / total_path_delay;
/*During hold analysis, increase the budgets when there is negative slack.
During setup analysis, decrease the budgets when there is negative slack*/
if ((slack_type == NEGATIVE && path_slack < 0) ||
(slack_type == POSITIVE && path_slack > 0) ||
slack_type == BOTH) {
if (analysis_type == HOLD) {
- temp_budgets[net_id][ipin] += -1 * net_delay[net_id][ipin] * path_slack / total_path_delay;
+ temp_budgets[net_id][ipin] += -1 * budget_increment;
} else {
- temp_budgets[net_id][ipin] += net_delay[net_id][ipin] * path_slack / total_path_delay;
+ temp_budgets[net_id][ipin] += budget_increment;
}
- max_budget_change = max(max_budget_change, abs(net_delay[net_id][ipin] * path_slack / total_path_delay));
+ max_budget_change = max(max_budget_change, abs(budget_increment));
}
/*Budgets need to be between maximum and minimum budgets*/
diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h
index fed7ebb..3f1bef9 100755
--- a/vpr/src/route/route_common.h
+++ b/vpr/src/route/route_common.h
@@ -30,7 +30,12 @@
* to the target. *
* R_upstream: Used only by the timing-driven router. Stores the upstream *
* resistance to ground from this node, including the *
- * resistance of the node itself (device_ctx.rr_nodes[index].R).*/
+ * resistance of the node itself (device_ctx.rr_nodes[index].R).*
+ * T_upstream: Used by timing_driven_evaluate_cost. It stores the upstream *
+ * delay from this node to the source. It is necessary because *
+ * the router needs this value to calculate the total delay of *
+ * the path, which is used to evaluate cost of expanding a *
+ * particular node during router exploration */
struct t_heap {
t_heap *next = nullptr;
diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp
index a0f9e6a..a49df5b 100644
--- a/vpr/src/route/route_timing.cpp
+++ b/vpr/src/route/route_timing.cpp
@@ -163,7 +163,10 @@
const int from_node, const int to_node, const int iconn, const int target_node, const float T_upstream);
static float get_timing_driven_expected_cost(int inode, int target_node,
- float criticality_fac, float R_upstream, int get_delay);
+ float criticality_fac, float R_upstream);
+
+static float get_timing_driven_expected_delay(int inode, int target_node,
+ float R_upstream);
static int get_expected_segs_to_target(int inode, int target_node,
int *num_segs_ortho_dir_ptr);
@@ -1119,7 +1122,7 @@
tot_cost = backward_path_cost
+ astar_fac
* get_timing_driven_expected_cost(inode, target_node,
- target_criticality, R_upstream, 0);
+ target_criticality, R_upstream);
float zero = 0.0;
//after budgets are loaded, calculate delay cost as described by RCV paper
@@ -1381,7 +1384,7 @@
new_costs.T_incremental = Tdel;
total_delay_on_connection += T_upstream;
total_delay_on_connection += Tdel;
- total_delay_on_connection += get_timing_driven_expected_cost(to_node, target_node, criticality_fac, new_costs.R_upstream, 1);
+ total_delay_on_connection += get_timing_driven_expected_delay(to_node, target_node, new_costs.R_upstream);
//Update the backward cost
new_costs.backward_cost = old_costs.backward_cost; //Back cost to 'from_node'
new_costs.backward_cost += (1. - criticality_fac) * get_rr_cong_cost(to_node); //Congestion cost
@@ -1393,21 +1396,19 @@
new_costs.backward_cost += bend_cost; //Bend cost
}
}
- float cost_from_budgets = 0;
if (budgeting_inf.if_set()) {
//If budgets specified calculate cost as described by RCV paper:
// R. Fung, V. Betz and W. Chow, "Slack Allocation and Routing to Improve FPGA Timing While
// Repairing Short-Path Violations," in IEEE Transactions on Computer-Aided Design of
// Integrated Circuits and Systems, vol. 27, no. 4, pp. 686-697, April 2008.
- //TODO: Since these targets are delays, shouldn't we be using Tdel instead of new_costs.total_cost on RHS?
new_costs.total_cost += (short_path_crit + criticality_fac) * max(0.f, target_delay - total_delay_on_connection);
new_costs.total_cost += pow(max(0.f, total_delay_on_connection - max_delay), 2) / 100e-12;
new_costs.total_cost += pow(max(0.f, min_delay - total_delay_on_connection), 2) / 100e-12;
}
//Update total cost
- float expected_cost = get_timing_driven_expected_cost(to_node, target_node, criticality_fac, new_costs.R_upstream, 0);
+ float expected_cost = get_timing_driven_expected_cost(to_node, target_node, criticality_fac, new_costs.R_upstream);
new_costs.total_cost += new_costs.backward_cost + astar_fac * expected_cost;
@@ -1415,7 +1416,7 @@
return new_costs;
}
-static float get_timing_driven_expected_cost(int inode, int target_node, float criticality_fac, float R_upstream, int get_delay) {
+static float get_timing_driven_expected_cost(int inode, int target_node, float criticality_fac, float R_upstream) {
/* Determines the expected cost (due to both delay and resource cost) to reach *
* the target node from inode. It doesn't include the cost of inode -- *
@@ -1446,18 +1447,15 @@
+ ipin_data.base_cost
+ sink_data.base_cost;
- float Tdel = num_segs_same_dir * same_data.T_linear
+ float Tdel = get_timing_driven_expected_delay(inode, target_node, R_upstream);
+ /* float Tdel = num_segs_same_dir * same_data.T_linear
+ num_segs_ortho_dir * ortho_data.T_linear
+ num_segs_same_dir * num_segs_same_dir * same_data.T_quadratic
+ num_segs_ortho_dir * num_segs_ortho_dir * ortho_data.T_quadratic
+ R_upstream * ( num_segs_same_dir * same_data.C_load
+ num_segs_ortho_dir * ortho_data.C_load)
- + ipin_data.T_linear;
+ + ipin_data.T_linear;*/
- if (get_delay){
- return Tdel;
- }
-
float expected_cost = criticality_fac * Tdel + (1. - criticality_fac) * cong_cost;
return (expected_cost);
#endif
@@ -1468,6 +1466,34 @@
}
}
+static float get_timing_driven_expected_delay(int inode, int target_node, float R_upstream){
+
+ /* Determines the expected delay (due to both delay and resource cost) to reach *
+ * the target node from inode.*/
+
+ auto& device_ctx = g_vpr_ctx.device();
+
+ int num_segs_ortho_dir = 0;
+ int num_segs_same_dir = get_expected_segs_to_target(inode, target_node, &num_segs_ortho_dir);
+
+ int cost_index = device_ctx.rr_nodes[inode].cost_index();
+ int ortho_cost_index = device_ctx.rr_indexed_data[cost_index].ortho_cost_index;
+
+ const auto& same_data = device_ctx.rr_indexed_data[cost_index];
+ const auto& ortho_data = device_ctx.rr_indexed_data[ortho_cost_index];
+ const auto& ipin_data = device_ctx.rr_indexed_data[IPIN_COST_INDEX];
+
+ float Tdel = num_segs_same_dir * same_data.T_linear
+ + num_segs_ortho_dir * ortho_data.T_linear
+ + num_segs_same_dir * num_segs_same_dir * same_data.T_quadratic
+ + num_segs_ortho_dir * num_segs_ortho_dir * ortho_data.T_quadratic
+ + R_upstream * ( num_segs_same_dir * same_data.C_load
+ + num_segs_ortho_dir * ortho_data.C_load)
+ + ipin_data.T_linear;
+ return Tdel;
+}
+
+
/* Used below to ensure that fractions are rounded up, but floating *
* point values very close to an integer are rounded to that integer. */
static int round_up(float x) {