Fine tuned last commit. One additional change we're now calculating orig_timing_info with net_delay instead of max_delay_budget

commit: 61079b91522d06c9fa9e00e8ce6d01bdeba029ec [log] [tgz]
author: Martin <martin5696@gmail.com> Fri Aug 17 17:45:33 2018 -0400
committer: Martin <martin5696@gmail.com> Fri Aug 17 17:45:33 2018 -0400
tree: 55fac8af93bb479260a563be1ca498783eb4ca7b
parent: edc933fab4069823e51fe1ec1fdb5dd050decae4 [diff]
diff --git a/vpr/src/route/route_budgets.cpp b/vpr/src/route/route_budgets.cpp
index dafda27..3ab6224 100644
--- a/vpr/src/route/route_budgets.cpp
+++ b/vpr/src/route/route_budgets.cpp

@@ -152,11 +152,8 @@
     auto& cluster_ctx = g_vpr_ctx.clustering();
     int ipin = cluster_ctx.clb_nlist.pin_net_index(pin_id);
 
-    /*Target delay is calculated using equation in the RCV algorithm*/
-    //delay_target[net_id][ipin] = min(0.5 * (delay_min_budget[net_id][ipin] + delay_max_budget[net_id][ipin]), delay_min_budget[net_id][ipin] + 0.1e-9);
-
     //set min budget delay to be delay_target for now. Guardband can be added later
-    delay_target[net_id][ipin] = delay_min_budget[net_id][ipin];
+     delay_target[net_id][ipin] = delay_min_budget[net_id][ipin];
 }
 
 void route_budgets::allocate_slack_using_weights(vtr::vector_map<ClusterNetId, float *> &net_delay, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) {
@@ -180,15 +177,16 @@
     max_budget_change = 900e-12;
 
     //pass in original timing_info to use for total path delay calculations. This ensures the same total path delay is used throughout the RCV iterations.
-    orig_timing_info = perform_sta (delay_max_budget);
+    orig_timing_info = perform_sta (net_delay);
 
     /*This allocates long path slack and increases the budgets. Stopping conditions are set differently from RCV paper because they showed better results*/
-    while ((iteration > 3 && max_budget_change > 1e-12) || iteration <= 3) {
+    while (max_budget_change > 1e-12) {
+	//use initial routing delay to perform_sta for the first iteration, since delay_max_budget is initialized to 0
         timing_info = perform_sta(delay_max_budget);
         max_budget_change = minimax_PERT(orig_timing_info, timing_info, delay_max_budget, net_delay, netlist_pin_lookup, SETUP, true);
 
         iteration++;
-        if (iteration > 10)
+        if (iteration > 20)
             break;
     }
 
@@ -199,12 +197,12 @@
     max_budget_change = 900e-12;
 
     /*Allocate the short path slack to decrease the budgets accordingly*/
-    while ((iteration > 3 && max_budget_change > 1e-12) || iteration <= 3) {
+    while (max_budget_change > 1e-12) {
         timing_info = perform_sta(delay_min_budget);
         max_budget_change = minimax_PERT(orig_timing_info, timing_info, delay_min_budget, net_delay, netlist_pin_lookup, HOLD, true);
         iteration++;
 
-        if (iteration > 10)
+        if (iteration > 20)
             break;
     }
 
@@ -341,17 +339,26 @@
                 continue;
             }
 
+	   /*Current:
+	     Slack is allocated to a connection (i) as slack(i) * [numerator(i) / denominator(i)].
+	     Denominator: largest total_path_delay through a connection (including both routing 
+	     delays we can change and cell/intra-block delays we can't. 
+	     Numerator: is the original net_delay (routing path found for that connection on the first routing iteration, which is generally the lowest-delay one.)
+	     TODO:Future approach that would likely result in faster slack allocation convergence:
+             Change the denominator to be total_mutable_path_delay(i) -- that is, the largest total routing delay path through this connection.*/
+
+	    float budget_increment = net_delay[net_id][ipin] * path_slack / total_path_delay;
             /*During hold analysis, increase the budgets when there is negative slack.
              During setup analysis, decrease the budgets when there is negative slack*/
             if ((slack_type == NEGATIVE && path_slack < 0) ||
                     (slack_type == POSITIVE && path_slack > 0) ||
                     slack_type == BOTH) {
                 if (analysis_type == HOLD) {
-                    temp_budgets[net_id][ipin] += -1 * net_delay[net_id][ipin] * path_slack / total_path_delay;
+                    temp_budgets[net_id][ipin] += -1 * budget_increment;
                 } else {
-                    temp_budgets[net_id][ipin] += net_delay[net_id][ipin] * path_slack / total_path_delay;
+                    temp_budgets[net_id][ipin] += budget_increment;
                 }
-                max_budget_change = max(max_budget_change, abs(net_delay[net_id][ipin] * path_slack / total_path_delay));
+                max_budget_change = max(max_budget_change, abs(budget_increment));
             }
 
             /*Budgets need to be between maximum and minimum budgets*/

diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h
index fed7ebb..3f1bef9 100755
--- a/vpr/src/route/route_common.h
+++ b/vpr/src/route/route_common.h

@@ -30,7 +30,12 @@
  *                      to the target.                                      *
  * R_upstream: Used only by the timing-driven router.  Stores the upstream  *
  *             resistance to ground from this node, including the           *
- *             resistance of the node itself (device_ctx.rr_nodes[index].R).*/
+ *             resistance of the node itself (device_ctx.rr_nodes[index].R).*
+ * T_upstream: Used by timing_driven_evaluate_cost. It stores the upstream  *
+ *             delay from this node to the source. It is necessary because  *
+ *             the router needs this value to calculate the total delay of  *
+ *             the path, which is used to evaluate cost of expanding a      *
+ * 	       particular node during router exploration                    */
 struct t_heap {
     t_heap *next = nullptr;
 

diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp
index a0f9e6a..a49df5b 100644
--- a/vpr/src/route/route_timing.cpp
+++ b/vpr/src/route/route_timing.cpp

@@ -163,7 +163,10 @@
     const int from_node, const int to_node, const int iconn, const int target_node, const float T_upstream);
 
 static float get_timing_driven_expected_cost(int inode, int target_node,
-        float criticality_fac, float R_upstream, int get_delay);
+        float criticality_fac, float R_upstream);
+
+static float get_timing_driven_expected_delay(int inode, int target_node, 
+        float R_upstream);
 
 static int get_expected_segs_to_target(int inode, int target_node,
         int *num_segs_ortho_dir_ptr);
@@ -1119,7 +1122,7 @@
         tot_cost = backward_path_cost
                 + astar_fac
                 * get_timing_driven_expected_cost(inode, target_node,
-                target_criticality, R_upstream, 0);
+                target_criticality, R_upstream);
 
         float zero = 0.0;
         //after budgets are loaded, calculate delay cost as described by RCV paper
@@ -1381,7 +1384,7 @@
     new_costs.T_incremental = Tdel;
     total_delay_on_connection += T_upstream;
     total_delay_on_connection += Tdel;
-    total_delay_on_connection += get_timing_driven_expected_cost(to_node, target_node, criticality_fac, new_costs.R_upstream, 1);
+    total_delay_on_connection += get_timing_driven_expected_delay(to_node, target_node, new_costs.R_upstream);
     //Update the backward cost
     new_costs.backward_cost = old_costs.backward_cost; //Back cost to 'from_node'
     new_costs.backward_cost += (1. - criticality_fac) * get_rr_cong_cost(to_node); //Congestion cost
@@ -1393,21 +1396,19 @@
             new_costs.backward_cost += bend_cost; //Bend cost
         }
     }
-    float cost_from_budgets = 0;
     if (budgeting_inf.if_set()) {
         //If budgets specified calculate cost as described by RCV paper:
         //    R. Fung, V. Betz and W. Chow, "Slack Allocation and Routing to Improve FPGA Timing While
         //     Repairing Short-Path Violations," in IEEE Transactions on Computer-Aided Design of
         //     Integrated Circuits and Systems, vol. 27, no. 4, pp. 686-697, April 2008.
 
-        //TODO: Since these targets are delays, shouldn't we be using Tdel instead of new_costs.total_cost on RHS?
         new_costs.total_cost += (short_path_crit + criticality_fac) * max(0.f, target_delay - total_delay_on_connection);
         new_costs.total_cost += pow(max(0.f, total_delay_on_connection - max_delay), 2) / 100e-12;
         new_costs.total_cost += pow(max(0.f, min_delay - total_delay_on_connection), 2) / 100e-12;
         
     }
     //Update total cost
-    float expected_cost = get_timing_driven_expected_cost(to_node, target_node, criticality_fac, new_costs.R_upstream, 0);
+    float expected_cost = get_timing_driven_expected_cost(to_node, target_node, criticality_fac, new_costs.R_upstream);
     new_costs.total_cost += new_costs.backward_cost + astar_fac * expected_cost;
 
     
@@ -1415,7 +1416,7 @@
     return new_costs;
 }
 
-static float get_timing_driven_expected_cost(int inode, int target_node, float criticality_fac, float R_upstream, int get_delay) {
+static float get_timing_driven_expected_cost(int inode, int target_node, float criticality_fac, float R_upstream) {
 
     /* Determines the expected cost (due to both delay and resource cost) to reach *
      * the target node from inode.  It doesn't include the cost of inode --       *
@@ -1446,18 +1447,15 @@
                           + ipin_data.base_cost
                           + sink_data.base_cost;
 
-        float Tdel =   num_segs_same_dir * same_data.T_linear
+        float Tdel = get_timing_driven_expected_delay(inode, target_node, R_upstream);
+       /* float Tdel =   num_segs_same_dir * same_data.T_linear
                      + num_segs_ortho_dir * ortho_data.T_linear
                      + num_segs_same_dir * num_segs_same_dir * same_data.T_quadratic
                      + num_segs_ortho_dir * num_segs_ortho_dir * ortho_data.T_quadratic
                      + R_upstream * (  num_segs_same_dir * same_data.C_load
                                      + num_segs_ortho_dir * ortho_data.C_load)
-                     + ipin_data.T_linear;
+                     + ipin_data.T_linear;*/
 
-        if (get_delay){
-		return Tdel;
-	}
-        
         float expected_cost = criticality_fac * Tdel + (1. - criticality_fac) * cong_cost;
         return (expected_cost);
 #endif
@@ -1468,6 +1466,34 @@
     }
 }
 
+static float get_timing_driven_expected_delay(int inode, int target_node, float R_upstream){
+
+    /* Determines the expected delay (due to both delay and resource cost) to reach *
+     * the target node from inode.*/
+
+    auto& device_ctx = g_vpr_ctx.device();
+
+    int num_segs_ortho_dir = 0;
+    int num_segs_same_dir = get_expected_segs_to_target(inode, target_node, &num_segs_ortho_dir);
+
+    int cost_index = device_ctx.rr_nodes[inode].cost_index();
+    int ortho_cost_index = device_ctx.rr_indexed_data[cost_index].ortho_cost_index;
+
+    const auto& same_data = device_ctx.rr_indexed_data[cost_index];
+    const auto& ortho_data = device_ctx.rr_indexed_data[ortho_cost_index];
+    const auto& ipin_data = device_ctx.rr_indexed_data[IPIN_COST_INDEX];
+
+        float Tdel =   num_segs_same_dir * same_data.T_linear
+                     + num_segs_ortho_dir * ortho_data.T_linear
+                     + num_segs_same_dir * num_segs_same_dir * same_data.T_quadratic
+                     + num_segs_ortho_dir * num_segs_ortho_dir * ortho_data.T_quadratic
+                     + R_upstream * (  num_segs_same_dir * same_data.C_load
+                                     + num_segs_ortho_dir * ortho_data.C_load)
+                     + ipin_data.T_linear;
+     return Tdel;
+} 
+
+
 /* Used below to ensure that fractions are rounded up, but floating   *
  * point values very close to an integer are rounded to that integer.       */
 static int round_up(float x) {
commit	61079b91522d06c9fa9e00e8ce6d01bdeba029ec	[log] [tgz]
author	Martin <martin5696@gmail.com>	Fri Aug 17 17:45:33 2018 -0400
committer	Martin <martin5696@gmail.com>	Fri Aug 17 17:45:33 2018 -0400
tree	55fac8af93bb479260a563be1ca498783eb4ca7b
parent	edc933fab4069823e51fe1ec1fdb5dd050decae4 [diff]