Add tile filtering option for placer delay matrix sampling.

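This is useful when the placer delay sampling would otherwise sample
clock network tiles (e.g. PLLs, global/local clock buffers), which
results in a poor delay matrix that is unsuitable for LUTs/FFs/RAMs.

The option takes a comma-separated list of tile type names; the default
(an empty string) allows all tiles to be sampled.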

Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com>
diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 5c74449..2bd4384 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -486,6 +486,8 @@
 
     PlacerOpts->write_placement_delay_lookup = Options.write_placement_delay_lookup;
     PlacerOpts->read_placement_delay_lookup = Options.read_placement_delay_lookup;
+
+    PlacerOpts->allowed_tiles_for_delay_model = Options.allowed_tiles_for_delay_model;
 }
 
 static void SetupAnalysisOpts(const t_options& Options, t_analysis_opts& analysis_opts) {
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 576eb62..6166ab1 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1384,6 +1384,14 @@
         .default_value("")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    place_timing_grp.add_argument(args.allowed_tiles_for_delay_model, "--allowed_tiles_for_delay_model")
+        .help(
+            "Names of allowed tile types that can be sampled during delay "
+            "modelling.  Default is to allow all tiles. Can be used to "
+            "exclude specialized tiles from placer delay sampling.")
+        .default_value("")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     auto& route_grp = parser.add_argument_group("routing options");
 
     route_grp.add_argument(args.max_router_iterations, "--max_router_iterations")
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 55142a5..d1f45e5 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -115,6 +115,7 @@
     argparse::ArgValue<std::string> post_place_timing_report_file;
     argparse::ArgValue<PlaceDelayModelType> place_delay_model;
     argparse::ArgValue<e_reducer> place_delay_model_reducer;
+    argparse::ArgValue<std::string> allowed_tiles_for_delay_model;
 
     /* Router Options */
     argparse::ArgValue<int> max_router_iterations;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 01dca22..1eb4ffb 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -808,6 +808,12 @@
 
     std::string write_placement_delay_lookup;
     std::string read_placement_delay_lookup;
+
+    // Comma-separated names of the tile types that may be sampled for the
+    // placer delay model; an empty string allows every tile type.
+    // Useful for excluding tiles that have abnormal delay behavior, e.g.
+    // clock tree elements like PLLs, global/local clock buffers, etc.
+    std::string allowed_tiles_for_delay_model;
 };
 
 /* All the parameters controlling the router's operation are in this        *
diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp
index 0254225..c3c4fac 100644
--- a/vpr/src/place/timing_place_lookup.cpp
+++ b/vpr/src/place/timing_place_lookup.cpp
@@ -87,7 +87,8 @@
     int end_x,
     int end_y,
     const t_router_opts& router_opts,
-    bool measure_directconnect);
+    bool measure_directconnect,
+    const std::set<std::string>& allowed_types);
 
 static vtr::Matrix<float> compute_delta_delays(
     const RouterDelayProfiler& route_profiler,
@@ -351,7 +352,8 @@
     int end_x,
     int end_y,
     const t_router_opts& router_opts,
-    bool measure_directconnect) {
+    bool measure_directconnect,
+    const std::set<std::string>& allowed_types) {
     int delta_x, delta_y;
     int sink_x, sink_y;
 
@@ -368,7 +370,9 @@
             bool src_or_target_empty = (src_type == device_ctx.EMPTY_TYPE
                                         || sink_type == device_ctx.EMPTY_TYPE);
 
-            if (src_or_target_empty) {
+            bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end();
+
+            if (src_or_target_empty || !is_allowed_type) {
                 if (matrix[delta_x][delta_y].empty()) {
                     //Only set empty target if we don't already have a valid delta delay
                     matrix[delta_x][delta_y].push_back(EMPTY_DELTA);
@@ -427,6 +431,14 @@
     size_t high_x = std::max(grid.width() - longest_length, mid_x);
     size_t high_y = std::max(grid.height() - longest_length, mid_y);
 
+    std::set<std::string> allowed_types;
+    if (!placer_opts.allowed_tiles_for_delay_model.empty()) {
+        auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ",");
+        for (const auto& type : allowed_types_vector) {
+            allowed_types.insert(type);
+        }
+    }
+
     //   +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
     //   +                 |                       |               +
     //   +        A        |           B           |       C       +
@@ -462,6 +474,9 @@
             auto type = grid[x][y].type;
 
             if (type != device_ctx.EMPTY_TYPE) {
+                if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) {
+                    continue;
+                }
                 src_type = type;
                 break;
             }
@@ -480,7 +495,7 @@
                            x, y,
                            grid.width() - 1, grid.height() - 1,
                            router_opts,
-                           measure_directconnect);
+                           measure_directconnect, allowed_types);
 
     //Find the lowest x location on the bottom edge with a non-empty block
     src_type = nullptr;
@@ -489,6 +504,9 @@
             auto type = grid[x][y].type;
 
             if (type != device_ctx.EMPTY_TYPE) {
+                if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) {
+                    continue;
+                }
                 src_type = type;
                 break;
             }
@@ -506,7 +524,7 @@
                            x, y,
                            grid.width() - 1, grid.height() - 1,
                            router_opts,
-                           measure_directconnect);
+                           measure_directconnect, allowed_types);
 
     //Since the other delta delay values may have suffered from edge effects,
     //we recalculate deltas within regions B, C, E, F
@@ -518,7 +536,7 @@
                            low_x, low_y,
                            grid.width() - 1, grid.height() - 1,
                            router_opts,
-                           measure_directconnect);
+                           measure_directconnect, allowed_types);
 
     //Since the other delta delay values may have suffered from edge effects,
     //we recalculate deltas within regions D, E, G, H
@@ -530,7 +548,7 @@
                            0, 0,
                            high_x, high_y,
                            router_opts,
-                           measure_directconnect);
+                           measure_directconnect, allowed_types);
 
     //Since the other delta delay values may have suffered from edge effects,
     //we recalculate deltas within regions A, B, D, E
@@ -542,7 +560,7 @@
                            0, low_y,
                            high_x, grid.height() - 1,
                            router_opts,
-                           measure_directconnect);
+                           measure_directconnect, allowed_types);
 
     //Since the other delta delay values may have suffered from edge effects,
     //we recalculate deltas within regions E, F, H, I
@@ -554,7 +572,7 @@
                            low_x, 0,
                            grid.width() - 1, high_y,
                            router_opts,
-                           measure_directconnect);
+                           measure_directconnect, allowed_types);
 
     vtr::Matrix<float> delta_delays({grid.width(), grid.height()});
     for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) {