blob: 02542251cdc4b3b0895eacb661d97eb31f3d951d [file] [log] [blame]
#include <cstdio>
#include <cstring>
#include <cmath>
#include <time.h>
#include <limits>
#include "vtr_assert.h"
#include "vtr_ndmatrix.h"
#include "vtr_log.h"
#include "vtr_util.h"
#include "vtr_math.h"
#include "vtr_memory.h"
#include "vtr_time.h"
#include "vtr_geometry.h"
#include "arch_util.h"
#include "vpr_types.h"
#include "globals.h"
#include "place_and_route.h"
#include "route_common.h"
#include "route_tree_timing.h"
#include "route_timing.h"
#include "route_export.h"
#include "rr_graph.h"
#include "timing_place_lookup.h"
#include "read_xml_arch_file.h"
#include "echo_files.h"
#include "atom_netlist.h"
#include "rr_graph2.h"
#include "place_util.h"
// all functions in profiling:: namespace, which are only activated if PROFILE is defined
#include "route_profiling.h"
#include "router_delay_profiling.h"
#include "place_delay_model.h"
/*To compute delay between blocks we calculate the delay between */
/*different nodes in the FPGA. From this procedure we generate
* a lookup table which tells us the delay between different locations in*/
/*the FPGA */
/*the delta arrays are used to contain the best case routing delay */
/*between different locations on the FPGA. */
//#define VERBOSE
constexpr float UNINITIALIZED_DELTA = -1; //Indicates the delta delay value has not been calculated
constexpr float EMPTY_DELTA = -2; //Indicates delta delay from/to an EMPTY block
constexpr float IMPOSSIBLE_DELTA = std::numeric_limits<float>::infinity(); //Indicates there is no valid delta delay
struct t_profile_loc {
t_profile_loc(int x, int y, std::vector<vtr::Point<int>> delta_values)
: root(x, y)
, deltas(delta_values) {}
vtr::Point<int> root;
std::vector<vtr::Point<int>> deltas;
};
struct t_profile_info {
std::vector<t_profile_loc> locations;
int max_delta_x;
int max_delta_y;
};
/*** Function Prototypes *****/
static t_chan_width setup_chan_width(const t_router_opts& router_opts,
t_chan_width_dist chan_width_dist);
static float route_connection_delay(
const RouterDelayProfiler& route_profiler,
int source_x_loc,
int source_y_loc,
int sink_x_loc,
int sink_y_loc,
const t_router_opts& router_opts,
bool measure_directconnect);
static void generic_compute_matrix(
const RouterDelayProfiler& route_profiler,
vtr::Matrix<std::vector<float>>& matrix,
int source_x,
int source_y,
int start_x,
int start_y,
int end_x,
int end_y,
const t_router_opts& router_opts,
bool measure_directconnect);
static vtr::Matrix<float> compute_delta_delays(
const RouterDelayProfiler& route_profiler,
const t_placer_opts& palcer_opts,
const t_router_opts& router_opts,
bool measure_directconnect,
size_t longest_length);
float delay_reduce(std::vector<float>& delays, e_reducer reducer);
static vtr::Matrix<float> compute_delta_delay_model(
const RouterDelayProfiler& route_profiler,
const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
bool measure_directconnect,
int longest_length);
static bool find_direct_connect_sample_locations(const t_direct_inf* direct,
t_physical_tile_type_ptr from_type,
int from_pin,
int from_pin_class,
t_physical_tile_type_ptr to_type,
int to_pin,
int to_pin_class,
int* src_rr,
int* sink_rr);
static bool verify_delta_delays(const vtr::Matrix<float>& delta_delays);
static int get_longest_segment_length(std::vector<t_segment_inf>& segment_inf);
static void fix_empty_coordinates(vtr::Matrix<float>& delta_delays);
static void fix_uninitialized_coordinates(vtr::Matrix<float>& delta_delays);
static float find_neightboring_average(vtr::Matrix<float>& matrix, int x, int y, int max_distance);
/******* Globally Accessible Functions **********/
std::unique_ptr<PlaceDelayModel> compute_place_delay_model(const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
t_det_routing_arch* det_routing_arch,
std::vector<t_segment_inf>& segment_inf,
t_chan_width_dist chan_width_dist,
const t_direct_inf* directs,
const int num_directs) {
vtr::ScopedStartFinishTimer timer("Computing placement delta delay look-up");
init_placement_context();
t_chan_width chan_width = setup_chan_width(router_opts, chan_width_dist);
alloc_routing_structs(chan_width, router_opts, det_routing_arch, segment_inf,
directs, num_directs);
const RouterLookahead* router_lookahead = get_cached_router_lookahead(
router_opts.lookahead_type,
router_opts.write_router_lookahead,
router_opts.read_router_lookahead,
segment_inf);
RouterDelayProfiler route_profiler(router_lookahead);
int longest_length = get_longest_segment_length(segment_inf);
/*now setup and compute the actual arrays */
std::unique_ptr<PlaceDelayModel> place_delay_model;
if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) {
place_delay_model = std::make_unique<DeltaDelayModel>();
} else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) {
place_delay_model = std::make_unique<OverrideDelayModel>();
} else {
VTR_ASSERT_MSG(false, "Invalid placer delay model");
}
if (placer_opts.read_placement_delay_lookup.empty()) {
place_delay_model->compute(route_profiler, placer_opts, router_opts, longest_length);
} else {
place_delay_model->read(placer_opts.read_placement_delay_lookup);
}
if (!placer_opts.write_placement_delay_lookup.empty()) {
place_delay_model->write(placer_opts.write_placement_delay_lookup);
}
/*free all data structures that are no longer needed */
free_routing_structs();
return place_delay_model;
}
void DeltaDelayModel::compute(
const RouterDelayProfiler& route_profiler,
const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
int longest_length) {
router_opts_ = router_opts;
delays_ = compute_delta_delay_model(
route_profiler,
placer_opts, router_opts, /*measure_directconnect=*/true,
longest_length);
}
void OverrideDelayModel::compute(
const RouterDelayProfiler& route_profiler,
const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
int longest_length) {
router_opts_ = router_opts;
auto delays = compute_delta_delay_model(
route_profiler,
placer_opts, router_opts, /*measure_directconnect=*/false,
longest_length);
base_delay_model_ = std::make_unique<DeltaDelayModel>(delays, router_opts);
compute_override_delay_model(route_profiler);
}
/******* File Accessible Functions **********/
std::vector<int> get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) {
/*
* This function tries to identify the best pin classes to hook up
* for delay calculation. The assumption is that we should pick
* the pin class with the largest number of pins. This makes
* sense, since it ensures we pick commonly used pins, and
* removes order dependence on how the pins are specified
* in the architecture (except in the case were the two largest pin classes
* of a particular pintype have the same number of pins, in which case the
* first pin class is used).
*/
std::vector<int> best_classes;
//Collect all classes of matching type which do not have all their pins ignored
for (int i = 0; i < type->num_class; i++) {
if (type->class_inf[i].type == pintype) {
bool all_ignored = true;
for (int ipin = 0; ipin < type->class_inf[i].num_pins; ++ipin) {
int pin = type->class_inf[i].pinlist[ipin];
if (!type->is_ignored_pin[pin]) {
all_ignored = false;
break;
}
}
if (!all_ignored) {
best_classes.push_back(i);
}
}
}
//Sort classe so largest pin class is first
auto cmp_class = [&](int lhs, int rhs) {
return type->class_inf[lhs].num_pins > type->class_inf[rhs].num_pins;
};
std::stable_sort(best_classes.begin(), best_classes.end(), cmp_class);
return best_classes;
}
static int get_longest_segment_length(std::vector<t_segment_inf>& segment_inf) {
int length;
length = 0;
for (size_t i = 0; i < segment_inf.size(); i++) {
if (segment_inf[i].length > length)
length = segment_inf[i].length;
}
return (length);
}
static t_chan_width setup_chan_width(const t_router_opts& router_opts,
t_chan_width_dist chan_width_dist) {
/*we give plenty of tracks, this increases routability for the */
/*lookup table generation */
int width_fac;
if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) {
auto& device_ctx = g_vpr_ctx.device();
auto type = physical_tile_type(find_most_common_block_type(device_ctx.grid));
width_fac = 4 * type->num_pins;
/*this is 2x the value that binary search starts */
/*this should be enough to allow most pins to */
/*connect to tracks in the architecture */
} else {
width_fac = router_opts.fixed_channel_width;
}
return init_chan(width_fac, chan_width_dist);
}
static float route_connection_delay(
const RouterDelayProfiler& route_profiler,
int source_x,
int source_y,
int sink_x,
int sink_y,
const t_router_opts& router_opts,
bool measure_directconnect) {
//Routes between the source and sink locations and calculates the delay
float net_delay_value = IMPOSSIBLE_DELTA; /*set to known value for debug purposes */
auto& device_ctx = g_vpr_ctx.device();
bool successfully_routed = false;
//Get the rr nodes to route between
auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid[source_x][source_y].type);
auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid[sink_x][sink_y].type);
for (int driver_ptc : best_driver_ptcs) {
VTR_ASSERT(driver_ptc != OPEN);
int source_rr_node = get_rr_node_index(device_ctx.rr_node_indices, source_x, source_y, SOURCE, driver_ptc);
VTR_ASSERT(source_rr_node != OPEN);
for (int sink_ptc : best_sink_ptcs) {
VTR_ASSERT(sink_ptc != OPEN);
int sink_rr_node = get_rr_node_index(device_ctx.rr_node_indices, sink_x, sink_y, SINK, sink_ptc);
VTR_ASSERT(sink_rr_node != OPEN);
if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) {
//Skip if we shouldn't measure direct connects and a direct connect exists
continue;
}
{
successfully_routed = route_profiler.calculate_delay(
source_rr_node, sink_rr_node,
router_opts,
&net_delay_value);
}
if (successfully_routed) break;
}
if (successfully_routed) break;
}
if (!successfully_routed) {
VTR_LOG_WARN("Unable to route between blocks at (%d,%d) and (%d,%d) to characterize delay (setting to %g)\n",
source_x, source_y, sink_x, sink_y, net_delay_value);
}
return (net_delay_value);
}
static void generic_compute_matrix(
const RouterDelayProfiler& route_profiler,
vtr::Matrix<std::vector<float>>& matrix,
int source_x,
int source_y,
int start_x,
int start_y,
int end_x,
int end_y,
const t_router_opts& router_opts,
bool measure_directconnect) {
int delta_x, delta_y;
int sink_x, sink_y;
auto& device_ctx = g_vpr_ctx.device();
for (sink_x = start_x; sink_x <= end_x; sink_x++) {
for (sink_y = start_y; sink_y <= end_y; sink_y++) {
delta_x = abs(sink_x - source_x);
delta_y = abs(sink_y - source_y);
t_physical_tile_type_ptr src_type = device_ctx.grid[source_x][source_y].type;
t_physical_tile_type_ptr sink_type = device_ctx.grid[sink_x][sink_y].type;
bool src_or_target_empty = (src_type == device_ctx.EMPTY_TYPE
|| sink_type == device_ctx.EMPTY_TYPE);
if (src_or_target_empty) {
if (matrix[delta_x][delta_y].empty()) {
//Only set empty target if we don't already have a valid delta delay
matrix[delta_x][delta_y].push_back(EMPTY_DELTA);
#ifdef VERBOSE
VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n",
"EMPTY",
delta_x, delta_y,
source_x, source_y,
sink_x, sink_y);
#endif
}
} else {
//Valid start/end
float delay = route_connection_delay(route_profiler, source_x, source_y, sink_x, sink_y, router_opts, measure_directconnect);
#ifdef VERBOSE
VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n",
delay,
delta_x, delta_y,
source_x, source_y,
sink_x, sink_y);
#endif
if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) {
//Overwrite empty delta
matrix[delta_x][delta_y][0] = delay;
} else {
//Collect delta
matrix[delta_x][delta_y].push_back(delay);
}
}
}
}
}
static vtr::Matrix<float> compute_delta_delays(
const RouterDelayProfiler& route_profiler,
const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
bool measure_directconnect,
size_t longest_length) {
//To avoid edge effects we place the source at least 'longest_length' away
//from the device edge
//and route from there for all possible delta values < dimension
auto& device_ctx = g_vpr_ctx.device();
auto& grid = device_ctx.grid;
vtr::Matrix<std::vector<float>> sampled_delta_delays({grid.width(), grid.height()});
size_t mid_x = vtr::nint(grid.width() / 2);
size_t mid_y = vtr::nint(grid.height() / 2);
size_t low_x = std::min(longest_length, mid_x);
size_t low_y = std::min(longest_length, mid_y);
size_t high_x = std::max(grid.width() - longest_length, mid_x);
size_t high_y = std::max(grid.height() - longest_length, mid_y);
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// + | | +
// + A | B | C +
// + | | +
// +-----------------\-----------------------.---------------+
// + | | +
// + | | +
// + | | +
// + | | +
// + D | E | F +
// + | | +
// + | | +
// + | | +
// + | | +
// +-----------------*-----------------------/---------------+
// + | | +
// + G | H | I +
// + | | +
// +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//
// * = (low_x, low_y)
// . = (high_x, high_y)
// / = (high_x, low_y)
// \ = (low_x, high_y)
// + = device edge
//Find the lowest y location on the left edge with a non-empty block
size_t y = 0;
size_t x = 0;
t_physical_tile_type_ptr src_type = nullptr;
for (x = 0; x < grid.width(); ++x) {
for (y = 0; y < grid.height(); ++y) {
auto type = grid[x][y].type;
if (type != device_ctx.EMPTY_TYPE) {
src_type = type;
break;
}
}
if (src_type) {
break;
}
}
VTR_ASSERT(src_type != nullptr);
#ifdef VERBOSE
VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y);
#endif
generic_compute_matrix(route_profiler, sampled_delta_delays,
x, y,
x, y,
grid.width() - 1, grid.height() - 1,
router_opts,
measure_directconnect);
//Find the lowest x location on the bottom edge with a non-empty block
src_type = nullptr;
for (y = 0; y < grid.height(); ++y) {
for (x = 0; x < grid.width(); ++x) {
auto type = grid[x][y].type;
if (type != device_ctx.EMPTY_TYPE) {
src_type = type;
break;
}
}
if (src_type) {
break;
}
}
VTR_ASSERT(src_type != nullptr);
#ifdef VERBOSE
VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y);
#endif
generic_compute_matrix(route_profiler, sampled_delta_delays,
x, y,
x, y,
grid.width() - 1, grid.height() - 1,
router_opts,
measure_directconnect);
//Since the other delta delay values may have suffered from edge effects,
//we recalculate deltas within regions B, C, E, F
#ifdef VERBOSE
VTR_LOG("Computing from low/low:\n");
#endif
generic_compute_matrix(route_profiler, sampled_delta_delays,
low_x, low_y,
low_x, low_y,
grid.width() - 1, grid.height() - 1,
router_opts,
measure_directconnect);
//Since the other delta delay values may have suffered from edge effects,
//we recalculate deltas within regions D, E, G, H
#ifdef VERBOSE
VTR_LOG("Computing from high/high:\n");
#endif
generic_compute_matrix(route_profiler, sampled_delta_delays,
high_x, high_y,
0, 0,
high_x, high_y,
router_opts,
measure_directconnect);
//Since the other delta delay values may have suffered from edge effects,
//we recalculate deltas within regions A, B, D, E
#ifdef VERBOSE
VTR_LOG("Computing from high/low:\n");
#endif
generic_compute_matrix(route_profiler, sampled_delta_delays,
high_x, low_y,
0, low_y,
high_x, grid.height() - 1,
router_opts,
measure_directconnect);
//Since the other delta delay values may have suffered from edge effects,
//we recalculate deltas within regions E, F, H, I
#ifdef VERBOSE
VTR_LOG("Computing from low/high:\n");
#endif
generic_compute_matrix(route_profiler, sampled_delta_delays,
low_x, high_y,
low_x, 0,
grid.width() - 1, high_y,
router_opts,
measure_directconnect);
vtr::Matrix<float> delta_delays({grid.width(), grid.height()});
for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) {
for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) {
delta_delays[dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer);
}
}
return delta_delays;
}
float delay_reduce(std::vector<float>& delays, e_reducer reducer) {
if (delays.size() == 0) {
return IMPOSSIBLE_DELTA;
} else if (delays.size() == 1) {
return delays[0];
}
VTR_ASSERT(delays.size() > 1);
float delay;
if (reducer == e_reducer::MIN) {
auto itr = std::min_element(delays.begin(), delays.end());
delay = *itr;
} else if (reducer == e_reducer::MAX) {
auto itr = std::max_element(delays.begin(), delays.end());
delay = *itr;
} else if (reducer == e_reducer::MEDIAN) {
std::sort(delays.begin(), delays.end());
delay = vtr::median(delays.begin(), delays.end());
} else if (reducer == e_reducer::ARITHMEAN) {
delay = vtr::arithmean(delays.begin(), delays.end());
} else if (reducer == e_reducer::GEOMEAN) {
delay = vtr::geomean(delays.begin(), delays.end());
} else {
VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unrecognized delta delay reducer");
}
return delay;
}
/* We return the average placement estimated delay for a routing spanning (x,y).
* We start with an averaging distance of 1 (i.e. from (x-1,y-1) to (x+1,y+1))
* and look for legal delay values to average; if some are found we return the
* average and if none are found we increase the distance to average over.
*
* If no legal values are found to average over with a range of max_distance,
* we return IMPOSSIBLE_DELTA.
*/
static float find_neightboring_average(
vtr::Matrix<float>& matrix,
int x,
int y,
int max_distance) {
float sum = 0;
int counter = 0;
int endx = matrix.end_index(0);
int endy = matrix.end_index(1);
int delx, dely;
for (int distance = 1; distance <= max_distance; ++distance) {
for (delx = x - distance; delx <= x + distance; delx++) {
for (dely = y - distance; dely <= y + distance; dely++) {
// Check distance constraint
if (abs(delx - x) + abs(dely - y) > distance) {
continue;
}
//check out of bounds
if (delx < 0 || dely < 0 || delx >= endx || dely >= endy || (delx == x && dely == y)) {
continue;
}
if (matrix[delx][dely] == EMPTY_DELTA || matrix[delx][dely] == IMPOSSIBLE_DELTA) {
continue;
}
counter++;
sum += matrix[delx][dely];
}
}
if (counter != 0) {
return sum / (float)counter;
}
}
return IMPOSSIBLE_DELTA;
}
static void fix_empty_coordinates(vtr::Matrix<float>& delta_delays) {
// Set any empty delta's to the average of it's neighbours
//
// Empty coordinates may occur if the sampling location happens to not have
// a connection at that location. However a more through sampling likely
// would return a result, so we fill in the empty holes with a small
// neighbour average.
constexpr int kMaxAverageDistance = 2;
for (size_t delta_x = 0; delta_x < delta_delays.dim_size(0); ++delta_x) {
for (size_t delta_y = 0; delta_y < delta_delays.dim_size(1); ++delta_y) {
if (delta_delays[delta_x][delta_y] == EMPTY_DELTA) {
delta_delays[delta_x][delta_y] = find_neightboring_average(delta_delays, delta_x, delta_y, kMaxAverageDistance);
}
}
}
}
static void fix_uninitialized_coordinates(vtr::Matrix<float>& delta_delays) {
// Set any empty delta's to the average of it's neighbours
for (size_t delta_x = 0; delta_x < delta_delays.dim_size(0); ++delta_x) {
for (size_t delta_y = 0; delta_y < delta_delays.dim_size(1); ++delta_y) {
if (delta_delays[delta_x][delta_y] == UNINITIALIZED_DELTA) {
delta_delays[delta_x][delta_y] = IMPOSSIBLE_DELTA;
}
}
}
}
static void fill_impossible_coordinates(vtr::Matrix<float>& delta_delays) {
// Set any impossible delta's to the average of it's neighbours
//
// Impossible coordinates may occur if an IPIN cannot be reached from the
// sampling OPIN. This might occur if the IPIN or OPIN used for sampling
// is specialized, and therefore cannot be reached via the by the pins
// sampled. Leaving this value in the delay matrix will result in invalid
// slacks if the delay matrix uses this value.
//
// A max average distance of 5 is used to provide increased effort in
// filling these gaps. It is more important to have a poor predication,
// than a invalid value and causing a slack assertion.
constexpr int kMaxAverageDistance = 5;
for (size_t delta_x = 0; delta_x < delta_delays.dim_size(0); ++delta_x) {
for (size_t delta_y = 0; delta_y < delta_delays.dim_size(1); ++delta_y) {
if (delta_delays[delta_x][delta_y] == IMPOSSIBLE_DELTA) {
delta_delays[delta_x][delta_y] = find_neightboring_average(
delta_delays, delta_x, delta_y, kMaxAverageDistance);
}
}
}
}
static vtr::Matrix<float> compute_delta_delay_model(
const RouterDelayProfiler& route_profiler,
const t_placer_opts& placer_opts,
const t_router_opts& router_opts,
bool measure_directconnect,
int longest_length) {
vtr::ScopedStartFinishTimer timer("Computing delta delays");
vtr::Matrix<float> delta_delays = compute_delta_delays(route_profiler,
placer_opts, router_opts, measure_directconnect, longest_length);
fix_uninitialized_coordinates(delta_delays);
fix_empty_coordinates(delta_delays);
fill_impossible_coordinates(delta_delays);
verify_delta_delays(delta_delays);
return delta_delays;
}
//Finds a src_rr and sink_rr appropriate for measuring the delay of the current direct specification
static bool find_direct_connect_sample_locations(const t_direct_inf* direct,
t_physical_tile_type_ptr from_type,
int from_pin,
int from_pin_class,
t_physical_tile_type_ptr to_type,
int to_pin,
int to_pin_class,
int* src_rr,
int* sink_rr) {
VTR_ASSERT(from_type != nullptr);
VTR_ASSERT(to_type != nullptr);
auto& device_ctx = g_vpr_ctx.device();
auto& grid = device_ctx.grid;
//Search the grid for an instance of from/to blocks which satisfy this direct connect offsets,
//and which has the appropriate pins
int from_x = 0, from_y = 0, from_z = 0;
int to_x = 0, to_y = 0, to_z = 0;
bool found = false;
for (from_x = 0; from_x < (int)grid.width(); ++from_x) {
to_x = from_x + direct->x_offset;
if (to_x < 0 || to_x >= (int)grid.width()) continue;
for (from_y = 0; from_y < (int)grid.height(); ++from_y) {
if (grid[from_x][from_y].type != from_type) continue;
//Check that the from pin exists at this from location
//(with multi-width/height blocks pins may not exist at all locations)
bool from_pin_found = false;
if (direct->from_side != NUM_SIDES) {
int from_pin_rr = get_rr_node_index(device_ctx.rr_node_indices, from_x, from_y, OPIN, from_pin, direct->from_side);
from_pin_found = (from_pin_rr != OPEN);
} else {
std::vector<int> from_pin_rrs = get_rr_node_indices(device_ctx.rr_node_indices, from_x, from_y, OPIN, from_pin);
from_pin_found = !from_pin_rrs.empty();
}
if (!from_pin_found) continue;
to_y = from_y + direct->y_offset;
if (to_y < 0 || to_y >= (int)grid.height()) continue;
if (grid[to_x][to_y].type != to_type) continue;
//Check that the from pin exists at this from location
//(with multi-width/height blocks pins may not exist at all locations)
bool to_pin_found = false;
if (direct->to_side != NUM_SIDES) {
int to_pin_rr = get_rr_node_index(device_ctx.rr_node_indices, to_x, to_y, IPIN, to_pin, direct->to_side);
to_pin_found = (to_pin_rr != OPEN);
} else {
std::vector<int> to_pin_rrs = get_rr_node_indices(device_ctx.rr_node_indices, to_x, to_y, IPIN, to_pin);
to_pin_found = !to_pin_rrs.empty();
}
if (!to_pin_found) continue;
for (from_z = 0; from_z < from_type->capacity; ++from_z) {
to_z = from_z + direct->z_offset;
if (to_z < 0 || to_z >= to_type->capacity) continue;
found = true;
break;
}
if (found) break;
}
if (found) break;
}
if (!found) {
return false;
}
//Now have a legal instance of this direct connect
VTR_ASSERT(grid[from_x][from_y].type == from_type);
VTR_ASSERT(from_z < from_type->capacity);
VTR_ASSERT(grid[to_x][to_y].type == to_type);
VTR_ASSERT(to_z < to_type->capacity);
VTR_ASSERT(from_x + direct->x_offset == to_x);
VTR_ASSERT(from_y + direct->y_offset == to_y);
VTR_ASSERT(from_z + direct->z_offset == to_z);
//
//Find a source/sink RR node associated with the pins of the direct
//
auto source_rr_nodes = get_rr_node_indices(device_ctx.rr_node_indices, from_x, from_y, SOURCE, from_pin_class);
VTR_ASSERT(source_rr_nodes.size() > 0);
auto sink_rr_nodes = get_rr_node_indices(device_ctx.rr_node_indices, to_x, to_y, SINK, to_pin_class);
VTR_ASSERT(sink_rr_nodes.size() > 0);
*src_rr = source_rr_nodes[0];
*sink_rr = sink_rr_nodes[0];
return true;
}
static bool verify_delta_delays(const vtr::Matrix<float>& delta_delays) {
auto& device_ctx = g_vpr_ctx.device();
auto& grid = device_ctx.grid;
for (size_t x = 0; x < grid.width(); ++x) {
for (size_t y = 0; y < grid.height(); ++y) {
float delta_delay = delta_delays[x][y];
if (delta_delay < 0.) {
VPR_ERROR(VPR_ERROR_PLACE,
"Found invaild negative delay %g for delta (%d,%d)",
delta_delay, x, y);
}
}
}
return true;
}
void OverrideDelayModel::compute_override_delay_model(const RouterDelayProfiler& route_profiler) {
t_router_opts router_opts2 = router_opts_;
router_opts2.astar_fac = 0.;
//Look at all the direct connections that exist, and add overrides to delay model
auto& device_ctx = g_vpr_ctx.device();
for (int idirect = 0; idirect < device_ctx.arch->num_directs; ++idirect) {
const t_direct_inf* direct = &device_ctx.arch->Directs[idirect];
InstPort from_port = parse_inst_port(direct->from_pin);
InstPort to_port = parse_inst_port(direct->to_pin);
t_physical_tile_type_ptr from_type = find_block_type_by_name(from_port.instance_name(), device_ctx.physical_tile_types);
t_physical_tile_type_ptr to_type = find_block_type_by_name(to_port.instance_name(), device_ctx.physical_tile_types);
int num_conns = from_port.port_high_index() - from_port.port_low_index() + 1;
VTR_ASSERT_MSG(num_conns == to_port.port_high_index() - to_port.port_low_index() + 1, "Directs must have the same size to/from");
//We now walk through all the connections associated with the current direct specification, measure
//their delay and specify that value as an override in the delay model.
//
//Note that we need to check every connection in the direct to cover the case where the pins are not
//equivalent.
//
//However, if the from/to ports are equivalent we could end up sampling the same RR SOURCE/SINK
//paths multiple times (wasting CPU time) -- we avoid this by recording the sampled paths in
//sampled_rr_pairs and skipping them if they occur multiple times.
int missing_instances = 0;
int missing_paths = 0;
std::set<std::pair<int, int>> sampled_rr_pairs;
for (int iconn = 0; iconn < num_conns; ++iconn) {
//Find the associated pins
int from_pin = find_pin(logical_block_type(from_type), from_port.port_name(), from_port.port_low_index() + iconn);
int to_pin = find_pin(logical_block_type(to_type), to_port.port_name(), to_port.port_low_index() + iconn);
VTR_ASSERT(from_pin != OPEN);
VTR_ASSERT(to_pin != OPEN);
int from_pin_class = find_pin_class(logical_block_type(from_type), from_port.port_name(), from_port.port_low_index() + iconn, DRIVER);
VTR_ASSERT(from_pin_class != OPEN);
int to_pin_class = find_pin_class(logical_block_type(to_type), to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER);
VTR_ASSERT(to_pin_class != OPEN);
int src_rr = OPEN;
int sink_rr = OPEN;
bool found_sample_points = find_direct_connect_sample_locations(direct, from_type, from_pin, from_pin_class, to_type, to_pin, to_pin_class, &src_rr, &sink_rr);
if (!found_sample_points) {
++missing_instances;
continue;
}
//If some of the source/sink ports are logically equivalent we may have already
//sampled the associated source/sink pair and don't need to do so again
if (sampled_rr_pairs.count({src_rr, sink_rr})) continue;
VTR_ASSERT(src_rr != OPEN);
VTR_ASSERT(sink_rr != OPEN);
float direct_connect_delay = std::numeric_limits<float>::quiet_NaN();
bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay);
if (found_routing_path) {
set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay);
} else {
++missing_paths;
}
//Record that we've sampled this pair of source and sink nodes
sampled_rr_pairs.insert({src_rr, sink_rr});
}
VTR_LOGV_WARN(missing_instances > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no instances of this direct found)\n", missing_instances, direct->name);
VTR_LOGV_WARN(missing_paths > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no routing path found)\n", missing_paths, direct->name);
}
}
bool directconnect_exists(int src_rr_node, int sink_rr_node) {
//Returns true if there is a directconnect between the two RR nodes
//
//This is checked by looking for a SOURCE -> OPIN -> IPIN -> SINK path
//which starts at src_rr_node and ends at sink_rr_node
auto& device_ctx = g_vpr_ctx.device();
auto& rr_nodes = device_ctx.rr_nodes;
VTR_ASSERT(rr_nodes[src_rr_node].type() == SOURCE && rr_nodes[sink_rr_node].type() == SINK);
//TODO: This is a constant depth search, but still may be too slow
for (t_edge_size i_src_edge = 0; i_src_edge < rr_nodes[src_rr_node].num_edges(); ++i_src_edge) {
int opin_rr_node = rr_nodes[src_rr_node].edge_sink_node(i_src_edge);
if (rr_nodes[opin_rr_node].type() != OPIN) continue;
for (t_edge_size i_opin_edge = 0; i_opin_edge < rr_nodes[opin_rr_node].num_edges(); ++i_opin_edge) {
int ipin_rr_node = rr_nodes[opin_rr_node].edge_sink_node(i_opin_edge);
if (rr_nodes[ipin_rr_node].type() != IPIN) continue;
for (t_edge_size i_ipin_edge = 0; i_ipin_edge < rr_nodes[ipin_rr_node].num_edges(); ++i_ipin_edge) {
if (sink_rr_node == rr_nodes[ipin_rr_node].edge_sink_node(i_ipin_edge)) {
return true;
}
}
}
}
return false;
}