blob: a99e6daa4b2de71fd1230172b2cff00d615ee982 [file] [log] [blame]
#include <sys/types.h>
#include <cstdio>
#include <ctime>
#include <climits>
#include <cstdlib>
#include <cmath>
#include "vtr_util.h"
#include "vtr_memory.h"
#include "vtr_assert.h"
#include "vtr_log.h"
#include "vpr_types.h"
#include "vpr_utils.h"
#include "vpr_error.h"
#include "globals.h"
#include "atom_netlist.h"
#include "place_and_route.h"
#include "place.h"
#include "read_place.h"
#include "read_route.h"
#include "route_export.h"
#include "draw.h"
#include "stats.h"
#include "check_route.h"
#include "rr_graph.h"
#include "net_delay.h"
#include "timing_place.h"
#include "read_xml_arch_file.h"
#include "echo_files.h"
#include "route_common.h"
#include "place_macro.h"
#include "power.h"
#include "RoutingDelayCalculator.h"
#include "timing_info.h"
#include "tatum/echo_writer.hpp"
/******************* Subroutines local to this module ************************/
static float comp_width(t_chan* chan, float x, float separation);
/************************* Subroutine Definitions ****************************/
int binary_search_place_and_route(t_placer_opts placer_opts,
const t_annealing_sched& annealing_sched,
const t_router_opts& router_opts,
const t_analysis_opts& analysis_opts,
const t_file_name_opts& filename_opts,
const t_arch* arch,
bool verify_binary_search,
int min_chan_width_hint,
t_det_routing_arch* det_routing_arch,
std::vector<t_segment_inf>& segment_inf,
vtr::vector<ClusterNetId, float*>& net_delay,
std::shared_ptr<SetupHoldTimingInfo> timing_info,
std::shared_ptr<RoutingDelayCalculator> delay_calc) {
/* This routine performs a binary search to find the minimum number of *
* tracks per channel required to successfully route a circuit, and returns *
* that minimum width_fac. */
vtr::vector<ClusterNetId, t_trace*> best_routing; /* Saves the best routing found so far. */
int current, low, high, final;
bool success, prev_success, prev2_success, Fc_clipped = false;
bool using_minw_hint = false;
auto& device_ctx = g_vpr_ctx.mutable_device();
auto& route_ctx = g_vpr_ctx.mutable_routing();
t_clb_opins_used saved_clb_opins_used_locally;
int attempt_count;
int udsd_multiplier;
int warnings;
t_graph_type graph_type;
/* Allocate the major routing structures. */
if (router_opts.route_type == GLOBAL) {
graph_type = GRAPH_GLOBAL;
} else {
graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ? GRAPH_BIDIR : GRAPH_UNIDIR);
}
best_routing = alloc_saved_routing();
VTR_ASSERT(net_delay.size());
if (det_routing_arch->directionality == BI_DIRECTIONAL)
udsd_multiplier = 1;
else
udsd_multiplier = 2;
if (router_opts.fixed_channel_width != NO_FIXED_CHANNEL_WIDTH) {
current = router_opts.fixed_channel_width + 5 * udsd_multiplier;
low = router_opts.fixed_channel_width - 1 * udsd_multiplier;
} else {
/* Initialize binary serach guess */
if (min_chan_width_hint > 0) {
//If the user provided a hint use it as the initial guess
VTR_LOG("Initializing minimum channel width search using specified hint\n");
current = min_chan_width_hint;
using_minw_hint = true;
} else {
//Otherwise base it off the architecture
VTR_LOG("Initializing minimum channel width search based on maximum CLB pins\n");
int max_pins = max_pins_per_grid_tile();
current = max_pins + max_pins % 2;
}
low = -1;
}
/* Constraints must be checked to not break rr_graph generator */
if (det_routing_arch->directionality == UNI_DIRECTIONAL) {
if (current % 2 != 0) {
VPR_FATAL_ERROR(VPR_ERROR_ROUTE,
"Tried odd chan width (%d) in uni-directional routing architecture (chan width must be even).\n",
current);
}
} else {
if (det_routing_arch->Fs % 3) {
VPR_FATAL_ERROR(VPR_ERROR_ROUTE,
"Fs must be three in bidirectional mode.\n");
}
}
VTR_ASSERT(current > 0);
high = -1;
final = -1;
attempt_count = 0;
while (final == -1) {
VTR_LOG("\n");
VTR_LOG("Attempting to route at %d channels (binary search bounds: [%d, %d])\n", current, low, high);
fflush(stdout);
/* Check if the channel width is huge to avoid overflow. Assume the *
* circuit is unroutable with the current router options if we're *
* going to overflow. */
if (router_opts.fixed_channel_width != NO_FIXED_CHANNEL_WIDTH) {
if (current > router_opts.fixed_channel_width * 4) {
VPR_FATAL_ERROR(VPR_ERROR_ROUTE,
"This circuit appears to be unroutable with the current router options. Last failed at %d.\n"
"Aborting routing procedure.\n",
low);
}
} else {
if (current > 1000) {
VPR_FATAL_ERROR(VPR_ERROR_ROUTE,
"This circuit requires a channel width above 1000, probably is not going to route.\n"
"Aborting routing procedure.\n");
}
}
if ((current * 3) < det_routing_arch->Fs) {
VTR_LOG("Width factor is now below specified Fs. Stop search.\n");
final = high;
break;
}
if (placer_opts.place_freq == PLACE_ALWAYS) {
placer_opts.place_chan_width = current;
try_place(placer_opts, annealing_sched, router_opts, analysis_opts,
arch->Chans, det_routing_arch, segment_inf,
arch->Directs, arch->num_directs);
}
success = try_route(current,
router_opts,
analysis_opts,
det_routing_arch, segment_inf,
net_delay,
timing_info,
delay_calc,
arch->Chans,
arch->Directs, arch->num_directs,
(attempt_count == 0) ? ScreenUpdatePriority::MAJOR : ScreenUpdatePriority::MINOR);
attempt_count++;
fflush(stdout);
float scale_factor = 2;
if (success && !Fc_clipped) {
if (current == high) {
/* Can't go any lower */
final = current;
}
high = current;
/* If Fc_output is too high, set to full connectivity but warn the user */
if (Fc_clipped) {
VTR_LOG_WARN("Fc_output was too high and was clipped to full (maximum) connectivity.\n");
}
/* Save routing in case it is best. */
save_routing(best_routing, route_ctx.clb_opins_used_locally, saved_clb_opins_used_locally);
//If the user gave us a minW hint (and we routed successfully at that width)
//make the initial guess closer to the current value instead of the standard guess.
//
//To avoid wasting time at unroutable channel widths we want to determine an un-routable (but close
//to the hint channel width). Picking a value too far below the hint may cause us to waste time
//at an un-routable channel width. Picking a value too close to the hint may cause a spurious
//failure (c.f. verify_binary_search). The scale_factor below seems a reasonable compromise.
//
//Note this is only active for only the first re-routing after the initial guess,
//and we use the default scale_factor otherwise
if (using_minw_hint && attempt_count == 1) {
scale_factor = 1.1;
}
if ((high - std::max(low, 0)) <= 1 * udsd_multiplier) { //No more steps
final = high;
}
if (low != -1) { //Have lower-bound, step to midpoint
current = (high + low) / scale_factor;
} else { //Haven't found lower bound yet
current = high / scale_factor;
if (std::abs(current - high) < udsd_multiplier) {
//If high and scale_factor are both small, we might have ended
//up with no change in current.
//In that case, ensure we reduce current by at least the track multiplier
current = high - udsd_multiplier;
}
VTR_ASSERT(current < high);
}
} else { /* last route not successful */
if (success && Fc_clipped) {
VTR_LOG("Routing rejected, Fc_output was too high.\n");
success = false;
}
low = current;
if (high != -1) {
if ((high - low) <= 1 * udsd_multiplier) { //No more steps
final = high;
}
current = (high + low) / scale_factor; //Step to midpoint
} else {
if (router_opts.fixed_channel_width != NO_FIXED_CHANNEL_WIDTH) {
/* FOR Wneed = f(Fs) search */
if (low < router_opts.fixed_channel_width + 30) {
current = low + 5 * udsd_multiplier;
} else {
VPR_FATAL_ERROR(VPR_ERROR_ROUTE,
"Aborting: Wneed = f(Fs) search found exceedingly large Wneed (at least %d).\n", low);
}
} else {
current = low * scale_factor; /* Haven't found upper bound yet */
if (std::abs(current - low) < udsd_multiplier) {
//If high and scale_factor are both small, we might have ended
//up with no change in current.
//In that case, ensure we increase current by at least the track multiplier
current = high + udsd_multiplier;
}
VTR_ASSERT(current > low);
}
}
}
current = current + current % udsd_multiplier;
}
/* The binary search above occassionally does not find the minimum *
* routeable channel width. Sometimes a circuit that will not route *
* in 19 channels will route in 18, due to router flukiness. If *
* verify_binary_search is set, the code below will ensure that FPGAs *
* with channel widths of final-2 and final-3 wil not route *
* successfully. If one does route successfully, the router keeps *
* trying smaller channel widths until two in a row (e.g. 8 and 9) *
* fail. */
if (verify_binary_search) {
VTR_LOG("\n");
VTR_LOG("Verifying that binary search found min channel width...\n");
prev_success = true; /* Actually final - 1 failed, but this makes router */
/* try final-2 and final-3 even if both fail: safer */
prev2_success = true;
current = final - 2;
while (prev2_success || prev_success) {
if ((router_opts.fixed_channel_width != NO_FIXED_CHANNEL_WIDTH)
&& (current < router_opts.fixed_channel_width)) {
break;
}
fflush(stdout);
if (current < 1)
break;
if (placer_opts.place_freq == PLACE_ALWAYS) {
placer_opts.place_chan_width = current;
try_place(placer_opts, annealing_sched, router_opts, analysis_opts,
arch->Chans, det_routing_arch, segment_inf,
arch->Directs, arch->num_directs);
}
success = try_route(current,
router_opts,
analysis_opts,
det_routing_arch,
segment_inf, net_delay,
timing_info,
delay_calc,
arch->Chans, arch->Directs, arch->num_directs,
ScreenUpdatePriority::MINOR);
if (success && Fc_clipped == false) {
final = current;
save_routing(best_routing, route_ctx.clb_opins_used_locally,
saved_clb_opins_used_locally);
if (placer_opts.place_freq == PLACE_ALWAYS) {
auto& cluster_ctx = g_vpr_ctx.clustering();
print_place(filename_opts.NetFile.c_str(), cluster_ctx.clb_nlist.netlist_id().c_str(),
filename_opts.PlaceFile.c_str());
}
}
prev2_success = prev_success;
prev_success = success;
current--;
if (det_routing_arch->directionality == UNI_DIRECTIONAL) {
current--; /* width must be even */
}
}
}
/* End binary search verification. */
/* Restore the best placement (if necessary), the best routing, and *
* * the best channel widths for final drawing and statistics output. */
t_chan_width chan_width = init_chan(final, arch->Chans);
free_rr_graph();
create_rr_graph(graph_type,
device_ctx.physical_tile_types,
device_ctx.grid,
chan_width,
device_ctx.num_arch_switches,
det_routing_arch,
segment_inf,
router_opts.base_cost_type,
router_opts.trim_empty_channels,
router_opts.trim_obs_channels,
router_opts.clock_modeling,
router_opts.lookahead_type,
arch->Directs, arch->num_directs,
&warnings);
init_draw_coords(final);
restore_routing(best_routing, route_ctx.clb_opins_used_locally, saved_clb_opins_used_locally);
if (Fc_clipped) {
VTR_LOG_WARN("Best routing Fc_output too high, clipped to full (maximum) connectivity.\n");
}
VTR_LOG("Best routing used a channel width factor of %d.\n", final);
print_route(filename_opts.PlaceFile.c_str(), filename_opts.RouteFile.c_str());
free_saved_routing(best_routing);
fflush(stdout);
return (final);
}
t_chan_width init_chan(int cfactor, t_chan_width_dist chan_width_dist) {
/* Assigns widths to channels (in tracks). Minimum one track *
* per channel. The channel distributions read from the architecture *
* file are scaled by cfactor. */
auto& device_ctx = g_vpr_ctx.mutable_device();
auto& grid = device_ctx.grid;
t_chan chan_x_dist = chan_width_dist.chan_x_dist;
t_chan chan_y_dist = chan_width_dist.chan_y_dist;
t_chan_width chan_width;
chan_width.x_list.resize(grid.height());
chan_width.y_list.resize(grid.width());
if (grid.height() > 1) {
int num_channels = grid.height() - 1;
VTR_ASSERT(num_channels > 0);
float separation = 1.0 / num_channels; /* Norm. distance between two channels. */
for (size_t i = 0; i < grid.height(); ++i) {
float y = float(i) / num_channels;
chan_width.x_list[i] = (int)floor(cfactor * comp_width(&chan_x_dist, y, separation) + 0.5);
chan_width.x_list[i] = std::max(chan_width.x_list[i], 1); //Minimum channel width 1
}
}
if (grid.width() > 1) {
int num_channels = grid.width() - 1;
VTR_ASSERT(num_channels > 0);
float separation = 1.0 / num_channels; /* Norm. distance between two channels. */
for (size_t i = 0; i < grid.width(); ++i) { //-2 for no perim channels
float x = float(i) / num_channels;
chan_width.y_list[i] = (int)floor(cfactor * comp_width(&chan_y_dist, x, separation) + 0.5);
chan_width.y_list[i] = std::max(chan_width.y_list[i], 1); //Minimum channel width 1
}
}
chan_width.max = 0;
chan_width.x_max = chan_width.y_max = INT_MIN;
chan_width.x_min = chan_width.y_min = INT_MAX;
for (size_t i = 0; i < grid.height(); ++i) {
chan_width.max = std::max(chan_width.max, chan_width.x_list[i]);
chan_width.x_max = std::max(chan_width.x_max, chan_width.x_list[i]);
chan_width.x_min = std::min(chan_width.x_min, chan_width.x_list[i]);
}
for (size_t i = 0; i < grid.width(); ++i) {
chan_width.max = std::max(chan_width.max, chan_width.y_list[i]);
chan_width.y_max = std::max(chan_width.y_max, chan_width.y_list[i]);
chan_width.y_min = std::min(chan_width.y_min, chan_width.y_list[i]);
}
#ifdef VERBOSE
VTR_LOG("\n");
VTR_LOG("device_ctx.chan_width.x_list:\n");
for (size_t i = 0; i < grid.height(); ++i) {
VTR_LOG("%d ", chan_width.x_list[i]);
}
VTR_LOG("\n");
VTR_LOG("device_ctx.chan_width.y_list:\n");
for (size_t i = 0; i < grid.width(); ++i) {
VTR_LOG("%d ", chan_width.y_list[i]);
}
VTR_LOG("\n");
#endif
return chan_width;
}
static float comp_width(t_chan* chan, float x, float separation) {
/* Return the relative channel density. *chan points to a channel *
* functional description data structure, and x is the distance *
* (between 0 and 1) we are across the chip. separation is the *
* distance between two channels, in the 0 to 1 coordinate system. */
float val;
switch (chan->type) {
case UNIFORM:
val = chan->peak;
break;
case GAUSSIAN:
val = (x - chan->xpeak) * (x - chan->xpeak)
/ (2 * chan->width * chan->width);
val = chan->peak * exp(-val);
val += chan->dc;
break;
case PULSE:
val = (float)fabs((double)(x - chan->xpeak));
if (val > chan->width / 2.) {
val = 0;
} else {
val = chan->peak;
}
val += chan->dc;
break;
case DELTA:
val = x - chan->xpeak;
if (val > -separation / 2. && val <= separation / 2.)
val = chan->peak;
else
val = 0.;
val += chan->dc;
break;
default:
VPR_FATAL_ERROR(VPR_ERROR_ROUTE,
"in comp_width: Unknown channel type %d.\n", chan->type);
val = OPEN;
break;
}
return (val);
}
/*
* After placement, logical pins for blocks, and nets must be updated to correspond with physical pins of type.
* This is required by blocks with capacity > 1 (e.g. typically IOs with multiple instaces in each placement
* gride location). Since they may be swapped around during placement, we need to update which pins the various
* nets use.
*
* This updates both the external inter-block net connecitivity (i.e. the clustered netlist), and the intra-block
* connectivity (since the internal pins used also change).
*
* This function should only be called once
*/
void post_place_sync() {
/* Go through each block */
auto& cluster_ctx = g_vpr_ctx.clustering();
for (auto block_id : cluster_ctx.clb_nlist.blocks()) {
place_sync_external_block_connections(block_id);
}
}