blob: b45ab50500774962569d4adf4c892f7a668598c7 [file] [log] [blame]
#include <cstdio>
#include <cstring>
#include <cmath>
#include <ctime>
#include <algorithm>
#include <vector>
#include "vtr_assert.h"
#include "vtr_util.h"
#include "vtr_memory.h"
#include "vtr_math.h"
#include "vtr_log.h"
#include "vtr_time.h"
#include "vpr_types.h"
#include "vpr_utils.h"
#include "vpr_error.h"
#include "globals.h"
#include "rr_graph_util.h"
#include "rr_graph.h"
#include "rr_graph_area.h"
#include "rr_graph2.h"
#include "rr_graph_sbox.h"
#include "rr_graph_timing_params.h"
#include "rr_graph_indexed_data.h"
#include "check_rr_graph.h"
#include "read_xml_arch_file.h"
#include "echo_files.h"
#include "cb_metrics.h"
#include "build_switchblocks.h"
#include "rr_graph_writer.h"
#include "rr_graph_reader.h"
#include "router_lookahead_map.h"
#include "rr_graph_clock.h"
#include "rr_types.h"
//#define VERBOSE
struct t_mux {
int size;
t_mux* next;
};
struct t_mux_size_distribution {
int mux_count;
int max_index;
int* distr;
t_mux_size_distribution* next;
};
struct t_clb_to_clb_directs {
t_logical_block_type_ptr from_clb_type;
int from_clb_pin_start_index;
int from_clb_pin_end_index;
t_logical_block_type_ptr to_clb_type;
int to_clb_pin_start_index;
int to_clb_pin_end_index;
int switch_index; //The switch type used by this direct connection
};
struct t_pin_loc {
int pin_index;
int width_offset;
int height_offset;
e_side side;
};
typedef std::vector<std::map<int, int>> t_arch_switch_fanin;
/******************* Variables local to this module. ***********************/
/********************* Subroutines local to this module. *******************/
void print_rr_graph_stats();
bool channel_widths_unchanged(const t_chan_width& current, const t_chan_width& proposed);
static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_pin_to_track_map(const e_pin_type pin_type,
const vtr::Matrix<int>& Fc,
const t_physical_tile_type_ptr Type,
const std::vector<bool>& perturb_switch_pattern,
const e_directionality directionality,
const int num_seg_types,
const int* sets_per_seg_type);
static vtr::NdMatrix<int, 5> alloc_and_load_pin_to_seg_type(const e_pin_type pin_type,
const int seg_type_tracks,
const int Fc,
const t_physical_tile_type_ptr Type,
const bool perturb_switch_pattern,
const e_directionality directionality);
static void advance_to_next_block_side(t_physical_tile_type_ptr Type, int& width_offset, int& height_offset, e_side& side);
static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_track_to_pin_lookup(vtr::NdMatrix<std::vector<int>, 4> pin_to_track_map,
const vtr::Matrix<int>& Fc,
const int width,
const int height,
const int num_pins,
const int max_chan_width,
const int num_seg_types);
static void build_bidir_rr_opins(const int i,
const int j,
const e_side side,
const t_rr_node_indices& L_rr_node_indices,
const std::vector<t_rr_node>& rr_nodes,
const t_pin_to_track_lookup& opin_to_track_map,
const std::vector<vtr::Matrix<int>>& Fc_out,
t_rr_edge_info_set& created_rr_edges,
const t_chan_details& chan_details_x,
const t_chan_details& chan_details_y,
const DeviceGrid& grid,
const t_direct_inf* directs,
const int num_directs,
const t_clb_to_clb_directs* clb_to_clb_directs,
const int num_seg_types);
static void build_unidir_rr_opins(const int i,
const int j,
const e_side side,
const DeviceGrid& grid,
const std::vector<vtr::Matrix<int>>& Fc_out,
const int max_chan_width,
const t_chan_details& chan_details_x,
const t_chan_details& chan_details_y,
vtr::NdMatrix<int, 3>& Fc_xofs,
vtr::NdMatrix<int, 3>& Fc_yofs,
t_rr_edge_info_set& created_rr_edges,
bool* Fc_clipped,
const t_rr_node_indices& L_rr_node_indices,
const std::vector<t_rr_node>& rr_nodes,
const t_direct_inf* directs,
const int num_directs,
const t_clb_to_clb_directs* clb_to_clb_directs,
const int num_seg_types);
static int get_opin_direct_connecions(int x,
int y,
e_side side,
int opin,
int from_rr_node,
t_rr_edge_info_set& rr_edges_to_create,
const t_rr_node_indices& L_rr_node_indices,
const std::vector<t_rr_node>& rr_nodes,
const t_direct_inf* directs,
const int num_directs,
const t_clb_to_clb_directs* clb_to_clb_directs);
static void alloc_and_load_rr_graph(const int num_nodes,
std::vector<t_rr_node>& L_rr_node,
const int num_seg_types,
const t_chan_details& chan_details_x,
const t_chan_details& chan_details_y,
const t_track_to_pin_lookup& track_to_pin_lookup,
const t_pin_to_track_lookup& opin_to_track_map,
const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn,
t_sb_connection_map* sb_conn_map,
const DeviceGrid& grid,
const int Fs,
t_sblock_pattern& sblock_pattern,
const std::vector<vtr::Matrix<int>>& Fc_out,
vtr::NdMatrix<int, 3>& Fc_xofs,
vtr::NdMatrix<int, 3>& Fc_yofs,
const t_rr_node_indices& L_rr_node_indices,
const int max_chan_width,
const t_chan_width& chan_width,
const int wire_to_ipin_switch,
const int delayless_switch,
const enum e_directionality directionality,
bool* Fc_clipped,
const t_direct_inf* directs,
const int num_directs,
const t_clb_to_clb_directs* clb_to_clb_directs,
bool is_global_graph);
static float pattern_fmod(float a, float b);
static void load_uniform_connection_block_pattern(vtr::NdMatrix<int, 5>& tracks_connected_to_pin,
const std::vector<t_pin_loc>& pin_locations,
const int x_chan_width,
const int y_chan_width,
const int Fc,
const enum e_directionality directionality);
static void load_perturbed_connection_block_pattern(vtr::NdMatrix<int, 5>& tracks_connected_to_pin,
const std::vector<t_pin_loc>& pin_locations,
const int x_chan_width,
const int y_chan_width,
const int Fc,
const enum e_directionality directionality);
static std::vector<bool> alloc_and_load_perturb_opins(const t_physical_tile_type_ptr type, const vtr::Matrix<int>& Fc_out, const int max_chan_width, const std::vector<t_segment_inf>& segment_inf);
#ifdef ENABLE_CHECK_ALL_TRACKS
static void check_all_tracks_reach_pins(t_logical_block_type_ptr type,
int***** tracks_connected_to_pin,
int max_chan_width,
int Fc,
enum e_pin_type ipin_or_opin);
#endif
static std::vector<std::vector<bool>> alloc_and_load_perturb_ipins(const int L_num_types,
const int num_seg_types,
const int* sets_per_seg_type,
const std::vector<vtr::Matrix<int>>& Fc_in,
const std::vector<vtr::Matrix<int>>& Fc_out,
const enum e_directionality directionality);
static void build_rr_sinks_sources(const int i,
const int j,
std::vector<t_rr_node>& L_rr_node,
t_rr_edge_info_set& rr_edges_to_create,
const t_rr_node_indices& L_rr_node_indices,
const int delayless_switch,
const DeviceGrid& grid);
static void build_rr_chan(const int i,
const int j,
const t_rr_type chan_type,
const t_track_to_pin_lookup& track_to_pin_lookup,
t_sb_connection_map* sb_conn_map,
const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn,
const int cost_index_offset,
const int max_chan_width,
const DeviceGrid& grid,
const int tracks_per_chan,
t_sblock_pattern& sblock_pattern,
const int Fs_per_side,
const t_chan_details& chan_details_x,
const t_chan_details& chan_details_y,
const t_rr_node_indices& L_rr_node_indices,
t_rr_edge_info_set& created_rr_edges,
std::vector<t_rr_node>& L_rr_node,
const int wire_to_ipin_switch,
const enum e_directionality directionality);
void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create);
void alloc_and_load_edges(std::vector<t_rr_node>& L_rr_node,
const t_rr_edge_info_set& rr_edges_to_create);
static void alloc_and_load_rr_switch_inf(const int num_arch_switches,
const float R_minW_nmos,
const float R_minW_pmos,
const int wire_to_arch_ipin_switch,
int* wire_to_rr_ipin_switch);
static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin);
static void load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, const t_arch_switch_fanin& switch_fanin);
static void alloc_rr_switch_inf(t_arch_switch_fanin& switch_fanin);
static void rr_graph_externals(const std::vector<t_segment_inf>& segment_inf,
int max_chan_width,
int wire_to_rr_ipin_switch,
enum e_base_cost_type base_cost_type);
static t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_inf* directs, const int num_directs, const int delayless_switch);
static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map,
const std::vector<t_physical_tile_type>& types,
int max_chan_width);
static t_seg_details* alloc_and_load_global_route_seg_details(const int global_route_switch,
int* num_seg_details = nullptr);
static std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const std::vector<t_physical_tile_type>& types,
const int max_pins,
const std::vector<t_segment_inf>& segment_inf,
const int* sets_per_seg_type,
const int max_chan_width,
const e_fc_type fc_type,
const enum e_directionality directionality,
bool* Fc_clipped);
static int pick_best_direct_connect_target_rr_node(const std::vector<t_rr_node>& rr_nodes,
int from_rr,
const std::vector<int>& candidate_rr_nodes);
static void expand_non_configurable(int inode, std::set<t_node_edge>& edge_set);
static void process_non_config_sets(const t_non_configurable_rr_sets& non_config_rr_sets);
static void build_rr_graph(const t_graph_type graph_type,
const std::vector<t_physical_tile_type>& types,
const DeviceGrid& grid,
t_chan_width nodes_per_chan,
const enum e_switch_block_type sb_type,
const int Fs,
const std::vector<t_switchblock_inf> switchblocks,
const int num_arch_switches,
const std::vector<t_segment_inf>& segment_inf,
const int global_route_switch,
const int wire_to_arch_ipin_switch,
const int delayless_switch,
const float R_minW_nmos,
const float R_minW_pmos,
const enum e_base_cost_type base_cost_type,
const bool trim_empty_channels,
const bool trim_obs_channels,
const t_direct_inf* directs,
const int num_directs,
int* wire_to_rr_ipin_switch,
int* Warnings);
/******************* Subroutine definitions *******************************/
void create_rr_graph(const t_graph_type graph_type,
const std::vector<t_physical_tile_type>& block_types,
const DeviceGrid& grid,
const t_chan_width nodes_per_chan,
const int num_arch_switches,
t_det_routing_arch* det_routing_arch,
std::vector<t_segment_inf>& segment_inf,
const enum e_base_cost_type base_cost_type,
const bool trim_empty_channels,
const bool trim_obs_channels,
const enum e_clock_modeling clock_modeling,
const e_router_lookahead router_lookahead_type,
const t_direct_inf* directs,
const int num_directs,
int* Warnings) {
const auto& device_ctx = g_vpr_ctx.device();
if (!det_routing_arch->read_rr_graph_filename.empty()) {
if (device_ctx.read_rr_graph_filename != det_routing_arch->read_rr_graph_filename) {
free_rr_graph();
load_rr_file(graph_type,
grid,
segment_inf,
base_cost_type,
&det_routing_arch->wire_to_rr_ipin_switch,
det_routing_arch->read_rr_graph_filename.c_str());
}
} else {
if (channel_widths_unchanged(device_ctx.chan_width, nodes_per_chan) && !device_ctx.rr_nodes.empty()) {
//No change in channel width, so skip re-building RR graph
VTR_LOG("RR graph channel widths unchanged, skipping RR graph rebuild\n");
return;
}
free_rr_graph();
build_rr_graph(graph_type,
block_types,
grid,
nodes_per_chan,
det_routing_arch->switch_block_type,
det_routing_arch->Fs,
det_routing_arch->switchblocks,
num_arch_switches,
segment_inf,
det_routing_arch->global_route_switch,
det_routing_arch->wire_to_arch_ipin_switch,
det_routing_arch->delayless_switch,
det_routing_arch->R_minW_nmos,
det_routing_arch->R_minW_pmos,
base_cost_type,
trim_empty_channels,
trim_obs_channels,
directs, num_directs,
&det_routing_arch->wire_to_rr_ipin_switch,
Warnings);
if (clock_modeling == DEDICATED_NETWORK) {
ClockRRGraphBuilder::create_and_append_clock_rr_graph(segment_inf,
det_routing_arch->R_minW_nmos,
det_routing_arch->R_minW_pmos,
det_routing_arch->wire_to_rr_ipin_switch,
base_cost_type);
}
}
auto non_config_rr_sets = identify_non_configurable_rr_sets();
process_non_config_sets(non_config_rr_sets);
print_rr_graph_stats();
//Write out rr graph file if needed
if (!det_routing_arch->write_rr_graph_filename.empty()) {
write_rr_graph(det_routing_arch->write_rr_graph_filename.c_str(), segment_inf);
}
}
void print_rr_graph_stats() {
auto& device_ctx = g_vpr_ctx.device();
size_t num_rr_edges = 0;
for (auto& rr_node : device_ctx.rr_nodes) {
num_rr_edges += rr_node.edges().size();
}
VTR_LOG(" RR Graph Nodes: %zu\n", device_ctx.rr_nodes.size());
VTR_LOG(" RR Graph Edges: %zu\n", num_rr_edges);
}
bool channel_widths_unchanged(const t_chan_width& current, const t_chan_width& proposed) {
if (current.max != proposed.max
|| current.x_max != proposed.x_max
|| current.y_max != proposed.y_max
|| current.x_min != proposed.x_min
|| current.y_min != proposed.y_min
|| current.x_list != proposed.x_list
|| current.y_list != proposed.y_list) {
return false; //Different max/min or channel widths
}
return true; //Identical
}
static void build_rr_graph(const t_graph_type graph_type,
const std::vector<t_physical_tile_type>& types,
const DeviceGrid& grid,
t_chan_width nodes_per_chan,
const enum e_switch_block_type sb_type,
const int Fs,
const std::vector<t_switchblock_inf> switchblocks,
const int num_arch_switches,
const std::vector<t_segment_inf>& segment_inf,
const int global_route_switch,
const int wire_to_arch_ipin_switch,
const int delayless_switch,
const float R_minW_nmos,
const float R_minW_pmos,
const enum e_base_cost_type base_cost_type,
const bool trim_empty_channels,
const bool trim_obs_channels,
const t_direct_inf* directs,
const int num_directs,
int* wire_to_rr_ipin_switch,
int* Warnings) {
vtr::ScopedStartFinishTimer timer("Build routing resource graph");
/* Reset warning flag */
*Warnings = RR_GRAPH_NO_WARN;
/* Decode the graph_type */
bool is_global_graph = ((GRAPH_GLOBAL == graph_type) ? true : false);
bool use_full_seg_groups = ((GRAPH_UNIDIR_TILEABLE == graph_type) ? true : false);
enum e_directionality directionality = ((GRAPH_BIDIR == graph_type) ? BI_DIRECTIONAL : UNI_DIRECTIONAL);
if (is_global_graph) {
directionality = BI_DIRECTIONAL;
}
/* Global routing uses a single longwire track */
int max_chan_width = (is_global_graph ? 1 : nodes_per_chan.max);
VTR_ASSERT(max_chan_width > 0);
auto& device_ctx = g_vpr_ctx.mutable_device();
t_clb_to_clb_directs* clb_to_clb_directs = nullptr;
if (num_directs > 0) {
clb_to_clb_directs = alloc_and_load_clb_to_clb_directs(directs, num_directs, delayless_switch);
}
/* START SEG_DETAILS */
int num_seg_details = 0;
t_seg_details* seg_details = nullptr;
if (is_global_graph) {
/* Sets up a single unit length segment type for global routing. */
seg_details = alloc_and_load_global_route_seg_details(global_route_switch, &num_seg_details);
} else {
/* Setup segments including distrubuting tracks and staggering.
* If use_full_seg_groups is specified, max_chan_width may be
* changed. Warning should be singled to caller if this happens. */
size_t max_dim = std::max(grid.width(), grid.height()) - 2; //-2 for no perim channels
seg_details = alloc_and_load_seg_details(&max_chan_width,
max_dim, segment_inf,
use_full_seg_groups, is_global_graph, directionality,
&num_seg_details);
if (nodes_per_chan.max != max_chan_width) {
nodes_per_chan.max = max_chan_width;
*Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED;
}
//TODO: Fix
//if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SEG_DETAILS)) {
//dump_seg_details(seg_details, max_chan_width,
//getEchoFileName(E_ECHO_SEG_DETAILS));
//}
}
/* END SEG_DETAILS */
/* START CHAN_DETAILS */
t_chan_details chan_details_x;
t_chan_details chan_details_y;
alloc_and_load_chan_details(grid, &nodes_per_chan,
trim_empty_channels, trim_obs_channels,
num_seg_details, seg_details,
chan_details_x, chan_details_y);
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CHAN_DETAILS)) {
dump_chan_details(chan_details_x, chan_details_y, max_chan_width, grid,
getEchoFileName(E_ECHO_CHAN_DETAILS));
}
/* END CHAN_DETAILS */
/* START FC */
/* Determine the actual value of Fc */
std::vector<vtr::Matrix<int>> Fc_in; /* [0..device_ctx.num_block_types-1][0..num_pins-1][0..num_segments-1] */
std::vector<vtr::Matrix<int>> Fc_out; /* [0..device_ctx.num_block_types-1][0..num_pins-1][0..num_segments-1] */
/* get maximum number of pins across all blocks */
int max_pins = types[0].num_pins;
for (const auto& type : types) {
if (is_empty_type(&type)) {
continue;
}
if (type.num_pins > max_pins) {
max_pins = type.num_pins;
}
}
/* get the number of 'sets' for each segment type -- unidirectial architectures have two tracks in a set, bidirectional have one */
int total_sets = max_chan_width;
if (directionality == UNI_DIRECTIONAL) {
VTR_ASSERT(max_chan_width % 2 == 0);
total_sets /= 2;
}
int* sets_per_seg_type = get_seg_track_counts(total_sets, segment_inf, use_full_seg_groups);
if (is_global_graph) {
//All pins can connect during global routing
auto ones = vtr::Matrix<int>({size_t(max_pins), segment_inf.size()}, 1);
Fc_in = std::vector<vtr::Matrix<int>>(types.size(), ones);
Fc_out = std::vector<vtr::Matrix<int>>(types.size(), ones);
} else {
bool Fc_clipped = false;
Fc_in = alloc_and_load_actual_fc(types, max_pins, segment_inf, sets_per_seg_type, max_chan_width,
e_fc_type::IN, directionality, &Fc_clipped);
if (Fc_clipped) {
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
Fc_clipped = false;
Fc_out = alloc_and_load_actual_fc(types, max_pins, segment_inf, sets_per_seg_type, max_chan_width,
e_fc_type::OUT, directionality, &Fc_clipped);
if (Fc_clipped) {
*Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
}
for (const auto& type : types) {
int i = type.index;
/* Skip "EMPTY" */
if (is_empty_type(&type)) {
continue;
}
for (int j = 0; j < type.num_pins; ++j) {
for (size_t k = 0; k < segment_inf.size(); k++) {
#ifdef VERBOSE
VTR_LOG(
"Fc Actual Values: type = %s, pin = %d (%s), "
"seg = %d (%s), Fc_out = %d, Fc_in = %d.\n",
type.name,
j,
block_type_pin_index_to_name(&type, j).c_str(),
k,
segment_inf[k].name.c_str(),
Fc_out[i][j][k],
Fc_in[i][j][k]);
#endif /* VERBOSE */
VTR_ASSERT_MSG(Fc_out[i][j][k] == 0 || Fc_in[i][j][k] == 0,
"Pins must be inputs or outputs (i.e. can not have both non-zero Fc_out and Fc_in)");
}
}
}
}
auto perturb_ipins = alloc_and_load_perturb_ipins(types.size(), segment_inf.size(),
sets_per_seg_type, Fc_in, Fc_out, directionality);
/* END FC */
/* Alloc node lookups, count nodes, alloc rr nodes */
int num_rr_nodes = 0;
device_ctx.rr_node_indices = alloc_and_load_rr_node_indices(max_chan_width, grid,
&num_rr_nodes, chan_details_x, chan_details_y);
device_ctx.rr_nodes.resize(num_rr_nodes);
/* These are data structures used by the the unidir opin mapping. They are used
* to spread connections evenly for each segment type among the available
* wire start points */
vtr::NdMatrix<int, 3> Fc_xofs({grid.height() - 1,
grid.width() - 1,
segment_inf.size()},
0); //[0..grid.height()-2][0..grid.width()-2][0..num_seg_types-1]
vtr::NdMatrix<int, 3> Fc_yofs({grid.width() - 1,
grid.height() - 1,
segment_inf.size()},
0); //[0..grid.width()-2][0..grid.height()-2][0..num_seg_types-1]
/* START SB LOOKUP */
/* Alloc and load the switch block lookup */
vtr::NdMatrix<std::vector<int>, 3> switch_block_conn;
t_sblock_pattern unidir_sb_pattern;
t_sb_connection_map* sb_conn_map = nullptr; //for custom switch blocks
//We are careful to use a single seed each time build_rr_graph is called
//to initialize the random number generator (RNG) which is (potentially)
//used when creating custom switchblocks. This ensures that build_rr_graph()
//is deterministic -- always producing the same RR graph.
constexpr unsigned SWITCHPOINT_RNG_SEED = 1;
vtr::RandState switchpoint_rand_state = SWITCHPOINT_RNG_SEED;
if (is_global_graph) {
switch_block_conn = alloc_and_load_switch_block_conn(1, SUBSET, 3);
} else if (BI_DIRECTIONAL == directionality) {
if (sb_type == CUSTOM) {
sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, chan_details_y,
grid,
switchblocks, &nodes_per_chan, directionality,
switchpoint_rand_state);
} else {
switch_block_conn = alloc_and_load_switch_block_conn(max_chan_width, sb_type, Fs);
}
} else {
VTR_ASSERT(UNI_DIRECTIONAL == directionality);
if (sb_type == CUSTOM) {
sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, chan_details_y,
grid,
switchblocks, &nodes_per_chan, directionality,
switchpoint_rand_state);
} else {
/* it looks like we get unbalanced muxing from this switch block code with Fs > 3 */
VTR_ASSERT(Fs == 3);
unidir_sb_pattern = alloc_sblock_pattern_lookup(grid, max_chan_width);
for (size_t i = 0; i < grid.width() - 1; i++) {
for (size_t j = 0; j < grid.height() - 1; j++) {
load_sblock_pattern_lookup(i, j, grid, &nodes_per_chan,
chan_details_x, chan_details_y,
Fs, sb_type, unidir_sb_pattern);
}
}
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SBLOCK_PATTERN)) {
dump_sblock_pattern(unidir_sb_pattern, max_chan_width, grid,
getEchoFileName(E_ECHO_SBLOCK_PATTERN));
}
}
}
/* END SB LOOKUP */
/* START IPIN MAP */
/* Create ipin map lookups */
t_pin_to_track_lookup ipin_to_track_map(types.size()); /* [0..device_ctx.physical_tile_types.size()-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */
t_track_to_pin_lookup track_to_pin_lookup(types.size()); /* [0..device_ctx.physical_tile_types.size()-1][0..max_chan_width-1][0..width][0..height][0..3] */
for (unsigned int itype = 0; itype < types.size(); ++itype) {
ipin_to_track_map[itype] = alloc_and_load_pin_to_track_map(RECEIVER,
Fc_in[itype], &types[itype], perturb_ipins[itype], directionality,
segment_inf.size(), sets_per_seg_type);
track_to_pin_lookup[itype] = alloc_and_load_track_to_pin_lookup(ipin_to_track_map[itype], Fc_in[itype], types[itype].width, types[itype].height,
types[itype].num_pins, max_chan_width, segment_inf.size());
}
/* END IPIN MAP */
/* START OPIN MAP */
/* Create opin map lookups */
t_pin_to_track_lookup opin_to_track_map(types.size()); /* [0..device_ctx.physical_tile_types.size()-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */
if (BI_DIRECTIONAL == directionality) {
for (unsigned int itype = 0; itype < types.size(); ++itype) {
auto perturb_opins = alloc_and_load_perturb_opins(&types[itype], Fc_out[itype],
max_chan_width, segment_inf);
opin_to_track_map[itype] = alloc_and_load_pin_to_track_map(DRIVER,
Fc_out[itype], &types[itype], perturb_opins, directionality,
segment_inf.size(), sets_per_seg_type);
}
}
/* END OPIN MAP */
bool Fc_clipped = false;
alloc_and_load_rr_graph(device_ctx.rr_nodes.size(), device_ctx.rr_nodes, segment_inf.size(),
chan_details_x, chan_details_y,
track_to_pin_lookup, opin_to_track_map,
switch_block_conn, sb_conn_map, grid, Fs, unidir_sb_pattern,
Fc_out, Fc_xofs, Fc_yofs, device_ctx.rr_node_indices,
max_chan_width,
nodes_per_chan,
wire_to_arch_ipin_switch,
delayless_switch,
directionality,
&Fc_clipped,
directs, num_directs, clb_to_clb_directs,
is_global_graph);
/* Update rr_nodes capacities if global routing */
if (graph_type == GRAPH_GLOBAL) {
for (size_t i = 0; i < device_ctx.rr_nodes.size(); i++) {
if (device_ctx.rr_nodes[i].type() == CHANX) {
int ylow = device_ctx.rr_nodes[i].ylow();
device_ctx.rr_nodes[i].set_capacity(nodes_per_chan.x_list[ylow]);
}
if (device_ctx.rr_nodes[i].type() == CHANY) {
int xlow = device_ctx.rr_nodes[i].xlow();
device_ctx.rr_nodes[i].set_capacity(nodes_per_chan.y_list[xlow]);
}
}
}
/* Allocate and load routing resource switches, which are derived from the switches from the architecture file,
* based on their fanin in the rr graph. This routine also adjusts the rr nodes to point to these new rr switches */
alloc_and_load_rr_switch_inf(num_arch_switches, R_minW_nmos, R_minW_pmos, wire_to_arch_ipin_switch, wire_to_rr_ipin_switch);
//Partition the rr graph edges for efficient access to configurable/non-configurable
//edge subsets. Must be done after RR switches have been allocated
partition_rr_graph_edges(device_ctx);
//Save the channel widths for the newly constructed graph
device_ctx.chan_width = nodes_per_chan;
rr_graph_externals(segment_inf, max_chan_width,
*wire_to_rr_ipin_switch, base_cost_type);
check_rr_graph(graph_type, grid, types);
/* Free all temp structs */
if (seg_details) {
delete[] seg_details;
seg_details = nullptr;
}
if (!chan_details_x.empty() || !chan_details_y.empty()) {
free_chan_details(chan_details_x, chan_details_y);
}
if (sb_conn_map) {
free_switchblock_permutations(sb_conn_map);
sb_conn_map = nullptr;
}
if (sets_per_seg_type) {
free(sets_per_seg_type);
sets_per_seg_type = nullptr;
}
free_type_track_to_pin_map(track_to_pin_lookup, types, max_chan_width);
if (clb_to_clb_directs != nullptr) {
free(clb_to_clb_directs);
}
}
/* Allocates and loads the global rr_switch_inf array based on the global
* arch_switch_inf array and the fan-ins used by the rr nodes.
* Also changes switch indices of rr_nodes to index into rr_switch_inf
* instead of arch_switch_inf.
*
* Returns the number of rr switches created.
* Also returns, through a pointer, the index of a representative ipin cblock switch.
* - Currently we're not allowing a designer to specify an ipin cblock switch with
* multiple fan-ins, so there's just one of these switches in the device_ctx.rr_switch_inf array.
* But in the future if we allow this, we can return an index to a representative switch
*
* The rr_switch_inf switches are derived from the arch_switch_inf switches
* (which were read-in from the architecture file) based on fan-in. The delays of
* the rr switches depend on their fan-in, so we first go through the rr_nodes
* and count how many different fan-ins exist for each arch switch.
* Then we create these rr switches and update the switch indices
* of rr_nodes to index into the rr_switch_inf array. */
static void alloc_and_load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, const int wire_to_arch_ipin_switch, int* wire_to_rr_ipin_switch) {
/* we will potentially be creating a couple of versions of each arch switch where
* each version corresponds to a different fan-in. We will need to fill device_ctx.rr_switch_inf
* with this expanded list of switches.
*
* To do this we will use arch_switch_fanins, which is indexed as:
* arch_switch_fanins[i_arch_switch][fanin] -> new_switch_id
*/
t_arch_switch_fanin arch_switch_fanins(num_arch_switches);
/* Determine what the different fan-ins are for each arch switch, and also
* how many entries the rr_switch_inf array should have */
alloc_rr_switch_inf(arch_switch_fanins);
/* create the rr switches. also keep track of, for each arch switch, what index of the rr_switch_inf
* array each version of its fanin has been mapped to */
load_rr_switch_inf(num_arch_switches, R_minW_nmos, R_minW_pmos, arch_switch_fanins);
/* next, walk through rr nodes again and remap their switch indices to rr_switch_inf */
remap_rr_node_switch_indices(arch_switch_fanins);
/* now we need to set the wire_to_rr_ipin_switch variable which points the detailed routing architecture
* to the representative ipin cblock switch. currently we're not allowing the specification of an ipin cblock switch
* with multiple fan-ins, so right now there's just one. May change in the future, in which case we'd need to
* return a representative switch */
if (arch_switch_fanins[wire_to_arch_ipin_switch].count(UNDEFINED)) {
/* only have one ipin cblock switch. OK. */
(*wire_to_rr_ipin_switch) = arch_switch_fanins[wire_to_arch_ipin_switch][UNDEFINED];
} else if (arch_switch_fanins[wire_to_arch_ipin_switch].size() != 0) {
VPR_FATAL_ERROR(VPR_ERROR_ARCH,
"Not currently allowing an ipin cblock switch to have multiple fan-ins");
} else {
//This likely indicates that no connection block has been constructed, indicating significant issues with
//the generated RR graph.
//
//Instead of throwing an error we issue a warning. This means that check_rr_graph() etc. will run to give more information
//and allow graphics to be brought up for users to debug their architectures.
(*wire_to_rr_ipin_switch) = OPEN;
VTR_LOG_WARN("No switch found for the ipin cblock in RR graph. Check if there is an error in arch file, or if no connection blocks are being built in RR graph\n");
}
}
/* Allocates space for the global device_ctx.rr_switch_inf variable and returns the
* number of rr switches that were allocated */
static void alloc_rr_switch_inf(t_arch_switch_fanin& arch_switch_fanins) {
auto& device_ctx = g_vpr_ctx.mutable_device();
int num_rr_switches = 0;
{
//Collect the fan-in per switch type for each node in the graph
//
//Note that since we don't store backward edge info in the RR graph we need to walk
//the whole graph to get the per-switch-type fanin info
std::vector<vtr::flat_map<int, int>> inward_switch_inf(device_ctx.rr_nodes.size()); //[to_node][arch_switch] -> fanin
for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) {
for (auto iedge : device_ctx.rr_nodes[inode].edges()) {
int iswitch = device_ctx.rr_nodes[inode].edge_switch(iedge);
int to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);
if (inward_switch_inf[to_node].count(iswitch) == 0) {
inward_switch_inf[to_node][iswitch] = 0;
}
inward_switch_inf[to_node][iswitch]++;
}
}
//Record the unique switch type/fanin combinations
for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) {
for (auto& switch_fanin : inward_switch_inf[inode]) {
int iswitch, fanin;
std::tie(iswitch, fanin) = switch_fanin;
if (device_ctx.arch_switch_inf[iswitch].fixed_Tdel()) {
//If delay is independent of fanin drop the unique fanin info
fanin = UNDEFINED;
}
if (arch_switch_fanins[iswitch].count(fanin) == 0) { //New fanin for this switch
arch_switch_fanins[iswitch][fanin] = num_rr_switches++; //Assign it a unique index
}
}
}
}
/* allocate space for the rr_switch_inf array */
device_ctx.rr_switch_inf.resize(num_rr_switches);
}
/* load the global device_ctx.rr_switch_inf variable. also keep track of, for each arch switch, what
* index of the rr_switch_inf array each version of its fanin has been mapped to (through switch_fanin map) */
static void load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, const t_arch_switch_fanin& arch_switch_fanins) {
auto& device_ctx = g_vpr_ctx.mutable_device();
if (!device_ctx.switch_fanin_remap.empty()) {
// at this stage, we rebuild the rr_graph (probably in binary search)
// so old device_ctx.switch_fanin_remap is obsolete
device_ctx.switch_fanin_remap.clear();
}
device_ctx.switch_fanin_remap.resize(num_arch_switches);
for (int i_arch_switch = 0; i_arch_switch < num_arch_switches; i_arch_switch++) {
std::map<int, int>::iterator it;
for (auto fanin_rrswitch : arch_switch_fanins[i_arch_switch]) {
/* the fanin value is in it->first, and we'll need to set what index this i_arch_switch/fanin
* combination maps to (within rr_switch_inf) in it->second) */
int fanin;
int i_rr_switch;
std::tie(fanin, i_rr_switch) = fanin_rrswitch;
// setup device_ctx.switch_fanin_remap, for future swich usage analysis
device_ctx.switch_fanin_remap[i_arch_switch][fanin] = i_rr_switch;
load_rr_switch_from_arch_switch(i_arch_switch, i_rr_switch, fanin, R_minW_nmos, R_minW_pmos);
}
}
}
void load_rr_switch_from_arch_switch(int arch_switch_idx,
int rr_switch_idx,
int fanin,
const float R_minW_nmos,
const float R_minW_pmos) {
auto& device_ctx = g_vpr_ctx.mutable_device();
/* figure out, by looking at the arch switch's Tdel map, what the delay of the new
* rr switch should be */
double rr_switch_Tdel = device_ctx.arch_switch_inf[arch_switch_idx].Tdel(fanin);
/* copy over the arch switch to rr_switch_inf[rr_switch_idx], but with the changed Tdel value */
device_ctx.rr_switch_inf[rr_switch_idx].set_type(device_ctx.arch_switch_inf[arch_switch_idx].type());
device_ctx.rr_switch_inf[rr_switch_idx].R = device_ctx.arch_switch_inf[arch_switch_idx].R;
device_ctx.rr_switch_inf[rr_switch_idx].Cin = device_ctx.arch_switch_inf[arch_switch_idx].Cin;
device_ctx.rr_switch_inf[rr_switch_idx].Cinternal = device_ctx.arch_switch_inf[arch_switch_idx].Cinternal;
device_ctx.rr_switch_inf[rr_switch_idx].Cout = device_ctx.arch_switch_inf[arch_switch_idx].Cout;
device_ctx.rr_switch_inf[rr_switch_idx].Tdel = rr_switch_Tdel;
device_ctx.rr_switch_inf[rr_switch_idx].mux_trans_size = device_ctx.arch_switch_inf[arch_switch_idx].mux_trans_size;
if (device_ctx.arch_switch_inf[arch_switch_idx].buf_size_type == BufferSize::AUTO) {
//Size based on resistance
device_ctx.rr_switch_inf[rr_switch_idx].buf_size = trans_per_buf(device_ctx.arch_switch_inf[arch_switch_idx].R, R_minW_nmos, R_minW_pmos);
} else {
VTR_ASSERT(device_ctx.arch_switch_inf[arch_switch_idx].buf_size_type == BufferSize::ABSOLUTE);
//Use the specified size
device_ctx.rr_switch_inf[rr_switch_idx].buf_size = device_ctx.arch_switch_inf[arch_switch_idx].buf_size;
}
device_ctx.rr_switch_inf[rr_switch_idx].name = device_ctx.arch_switch_inf[arch_switch_idx].name;
device_ctx.rr_switch_inf[rr_switch_idx].power_buffer_type = device_ctx.arch_switch_inf[arch_switch_idx].power_buffer_type;
device_ctx.rr_switch_inf[rr_switch_idx].power_buffer_size = device_ctx.arch_switch_inf[arch_switch_idx].power_buffer_size;
}
/* switch indices of each rr_node original point into the global device_ctx.arch_switch_inf array.
* now we want to remap these indices to point into the global device_ctx.rr_switch_inf array
* which contains switch info at different fan-in values */
static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin) {
auto& device_ctx = g_vpr_ctx.mutable_device();
for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
auto& from_node = device_ctx.rr_nodes[inode];
int num_edges = from_node.num_edges();
for (int iedge = 0; iedge < num_edges; iedge++) {
const t_rr_node& to_node = device_ctx.rr_nodes[from_node.edge_sink_node(iedge)];
/* get the switch which this edge uses and its fanin */
int switch_index = from_node.edge_switch(iedge);
int fanin = to_node.fan_in();
if (switch_fanin[switch_index].count(UNDEFINED) == 1) {
fanin = UNDEFINED;
}
auto itr = switch_fanin[switch_index].find(fanin);
VTR_ASSERT(itr != switch_fanin[switch_index].end());
int rr_switch_index = itr->second;
from_node.set_edge_switch(iedge, rr_switch_index);
}
}
}
static void rr_graph_externals(const std::vector<t_segment_inf>& segment_inf,
int max_chan_width,
int wire_to_rr_ipin_switch,
enum e_base_cost_type base_cost_type) {
auto& device_ctx = g_vpr_ctx.device();
add_rr_graph_C_from_switches(device_ctx.rr_switch_inf[wire_to_rr_ipin_switch].Cin);
alloc_and_load_rr_indexed_data(segment_inf, device_ctx.rr_node_indices,
max_chan_width, wire_to_rr_ipin_switch, base_cost_type);
load_rr_index_segments(segment_inf.size());
}
static std::vector<std::vector<bool>> alloc_and_load_perturb_ipins(const int L_num_types,
const int num_seg_types,
const int* sets_per_seg_type,
const std::vector<vtr::Matrix<int>>& Fc_in,
const std::vector<vtr::Matrix<int>>& Fc_out,
const enum e_directionality directionality) {
std::vector<std::vector<bool>> result(L_num_types);
for (auto& seg_type_bools : result) {
seg_type_bools.resize(num_seg_types, false);
}
/* factor to account for unidir vs bidir */
int fac = 1;
if (directionality == UNI_DIRECTIONAL) {
fac = 2;
}
if (BI_DIRECTIONAL == directionality) {
for (int iseg = 0; iseg < num_seg_types; ++iseg) {
result[0][iseg] = false;
int tracks_in_seg_type = sets_per_seg_type[iseg] * fac;
for (int itype = 1; itype < L_num_types; ++itype) {
result[itype][iseg] = false;
float Fc_ratio;
if (Fc_in[itype][0][iseg] > Fc_out[itype][0][iseg]) {
Fc_ratio = (float)Fc_in[itype][0][iseg] / (float)Fc_out[itype][0][iseg];
} else {
Fc_ratio = (float)Fc_out[itype][0][iseg] / (float)Fc_in[itype][0][iseg];
}
if ((Fc_in[itype][0][iseg] <= tracks_in_seg_type - 2)
&& (fabs(Fc_ratio - vtr::nint(Fc_ratio))
< (0.5 / (float)tracks_in_seg_type))) {
result[itype][iseg] = true;
}
}
}
} else {
/* Unidirectional routing uses mux balancing patterns and
* thus shouldn't need perturbation. */
VTR_ASSERT(UNI_DIRECTIONAL == directionality);
for (int itype = 0; itype < L_num_types; ++itype) {
for (int iseg = 0; iseg < num_seg_types; ++iseg) {
result[itype][iseg] = false;
}
}
}
return result;
}
static t_seg_details* alloc_and_load_global_route_seg_details(const int global_route_switch,
int* num_seg_details) {
t_seg_details* seg_details = new t_seg_details[1];
seg_details->index = 0;
seg_details->length = 1;
seg_details->arch_wire_switch = global_route_switch;
seg_details->arch_opin_switch = global_route_switch;
seg_details->longline = false;
seg_details->direction = BI_DIRECTION;
seg_details->Cmetal = 0.0;
seg_details->Rmetal = 0.0;
seg_details->start = 1;
seg_details->cb = std::make_unique<bool[]>(1);
seg_details->cb[0] = true;
seg_details->sb = std::make_unique<bool[]>(2);
seg_details->sb[0] = true;
seg_details->sb[1] = true;
seg_details->group_size = 1;
seg_details->group_start = 0;
seg_details->seg_start = -1;
seg_details->seg_end = -1;
if (num_seg_details) {
*num_seg_details = 1;
}
return seg_details;
}
/* Calculates the number of track connections from each block pin to each segment type */
static std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const std::vector<t_physical_tile_type>& types,
const int max_pins,
const std::vector<t_segment_inf>& segment_inf,
const int* sets_per_seg_type,
const int max_chan_width,
const e_fc_type fc_type,
const enum e_directionality directionality,
bool* Fc_clipped) {
//Initialize Fc of all blocks to zero
auto zeros = vtr::Matrix<int>({size_t(max_pins), segment_inf.size()}, 0);
std::vector<vtr::Matrix<int>> Fc(types.size(), zeros);
*Fc_clipped = false;
/* Unidir tracks formed in pairs, otherwise no effect. */
int fac = 1;
if (UNI_DIRECTIONAL == directionality) {
fac = 2;
}
VTR_ASSERT((max_chan_width % fac) == 0);
for (const auto& type : types) { //Skip EMPTY
int itype = type.index;
for (const t_fc_specification& fc_spec : type.fc_specs) {
if (fc_type != fc_spec.fc_type) continue;
VTR_ASSERT(fc_spec.pins.size() > 0);
int iseg = fc_spec.seg_index;
if (fc_spec.fc_value == 0) {
/* Special case indicating that this pin does not connect to general-purpose routing */
for (int ipin : fc_spec.pins) {
Fc[itype][ipin][iseg] = 0;
}
} else {
/* General case indicating that this pin connects to general-purpose routing */
//Calculate how many connections there should be accross all the pins in this fc_spec
int total_connections = 0;
if (fc_spec.fc_value_type == e_fc_value_type::FRACTIONAL) {
float conns_per_pin = fac * sets_per_seg_type[iseg] * fc_spec.fc_value;
float flt_total_connections = conns_per_pin * fc_spec.pins.size();
total_connections = vtr::nint(flt_total_connections); //Round to integer
} else {
VTR_ASSERT(fc_spec.fc_value_type == e_fc_value_type::ABSOLUTE);
if (std::fmod(fc_spec.fc_value, fac) != 0.) {
VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Absolute Fc value must be a multiple of %d (was %f) between block pin '%s' and wire segment '%s'",
fac, fc_spec.fc_value,
block_type_pin_index_to_name(&type, fc_spec.pins[0]).c_str(),
segment_inf[iseg].name.c_str());
}
if (fc_spec.fc_value < fac) {
VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Absolute Fc value must be at least %d (was %f) between block pin '%s' to wire segment %s",
fac, fc_spec.fc_value,
block_type_pin_index_to_name(&type, fc_spec.pins[0]).c_str(),
segment_inf[iseg].name.c_str());
}
total_connections = vtr::nint(fc_spec.fc_value) * fc_spec.pins.size();
}
//Ensure that there are at least fac connections, this ensures that low Fc ports
//targeting small sets of segs get connection(s), even if flt_total_connections < fac.
total_connections = std::max(total_connections, fac);
//Ensure total evenly divides fac by adding the remainder
total_connections += (total_connections % fac);
VTR_ASSERT(total_connections > 0);
VTR_ASSERT(total_connections % fac == 0);
//We walk through all the pins this fc_spec applies to, adding fac connections
//to each pin, until we run out of connections. This should distribute the connections
//as evenly as possible (if total_connections % pins.size() != 0, there will be
//some inevitable imbalance).
int connections_remaining = total_connections;
while (connections_remaining != 0) {
//Add one set of connections to each pin
for (int ipin : fc_spec.pins) {
if (connections_remaining >= fac) {
Fc[itype][ipin][iseg] += fac;
connections_remaining -= fac;
} else {
VTR_ASSERT(connections_remaining == 0);
break;
}
}
}
for (int ipin : fc_spec.pins) {
//It is possible that we may want more connections that wires of this type exist;
//clip to the maximum number of wires
if (Fc[itype][ipin][iseg] > sets_per_seg_type[iseg] * fac) {
*Fc_clipped = true;
Fc[itype][ipin][iseg] = sets_per_seg_type[iseg] * fac;
}
VTR_ASSERT_MSG(Fc[itype][ipin][iseg] >= 0, "Calculated absolute Fc must be positive");
VTR_ASSERT_MSG(Fc[itype][ipin][iseg] % fac == 0, "Calculated absolute Fc must be divisible by 1 (bidir architecture) or 2 (unidir architecture)"); //Required by connection block construction code
}
}
}
}
return Fc;
}
/* frees the track to ipin mapping for each physical grid type */
static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map,
const std::vector<t_physical_tile_type>& types,
int max_chan_width) {
auto& device_ctx = g_vpr_ctx.device();
for (unsigned int i = 0; i < device_ctx.physical_tile_types.size(); i++) {
if (!track_to_pin_map[i].empty()) {
for (int track = 0; track < max_chan_width; ++track) {
for (int width = 0; width < types[i].width; ++width) {
for (int height = 0; height < types[i].height; ++height) {
for (int side = 0; side < 4; ++side) {
if (!track_to_pin_map[i][track][width][height][side].empty()) {
track_to_pin_map[i][track][width][height][side].clear();
}
}
}
}
}
}
}
}
/* Does the actual work of allocating the rr_graph and filling all the *
* appropriate values. Everything up to this was just a prelude! */
static void alloc_and_load_rr_graph(const int num_nodes,
std::vector<t_rr_node>& L_rr_node,
const int num_seg_types,
const t_chan_details& chan_details_x,
const t_chan_details& chan_details_y,
const t_track_to_pin_lookup& track_to_pin_lookup,
const t_pin_to_track_lookup& opin_to_track_map,
const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn,
t_sb_connection_map* sb_conn_map,
const DeviceGrid& grid,
const int Fs,
t_sblock_pattern& sblock_pattern,
const std::vector<vtr::Matrix<int>>& Fc_out,
vtr::NdMatrix<int, 3>& Fc_xofs,
vtr::NdMatrix<int, 3>& Fc_yofs,
const t_rr_node_indices& L_rr_node_indices,
const int max_chan_width,
const t_chan_width& chan_width,
const int wire_to_ipin_switch,
const int delayless_switch,
const enum e_directionality directionality,
bool* Fc_clipped,
const t_direct_inf* directs,
const int num_directs,
const t_clb_to_clb_directs* clb_to_clb_directs,
bool is_global_graph) {
//We take special care when creating RR graph edges (there are typically many more
//edges than nodes in an RR graph).
//
//In particular, all the following build_*() functions do not create the edges, but
//instead record the edges they wish to create in rr_edges_to_create.
//
//We uniquify the edges to be created (avoiding any duplicates), and create
//the edges in alloc_and_load_edges().
//
//By doing things in this manner we ensure we know exactly how many edges leave each RR
//node, which avoids resizing the RR edge arrays (which can cause significant memory
//fragmentation, and significantly increasing peak memory usage). This is important since
//RR graph creation is the high-watermark of VPR's memory use.
t_rr_edge_info_set rr_edges_to_create;
/* If Fc gets clipped, this will be flagged to true */
*Fc_clipped = false;
/* Connection SINKS and SOURCES to their pins. */
for (size_t i = 0; i < grid.width(); ++i) {
for (size_t j = 0; j < grid.height(); ++j) {
build_rr_sinks_sources(i, j, L_rr_node, rr_edges_to_create, L_rr_node_indices,
delayless_switch, grid);
//Create the actual SOURCE->OPIN, IPIN->SINK edges
uniquify_edges(rr_edges_to_create);
alloc_and_load_edges(L_rr_node, rr_edges_to_create);
rr_edges_to_create.clear();
}
}
/* Build opins */
for (size_t i = 0; i < grid.width(); ++i) {
for (size_t j = 0; j < grid.height(); ++j) {
for (e_side side : SIDES) {
if (BI_DIRECTIONAL == directionality) {
build_bidir_rr_opins(i, j, side, L_rr_node_indices, L_rr_node,
opin_to_track_map, Fc_out, rr_edges_to_create, chan_details_x, chan_details_y,
grid,
directs, num_directs, clb_to_clb_directs, num_seg_types);
} else {
VTR_ASSERT(UNI_DIRECTIONAL == directionality);
bool clipped;
build_unidir_rr_opins(i, j, side, grid, Fc_out, max_chan_width,
chan_details_x, chan_details_y, Fc_xofs, Fc_yofs,
rr_edges_to_create, &clipped, L_rr_node_indices, L_rr_node,
directs, num_directs, clb_to_clb_directs, num_seg_types);
if (clipped) {
*Fc_clipped = true;
}
}
//Create the actual OPIN->CHANX/CHANY edges
uniquify_edges(rr_edges_to_create);
alloc_and_load_edges(L_rr_node, rr_edges_to_create);
rr_edges_to_create.clear();
}
}
}
/* Build channels */
VTR_ASSERT(Fs % 3 == 0);
for (size_t i = 0; i < grid.width() - 1; ++i) {
for (size_t j = 0; j < grid.height() - 1; ++j) {
if (i > 0) {
int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.x_list[j]);
build_rr_chan(i, j, CHANX, track_to_pin_lookup, sb_conn_map, switch_block_conn,
CHANX_COST_INDEX_START,
max_chan_width, grid, tracks_per_chan,
sblock_pattern, Fs / 3, chan_details_x, chan_details_y,
L_rr_node_indices, rr_edges_to_create, L_rr_node,
wire_to_ipin_switch,
directionality);
//Create the actual CHAN->CHAN edges
uniquify_edges(rr_edges_to_create);
alloc_and_load_edges(L_rr_node, rr_edges_to_create);
rr_edges_to_create.clear();
}
if (j > 0) {
int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.y_list[i]);
build_rr_chan(i, j, CHANY, track_to_pin_lookup, sb_conn_map, switch_block_conn,
CHANX_COST_INDEX_START + num_seg_types,
max_chan_width, grid, tracks_per_chan,
sblock_pattern, Fs / 3, chan_details_x, chan_details_y,
L_rr_node_indices, rr_edges_to_create, L_rr_node,
wire_to_ipin_switch,
directionality);
//Create the actual CHAN->CHAN edges
uniquify_edges(rr_edges_to_create);
alloc_and_load_edges(L_rr_node, rr_edges_to_create);
rr_edges_to_create.clear();
}
}
}
init_fan_in(L_rr_node, num_nodes);
}
static void build_bidir_rr_opins(const int i,
const int j,
const e_side side,
const t_rr_node_indices& L_rr_node_indices,
const std::vector<t_rr_node>& rr_nodes,
const t_pin_to_track_lookup& opin_to_track_map,
const std::vector<vtr::Matrix<int>>& Fc_out,
t_rr_edge_info_set& rr_edges_to_create,
const t_chan_details& chan_details_x,
const t_chan_details& chan_details_y,
const DeviceGrid& grid,
const t_direct_inf* directs,
const int num_directs,
const t_clb_to_clb_directs* clb_to_clb_directs,
const int num_seg_types) {
//Don't connect pins which are not adjacent to channels around the perimeter
if ((i == 0 && side != RIGHT)
|| (i == int(grid.width() - 1) && side != LEFT)
|| (j == 0 && side != TOP)
|| (j == int(grid.width() - 1) && side != BOTTOM)) {
return;
}
auto type = grid[i][j].type;
int width_offset = grid[i][j].width_offset;
int height_offset = grid[i][j].height_offset;
const vtr::Matrix<int>& Fc = Fc_out[type->index];
for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) {
/* We only are working with opins so skip non-drivers */
if (type->class_inf[type->pin_class[pin_index]].type != DRIVER) {
continue;
}
/* Can't do anything if pin isn't at this location */
if (0 == type->pinloc[width_offset][height_offset][side][pin_index]) {
continue;
}
/* get number of tracks that this pin connects to */
int total_pin_Fc = 0;
for (int iseg = 0; iseg < num_seg_types; iseg++) {
total_pin_Fc += Fc[pin_index][iseg];
}
int node_index = get_rr_node_index(L_rr_node_indices, i, j, OPIN, pin_index, side);
VTR_ASSERT(node_index >= 0);
if (total_pin_Fc > 0) {
get_bidir_opin_connections(i, j, pin_index,
node_index, rr_edges_to_create, opin_to_track_map, L_rr_node_indices,
chan_details_x,
chan_details_y);
}
/* Add in direct connections */
get_opin_direct_connecions(i, j, side, pin_index,
node_index, rr_edges_to_create, L_rr_node_indices, rr_nodes,
directs, num_directs, clb_to_clb_directs);
}
}
void free_rr_graph() {
/* Frees all the routing graph data structures, if they have been *
* allocated. I use rr_mem_chunk_list_head as a flag to indicate *
* whether or not the graph has been allocated -- if it is not NULL, *
* a routing graph exists and can be freed. Hence, you can call this *
* routine even if you're not sure of whether a rr_graph exists or not. */
/* Before adding any more free calls here, be sure the data is NOT chunk *
* allocated, as ALL the chunk allocated data is already free! */
auto& device_ctx = g_vpr_ctx.mutable_device();
device_ctx.read_rr_graph_filename.clear();
device_ctx.rr_node_indices.clear();
device_ctx.rr_nodes.clear();
device_ctx.rr_node_indices.clear();
device_ctx.rr_indexed_data.clear();
device_ctx.rr_switch_inf.clear();
device_ctx.switch_fanin_remap.clear();
device_ctx.rr_node_metadata.clear();
device_ctx.rr_edge_metadata.clear();
invalidate_router_lookahead_cache();
}
static void build_rr_sinks_sources(const int i,
const int j,
std::vector<t_rr_node>& L_rr_node,
t_rr_edge_info_set& rr_edges_to_create,
const t_rr_node_indices& L_rr_node_indices,
const int delayless_switch,
const DeviceGrid& grid) {
/* Loads IPIN, SINK, SOURCE, and OPIN.
* Loads IPIN to SINK edges, and SOURCE to OPIN edges */
/* Since we share nodes within a large block, only
* start tile can initialize sinks, sources, and pins */
if (grid[i][j].width_offset > 0 || grid[i][j].height_offset > 0)
return;
auto type = grid[i][j].type;
int num_class = type->num_class;
t_class* class_inf = type->class_inf;
int num_pins = type->num_pins;
int* pin_class = type->pin_class;
/* SINK and SOURCE-to-OPIN edges */
for (int iclass = 0; iclass < num_class; ++iclass) {
int inode = 0;
if (class_inf[iclass].type == DRIVER) { /* SOURCE */
inode = get_rr_node_index(L_rr_node_indices, i, j, SOURCE, iclass);
//Retrieve all the physical OPINs associated with this source, this includes
//those at different grid tiles of this block
std::vector<int> opin_nodes;
for (int width_offset = 0; width_offset < type->width; ++width_offset) {
for (int height_offset = 0; height_offset < type->height; ++height_offset) {
for (int ipin = 0; ipin < class_inf[iclass].num_pins; ++ipin) {
int pin_num = class_inf[iclass].pinlist[ipin];
auto physical_pins = get_rr_node_indices(L_rr_node_indices, i + width_offset, j + height_offset, OPIN, pin_num);
opin_nodes.insert(opin_nodes.end(), physical_pins.begin(), physical_pins.end());
}
}
}
//Connect the SOURCE to each OPIN
for (size_t iedge = 0; iedge < opin_nodes.size(); ++iedge) {
rr_edges_to_create.emplace_back(inode, opin_nodes[iedge], delayless_switch);
}
L_rr_node[inode].set_cost_index(SOURCE_COST_INDEX);
L_rr_node[inode].set_type(SOURCE);
} else { /* SINK */
VTR_ASSERT(class_inf[iclass].type == RECEIVER);
inode = get_rr_node_index(L_rr_node_indices, i, j, SINK, iclass);
/* NOTE: To allow route throughs through clbs, change the lines below to *
* make an edge from the input SINK to the output SOURCE. Do for just the *
* special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it *
* leads to. If route throughs are allowed, you may want to increase the *
* base cost of OPINs and/or SOURCES so they aren't used excessively. */
/* Initialize to unconnected */
L_rr_node[inode].set_num_edges(0);
L_rr_node[inode].set_cost_index(SINK_COST_INDEX);
L_rr_node[inode].set_type(SINK);
}
/* Things common to both SOURCEs and SINKs. */
L_rr_node[inode].set_capacity(class_inf[iclass].num_pins);
L_rr_node[inode].set_coordinates(i, j, i + type->width - 1, j + type->height - 1);
float R = 0.;
float C = 0.;
L_rr_node[inode].set_rc_index(find_create_rr_rc_data(R, C));
L_rr_node[inode].set_ptc_num(iclass);
}
/* Connect IPINS to SINKS and initialize OPINS */
//We loop through all the pin locations on the block to initialize the IPINs/OPINs,
//and hook-up the IPINs to sinks.
for (int width_offset = 0; width_offset < type->width; ++width_offset) {
for (int height_offset = 0; height_offset < type->height; ++height_offset) {
for (e_side side : {TOP, BOTTOM, LEFT, RIGHT}) {
for (int ipin = 0; ipin < num_pins; ++ipin) {
if (type->pinloc[width_offset][height_offset][side][ipin]) {
int inode;
int iclass = pin_class[ipin];
if (class_inf[iclass].type == RECEIVER) {
//Connect the input pin to the sink
inode = get_rr_node_index(L_rr_node_indices, i + width_offset, j + height_offset, IPIN, ipin, side);
int to_node = get_rr_node_index(L_rr_node_indices, i, j, SINK, iclass);
//Add info about the edge to be created
rr_edges_to_create.emplace_back(inode, to_node, delayless_switch);
VTR_ASSERT(inode >= 0);
L_rr_node[inode].set_cost_index(IPIN_COST_INDEX);
L_rr_node[inode].set_type(IPIN);
} else {
VTR_ASSERT(class_inf[iclass].type == DRIVER);
//Initialize the output pin
// Note that we leave it's out-going edges unconnected (they will be hooked up to global routing later)
inode = get_rr_node_index(L_rr_node_indices, i + width_offset, j + height_offset, OPIN, ipin, side);
//Initially left unconnected
L_rr_node[inode].set_cost_index(OPIN_COST_INDEX);
L_rr_node[inode].set_type(OPIN);
}
/* Common to both DRIVERs and RECEIVERs */
L_rr_node[inode].set_capacity(1);
float R = 0.;
float C = 0.;
L_rr_node[inode].set_rc_index(find_create_rr_rc_data(R, C));
L_rr_node[inode].set_ptc_num(ipin);
//Note that we store the grid tile location and side where the pin is located,
//which greatly simplifies the drawing code
L_rr_node[inode].set_coordinates(i + width_offset, j + height_offset, i + width_offset, j + height_offset);
L_rr_node[inode].set_side(side);
VTR_ASSERT(type->pinloc[width_offset][height_offset][L_rr_node[inode].side()][L_rr_node[inode].pin_num()]);
}
}
}
}
}
//Create the actual edges
}
void init_fan_in(std::vector<t_rr_node>& L_rr_node, const int num_rr_nodes) {
//Loads fan-ins for all nodes
//Reset all fan-ins to zero
for (int i = 0; i < num_rr_nodes; i++) {
L_rr_node[i].set_fan_in(0);
}
//Walk the graph and increment fanin on all downstream nodes
for (int i = 0; i < num_rr_nodes; i++) {
for (t_edge_size iedge = 0; iedge < L_rr_node[i].num_edges(); iedge++) {
int to_node = L_rr_node[i].edge_sink_node(iedge);
L_rr_node[to_node].set_fan_in(L_rr_node[to_node].fan_in() + 1);
}
}
}
/* Allocates/loads edges for nodes belonging to specified channel segment and initializes
* node properties such as cost, occupancy and capacity */
static void build_rr_chan(const int x_coord,
const int y_coord,
const t_rr_type chan_type,
const t_track_to_pin_lookup& track_to_pin_lookup,
t_sb_connection_map* sb_conn_map,
const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn,
const int cost_index_offset,
const int max_chan_width,
const DeviceGrid& grid,
const int tracks_per_chan,
t_sblock_pattern& sblock_pattern,
const int Fs_per_side,
const t_chan_details& chan_details_x,
const t_chan_details& chan_details_y,
const t_rr_node_indices& L_rr_node_indices,
t_rr_edge_info_set& rr_edges_to_create,
std::vector<t_rr_node>& L_rr_node,
const int wire_to_ipin_switch,
const enum e_directionality directionality) {
/* this function builds both x and y-directed channel segments, so set up our
* coordinates based on channel type */
auto& device_ctx = g_vpr_ctx.device();
//Initally a assumes CHANX
int seg_coord = x_coord; //The absolute coordinate of this segment within the channel
int chan_coord = y_coord; //The absolute coordinate of this channel within the device
int seg_dimension = device_ctx.grid.width() - 2; //-2 for no perim channels
int chan_dimension = device_ctx.grid.height() - 2; //-2 for no perim channels
const t_chan_details& from_chan_details = (chan_type == CHANX) ? chan_details_x : chan_details_y;
const t_chan_details& opposite_chan_details = (chan_type == CHANX) ? chan_details_y : chan_details_x;
t_rr_type opposite_chan_type = CHANY;
if (chan_type == CHANY) {
//Swap values since CHANX was assumed above
std::swap(seg_coord, chan_coord);
std::swap(seg_dimension, chan_dimension);
opposite_chan_type = CHANX;
}
const t_chan_seg_details* seg_details = from_chan_details[x_coord][y_coord].data();
/* figure out if we're generating switch block edges based on a custom switch block
* description */
bool custom_switch_block = false;
if (sb_conn_map != nullptr) {
VTR_ASSERT(sblock_pattern.empty() && switch_block_conn.empty());
custom_switch_block = true;
}
/* Loads up all the routing resource nodes in the current channel segment */
for (int track = 0; track < tracks_per_chan; ++track) {
if (seg_details[track].length() == 0)
continue;
//Start and end coordinates of this segment along the length of the channel
//Note that these values are in the VPR coordinate system (and do not consider
//wire directionality), so start correspond to left/bottom and end corresponds to right/top
int start = get_seg_start(seg_details, track, chan_coord, seg_coord);
int end = get_seg_end(seg_details, track, start, chan_coord, seg_dimension);
if (seg_coord > start)
continue; /* Only process segments which start at this location */
VTR_ASSERT(seg_coord == start);
const t_chan_seg_details* from_seg_details = nullptr;
if (chan_type == CHANY) {
from_seg_details = chan_details_y[x_coord][start].data();
} else {
from_seg_details = chan_details_x[start][y_coord].data();
}
int node = get_rr_node_index(L_rr_node_indices, x_coord, y_coord, chan_type, track);
if (node == OPEN) {
continue;
}
/* Add the edges from this track to all it's connected pins into the list */
int num_edges = 0;
num_edges += get_track_to_pins(start, chan_coord, track, tracks_per_chan, node, rr_edges_to_create,
L_rr_node_indices, track_to_pin_lookup, seg_details, chan_type, seg_dimension,
wire_to_ipin_switch, directionality);
/* get edges going from the current track into channel segments which are perpendicular to it */
if (chan_coord > 0) {
const t_chan_seg_details* to_seg_details;
if (chan_type == CHANX) {
to_seg_details = chan_details_y[start][y_coord].data();
} else {
VTR_ASSERT(chan_type == CHANY);
to_seg_details = chan_details_x[x_coord][start].data();
}
if (to_seg_details->length() > 0) {
num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord,
opposite_chan_type, seg_dimension, max_chan_width, grid,
Fs_per_side, sblock_pattern, node, rr_edges_to_create,
from_seg_details, to_seg_details, opposite_chan_details,
directionality,
L_rr_node_indices,
switch_block_conn, sb_conn_map);
}
}
if (chan_coord < chan_dimension) {
const t_chan_seg_details* to_seg_details;
if (chan_type == CHANX) {
to_seg_details = chan_details_y[start][y_coord + 1].data();
} else {
VTR_ASSERT(chan_type == CHANY);
to_seg_details = chan_details_x[x_coord + 1][start].data();
}
if (to_seg_details->length() > 0) {
num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord + 1,
opposite_chan_type, seg_dimension, max_chan_width, grid,
Fs_per_side, sblock_pattern, node, rr_edges_to_create,
from_seg_details, to_seg_details, opposite_chan_details,
directionality,
L_rr_node_indices,
switch_block_conn, sb_conn_map);
}
}
/* walk over the switch blocks along the source track and implement edges from this track to other tracks
* in the same channel (i.e. straight-through connections) */
for (int target_seg = start - 1; target_seg <= end + 1; target_seg++) {
if (target_seg != start - 1 && target_seg != end + 1) {
/* skip straight-through connections from midpoint if non-custom switch block.
* currently non-custom switch blocks don't properly describe connections from the mid-point of a wire segment
* to other segments in the same channel (i.e. straight-through connections) */
if (!custom_switch_block) {
continue;
}
}
if (target_seg > 0 && target_seg < seg_dimension + 1) {
const t_chan_seg_details* to_seg_details;
if (chan_type == CHANX) {
to_seg_details = chan_details_x[target_seg][y_coord].data();
} else {
VTR_ASSERT(chan_type == CHANY);
to_seg_details = chan_details_y[x_coord][target_seg].data();
}
if (to_seg_details->length() > 0) {
num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, target_seg,
chan_type, seg_dimension, max_chan_width, grid,
Fs_per_side, sblock_pattern, node, rr_edges_to_create,
from_seg_details, to_seg_details, from_chan_details,
directionality,
L_rr_node_indices,
switch_block_conn, sb_conn_map);
}
}
}
/* Edge arrays have now been built up. Do everything else. */
L_rr_node[node].set_cost_index(cost_index_offset + seg_details[track].index());
L_rr_node[node].set_capacity(1); /* GLOBAL routing handled elsewhere */
if (chan_type == CHANX) {
L_rr_node[node].set_coordinates(start, y_coord, end, y_coord);
} else {
VTR_ASSERT(chan_type == CHANY);
L_rr_node[node].set_coordinates(x_coord, start, x_coord, end);
}
int length = end - start + 1;
float R = length * seg_details[track].Rmetal();
float C = length * seg_details[track].Cmetal();
L_rr_node[node].set_rc_index(find_create_rr_rc_data(R, C));
L_rr_node[node].set_ptc_num(track);
L_rr_node[node].set_type(chan_type);
L_rr_node[node].set_direction(seg_details[track].direction());
}
}
void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create) {
std::sort(rr_edges_to_create.begin(), rr_edges_to_create.end());
rr_edges_to_create.erase(std::unique(rr_edges_to_create.begin(), rr_edges_to_create.end()), rr_edges_to_create.end());
}
void alloc_and_load_edges(std::vector<t_rr_node>& L_rr_node,
const t_rr_edge_info_set& rr_edges_to_create) {
/* Sets up all the edge related information for rr_node */
struct compare_from_node {
auto operator()(const t_rr_edge_info& lhs, const int from_node) {
return lhs.from_node < from_node;
}
auto operator()(const int from_node, const t_rr_edge_info& rhs) {
return from_node < rhs.from_node;
}
};
std::set<int> from_nodes;
for (auto& edge : rr_edges_to_create) {
from_nodes.insert(edge.from_node);
}
VTR_ASSERT_SAFE(std::is_sorted(rr_edges_to_create.begin(), rr_edges_to_create.end()));
for (int inode : from_nodes) {
auto edge_range = std::equal_range(rr_edges_to_create.begin(), rr_edges_to_create.end(), inode, compare_from_node());
size_t edge_count = std::distance(edge_range.first, edge_range.second);
if (L_rr_node[inode].num_edges() == 0) {
//Create initial edges
//
//Note that we do this in bulk instead of via add_edge() to reduce
//memory fragmentation
L_rr_node[inode].set_num_edges(edge_count);
int iedge = 0;
for (auto itr = edge_range.first; itr != edge_range.second; ++itr) {
VTR_ASSERT(itr->from_node == inode);
L_rr_node[inode].set_edge_sink_node(iedge, itr->to_node);
L_rr_node[inode].set_edge_switch(iedge, itr->switch_type);
++iedge;
}
} else {
//Add new edge incrementally
//
//This should occur relatively rarely (e.g. a backward bidir edge) so memory fragmentation shouldn't be a big problem
for (auto itr = edge_range.first; itr != edge_range.second; ++itr) {
L_rr_node[inode].add_edge(itr->to_node, itr->switch_type);
}
}
}
}
/* allocate pin to track map for each segment type individually and then combine into a single
* vector */
static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_pin_to_track_map(const e_pin_type pin_type,
const vtr::Matrix<int>& Fc,
const t_physical_tile_type_ptr Type,
const std::vector<bool>& perturb_switch_pattern,
const e_directionality directionality,
const int num_seg_types,
const int* sets_per_seg_type) {
/* get the maximum number of tracks that any pin can connect to */
size_t max_pin_tracks = 0;
for (int iseg = 0; iseg < num_seg_types; iseg++) {
/* determine the maximum Fc to this segment type across all pins */
int max_Fc = 0;
for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) {
int pin_class = Type->pin_class[pin_index];
if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) {
max_Fc = Fc[pin_index][iseg];
}
}
max_pin_tracks += max_Fc;
}
/* allocate 'result' matrix and initialize entries to OPEN. also allocate and intialize matrix which will be
* used to index into the correct entries when loading up 'result' */
auto result = vtr::NdMatrix<std::vector<int>, 4>({
size_t(Type->num_pins), //[0..num_pins-1]
size_t(Type->width), //[0..width-1]
size_t(Type->height), //[0..height-1]
4, //[0..sides-1]
});
/* multiplier for unidirectional vs bidirectional architectures */
int fac = 1;
if (directionality == UNI_DIRECTIONAL) {
fac = 2;
}
/* load the pin to track matrix by looking at each segment type in turn */
int seg_type_start_track = 0;
for (int iseg = 0; iseg < num_seg_types; iseg++) {
int num_seg_type_tracks = fac * sets_per_seg_type[iseg];
/* determine the maximum Fc to this segment type across all pins */
int max_Fc = 0;
for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) {
int pin_class = Type->pin_class[pin_index];
if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) {
max_Fc = Fc[pin_index][iseg];
}
}
/* get pin connections to tracks of the current segment type */
auto pin_to_seg_type_map = alloc_and_load_pin_to_seg_type(pin_type,
num_seg_type_tracks, max_Fc, Type, perturb_switch_pattern[iseg], directionality);
/* connections in pin_to_seg_type_map are within that seg type -- i.e. in the [0,num_seg_type_tracks-1] range.
* now load up 'result' array with these connections, but offset them so they are relative to the channel
* as a whole */
for (int ipin = 0; ipin < Type->num_pins; ipin++) {
for (int iwidth = 0; iwidth < Type->width; iwidth++) {
for (int iheight = 0; iheight < Type->height; iheight++) {
for (int iside = 0; iside < 4; iside++) {
for (int iconn = 0; iconn < max_Fc; iconn++) {
int relative_track_ind = pin_to_seg_type_map[ipin][iwidth][iheight][iside][iconn];
if (relative_track_ind == OPEN) continue;
int absolute_track_ind = relative_track_ind + seg_type_start_track;
VTR_ASSERT(absolute_track_ind >= 0);
result[ipin][iwidth][iheight][iside].push_back(absolute_track_ind);
}
}
}
}
}
/* next seg type will start at this track index */
seg_type_start_track += num_seg_type_tracks;
}
return result;
}
static vtr::NdMatrix<int, 5> alloc_and_load_pin_to_seg_type(const e_pin_type pin_type,
const int num_seg_type_tracks,
const int Fc,
const t_physical_tile_type_ptr Type,
const bool perturb_switch_pattern,
const e_directionality directionality) {
/* Note: currently a single value of Fc is used across each pin. In the future
* the looping below will have to be modified if we want to account for pin-based
* Fc values */
/* NB: This wastes some space. Could set tracks_..._pin[ipin][ioff][iside] =
* NULL if there is no pin on that side, or that pin is of the wrong type.
* Probably not enough memory to worry about, esp. as it's temporary.
* If pin ipin on side iside does not exist or is of the wrong type,
* tracks_connected_to_pin[ipin][iside][0] = OPEN. */
if (Type->num_pins < 1) {
return vtr::NdMatrix<int, 5>();
}
auto tracks_connected_to_pin = vtr::NdMatrix<int, 5>({
size_t(Type->num_pins), //[0..num_pins-1]
size_t(Type->width), //[0..width-1]
size_t(Type->height), //[0..height-1]
NUM_SIDES, //[0..NUM_SIDES-1]
size_t(Fc) //[0..Fc-1]
},
OPEN); //Unconnected
//Number of *physical* pins on each side.
//Note that his may be more than the logical number of pins (i.e.
//Type->num_pins) if a logical pin has multiple specified physical
//pinlocations (i.e. appears on multiple sides of the block)
auto num_dir = vtr::NdMatrix<int, 3>({
size_t(Type->width), //[0..width-1]
size_t(Type->height), //[0..height-1]
NUM_SIDES //[0..NUM_SIDES-1]
},
0);
//List of *physical* pins of the correct type on each side of the current
//block type. For a specific width/height/side the valid enteries in the
//last dimension are [0 .. num_dir[width][height][side]-1]
//
//Max possible space alloced for simplicity
auto dir_list = vtr::NdMatrix<int, 4>({
size_t(Type->width), //[0..width-1]
size_t(Type->height), //[0..height-1]
NUM_SIDES, //[0..NUM_SIDES-1]
size_t(Type->num_pins) //[0..num_pins-1]
},
-1); //Defensive coding: Initialize to invalid
//Number of currently assigned physical pins
auto num_done_per_dir = vtr::NdMatrix<int, 3>({
size_t(Type->width), //[0..width-1]
size_t(Type->height), //[0..height-1]
NUM_SIDES //[0..NUM_SIDES-1]
},
0);
//Record the physical pin locations and counts per side/offsets combination
for (int pin = 0; pin < Type->num_pins; ++pin) {
int pin_class = Type->pin_class[pin];
if (Type->class_inf[pin_class].type != pin_type) /* Doing either ipins OR opins */
continue;
/* Pins connecting only to global resources get no switches -> keeps area model accurate. */
if (Type->is_ignored_pin[pin])
continue;
for (int width = 0; width < Type->width; ++width) {
for (int height = 0; height < Type->height; ++height) {
for (e_side side : SIDES) {
if (Type->pinloc[width][height][side][pin] == 1) {
dir_list[width][height][side][num_dir[width][height][side]] = pin;
num_dir[width][height][side]++;
}
}
}
}
}
//Total the number of physical pins
int num_phys_pins = 0;
for (int width = 0; width < Type->width; ++width) {
for (int height = 0; height < Type->height; ++height) {
for (e_side side : SIDES) {
num_phys_pins += num_dir[width][height][side]; /* Num. physical pins per type */
}
}
}
std::vector<t_pin_loc> pin_ordering;
/* Connection block I use distributes pins evenly across the tracks *
* of ALL sides of the clb at once. Ensures that each pin connects *
* to spaced out tracks in its connection block, and that the other *
* pins (potentially in other C blocks) connect to the remaining tracks *
* first. Doesn't matter for large Fc, but should make a fairly *
* good low Fc block that leverages the fact that usually lots of pins *
* are logically equivalent. */
const e_side init_side = LEFT;
const int init_width = 0;
const int init_height = 0;
e_side side = init_side;
int width = init_width;
int height = init_height;
int pin = 0;
int pin_index = -1;
//Determine the order in which physical pins will be considered while building
//the connection block. This generally tries to order the pins so they are 'spread'
//out (in hopes of yielding good connection diversity)
while (pin < num_phys_pins) {
if (height == init_height && width == init_width && side == init_side) {
//Completed one loop through all the possible offsets/side combinations
pin_index++;
}
advance_to_next_block_side(Type, width, height, side);
VTR_ASSERT_MSG(pin_index < num_phys_pins, "Physical block pins bound number of logical block pins");
if (num_done_per_dir[width][height][side] >= num_dir[width][height][side]) {
continue;
}
int pin_num = dir_list[width][height][side][pin_index];
VTR_ASSERT(pin_num >= 0);
VTR_ASSERT(Type->pinloc[width][height][side][pin_num]);
t_pin_loc pin_loc;
pin_loc.pin_index = pin_num;
pin_loc.width_offset = width;
;
pin_loc.height_offset = height;
pin_loc.side = side;
pin_ordering.push_back(pin_loc);
num_done_per_dir[width][height][side]++;
pin++;
}
VTR_ASSERT(pin == num_phys_pins);
VTR_ASSERT(pin_ordering.size() == size_t(num_phys_pins));
if (perturb_switch_pattern) {
load_perturbed_connection_block_pattern(tracks_connected_to_pin,
pin_ordering,
num_seg_type_tracks, num_seg_type_tracks, Fc, directionality);
} else {
load_uniform_connection_block_pattern(tracks_connected_to_pin,
pin_ordering,
num_seg_type_tracks, num_seg_type_tracks, Fc, directionality);
}
#ifdef ENABLE_CHECK_ALL_TRACKS
check_all_tracks_reach_pins(Type, tracks_connected_to_pin, num_seg_type_tracks,
Fc, pin_type);
#endif
return tracks_connected_to_pin;
}
static void advance_to_next_block_side(t_physical_tile_type_ptr Type, int& width_offset, int& height_offset, e_side& side) {
//State-machine transitions for advancing around all sides of a block
//This state-machine transitions in the following order:
//
// TOP
// --->
//
// **********************************
// * 10 | 11 | 12 *
// * 3 25 | 6 22 | 9 19 *
// * 36 | 35 | 34 *
// *----------|----------|----------*
// ^ * 13 | 14 | 15 * |
// LEFT | * 2 26 | 5 23 | 8 20 * | RIGHT
// | * 33 | 32 | 31 * v
// *----------|----------|----------*
// * 16 | 17 | 18 *
// * 1 27 | 4 24 | 7 21 *
// * 30 | 29 | 28 *
// **********************************
//
// <---
// BOTTOM
//
// where each 'square' in the above diagram corresponds to a grid tile of
// the block of width=3 and height=3. The numbers correspond to the visiting order starting
// from '1' (width_offset=0, height_offset=0, side=LEFT).
//
// Note that for blocks of width == 1 and height == 1 this iterates through the sides
// in clock-wise order:
//
// ***********
// * 2 *
// * 1 3 *
// * 4 *
// ***********
//
//Validate current state
VTR_ASSERT(width_offset >= 0 && width_offset < Type->width);
VTR_ASSERT(height_offset >= 0 && height_offset < Type->height);
VTR_ASSERT(side == LEFT || side == RIGHT || side == BOTTOM || side == TOP);
if (side == LEFT) {
if (height_offset == Type->height - 1 && width_offset == Type->width - 1) {
//Finished the last left edge column
side = TOP; //Turn clockwise
width_offset = 0;
height_offset = Type->height - 1;
} else if (height_offset == Type->height - 1) {
//Finished the current left edge column
VTR_ASSERT(width_offset != Type->width - 1);
//Move right to the next left edge column
width_offset++;
height_offset = 0;
} else {
height_offset++; //Advance up current left edge column
}
} else if (side == TOP) {
if (height_offset == 0 && width_offset == Type->width - 1) {
//Finished the last top edge row
side = RIGHT; //Turn clockwise
width_offset = Type->width - 1;
height_offset = Type->height - 1;
} else if (width_offset == Type->width - 1) {
//Finished the current top edge row
VTR_ASSERT(height_offset != 0);
//Move down to the next top edge row
height_offset--;
width_offset = 0;
} else {
width_offset++; //Advance right along current top edge row
}
} else if (side == RIGHT) {
if (height_offset == 0 && width_offset == 0) {
//Finished the last right edge column
side = BOTTOM; //Turn clockwise
width_offset = Type->width - 1;
height_offset = 0;
} else if (height_offset == 0) {
//Finished the current right edge column
VTR_ASSERT(width_offset != 0);
//Move left to the next right edge column
width_offset--;
height_offset = Type->height - 1;
} else {
height_offset--; //Advance down current right edge column
}
} else {
VTR_ASSERT(side == BOTTOM);
if (height_offset == Type->height - 1 && width_offset == 0) {
//Finished the last bottom edge row
side = LEFT; //Turn clockwise
width_offset = 0;
height_offset = 0;
} else if (width_offset == 0) {
//Finished the current bottom edge row
VTR_ASSERT(height_offset != Type->height - 1);
//Move up to the next bottom edge row
height_offset++;
width_offset = Type->width - 1;
} else {
width_offset--; //Advance left along current bottom edge row
}
}
//Validate next state
VTR_ASSERT(width_offset >= 0 && width_offset < Type->width);
VTR_ASSERT(height_offset >= 0 && height_offset < Type->height);
VTR_ASSERT(side == LEFT || side == RIGHT || side == BOTTOM || side == TOP);
}
static float pattern_fmod(float a, float b) {
/* Compute a modulo b. */
float raw_result = a - (int)(a / b) * b;
if (raw_result < 0.0f) {
return 0.0f;
}
if (raw_result >= b) {
return 0.0f;
}
return raw_result;
}
static void load_uniform_connection_block_pattern(vtr::NdMatrix<int, 5>& tracks_connected_to_pin,
const std::vector<t_pin_loc>& pin_locations,
const int x_chan_width,
const int y_chan_width,
const int Fc,
enum e_directionality directionality) {
/* Loads the tracks_connected_to_pin array with an even distribution of *
* switches across the tracks for each pin. For example, each pin connects *
* to every 4.3rd track in a channel, with exactly which tracks a pin *
* connects to staggered from pin to pin. */
/* Uni-directional drive is implemented to ensure no directional bias and this means
* two important comments noted below */
/* 1. Spacing should be (W/2)/(Fc/2), and step_size should be spacing/(num_phys_pins),
* and lay down 2 switches on an adjacent pair of tracks at a time to ensure
* no directional bias. Basically, treat W (even) as W/2 pairs of tracks, and
* assign switches to a pair at a time. Can do this because W is guaranteed to
* be even-numbered; however same approach cannot be applied to Fc_out pattern
* when L > 1 and W <> 2L multiple.
*
* 2. This generic pattern should be considered the tileable physical layout,
* meaning all track # here are physical #'s,
* so later must use vpr_to_phy conversion to find actual logical #'s to connect.
* This also means I will not use get_output_block_companion_track to ensure
* no bias, since that describes a logical # -> that would confuse people. */
int max_width = 0;
int max_height = 0;
int num_phys_pins = pin_locations.size();
/* Keep a record of how many times each track is selected at each
* (side, dx, dy) of the block. This is used to ensure a diversity of tracks are
* assigned to pins that might be related. For a given (side, dx, dy), the counts will be
* decremented after all the tracks have been selected at least once, so the
* counts will not get too big.
*/
std::vector<std::vector<std::vector<std::vector<char>>>> excess_tracks_selected;
excess_tracks_selected.resize(NUM_SIDES);
for (int i = 0; i < num_phys_pins; ++i) {
int width = pin_locations[i].width_offset;
int height = pin_locations[i].height_offset;
max_width = std::max(max_width, width);
max_height = std::max(max_height, height);
}
for (int iside = 0; iside < NUM_SIDES; iside++) {
excess_tracks_selected[iside].resize(max_width + 1);
for (int dx = 0; dx <= max_width; dx++) {
excess_tracks_selected[iside][dx].resize(max_height + 1);
for (int dy = 0; dy <= max_height; dy++) {
int max_chan_width = (((iside == TOP) || (iside == BOTTOM)) ? x_chan_width : y_chan_width);
excess_tracks_selected[iside][dx][dy].resize(max_chan_width);
}
}
}
for (int i = 0; i < num_phys_pins; ++i) {
e_side side = pin_locations[i].side;
int width = pin_locations[i].width_offset;
int height = pin_locations[i].height_offset;
int max_chan_width = (((side == TOP) || (side == BOTTOM)) ? x_chan_width : y_chan_width);
for (int j = 0; j < max_chan_width; j++) {
excess_tracks_selected[side][width][height][j] = 0;
}
}
int group_size;
if (directionality == BI_DIRECTIONAL) {
group_size = 1;
} else {
VTR_ASSERT(directionality == UNI_DIRECTIONAL);
group_size = 2;
}
VTR_ASSERT((x_chan_width % group_size == 0) && (y_chan_width % group_size == 0) && (Fc % group_size == 0));
/* offset is used to move to a different point in the track space if it is detected that
* the tracks being assigned overlap recently assigned tracks, with the goal of increasing
* track diversity.
*/
int offset = 0;
for (int i = 0; i < num_phys_pins; ++i) {
int pin = pin_locations[i].pin_index;
e_side side = pin_locations[i].side;
int width = pin_locations[i].width_offset;
int height = pin_locations[i].height_offset;
/* Bi-directional treats each track separately, uni-directional works with pairs of tracks */
for (int j = 0; j < (Fc / group_size); ++j) {
int max_chan_width = (((side == TOP) || (side == BOTTOM)) ? x_chan_width : y_chan_width);
float step_size = (float)max_chan_width / (float)(Fc * num_phys_pins);
VTR_ASSERT(Fc > 0);
float fc_step = (float)max_chan_width / (float)Fc;
/* We may go outside the track ID space, because of offset, so use modulo arithmetic below. */
float ftrack = pattern_fmod((i + offset) * step_size, fc_step) + (j * fc_step);
int itrack = ((int)ftrack) * group_size;
if (j == 0) {
/* Check if tracks to be assigned by the algorithm were recently assigned to pins at this
* (side, dx, dy). If so, loop through all possible alternative track
* assignments to find ones that include the most tracks that haven't been selected recently.
*/
for (;;) {
int saved_offset_increment = 0;
int max_num_unassigned_tracks = 0;
/* Across all potential track assignments, determine the maximum number of recently
* unassigned tracks that can be assigned this iteration. offset_increment is used to
* increment through the potential track assignments. The nested loops inside the
* offset_increment loop, iterate through all the tracks associated with a particular
* track assignment.
*/
for (int offset_increment = 0; offset_increment < num_phys_pins; offset_increment++) {
int num_unassigned_tracks = 0;
int num_total_tracks = 0;
for (int j2 = 0; j2 < (Fc / group_size); ++j2) {
ftrack = pattern_fmod((i + offset + offset_increment) * step_size, fc_step) + (j2 * fc_step);
itrack = ((int)ftrack) * group_size;
for (int k = 0; k < group_size; ++k) {
if (excess_tracks_selected[side][width][height][(itrack + k) % max_chan_width] == 0) {
num_unassigned_tracks++;
}
num_total_tracks++;
}
}
if (num_unassigned_tracks > max_num_unassigned_tracks) {
max_num_unassigned_tracks = num_unassigned_tracks;
saved_offset_increment = offset_increment;
}
if (num_unassigned_tracks == num_total_tracks) {
/* We can't do better than this, so end search. */
break;
}
}
if (max_num_unassigned_tracks > 0) {
/* Use the minimum offset increment that achieves the desired goal of track diversity,
* so the patterns produced are similar to the old algorithm (which doesn't explicitly
* monitor track diversity).
*/
offset += saved_offset_increment;
ftrack = pattern_fmod((i + offset) * step_size, fc_step);
itrack = ((int)ftrack) * group_size;
break;
} else {
/* All tracks have been assigned recently. Decrement the excess_tracks_selected counts for
* this location (side, dx, dy), modifying the memory of recently assigned
* tracks. A decrement is done rather than a reset, so if there was some unevenness of track
* assignment, that will be factored into the next round of track assignment.
*/
for (int k = 0; k < max_chan_width; k++) {
VTR_ASSERT(excess_tracks_selected[side][width][height][k] > 0);
excess_tracks_selected[side][width][height][k]--;
}
}
}
}
/* Assign the group of tracks for the Fc pattern */
for (int k = 0; k < group_size; ++k) {
tracks_connected_to_pin[pin][width][height][side][group_size * j + k] = (itrack + k) % max_chan_width;
excess_tracks_selected[side][width][height][(itrack + k) % max_chan_width]++;
}
}
}
}
static void load_perturbed_connection_block_pattern(vtr::NdMatrix<int, 5>& tracks_connected_to_pin,
const std::vector<t_pin_loc>& pin_locations,
const int x_chan_width,
const int y_chan_width,
const int Fc,
enum e_directionality directionality) {
/* Loads the tracks_connected_to_pin array with an unevenly distributed *
* set of switches across the channel. This is done for inputs when *
* Fc_input = Fc_output to avoid creating "pin domains" -- certain output *
* pins being able to talk only to certain input pins because their switch *
* patterns exactly line up. Distribute Fc/2 + 1 switches over half the *
* channel and Fc/2 - 1 switches over the other half to make the switch *
* pattern different from the uniform one of the outputs. Also, have half *
* the pins put the "dense" part of their connections in the first half of *
* the channel and the other half put the "dense" part in the second half, *
* to make sure each track can connect to about the same number of ipins. */
VTR_ASSERT(directionality == BI_DIRECTIONAL);
int Fc_dense = (Fc / 2) + 1;
int Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */
int Fc_half[2];
int num_phys_pins = pin_locations.size();
for (int i = 0; i < num_phys_pins; ++i) {
int pin = pin_locations[i].pin_index;
e_side side = pin_locations[i].side;
int width = pin_locations[i].width_offset;
int height = pin_locations[i].height_offset;
int max_chan_width = (((side == TOP) || (side == BOTTOM)) ? x_chan_width : y_chan_width);
float step_size = (float)max_chan_width / (float)(Fc * num_phys_pins);
float spacing_dense = (float)max_chan_width / (float)(2 * Fc_dense);
float spacing_sparse = (float)max_chan_width / (float)(2 * Fc_sparse);
float spacing[2];
/* Flip every pin to balance switch density */
spacing[i % 2] = spacing_dense;
Fc_half[i % 2] = Fc_dense;
spacing[(i + 1) % 2] = spacing_sparse;
Fc_half[(i + 1) % 2] = Fc_sparse;
float ftrack = i * step_size; /* Start point. Staggered from pin to pin */
int iconn = 0;
for (int ihalf = 0; ihalf < 2; ihalf++) { /* For both dense and sparse halves. */
for (int j = 0; j < Fc_half[ihalf]; ++j) {
/* Can occasionally get wraparound due to floating point rounding.
* This is okay because the starting position > 0 when this occurs
* so connection is valid and fine */
int itrack = (int)ftrack;
itrack = itrack % max_chan_width;
tracks_connected_to_pin[pin][width][height][side][iconn] = itrack;
ftrack += spacing[ihalf];
iconn++;
}
}
} /* End for all physical pins. */
}
#ifdef ENABLE_CHECK_ALL_TRACKS
static void check_all_tracks_reach_pins(t_logical_block_type_ptr type,
int***** tracks_connected_to_pin,
int max_chan_width,
int Fc,
enum e_pin_type ipin_or_opin) {
/* Checks that all tracks can be reached by some pin. */
VTR_ASSERT(max_chan_width > 0);
int* num_conns_to_track; /* [0..max_chan_width-1] */
num_conns_to_track = (int*)vtr::calloc(max_chan_width, sizeof(int));
for (int pin = 0; pin < type->num_pins; ++pin) {
for (int width = 0; width < type->width; ++width) {
for (int height = 0; height < type->height; ++height) {
for (int side = 0; side < 4; ++side) {
if (tracks_connected_to_pin[pin][width][height][side][0] != OPEN) { /* Pin exists */
for (int conn = 0; conn < Fc; ++conn) {
int track = tracks_connected_to_pin[pin][width][height][side][conn];
num_conns_to_track[track]++;
}
}
}
}
}
}
for (int track = 0; track < max_chan_width; ++track) {
if (num_conns_to_track[track] <= 0) {
VTR_LOG_ERROR("check_all_tracks_reach_pins: Track %d does not connect to any CLB %ss.\n",
track, (ipin_or_opin == DRIVER ? "OPIN" : "IPIN"));
}
}
free(num_conns_to_track);
}
#endif
/* Allocates and loads the track to ipin lookup for each physical grid type. This
* is the same information as the ipin_to_track map but accessed in a different way. */
static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_track_to_pin_lookup(vtr::NdMatrix<std::vector<int>, 4> pin_to_track_map,
const vtr::Matrix<int>& Fc,
const int type_width,
const int type_height,
const int num_pins,
const int max_chan_width,
const int num_seg_types) {
/* [0..max_chan_width-1][0..width][0..height][0..3]. For each track number
* it stores a vector for each of the four sides. x-directed channels will
* use the TOP and BOTTOM vectors to figure out what clb input pins they
* connect to above and below them, respectively, while y-directed channels
* use the LEFT and RIGHT vectors. Each vector contains an nelem field
* saying how many ipins it connects to. The list[0..nelem-1] array then
* gives the pin numbers. */
/* Note that a clb pin that connects to a channel on its RIGHT means that *
* that channel connects to a clb pin on its LEFT. The convention used *
* here is always in the perspective of the CLB */
if (num_pins < 1) {
return vtr::NdMatrix<std::vector<int>, 4>();
}
/* Alloc and zero the the lookup table */
auto track_to_pin_lookup = vtr::NdMatrix<std::vector<int>, 4>({size_t(max_chan_width), size_t(type_width), size_t(type_height), 4});
/* Count number of pins to which each track connects */
for (int pin = 0; pin < num_pins; ++pin) {
for (int width = 0; width < type_width; ++width) {
for (int height = 0; height < type_height; ++height) {
for (int side = 0; side < 4; ++side) {
if (pin_to_track_map[pin][width][height][side].empty())
continue;
/* get number of tracks to which this pin connects */
int num_tracks = 0;
for (int iseg = 0; iseg < num_seg_types; iseg++) {
num_tracks += Fc[pin][iseg];
}
for (int conn = 0; conn < num_tracks; ++conn) {
int track = pin_to_track_map[pin][width][height][side][conn];
VTR_ASSERT(track < max_chan_width);
VTR_ASSERT(track >= 0);
track_to_pin_lookup[track][width][height][side].push_back(pin);
}
}
}
}
}
return track_to_pin_lookup;
}
std::string describe_rr_node(int inode) {
auto& device_ctx = g_vpr_ctx.device();
std::string msg = vtr::string_fmt("RR node: %d", inode);
const auto& rr_node = device_ctx.rr_nodes[inode];
msg += vtr::string_fmt(" type: %s", rr_node.type_string());
msg += vtr::string_fmt(" location: (%d,%d)", rr_node.xlow(), rr_node.ylow());
if (rr_node.xlow() != rr_node.xhigh() || rr_node.ylow() != rr_node.yhigh()) {
msg += vtr::string_fmt(" <-> (%d,%d)", rr_node.xhigh(), rr_node.yhigh());
}
if (rr_node.type() == CHANX || rr_node.type() == CHANY) {
int cost_index = rr_node.cost_index();
int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index;
if (seg_index < (int)device_ctx.arch->Segments.size()) {
msg += vtr::string_fmt(" track: %d len: %d longline: %d seg_type: %s dir: %s",
rr_node.track_num(),
rr_node.length(),
device_ctx.arch->Segments[seg_index].longline,
device_ctx.arch->Segments[seg_index].name.c_str(),
rr_node.direction_string());
} else {
msg += vtr::string_fmt(" track: %d len: %d seg_type: ILLEGAL_SEG_INDEX dir: %s",
rr_node.track_num(),
rr_node.length(),
rr_node.direction_string());
}
} else if (rr_node.type() == IPIN || rr_node.type() == OPIN) {
auto type = device_ctx.grid[rr_node.xlow()][rr_node.ylow()].type;
std::string pin_name = block_type_pin_index_to_name(type, rr_node.pin_num());
msg += vtr::string_fmt(" pin: %d pin_name: %s",
rr_node.pin_num(),
pin_name.c_str());
} else {
VTR_ASSERT(rr_node.type() == SOURCE || rr_node.type() == SINK);
msg += vtr::string_fmt(" class: %d", rr_node.class_num());
}
return msg;
}
static void build_unidir_rr_opins(const int i, const int j, const e_side side, const DeviceGrid& grid, const std::vector<vtr::Matrix<int>>& Fc_out, const int max_chan_width, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, vtr::NdMatrix<int, 3>& Fc_xofs, vtr::NdMatrix<int, 3>& Fc_yofs, t_rr_edge_info_set& rr_edges_to_create, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, const std::vector<t_rr_node>& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, const int num_seg_types) {
/*
* This routine adds the edges from opins to channels at the specified
* grid location (i,j) and grid tile side
*/
*Fc_clipped = false;
auto type = grid[i][j].type;
int width_offset = grid[i][j].width_offset;
int height_offset = grid[i][j].height_offset;
/* Go through each pin and find its fanout. */
for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) {
/* Skip global pins and pins that are not of DRIVER type */
int class_index = type->pin_class[pin_index];
if (type->class_inf[class_index].type != DRIVER) {
continue;
}
if (type->is_ignored_pin[pin_index]) {
continue;
}
int opin_node_index = get_rr_node_index(L_rr_node_indices, i, j, OPIN, pin_index, side);
if (opin_node_index < 0) continue; //No valid from node
for (int iseg = 0; iseg < num_seg_types; iseg++) {
/* get Fc for this segment type */
int seg_type_Fc = Fc_out[type->index][pin_index][iseg];
VTR_ASSERT(seg_type_Fc >= 0);
if (seg_type_Fc == 0) {
continue;
}
/* Can't do anything if pin isn't at this location */
if (0 == type->pinloc[width_offset][height_offset][side][pin_index]) {
continue;
}
/* Figure out the chan seg at that side.
* side is the side of the logic or io block. */
bool vert = ((side == TOP) || (side == BOTTOM));
bool pos_dir = ((side == TOP) || (side == RIGHT));
t_rr_type chan_type = (vert ? CHANX : CHANY);
int chan = (vert ? (j) : (i));
int seg = (vert ? (i) : (j));
int max_len = (vert ? grid.width() : grid.height());
vtr::NdMatrix<int, 3>& Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
if (false == pos_dir) {
--chan;
}
/* Skip the location if there is no channel. */
if (chan < 0) {
continue;
}
if (seg < 1) {
continue;
}
if (seg > int(vert ? grid.width() : grid.height()) - 2) { //-2 since no channels around perim
continue;
}
if (chan > int(vert ? grid.height() : grid.width()) - 2) { //-2 since no channels around perim
continue;
}
const t_chan_seg_details* seg_details = (chan_type == CHANX ? chan_details_x[seg][chan] : chan_details_y[chan][seg]).data();
if (seg_details[0].length() == 0)
continue;
/* Get the list of opin to mux connections for that chan seg. */
bool clipped;
get_unidir_opin_connections(chan, seg,
seg_type_Fc, iseg, chan_type, seg_details,
opin_node_index,
rr_edges_to_create,
Fc_ofs, max_len, max_chan_width,
L_rr_node_indices, &clipped);
if (clipped) {
*Fc_clipped = true;
}
}
/* Add in direct connections */
get_opin_direct_connecions(i, j, side, pin_index, opin_node_index, rr_edges_to_create, L_rr_node_indices, rr_nodes,
directs, num_directs, clb_to_clb_directs);
}
}
/**
* Parse out which CLB pins should connect directly to which other CLB pins then store that in a clb_to_clb_directs data structure
* This data structure supplements the the info in the "directs" data structure
* TODO: The function that does this parsing in placement is poorly done because it lacks generality on heterogeniety, should replace with this one
*/
static t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_inf* directs, const int num_directs, int delayless_switch) {
int i, j;
unsigned int itype;
t_clb_to_clb_directs* clb_to_clb_directs;
char *pb_type_name, *port_name;
int start_pin_index, end_pin_index;
t_pb_type* pb_type;
auto& device_ctx = g_vpr_ctx.device();
clb_to_clb_directs = (t_clb_to_clb_directs*)vtr::calloc(num_directs, sizeof(t_clb_to_clb_directs));
pb_type_name = nullptr;
port_name = nullptr;
for (i = 0; i < num_directs; i++) {
pb_type_name = (char*)vtr::malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
port_name = (char*)vtr::malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
// Load from pins
// Parse out the pb_type name, port name, and pin range
parse_direct_pin_name(directs[i].from_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
// Figure out which type, port, and pin is used
for (itype = 0; itype < device_ctx.logical_block_types.size(); ++itype) {
if (strcmp(device_ctx.logical_block_types[itype].name, pb_type_name) == 0) {
break;
}
}
if (itype >= device_ctx.logical_block_types.size()) {
vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, "Unable to find block %s.\n", pb_type_name);
}
clb_to_clb_directs[i].from_clb_type = &device_ctx.logical_block_types[itype];
pb_type = clb_to_clb_directs[i].from_clb_type->pb_type;
for (j = 0; j < pb_type->num_ports; j++) {
if (strcmp(pb_type->ports[j].name, port_name) == 0) {
break;
}
}
if (j >= pb_type->num_ports) {
vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, "Unable to find port %s (on block %s).\n", port_name, pb_type_name);
}
if (start_pin_index == OPEN) {
VTR_ASSERT(start_pin_index == end_pin_index);
start_pin_index = 0;
end_pin_index = pb_type->ports[j].num_pins - 1;
}
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].from_clb_pin_start_index);
get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].from_clb_pin_end_index);
// Load to pins
// Parse out the pb_type name, port name, and pin range
parse_direct_pin_name(directs[i].to_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);
// Figure out which type, port, and pin is used
for (itype = 0; itype < device_ctx.logical_block_types.size(); ++itype) {
if (strcmp(device_ctx.logical_block_types[itype].name, pb_type_name) == 0) {
break;
}
}
if (itype >= device_ctx.logical_block_types.size()) {
vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, "Unable to find block %s.\n", pb_type_name);
}
clb_to_clb_directs[i].to_clb_type = &device_ctx.logical_block_types[itype];
pb_type = clb_to_clb_directs[i].to_clb_type->pb_type;
for (j = 0; j < pb_type->num_ports; j++) {
if (strcmp(pb_type->ports[j].name, port_name) == 0) {
break;
}
}
if (j >= pb_type->num_ports) {
vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, "Unable to find port %s (on block %s).\n", port_name, pb_type_name);
}
if (start_pin_index == OPEN) {
VTR_ASSERT(start_pin_index == end_pin_index);
start_pin_index = 0;
end_pin_index = pb_type->ports[j].num_pins - 1;
}
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].to_clb_pin_start_index);
get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].to_clb_pin_end_index);
if (abs(clb_to_clb_directs[i].from_clb_pin_start_index - clb_to_clb_directs[i].from_clb_pin_end_index) != abs(clb_to_clb_directs[i].to_clb_pin_start_index - clb_to_clb_directs[i].to_clb_pin_end_index)) {
vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line,
"Range mismatch from %s to %s.\n", directs[i].from_pin, directs[i].to_pin);
}
//Set the switch index
if (directs[i].switch_type > 0) {
//Use the specified switch
clb_to_clb_directs[i].switch_index = directs[i].switch_type;
} else {
//Use the delayless switch by default
clb_to_clb_directs[i].switch_index = delayless_switch;
}
free(pb_type_name);
free(port_name);
//We must be careful to clean-up anything that we may have incidentally allocated.
//Specifically, we can be called while generating the dummy architecture
//for placer delay estimation. Since the delay estimation occurs on a
//'different' architecture it is almost certain that the f_blk_pin_from_port_pin allocated
//by calling get_blk_pin_from_port_pin() will later be invalid.
//We therefore must free it now.
free_blk_pin_from_port_pin();
}
return clb_to_clb_directs;
}
/* Add all direct clb-pin-to-clb-pin edges to given opin
*
* The current opin is located at (x,y) along the specified side
*/
static int get_opin_direct_connecions(int x,
int y,
e_side side,
int opin,
int from_rr_node,
t_rr_edge_info_set& rr_edges_to_create,
const t_rr_node_indices& L_rr_node_indices,
const std::vector<t_rr_node>& rr_nodes,
const t_direct_inf* directs,
const int num_directs,
const t_clb_to_clb_directs* clb_to_clb_directs) {
auto& device_ctx = g_vpr_ctx.device();
t_physical_tile_type_ptr curr_type = device_ctx.grid[x][y].type;
int num_pins = 0;
int width_offset = device_ctx.grid[x][y].width_offset;
int height_offset = device_ctx.grid[x][y].height_offset;
if (!curr_type->pinloc[width_offset][height_offset][side][opin]) {
return num_pins; //No source pin on this side
}
//Capacity location determined by pin number relative to pins per capacity instance
int z = opin / (curr_type->num_pins / curr_type->capacity);
VTR_ASSERT(z >= 0 && z < curr_type->capacity);
/* Iterate through all direct connections */
for (int i = 0; i < num_directs; i++) {
/* Find matching direct clb-to-clb connections with the same type as current grid location */
if (clb_to_clb_directs[i].from_clb_type == logical_block_type(curr_type)) { //We are at a valid starting point
if (directs[i].from_side != NUM_SIDES && directs[i].from_side != side) continue;
//Offset must be in range
if (x + directs[i].x_offset < int(device_ctx.grid.width() - 1)
&& x + directs[i].x_offset > 0
&& y + directs[i].y_offset < int(device_ctx.grid.height() - 1)
&& y + directs[i].y_offset > 0) {
//Only add connections if the target clb type matches the type in the direct specification
t_physical_tile_type_ptr target_type = device_ctx.grid[x + directs[i].x_offset][y + directs[i].y_offset].type;
if (clb_to_clb_directs[i].to_clb_type == logical_block_type(target_type)
&& z + directs[i].z_offset < int(target_type->capacity)
&& z + directs[i].z_offset >= 0) {
/* Compute index of opin with regards to given pins */
int max_index = OPEN, min_index = OPEN;
bool swap = false;
if (clb_to_clb_directs[i].from_clb_pin_start_index > clb_to_clb_directs[i].from_clb_pin_end_index) {
swap = true;
max_index = clb_to_clb_directs[i].from_clb_pin_start_index;
min_index = clb_to_clb_directs[i].from_clb_pin_end_index;
} else {
swap = false;
min_index = clb_to_clb_directs[i].from_clb_pin_start_index;
max_index = clb_to_clb_directs[i].from_clb_pin_end_index;
}
int logical_opin = opin % (curr_type->num_pins / curr_type->capacity);
if (max_index >= logical_opin && min_index <= logical_opin) {
int offset = logical_opin - min_index;
/* This opin is specified to connect directly to an ipin, now compute which ipin to connect to */
int logical_ipin = OPEN;
if (clb_to_clb_directs[i].to_clb_pin_start_index > clb_to_clb_directs[i].to_clb_pin_end_index) {
if (swap) {
logical_ipin = clb_to_clb_directs[i].to_clb_pin_end_index + offset;
} else {
logical_ipin = clb_to_clb_directs[i].to_clb_pin_start_index - offset;
}
} else {
if (swap) {
logical_ipin = clb_to_clb_directs[i].to_clb_pin_end_index - offset;
} else {
logical_ipin = clb_to_clb_directs[i].to_clb_pin_start_index + offset;
}
}
VTR_ASSERT(logical_ipin < (target_type->num_pins / target_type->capacity));
//If this block has capacity > 1 then the pins of z position > 0 are offset
//by the number of pins per capacity instance
int ipin = logical_ipin + (target_type->num_pins / target_type->capacity) * (z + directs[i].z_offset);
//if (ipin > target_type->num_pins) continue; //Invalid z-offset
/* Add new ipin edge to list of edges */
std::vector<int> inodes;
if (directs[i].to_side != NUM_SIDES) {
//Explicit side specified, only create if pin exists on that side
int inode = get_rr_node_index(L_rr_node_indices, x + directs[i].x_offset, y + directs[i].y_offset,
IPIN, ipin, directs[i].to_side);
if (inode != OPEN) {
inodes.push_back(inode);
}
} else {
//No side specified, get all candidates
inodes = get_rr_node_indices(L_rr_node_indices, x + directs[i].x_offset, y + directs[i].y_offset,
IPIN, ipin);
}
if (inodes.size() > 0) {
//There may be multiple physical pins corresponding to the logical
//target ipin. We only need to connect to one of them (since the physical pins
//are logically equivalent). This also ensures the graphics look reasonable and map
//back fairly directly to the architecture file in the case of pin equivalence
int inode = pick_best_direct_connect_target_rr_node(rr_nodes, from_rr_node, inodes);
rr_edges_to_create.emplace_back(from_rr_node, inode, clb_to_clb_directs[i].switch_index);
++num_pins;
}
}
}
}
}
}
return num_pins;
}
/* Determines whether the output pins of the specified block type should be perturbed. *
* This is to prevent pathological cases where the output pin connections are *
* spaced such that the connection pattern always skips some types of wire (w.r.t. *
* starting points) */
static std::vector<bool> alloc_and_load_perturb_opins(const t_physical_tile_type_ptr type,
const vtr::Matrix<int>& Fc_out,
const int max_chan_width,
const std::vector<t_segment_inf>& segment_inf) {
int i, Fc_max, iclass, num_wire_types;
int num, max_primes, factor, num_factors;
int* prime_factors;
float step_size = 0;
float n = 0;
float threshold = 0.07;
std::vector<bool> perturb_opins(segment_inf.size(), false);
i = Fc_max = iclass = 0;
if (segment_inf.size() > 1) {
/* Segments of one length are grouped together in the channel. *
* In the future we can determine if any of these segments will *
* encounter the pathological step size case, and determine if *
* we need to perturb based on the segment's frequency (if *
* frequency is small we should not perturb - testing has found *
* that perturbing a channel when unnecessary increases needed *
* W to achieve the same delay); but for now we just return. */
return perturb_opins;
} else {
/* There are as many wire start points as the value of L */
num_wire_types = segment_inf[0].length;
}
/* get Fc_max */
for (i = 0; i < type->num_pins; ++i) {
iclass = type->pin_class[i];
if (Fc_out[i][0] > Fc_max && type->class_inf[iclass].type == DRIVER) {
Fc_max = Fc_out[i][0];
}
}
/* Nothing to perturb if Fc=0; no need to perturb if Fc = 1 */
if (Fc_max == 0 || Fc_max == max_chan_width) {
return perturb_opins;
}
/* Pathological cases occur when the step size, W/Fc, is a multiple of *
* the number of wire starting points, L. Specifically, when the step *
* size is a multiple of a prime factor of L, the connection pattern *
* will always skip some wires. Thus, we perturb pins if we detect this *
* case. */
/* get an upper bound on the number of prime factors of num_wire_types */
max_primes = (int)floor(log((float)num_wire_types) / log(2.0));
max_primes = std::max(max_primes, 1); //Minimum of 1 to ensure we allocate space for at least one prime_factor
prime_factors = (int*)vtr::malloc(max_primes * sizeof(int));
for (i = 0; i < max_primes; i++) {
prime_factors[i] = 0;
}
/* Find the prime factors of num_wire_types */
num = num_wire_types;
factor = 2;
num_factors = 0;
while (pow((float)factor, 2) <= num) {
if (num % factor == 0) {
num /= factor;
if (factor != prime_factors[num_factors]) {
prime_factors[num_factors] = factor;
num_factors++;
}
} else {
factor++;
}
}
if (num_factors == 0) {
prime_factors[num_factors++] = num_wire_types; /* covers cases when num_wire_types is prime */
}
/* Now see if step size is an approximate multiple of one of the factors. A *
* threshold is used because step size may not be an integer. */
step_size = (float)max_chan_width / Fc_max;
for (i = 0; i < num_factors; i++) {
if (vtr::nint(step_size) < prime_factors[i]) {
perturb_opins[0] = false;
break;
}
n = step_size / prime_factors[i];
n = n - (float)vtr::nint(n); /* fractinal part */
if (fabs(n) < threshold) {
perturb_opins[0] = true;
break;
} else {
perturb_opins[0] = false;
}
}
free(prime_factors);
return perturb_opins;
}
static int pick_best_direct_connect_target_rr_node(const std::vector<t_rr_node>& rr_nodes,
int from_rr,
const std::vector<int>& candidate_rr_nodes) {
//With physically equivalent pins there may be multiple candidate rr nodes (which are equivalent)
//to connect the direct edge to.
//As a result it does not matter (from a correctness standpoint) which is picked.
//
//However intuitively we would expect (e.g. when visualizing the drawn RR graph) that the 'closest'
//candidate would be picked (i.e. to minimize the drawn edge length).
//
//This function attempts to pick the 'best/closest' of the candidates.
VTR_ASSERT(rr_nodes[from_rr].type() == OPIN);
e_side from_side = rr_nodes[from_rr].side();
float best_dist = std::numeric_limits<float>::infinity();
int best_rr = OPEN;
for (int to_rr : candidate_rr_nodes) {
VTR_ASSERT(rr_nodes[to_rr].type() == IPIN);
float to_dist = std::abs(rr_nodes[from_rr].xlow() - rr_nodes[to_rr].xlow())
+ std::abs(rr_nodes[from_rr].ylow() - rr_nodes[to_rr].ylow());
e_side to_side = rr_nodes[to_rr].side();
//Include a partial unit of distance based on side alignment to ensure
//we preferr facing sides
if ((from_side == RIGHT && to_side == LEFT)
|| (from_side == LEFT && to_side == RIGHT)
|| (from_side == TOP && to_side == BOTTOM)
|| (from_side == BOTTOM && to_side == TOP)) {
//Facing sides
to_dist += 0.25;
} else if (((from_side == RIGHT || from_side == LEFT) && (to_side == TOP || to_side == BOTTOM))
|| ((from_side == TOP || from_side == BOTTOM) && (to_side == RIGHT || to_side == LEFT))) {
//Perpendicular sides
to_dist += 0.5;
} else {
//Opposite sides
to_dist += 0.75;
}
if (to_dist < best_dist) {
best_dist = to_dist;
best_rr = to_rr;
}
}
VTR_ASSERT(best_rr != OPEN);
return best_rr;
}
//Collects the sets of connected non-configurable edges in the RR graph
t_non_configurable_rr_sets identify_non_configurable_rr_sets() {
std::set<std::set<t_node_edge>> edge_sets;
//Walk through the RR graph and recursively expand non-configurable edges
//to collect the sets of non-configurably connected nodes
auto& device_ctx = g_vpr_ctx.device();
for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) {
std::set<t_node_edge> edge_set;
expand_non_configurable(inode, edge_set);
if (!edge_set.empty()) {
edge_sets.insert(edge_set);
}
}
std::set<std::set<int>> node_sets;
for (auto& edge_set : edge_sets) {
std::set<int> node_set;
for (const auto& edge : edge_set) {
node_set.insert(edge.from_node);
node_set.insert(edge.to_node);
}
VTR_ASSERT(!node_set.empty());
node_sets.insert(node_set);
}
t_non_configurable_rr_sets non_configurable_rr_sets;
non_configurable_rr_sets.edge_sets = edge_sets;
non_configurable_rr_sets.node_sets = node_sets;
return non_configurable_rr_sets;
}
//Builds a set of non-configurably connected RR graph edges
static void expand_non_configurable(int inode, std::set<t_node_edge>& edge_set) {
auto& device_ctx = g_vpr_ctx.device();
for (t_edge_size iedge = 0; iedge < device_ctx.rr_nodes[inode].num_edges(); ++iedge) {
bool edge_non_configurable = !device_ctx.rr_nodes[inode].edge_is_configurable(iedge);
if (edge_non_configurable) {
int to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);
t_node_edge edge = {inode, to_node};
if (edge_set.count(edge)) {
continue; //Already seen don't re-expand to avoid loops
}
edge_set.emplace(edge);
expand_non_configurable(to_node, edge_set);
}
}
}
static void process_non_config_sets(const t_non_configurable_rr_sets& non_config_rr_sets) {
std::vector<std::vector<int>> non_config_rr_node_sets;
std::unordered_map<int, int> rr_node_non_config_node_set;
for (const auto& node_set : non_config_rr_sets.node_sets) {
//Convert node sets to vectors
non_config_rr_node_sets.push_back(std::vector<int>(node_set.begin(), node_set.end()));
//Record reverse look-ups
for (int inode : node_set) {
rr_node_non_config_node_set.emplace(inode, non_config_rr_node_sets.size() - 1);
}
}
auto& device_ctx = g_vpr_ctx.mutable_device();
device_ctx.rr_non_config_node_sets = std::move(non_config_rr_node_sets);
device_ctx.rr_node_to_non_config_node_set = std::move(rr_node_non_config_node_set);
}