#include <cstdio>
#include <ctime>
#include <cmath>
#include <algorithm>
#include <vector>
#include <iostream>
using namespace std;
#include "vtr_assert.h"
#include "vtr_util.h"
#include "vtr_log.h"
#include "vtr_digest.h"
#include "vtr_memory.h"
#include "vpr_types.h"
#include "vpr_error.h"
#include "vpr_utils.h"
#include "stats.h"
#include "globals.h"
#include "route_export.h"
#include "route_common.h"
#include "route_tree_timing.h"
#include "route_timing.h"
#include "route_breadth_first.h"
#include "place_and_route.h"
#include "build_rr_graph.h"
#include "build_rr_graph2.h"
#include "read_xml_arch_file.h"
#include "draw.h"
#include "echo_files.h"
#include "route_profiling.h"
#include "timing_util.h"
#include "RoutingDelayCalculator.h"
#include "timing_info.h"
#include "tatum/echo_writer.hpp"
#include "path_delay.h"
/**************** Static variables local to route_common.c ******************/
static t_heap **heap; /* Indexed from [1..heap_size] */
static int heap_size; /* Number of slots in the heap array */
static int heap_tail; /* Index of first unused slot in the heap array */
/* For managing my own list of currently free heap data structures. */
static t_heap *heap_free_head = NULL;
/* For keeping track of the pseudo-malloc (chunk) memory for the heap */
static vtr::t_chunk heap_ch;
/* For managing my own list of currently free trace data structures. */
static t_trace *trace_free_head = NULL;
/* For keeping track of the pseudo-malloc (chunk) memory for the trace */
static vtr::t_chunk trace_ch;
static int num_trace_allocated = 0; /* To watch for memory leaks. */
static int num_heap_allocated = 0;
static int num_linked_f_pointer_allocated = 0;
static t_linked_f_pointer *rr_modified_head = NULL;
static t_linked_f_pointer *linked_f_pointer_free_head = NULL;
static vtr::t_chunk linked_f_pointer_ch;
/* The numbering relation between the channels and clbs is: *
* *
* | IO | chan_ | CLB | chan_ | CLB | *
* |grid[0][2] | y[0][2] |grid[1][2] | y[1][2] | grid[2][2]| *
* +-----------+ +-----------+ +-----------+ *
* } capacity in *
* No channel chan_x[1][1] chan_x[2][1] } chan_width *
* } _x[1] *
* +-----------+ +-----------+ +-----------+ *
* | | chan_ | | chan_ | | *
* | IO | y[0][1] | CLB | y[1][1] | CLB | *
* |grid[0][1] | |grid[1][1] | |grid[2][1] | *
* | | | | | | *
* +-----------+ +-----------+ +-----------+ *
* } capacity in *
* chan_x[1][0] chan_x[2][0] } chan_width *
* } _x[0] *
* +-----------+ +-----------+ *
* No | | No | | *
* Channel | IO | Channel | IO | *
* |grid[1][0] | |grid[2][0] | *
* | | | | *
* +-----------+ +-----------+ *
* *
* {=======} {=======} *
* Capacity in Capacity in *
* chan_width_y[0] chan_width_y[1] *
* */
/******************** Subroutines local to route_common.c *******************/
static void add_to_heap(t_heap *hptr);
static t_heap *alloc_heap_data(void);
static t_linked_f_pointer *alloc_linked_f_pointer(void);
static vtr::vector_map<ClusterNetId, std::vector<int>> load_net_rr_terminals(const t_rr_node_indices& L_rr_node_indices);
static vtr::vector_map<ClusterNetId, t_bb> load_route_bb(int bb_factor);
static vtr::vector_map<ClusterBlockId, std::vector<int>> load_rr_clb_sources(const t_rr_node_indices& L_rr_node_indices);
static t_clb_opins_used alloc_and_load_clb_opins_used_locally(void);
static void adjust_one_rr_occ_and_apcost(int inode, int add_or_sub,
float pres_fac, float acc_fac);
/************************** Subroutine definitions ***************************/
void save_routing(vtr::vector_map<ClusterNetId, t_trace *> &best_routing,
const t_clb_opins_used& clb_opins_used_locally,
t_clb_opins_used& saved_clb_opins_used_locally) {
/* This routine frees any routing currently held in best_routing, *
* then copies over the current routing (held in route_ctx.trace_head), and *
* finally sets route_ctx.trace_head and route_ctx.trace_tail to all NULLs so that the *
* connection to the saved routing is broken. This is necessary so *
* that the next iteration of the router does not free the saved *
* routing elements. Also saves any data about locally used clb_opins, *
* since this is also part of the routing. */
t_trace *tptr, *tempptr;
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& route_ctx = g_vpr_ctx.mutable_routing();
for (auto net_id : cluster_ctx.clb_nlist.nets()) {
/* Free any previously saved routing. It is no longer best. */
tptr = best_routing[net_id];
while (tptr != NULL) {
tempptr = tptr->next;
free_trace_data(tptr);
tptr = tempptr;
}
/* Save a pointer to the current routing in best_routing. */
best_routing[net_id] = route_ctx.trace_head[net_id];
/* Set the current (working) routing to NULL so the current trace *
* elements won't be reused by the memory allocator. */
route_ctx.trace_head[net_id] = NULL;
route_ctx.trace_tail[net_id] = NULL;
}
/* Save which OPINs are locally used. */
saved_clb_opins_used_locally = clb_opins_used_locally;
}
/* Deallocates any current routing in route_ctx.trace_head, and replaces it with *
* the routing in best_routing. Best_routing is set to NULL to show that *
* it no longer points to a valid routing. NOTE: route_ctx.trace_tail is not *
* restored -- it is set to all NULLs since it is only used in *
* update_traceback. If you need route_ctx.trace_tail restored, modify this *
* routine. Also restores the locally used opin data. */
void restore_routing(vtr::vector_map<ClusterNetId, t_trace *> &best_routing,
t_clb_opins_used& clb_opins_used_locally,
const t_clb_opins_used& saved_clb_opins_used_locally) {
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& route_ctx = g_vpr_ctx.mutable_routing();
for (auto net_id : cluster_ctx.clb_nlist.nets()) {
/* Free any current routing. */
free_traceback(net_id);
/* Set the current routing to the saved one. */
route_ctx.trace_head[net_id] = best_routing[net_id];
best_routing[net_id] = NULL; /* No stored routing. */
}
/* Restore which OPINs are locally used. */
clb_opins_used_locally = saved_clb_opins_used_locally;
}
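/* Illustrative sketch (not part of this file): a caller that wants to keep the
 * best routing found across several attempts would typically pair the two
 * routines above roughly as follows. The names best_routing, saved_opins and
 * routing_is_better are assumptions for the example only.
 *
 *   auto best_routing = alloc_saved_routing();
 *   t_clb_opins_used saved_opins;
 *   ...
 *   if (try_route(...) && routing_is_better)
 *       save_routing(best_routing, route_ctx.clb_opins_used_locally, saved_opins);
 *   ...
 *   restore_routing(best_routing, route_ctx.clb_opins_used_locally, saved_opins);
 *   free_saved_routing(best_routing);
 */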
/* This routine finds a "magic cookie" for the routing and prints it. *
* Use this number as a routing serial number to ensure that programming *
* changes do not break the router. */
void get_serial_num(void) {
int serial_num, inode;
t_trace *tptr;
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& route_ctx = g_vpr_ctx.routing();
auto& device_ctx = g_vpr_ctx.device();
serial_num = 0;
for (auto net_id : cluster_ctx.clb_nlist.nets()) {
/* Global nets will have null trace_heads (never routed) so they *
* are not included in the serial number calculation. */
tptr = route_ctx.trace_head[net_id];
while (tptr != NULL) {
inode = tptr->index;
serial_num += (size_t(net_id) + 1)
* (device_ctx.rr_nodes[inode].xlow() * (device_ctx.grid.width()) - device_ctx.rr_nodes[inode].yhigh());
serial_num -= device_ctx.rr_nodes[inode].ptc_num() * (size_t(net_id) + 1) * 10;
serial_num -= device_ctx.rr_nodes[inode].type() * (size_t(net_id) + 1) * 100;
serial_num %= 2000000000; /* Prevent overflow */
tptr = tptr->next;
}
}
vtr::printf_info("Serial number (magic cookie) for the routing is: %d\n", serial_num);
}
void try_graph(int width_fac, t_router_opts router_opts,
t_det_routing_arch *det_routing_arch, t_segment_inf * segment_inf,
t_chan_width_dist chan_width_dist,
t_direct_inf *directs, int num_directs) {
auto& device_ctx = g_vpr_ctx.mutable_device();
t_graph_type graph_type;
if (router_opts.route_type == GLOBAL) {
graph_type = GRAPH_GLOBAL;
} else {
graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ?
GRAPH_BIDIR : GRAPH_UNIDIR);
}
/* Set the channel widths */
init_chan(width_fac, chan_width_dist);
/* Free any old routing graph, if one exists. */
free_rr_graph();
clock_t begin = clock();
/* Set up the routing resource graph defined by this FPGA architecture. */
int warning_count;
device_ctx.rr_graph = create_rr_graph(graph_type,
device_ctx.num_block_types, device_ctx.block_types,
device_ctx.grid,
&device_ctx.chan_width,
device_ctx.num_arch_switches,
det_routing_arch,
segment_inf,
router_opts.base_cost_type,
router_opts.trim_empty_channels,
router_opts.trim_obs_channels,
directs, num_directs,
&device_ctx.num_rr_switches,
&warning_count);
clock_t end = clock();
vtr::printf_info("Build rr_graph took %g seconds.\n", (float)(end - begin) / CLOCKS_PER_SEC);
}
bool try_route(int width_fac, t_router_opts router_opts,
t_det_routing_arch *det_routing_arch, t_segment_inf * segment_inf,
vtr::vector_map<ClusterNetId, float *> &net_delay,
#ifdef ENABLE_CLASSIC_VPR_STA
t_slack * slacks,
const t_timing_inf& timing_inf,
#endif
std::shared_ptr<SetupHoldTimingInfo> timing_info,
t_chan_width_dist chan_width_dist,
t_direct_inf *directs, int num_directs,
ScreenUpdatePriority first_iteration_priority) {
/* Attempts a routing via an iterated maze router algorithm. Width_fac *
* specifies the relative width of the channels, while the members of *
* router_opts determine the value of the costs assigned to routing *
* resource nodes, etc. det_routing_arch describes the detailed routing *
* architecture (connection and switch boxes) of the FPGA; it is used *
* only if a DETAILED routing has been selected. */
auto& device_ctx = g_vpr_ctx.mutable_device();
auto& cluster_ctx = g_vpr_ctx.clustering();
t_graph_type graph_type;
if (router_opts.route_type == GLOBAL) {
graph_type = GRAPH_GLOBAL;
} else {
graph_type = (det_routing_arch->directionality == BI_DIRECTIONAL ?
GRAPH_BIDIR : GRAPH_UNIDIR);
}
/* Set the channel widths */
init_chan(width_fac, chan_width_dist);
/* Free any old routing graph, if one exists. */
free_rr_graph();
clock_t begin = clock();
/* Set up the routing resource graph defined by this FPGA architecture. */
int warning_count;
device_ctx.rr_graph = create_rr_graph(graph_type,
device_ctx.num_block_types, device_ctx.block_types,
device_ctx.grid,
&device_ctx.chan_width,
device_ctx.num_arch_switches,
det_routing_arch,
segment_inf,
router_opts.base_cost_type,
router_opts.trim_empty_channels,
router_opts.trim_obs_channels,
directs, num_directs,
&device_ctx.num_rr_switches,
&warning_count);
clock_t end = clock();
vtr::printf_info("Build rr_graph took %g seconds.\n", (float)(end - begin) / CLOCKS_PER_SEC);
//Initialize drawing, now that we have an RR graph
init_draw_coords(width_fac);
bool success = true;
/* Allocate and load additional rr_graph information needed only by the router. */
alloc_and_load_rr_node_route_structs();
init_route_structs(router_opts.bb_factor);
if (cluster_ctx.clb_nlist.nets().empty()) {
vtr::printf_warning(__FILE__, __LINE__, "No nets to route\n");
}
if (router_opts.router_algorithm == BREADTH_FIRST) {
vtr::printf_info("Confirming router algorithm: BREADTH_FIRST.\n");
success = try_breadth_first_route(router_opts);
} else { /* TIMING_DRIVEN route */
vtr::printf_info("Confirming router algorithm: TIMING_DRIVEN.\n");
IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.block_types, device_ctx.num_block_types);
success = try_timing_driven_route(
device_ctx.rr_graph,
router_opts,
net_delay,
intra_lb_pb_pin_lookup,
timing_info,
#ifdef ENABLE_CLASSIC_VPR_STA
slacks,
timing_inf,
#endif
first_iteration_priority);
profiling::time_on_fanout_analysis();
}
return (success);
}
bool feasible_routing(void) {
/* This routine checks to see if this is a resource-feasible routing. *
* That is, are all rr_node capacity limitations respected? It assumes *
* that the occupancy arrays are up to date when it is called. */
auto& device_ctx = g_vpr_ctx.device();
auto& route_ctx = g_vpr_ctx.routing();
for (int inode = 0; inode < device_ctx.num_rr_nodes; inode++) {
if (route_ctx.rr_node_route_inf[inode].occ() > device_ctx.rr_nodes[inode].capacity()) {
return (false);
}
}
return (true);
}
void pathfinder_update_path_cost(t_trace *route_segment_start,
int add_or_sub, float pres_fac) {
/* This routine updates the occupancy and pres_cost of the rr_nodes that are *
* affected by the portion of the routing of one net that starts at *
* route_segment_start. If route_segment_start is route_ctx.trace_head[net_id], the *
* costs of all the nodes in the routing of net net_id are updated. If *
* add_or_sub is -1 the net (or net portion) is ripped up, if it is 1 the *
* net is added to the routing. The size of pres_fac determines how severely *
* oversubscribed rr_nodes are penalized. */
t_trace *tptr;
tptr = route_segment_start;
if (tptr == NULL) /* No routing yet. */
return;
auto& device_ctx = g_vpr_ctx.device();
for (;;) {
pathfinder_update_single_node_cost(tptr->index, add_or_sub, pres_fac);
if (device_ctx.rr_nodes[tptr->index].type() == SINK) {
tptr = tptr->next; /* Skip next segment. */
if (tptr == NULL)
break;
}
tptr = tptr->next;
} /* End while loop -- did an entire traceback. */
}
void pathfinder_update_single_node_cost(int inode, int add_or_sub, float pres_fac) {
/* Updates pathfinder's congestion cost by either adding or removing the
* usage of a resource node. pres_cost is Pn in the Pathfinder paper.
* pres_cost is set according to the overuse that would result from having
* ONE MORE net use this routing node. */
auto& route_ctx = g_vpr_ctx.mutable_routing();
auto& device_ctx = g_vpr_ctx.device();
int occ = route_ctx.rr_node_route_inf[inode].occ() + add_or_sub;
route_ctx.rr_node_route_inf[inode].set_occ(occ);
// can't have negative occupancy
VTR_ASSERT(occ >= 0);
int capacity = device_ctx.rr_nodes[inode].capacity();
if (occ < capacity) {
route_ctx.rr_node_route_inf[inode].pres_cost = 1.0;
} else {
route_ctx.rr_node_route_inf[inode].pres_cost = 1.0 + (occ + 1 - capacity) * pres_fac;
}
}
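/* Worked example of the pres_cost update in pathfinder_update_single_node_cost
 * above: for a node with capacity 1 that is currently used by 2 nets (occ = 2),
 * pres_cost becomes 1.0 + (2 + 1 - 1) * pres_fac = 1.0 + 2 * pres_fac, i.e. a
 * third net considering this node is charged for the overuse it would add. */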
void pathfinder_update_cost(float pres_fac, float acc_fac) {
/* This routine recomputes the pres_cost and acc_cost of each routing *
* resource for the pathfinder algorithm after all nets have been routed. *
* It updates the accumulated cost by adding in the number of extra *
* signals sharing a resource right now (i.e. after each complete iteration) *
* times acc_fac. It also updates pres_cost, since pres_fac may have *
* changed. THIS ROUTINE ASSUMES THE OCCUPANCY VALUES IN RR_NODE ARE UP TO *
* DATE. */
int inode, occ, capacity;
auto& device_ctx = g_vpr_ctx.device();
auto& route_ctx = g_vpr_ctx.mutable_routing();
for (inode = 0; inode < device_ctx.num_rr_nodes; inode++) {
occ = route_ctx.rr_node_route_inf[inode].occ();
capacity = device_ctx.rr_nodes[inode].capacity();
if (occ > capacity) {
route_ctx.rr_node_route_inf[inode].acc_cost += (occ - capacity) * acc_fac;
route_ctx.rr_node_route_inf[inode].pres_cost = 1.0 + (occ + 1 - capacity) * pres_fac;
}
/* If occ == capacity, we don't need to increase acc_cost, but a change *
* in pres_fac could have made it necessary to recompute the cost anyway. */
else if (occ == capacity) {
route_ctx.rr_node_route_inf[inode].pres_cost = 1.0 + pres_fac;
}
}
}
void init_heap(const DeviceGrid& grid) {
if (heap != nullptr) {
vtr::free(heap + 1);
heap = nullptr;
}
heap_size = (grid.width() - 1) * (grid.height() - 1);
heap = (t_heap **) vtr::malloc(heap_size * sizeof(t_heap *));
heap--; /* heap stores from [1..heap_size] */
heap_tail = 1;
}
/* Call this before you route any nets. It frees any old traceback and *
* sets the list of rr_nodes touched to empty. */
void init_route_structs(int bb_factor) {
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& device_ctx = g_vpr_ctx.device();
auto& route_ctx = g_vpr_ctx.mutable_routing();
//Free any old tracebacks
for (auto net_id : cluster_ctx.clb_nlist.nets())
free_traceback(net_id);
//Allocate new tracebacks
route_ctx.trace_head.resize(cluster_ctx.clb_nlist.nets().size());
route_ctx.trace_tail.resize(cluster_ctx.clb_nlist.nets().size());
init_heap(device_ctx.grid);
//Various look-ups
route_ctx.net_rr_terminals = load_net_rr_terminals(device_ctx.rr_node_indices);
route_ctx.route_bb = load_route_bb(bb_factor);
route_ctx.rr_blk_source = load_rr_clb_sources(device_ctx.rr_node_indices);
route_ctx.clb_opins_used_locally = alloc_and_load_clb_opins_used_locally();
route_ctx.net_status.resize(cluster_ctx.clb_nlist.nets().size());
/* Check that things that should have been emptied after the last routing *
* really were. */
if (rr_modified_head != NULL) {
vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
"in init_route_structs. List of modified rr nodes is not empty.\n");
}
if (heap_tail != 1) {
vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
"in init_route_structs. Heap is not empty.\n");
}
}
t_trace *
update_traceback(t_heap *hptr, ClusterNetId net_id) {
/* This routine adds the most recently finished wire segment to the *
* traceback linked list. The first connection starts with the net SOURCE *
* and begins at the structure pointed to by route_ctx.trace_head[net_id]. Each *
* connection ends with a SINK. After each SINK, the next connection *
* begins (if the net has more than 2 pins). The first element after the *
* SINK gives the routing node on a previous piece of the routing, which is *
* the link from the existing net to this new piece of the net. *
* In each traceback I start at the end of a path and trace back through *
* its predecessors to the beginning. I have stored information on the *
* predecessor of each node to make traceback easy -- this sacrifices some *
* memory for easier code maintenance. This routine returns a pointer to *
* the first "new" node in the traceback (node not previously in trace). */
t_trace *tptr, *prevptr, *temptail, *ret_ptr;
int inode;
short iedge;
t_rr_type rr_type;
auto& device_ctx = g_vpr_ctx.device();
auto& route_ctx = g_vpr_ctx.mutable_routing();
// hptr points to the end of a connection
inode = hptr->index;
rr_type = device_ctx.rr_nodes[inode].type();
if (rr_type != SINK) {
vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
"in update_traceback. Expected type = SINK (%d).\n"
"\tGot type = %d while tracing back net %zu.\n", SINK, rr_type, size_t(net_id));
}
tptr = alloc_trace_data(); /* SINK on the end of the connection */
tptr->index = inode;
tptr->iswitch = OPEN;
tptr->next = NULL;
temptail = tptr; /* This will become the new tail at the end */
/* of the routine. */
/* Now do its predecessor. */
inode = hptr->u.prev_node;
iedge = hptr->prev_edge;
while (inode != NO_PREVIOUS) {
prevptr = alloc_trace_data();
prevptr->index = inode;
prevptr->iswitch = device_ctx.rr_nodes[inode].edge_switch(iedge);
prevptr->next = tptr;
tptr = prevptr;
iedge = route_ctx.rr_node_route_inf[inode].prev_edge;
inode = route_ctx.rr_node_route_inf[inode].prev_node;
}
if (route_ctx.trace_tail[net_id] != NULL) {
route_ctx.trace_tail[net_id]->next = tptr; /* Traceback ends with tptr */
ret_ptr = tptr->next; /* First new segment. */
} else { /* This was the first "chunk" of the net's routing */
route_ctx.trace_head[net_id] = tptr;
ret_ptr = tptr; /* Whole traceback is new. */
}
route_ctx.trace_tail[net_id] = temptail;
return (ret_ptr);
}
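/* Example of the traceback layout update_traceback builds (illustrative node
 * names only): for a 3-pin net with source S and sinks K1 and K2, where the
 * connection to K2 branches off node B of the existing route to K1, the
 * traceback linked list is:
 *
 *   S -> A -> B -> C -> K1 -> B -> D -> K2
 *
 * The element after the SINK K1 repeats node B: it is the link from the
 * existing routing to the new connection, and its iswitch is the switch used
 * to leave B on the new branch. */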
/* The routine sets the path_cost to HUGE_POSITIVE_FLOAT for *
* all channel segments touched by previous routing phases. */
void reset_path_costs(void) {
t_linked_f_pointer *mod_ptr;
int num_mod_ptrs;
/* The traversal method below is slightly painful to make it faster. */
if (rr_modified_head != NULL) {
mod_ptr = rr_modified_head;
num_mod_ptrs = 1;
while (mod_ptr->next != NULL) {
*(mod_ptr->fptr) = HUGE_POSITIVE_FLOAT;
mod_ptr = mod_ptr->next;
num_mod_ptrs++;
}
*(mod_ptr->fptr) = HUGE_POSITIVE_FLOAT; /* Do last one. */
/* Reset the modified list and put all the elements back in the free *
* list. */
mod_ptr->next = linked_f_pointer_free_head;
linked_f_pointer_free_head = rr_modified_head;
rr_modified_head = NULL;
num_linked_f_pointer_allocated -= num_mod_ptrs;
}
}
/* Returns the *congestion* cost of using this rr_node. */
float get_rr_cong_cost(int inode) {
short cost_index;
float cost;
auto& device_ctx = g_vpr_ctx.device();
auto& route_ctx = g_vpr_ctx.routing();
cost_index = device_ctx.rr_nodes[inode].cost_index();
cost = device_ctx.rr_indexed_data[cost_index].base_cost
* route_ctx.rr_node_route_inf[inode].acc_cost
* route_ctx.rr_node_route_inf[inode].pres_cost;
return (cost);
}
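/* Note on get_rr_cong_cost above: an uncongested node (acc_cost == 1.0,
 * pres_cost == 1.0) simply costs its base_cost; historical congestion
 * (acc_cost > 1.0) and present overuse (pres_cost > 1.0) both scale that
 * base cost multiplicatively. */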
/* Mark all the SINKs of this net as targets by setting their target flags *
* to the number of times the net must connect to each SINK. Note that *
* this number can occasionally be greater than 1 -- think of connecting *
* the same net to two inputs of an and-gate (and-gate inputs are logically *
* equivalent, so both will connect to the same SINK). */
void mark_ends(ClusterNetId net_id) {
unsigned int ipin;
int inode;
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& route_ctx = g_vpr_ctx.mutable_routing();
for (ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) {
inode = route_ctx.net_rr_terminals[net_id][ipin];
route_ctx.rr_node_route_inf[inode].target_flag++;
}
}
void mark_remaining_ends(const vector<int>& remaining_sinks) {
// like mark_ends, but only performs it for the remaining sinks of a net
auto& route_ctx = g_vpr_ctx.mutable_routing();
for (int sink_node : remaining_sinks)
++route_ctx.rr_node_route_inf[sink_node].target_flag;
}
void node_to_heap(int inode, float total_cost, int prev_node, int prev_edge,
float backward_path_cost, float R_upstream) {
/* Puts an rr_node on the heap, if the new cost given is lower than the *
* current path_cost to this channel segment. The index of its predecessor *
* is stored to make traceback easy. The index of the edge used to get *
* from its predecessor to it is also stored to make timing analysis, etc. *
* easy. The backward_path_cost and R_upstream values are used only by the *
* timing-driven router -- the breadth-first router ignores them. */
auto& route_ctx = g_vpr_ctx.routing();
if (total_cost >= route_ctx.rr_node_route_inf[inode].path_cost)
return;
t_heap* hptr = alloc_heap_data();
hptr->index = inode;
hptr->cost = total_cost;
hptr->u.prev_node = prev_node;
hptr->prev_edge = prev_edge;
hptr->backward_path_cost = backward_path_cost;
hptr->R_upstream = R_upstream;
add_to_heap(hptr);
}
void free_traceback(ClusterNetId net_id) {
/* Puts the entire traceback (old routing) for this net on the free list *
* and sets the route_ctx.trace_head pointers etc. for the net to NULL. */
t_trace *tptr, *tempptr;
auto& route_ctx = g_vpr_ctx.mutable_routing();
if (route_ctx.trace_head.empty() && route_ctx.trace_tail.empty()) {
return;
}
if(route_ctx.trace_head[net_id] == NULL) {
return;
}
tptr = route_ctx.trace_head[net_id];
while (tptr != NULL) {
tempptr = tptr->next;
free_trace_data(tptr);
tptr = tempptr;
}
route_ctx.trace_head[net_id] = NULL;
route_ctx.trace_tail[net_id] = NULL;
}
/* Allocates data structures into which the key routing data can be saved, *
* allowing the routing to be recovered later (e.g. after another routing *
* is attempted). */
vtr::vector_map<ClusterNetId, t_trace *> alloc_saved_routing() {
auto& cluster_ctx = g_vpr_ctx.clustering();
vtr::vector_map<ClusterNetId, t_trace *> best_routing(cluster_ctx.clb_nlist.nets().size());
return (best_routing);
}
/* TODO: super hacky, jluu comment, I need to rethink this whole function; without it, logically equivalent output pins incorrectly use more pins than needed. I force each CLB output to use at most one output pin */
static t_clb_opins_used alloc_and_load_clb_opins_used_locally(void) {
/* Allocates and loads the data needed to make the router reserve some CLB *
* output pins for connections made locally within a CLB (if the netlist *
* specifies that this is necessary). */
t_clb_opins_used clb_opins_used_locally;
int clb_pin, iclass, class_low, class_high;
t_type_ptr type;
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& device_ctx = g_vpr_ctx.device();
clb_opins_used_locally.resize(cluster_ctx.clb_nlist.blocks().size());
for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
type = cluster_ctx.clb_nlist.block_type(blk_id);
get_class_range_for_block(blk_id, &class_low, &class_high);
clb_opins_used_locally[blk_id].resize(type->num_class);
int pin_low = 0;
int pin_high = 0;
get_pin_range_for_block(blk_id, &pin_low, &pin_high);
for (clb_pin = pin_low; clb_pin <= pin_high; clb_pin++) {
// another hack to avoid I/Os, whole function needs a rethink
if(type == device_ctx.IO_TYPE)
continue;
if ((cluster_ctx.clb_nlist.block_net(blk_id, clb_pin) != ClusterNetId::INVALID()
&& cluster_ctx.clb_nlist.net_sinks(cluster_ctx.clb_nlist.block_net(blk_id, clb_pin)).size() == 0)
|| cluster_ctx.clb_nlist.block_net(blk_id, clb_pin) == ClusterNetId::INVALID()) {
iclass = type->pin_class[clb_pin];
if(type->class_inf[iclass].type == DRIVER) {
/* Check to make sure class is in same range as that assigned to block */
VTR_ASSERT(iclass >= class_low && iclass <= class_high);
clb_opins_used_locally[blk_id][iclass].emplace_back();
}
}
}
}
return (clb_opins_used_locally);
}
/* The trace lists are only freed after use by the timing-driven placer.  */
/* Do not free them after use by the router, since the stats and drawing  */
/* routines use the trace values.                                          */
void free_trace_structs(void) {
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& route_ctx = g_vpr_ctx.mutable_routing();
if (route_ctx.trace_head.empty() && route_ctx.trace_tail.empty()) {
return;
}
for (auto net_id : cluster_ctx.clb_nlist.nets()) {
free_traceback(net_id);
if (route_ctx.trace_head[net_id]) {
free(route_ctx.trace_head[net_id]);
free(route_ctx.trace_tail[net_id]);
}
route_ctx.trace_head[net_id] = NULL;
route_ctx.trace_tail[net_id] = NULL;
}
}
void free_route_structs() {
/* Frees the temporary storage needed only during the routing. The *
* final routing result is not freed. */
auto& route_ctx = g_vpr_ctx.mutable_routing();
if(heap != NULL) {
// coverity[offset_free : Intentional]
free(heap + 1);
}
if(route_ctx.route_bb.size() != 0) {
route_ctx.route_bb.clear();
}
heap = NULL; /* Defensive coding: crash hard if I use these. */
/*free the memory chunks that were used by heap and linked f pointer */
free_chunk_memory(&heap_ch);
free_chunk_memory(&linked_f_pointer_ch);
heap_free_head = NULL;
linked_f_pointer_free_head = NULL;
}
/* Frees the data structures needed to save a routing. */
void free_saved_routing(vtr::vector_map<ClusterNetId, t_trace *> &best_routing) {
auto &cluster_ctx = g_vpr_ctx.clustering();
for (auto net_id : cluster_ctx.clb_nlist.nets()) {
if (best_routing[net_id] != NULL) {
free(best_routing[net_id]);
best_routing[net_id] = NULL;
}
}
}
void alloc_and_load_rr_node_route_structs(void) {
/* Allocates some extra information about each rr_node that is used only *
* during routing. */
auto& route_ctx = g_vpr_ctx.mutable_routing();
auto& device_ctx = g_vpr_ctx.device();
route_ctx.rr_node_route_inf.resize(device_ctx.num_rr_nodes);
reset_rr_node_route_structs();
}
void reset_rr_node_route_structs(void) {
/* Resets some extra information about each rr_node that is used only *
* during routing. */
auto& route_ctx = g_vpr_ctx.mutable_routing();
auto& device_ctx = g_vpr_ctx.device();
VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.num_rr_nodes));
for (int inode = 0; inode < device_ctx.num_rr_nodes; inode++) {
route_ctx.rr_node_route_inf[inode].prev_node = NO_PREVIOUS;
route_ctx.rr_node_route_inf[inode].prev_edge = NO_PREVIOUS;
route_ctx.rr_node_route_inf[inode].pres_cost = 1.0;
route_ctx.rr_node_route_inf[inode].acc_cost = 1.0;
route_ctx.rr_node_route_inf[inode].path_cost = HUGE_POSITIVE_FLOAT;
route_ctx.rr_node_route_inf[inode].target_flag = 0;
route_ctx.rr_node_route_inf[inode].set_occ(0);
}
}
/* Allocates and loads the route_ctx.net_rr_terminals data structure. For each net it stores the rr_node *
* index of the SOURCE of the net and all the SINKs of the net [clb_nlist.nets()][clb_nlist.net_pins()]. *
* Entry [inet][pnum] stores the rr index corresponding to the SOURCE (opin) or SINK (ipin) of the pin. */
static vtr::vector_map<ClusterNetId, std::vector<int>> load_net_rr_terminals(const t_rr_node_indices& L_rr_node_indices) {
vtr::vector_map<ClusterNetId, std::vector<int>> net_rr_terminals;
int inode, i, j, node_block_pin, iclass;
t_type_ptr type;
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& place_ctx = g_vpr_ctx.placement();
auto nets = cluster_ctx.clb_nlist.nets();
net_rr_terminals.resize(nets.size());
for (auto net_id : cluster_ctx.clb_nlist.nets()) {
auto net_pins = cluster_ctx.clb_nlist.net_pins(net_id);
net_rr_terminals[net_id].resize(net_pins.size());
int pin_count = 0;
for (auto pin_id : cluster_ctx.clb_nlist.net_pins(net_id)) {
auto block_id = cluster_ctx.clb_nlist.pin_block(pin_id);
i = place_ctx.block_locs[block_id].x;
j = place_ctx.block_locs[block_id].y;
type = cluster_ctx.clb_nlist.block_type(block_id);
/* In the routing graph, each (x, y) location has unique pins on it
* so when there is capacity, blocks are packed and their pin numbers
* are offset to get their actual rr_node */
node_block_pin = cluster_ctx.clb_nlist.pin_physical_index(pin_id);
iclass = type->pin_class[node_block_pin];
inode = get_rr_node_index(L_rr_node_indices, i, j, (pin_count == 0 ? SOURCE : SINK), /* First pin is driver */
iclass);
net_rr_terminals[net_id][pin_count] = inode;
pin_count++;
}
}
return net_rr_terminals;
}
/* Saves the rr_node corresponding to each SOURCE and SINK in each CLB *
* in the FPGA. Currently only the SOURCE rr_node values are used, and *
* they are used only to reserve pins for locally used OPINs in the router. *
* [0..cluster_ctx.clb_nlist.blocks().size()-1][0..num_class-1]. *
* The values for blocks that are pads are NOT valid. */
static vtr::vector_map<ClusterBlockId, std::vector<int>> load_rr_clb_sources(const t_rr_node_indices& L_rr_node_indices) {
vtr::vector_map<ClusterBlockId, std::vector<int>> rr_blk_source;
int i, j, iclass, inode;
int class_low, class_high;
t_rr_type rr_type;
t_type_ptr type;
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& place_ctx = g_vpr_ctx.placement();
rr_blk_source.resize(cluster_ctx.clb_nlist.blocks().size());
for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
type = cluster_ctx.clb_nlist.block_type(blk_id);
get_class_range_for_block(blk_id, &class_low, &class_high);
rr_blk_source[blk_id].resize(type->num_class);
for (iclass = 0; iclass < type->num_class; iclass++) {
if (iclass >= class_low && iclass <= class_high) {
i = place_ctx.block_locs[blk_id].x;
j = place_ctx.block_locs[blk_id].y;
if (type->class_inf[iclass].type == DRIVER)
rr_type = SOURCE;
else
rr_type = SINK;
inode = get_rr_node_index(L_rr_node_indices, i, j, rr_type, iclass);
rr_blk_source[blk_id][iclass] = inode;
} else {
rr_blk_source[blk_id][iclass] = OPEN;
}
}
}
return rr_blk_source;
}
static vtr::vector_map<ClusterNetId, t_bb> load_route_bb(int bb_factor) {
/* This routine loads the bounding box arrays used to limit the space *
* searched by the maze router when routing each net. The search is *
* limited to channels contained within the net bounding box expanded *
* by bb_factor channels on each side. For example, if bb_factor is *
* 0, the maze router must route each net within its bounding box. *
* If bb_factor = max(device_ctx.grid.width()-1, device_ctx.grid.height() - 1),
* the maze router will search every channel in *
* the FPGA if necessary. The bounding boxes returned by this routine *
* are different from the ones used by the placer in that they are *
* clipped to lie within (0,0) and (device_ctx.grid.width()-1,device_ctx.grid.height()-1) rather than (1,1) and *
* (device_ctx.grid.width()-1,device_ctx.grid.height()-1). */
vtr::vector_map<ClusterNetId, t_bb> route_bb;
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& device_ctx = g_vpr_ctx.device();
auto& route_ctx = g_vpr_ctx.routing();
auto nets = cluster_ctx.clb_nlist.nets();
route_bb.resize(nets.size());
for (auto net_id : nets) {
int driver_rr = route_ctx.net_rr_terminals[net_id][0];
const t_rr_node& source_node = device_ctx.rr_nodes[driver_rr];
VTR_ASSERT(source_node.type() == SOURCE);
VTR_ASSERT(source_node.xlow() <= source_node.xhigh());
VTR_ASSERT(source_node.ylow() <= source_node.yhigh());
int xmin = source_node.xlow();
int ymin = source_node.ylow();
int xmax = source_node.xhigh();
int ymax = source_node.yhigh();
auto net_sinks = cluster_ctx.clb_nlist.net_sinks(net_id);
for (size_t ipin = 1; ipin < net_sinks.size() + 1; ++ipin) { //Start at 1 since looping through sinks
int sink_rr = route_ctx.net_rr_terminals[net_id][ipin];
const t_rr_node& sink_node = device_ctx.rr_nodes[sink_rr];
VTR_ASSERT(sink_node.type() == SINK);
VTR_ASSERT(sink_node.xlow() <= sink_node.xhigh());
VTR_ASSERT(sink_node.ylow() <= sink_node.yhigh());
xmin = std::min<int>(xmin, sink_node.xlow());
xmax = std::max<int>(xmax, sink_node.xhigh());
ymin = std::min<int>(ymin, sink_node.ylow());
ymax = std::max<int>(ymax, sink_node.yhigh());
}
/* Want the channels on all 4 sides to be usable, even if bb_factor = 0. */
xmin -= 1;
ymin -= 1;
/* Expand the net bounding box by bb_factor, then clip to the physical *
* chip area. */
route_bb[net_id].xmin = max<int>(xmin - bb_factor, 0);
route_bb[net_id].xmax = min<int>(xmax + bb_factor, device_ctx.grid.width() - 1);
route_bb[net_id].ymin = max<int>(ymin - bb_factor, 0);
route_bb[net_id].ymax = min<int>(ymax + bb_factor, device_ctx.grid.height() - 1);
}
return route_bb;
}
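/* Worked example of the clipping in load_route_bb above: on an 8x8 grid
 * (width == height == 8) with bb_factor = 3, a net whose terminals span
 * x in [2,4] and y in [5,6] first has xmin/ymin reduced by 1 (to 1 and 4),
 * and after expansion by bb_factor and clipping gets a bounding box of
 * x in [0,7], y in [1,7]. */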
void add_to_mod_list(float *fptr) {
/* This routine adds the floating point pointer (fptr) into a *
* linked list that indicates all the pathcosts that have been *
* modified thus far. */
t_linked_f_pointer *mod_ptr;
mod_ptr = alloc_linked_f_pointer();
/* Add this element to the start of the modified list. */
mod_ptr->next = rr_modified_head;
mod_ptr->fptr = fptr;
rr_modified_head = mod_ptr;
}
namespace heap_ {
size_t parent(size_t i);
size_t left(size_t i);
size_t right(size_t i);
size_t size();
void expand_heap_if_full();
size_t parent(size_t i) {return i >> 1;}
// child indices of a heap
size_t left(size_t i) {return i << 1;}
size_t right(size_t i) {return (i << 1) + 1;}
size_t size() {return static_cast<size_t>(heap_tail - 1);} // heap[0] is not a valid element
// make a heap rooted at index i by **sifting down** in O(lgn) time
void sift_down(size_t hole) {
t_heap* head {heap[hole]};
size_t child {left(hole)};
while ((int)child < heap_tail) {
if ((int)child + 1 < heap_tail && heap[child + 1]->cost < heap[child]->cost)
++child;
if (heap[child]->cost < head->cost) {
heap[hole] = heap[child];
hole = child;
child = left(child);
}
else break;
}
heap[hole] = head;
}
// runs in O(n) time by sifting down; the least work is done on the most elements: 1 swap for the bottom layer, 2 swaps for the 2nd, ... lg(n) swaps for the top
// 1*(n/2) + 2*(n/4) + 3*(n/8) + ... + lg(n)*1 = 2n (sum of i/2^i)
void build_heap() {
// second half of heap are leaves
for (size_t i = heap_tail >> 1; i != 0; --i)
sift_down(i);
}
// O(lgn) sifting up to maintain heap property after insertion (should sift down when building heap)
void sift_up(size_t leaf, t_heap* const node) {
while ((leaf > 1) && (node->cost < heap[parent(leaf)]->cost)) {
// sift hole up
heap[leaf] = heap[parent(leaf)];
leaf = parent(leaf);
}
heap[leaf] = node;
}
void expand_heap_if_full() {
if (heap_tail > heap_size) { /* Heap is full */
heap_size *= 2;
heap = (t_heap **) vtr::realloc((void *) (heap + 1),
heap_size * sizeof(t_heap *));
heap--; /* heap goes from [1..heap_size] */
}
}
// adds an element to the back of the heap and expands if necessary, but does not maintain the heap property
void push_back(t_heap* const hptr) {
expand_heap_if_full();
heap[heap_tail] = hptr;
++heap_tail;
}
void push_back_node(int inode, float total_cost, int prev_node, int prev_edge,
float backward_path_cost, float R_upstream) {
/* Puts an rr_node on the heap under the same condition as node_to_heap,
but does not fix the heap property yet, as that is more efficiently done from
the bottom up with build_heap */
auto& route_ctx = g_vpr_ctx.routing();
if (total_cost >= route_ctx.rr_node_route_inf[inode].path_cost)
return;
t_heap* hptr = alloc_heap_data();
hptr->index = inode;
hptr->cost = total_cost;
hptr->u.prev_node = prev_node;
hptr->prev_edge = prev_edge;
hptr->backward_path_cost = backward_path_cost;
hptr->R_upstream = R_upstream;
push_back(hptr);
}
bool is_valid() {
for (size_t i = 1; (int)i <= heap_tail >> 1; ++i) {
if ((int)left(i) < heap_tail && heap[left(i)]->cost < heap[i]->cost) return false;
if ((int)right(i) < heap_tail && heap[right(i)]->cost < heap[i]->cost) return false;
}
return true;
}
// extract every element and print it
void pop_heap() {
while (!is_empty_heap()) vtr::printf_info("%e ", get_heap_head()->cost);
vtr::printf_info("\n");
}
// print every element; not necessarily in order for minheap
void print_heap() {
for (int i = 1; i < heap_tail >> 1; ++i) vtr::printf_info("(%e %e %e) ", heap[i]->cost, heap[left(i)]->cost, heap[right(i)]->cost);
vtr::printf_info("\n");
}
// verify correctness of extract top by making a copy, sorting it, and iterating it at the same time as extraction
void verify_extract_top() {
constexpr float float_epsilon = 1e-20;
std::cout << "copying heap\n";
std::vector<t_heap*> heap_copy {heap + 1, heap + heap_tail};
// sort based on cost with cheapest first
VTR_ASSERT(heap_copy.size() == size());
std::sort(begin(heap_copy), end(heap_copy),
[](const t_heap* a, const t_heap* b){
return a->cost < b->cost;
});
std::cout << "starting to compare top elements\n";
size_t i = 0;
while (!is_empty_heap()) {
while (heap_copy[i]->index == OPEN) ++i; // skip the ones that won't be extracted
auto top = get_heap_head();
if (abs(top->cost - heap_copy[i]->cost) > float_epsilon)
std::cout << "mismatch with sorted " << top << '(' << top->cost << ") " << heap_copy[i] << '(' << heap_copy[i]->cost << ")\n";
++i;
}
if (i != heap_copy.size()) std::cout << "did not finish extracting: " << i << " vs " << heap_copy.size() << std::endl;
else std::cout << "extract top working as intended\n";
}
}
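/* Sketch of the "lazy" insertion pattern the heap_ namespace supports (used by
 * the timing-driven router); candidate_nodes and cost_of are illustrative
 * names, not functions defined in VPR:
 *
 *   // Push candidates without maintaining the heap property ...
 *   for (int inode : candidate_nodes)
 *       heap_::push_back_node(inode, cost_of(inode), OPEN, OPEN, 0., 0.);
 *   // ... then heapify once in O(n) instead of sifting up per insertion.
 *   heap_::build_heap();
 *   VTR_ASSERT(heap_::is_valid());
 *   // get_heap_head() now extracts elements in increasing cost order.
 */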
// adds to the heap and maintains the heap property
static void add_to_heap(t_heap *hptr) {
heap_::expand_heap_if_full();
// start with undefined hole
++heap_tail;
heap_::sift_up(heap_tail - 1, hptr);
}
/*WMF: peeking accessor :) */
bool is_empty_heap(void) {
return (bool)(heap_tail == 1);
}
t_heap *
get_heap_head(void) {
/* Returns a pointer to the smallest element on the heap, or NULL if the *
* heap is empty. Invalid (index == OPEN) entries on the heap are never *
* returned -- they are just skipped over. */
t_heap *cheapest;
size_t hole, child;
do {
if (heap_tail == 1) { /* Empty heap. */
vtr::printf_warning(__FILE__, __LINE__, "Empty heap occurred in get_heap_head.\n");
vtr::printf_warning(__FILE__, __LINE__, "Some blocks are impossible to connect in this architecture.\n");
return (NULL);
}
cheapest = heap[1];
hole = 1;
child = 2;
--heap_tail;
while ((int)child < heap_tail) {
if (heap[child + 1]->cost < heap[child]->cost)
++child; // become right child
heap[hole] = heap[child];
hole = child;
child = heap_::left(child);
}
heap_::sift_up(hole, heap[heap_tail]);
} while (cheapest->index == OPEN); /* Get another one if invalid entry. */
return (cheapest);
}
void empty_heap(void) {
for (int i = 1; i < heap_tail; i++)
free_heap_data(heap[i]);
heap_tail = 1;
}
static t_heap *
alloc_heap_data(void) {
t_heap *temp_ptr;
if (heap_free_head == NULL) { /* No elements on the free list */
heap_free_head = (t_heap *) vtr::chunk_malloc(sizeof(t_heap),&heap_ch);
heap_free_head->u.next = NULL;
}
temp_ptr = heap_free_head;
heap_free_head = heap_free_head->u.next;
num_heap_allocated++;
return (temp_ptr);
}
void free_heap_data(t_heap *hptr) {
hptr->u.next = heap_free_head;
heap_free_head = hptr;
num_heap_allocated--;
}
void invalidate_heap_entries(int sink_node, int ipin_node) {
/* Marks all the heap entries consisting of sink_node, where it was reached *
* via ipin_node, as invalid (OPEN). Used only by the breadth_first router *
* and even then only in rare circumstances. */
for (int i = 1; i < heap_tail; i++) {
if (heap[i]->index == sink_node && heap[i]->u.prev_node == ipin_node)
heap[i]->index = OPEN; /* Invalid. */
}
}
t_trace *
alloc_trace_data(void) {
t_trace *temp_ptr;
if (trace_free_head == NULL) { /* No elements on the free list */
trace_free_head = (t_trace *) vtr::chunk_malloc(sizeof(t_trace),&trace_ch);
trace_free_head->next = NULL;
}
temp_ptr = trace_free_head;
trace_free_head = trace_free_head->next;
num_trace_allocated++;
return (temp_ptr);
}
void free_trace_data(t_trace *tptr) {
/* Puts the traceback structure pointed to by tptr on the free list. */
tptr->next = trace_free_head;
trace_free_head = tptr;
num_trace_allocated--;
}
static t_linked_f_pointer *
alloc_linked_f_pointer(void) {
/* This routine returns a linked list element with a float pointer as *
* the node data. */
/*int i;*/
t_linked_f_pointer *temp_ptr;
if (linked_f_pointer_free_head == NULL) {
/* No elements on the free list */
linked_f_pointer_free_head = (t_linked_f_pointer *) vtr::chunk_malloc(sizeof(t_linked_f_pointer),&linked_f_pointer_ch);
linked_f_pointer_free_head->next = NULL;
}
temp_ptr = linked_f_pointer_free_head;
linked_f_pointer_free_head = linked_f_pointer_free_head->next;
num_linked_f_pointer_allocated++;
return (temp_ptr);
}
/* Prints out the routing to file route_file. */
void print_route(const char* placement_file, const char* route_file) {
int inode, ilow, jlow, iclass;
t_rr_type rr_type;
t_trace *tptr;
FILE *fp;
fp = fopen(route_file, "w");
auto& place_ctx = g_vpr_ctx.placement();
auto& device_ctx = g_vpr_ctx.device();
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& route_ctx = g_vpr_ctx.mutable_routing();
fprintf(fp, "Placement_File: %s Placement_ID: %s\n", placement_file, place_ctx.placement_id.c_str());
fprintf(fp, "Array size: %zu x %zu logic blocks.\n", device_ctx.grid.width(), device_ctx.grid.height());
fprintf(fp, "\nRouting:");
for (auto net_id : cluster_ctx.clb_nlist.nets()) {
if (!cluster_ctx.clb_nlist.net_is_global(net_id)) {
fprintf(fp, "\n\nNet %zu (%s)\n\n", size_t(net_id), cluster_ctx.clb_nlist.net_name(net_id).c_str());
if (cluster_ctx.clb_nlist.net_sinks(net_id).size() == 0) {
fprintf(fp, "\n\nUsed in local cluster only, reserved one CLB pin\n\n");
} else {
tptr = route_ctx.trace_head[net_id];
while (tptr != NULL) {
inode = tptr->index;
rr_type = device_ctx.rr_nodes[inode].type();
ilow = device_ctx.rr_nodes[inode].xlow();
jlow = device_ctx.rr_nodes[inode].ylow();
fprintf(fp, "Node:\t%d\t%6s (%d,%d) ", inode,
device_ctx.rr_nodes[inode].type_string(), ilow, jlow);
if ((ilow != device_ctx.rr_nodes[inode].xhigh())
|| (jlow != device_ctx.rr_nodes[inode].yhigh()))
fprintf(fp, "to (%d,%d) ", device_ctx.rr_nodes[inode].xhigh(),
device_ctx.rr_nodes[inode].yhigh());
switch (rr_type) {
case IPIN:
case OPIN:
if (device_ctx.grid[ilow][jlow].type == device_ctx.IO_TYPE) {
fprintf(fp, " Pad: ");
} else { /* Not an IO pad. */
fprintf(fp, " Pin: ");
}
break;
case CHANX:
case CHANY:
fprintf(fp, " Track: ");
break;
case SOURCE:
case SINK:
if (device_ctx.grid[ilow][jlow].type == device_ctx.IO_TYPE) {
fprintf(fp, " Pad: ");
} else { /* Not an IO pad. */
fprintf(fp, " Class: ");
}
break;
default:
vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
"in print_route: Unexpected traceback element type: %d (%s).\n",
rr_type, device_ctx.rr_nodes[inode].type_string());
break;
}
fprintf(fp, "%d ", device_ctx.rr_nodes[inode].ptc_num());
if (device_ctx.grid[ilow][jlow].type != device_ctx.IO_TYPE && (rr_type == IPIN || rr_type == OPIN)) {
int pin_num = device_ctx.rr_nodes[inode].ptc_num();
int xoffset = device_ctx.grid[ilow][jlow].width_offset;
int yoffset = device_ctx.grid[ilow][jlow].height_offset;
ClusterBlockId iblock = place_ctx.grid_blocks[ilow - xoffset][jlow - yoffset].blocks[0];
VTR_ASSERT(iblock);
t_pb_graph_pin *pb_pin = get_pb_graph_node_pin_from_block_pin(iblock, pin_num);
t_pb_type *pb_type = pb_pin->parent_node->pb_type;
fprintf(fp, " %s.%s[%d] ", pb_type->name, pb_pin->port->name, pb_pin->pin_number);
}
/* Print the switch type used by this traceback element (useful when *
* debugging the routing). */
fprintf(fp, "Switch: %d", tptr->iswitch);
fprintf(fp, "\n");
tptr = tptr->next;
}
}
}
else { /* Global net. Never routed. */
fprintf(fp, "\n\nNet %zu (%s): global net connecting:\n\n", size_t(net_id),
cluster_ctx.clb_nlist.net_name(net_id).c_str());
for (auto pin_id : cluster_ctx.clb_nlist.net_pins(net_id)) {
ClusterBlockId block_id = cluster_ctx.clb_nlist.pin_block(pin_id);
int pin_index = cluster_ctx.clb_nlist.pin_physical_index(pin_id);
iclass = cluster_ctx.clb_nlist.block_type(block_id)->pin_class[pin_index];
fprintf(fp, "Block %s (#%zu) at (%d,%d), Pin class %d.\n",
cluster_ctx.clb_nlist.block_name(block_id).c_str(), size_t(block_id),
place_ctx.block_locs[block_id].x,
place_ctx.block_locs[block_id].y,
iclass);
}
}
}
fclose(fp);
if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_MEM)) {
fp = vtr::fopen(getEchoFileName(E_ECHO_MEM), "w");
fprintf(fp, "\nNum_heap_allocated: %d Num_trace_allocated: %d\n",
num_heap_allocated, num_trace_allocated);
fprintf(fp, "Num_linked_f_pointer_allocated: %d\n",
num_linked_f_pointer_allocated);
fclose(fp);
}
//Save the digest of the route file
route_ctx.routing_id = vtr::secure_digest_file(route_file);
}
/* TODO: jluu: I now always enforce logically equivalent outputs to use at most one output pin, should rethink how to do this */
void reserve_locally_used_opins(float pres_fac, float acc_fac, bool rip_up_local_opins) {
/* In the past, this function implicitly allowed LUT duplication when there were free LUTs.
This was especially important for logical equivalence; however, now that we have a very general logic cluster,
it does not make sense to allow LUT duplication implicitly. We'll need to look into how we want to handle this case
*/
int num_local_opin, inode, from_node, iconn, num_edges, to_node;
int iclass, ipin;
float cost;
t_heap *heap_head_ptr;
t_type_ptr type;
auto& cluster_ctx = g_vpr_ctx.clustering();
auto& route_ctx = g_vpr_ctx.mutable_routing();
auto& device_ctx = g_vpr_ctx.device();
if (rip_up_local_opins) {
for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
type = cluster_ctx.clb_nlist.block_type(blk_id);
for (iclass = 0; iclass < type->num_class; iclass++) {
num_local_opin = route_ctx.clb_opins_used_locally[blk_id][iclass].size();
/* Always 0 for pads and for RECEIVER (IPIN) classes */
for (ipin = 0; ipin < num_local_opin; ipin++) {
inode = route_ctx.clb_opins_used_locally[blk_id][iclass][ipin];
adjust_one_rr_occ_and_apcost(inode, -1, pres_fac, acc_fac);
}
}
}
}
for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
type = cluster_ctx.clb_nlist.block_type(blk_id);
for (iclass = 0; iclass < type->num_class; iclass++) {
num_local_opin = route_ctx.clb_opins_used_locally[blk_id][iclass].size();
/* Always 0 for pads and for RECEIVER (IPIN) classes */
if (num_local_opin != 0) { /* Have to reserve (use) some OPINs */
from_node = route_ctx.rr_blk_source[blk_id][iclass];
num_edges = device_ctx.rr_nodes[from_node].num_edges();
for (iconn = 0; iconn < num_edges; iconn++) {
to_node = device_ctx.rr_nodes[from_node].edge_sink_node(iconn);
cost = get_rr_cong_cost(to_node);
node_to_heap(to_node, cost, OPEN, OPEN, 0., 0.);
}
for (ipin = 0; ipin < num_local_opin; ipin++) {
heap_head_ptr = get_heap_head();
inode = heap_head_ptr->index;
adjust_one_rr_occ_and_apcost(inode, 1, pres_fac, acc_fac);
route_ctx.clb_opins_used_locally[blk_id][iclass][ipin] = inode;
free_heap_data(heap_head_ptr);
}
empty_heap();
}
}
}
}
static void adjust_one_rr_occ_and_apcost(int inode, int add_or_sub,
float pres_fac, float acc_fac) {
/* Increments or decrements (depending on add_or_sub) the occupancy of *
* one rr_node, and adjusts the present cost of that node appropriately. */
int occ, capacity;
auto& route_ctx = g_vpr_ctx.mutable_routing();
auto& device_ctx = g_vpr_ctx.device();
occ = route_ctx.rr_node_route_inf[inode].occ() + add_or_sub;
capacity = device_ctx.rr_nodes[inode].capacity();
route_ctx.rr_node_route_inf[inode].set_occ(occ);
if (occ < capacity) {
route_ctx.rr_node_route_inf[inode].pres_cost = 1.0;
} else {
route_ctx.rr_node_route_inf[inode].pres_cost = 1.0 + (occ + 1 - capacity) * pres_fac;
if (add_or_sub == 1) {
route_ctx.rr_node_route_inf[inode].acc_cost += (occ - capacity) * acc_fac;
}
}
}
void free_chunk_memory_trace(void) {
if (trace_ch.chunk_ptr_head != NULL) {
free_chunk_memory(&trace_ch);
}
}
// connection based overhaul (more specificity than nets)
// utility and debugging functions -----------------------
void print_traceback(ClusterNetId net_id) {
// linearly print linked list
auto& route_ctx = g_vpr_ctx.routing();
auto& device_ctx = g_vpr_ctx.device();
vtr::printf_info("traceback %zu: ", size_t(net_id));
t_trace* head = route_ctx.trace_head[net_id];
while (head) {
int inode {head->index};
if (device_ctx.rr_nodes[inode].type() == SINK)
vtr::printf_info("%d(sink)(%d)->",inode, route_ctx.rr_node_route_inf[inode].occ());
else
vtr::printf_info("%d(%d)->",inode, route_ctx.rr_node_route_inf[inode].occ());
head = head->next;
}
vtr::printf_info("\n");
}