| #include <cstdio> | |
| #include <cstring> | |
| #include <cmath> | |
| #include <ctime> | |
| #include <algorithm> | |
| #include <vector> | |
| #include "vtr_assert.h" | |
| using namespace std; | |
| #include "vtr_util.h" | |
| #include "vtr_memory.h" | |
| #include "vtr_matrix.h" | |
| #include "vtr_math.h" | |
| #include "vtr_log.h" | |
| #include "vpr_types.h" | |
| #include "vpr_utils.h" | |
| #include "vpr_error.h" | |
| #include "globals.h" | |
| #include "rr_graph_util.h" | |
| #include "build_rr_graph.h" | |
| #include "rr_graph_area.h" | |
| #include "build_rr_graph2.h" | |
| #include "rr_graph_sbox.h" | |
| #include "rr_graph_timing_params.h" | |
| #include "rr_graph_indexed_data.h" | |
| #include "check_rr_graph.h" | |
| #include "read_xml_arch_file.h" | |
| #include "echo_files.h" | |
| #include "dump_rr_structs.h" | |
| #include "cb_metrics.h" | |
| #include "build_switchblocks.h" | |
| #include "rr_graph_writer.h" | |
| #include "rr_graph_reader.h" | |
| #include "router_lookahead_map.h" | |
| #include "rr_types.h" | |
| struct t_mux { | |
| int size; | |
| t_mux *next; | |
| }; | |
| struct t_mux_size_distribution { | |
| int mux_count; | |
| int max_index; | |
| int *distr; | |
| t_mux_size_distribution *next; | |
| }; | |
| struct t_clb_to_clb_directs { | |
| t_type_descriptor *from_clb_type; | |
| int from_clb_pin_start_index; | |
| int from_clb_pin_end_index; | |
| t_type_descriptor *to_clb_type; | |
| int to_clb_pin_start_index; | |
| int to_clb_pin_end_index; | |
| int switch_index; //The switch type used by this direct connection | |
| }; | |
| struct t_pin_loc { | |
| int pin_index; | |
| int width_offset; | |
| int height_offset; | |
| e_side side; | |
| }; | |
| /******************* Variables local to this module. ***********************/ | |
| /********************* Subroutines local to this module. *******************/ | |
| RRGraph convert_rr_graph(); | |
| static vtr::NdMatrix<int, 5> alloc_and_load_pin_to_track_map(const e_pin_type pin_type, | |
| const vtr::Matrix<int>& Fc, const t_type_ptr Type, const std::vector<bool>& perturb_switch_pattern, | |
| const e_directionality directionality, | |
| const int num_seg_types, const int *sets_per_seg_type); | |
| static vtr::NdMatrix<int, 5> alloc_and_load_pin_to_seg_type( | |
| const e_pin_type pin_type, | |
| const int seg_type_tracks, const int Fc, | |
| const t_type_ptr Type, const bool perturb_switch_pattern, | |
| const e_directionality directionality); | |
| static void advance_to_next_block_side(t_type_ptr Type, int& width_offset, int& height_offset, e_side& side); | |
| static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_track_to_pin_lookup( | |
| vtr::NdMatrix<int, 5> pin_to_track_map, const vtr::Matrix<int>& Fc, | |
| const int width, const int height, | |
| const int num_pins, const int max_chan_width, | |
| const int num_seg_types); | |
| static void build_bidir_rr_opins( | |
| const int i, const int j, const e_side side, | |
| t_rr_node * L_rr_node, const t_rr_node_indices& L_rr_node_indices, | |
| const t_pin_to_track_lookup& opin_to_track_map, const std::vector<vtr::Matrix<int>>&Fc_out, | |
| bool * L_rr_edge_done, const t_seg_details * seg_details, | |
| const DeviceGrid& grid, | |
| const t_direct_inf *directs, const int num_directs, const t_clb_to_clb_directs *clb_to_clb_directs, | |
| const int num_seg_types); | |
| static void build_unidir_rr_opins( | |
| const int i, const int j, const e_side side, | |
| const DeviceGrid& grid, const std::vector<vtr::Matrix<int>>&Fc_out, | |
| const int max_chan_width, | |
| const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, | |
| vtr::NdMatrix<int, 3>& Fc_xofs, vtr::NdMatrix<int, 3>& Fc_yofs, | |
| bool * L_rr_edge_done, | |
| bool * Fc_clipped, const t_rr_node_indices& L_rr_node_indices, | |
| const t_direct_inf *directs, const int num_directs, const t_clb_to_clb_directs *clb_to_clb_directs, | |
| const int num_seg_types); | |
| static int get_opin_direct_connecions( | |
| int x, int y, e_side side, int opin, | |
| t_linked_edge ** edge_list_ptr, const t_rr_node_indices& L_rr_node_indices, | |
| const t_direct_inf *directs, const int num_directs, | |
| const t_clb_to_clb_directs *clb_to_clb_directs); | |
| static void alloc_and_load_rr_graph( | |
| const int num_nodes, | |
| t_rr_node * L_rr_node, const int num_seg_types, | |
| const t_seg_details * seg_details, | |
| const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, | |
| bool * L_rr_edge_done, | |
| const t_track_to_pin_lookup& track_to_pin_lookup, | |
| const t_pin_to_track_lookup& opin_to_track_map, const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn, | |
| t_sb_connection_map *sb_conn_map, | |
| const DeviceGrid& grid, const int Fs, | |
| short ******sblock_pattern, const std::vector<vtr::Matrix<int>>&Fc_out, | |
| vtr::NdMatrix<int, 3>& Fc_xofs, vtr::NdMatrix<int, 3>& Fc_yofs, | |
| const t_rr_node_indices& L_rr_node_indices, | |
| const int max_chan_width, | |
| const int delayless_switch, const enum e_directionality directionality, | |
| const int wire_to_ipin_switch, bool * Fc_clipped, | |
| const t_direct_inf *directs, const int num_directs, const t_clb_to_clb_directs *clb_to_clb_directs); | |
| static void load_uniform_connection_block_pattern( | |
| vtr::NdMatrix<int, 5>& tracks_connected_to_pin, | |
| const std::vector<t_pin_loc>& pin_locations, | |
| const int x_chan_width, const int y_chan_width, const int Fc, | |
| const enum e_directionality directionality); | |
| static void load_perturbed_connection_block_pattern( | |
| vtr::NdMatrix<int, 5>& tracks_connected_to_pin, | |
| const std::vector<t_pin_loc>& pin_locations, | |
| const int x_chan_width, const int y_chan_width, const int Fc, | |
| const enum e_directionality directionality); | |
| static std::vector<bool> alloc_and_load_perturb_opins(const t_type_ptr type, const vtr::Matrix<int>& Fc_out, const int max_chan_width, | |
| const int num_seg_types, const t_segment_inf *segment_inf); | |
| #ifdef ENABLE_CHECK_ALL_TRACKS | |
| static void check_all_tracks_reach_pins( | |
| t_type_ptr type, | |
| int *****tracks_connected_to_pin, | |
| int max_chan_width, int Fc, | |
| enum e_pin_type ipin_or_opin); | |
| #endif | |
| static std::vector<std::vector<bool>> alloc_and_load_perturb_ipins( | |
| const int L_num_types, | |
| const int num_seg_types, | |
| const int *sets_per_seg_type, | |
| const std::vector<vtr::Matrix<int>>&Fc_in, const std::vector<vtr::Matrix<int>>&Fc_out, | |
| const enum e_directionality directionality); | |
| static void build_rr_sinks_sources( | |
| const int i, const int j, | |
| t_rr_node * L_rr_node, const t_rr_node_indices& L_rr_node_indices, | |
| const int delayless_switch, const DeviceGrid& grid); | |
| static void build_rr_chan( | |
| const int i, const int j, const t_rr_type chan_type, | |
| const t_track_to_pin_lookup& track_to_pin_lookup, | |
| t_sb_connection_map *sb_conn_map, | |
| const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn, const int cost_index_offset, | |
| const int max_chan_width, | |
| const DeviceGrid& grid, | |
| const int tracks_per_chan, | |
| short ******sblock_pattern, const int Fs_per_side, | |
| const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, | |
| const t_rr_node_indices& L_rr_node_indices, | |
| bool * L_rr_edge_done, t_rr_node * L_rr_node, | |
| const int wire_to_ipin_switch, const enum e_directionality directionality); | |
| static int alloc_and_load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, | |
| const int wire_to_arch_ipin_switch, int *wire_to_rr_ipin_switch); | |
| static void remap_rr_node_switch_indices(map<int, int> *switch_fanin); | |
| static void load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, map<int, int> *switch_fanin); | |
| static int alloc_rr_switch_inf(map<int, int> *switch_fanin); | |
| static void rr_graph_externals( | |
| const t_segment_inf * segment_inf, int num_seg_types, int max_chan_width, | |
| int wire_to_rr_ipin_switch, enum e_base_cost_type base_cost_type); | |
| void alloc_and_load_edges_and_switches( | |
| t_rr_node * L_rr_node, const int inode, | |
| const int num_edges, bool * L_rr_edge_done, | |
| t_linked_edge * edge_list_head); | |
| static t_clb_to_clb_directs *alloc_and_load_clb_to_clb_directs(const t_direct_inf *directs, const int num_directs, | |
| const int delayless_switch); | |
| static void free_type_track_to_pin_map( | |
| t_track_to_pin_lookup& track_to_pin_map, | |
| t_type_ptr types, int max_chan_width); | |
| static t_seg_details *alloc_and_load_global_route_seg_details( | |
| const int global_route_switch, | |
| int * num_seg_details = 0); | |
| static std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const int L_num_types, const t_type_ptr types, const int max_pins, | |
| const int num_seg_types, const int *sets_per_seg_type, | |
| const int max_chan_width, const e_fc_type fc_type, | |
| const enum e_directionality directionality, | |
| bool *Fc_clipped); | |
| static void build_rr_graph( | |
| const t_graph_type graph_type, const int L_num_types, | |
| const t_type_ptr types, | |
| const DeviceGrid& grid, | |
| t_chan_width *nodes_per_chan, | |
| const enum e_switch_block_type sb_type, const int Fs, | |
| const vector<t_switchblock_inf> switchblocks, | |
| const int num_seg_types, const int num_arch_switches, | |
| const t_segment_inf * segment_inf, | |
| const int global_route_switch, const int delayless_switch, | |
| const int wire_to_arch_ipin_switch, | |
| const float R_minW_nmos, | |
| const float R_minW_pmos, | |
| const enum e_base_cost_type base_cost_type, | |
| const bool trim_empty_channels, | |
| const bool trim_obs_channels, | |
| const t_direct_inf *directs, const int num_directs, | |
| const char* dump_rr_structs_file, | |
| int *wire_to_rr_ipin_switch, | |
| int *num_rr_switches, | |
| int *Warnings); | |
| /******************* Subroutine definitions *******************************/ | |
| RRGraph create_rr_graph( | |
| const t_graph_type graph_type, | |
| const int num_block_types, const t_type_ptr block_types, | |
| const DeviceGrid& grid, | |
| t_chan_width *nodes_per_chan, | |
| const int num_arch_switches, | |
| t_det_routing_arch* det_routing_arch, | |
| const t_segment_inf * segment_inf, | |
| const enum e_base_cost_type base_cost_type, | |
| const bool trim_empty_channels, | |
| const bool trim_obs_channels, | |
| const t_direct_inf *directs, const int num_directs, | |
| int *num_rr_switches, | |
| int *Warnings) { | |
| if (!det_routing_arch->read_rr_graph_filename.empty()) { | |
| load_rr_file( | |
| graph_type, | |
| grid, | |
| nodes_per_chan, | |
| det_routing_arch->num_segment, | |
| segment_inf, | |
| base_cost_type, | |
| &det_routing_arch->wire_to_rr_ipin_switch, | |
| num_rr_switches, | |
| det_routing_arch->read_rr_graph_filename.c_str()); | |
| } else { | |
| build_rr_graph( | |
| graph_type, | |
| num_block_types, block_types, | |
| grid, | |
| nodes_per_chan, | |
| det_routing_arch->switch_block_type, | |
| det_routing_arch->Fs, | |
| det_routing_arch->switchblocks, | |
| det_routing_arch->num_segment, | |
| num_arch_switches, | |
| segment_inf, | |
| det_routing_arch->global_route_switch, | |
| det_routing_arch->delayless_switch, | |
| det_routing_arch->wire_to_arch_ipin_switch, | |
| det_routing_arch->R_minW_nmos, | |
| det_routing_arch->R_minW_pmos, | |
| base_cost_type, | |
| trim_empty_channels, | |
| trim_obs_channels, | |
| directs, num_directs, | |
| det_routing_arch->dump_rr_structs_file, | |
| &det_routing_arch->wire_to_rr_ipin_switch, | |
| num_rr_switches, | |
| Warnings); | |
| } | |
| //Write out rr graph file if needed | |
| if (!det_routing_arch->write_rr_graph_filename.empty()) { | |
| write_rr_graph(det_routing_arch->write_rr_graph_filename.c_str(), segment_inf, det_routing_arch->num_segment); | |
| } | |
| return convert_rr_graph(); | |
| } | |
| //TODO: remove when this conversion (from traditional to new data structure) is no longer needed | |
| RRGraph convert_rr_graph() { | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| RRGraph rr_graph; | |
| //Create the switches | |
| std::map<int,RRSwitchId> old_to_new_rr_switch; | |
| for (int inode = 0; inode < device_ctx.num_rr_nodes; ++inode) { | |
| const auto& node = device_ctx.rr_nodes[inode]; | |
| for (int iedge = 0; iedge < node.num_edges(); ++iedge) { | |
| short iswitch = node.edge_switch(iedge); | |
| if (!old_to_new_rr_switch.count(iswitch)) { //Only create the switch once | |
| RRSwitchId rr_switch = rr_graph.create_switch(device_ctx.rr_switch_inf[iswitch]); | |
| VTR_ASSERT(!old_to_new_rr_switch.count(iswitch)); | |
| old_to_new_rr_switch[iswitch] = rr_switch; | |
| } | |
| } | |
| } | |
| //Create the nodes | |
| std::map<int,RRNodeId> old_to_new_rr_node; | |
| for (int inode = 0; inode < device_ctx.num_rr_nodes; ++inode) { | |
| auto& node = device_ctx.rr_nodes[inode]; | |
| RRNodeId rr_node = rr_graph.create_node(node.type()); | |
| rr_graph.set_node_xlow(rr_node, node.xlow()); | |
| rr_graph.set_node_ylow(rr_node, node.ylow()); | |
| rr_graph.set_node_xhigh(rr_node, node.xhigh()); | |
| rr_graph.set_node_yhigh(rr_node, node.yhigh()); | |
| rr_graph.set_node_capacity(rr_node, node.capacity()); | |
| rr_graph.set_node_ptc_num(rr_node, node.ptc_num()); | |
| rr_graph.set_node_cost_index(rr_node, node.cost_index()); | |
| if (node.type() == CHANX || node.type() == CHANY) { | |
| rr_graph.set_node_direction(rr_node, node.direction()); | |
| } | |
| if (node.type() == IPIN || node.type() == OPIN) { | |
| rr_graph.set_node_side(rr_node, node.side()); | |
| } | |
| rr_graph.set_node_R(rr_node, node.R()); | |
| rr_graph.set_node_C(rr_node, node.C()); | |
| VTR_ASSERT(!old_to_new_rr_node.count(inode)); | |
| old_to_new_rr_node[inode] = rr_node; | |
| } | |
| //Create the edges | |
| std::map<std::pair<int,int>,RREdgeId> old_to_new_rr_edge; //Key: {inode,iedge} | |
| for (int inode = 0; inode < device_ctx.num_rr_nodes; ++inode) { | |
| const auto& node = device_ctx.rr_nodes[inode]; | |
| for (int iedge = 0; iedge < node.num_edges(); ++iedge) { | |
| int isink_node = node.edge_sink_node(iedge); | |
| int iswitch = node.edge_switch(iedge); | |
| VTR_ASSERT(old_to_new_rr_node.count(inode)); | |
| VTR_ASSERT(old_to_new_rr_node.count(isink_node)); | |
| VTR_ASSERT(old_to_new_rr_switch.count(iswitch)); | |
| RREdgeId rr_edge = rr_graph.create_edge(old_to_new_rr_node[inode], old_to_new_rr_node[isink_node], old_to_new_rr_switch[iswitch]); | |
| auto key = std::make_pair(inode, iedge); | |
| VTR_ASSERT(!old_to_new_rr_edge.count(key)); | |
| old_to_new_rr_edge[key] = rr_edge; | |
| } | |
| } | |
| return rr_graph; | |
| } | |
| static void build_rr_graph( | |
| const t_graph_type graph_type, | |
| const int L_num_types, | |
| const t_type_ptr types, | |
| const DeviceGrid& grid, | |
| t_chan_width *nodes_per_chan, | |
| const enum e_switch_block_type sb_type, | |
| const int Fs, | |
| const vector<t_switchblock_inf> switchblocks, | |
| const int num_seg_types, | |
| const int num_arch_switches, | |
| const t_segment_inf * segment_inf, | |
| const int global_route_switch, | |
| const int delayless_switch, | |
| const int wire_to_arch_ipin_switch, | |
| const float R_minW_nmos, | |
| const float R_minW_pmos, | |
| const enum e_base_cost_type base_cost_type, | |
| const bool trim_empty_channels, | |
| const bool trim_obs_channels, | |
| const t_direct_inf *directs, | |
| const int num_directs, | |
| const char* dump_rr_structs_file, | |
| int *wire_to_rr_ipin_switch, | |
| int *num_rr_switches, | |
| int *Warnings) { | |
| vtr::printf_info("Starting build routing resource graph...\n"); | |
| clock_t begin = clock(); | |
| /* Reset warning flag */ | |
| *Warnings = RR_GRAPH_NO_WARN; | |
| /* Decode the graph_type */ | |
| bool is_global_graph = (GRAPH_GLOBAL == graph_type ? true : false); | |
| bool use_full_seg_groups = (GRAPH_UNIDIR_TILEABLE == graph_type ? true : false); | |
| enum e_directionality directionality = (GRAPH_BIDIR == graph_type ? BI_DIRECTIONAL : UNI_DIRECTIONAL); | |
| if (is_global_graph) { | |
| directionality = BI_DIRECTIONAL; | |
| } | |
| /* Global routing uses a single longwire track */ | |
| int max_chan_width = (is_global_graph ? 1 : nodes_per_chan->max); | |
| VTR_ASSERT(max_chan_width > 0); | |
| auto& device_ctx = g_vpr_ctx.mutable_device(); | |
| t_clb_to_clb_directs *clb_to_clb_directs = NULL; | |
| if (num_directs > 0) { | |
| clb_to_clb_directs = alloc_and_load_clb_to_clb_directs(directs, num_directs, delayless_switch); | |
| } | |
| /* START SEG_DETAILS */ | |
| int num_seg_details = 0; | |
| t_seg_details *seg_details = NULL; | |
| if (is_global_graph) { | |
| /* Sets up a single unit length segment type for global routing. */ | |
| seg_details = alloc_and_load_global_route_seg_details( | |
| global_route_switch, &num_seg_details); | |
| } else { | |
| /* Setup segments including distrubuting tracks and staggering. | |
| * If use_full_seg_groups is specified, max_chan_width may be | |
| * changed. Warning should be singled to caller if this happens. */ | |
| size_t max_dim = std::max(grid.width(), grid.height()) - 2; //-2 for no perim channels | |
| seg_details = alloc_and_load_seg_details(&max_chan_width, | |
| max_dim, num_seg_types, segment_inf, | |
| use_full_seg_groups, is_global_graph, directionality, | |
| &num_seg_details); | |
| if ((is_global_graph ? 1 : nodes_per_chan->max) != max_chan_width) { | |
| nodes_per_chan->max = max_chan_width; | |
| *Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED; | |
| } | |
| if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SEG_DETAILS)) { | |
| dump_seg_details(seg_details, max_chan_width, | |
| getEchoFileName(E_ECHO_SEG_DETAILS)); | |
| } | |
| } | |
| /* END SEG_DETAILS */ | |
| /* START CHAN_DETAILS */ | |
| t_chan_details chan_details_x; | |
| t_chan_details chan_details_y; | |
| alloc_and_load_chan_details(grid, nodes_per_chan, | |
| trim_empty_channels, trim_obs_channels, | |
| num_seg_details, seg_details, | |
| chan_details_x, chan_details_y); | |
| if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CHAN_DETAILS)) { | |
| dump_chan_details(chan_details_x, chan_details_y, max_chan_width, grid, | |
| getEchoFileName(E_ECHO_CHAN_DETAILS)); | |
| } | |
| /* END CHAN_DETAILS */ | |
| /* START FC */ | |
| /* Determine the actual value of Fc */ | |
| std::vector<vtr::Matrix<int>> Fc_in; /* [0..device_ctx.num_block_types-1][0..num_pins-1][0..num_segments-1] */ | |
| std::vector<vtr::Matrix<int>> Fc_out; /* [0..device_ctx.num_block_types-1][0..num_pins-1][0..num_segments-1] */ | |
| /* get maximum number of pins across all blocks */ | |
| int max_pins = types[0].num_pins; | |
| for (int i = 1; i < L_num_types; ++i) { | |
| if (types[i].num_pins > max_pins) { | |
| max_pins = types[i].num_pins; | |
| } | |
| } | |
| /* get the number of 'sets' for each segment type -- unidirectial architectures have two tracks in a set, bidirectional have one */ | |
| int total_sets = max_chan_width; | |
| if (directionality == UNI_DIRECTIONAL) { | |
| VTR_ASSERT(max_chan_width % 2 == 0); | |
| total_sets /= 2; | |
| } | |
| int *sets_per_seg_type = get_seg_track_counts(total_sets, num_seg_types, segment_inf, use_full_seg_groups); | |
| if (is_global_graph) { | |
| //All pins can connect during global routing | |
| auto ones = vtr::Matrix<int>({size_t(max_pins), size_t(num_seg_types)}, 1); | |
| Fc_in = std::vector<vtr::Matrix<int>>(L_num_types, ones); | |
| Fc_out = std::vector<vtr::Matrix<int>>(L_num_types, ones); | |
| } else { | |
| bool Fc_clipped = false; | |
| Fc_in = alloc_and_load_actual_fc(L_num_types, types, max_pins, num_seg_types, sets_per_seg_type, max_chan_width, | |
| e_fc_type::IN, directionality, &Fc_clipped); | |
| if (Fc_clipped) { | |
| *Warnings |= RR_GRAPH_WARN_FC_CLIPPED; | |
| } | |
| Fc_clipped = false; | |
| Fc_out = alloc_and_load_actual_fc(L_num_types, types, max_pins, num_seg_types, sets_per_seg_type, max_chan_width, | |
| e_fc_type::OUT, directionality, &Fc_clipped); | |
| if (Fc_clipped) { | |
| *Warnings |= RR_GRAPH_WARN_FC_CLIPPED; | |
| } | |
| for (int i = 1; i < L_num_types; ++i) { /* Skip "EMPTY" */ | |
| for (int j = 0; j < device_ctx.block_types[i].num_pins; ++j) { | |
| for (int k = 0; k < num_seg_types; k++) { | |
| #ifdef VERBOSE | |
| vtr::printf_info("Fc Actual Values: type = %s, pin = %d, seg = %d (%s), Fc_out = %d, Fc_in = %d.\n", | |
| device_ctx.block_types[i].name, j, k, segment_inf[k].name, Fc_out[i][j][k], Fc_in[i][j][k]); | |
| #endif /* VERBOSE */ | |
| VTR_ASSERT_MSG(Fc_out[i][j][k] == 0 || Fc_in[i][j][k] == 0, | |
| "Pins must be inputs or outputs (i.e. can not have both non-zero Fc_out and Fc_in)"); | |
| } | |
| } | |
| } | |
| } | |
| auto perturb_ipins = alloc_and_load_perturb_ipins(L_num_types, num_seg_types, | |
| sets_per_seg_type, Fc_in, Fc_out, directionality); | |
| /* END FC */ | |
| /* Alloc node lookups, count nodes, alloc rr nodes */ | |
| device_ctx.num_rr_nodes = 0; | |
| device_ctx.rr_node_indices = alloc_and_load_rr_node_indices(max_chan_width, grid, | |
| &device_ctx.num_rr_nodes, chan_details_x, chan_details_y); | |
| device_ctx.rr_nodes = new t_rr_node[device_ctx.num_rr_nodes]; | |
| bool *L_rr_edge_done = (bool *) vtr::malloc(sizeof (bool) * device_ctx.num_rr_nodes); | |
| memset(L_rr_edge_done, 0, sizeof (bool) * device_ctx.num_rr_nodes); | |
| /* These are data structures used by the the unidir opin mapping. They are used | |
| to spread connections evenly for each segment type among the available | |
| wire start points */ | |
| vtr::NdMatrix<int, 3> Fc_xofs({ | |
| grid.height() - 1, | |
| grid.width() - 1, | |
| size_t(num_seg_types) | |
| }, | |
| 0); //[0..grid.height()-2][0..grid.width()-2][0..num_seg_types-1] | |
| vtr::NdMatrix<int, 3> Fc_yofs({ | |
| grid.width() - 1, | |
| grid.height() - 1, | |
| size_t(num_seg_types) | |
| }, | |
| 0); //[0..grid.width()-2][0..grid.height()-2][0..num_seg_types-1] | |
| /* START SB LOOKUP */ | |
| /* Alloc and load the switch block lookup */ | |
| vtr::NdMatrix<std::vector<int>, 3> switch_block_conn; | |
| short ******unidir_sb_pattern = NULL; | |
| t_sb_connection_map *sb_conn_map = NULL; //for custom switch blocks | |
| if (is_global_graph) { | |
| switch_block_conn = alloc_and_load_switch_block_conn(1, SUBSET, 3); | |
| } else if (BI_DIRECTIONAL == directionality) { | |
| if (sb_type == CUSTOM) { | |
| sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, chan_details_y, | |
| grid, | |
| switchblocks, nodes_per_chan, directionality); | |
| } else { | |
| switch_block_conn = alloc_and_load_switch_block_conn(max_chan_width, sb_type, Fs); | |
| } | |
| } else { | |
| VTR_ASSERT(UNI_DIRECTIONAL == directionality); | |
| if (sb_type == CUSTOM) { | |
| sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, chan_details_y, | |
| grid, | |
| switchblocks, nodes_per_chan, directionality); | |
| } else { | |
| /* it looks like we get unbalanced muxing from this switch block code with Fs > 3 */ | |
| VTR_ASSERT(Fs == 3); | |
| unidir_sb_pattern = alloc_sblock_pattern_lookup(grid, max_chan_width); | |
| for (size_t i = 0; i < grid.width() - 1; i++) { | |
| for (size_t j = 0; j < grid.height() - 1; j++) { | |
| load_sblock_pattern_lookup(i, j, grid, nodes_per_chan, | |
| chan_details_x, chan_details_y, | |
| Fs, sb_type, unidir_sb_pattern); | |
| } | |
| } | |
| if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SBLOCK_PATTERN)) { | |
| dump_sblock_pattern(unidir_sb_pattern, max_chan_width, grid, | |
| getEchoFileName(E_ECHO_SBLOCK_PATTERN)); | |
| } | |
| } | |
| } | |
| /* END SB LOOKUP */ | |
| /* START IPIN MAP */ | |
| /* Create ipin map lookups */ | |
| t_pin_to_track_lookup ipin_to_track_map(L_num_types); /* [0..device_ctx.num_block_types-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */ | |
| t_track_to_pin_lookup track_to_pin_lookup(L_num_types); /* [0..device_ctx.num_block_types-1][0..max_chan_width-1][0..width][0..height][0..3] */ | |
| for (int itype = 0; itype < L_num_types; ++itype) { | |
| ipin_to_track_map[itype] = alloc_and_load_pin_to_track_map(RECEIVER, | |
| Fc_in[itype], &types[itype], perturb_ipins[itype], directionality, | |
| num_seg_types, sets_per_seg_type); | |
| track_to_pin_lookup[itype] = alloc_and_load_track_to_pin_lookup( | |
| ipin_to_track_map[itype], Fc_in[itype], types[itype].width, types[itype].height, | |
| types[itype].num_pins, max_chan_width, num_seg_types); | |
| } | |
| /* END IPIN MAP */ | |
| /* START OPIN MAP */ | |
| /* Create opin map lookups */ | |
| t_pin_to_track_lookup opin_to_track_map(L_num_types); /* [0..device_ctx.num_block_types-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */ | |
| if (BI_DIRECTIONAL == directionality) { | |
| for (int i = 0; i < L_num_types; ++i) { | |
| auto perturb_opins = alloc_and_load_perturb_opins(&types[i], Fc_out[i], | |
| max_chan_width, num_seg_types, segment_inf); | |
| opin_to_track_map[i] = alloc_and_load_pin_to_track_map(DRIVER, | |
| Fc_out[i], &types[i], perturb_opins, directionality, | |
| num_seg_types, sets_per_seg_type); | |
| } | |
| } | |
| /* END OPIN MAP */ | |
| bool Fc_clipped = false; | |
| alloc_and_load_rr_graph(device_ctx.num_rr_nodes, device_ctx.rr_nodes, num_seg_types, | |
| seg_details, chan_details_x, chan_details_y, | |
| L_rr_edge_done, track_to_pin_lookup, opin_to_track_map, | |
| switch_block_conn, sb_conn_map, grid, Fs, unidir_sb_pattern, | |
| Fc_out, Fc_xofs, Fc_yofs, device_ctx.rr_node_indices, max_chan_width, | |
| delayless_switch, directionality, wire_to_arch_ipin_switch, &Fc_clipped, | |
| directs, num_directs, clb_to_clb_directs); | |
| /* Update rr_nodes capacities if global routing */ | |
| if (graph_type == GRAPH_GLOBAL) { | |
| for (int i = 0; i < device_ctx.num_rr_nodes; i++) { | |
| if (device_ctx.rr_nodes[i].type() == CHANX) { | |
| int ylow = device_ctx.rr_nodes[i].ylow(); | |
| device_ctx.rr_nodes[i].set_capacity(device_ctx.chan_width.x_list[ylow]); | |
| } | |
| if (device_ctx.rr_nodes[i].type() == CHANY) { | |
| int xlow = device_ctx.rr_nodes[i].xlow(); | |
| device_ctx.rr_nodes[i].set_capacity(device_ctx.chan_width.y_list[xlow]); | |
| } | |
| } | |
| } | |
| /* Allocate and load routing resource switches, which are derived from the switches from the architecture file, | |
| based on their fanin in the rr graph. This routine also adjusts the rr nodes to point to these new rr switches */ | |
| (*num_rr_switches) = alloc_and_load_rr_switch_inf(num_arch_switches, R_minW_nmos, R_minW_pmos, wire_to_arch_ipin_switch, wire_to_rr_ipin_switch); | |
| rr_graph_externals(segment_inf, num_seg_types, max_chan_width, | |
| *wire_to_rr_ipin_switch, base_cost_type); | |
| if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_RR_GRAPH)) { | |
| dump_rr_graph(getEchoFileName(E_ECHO_RR_GRAPH)); | |
| } | |
| check_rr_graph(graph_type, grid, *num_rr_switches, types, segment_inf); | |
| /* dump out rr structs if requested */ | |
| if (dump_rr_structs_file) { | |
| dump_rr_structs(dump_rr_structs_file); | |
| } | |
| #ifdef USE_MAP_LOOKAHEAD | |
| compute_router_lookahead(num_seg_types); | |
| #endif | |
| /* Free all temp structs */ | |
| if (seg_details) { | |
| free_seg_details(seg_details, max_chan_width); | |
| seg_details = NULL; | |
| } | |
| if (!chan_details_x.empty() || !chan_details_y.empty()) { | |
| free_chan_details(chan_details_x, chan_details_y, max_chan_width, grid); | |
| } | |
| if (sb_conn_map) { | |
| free_switchblock_permutations(sb_conn_map); | |
| sb_conn_map = NULL; | |
| } | |
| if (L_rr_edge_done) { | |
| free(L_rr_edge_done); | |
| L_rr_edge_done = NULL; | |
| } | |
| if (unidir_sb_pattern) { | |
| free_sblock_pattern_lookup(unidir_sb_pattern); | |
| unidir_sb_pattern = NULL; | |
| } | |
| if (sets_per_seg_type) { | |
| free(sets_per_seg_type); | |
| sets_per_seg_type = NULL; | |
| } | |
| free_type_track_to_pin_map(track_to_pin_lookup, types, max_chan_width); | |
| if (clb_to_clb_directs != NULL) { | |
| free(clb_to_clb_directs); | |
| } | |
| float elapsed_time = (float) (clock() - begin) / CLOCKS_PER_SEC; | |
| vtr::printf_info("Build routing resource graph took %g seconds\n", elapsed_time); | |
| } | |
| /* Allocates and loads the global rr_switch_inf array based on the global | |
| arch_switch_inf array and the fan-ins used by the rr nodes. | |
| Also changes switch indices of rr_nodes to index into rr_switch_inf | |
| instead of arch_switch_inf. | |
| Returns the number of rr switches created. | |
| Also returns, through a pointer, the index of a representative ipin cblock switch. | |
| - Currently we're not allowing a designer to specify an ipin cblock switch with | |
| multiple fan-ins, so there's just one of these switches in the device_ctx.rr_switch_inf array. | |
| But in the future if we allow this, we can return an index to a representative switch | |
| The rr_switch_inf switches are derived from the arch_switch_inf switches | |
| (which were read-in from the architecture file) based on fan-in. The delays of | |
| the rr switches depend on their fan-in, so we first go through the rr_nodes | |
| and count how many different fan-ins exist for each arch switch. | |
| Then we create these rr switches and update the switch indices | |
| of rr_nodes to index into the rr_switch_inf array. */ | |
| static int alloc_and_load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, | |
| const int wire_to_arch_ipin_switch, int *wire_to_rr_ipin_switch) { | |
| /* we will potentially be creating a couple of versions of each arch switch where | |
| each version corresponds to a different fan-in. We will need to fill device_ctx.rr_switch_inf | |
| with this expanded list of switches. | |
| To do this we will use an array of maps where each map corresponds to a different arch switch. | |
| So for each arch switch we will use this map to keep track of the different fan-ins that it uses (map key) | |
| and which index in the device_ctx.rr_switch_inf array this arch switch / fanin combination will be placed in */ | |
| map<int, int> *switch_fanin = new map<int, int>[num_arch_switches]; | |
| /* Determine what the different fan-ins are for each arch switch, and also | |
| how many entries the rr_switch_inf array should have */ | |
| int num_rr_switches = alloc_rr_switch_inf(switch_fanin); | |
| /* create the rr switches. also keep track of, for each arch switch, what index of the rr_switch_inf | |
| array each version of its fanin has been mapped to */ | |
| load_rr_switch_inf(num_arch_switches, R_minW_nmos, R_minW_pmos, switch_fanin); | |
| /* next, walk through rr nodes again and remap their switch indices to rr_switch_inf */ | |
| remap_rr_node_switch_indices(switch_fanin); | |
| /* now we need to set the wire_to_rr_ipin_switch variable which points the detailed routing architecture | |
| to the representative ipin cblock switch. currently we're not allowing the specification of an ipin cblock switch | |
| with multiple fan-ins, so right now there's just one. May change in the future, in which case we'd need to | |
| return a representative switch */ | |
| if (switch_fanin[wire_to_arch_ipin_switch].count(UNDEFINED)) { | |
| /* only have one ipin cblock switch. OK. */ | |
| (*wire_to_rr_ipin_switch) = switch_fanin[wire_to_arch_ipin_switch][UNDEFINED]; | |
| } else if (switch_fanin[wire_to_arch_ipin_switch].size() != 0) { | |
| vpr_throw(VPR_ERROR_ARCH, __FILE__, __LINE__, | |
| "Not currently allowing an ipin cblock switch to have multiple fan-ins"); | |
| } else { | |
| vpr_throw(VPR_ERROR_ARCH, __FILE__, __LINE__, | |
| "No switch is specified for the ipin cblock, check if there is an error in arch file"); | |
| } | |
| delete[] switch_fanin; | |
| return num_rr_switches; | |
| } | |
| /* Allocates space for the global device_ctx.rr_switch_inf variable and returns the | |
| number of rr switches that were allocated */ | |
| static int alloc_rr_switch_inf(map<int, int> *switch_fanin) { | |
| auto& device_ctx = g_vpr_ctx.mutable_device(); | |
| int num_rr_switches = 0; | |
| // map key: switch index specified in arch; map value: fanin for that index | |
| map<int, int> *inward_switch_inf = new map<int, int>[device_ctx.num_rr_nodes]; | |
| for (int inode = 0; inode < device_ctx.num_rr_nodes; inode++) { | |
| const t_rr_node& from_node = device_ctx.rr_nodes[inode]; | |
| int num_edges = from_node.num_edges(); | |
| for (int iedge = 0; iedge < num_edges; iedge++) { | |
| int switch_index = from_node.edge_switch(iedge); | |
| int to_node_index = from_node.edge_sink_node(iedge); | |
| if (inward_switch_inf[to_node_index].count(switch_index) == 0) | |
| inward_switch_inf[to_node_index][switch_index] = 0; | |
| inward_switch_inf[to_node_index][switch_index]++; | |
| } | |
| } | |
| // get unique index / fanin combination based on inward_switch_inf | |
| for (int inode = 0; inode < device_ctx.num_rr_nodes; inode++) { | |
| map<int, int>::iterator itr; | |
| for (itr = inward_switch_inf[inode].begin(); itr != inward_switch_inf[inode].end(); itr++) { | |
| int switch_index = itr->first; | |
| int fanin = itr->second; | |
| if (device_ctx.arch_switch_inf[switch_index].fixed_Tdel()) { | |
| fanin = UNDEFINED; | |
| } | |
| if (switch_fanin[switch_index].count(fanin) == 0) { | |
| switch_fanin[switch_index][fanin] = 0; | |
| num_rr_switches++; | |
| } | |
| } | |
| } | |
| delete[] inward_switch_inf; | |
| /* allocate space for the rr_switch_inf array (it's freed later in vpr_api.c-->free_arch) */ | |
| device_ctx.rr_switch_inf = new t_rr_switch_inf[num_rr_switches]; | |
| return num_rr_switches; | |
| } | |
| /* load the global device_ctx.rr_switch_inf variable. also keep track of, for each arch switch, what | |
| index of the rr_switch_inf array each version of its fanin has been mapped to (through switch_fanin map) */ | |
| static void load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, map<int, int> *switch_fanin) { | |
| auto& device_ctx = g_vpr_ctx.mutable_device(); | |
| int i_rr_switch = 0; | |
| if (device_ctx.switch_fanin_remap != NULL) { | |
| // at this stage, we rebuild the rr_graph (probably in binary search) | |
| // so old device_ctx.switch_fanin_remap is obsolete | |
| delete [] device_ctx.switch_fanin_remap; | |
| } | |
| device_ctx.switch_fanin_remap = new map<int, int>[num_arch_switches]; | |
| for (int i_arch_switch = 0; i_arch_switch < num_arch_switches; i_arch_switch++) { | |
| map<int, int>::iterator it; | |
| for (it = switch_fanin[i_arch_switch].begin(); it != switch_fanin[i_arch_switch].end(); it++) { | |
| /* the fanin value is in it->first, and we'll need to set what index this i_arch_switch/fanin | |
| combination maps to (within rr_switch_inf) in it->second) */ | |
| int fanin = it->first; | |
| it->second = i_rr_switch; | |
| // setup device_ctx.switch_fanin_remap, for future swich usage analysis | |
| device_ctx.switch_fanin_remap[i_arch_switch][fanin] = i_rr_switch; | |
| /* figure out, by looking at the arch switch's Tdel map, what the delay of the new | |
| rr switch should be */ | |
| double rr_switch_Tdel = device_ctx.arch_switch_inf[i_arch_switch].Tdel(fanin); | |
| /* copy over the arch switch to rr_switch_inf[i_rr_switch], but with the changed Tdel value */ | |
| device_ctx.rr_switch_inf[i_rr_switch].buffered = device_ctx.arch_switch_inf[i_arch_switch].buffered(); | |
| device_ctx.rr_switch_inf[i_rr_switch].R = device_ctx.arch_switch_inf[i_arch_switch].R; | |
| device_ctx.rr_switch_inf[i_rr_switch].Cin = device_ctx.arch_switch_inf[i_arch_switch].Cin; | |
| device_ctx.rr_switch_inf[i_rr_switch].Cout = device_ctx.arch_switch_inf[i_arch_switch].Cout; | |
| device_ctx.rr_switch_inf[i_rr_switch].Tdel = rr_switch_Tdel; | |
| device_ctx.rr_switch_inf[i_rr_switch].mux_trans_size = device_ctx.arch_switch_inf[i_arch_switch].mux_trans_size; | |
| if (device_ctx.arch_switch_inf[i_arch_switch].buf_size_type == BufferSize::AUTO) { | |
| //Size based on resistance | |
| device_ctx.rr_switch_inf[i_rr_switch].buf_size = trans_per_buf(device_ctx.arch_switch_inf[i_arch_switch].R, R_minW_nmos, R_minW_pmos); | |
| } else { | |
| VTR_ASSERT(device_ctx.arch_switch_inf[i_arch_switch].buf_size_type == BufferSize::ABSOLUTE); | |
| //Use the specified size | |
| device_ctx.rr_switch_inf[i_rr_switch].buf_size = device_ctx.arch_switch_inf[i_arch_switch].buf_size; | |
| } | |
| device_ctx.rr_switch_inf[i_rr_switch].name = device_ctx.arch_switch_inf[i_arch_switch].name; | |
| device_ctx.rr_switch_inf[i_rr_switch].power_buffer_type = device_ctx.arch_switch_inf[i_arch_switch].power_buffer_type; | |
| device_ctx.rr_switch_inf[i_rr_switch].power_buffer_size = device_ctx.arch_switch_inf[i_arch_switch].power_buffer_size; | |
| /* have created a switch in the rr_switch_inf array */ | |
| i_rr_switch++; | |
| } | |
| } | |
| } | |
| /* switch indices of each rr_node original point into the global device_ctx.arch_switch_inf array. | |
| now we want to remap these indices to point into the global device_ctx.rr_switch_inf array | |
| which contains switch info at different fan-in values */ | |
| static void remap_rr_node_switch_indices(map<int, int> *switch_fanin) { | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| for (int inode = 0; inode < device_ctx.num_rr_nodes; inode++) { | |
| t_rr_node& from_node = device_ctx.rr_nodes[inode]; | |
| int num_edges = from_node.num_edges(); | |
| for (int iedge = 0; iedge < num_edges; iedge++) { | |
| const t_rr_node& to_node = device_ctx.rr_nodes[ from_node.edge_sink_node(iedge) ]; | |
| /* get the switch which this edge uses and its fanin */ | |
| int switch_index = from_node.edge_switch(iedge); | |
| int fanin = to_node.fan_in(); | |
| if (switch_fanin[switch_index].count(UNDEFINED) == 1) { | |
| fanin = UNDEFINED; | |
| } | |
| int rr_switch_index = switch_fanin[switch_index][fanin]; | |
| from_node.set_edge_switch(iedge, rr_switch_index); | |
| } | |
| } | |
| } | |
| static void rr_graph_externals( | |
| const t_segment_inf * segment_inf, int num_seg_types, int max_chan_width, | |
| int wire_to_rr_ipin_switch, enum e_base_cost_type base_cost_type) { | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| add_rr_graph_C_from_switches(device_ctx.rr_switch_inf[wire_to_rr_ipin_switch].Cin); | |
| alloc_and_load_rr_indexed_data(segment_inf, num_seg_types, device_ctx.rr_node_indices, | |
| max_chan_width, wire_to_rr_ipin_switch, base_cost_type); | |
| load_rr_index_segments(num_seg_types); | |
| } | |
| static std::vector<std::vector<bool>> alloc_and_load_perturb_ipins(const int L_num_types, | |
| const int num_seg_types, | |
| const int *sets_per_seg_type, | |
| const std::vector<vtr::Matrix<int>>&Fc_in, const std::vector<vtr::Matrix<int>>&Fc_out, | |
| const enum e_directionality directionality) { | |
| std::vector < std::vector<bool>> result(L_num_types); | |
| for (auto& seg_type_bools : result) { | |
| seg_type_bools.resize(num_seg_types, false); | |
| } | |
| /* factor to account for unidir vs bidir */ | |
| int fac = 1; | |
| if (directionality == UNI_DIRECTIONAL) { | |
| fac = 2; | |
| } | |
| if (BI_DIRECTIONAL == directionality) { | |
| for (int iseg = 0; iseg < num_seg_types; ++iseg) { | |
| result[0][iseg] = false; | |
| int tracks_in_seg_type = sets_per_seg_type[iseg] * fac; | |
| for (int itype = 1; itype < L_num_types; ++itype) { | |
| result[itype][iseg] = false; | |
| float Fc_ratio; | |
| if (Fc_in[itype][0][iseg] > Fc_out[itype][0][iseg]) { | |
| Fc_ratio = (float) Fc_in[itype][0][iseg] / (float) Fc_out[itype][0][iseg]; | |
| } else { | |
| Fc_ratio = (float) Fc_out[itype][0][iseg] / (float) Fc_in[itype][0][iseg]; | |
| } | |
| if ((Fc_in[itype][0][iseg] <= tracks_in_seg_type - 2) | |
| && (fabs(Fc_ratio - vtr::nint(Fc_ratio)) | |
| < (0.5 / (float) tracks_in_seg_type))) { | |
| result[itype][iseg] = true; | |
| } | |
| } | |
| } | |
| } else { | |
| /* Unidirectional routing uses mux balancing patterns and | |
| * thus shouldn't need perturbation. */ | |
| VTR_ASSERT(UNI_DIRECTIONAL == directionality); | |
| for (int itype = 0; itype < L_num_types; ++itype) { | |
| for (int iseg = 0; iseg < num_seg_types; ++iseg) { | |
| result[itype][iseg] = false; | |
| } | |
| } | |
| } | |
| return result; | |
| } | |
| static t_seg_details *alloc_and_load_global_route_seg_details( | |
| const int global_route_switch, | |
| int * num_seg_details) { | |
| t_seg_details *seg_details = (t_seg_details *) vtr::malloc(sizeof (t_seg_details)); | |
| seg_details->index = 0; | |
| seg_details->length = 1; | |
| seg_details->arch_wire_switch = global_route_switch; | |
| seg_details->arch_opin_switch = global_route_switch; | |
| seg_details->longline = false; | |
| seg_details->direction = BI_DIRECTION; | |
| seg_details->Cmetal = 0.0; | |
| seg_details->Rmetal = 0.0; | |
| seg_details->start = 1; | |
| seg_details->cb = (bool *) vtr::malloc(sizeof (bool) * 1); | |
| seg_details->cb[0] = true; | |
| seg_details->sb = (bool *) vtr::malloc(sizeof (bool) * 2); | |
| seg_details->sb[0] = true; | |
| seg_details->sb[1] = true; | |
| seg_details->group_size = 1; | |
| seg_details->group_start = 0; | |
| seg_details->seg_start = -1; | |
| seg_details->seg_end = -1; | |
| if (num_seg_details) { | |
| *num_seg_details = 1; | |
| } | |
| return seg_details; | |
| } | |
| /* Calculates the number of track connections from each block pin to each segment type */ | |
| static std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const int L_num_types, const t_type_ptr types, const int max_pins, | |
| const int num_seg_types, const int *sets_per_seg_type, | |
| const int max_chan_width, const e_fc_type fc_type, | |
| const enum e_directionality directionality, | |
| bool *Fc_clipped) { | |
| //Initialize Fc of all blocks to zero | |
| auto zeros = vtr::Matrix<int>({size_t(max_pins), size_t(num_seg_types)}, 0); | |
| std::vector<vtr::Matrix<int>> Fc(L_num_types, zeros); | |
| *Fc_clipped = false; | |
| /* Unidir tracks formed in pairs, otherwise no effect. */ | |
| int fac = 1; | |
| if (UNI_DIRECTIONAL == directionality) { | |
| fac = 2; | |
| } | |
| VTR_ASSERT((max_chan_width % fac) == 0); | |
| for (int itype = 1; itype < L_num_types; ++itype) { //Skip EMPTY | |
| for (const t_fc_specification fc_spec : types[itype].fc_specs) { | |
| if (fc_type != fc_spec.fc_type) continue; | |
| int iseg = fc_spec.seg_index; | |
| if (fc_spec.fc_value == 0) { | |
| /* Special case indicating that this pin does not connect to general-purpose routing */ | |
| for (int ipin : fc_spec.pins) { | |
| Fc[itype][ipin][iseg] = 0; | |
| } | |
| } else { | |
| /* General case indicating that this pin connects to general-purpose routing */ | |
| //Calculate how many connections there should be accross all the pins in this fc_spec | |
| float flt_total_connections; | |
| if (fc_spec.fc_value_type == e_fc_value_type::FRACTIONAL) { | |
| float conns_per_pin = fac * sets_per_seg_type[iseg] * fc_spec.fc_value; | |
| flt_total_connections = conns_per_pin * fc_spec.pins.size(); | |
| } else { | |
| VTR_ASSERT(fc_spec.fc_value_type == e_fc_value_type::ABSOLUTE); | |
| flt_total_connections = fc_spec.fc_value * fc_spec.pins.size(); | |
| } | |
| //Round to integer | |
| int total_connections = vtr::nint(flt_total_connections); | |
| //Ensure that there are at least fac connections, this ensures that low Fc ports | |
| //targeting small sets of segs get connection(s), even if flt_total_connections < fac. | |
| total_connections = std::max(total_connections, fac); | |
| //Ensure total evenly divides fac by adding the remainder | |
| total_connections += (total_connections % fac); | |
| VTR_ASSERT(total_connections > 0); | |
| VTR_ASSERT(total_connections % fac == 0); | |
| //We walk through all the pins this fc_spec applies to, adding fac connections | |
| //to each pin, until we run out of connections. This should distribute the connections | |
| //as evenly as possible (if total_connections % pins.size() != 0, there will be | |
| //some inevitable imbalance). | |
| int connections_remaining = total_connections; | |
| while (connections_remaining != 0) { | |
| //Add one set of connections to each pin | |
| for (int ipin : fc_spec.pins) { | |
| if (connections_remaining >= fac) { | |
| Fc[itype][ipin][iseg] += fac; | |
| connections_remaining -= fac; | |
| } else { | |
| VTR_ASSERT(connections_remaining == 0); | |
| break; | |
| } | |
| } | |
| } | |
| for (int ipin : fc_spec.pins) { | |
| //It is possible that we may want more connections that wires of this type exist; | |
| //clip to the maximum number of wires | |
| if (Fc[itype][ipin][iseg] > sets_per_seg_type[iseg] * fac) { | |
| *Fc_clipped = true; | |
| Fc[itype][ipin][iseg] = sets_per_seg_type[iseg] * fac; | |
| } | |
| VTR_ASSERT_MSG(Fc[itype][ipin][iseg] >= 0, "Calculated absolute Fc must be positive"); | |
| VTR_ASSERT_MSG(Fc[itype][ipin][iseg] % fac == 0, "Calculated absolute Fc must be divisible by 1 (bidir architecture) or 2 (unidir architecture)"); | |
| } | |
| } | |
| } | |
| } | |
| return Fc; | |
| } | |
| /* frees the track to ipin mapping for each physical grid type */ | |
| static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map, | |
| t_type_ptr types, int max_chan_width) { | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| for (int i = 0; i < device_ctx.num_block_types; i++) { | |
| if (!track_to_pin_map[i].empty()) { | |
| for (int track = 0; track < max_chan_width; ++track) { | |
| for (int width = 0; width < types[i].width; ++width) { | |
| for (int height = 0; height < types[i].height; ++height) { | |
| for (int side = 0; side < 4; ++side) { | |
| if (!track_to_pin_map[i][track][width][height][side].empty()) { | |
| track_to_pin_map[i][track][width][height][side].clear(); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| /* Does the actual work of allocating the rr_graph and filling all the * | |
| * appropriate values. Everything up to this was just a prelude! */ | |
| static void alloc_and_load_rr_graph(const int num_nodes, | |
| t_rr_node * L_rr_node, const int num_seg_types, | |
| const t_seg_details * seg_details, | |
| const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, | |
| bool * L_rr_edge_done, | |
| const t_track_to_pin_lookup& track_to_pin_lookup, | |
| const t_pin_to_track_lookup& opin_to_track_map, const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn, | |
| t_sb_connection_map *sb_conn_map, | |
| const DeviceGrid& grid, const int Fs, | |
| short ******sblock_pattern, const std::vector<vtr::Matrix<int>>&Fc_out, | |
| vtr::NdMatrix<int, 3>& Fc_xofs, vtr::NdMatrix<int, 3>& Fc_yofs, | |
| const t_rr_node_indices& L_rr_node_indices, | |
| const int max_chan_width, | |
| const int delayless_switch, const enum e_directionality directionality, | |
| const int wire_to_ipin_switch, bool * Fc_clipped, | |
| const t_direct_inf *directs, const int num_directs, | |
| const t_clb_to_clb_directs *clb_to_clb_directs) { | |
| /* If Fc gets clipped, this will be flagged to true */ | |
| *Fc_clipped = false; | |
| /* Connection SINKS and SOURCES to their pins. */ | |
| for (size_t i = 0; i < grid.width(); ++i) { | |
| for (size_t j = 0; j < grid.height(); ++j) { | |
| build_rr_sinks_sources(i, j, L_rr_node, L_rr_node_indices, | |
| delayless_switch, grid); | |
| } | |
| } | |
| /* Build opins */ | |
| for (size_t i = 0; i < grid.width(); ++i) { | |
| for (size_t j = 0; j < grid.height(); ++j) { | |
| for (e_side side : SIDES) { | |
| if (BI_DIRECTIONAL == directionality) { | |
| build_bidir_rr_opins(i, j, side, L_rr_node, L_rr_node_indices, | |
| opin_to_track_map, Fc_out, L_rr_edge_done, seg_details, | |
| grid, | |
| directs, num_directs, clb_to_clb_directs, num_seg_types); | |
| } else { | |
| VTR_ASSERT(UNI_DIRECTIONAL == directionality); | |
| bool clipped; | |
| build_unidir_rr_opins(i, j, side, grid, Fc_out, max_chan_width, | |
| chan_details_x, chan_details_y, Fc_xofs, Fc_yofs, | |
| L_rr_edge_done, &clipped, L_rr_node_indices, | |
| directs, num_directs, clb_to_clb_directs, num_seg_types); | |
| if (clipped) { | |
| *Fc_clipped = true; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| /* We make a copy of the current fanin values for the nodes to | |
| * know the number of OPINs driving each mux presently */ | |
| int *opin_mux_size = (int *) vtr::malloc(sizeof (int) * num_nodes); | |
| for (int i = 0; i < num_nodes; ++i) { | |
| opin_mux_size[i] = L_rr_node[i].fan_in(); | |
| } | |
| /* Build channels */ | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| VTR_ASSERT(Fs % 3 == 0); | |
| for (size_t i = 0; i < grid.width() - 1; ++i) { | |
| for (size_t j = 0; j < grid.height() - 1; ++j) { | |
| if (i > 0) { | |
| build_rr_chan(i, j, CHANX, track_to_pin_lookup, sb_conn_map, switch_block_conn, | |
| CHANX_COST_INDEX_START, | |
| max_chan_width, grid, device_ctx.chan_width.x_list[j], | |
| sblock_pattern, Fs / 3, chan_details_x, chan_details_y, | |
| L_rr_node_indices, L_rr_edge_done, L_rr_node, | |
| wire_to_ipin_switch, directionality); | |
| } | |
| if (j > 0) { | |
| build_rr_chan(i, j, CHANY, track_to_pin_lookup, sb_conn_map, switch_block_conn, | |
| CHANX_COST_INDEX_START + num_seg_types, | |
| max_chan_width, grid, device_ctx.chan_width.y_list[i], | |
| sblock_pattern, Fs / 3, chan_details_x, chan_details_y, | |
| L_rr_node_indices, L_rr_edge_done, L_rr_node, | |
| wire_to_ipin_switch, directionality); | |
| } | |
| } | |
| } | |
| init_fan_in(L_rr_node, num_nodes); | |
| free(opin_mux_size); | |
| } | |
| static void build_bidir_rr_opins(const int i, const int j, const e_side side, | |
| t_rr_node * L_rr_node, const t_rr_node_indices& L_rr_node_indices, | |
| const t_pin_to_track_lookup& opin_to_track_map, const std::vector<vtr::Matrix<int>>&Fc_out, | |
| bool * L_rr_edge_done, const t_seg_details * seg_details, | |
| const DeviceGrid& grid, | |
| const t_direct_inf *directs, const int num_directs, const t_clb_to_clb_directs *clb_to_clb_directs, | |
| const int num_seg_types) { | |
| //Don't connect pins which are not adjacent to channels around the perimeter | |
| if ( (i == 0 && side != RIGHT) | |
| || (i == int(grid.width() - 1) && side != LEFT) | |
| || (j == 0 && side != TOP) | |
| || (j == int(grid.width() - 1) && side != BOTTOM)) { | |
| return; | |
| } | |
| t_type_ptr type = grid[i][j].type; | |
| int width_offset = grid[i][j].width_offset; | |
| int height_offset = grid[i][j].height_offset; | |
| const vtr::Matrix<int>& Fc = Fc_out[type->index]; | |
| for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) { | |
| /* We only are working with opins so skip non-drivers */ | |
| if (type->class_inf[type->pin_class[pin_index]].type != DRIVER) { | |
| continue; | |
| } | |
| /* Can't do anything if pin isn't at this location */ | |
| if (0 == type->pinloc[width_offset][height_offset][side][pin_index]) { | |
| continue; | |
| } | |
| /* get number of tracks that this pin connects to */ | |
| int total_pin_Fc = 0; | |
| for (int iseg = 0; iseg < num_seg_types; iseg++) { | |
| total_pin_Fc += Fc[pin_index][iseg]; | |
| } | |
| int num_edges = 0; | |
| t_linked_edge *edge_list = NULL; | |
| if (total_pin_Fc > 0) { | |
| num_edges += get_bidir_opin_connections(i + width_offset, j + height_offset, pin_index, | |
| &edge_list, opin_to_track_map, total_pin_Fc, L_rr_edge_done, | |
| L_rr_node_indices, seg_details); | |
| } | |
| /* Add in direct connections */ | |
| num_edges += get_opin_direct_connecions(i, j, side, pin_index, &edge_list, L_rr_node_indices, | |
| directs, num_directs, clb_to_clb_directs); | |
| int node_index = get_rr_node_index(L_rr_node_indices, i, j, OPIN, pin_index, side); | |
| VTR_ASSERT(node_index >= 0); | |
| alloc_and_load_edges_and_switches(L_rr_node, node_index, num_edges, | |
| L_rr_edge_done, edge_list); | |
| while (edge_list != NULL) { | |
| t_linked_edge *next_edge = edge_list->next; | |
| free(edge_list); | |
| edge_list = next_edge; | |
| } | |
| } | |
| } | |
| void free_rr_graph(void) { | |
| /* Frees all the routing graph data structures, if they have been * | |
| * allocated. I use rr_mem_chunk_list_head as a flag to indicate * | |
| * whether or not the graph has been allocated -- if it is not NULL, * | |
| * a routing graph exists and can be freed. Hence, you can call this * | |
| * routine even if you're not sure of whether a rr_graph exists or not. */ | |
| /* Before adding any more free calls here, be sure the data is NOT chunk * | |
| * allocated, as ALL the chunk allocated data is already free! */ | |
| auto& device_ctx = g_vpr_ctx.mutable_device(); | |
| device_ctx.rr_node_indices.clear(); | |
| delete[] device_ctx.rr_nodes; | |
| device_ctx.rr_nodes = nullptr; | |
| device_ctx.num_rr_nodes = 0; | |
| device_ctx.rr_node_indices.clear(); | |
| free(device_ctx.rr_indexed_data); | |
| device_ctx.rr_indexed_data = nullptr; | |
| delete[] device_ctx.rr_switch_inf; | |
| device_ctx.rr_switch_inf = nullptr; | |
| device_ctx.num_rr_switches = 0; | |
| delete[] device_ctx.switch_fanin_remap; | |
| device_ctx.switch_fanin_remap = nullptr; | |
| } | |
| static void build_rr_sinks_sources(const int i, const int j, | |
| t_rr_node * L_rr_node, const t_rr_node_indices& L_rr_node_indices, | |
| const int delayless_switch, const DeviceGrid& grid) { | |
| /* Loads IPIN, SINK, SOURCE, and OPIN. | |
| * Loads IPIN to SINK edges, and SOURCE to OPIN edges */ | |
| /* Since we share nodes within a large block, only | |
| * start tile can initialize sinks, sources, and pins */ | |
| if (grid[i][j].width_offset > 0 || grid[i][j].height_offset > 0) | |
| return; | |
| t_type_ptr type = grid[i][j].type; | |
| int num_class = type->num_class; | |
| t_class *class_inf = type->class_inf; | |
| int num_pins = type->num_pins; | |
| int *pin_class = type->pin_class; | |
| /* SINK and SOURCE-to-OPIN edges */ | |
| for (int iclass = 0; iclass < num_class; ++iclass) { | |
| int inode = 0; | |
| if (class_inf[iclass].type == DRIVER) { /* SOURCE */ | |
| inode = get_rr_node_index(L_rr_node_indices, i, j, SOURCE, iclass); | |
| //Retrieve all the physical OPINs associated with this source, this includes | |
| //those at different grid tiles of this block | |
| std::vector<int> opin_nodes; | |
| for (int width_offset = 0; width_offset < type->width; ++width_offset) { | |
| for (int height_offset = 0; height_offset < type->height; ++height_offset) { | |
| for (int ipin = 0; ipin < class_inf[iclass].num_pins; ++ipin) { | |
| int pin_num = class_inf[iclass].pinlist[ipin]; | |
| auto physical_pins = get_rr_node_indices(L_rr_node_indices, i + width_offset, j + height_offset, OPIN, pin_num); | |
| opin_nodes.insert(opin_nodes.end(), physical_pins.begin(), physical_pins.end()); | |
| } | |
| } | |
| } | |
| //Connect the SOURCE to each OPIN | |
| L_rr_node[inode].set_num_edges(opin_nodes.size()); | |
| for (size_t iedge = 0; iedge < opin_nodes.size(); ++iedge) { | |
| L_rr_node[inode].set_edge_sink_node(iedge, opin_nodes[iedge]); | |
| L_rr_node[inode].set_edge_switch(iedge, delayless_switch); | |
| } | |
| L_rr_node[inode].set_cost_index(SOURCE_COST_INDEX); | |
| L_rr_node[inode].set_type(SOURCE); | |
| } else { /* SINK */ | |
| VTR_ASSERT(class_inf[iclass].type == RECEIVER); | |
| inode = get_rr_node_index(L_rr_node_indices, i, j, SINK, iclass); | |
| /* NOTE: To allow route throughs through clbs, change the lines below to * | |
| * make an edge from the input SINK to the output SOURCE. Do for just the * | |
| * special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it * | |
| * leads to. If route throughs are allowed, you may want to increase the * | |
| * base cost of OPINs and/or SOURCES so they aren't used excessively. */ | |
| /* Initialize to unconnected */ | |
| L_rr_node[inode].set_num_edges(0); | |
| L_rr_node[inode].set_cost_index(SINK_COST_INDEX); | |
| L_rr_node[inode].set_type(SINK); | |
| } | |
| /* Things common to both SOURCEs and SINKs. */ | |
| L_rr_node[inode].set_capacity(class_inf[iclass].num_pins); | |
| L_rr_node[inode].set_coordinates(i, j, i + type->width - 1, j + type->height - 1); | |
| L_rr_node[inode].set_R(0); | |
| L_rr_node[inode].set_C(0); | |
| L_rr_node[inode].set_ptc_num(iclass); | |
| } | |
| /* Connect IPINS to SINKS and initialize OPINS */ | |
| //We loop through all the pin locations on the block to initialize the IPINs/OPINs, | |
| //and hook-up the IPINs to sinks. | |
| for (int width_offset = 0; width_offset < type->width; ++width_offset) { | |
| for (int height_offset = 0; height_offset < type->height; ++height_offset) { | |
| for (e_side side : {TOP, BOTTOM, LEFT, RIGHT}) { | |
| for (int ipin = 0; ipin < num_pins; ++ipin) { | |
| if (type->pinloc[width_offset][height_offset][side][ipin]) { | |
| int inode; | |
| int iclass = pin_class[ipin]; | |
| if (class_inf[iclass].type == RECEIVER) { | |
| //Connect the input pin to the sink | |
| inode = get_rr_node_index(L_rr_node_indices, i + width_offset, j + height_offset, IPIN, ipin, side); | |
| int to_node = get_rr_node_index(L_rr_node_indices, i, j, SINK, iclass); | |
| L_rr_node[inode].set_num_edges(1); | |
| L_rr_node[inode].set_edge_sink_node(0, to_node); | |
| L_rr_node[inode].set_edge_switch(0, delayless_switch); | |
| L_rr_node[inode].set_cost_index(IPIN_COST_INDEX); | |
| L_rr_node[inode].set_type(IPIN); | |
| } else { | |
| VTR_ASSERT(class_inf[iclass].type == DRIVER); | |
| //Initialize the output pin | |
| // Note that we leave it's out-going edges unconnected (they will be hooked up to global routing later) | |
| inode = get_rr_node_index(L_rr_node_indices, i + width_offset, j + height_offset, OPIN, ipin, side); | |
| L_rr_node[inode].set_num_edges(0); //Initially unconnected | |
| L_rr_node[inode].set_cost_index(OPIN_COST_INDEX); | |
| L_rr_node[inode].set_type(OPIN); | |
| } | |
| /* Common to both DRIVERs and RECEIVERs */ | |
| L_rr_node[inode].set_capacity(1); | |
| L_rr_node[inode].set_C(0); | |
| L_rr_node[inode].set_R(0); | |
| L_rr_node[inode].set_ptc_num(ipin); | |
| //Note that we store the grid tile location and side where the pin is located, | |
| //which greatly simplifies the drawing code | |
| L_rr_node[inode].set_coordinates(i + width_offset, j + height_offset, i + width_offset, j + height_offset); | |
| L_rr_node[inode].set_side(side); | |
| VTR_ASSERT(type->pinloc[width_offset][height_offset][L_rr_node[inode].side()][L_rr_node[inode].pin_num()]); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| void init_fan_in(t_rr_node * L_rr_node, const int num_rr_nodes) { | |
| //Loads fan-ins for all nodes | |
| //Reset all fan-ins to zero | |
| for (int i = 0; i < num_rr_nodes; i++) { | |
| L_rr_node[i].set_fan_in(0); | |
| } | |
| //Walk the graph and increment fanin on all downstream nodes | |
| for (int i = 0; i < num_rr_nodes; i++) { | |
| for (int iedge = 0; iedge < L_rr_node[i].num_edges(); iedge++) { | |
| int to_node = L_rr_node[i].edge_sink_node(iedge); | |
| L_rr_node[to_node].set_fan_in(L_rr_node[to_node].fan_in() + 1); | |
| } | |
| } | |
| } | |
| /* Allocates/loads edges for nodes belonging to specified channel segment and initializes | |
| node properties such as cost, occupancy and capacity */ | |
| static void build_rr_chan(const int x_coord, const int y_coord, const t_rr_type chan_type, | |
| const t_track_to_pin_lookup& track_to_pin_lookup, | |
| t_sb_connection_map *sb_conn_map, | |
| const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn, const int cost_index_offset, | |
| const int max_chan_width, | |
| const DeviceGrid& grid, | |
| const int tracks_per_chan, | |
| short ******sblock_pattern, const int Fs_per_side, | |
| const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, | |
| const t_rr_node_indices& L_rr_node_indices, | |
| bool * L_rr_edge_done, t_rr_node * L_rr_node, | |
| const int wire_to_ipin_switch, const enum e_directionality directionality) { | |
| /* this function builds both x and y-directed channel segments, so set up our | |
| coordinates based on channel type */ | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| //Initally a assumes CHANX | |
| int seg_coord = x_coord; //The absolute coordinate of this segment within the channel | |
| int chan_coord = y_coord; //The absolute coordinate of this channel within the device | |
| int seg_dimension = device_ctx.grid.width() - 2; //-2 for no perim channels | |
| int chan_dimension = device_ctx.grid.height() - 2; //-2 for no perim channels | |
| const t_chan_details& from_chan_details = (chan_type == CHANX) ? chan_details_x : chan_details_y; | |
| const t_chan_details& opposite_chan_details = (chan_type == CHANX) ? chan_details_y : chan_details_x; | |
| t_rr_type opposite_chan_type = CHANY; | |
| if (chan_type == CHANY) { | |
| //Swap values since CHANX was assumed above | |
| std::swap(seg_coord, chan_coord); | |
| std::swap(seg_dimension, chan_dimension); | |
| opposite_chan_type = CHANX; | |
| } | |
| t_seg_details * seg_details = from_chan_details[x_coord][y_coord]; | |
| /* figure out if we're generating switch block edges based on a custom switch block | |
| description */ | |
| bool custom_switch_block = false; | |
| if (sb_conn_map != NULL) { | |
| VTR_ASSERT(sblock_pattern == NULL && switch_block_conn.empty()); | |
| custom_switch_block = true; | |
| } | |
| /* Loads up all the routing resource nodes in the current channel segment */ | |
| for (int track = 0; track < tracks_per_chan; ++track) { | |
| if (seg_details[track].length == 0) | |
| continue; | |
| //Start and end coordinates of this segment along the length of the channel | |
| //Note that these values are in the VPR coordinate system (and do not consider | |
| //wire directionality), so start correspond to left/bottom and end corresponds to right/top | |
| int start = get_seg_start(seg_details, track, chan_coord, seg_coord); | |
| int end = get_seg_end(seg_details, track, start, chan_coord, seg_dimension); | |
| if (seg_coord > start) | |
| continue; /* Only process segments which start at this location */ | |
| VTR_ASSERT(seg_coord == start); | |
| t_linked_edge *edge_list = NULL; | |
| t_seg_details * from_seg_details = chan_details_x[start][y_coord]; | |
| if (chan_type == CHANY) { | |
| from_seg_details = chan_details_y[x_coord][start]; | |
| } | |
| /* Add the edges from this track to all it's connected pins into the list */ | |
| int num_edges = 0; | |
| num_edges += get_track_to_pins(start, chan_coord, track, tracks_per_chan, &edge_list, | |
| L_rr_node_indices, track_to_pin_lookup, seg_details, chan_type, seg_dimension, | |
| wire_to_ipin_switch, directionality); | |
| /* get edges going from the current track into channel segments which are perpendicular to it */ | |
| if (chan_coord > 0) { | |
| t_seg_details *to_seg_details; | |
| if (chan_type == CHANX) { | |
| to_seg_details = chan_details_y[start][y_coord]; | |
| } else { | |
| VTR_ASSERT(chan_type == CHANY); | |
| to_seg_details = chan_details_x[x_coord][start]; | |
| } | |
| if (to_seg_details->length > 0) { | |
| num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord, | |
| opposite_chan_type, seg_dimension, max_chan_width, grid, | |
| Fs_per_side, sblock_pattern, &edge_list, | |
| from_seg_details, to_seg_details, opposite_chan_details, | |
| directionality, L_rr_node_indices, L_rr_edge_done, | |
| switch_block_conn, sb_conn_map); | |
| } | |
| } | |
| if (chan_coord < chan_dimension) { | |
| t_seg_details *to_seg_details; | |
| if (chan_type == CHANX) { | |
| to_seg_details = chan_details_y[start][y_coord + 1]; | |
| } else { | |
| VTR_ASSERT(chan_type == CHANY); | |
| to_seg_details = chan_details_x[x_coord + 1][start]; | |
| } | |
| if (to_seg_details->length > 0) { | |
| num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord + 1, | |
| opposite_chan_type, seg_dimension, max_chan_width, grid, | |
| Fs_per_side, sblock_pattern, &edge_list, | |
| from_seg_details, to_seg_details, opposite_chan_details, | |
| directionality, L_rr_node_indices, L_rr_edge_done, | |
| switch_block_conn, sb_conn_map); | |
| } | |
| } | |
| /* walk over the switch blocks along the source track and implement edges from this track to other tracks | |
| in the same channel (i.e. straight-through connections) */ | |
| for (int target_seg = start - 1; target_seg <= end + 1; target_seg++) { | |
| if (target_seg != start - 1 && target_seg != end + 1) { | |
| /* skip straight-through connections from midpoint if non-custom switch block. | |
| currently non-custom switch blocks don't properly describe connections from the mid-point of a wire segment | |
| to other segments in the same channel (i.e. straight-through connections) */ | |
| if (!custom_switch_block) { | |
| continue; | |
| } | |
| } | |
| if (target_seg > 0 && target_seg < seg_dimension + 1) { | |
| t_seg_details *to_seg_details; | |
| if (chan_type == CHANX) { | |
| to_seg_details = chan_details_x[target_seg][y_coord]; | |
| } else { | |
| VTR_ASSERT(chan_type == CHANY); | |
| to_seg_details = chan_details_y[x_coord][target_seg]; | |
| } | |
| if (to_seg_details->length > 0) { | |
| num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, target_seg, | |
| chan_type, seg_dimension, max_chan_width, grid, | |
| Fs_per_side, sblock_pattern, &edge_list, | |
| from_seg_details, to_seg_details, from_chan_details, | |
| directionality, L_rr_node_indices, L_rr_edge_done, | |
| switch_block_conn, sb_conn_map); | |
| } | |
| } | |
| } | |
| int node = get_rr_node_index(L_rr_node_indices, x_coord, y_coord, chan_type, track); | |
| VTR_ASSERT(node >= 0); | |
| alloc_and_load_edges_and_switches(L_rr_node, node, num_edges, | |
| L_rr_edge_done, edge_list); | |
| while (edge_list != NULL) { | |
| t_linked_edge *next_edge = edge_list->next; | |
| free(edge_list); | |
| edge_list = next_edge; | |
| } | |
| /* Edge arrays have now been built up. Do everything else. */ | |
| L_rr_node[node].set_cost_index(cost_index_offset + seg_details[track].index); | |
| L_rr_node[node].set_capacity(1); /* GLOBAL routing handled elsewhere */ | |
| if (chan_type == CHANX) { | |
| L_rr_node[node].set_coordinates(start, y_coord, end, y_coord); | |
| } else { | |
| VTR_ASSERT(chan_type == CHANY); | |
| L_rr_node[node].set_coordinates(x_coord, start, x_coord, end); | |
| } | |
| int length = end - start + 1; | |
| L_rr_node[node].set_R(length * seg_details[track].Rmetal); | |
| L_rr_node[node].set_C(length * seg_details[track].Cmetal); | |
| L_rr_node[node].set_ptc_num(track); | |
| L_rr_node[node].set_type(chan_type); | |
| L_rr_node[node].set_direction(seg_details[track].direction); | |
| } | |
| } | |
| void alloc_and_load_edges_and_switches(t_rr_node * L_rr_node, const int inode, | |
| const int num_edges, bool * L_rr_edge_done, | |
| t_linked_edge * edge_list_head) { | |
| /* Sets up all the edge related information for rr_node inode (num_edges, * | |
| * the edges array and the switches array). The edge_list_head points to * | |
| * a list of the num_edges edges and switches to put in the arrays. This * | |
| * linked list is freed by this routine. This routine also resets the * | |
| * rr_edge_done array for the next rr_node (i.e. set it so that no edges * | |
| * are marked as having been seen before). */ | |
| t_linked_edge *list_ptr; | |
| int i; | |
| /* Check we aren't overwriting edges */ | |
| VTR_ASSERT(L_rr_node[inode].num_edges() == 0); | |
| L_rr_node[inode].set_num_edges(num_edges); | |
| i = 0; | |
| list_ptr = edge_list_head; | |
| while (list_ptr && (i < num_edges)) { | |
| L_rr_node[inode].set_edge_sink_node(i, list_ptr->edge); | |
| L_rr_node[inode].set_edge_switch(i, list_ptr->iswitch); | |
| /* Unmark the edge since we are done considering fanout from node. */ | |
| L_rr_edge_done[list_ptr->edge] = false; | |
| list_ptr = list_ptr->next; | |
| ++i; | |
| } | |
| VTR_ASSERT(list_ptr == NULL); | |
| VTR_ASSERT(i == num_edges); | |
| } | |
| /* allocate pin to track map for each segment type individually and then combine into a single | |
| vector */ | |
| static vtr::NdMatrix<int, 5> alloc_and_load_pin_to_track_map(const e_pin_type pin_type, | |
| const vtr::Matrix<int>& Fc, const t_type_ptr Type, const std::vector<bool>& perturb_switch_pattern, | |
| const e_directionality directionality, | |
| const int num_seg_types, const int *sets_per_seg_type) { | |
| /* get the maximum number of tracks that any pin can connect to */ | |
| size_t max_pin_tracks = 0; | |
| for (int iseg = 0; iseg < num_seg_types; iseg++) { | |
| /* determine the maximum Fc to this segment type across all pins */ | |
| int max_Fc = 0; | |
| for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) { | |
| int pin_class = Type->pin_class[pin_index]; | |
| if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) { | |
| max_Fc = Fc[pin_index][iseg]; | |
| } | |
| } | |
| max_pin_tracks += max_Fc; | |
| } | |
| /* allocate 'result' matrix and initialize entries to OPEN. also allocate and intialize matrix which will be | |
| used to index into the correct entries when loading up 'result' */ | |
| auto result = vtr::NdMatrix<int, 5>({ | |
| size_t(Type->num_pins), //[0..num_pins-1] | |
| size_t(Type->width), //[0..width-1] | |
| size_t(Type->height), //[0..height-1] | |
| 4, //[0..sides-1] | |
| max_pin_tracks //[0..Fc-1] | |
| }, | |
| OPEN); | |
| auto next_result_index = vtr::NdMatrix<int, 4>({ | |
| size_t(Type->num_pins), //[0..num_pins-1] | |
| size_t(Type->width), //[0..width-1] | |
| size_t(Type->height), //[0..height-1] | |
| 4 //[0..sides-1] | |
| }, | |
| 0); | |
| /* multiplier for unidirectional vs bidirectional architectures */ | |
| int fac = 1; | |
| if (directionality == UNI_DIRECTIONAL) { | |
| fac = 2; | |
| } | |
| /* load the pin to track matrix by looking at each segment type in turn */ | |
| int seg_type_start_track = 0; | |
| for (int iseg = 0; iseg < num_seg_types; iseg++) { | |
| int num_seg_type_tracks = fac * sets_per_seg_type[iseg]; | |
| /* determine the maximum Fc to this segment type across all pins */ | |
| int max_Fc = 0; | |
| for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) { | |
| int pin_class = Type->pin_class[pin_index]; | |
| if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) { | |
| max_Fc = Fc[pin_index][iseg]; | |
| } | |
| } | |
| /* get pin connections to tracks of the current segment type */ | |
| auto pin_to_seg_type_map = alloc_and_load_pin_to_seg_type(pin_type, | |
| num_seg_type_tracks, max_Fc, Type, perturb_switch_pattern[iseg], directionality); | |
| /* connections in pin_to_seg_type_map are within that seg type -- i.e. in the [0,num_seg_type_tracks-1] range. | |
| now load up 'result' array with these connections, but offset them so they are relative to the channel | |
| as a whole */ | |
| for (int ipin = 0; ipin < Type->num_pins; ipin++) { | |
| for (int iwidth = 0; iwidth < Type->width; iwidth++) { | |
| for (int iheight = 0; iheight < Type->height; iheight++) { | |
| for (int iside = 0; iside < 4; iside++) { | |
| for (int iconn = 0; iconn < max_Fc; iconn++) { | |
| int relative_track_ind = pin_to_seg_type_map[ipin][iwidth][iheight][iside][iconn]; | |
| int absolute_track_ind = relative_track_ind + seg_type_start_track; | |
| int result_index = next_result_index[ipin][iwidth][iheight][iside]; | |
| next_result_index[ipin][iwidth][iheight][iside]++; | |
| result[ipin][iwidth][iheight][iside][result_index] = absolute_track_ind; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| /* next seg type will start at this track index */ | |
| seg_type_start_track += num_seg_type_tracks; | |
| } | |
| return result; | |
| } | |
| static vtr::NdMatrix<int, 5> alloc_and_load_pin_to_seg_type(const e_pin_type pin_type, | |
| const int num_seg_type_tracks, const int Fc, | |
| const t_type_ptr Type, const bool perturb_switch_pattern, | |
| const e_directionality directionality) { | |
| /* Note: currently a single value of Fc is used across each pin. In the future | |
| the looping below will have to be modified if we want to account for pin-based | |
| Fc values */ | |
| /* NB: This wastes some space. Could set tracks_..._pin[ipin][ioff][iside] = | |
| * NULL if there is no pin on that side, or that pin is of the wrong type. | |
| * Probably not enough memory to worry about, esp. as it's temporary. | |
| * If pin ipin on side iside does not exist or is of the wrong type, | |
| * tracks_connected_to_pin[ipin][iside][0] = OPEN. */ | |
| if (Type->num_pins < 1) { | |
| return vtr::NdMatrix<int, 5>(); | |
| } | |
| auto tracks_connected_to_pin = vtr::NdMatrix<int, 5>({ | |
| size_t(Type->num_pins), //[0..num_pins-1] | |
| size_t(Type->width), //[0..width-1] | |
| size_t(Type->height), //[0..height-1] | |
| NUM_SIDES, //[0..NUM_SIDES-1] | |
| size_t(Fc) | |
| }, //[0..Fc-1] | |
| OPEN); //Unconnected | |
| //Number of *physical* pins on each side. | |
| //Note that his may be more than the logical number of pins (i.e. | |
| //Type->num_pins) if a logical pin has multiple specified physical | |
| //pinlocations (i.e. appears on multiple sides of the block) | |
| auto num_dir = vtr::NdMatrix<int, 3>({ | |
| size_t(Type->width), //[0..width-1] | |
| size_t(Type->height), //[0..height-1] | |
| NUM_SIDES //[0..NUM_SIDES-1] | |
| }, | |
| 0); | |
| //List of *physical* pins of the correct type on each side of the current | |
| //block type. For a specific width/height/side the valid enteries in the | |
| //last dimension are [0 .. num_dir[width][height][side]-1] | |
| // | |
| //Max possible space alloced for simplicity | |
| auto dir_list = vtr::NdMatrix<int, 4>({ | |
| size_t(Type->width), //[0..width-1] | |
| size_t(Type->height), //[0..height-1] | |
| NUM_SIDES, //[0..NUM_SIDES-1] | |
| size_t(Type->num_pins) //[0..num_pins-1] | |
| }, | |
| -1); //Defensive coding: Initialize to invalid | |
| //Number of currently assigned physical pins | |
| auto num_done_per_dir = vtr::NdMatrix<int, 3>({ | |
| size_t(Type->width), //[0..width-1] | |
| size_t(Type->height), //[0..height-1] | |
| NUM_SIDES //[0..NUM_SIDES-1] | |
| }, | |
| 0); | |
| //Record the physical pin locations and counts per side/offsets combination | |
| for (int pin = 0; pin < Type->num_pins; ++pin) { | |
| int pin_class = Type->pin_class[pin]; | |
| if (Type->class_inf[pin_class].type != pin_type) /* Doing either ipins OR opins */ | |
| continue; | |
| /* Pins connecting only to global resources get no switches -> keeps area model accurate. */ | |
| if (Type->is_global_pin[pin]) | |
| continue; | |
| for (int width = 0; width < Type->width; ++width) { | |
| for (int height = 0; height < Type->height; ++height) { | |
| for (e_side side : SIDES) { | |
| if (Type->pinloc[width][height][side][pin] == 1) { | |
| dir_list[width][height][side][num_dir[width][height][side]] = pin; | |
| num_dir[width][height][side]++; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| //Total the number of physical pins | |
| int num_phys_pins = 0; | |
| for (int width = 0; width < Type->width; ++width) { | |
| for (int height = 0; height < Type->height; ++height) { | |
| for (e_side side : SIDES) { | |
| num_phys_pins += num_dir[width][height][side]; /* Num. physical pins per type */ | |
| } | |
| } | |
| } | |
| std::vector<t_pin_loc> pin_ordering; | |
| /* Connection block I use distributes pins evenly across the tracks * | |
| * of ALL sides of the clb at once. Ensures that each pin connects * | |
| * to spaced out tracks in its connection block, and that the other * | |
| * pins (potentially in other C blocks) connect to the remaining tracks * | |
| * first. Doesn't matter for large Fc, but should make a fairly * | |
| * good low Fc block that leverages the fact that usually lots of pins * | |
| * are logically equivalent. */ | |
| const e_side init_side = LEFT; | |
| const int init_width = 0; | |
| const int init_height = 0; | |
| e_side side = init_side; | |
| int width = init_width; | |
| int height = init_height; | |
| int pin = 0; | |
| int pin_index = -1; | |
| //Determine the order in which physical pins will be considered while building | |
| //the connection block. This generally tries to order the pins so they are 'spread' | |
| //out (in hopes of yielding good connection diversity) | |
| while (pin < num_phys_pins) { | |
| if (height == init_height && width == init_width && side == init_side) { | |
| //Completed one loop through all the possible offsets/side combinations | |
| pin_index++; | |
| } | |
| advance_to_next_block_side(Type, width, height, side); | |
| VTR_ASSERT_MSG(pin_index < num_phys_pins, "Physical block pins bound number of logical block pins"); | |
| if (num_done_per_dir[width][height][side] >= num_dir[width][height][side]) { | |
| continue; | |
| } | |
| int pin_num = dir_list[width][height][side][pin_index]; | |
| VTR_ASSERT(pin_num >= 0); | |
| VTR_ASSERT(Type->pinloc[width][height][side][pin_num]); | |
| t_pin_loc pin_loc; | |
| pin_loc.pin_index = pin_num; | |
| pin_loc.width_offset = width;; | |
| pin_loc.height_offset = height; | |
| pin_loc.side = side; | |
| pin_ordering.push_back(pin_loc); | |
| num_done_per_dir[width][height][side]++; | |
| pin++; | |
| } | |
| VTR_ASSERT(pin == num_phys_pins); | |
| VTR_ASSERT(pin_ordering.size() == size_t(num_phys_pins)); | |
| if (perturb_switch_pattern) { | |
| load_perturbed_connection_block_pattern(tracks_connected_to_pin, | |
| pin_ordering, | |
| num_seg_type_tracks, num_seg_type_tracks, Fc, directionality); | |
| } else { | |
| load_uniform_connection_block_pattern(tracks_connected_to_pin, | |
| pin_ordering, | |
| num_seg_type_tracks, num_seg_type_tracks, Fc, directionality); | |
| } | |
| #ifdef ENABLE_CHECK_ALL_TRACKS | |
| check_all_tracks_reach_pins(Type, tracks_connected_to_pin, num_seg_type_tracks, | |
| Fc, pin_type); | |
| #endif | |
| return tracks_connected_to_pin; | |
| } | |
| static void advance_to_next_block_side(t_type_ptr Type, int& width_offset, int& height_offset, e_side& side) { | |
| //State-machine transitions for advancing around all sides of a block | |
| //This state-machine transitions in the following order: | |
| // | |
| // TOP | |
| // ---> | |
| // | |
| // ********************************** | |
| // * 10 | 11 | 12 * | |
| // * 3 25 | 6 22 | 9 19 * | |
| // * 36 | 35 | 34 * | |
| // *----------|----------|----------* | |
| // ^ * 13 | 14 | 15 * | | |
| // LEFT | * 2 26 | 5 23 | 8 20 * | RIGHT | |
| // | * 33 | 32 | 31 * v | |
| // *----------|----------|----------* | |
| // * 16 | 17 | 18 * | |
| // * 1 27 | 4 24 | 7 21 * | |
| // * 30 | 29 | 28 * | |
| // ********************************** | |
| // | |
| // <--- | |
| // BOTTOM | |
| // | |
| // where each 'square' in the above diagram corresponds to a grid tile of | |
| // the block of width=3 and height=3. The numbers correspond to the visiting order starting | |
| // from '1' (width_offset=0, height_offset=0, side=LEFT). | |
| // | |
| // Note that for blocks of width == 1 and height == 1 this iterates through the sides | |
| // in clock-wise order: | |
| // | |
| // *********** | |
| // * 2 * | |
| // * 1 3 * | |
| // * 4 * | |
| // *********** | |
| // | |
| //Validate current state | |
| VTR_ASSERT(width_offset >= 0 && width_offset < Type->width); | |
| VTR_ASSERT(height_offset >= 0 && height_offset < Type->height); | |
| VTR_ASSERT(side == LEFT || side == RIGHT || side == BOTTOM || side == TOP); | |
| if (side == LEFT) { | |
| if (height_offset == Type->height - 1 && width_offset == Type->width - 1) { | |
| //Finished the last left edge column | |
| side = TOP; //Turn clockwise | |
| width_offset = 0; | |
| height_offset = Type->height - 1; | |
| } else if (height_offset == Type->height - 1) { | |
| //Finished the current left edge column | |
| VTR_ASSERT(width_offset != Type->width - 1); | |
| //Move right to the next left edge column | |
| width_offset++; | |
| height_offset = 0; | |
| } else { | |
| height_offset++; //Advance up current left edge column | |
| } | |
| } else if (side == TOP) { | |
| if (height_offset == 0 && width_offset == Type->width - 1) { | |
| //Finished the last top edge row | |
| side = RIGHT; //Turn clockwise | |
| width_offset = Type->width - 1; | |
| height_offset = Type->height - 1; | |
| } else if (width_offset == Type->width - 1) { | |
| //Finished the current top edge row | |
| VTR_ASSERT(height_offset != 0); | |
| //Move down to the next top edge row | |
| height_offset--; | |
| width_offset = 0; | |
| } else { | |
| width_offset++; //Advance right along current top edge row | |
| } | |
| } else if (side == RIGHT) { | |
| if (height_offset == 0 && width_offset == 0) { | |
| //Finished the last right edge column | |
| side = BOTTOM; //Turn clockwise | |
| width_offset = Type->width - 1; | |
| height_offset = 0; | |
| } else if (height_offset == 0) { | |
| //Finished the current right edge column | |
| VTR_ASSERT(width_offset != 0); | |
| //Move left to the next right edge column | |
| width_offset--; | |
| height_offset = Type->height - 1; | |
| } else { | |
| height_offset--; //Advance down current right edge column | |
| } | |
| } else { | |
| VTR_ASSERT(side == BOTTOM); | |
| if (height_offset == Type->height - 1 && width_offset == 0) { | |
| //Finished the last bottom edge row | |
| side = LEFT; //Turn clockwise | |
| width_offset = 0; | |
| height_offset = 0; | |
| } else if (width_offset == 0) { | |
| //Finished the current bottom edge row | |
| VTR_ASSERT(height_offset != Type->height - 1); | |
| //Move up to the next bottom edge row | |
| height_offset++; | |
| width_offset = Type->width - 1; | |
| } else { | |
| width_offset--; //Advance left along current bottom edge row | |
| } | |
| } | |
| //Validate next state | |
| VTR_ASSERT(width_offset >= 0 && width_offset < Type->width); | |
| VTR_ASSERT(height_offset >= 0 && height_offset < Type->height); | |
| VTR_ASSERT(side == LEFT || side == RIGHT || side == BOTTOM || side == TOP); | |
| } | |
| static void load_uniform_connection_block_pattern( | |
| vtr::NdMatrix<int, 5>& tracks_connected_to_pin, | |
| const std::vector<t_pin_loc>& pin_locations, | |
| const int x_chan_width, const int y_chan_width, const int Fc, | |
| enum e_directionality directionality) { | |
| /* Loads the tracks_connected_to_pin array with an even distribution of * | |
| * switches across the tracks for each pin. For example, each pin connects * | |
| * to every 4.3rd track in a channel, with exactly which tracks a pin * | |
| * connects to staggered from pin to pin. */ | |
| /* Uni-directional drive is implemented to ensure no directional bias and this means | |
| * two important comments noted below */ | |
| /* 1. Spacing should be (W/2)/(Fc/2), and step_size should be spacing/(num_phys_pins), | |
| * and lay down 2 switches on an adjacent pair of tracks at a time to ensure | |
| * no directional bias. Basically, treat W (even) as W/2 pairs of tracks, and | |
| * assign switches to a pair at a time. Can do this because W is guaranteed to | |
| * be even-numbered; however same approach cannot be applied to Fc_out pattern | |
| * when L > 1 and W <> 2L multiple. | |
| * | |
| * 2. This generic pattern should be considered the tileable physical layout, | |
| * meaning all track # here are physical #'s, | |
| * so later must use vpr_to_phy conversion to find actual logical #'s to connect. | |
| * This also means I will not use get_output_block_companion_track to ensure | |
| * no bias, since that describes a logical # -> that would confuse people. */ | |
| int group_size; | |
| if (directionality == BI_DIRECTIONAL) { | |
| group_size = 1; | |
| } else { | |
| VTR_ASSERT(directionality == UNI_DIRECTIONAL); | |
| group_size = 2; | |
| } | |
| VTR_ASSERT((x_chan_width % group_size == 0) && (y_chan_width % group_size == 0) && (Fc % group_size == 0)); | |
| int num_phys_pins = pin_locations.size(); | |
| for (int i = 0; i < num_phys_pins; ++i) { | |
| int pin = pin_locations[i].pin_index; | |
| e_side side = pin_locations[i].side; | |
| int width = pin_locations[i].width_offset; | |
| int height = pin_locations[i].height_offset; | |
| /* Bi-directional treats each track separately, uni-directional works with pairs of tracks */ | |
| for (int j = 0; j < (Fc / group_size); ++j) { | |
| int max_chan_width = (((side == TOP) || (side == BOTTOM)) ? x_chan_width : y_chan_width); | |
| float step_size = (float) max_chan_width / (float) (Fc * num_phys_pins); | |
| VTR_ASSERT(Fc > 0); | |
| float fc_step = (float) max_chan_width / (float) Fc; | |
| float ftrack = (i * step_size) + (j * fc_step); | |
| int itrack = ((int) ftrack) * group_size; | |
| /* Catch possible floating point round error */ | |
| itrack = min(itrack, max_chan_width - group_size); | |
| /* Assign the group of tracks for the Fc pattern */ | |
| for (int k = 0; k < group_size; ++k) { | |
| tracks_connected_to_pin[pin][width][height][side][group_size * j + k] = itrack + k; | |
| } | |
| } | |
| } | |
| } | |
| static void load_perturbed_connection_block_pattern( | |
| vtr::NdMatrix<int, 5>& tracks_connected_to_pin, | |
| const std::vector<t_pin_loc>& pin_locations, | |
| const int x_chan_width, const int y_chan_width, const int Fc, | |
| enum e_directionality directionality) { | |
| /* Loads the tracks_connected_to_pin array with an unevenly distributed * | |
| * set of switches across the channel. This is done for inputs when * | |
| * Fc_input = Fc_output to avoid creating "pin domains" -- certain output * | |
| * pins being able to talk only to certain input pins because their switch * | |
| * patterns exactly line up. Distribute Fc/2 + 1 switches over half the * | |
| * channel and Fc/2 - 1 switches over the other half to make the switch * | |
| * pattern different from the uniform one of the outputs. Also, have half * | |
| * the pins put the "dense" part of their connections in the first half of * | |
| * the channel and the other half put the "dense" part in the second half, * | |
| * to make sure each track can connect to about the same number of ipins. */ | |
| VTR_ASSERT(directionality == BI_DIRECTIONAL); | |
| int Fc_dense = (Fc / 2) + 1; | |
| int Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */ | |
| int Fc_half[2]; | |
| int num_phys_pins = pin_locations.size(); | |
| for (int i = 0; i < num_phys_pins; ++i) { | |
| int pin = pin_locations[i].pin_index; | |
| e_side side = pin_locations[i].side; | |
| int width = pin_locations[i].width_offset; | |
| int height = pin_locations[i].height_offset; | |
| int max_chan_width = (((side == TOP) || (side == BOTTOM)) ? x_chan_width : y_chan_width); | |
| float step_size = (float) max_chan_width / (float) (Fc * num_phys_pins); | |
| float spacing_dense = (float) max_chan_width / (float) (2 * Fc_dense); | |
| float spacing_sparse = (float) max_chan_width / (float) (2 * Fc_sparse); | |
| float spacing[2]; | |
| /* Flip every pin to balance switch density */ | |
| spacing[i % 2] = spacing_dense; | |
| Fc_half[i % 2] = Fc_dense; | |
| spacing[(i + 1) % 2] = spacing_sparse; | |
| Fc_half[(i + 1) % 2] = Fc_sparse; | |
| float ftrack = i * step_size; /* Start point. Staggered from pin to pin */ | |
| int iconn = 0; | |
| for (int ihalf = 0; ihalf < 2; ihalf++) { /* For both dense and sparse halves. */ | |
| for (int j = 0; j < Fc_half[ihalf]; ++j) { | |
| /* Can occasionally get wraparound due to floating point rounding. | |
| This is okay because the starting position > 0 when this occurs | |
| so connection is valid and fine */ | |
| int itrack = (int) ftrack; | |
| itrack = itrack % max_chan_width; | |
| tracks_connected_to_pin[pin][width][height][side][iconn] = itrack; | |
| ftrack += spacing[ihalf]; | |
| iconn++; | |
| } | |
| } | |
| } /* End for all physical pins. */ | |
| } | |
| #ifdef ENABLE_CHECK_ALL_TRACKS | |
| static void check_all_tracks_reach_pins(t_type_ptr type, | |
| int *****tracks_connected_to_pin, int max_chan_width, int Fc, | |
| enum e_pin_type ipin_or_opin) { | |
| /* Checks that all tracks can be reached by some pin. */ | |
| VTR_ASSERT(max_chan_width > 0); | |
| int *num_conns_to_track; /* [0..max_chan_width-1] */ | |
| num_conns_to_track = (int *) vtr::calloc(max_chan_width, sizeof (int)); | |
| for (int pin = 0; pin < type->num_pins; ++pin) { | |
| for (int width = 0; width < type->width; ++width) { | |
| for (int height = 0; height < type->height; ++height) { | |
| for (int side = 0; side < 4; ++side) { | |
| if (tracks_connected_to_pin[pin][width][height][side][0] != OPEN) { /* Pin exists */ | |
| for (int conn = 0; conn < Fc; ++conn) { | |
| int track = tracks_connected_to_pin[pin][width][height][side][conn]; | |
| num_conns_to_track[track]++; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| for (int track = 0; track < max_chan_width; ++track) { | |
| if (num_conns_to_track[track] <= 0) { | |
| vtr::printf_error(__FILE__, __LINE__, | |
| "check_all_tracks_reach_pins: Track %d does not connect to any CLB %ss.\n", | |
| track, (ipin_or_opin == DRIVER ? "OPIN" : "IPIN")); | |
| } | |
| } | |
| free(num_conns_to_track); | |
| } | |
| #endif | |
| /* Allocates and loads the track to ipin lookup for each physical grid type. This | |
| * is the same information as the ipin_to_track map but accessed in a different way. */ | |
| static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_track_to_pin_lookup( | |
| vtr::NdMatrix<int, 5> pin_to_track_map, const vtr::Matrix<int>& Fc, | |
| const int type_width, const int type_height, | |
| const int num_pins, const int max_chan_width, | |
| const int num_seg_types) { | |
| /* [0..max_chan_width-1][0..width][0..height][0..3]. For each track number | |
| * it stores a vector for each of the four sides. x-directed channels will | |
| * use the TOP and BOTTOM vectors to figure out what clb input pins they | |
| * connect to above and below them, respectively, while y-directed channels | |
| * use the LEFT and RIGHT vectors. Each vector contains an nelem field | |
| * saying how many ipins it connects to. The list[0..nelem-1] array then | |
| * gives the pin numbers. */ | |
| /* Note that a clb pin that connects to a channel on its RIGHT means that * | |
| * that channel connects to a clb pin on its LEFT. The convention used * | |
| * here is always in the perspective of the CLB */ | |
| if (num_pins < 1) { | |
| return vtr::NdMatrix<std::vector<int>, 4>(); | |
| } | |
| /* Alloc and zero the the lookup table */ | |
| auto track_to_pin_lookup = vtr::NdMatrix<std::vector<int>, 4>({size_t(max_chan_width), size_t(type_width), size_t(type_height), 4}); | |
| for (int track = 0; track < max_chan_width; ++track) { | |
| for (int width = 0; width < type_width; ++width) { | |
| for (int height = 0; height < type_height; ++height) { | |
| for (int side = 0; side < 4; ++side) { | |
| if (!track_to_pin_lookup[track][width][height][side].empty()) | |
| track_to_pin_lookup[track][width][height][side].clear(); | |
| } | |
| } | |
| } | |
| } | |
| /* Count number of pins to which each track connects */ | |
| for (int pin = 0; pin < num_pins; ++pin) { | |
| for (int width = 0; width < type_width; ++width) { | |
| for (int height = 0; height < type_height; ++height) { | |
| for (int side = 0; side < 4; ++side) { | |
| if (pin_to_track_map[pin][width][height][side][0] == OPEN) | |
| continue; | |
| /* get number of tracks to which this pin connects */ | |
| int num_tracks = 0; | |
| for (int iseg = 0; iseg < num_seg_types; iseg++) { | |
| num_tracks += Fc[pin][iseg]; | |
| } | |
| for (int conn = 0; conn < num_tracks; ++conn) { | |
| int track = pin_to_track_map[pin][width][height][side][conn]; | |
| VTR_ASSERT(track < max_chan_width); | |
| VTR_ASSERT(track >= 0); | |
| track_to_pin_lookup[track][width][height][side].push_back(pin); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| return track_to_pin_lookup; | |
| } | |
| /* A utility routine to dump the contents of the routing resource graph * | |
| * (everything -- connectivity, occupancy, cost, etc.) into a file. Used * | |
| * only for debugging. */ | |
| void dump_rr_graph(const char *file_name) { | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| FILE *fp = vtr::fopen(file_name, "w"); | |
| for (int inode = 0; inode < device_ctx.num_rr_nodes; ++inode) { | |
| print_rr_node(fp, device_ctx.rr_nodes, inode); | |
| fprintf(fp, "\n"); | |
| } | |
| fclose(fp); | |
| } | |
| /* Prints all the data about node inode to file fp. */ | |
| void print_rr_node(FILE * fp, t_rr_node * L_rr_node, int inode) { | |
| t_rr_type rr_type = L_rr_node[inode].type(); | |
| fprintf(fp, "Node: %d %s ", inode, L_rr_node[inode].type_string()); | |
| if ((L_rr_node[inode].xlow() == L_rr_node[inode].xhigh()) | |
| && (L_rr_node[inode].ylow() == L_rr_node[inode].yhigh())) { | |
| fprintf(fp, "(%d, %d) ", | |
| L_rr_node[inode].xlow(), L_rr_node[inode].ylow()); | |
| } else { | |
| fprintf(fp, "(%d, %d) to (%d, %d) ", | |
| L_rr_node[inode].xlow(), L_rr_node[inode].ylow(), | |
| L_rr_node[inode].xhigh(), L_rr_node[inode].yhigh()); | |
| } | |
| fprintf(fp, "Ptc_num: %d ", L_rr_node[inode].ptc_num()); | |
| fprintf(fp, "Length: %d ", L_rr_node[inode].length()); | |
| if (rr_type == CHANX || rr_type == CHANY) { | |
| fprintf(fp, "Direction: %s ", DIRECTIONS_STRING[L_rr_node[inode].direction()]); | |
| } else if (rr_type == IPIN || rr_type == OPIN) { | |
| fprintf(fp, "Side: %s ", SIDE_STRING[L_rr_node[inode].side()]); | |
| } | |
| fprintf(fp, "\n"); | |
| fprintf(fp, "%d edge(s):", L_rr_node[inode].num_edges()); | |
| for (int iconn = 0; iconn < L_rr_node[inode].num_edges(); ++iconn) | |
| fprintf(fp, " %d", L_rr_node[inode].edge_sink_node(iconn)); | |
| fprintf(fp, "\n"); | |
| fprintf(fp, "Switch types:"); | |
| for (int iconn = 0; iconn < L_rr_node[inode].num_edges(); ++iconn) | |
| fprintf(fp, " %d", L_rr_node[inode].edge_switch(iconn)); | |
| fprintf(fp, "\n"); | |
| fprintf(fp, "Capacity: %d\n", L_rr_node[inode].capacity()); | |
| if (rr_type != INTRA_CLUSTER_EDGE) { | |
| fprintf(fp, "R: %g C: %g\n", L_rr_node[inode].R(), L_rr_node[inode].C()); | |
| } | |
| fprintf(fp, "Cost_index: %d\n", L_rr_node[inode].cost_index()); | |
| } | |
| /* Prints all the device_ctx.rr_indexed_data of index to file fp. */ | |
| void print_rr_indexed_data(FILE * fp, int index) { | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| fprintf(fp, "Index: %d\n", index); | |
| fprintf(fp, "ortho_cost_index: %d ", device_ctx.rr_indexed_data[index].ortho_cost_index); | |
| fprintf(fp, "base_cost: %g ", device_ctx.rr_indexed_data[index].saved_base_cost); | |
| fprintf(fp, "saved_base_cost: %g\n", device_ctx.rr_indexed_data[index].saved_base_cost); | |
| fprintf(fp, "Seg_index: %d ", device_ctx.rr_indexed_data[index].seg_index); | |
| fprintf(fp, "inv_length: %g\n", device_ctx.rr_indexed_data[index].inv_length); | |
| fprintf(fp, "T_linear: %g ", device_ctx.rr_indexed_data[index].T_linear); | |
| fprintf(fp, "T_quadratic: %g ", device_ctx.rr_indexed_data[index].T_quadratic); | |
| fprintf(fp, "C_load: %g\n", device_ctx.rr_indexed_data[index].C_load); | |
| } | |
| static void build_unidir_rr_opins(const int i, const int j, const e_side side, | |
| const DeviceGrid& grid, const std::vector<vtr::Matrix<int>>&Fc_out, const int max_chan_width, | |
| const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, | |
| vtr::NdMatrix<int, 3>& Fc_xofs, vtr::NdMatrix<int, 3>& Fc_yofs, | |
| bool * L_rr_edge_done, | |
| bool * Fc_clipped, const t_rr_node_indices& L_rr_node_indices, | |
| const t_direct_inf *directs, const int num_directs, const t_clb_to_clb_directs *clb_to_clb_directs, | |
| const int num_seg_types) { | |
| /* | |
| * This routine adds the edges from opins to channels at the specified | |
| * grid location (i,j) and grid tile side | |
| */ | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| *Fc_clipped = false; | |
| t_type_ptr type = grid[i][j].type; | |
| int width_offset = grid[i][j].width_offset; | |
| int height_offset = grid[i][j].height_offset; | |
| /* Go through each pin and find its fanout. */ | |
| for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) { | |
| /* Skip global pins and pins that are not of DRIVER type */ | |
| int class_index = type->pin_class[pin_index]; | |
| if (type->class_inf[class_index].type != DRIVER) { | |
| continue; | |
| } | |
| if (type->is_global_pin[pin_index]) { | |
| continue; | |
| } | |
| int num_edges = 0; | |
| t_linked_edge *edge_list = NULL; | |
| for (int iseg = 0; iseg < num_seg_types; iseg++) { | |
| /* get Fc for this segment type */ | |
| int seg_type_Fc = Fc_out[type->index][pin_index][iseg]; | |
| VTR_ASSERT(seg_type_Fc >= 0); | |
| if (seg_type_Fc == 0) { | |
| continue; | |
| } | |
| /* Can't do anything if pin isn't at this location */ | |
| if (0 == type->pinloc[width_offset][height_offset][side][pin_index]) { | |
| continue; | |
| } | |
| /* Figure out the chan seg at that side. | |
| * side is the side of the logic or io block. */ | |
| bool vert = ((side == TOP) || (side == BOTTOM)); | |
| bool pos_dir = ((side == TOP) || (side == RIGHT)); | |
| t_rr_type chan_type = (vert ? CHANX : CHANY); | |
| int chan = (vert ? (j) : (i)); | |
| int seg = (vert ? (i) : (j)); | |
| int max_len = (vert ? grid.width() : grid.height()); | |
| vtr::NdMatrix<int, 3>& Fc_ofs = (vert ? Fc_xofs : Fc_yofs); | |
| if (false == pos_dir) { | |
| --chan; | |
| } | |
| /* Skip the location if there is no channel. */ | |
| if (chan < 0) { | |
| continue; | |
| } | |
| if (seg < 1) { | |
| continue; | |
| } | |
| if (seg > int(vert ? grid.width() : grid.height()) - 2) { //-2 since no channels around perim | |
| continue; | |
| } | |
| if (chan > int(vert ? grid.height() : grid.width()) - 2) { //-2 since no channels around perim | |
| continue; | |
| } | |
| t_seg_details * seg_details = (chan_type == CHANX ? | |
| chan_details_x[seg][chan] : chan_details_y[chan][seg]); | |
| if (seg_details[0].length == 0) | |
| continue; | |
| /* Get the list of opin to mux connections for that chan seg. */ | |
| bool clipped; | |
| num_edges += get_unidir_opin_connections(chan, seg, | |
| seg_type_Fc, iseg, chan_type, seg_details, &edge_list, | |
| Fc_ofs, L_rr_edge_done, max_len, max_chan_width, | |
| L_rr_node_indices, &clipped); | |
| if (clipped) { | |
| *Fc_clipped = true; | |
| } | |
| } | |
| /* Add in direct connections */ | |
| num_edges += get_opin_direct_connecions(i, j, side, pin_index, &edge_list, L_rr_node_indices, | |
| directs, num_directs, clb_to_clb_directs); | |
| if (num_edges != 0) { | |
| /* Add the edges */ | |
| int opin_node_index = get_rr_node_index(L_rr_node_indices, i, j, OPIN, pin_index, side); | |
| VTR_ASSERT(opin_node_index >= 0); | |
| alloc_and_load_edges_and_switches(device_ctx.rr_nodes, opin_node_index, num_edges, | |
| L_rr_edge_done, edge_list); | |
| while (edge_list != NULL) { | |
| t_linked_edge *next_edge = edge_list->next; | |
| free(edge_list); | |
| edge_list = next_edge; | |
| } | |
| } | |
| VTR_ASSERT(edge_list == nullptr); | |
| } | |
| } | |
| /** | |
| * Parse out which CLB pins should connect directly to which other CLB pins then store that in a clb_to_clb_directs data structure | |
| * This data structure supplements the the info in the "directs" data structure | |
| * TODO: The function that does this parsing in placement is poorly done because it lacks generality on heterogeniety, should replace with this one | |
| */ | |
| static t_clb_to_clb_directs * alloc_and_load_clb_to_clb_directs(const t_direct_inf *directs, const int num_directs, int delayless_switch) { | |
| int i, j; | |
| t_clb_to_clb_directs *clb_to_clb_directs; | |
| char *pb_type_name, *port_name; | |
| int start_pin_index, end_pin_index; | |
| t_pb_type *pb_type; | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| clb_to_clb_directs = (t_clb_to_clb_directs*) vtr::calloc(num_directs, sizeof (t_clb_to_clb_directs)); | |
| pb_type_name = NULL; | |
| port_name = NULL; | |
| for (i = 0; i < num_directs; i++) { | |
| pb_type_name = (char*) vtr::malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof (char)); | |
| port_name = (char*) vtr::malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof (char)); | |
| // Load from pins | |
| // Parse out the pb_type name, port name, and pin range | |
| parse_direct_pin_name(directs[i].from_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name); | |
| // Figure out which type, port, and pin is used | |
| for (j = 0; j < device_ctx.num_block_types; j++) { | |
| if (strcmp(device_ctx.block_types[j].name, pb_type_name) == 0) { | |
| break; | |
| } | |
| } | |
| VTR_ASSERT(j < device_ctx.num_block_types); | |
| clb_to_clb_directs[i].from_clb_type = &device_ctx.block_types[j]; | |
| pb_type = clb_to_clb_directs[i].from_clb_type->pb_type; | |
| for (j = 0; j < pb_type->num_ports; j++) { | |
| if (strcmp(pb_type->ports[j].name, port_name) == 0) { | |
| break; | |
| } | |
| } | |
| VTR_ASSERT(j < pb_type->num_ports); | |
| if (start_pin_index == OPEN) { | |
| VTR_ASSERT(start_pin_index == end_pin_index); | |
| start_pin_index = 0; | |
| end_pin_index = pb_type->ports[j].num_pins - 1; | |
| } | |
| get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].from_clb_pin_start_index); | |
| get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].from_clb_pin_end_index); | |
| // Load to pins | |
| // Parse out the pb_type name, port name, and pin range | |
| parse_direct_pin_name(directs[i].to_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name); | |
| // Figure out which type, port, and pin is used | |
| for (j = 0; j < device_ctx.num_block_types; j++) { | |
| if (strcmp(device_ctx.block_types[j].name, pb_type_name) == 0) { | |
| break; | |
| } | |
| } | |
| VTR_ASSERT(j < device_ctx.num_block_types); | |
| clb_to_clb_directs[i].to_clb_type = &device_ctx.block_types[j]; | |
| pb_type = clb_to_clb_directs[i].to_clb_type->pb_type; | |
| for (j = 0; j < pb_type->num_ports; j++) { | |
| if (strcmp(pb_type->ports[j].name, port_name) == 0) { | |
| break; | |
| } | |
| } | |
| VTR_ASSERT(j < pb_type->num_ports); | |
| if (start_pin_index == OPEN) { | |
| VTR_ASSERT(start_pin_index == end_pin_index); | |
| start_pin_index = 0; | |
| end_pin_index = pb_type->ports[j].num_pins - 1; | |
| } | |
| get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].to_clb_pin_start_index); | |
| get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].to_clb_pin_end_index); | |
| if (abs(clb_to_clb_directs[i].from_clb_pin_start_index - clb_to_clb_directs[i].from_clb_pin_end_index) != abs(clb_to_clb_directs[i].to_clb_pin_start_index - clb_to_clb_directs[i].to_clb_pin_end_index)) { | |
| vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, | |
| "Range mismatch from %s to %s.\n", directs[i].from_pin, directs[i].to_pin); | |
| } | |
| //Set the switch index | |
| if (directs[i].switch_type > 0) { | |
| //Use the specified switch | |
| clb_to_clb_directs[i].switch_index = directs[i].switch_type; | |
| } else { | |
| //Use the delayless switch by default | |
| clb_to_clb_directs[i].switch_index = delayless_switch; | |
| } | |
| free(pb_type_name); | |
| free(port_name); | |
| //We must be careful to clean-up anything that we may have incidentally allocated. | |
| //Specifically, we can be called while generating the dummy architecture | |
| //for placer delay estimation. Since the delay estimation occurs on a | |
| //'different' architecture it is almost certain that the f_blk_pin_from_port_pin allocated | |
| //by calling get_blk_pin_from_port_pin() will later be invalid. | |
| //We therefore must free it now. | |
| free_blk_pin_from_port_pin(); | |
| } | |
| return clb_to_clb_directs; | |
| } | |
| /* Add all direct clb-pin-to-clb-pin edges to given opin | |
| * | |
| * The current opin is located at (x,y) along the specified side | |
| */ | |
| static int get_opin_direct_connecions(int x, int y, e_side side, int opin, | |
| t_linked_edge ** edge_list_ptr, const t_rr_node_indices& L_rr_node_indices, | |
| const t_direct_inf *directs, const int num_directs, | |
| const t_clb_to_clb_directs *clb_to_clb_directs) { | |
| t_type_ptr curr_type, target_type; | |
| int i, ipin; | |
| t_linked_edge *edge_list_head; | |
| int max_index, min_index, offset, swap; | |
| int new_edges; | |
| auto& device_ctx = g_vpr_ctx.device(); | |
| curr_type = device_ctx.grid[x][y].type; | |
| edge_list_head = *edge_list_ptr; | |
| new_edges = 0; | |
| int width_offset = device_ctx.grid[x][y].width_offset; | |
| int height_offset = device_ctx.grid[x][y].height_offset; | |
| if (!curr_type->pinloc[width_offset][height_offset][side][opin]) { | |
| return 0; //No source pin on this side | |
| } | |
| /* Iterate through all direct connections */ | |
| for (i = 0; i < num_directs; i++) { | |
| /* Find matching direct clb-to-clb connections with the same type as current grid location */ | |
| if (clb_to_clb_directs[i].from_clb_type == curr_type) { //We are at a valid starting point | |
| //Offset must be in range | |
| if (x + directs[i].x_offset < int(device_ctx.grid.width() - 1) && | |
| x + directs[i].x_offset > 0 && | |
| y + directs[i].y_offset < int(device_ctx.grid.height() - 1) && | |
| y + directs[i].y_offset > 0) { | |
| //Only add connections if the target clb type matches the type in the direct specification | |
| target_type = device_ctx.grid[x + directs[i].x_offset][y + directs[i].y_offset].type; | |
| if (clb_to_clb_directs[i].to_clb_type == target_type) { | |
| /* Compute index of opin with regards to given pins */ | |
| if (clb_to_clb_directs[i].from_clb_pin_start_index > clb_to_clb_directs[i].from_clb_pin_end_index) { | |
| swap = true; | |
| max_index = clb_to_clb_directs[i].from_clb_pin_start_index; | |
| min_index = clb_to_clb_directs[i].from_clb_pin_end_index; | |
| } else { | |
| swap = false; | |
| min_index = clb_to_clb_directs[i].from_clb_pin_start_index; | |
| max_index = clb_to_clb_directs[i].from_clb_pin_end_index; | |
| } | |
| if (max_index >= opin && min_index <= opin) { | |
| offset = opin - min_index; | |
| /* This opin is specified to connect directly to an ipin, now compute which ipin to connect to */ | |
| ipin = OPEN; | |
| if (clb_to_clb_directs[i].to_clb_pin_start_index > clb_to_clb_directs[i].to_clb_pin_end_index) { | |
| if (swap) { | |
| ipin = clb_to_clb_directs[i].to_clb_pin_end_index + offset; | |
| } else { | |
| ipin = clb_to_clb_directs[i].to_clb_pin_start_index - offset; | |
| } | |
| } else { | |
| if (swap) { | |
| ipin = clb_to_clb_directs[i].to_clb_pin_end_index - offset; | |
| } else { | |
| ipin = clb_to_clb_directs[i].to_clb_pin_start_index + offset; | |
| } | |
| } | |
| /* Add new ipin edge to list of edges */ | |
| int target_width_offset = device_ctx.grid[x + directs[i].x_offset][y + directs[i].y_offset].width_offset; | |
| int target_height_offset = device_ctx.grid[x + directs[i].x_offset][y + directs[i].y_offset].height_offset; | |
| auto inodes = get_rr_node_indices(L_rr_node_indices, x + directs[i].x_offset - target_width_offset, y + directs[i].y_offset - target_height_offset, | |
| IPIN, ipin); | |
| //There may be multiple physical pins corresponding to the logical | |
| //target ipin. We only need to connect to one of them (since the physical pins | |
| //are logically equivalent). | |
| VTR_ASSERT(inodes.size() > 0); | |
| int inode = inodes[0]; | |
| edge_list_head = insert_in_edge_list(edge_list_head, inode, clb_to_clb_directs[i].switch_index); | |
| new_edges++; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| *edge_list_ptr = edge_list_head; | |
| return new_edges; | |
| } | |
| /* Determines whether the output pins of the specified block type should be perturbed. * | |
| * This is to prevent pathological cases where the output pin connections are * | |
| * spaced such that the connection pattern always skips some types of wire (w.r.t. * | |
| * starting points) */ | |
| static std::vector<bool> alloc_and_load_perturb_opins(const t_type_ptr type, const vtr::Matrix<int>& Fc_out, | |
| const int max_chan_width, const int num_seg_types, const t_segment_inf *segment_inf) { | |
| int i, Fc_max, iclass, num_wire_types; | |
| int num, max_primes, factor, num_factors; | |
| int *prime_factors; | |
| float step_size = 0; | |
| float n = 0; | |
| float threshold = 0.07; | |
| std::vector<bool> perturb_opins(num_seg_types, false); | |
| i = Fc_max = iclass = 0; | |
| if (num_seg_types > 1) { | |
| /* Segments of one length are grouped together in the channel. * | |
| * In the future we can determine if any of these segments will * | |
| * encounter the pathological step size case, and determine if * | |
| * we need to perturb based on the segment's frequency (if * | |
| * frequency is small we should not perturb - testing has found * | |
| * that perturbing a channel when unnecessary increases needed * | |
| * W to achieve the same delay); but for now we just return. */ | |
| return perturb_opins; | |
| } else { | |
| /* There are as many wire start points as the value of L */ | |
| num_wire_types = segment_inf[0].length; | |
| } | |
| /* get Fc_max */ | |
| for (i = 0; i < type->num_pins; ++i) { | |
| iclass = type->pin_class[i]; | |
| if (Fc_out[i][0] > Fc_max && type->class_inf[iclass].type == DRIVER) { | |
| Fc_max = Fc_out[i][0]; | |
| } | |
| } | |
| /* Nothing to perturb if Fc=0; no need to perturb if Fc = 1 */ | |
| if (Fc_max == 0 || Fc_max == max_chan_width) { | |
| return perturb_opins; | |
| } | |
| /* Pathological cases occur when the step size, W/Fc, is a multiple of * | |
| * the number of wire starting points, L. Specifically, when the step * | |
| * size is a multiple of a prime factor of L, the connection pattern * | |
| * will always skip some wires. Thus, we perturb pins if we detect this * | |
| * case. */ | |
| /* get an upper bound on the number of prime factors of num_wire_types */ | |
| max_primes = (int) floor(log((float) num_wire_types) / log(2.0)); | |
| max_primes = std::max(max_primes, 1); //Minimum of 1 to ensure we allocate space for at least one prime_factor | |
| prime_factors = (int *) vtr::malloc(max_primes * sizeof (int)); | |
| for (i = 0; i < max_primes; i++) { | |
| prime_factors[i] = 0; | |
| } | |
| /* Find the prime factors of num_wire_types */ | |
| num = num_wire_types; | |
| factor = 2; | |
| num_factors = 0; | |
| while (pow((float) factor, 2) <= num) { | |
| if (num % factor == 0) { | |
| num /= factor; | |
| if (factor != prime_factors[num_factors]) { | |
| prime_factors[num_factors] = factor; | |
| num_factors++; | |
| } | |
| } else { | |
| factor++; | |
| } | |
| } | |
| if (num_factors == 0) { | |
| prime_factors[num_factors++] = num_wire_types; /* covers cases when num_wire_types is prime */ | |
| } | |
| /* Now see if step size is an approximate multiple of one of the factors. A * | |
| * threshold is used because step size may not be an integer. */ | |
| step_size = (float) max_chan_width / Fc_max; | |
| for (i = 0; i < num_factors; i++) { | |
| if (vtr::nint(step_size) < prime_factors[i]) { | |
| perturb_opins[0] = false; | |
| break; | |
| } | |
| n = step_size / prime_factors[i]; | |
| n = n - (float) vtr::nint(n); /* fractinal part */ | |
| if (fabs(n) < threshold) { | |
| perturb_opins[0] = true; | |
| break; | |
| } else { | |
| perturb_opins[0] = false; | |
| } | |
| } | |
| free(prime_factors); | |
| return perturb_opins; | |
| } | |