vpr/src/route/rr_graph.cpp - third_party/vtr-verilog-to-routing - Git at Google

 #include <cstdio>
 #include <cstring>
 #include <cmath>
 #include <ctime>
 #include <algorithm>
 #include <vector>
 #include "vtr_assert.h"

 #include "vtr_util.h"
 #include "vtr_memory.h"
 #include "vtr_math.h"
 #include "vtr_log.h"
 #include "vtr_time.h"

 #include "vpr_types.h"
 #include "vpr_utils.h"
 #include "vpr_error.h"

 #include "globals.h"
 #include "rr_graph_util.h"
 #include "rr_graph.h"
 #include "rr_graph_area.h"
 #include "rr_graph2.h"
 #include "rr_graph_sbox.h"
 #include "rr_graph_timing_params.h"
 #include "rr_graph_indexed_data.h"
 #include "check_rr_graph.h"
 #include "read_xml_arch_file.h"
 #include "echo_files.h"
 #include "cb_metrics.h"
 #include "build_switchblocks.h"
 #include "rr_graph_writer.h"
 #include "rr_graph_reader.h"
 #include "router_lookahead_map.h"
 #include "rr_graph_clock.h"

 #include "rr_types.h"

 //#define VERBOSE

 struct t_mux {
     int size;
     t_mux* next;
 };

 struct t_mux_size_distribution {
     int mux_count;
     int max_index;
     int* distr;
     t_mux_size_distribution* next;
 };

 struct t_clb_to_clb_directs {
     t_logical_block_type_ptr from_clb_type;
     int from_clb_pin_start_index;
     int from_clb_pin_end_index;
     t_logical_block_type_ptr to_clb_type;
     int to_clb_pin_start_index;
     int to_clb_pin_end_index;
     int switch_index; //The switch type used by this direct connection
 };

 struct t_pin_loc {
     int pin_index;
     int width_offset;
     int height_offset;
     e_side side;
 };

 typedef std::vector<std::map<int, int>> t_arch_switch_fanin;

 /******************* Variables local to this module. ***********************/

 /********************* Subroutines local to this module. *******************/
 void print_rr_graph_stats();

 bool channel_widths_unchanged(const t_chan_width& current, const t_chan_width& proposed);

 static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_pin_to_track_map(const e_pin_type pin_type,
                                                                           const vtr::Matrix<int>& Fc,
                                                                           const t_physical_tile_type_ptr Type,
                                                                           const std::vector<bool>& perturb_switch_pattern,
                                                                           const e_directionality directionality,
                                                                           const int num_seg_types,
                                                                           const int* sets_per_seg_type);

 static vtr::NdMatrix<int, 5> alloc_and_load_pin_to_seg_type(const e_pin_type pin_type,
                                                             const int seg_type_tracks,
                                                             const int Fc,
                                                             const t_physical_tile_type_ptr Type,
                                                             const bool perturb_switch_pattern,
                                                             const e_directionality directionality);

 static void advance_to_next_block_side(t_physical_tile_type_ptr Type, int& width_offset, int& height_offset, e_side& side);

 static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_track_to_pin_lookup(vtr::NdMatrix<std::vector<int>, 4> pin_to_track_map,
                                                                              const vtr::Matrix<int>& Fc,
                                                                              const int width,
                                                                              const int height,
                                                                              const int num_pins,
                                                                              const int max_chan_width,
                                                                              const int num_seg_types);

 static void build_bidir_rr_opins(const int i,
                                  const int j,
                                  const e_side side,
                                  const t_rr_node_indices& L_rr_node_indices,
                                  const std::vector<t_rr_node>& rr_nodes,
                                  const t_pin_to_track_lookup& opin_to_track_map,
                                  const std::vector<vtr::Matrix<int>>& Fc_out,
                                  t_rr_edge_info_set& created_rr_edges,
                                  const t_chan_details& chan_details_x,
                                  const t_chan_details& chan_details_y,
                                  const DeviceGrid& grid,
                                  const t_direct_inf* directs,
                                  const int num_directs,
                                  const t_clb_to_clb_directs* clb_to_clb_directs,
                                  const int num_seg_types);

 static void build_unidir_rr_opins(const int i,
                                   const int j,
                                   const e_side side,
                                   const DeviceGrid& grid,
                                   const std::vector<vtr::Matrix<int>>& Fc_out,
                                   const int max_chan_width,
                                   const t_chan_details& chan_details_x,
                                   const t_chan_details& chan_details_y,
                                   vtr::NdMatrix<int, 3>& Fc_xofs,
                                   vtr::NdMatrix<int, 3>& Fc_yofs,
                                   t_rr_edge_info_set& created_rr_edges,
                                   bool* Fc_clipped,
                                   const t_rr_node_indices& L_rr_node_indices,
                                   const std::vector<t_rr_node>& rr_nodes,
                                   const t_direct_inf* directs,
                                   const int num_directs,
                                   const t_clb_to_clb_directs* clb_to_clb_directs,
                                   const int num_seg_types);

 static int get_opin_direct_connecions(int x,
                                       int y,
                                       e_side side,
                                       int opin,
                                       int from_rr_node,
                                       t_rr_edge_info_set& rr_edges_to_create,
                                       const t_rr_node_indices& L_rr_node_indices,
                                       const std::vector<t_rr_node>& rr_nodes,
                                       const t_direct_inf* directs,
                                       const int num_directs,
                                       const t_clb_to_clb_directs* clb_to_clb_directs);

 static void alloc_and_load_rr_graph(const int num_nodes,
                                     std::vector<t_rr_node>& L_rr_node,
                                     const int num_seg_types,
                                     const t_chan_details& chan_details_x,
                                     const t_chan_details& chan_details_y,
                                     const t_track_to_pin_lookup& track_to_pin_lookup,
                                     const t_pin_to_track_lookup& opin_to_track_map,
                                     const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn,
                                     t_sb_connection_map* sb_conn_map,
                                     const DeviceGrid& grid,
                                     const int Fs,
                                     t_sblock_pattern& sblock_pattern,
                                     const std::vector<vtr::Matrix<int>>& Fc_out,
                                     vtr::NdMatrix<int, 3>& Fc_xofs,
                                     vtr::NdMatrix<int, 3>& Fc_yofs,
                                     const t_rr_node_indices& L_rr_node_indices,
                                     const int max_chan_width,
                                     const t_chan_width& chan_width,
                                     const int wire_to_ipin_switch,
                                     const int delayless_switch,
                                     const enum e_directionality directionality,
                                     bool* Fc_clipped,
                                     const t_direct_inf* directs,
                                     const int num_directs,
                                     const t_clb_to_clb_directs* clb_to_clb_directs,
                                     bool is_global_graph);

 static float pattern_fmod(float a, float b);
 static void load_uniform_connection_block_pattern(vtr::NdMatrix<int, 5>& tracks_connected_to_pin,
                                                   const std::vector<t_pin_loc>& pin_locations,
                                                   const int x_chan_width,
                                                   const int y_chan_width,
                                                   const int Fc,
                                                   const enum e_directionality directionality);

 static void load_perturbed_connection_block_pattern(vtr::NdMatrix<int, 5>& tracks_connected_to_pin,
                                                     const std::vector<t_pin_loc>& pin_locations,
                                                     const int x_chan_width,
                                                     const int y_chan_width,
                                                     const int Fc,
                                                     const enum e_directionality directionality);

 static std::vector<bool> alloc_and_load_perturb_opins(const t_physical_tile_type_ptr type, const vtr::Matrix<int>& Fc_out, const int max_chan_width, const std::vector<t_segment_inf>& segment_inf);

 #ifdef ENABLE_CHECK_ALL_TRACKS
 static void check_all_tracks_reach_pins(t_logical_block_type_ptr type,
                                         int***** tracks_connected_to_pin,
                                         int max_chan_width,
                                         int Fc,
                                         enum e_pin_type ipin_or_opin);
 #endif

 static std::vector<std::vector<bool>> alloc_and_load_perturb_ipins(const int L_num_types,
                                                                    const int num_seg_types,
                                                                    const int* sets_per_seg_type,
                                                                    const std::vector<vtr::Matrix<int>>& Fc_in,
                                                                    const std::vector<vtr::Matrix<int>>& Fc_out,
                                                                    const enum e_directionality directionality);

 static void build_rr_sinks_sources(const int i,
                                    const int j,
                                    std::vector<t_rr_node>& L_rr_node,
                                    t_rr_edge_info_set& rr_edges_to_create,
                                    const t_rr_node_indices& L_rr_node_indices,
                                    const int delayless_switch,
                                    const DeviceGrid& grid);

 static void build_rr_chan(const int i,
                           const int j,
                           const t_rr_type chan_type,
                           const t_track_to_pin_lookup& track_to_pin_lookup,
                           t_sb_connection_map* sb_conn_map,
                           const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn,
                           const int cost_index_offset,
                           const int max_chan_width,
                           const DeviceGrid& grid,
                           const int tracks_per_chan,
                           t_sblock_pattern& sblock_pattern,
                           const int Fs_per_side,
                           const t_chan_details& chan_details_x,
                           const t_chan_details& chan_details_y,
                           const t_rr_node_indices& L_rr_node_indices,
                           t_rr_edge_info_set& created_rr_edges,
                           std::vector<t_rr_node>& L_rr_node,
                           const int wire_to_ipin_switch,
                           const enum e_directionality directionality);

 void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create);

 void alloc_and_load_edges(std::vector<t_rr_node>& L_rr_node,
                           const t_rr_edge_info_set& rr_edges_to_create);

 static void alloc_and_load_rr_switch_inf(const int num_arch_switches,
                                          const float R_minW_nmos,
                                          const float R_minW_pmos,
                                          const int wire_to_arch_ipin_switch,
                                          int* wire_to_rr_ipin_switch);

 static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin);

 static void load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, const t_arch_switch_fanin& switch_fanin);

 static void alloc_rr_switch_inf(t_arch_switch_fanin& switch_fanin);

 static void rr_graph_externals(const std::vector<t_segment_inf>& segment_inf,
                                int max_chan_width,
                                int wire_to_rr_ipin_switch,
                                enum e_base_cost_type base_cost_type);

 static t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_inf* directs, const int num_directs, const int delayless_switch);

 static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map,
                                        const std::vector<t_physical_tile_type>& types,
                                        int max_chan_width);

 static t_seg_details* alloc_and_load_global_route_seg_details(const int global_route_switch,
                                                               int* num_seg_details = nullptr);

 static std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const std::vector<t_physical_tile_type>& types,
                                                               const int max_pins,
                                                               const std::vector<t_segment_inf>& segment_inf,
                                                               const int* sets_per_seg_type,
                                                               const int max_chan_width,
                                                               const e_fc_type fc_type,
                                                               const enum e_directionality directionality,
                                                               bool* Fc_clipped);

 static int pick_best_direct_connect_target_rr_node(const std::vector<t_rr_node>& rr_nodes,
                                                    int from_rr,
                                                    const std::vector<int>& candidate_rr_nodes);

 static void expand_non_configurable(int inode, std::set<t_node_edge>& edge_set);
 static void process_non_config_sets(const t_non_configurable_rr_sets& non_config_rr_sets);

 static void build_rr_graph(const t_graph_type graph_type,
                            const std::vector<t_physical_tile_type>& types,
                            const DeviceGrid& grid,
                            t_chan_width nodes_per_chan,
                            const enum e_switch_block_type sb_type,
                            const int Fs,
                            const std::vector<t_switchblock_inf> switchblocks,
                            const int num_arch_switches,
                            const std::vector<t_segment_inf>& segment_inf,
                            const int global_route_switch,
                            const int wire_to_arch_ipin_switch,
                            const int delayless_switch,
                            const float R_minW_nmos,
                            const float R_minW_pmos,
                            const enum e_base_cost_type base_cost_type,
                            const bool trim_empty_channels,
                            const bool trim_obs_channels,
                            const t_direct_inf* directs,
                            const int num_directs,
                            int* wire_to_rr_ipin_switch,
                            int* Warnings);

 /******************* Subroutine definitions *******************************/

 void create_rr_graph(const t_graph_type graph_type,
                      const std::vector<t_physical_tile_type>& block_types,
                      const DeviceGrid& grid,
                      const t_chan_width nodes_per_chan,
                      const int num_arch_switches,
                      t_det_routing_arch* det_routing_arch,
                      std::vector<t_segment_inf>& segment_inf,
                      const enum e_base_cost_type base_cost_type,
                      const bool trim_empty_channels,
                      const bool trim_obs_channels,
                      const enum e_clock_modeling clock_modeling,
                      const e_router_lookahead router_lookahead_type,
                      const t_direct_inf* directs,
                      const int num_directs,
                      int* Warnings) {
     const auto& device_ctx = g_vpr_ctx.device();

     if (!det_routing_arch->read_rr_graph_filename.empty()) {
         if (device_ctx.read_rr_graph_filename != det_routing_arch->read_rr_graph_filename) {
             free_rr_graph();

             load_rr_file(graph_type,
                          grid,
                          segment_inf,
                          base_cost_type,
                          &det_routing_arch->wire_to_rr_ipin_switch,
                          det_routing_arch->read_rr_graph_filename.c_str());
         }
     } else {
         if (channel_widths_unchanged(device_ctx.chan_width, nodes_per_chan) && !device_ctx.rr_nodes.empty()) {
             //No change in channel width, so skip re-building RR graph
             VTR_LOG("RR graph channel widths unchanged, skipping RR graph rebuild\n");
             return;
         }

         free_rr_graph();

         build_rr_graph(graph_type,
                        block_types,
                        grid,
                        nodes_per_chan,
                        det_routing_arch->switch_block_type,
                        det_routing_arch->Fs,
                        det_routing_arch->switchblocks,
                        num_arch_switches,
                        segment_inf,
                        det_routing_arch->global_route_switch,
                        det_routing_arch->wire_to_arch_ipin_switch,
                        det_routing_arch->delayless_switch,
                        det_routing_arch->R_minW_nmos,
                        det_routing_arch->R_minW_pmos,
                        base_cost_type,
                        trim_empty_channels,
                        trim_obs_channels,
                        directs, num_directs,
                        &det_routing_arch->wire_to_rr_ipin_switch,
                        Warnings);

         if (clock_modeling == DEDICATED_NETWORK) {
             ClockRRGraphBuilder::create_and_append_clock_rr_graph(segment_inf,
                                                                   det_routing_arch->R_minW_nmos,
                                                                   det_routing_arch->R_minW_pmos,
                                                                   det_routing_arch->wire_to_rr_ipin_switch,
                                                                   base_cost_type);
         }
     }

     auto non_config_rr_sets = identify_non_configurable_rr_sets();
     process_non_config_sets(non_config_rr_sets);

     print_rr_graph_stats();

     //Write out rr graph file if needed
     if (!det_routing_arch->write_rr_graph_filename.empty()) {
         write_rr_graph(det_routing_arch->write_rr_graph_filename.c_str(), segment_inf);
     }
 }

 void print_rr_graph_stats() {
     auto& device_ctx = g_vpr_ctx.device();

     size_t num_rr_edges = 0;
     for (auto& rr_node : device_ctx.rr_nodes) {
         num_rr_edges += rr_node.edges().size();
     }

     VTR_LOG("  RR Graph Nodes: %zu\n", device_ctx.rr_nodes.size());
     VTR_LOG("  RR Graph Edges: %zu\n", num_rr_edges);
 }

 bool channel_widths_unchanged(const t_chan_width& current, const t_chan_width& proposed) {
     if (current.max != proposed.max
         || current.x_max != proposed.x_max
         || current.y_max != proposed.y_max
         || current.x_min != proposed.x_min
         || current.y_min != proposed.y_min
         || current.x_list != proposed.x_list
         || current.y_list != proposed.y_list) {
         return false; //Different max/min or channel widths
     }

     return true; //Identical
 }

 static void build_rr_graph(const t_graph_type graph_type,
                            const std::vector<t_physical_tile_type>& types,
                            const DeviceGrid& grid,
                            t_chan_width nodes_per_chan,
                            const enum e_switch_block_type sb_type,
                            const int Fs,
                            const std::vector<t_switchblock_inf> switchblocks,
                            const int num_arch_switches,
                            const std::vector<t_segment_inf>& segment_inf,
                            const int global_route_switch,
                            const int wire_to_arch_ipin_switch,
                            const int delayless_switch,
                            const float R_minW_nmos,
                            const float R_minW_pmos,
                            const enum e_base_cost_type base_cost_type,
                            const bool trim_empty_channels,
                            const bool trim_obs_channels,
                            const t_direct_inf* directs,
                            const int num_directs,
                            int* wire_to_rr_ipin_switch,
                            int* Warnings) {
     vtr::ScopedStartFinishTimer timer("Build routing resource graph");

     /* Reset warning flag */
     *Warnings = RR_GRAPH_NO_WARN;

     /* Decode the graph_type */
     bool is_global_graph = ((GRAPH_GLOBAL == graph_type) ? true : false);
     bool use_full_seg_groups = ((GRAPH_UNIDIR_TILEABLE == graph_type) ? true : false);
     enum e_directionality directionality = ((GRAPH_BIDIR == graph_type) ? BI_DIRECTIONAL : UNI_DIRECTIONAL);
     if (is_global_graph) {
         directionality = BI_DIRECTIONAL;
     }

     /* Global routing uses a single longwire track */
     int max_chan_width = (is_global_graph ? 1 : nodes_per_chan.max);
     VTR_ASSERT(max_chan_width > 0);

     auto& device_ctx = g_vpr_ctx.mutable_device();

     t_clb_to_clb_directs* clb_to_clb_directs = nullptr;
     if (num_directs > 0) {
         clb_to_clb_directs = alloc_and_load_clb_to_clb_directs(directs, num_directs, delayless_switch);
     }

     /* START SEG_DETAILS */
     int num_seg_details = 0;
     t_seg_details* seg_details = nullptr;

     if (is_global_graph) {
         /* Sets up a single unit length segment type for global routing. */
         seg_details = alloc_and_load_global_route_seg_details(global_route_switch, &num_seg_details);
     } else {
         /* Setup segments including distrubuting tracks and staggering.
          * If use_full_seg_groups is specified, max_chan_width may be
          * changed. Warning should be singled to caller if this happens. */
         size_t max_dim = std::max(grid.width(), grid.height()) - 2; //-2 for no perim channels

         seg_details = alloc_and_load_seg_details(&max_chan_width,
                                                  max_dim, segment_inf,
                                                  use_full_seg_groups, is_global_graph, directionality,
                                                  &num_seg_details);
         if (nodes_per_chan.max != max_chan_width) {
             nodes_per_chan.max = max_chan_width;
             *Warnings |= RR_GRAPH_WARN_CHAN_WIDTH_CHANGED;
         }

         //TODO: Fix
         //if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SEG_DETAILS)) {
         //dump_seg_details(seg_details, max_chan_width,
         //getEchoFileName(E_ECHO_SEG_DETAILS));
         //}
     }
     /* END SEG_DETAILS */

     /* START CHAN_DETAILS */
     t_chan_details chan_details_x;
     t_chan_details chan_details_y;

     alloc_and_load_chan_details(grid, &nodes_per_chan,
                                 trim_empty_channels, trim_obs_channels,
                                 num_seg_details, seg_details,
                                 chan_details_x, chan_details_y);

     if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_CHAN_DETAILS)) {
         dump_chan_details(chan_details_x, chan_details_y, max_chan_width, grid,
                           getEchoFileName(E_ECHO_CHAN_DETAILS));
     }
     /* END CHAN_DETAILS */

     /* START FC */
     /* Determine the actual value of Fc */
     std::vector<vtr::Matrix<int>> Fc_in;  /* [0..device_ctx.num_block_types-1][0..num_pins-1][0..num_segments-1] */
     std::vector<vtr::Matrix<int>> Fc_out; /* [0..device_ctx.num_block_types-1][0..num_pins-1][0..num_segments-1] */

     /* get maximum number of pins across all blocks */
     int max_pins = types[0].num_pins;
     for (const auto& type : types) {
         if (is_empty_type(&type)) {
             continue;
         }

         if (type.num_pins > max_pins) {
             max_pins = type.num_pins;
         }
     }

     /* get the number of 'sets' for each segment type -- unidirectial architectures have two tracks in a set, bidirectional have one */
     int total_sets = max_chan_width;
     if (directionality == UNI_DIRECTIONAL) {
         VTR_ASSERT(max_chan_width % 2 == 0);
         total_sets /= 2;
     }
     int* sets_per_seg_type = get_seg_track_counts(total_sets, segment_inf, use_full_seg_groups);

     if (is_global_graph) {
         //All pins can connect during global routing
         auto ones = vtr::Matrix<int>({size_t(max_pins), segment_inf.size()}, 1);
         Fc_in = std::vector<vtr::Matrix<int>>(types.size(), ones);
         Fc_out = std::vector<vtr::Matrix<int>>(types.size(), ones);
     } else {
         bool Fc_clipped = false;
         Fc_in = alloc_and_load_actual_fc(types, max_pins, segment_inf, sets_per_seg_type, max_chan_width,
                                          e_fc_type::IN, directionality, &Fc_clipped);
         if (Fc_clipped) {
             *Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
         }
         Fc_clipped = false;
         Fc_out = alloc_and_load_actual_fc(types, max_pins, segment_inf, sets_per_seg_type, max_chan_width,
                                           e_fc_type::OUT, directionality, &Fc_clipped);
         if (Fc_clipped) {
             *Warnings |= RR_GRAPH_WARN_FC_CLIPPED;
         }

         for (const auto& type : types) {
             int i = type.index;

             /* Skip "EMPTY" */
             if (is_empty_type(&type)) {
                 continue;
             }

             for (int j = 0; j < type.num_pins; ++j) {
                 for (size_t k = 0; k < segment_inf.size(); k++) {
 #ifdef VERBOSE
                     VTR_LOG(
                         "Fc Actual Values: type = %s, pin = %d (%s), "
                         "seg = %d (%s), Fc_out = %d, Fc_in = %d.\n",
                         type.name,
                         j,
                         block_type_pin_index_to_name(&type, j).c_str(),
                         k,
                         segment_inf[k].name.c_str(),
                         Fc_out[i][j][k],
                         Fc_in[i][j][k]);
 #endif /* VERBOSE */
                     VTR_ASSERT_MSG(Fc_out[i][j][k] == 0 || Fc_in[i][j][k] == 0,
                                    "Pins must be inputs or outputs (i.e. can not have both non-zero Fc_out and Fc_in)");
                 }
             }
         }
     }

     auto perturb_ipins = alloc_and_load_perturb_ipins(types.size(), segment_inf.size(),
                                                       sets_per_seg_type, Fc_in, Fc_out, directionality);
     /* END FC */

     /* Alloc node lookups, count nodes, alloc rr nodes */
     int num_rr_nodes = 0;

     device_ctx.rr_node_indices = alloc_and_load_rr_node_indices(max_chan_width, grid,
                                                                 &num_rr_nodes, chan_details_x, chan_details_y);
     device_ctx.rr_nodes.resize(num_rr_nodes);

     /* These are data structures used by the the unidir opin mapping. They are used
      * to spread connections evenly for each segment type among the available
      * wire start points */
     vtr::NdMatrix<int, 3> Fc_xofs({grid.height() - 1,
                                    grid.width() - 1,
                                    segment_inf.size()},
                                   0); //[0..grid.height()-2][0..grid.width()-2][0..num_seg_types-1]
     vtr::NdMatrix<int, 3> Fc_yofs({grid.width() - 1,
                                    grid.height() - 1,
                                    segment_inf.size()},
                                   0); //[0..grid.width()-2][0..grid.height()-2][0..num_seg_types-1]

     /* START SB LOOKUP */
     /* Alloc and load the switch block lookup */
     vtr::NdMatrix<std::vector<int>, 3> switch_block_conn;
     t_sblock_pattern unidir_sb_pattern;
     t_sb_connection_map* sb_conn_map = nullptr; //for custom switch blocks

     //We are careful to use a single seed each time build_rr_graph is called
     //to initialize the random number generator (RNG) which is (potentially)
     //used when creating custom switchblocks. This ensures that build_rr_graph()
     //is deterministic -- always producing the same RR graph.
     constexpr unsigned SWITCHPOINT_RNG_SEED = 1;
     vtr::RandState switchpoint_rand_state = SWITCHPOINT_RNG_SEED;

     if (is_global_graph) {
         switch_block_conn = alloc_and_load_switch_block_conn(1, SUBSET, 3);
     } else if (BI_DIRECTIONAL == directionality) {
         if (sb_type == CUSTOM) {
             sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, chan_details_y,
                                                                   grid,
                                                                   switchblocks, &nodes_per_chan, directionality,
                                                                   switchpoint_rand_state);
         } else {
             switch_block_conn = alloc_and_load_switch_block_conn(max_chan_width, sb_type, Fs);
         }
     } else {
         VTR_ASSERT(UNI_DIRECTIONAL == directionality);

         if (sb_type == CUSTOM) {
             sb_conn_map = alloc_and_load_switchblock_permutations(chan_details_x, chan_details_y,
                                                                   grid,
                                                                   switchblocks, &nodes_per_chan, directionality,
                                                                   switchpoint_rand_state);
         } else {
             /* it looks like we get unbalanced muxing from this switch block code with Fs > 3 */
             VTR_ASSERT(Fs == 3);

             unidir_sb_pattern = alloc_sblock_pattern_lookup(grid, max_chan_width);
             for (size_t i = 0; i < grid.width() - 1; i++) {
                 for (size_t j = 0; j < grid.height() - 1; j++) {
                     load_sblock_pattern_lookup(i, j, grid, &nodes_per_chan,
                                                chan_details_x, chan_details_y,
                                                Fs, sb_type, unidir_sb_pattern);
                 }
             }

             if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_SBLOCK_PATTERN)) {
                 dump_sblock_pattern(unidir_sb_pattern, max_chan_width, grid,
                                     getEchoFileName(E_ECHO_SBLOCK_PATTERN));
             }
         }
     }
     /* END SB LOOKUP */

     /* START IPIN MAP */
     /* Create ipin map lookups */

     t_pin_to_track_lookup ipin_to_track_map(types.size());   /* [0..device_ctx.physical_tile_types.size()-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */
     t_track_to_pin_lookup track_to_pin_lookup(types.size()); /* [0..device_ctx.physical_tile_types.size()-1][0..max_chan_width-1][0..width][0..height][0..3] */

     for (unsigned int itype = 0; itype < types.size(); ++itype) {
         ipin_to_track_map[itype] = alloc_and_load_pin_to_track_map(RECEIVER,
                                                                    Fc_in[itype], &types[itype], perturb_ipins[itype], directionality,
                                                                    segment_inf.size(), sets_per_seg_type);

         track_to_pin_lookup[itype] = alloc_and_load_track_to_pin_lookup(ipin_to_track_map[itype], Fc_in[itype], types[itype].width, types[itype].height,
                                                                         types[itype].num_pins, max_chan_width, segment_inf.size());
     }
     /* END IPIN MAP */

     /* START OPIN MAP */
     /* Create opin map lookups */
     t_pin_to_track_lookup opin_to_track_map(types.size()); /* [0..device_ctx.physical_tile_types.size()-1][0..num_pins-1][0..width][0..height][0..3][0..Fc-1] */

     if (BI_DIRECTIONAL == directionality) {
         for (unsigned int itype = 0; itype < types.size(); ++itype) {
             auto perturb_opins = alloc_and_load_perturb_opins(&types[itype], Fc_out[itype],
                                                               max_chan_width, segment_inf);
             opin_to_track_map[itype] = alloc_and_load_pin_to_track_map(DRIVER,
                                                                        Fc_out[itype], &types[itype], perturb_opins, directionality,
                                                                        segment_inf.size(), sets_per_seg_type);
         }
     }
     /* END OPIN MAP */

     bool Fc_clipped = false;
     alloc_and_load_rr_graph(device_ctx.rr_nodes.size(), device_ctx.rr_nodes, segment_inf.size(),
                             chan_details_x, chan_details_y,
                             track_to_pin_lookup, opin_to_track_map,
                             switch_block_conn, sb_conn_map, grid, Fs, unidir_sb_pattern,
                             Fc_out, Fc_xofs, Fc_yofs, device_ctx.rr_node_indices,
                             max_chan_width,
                             nodes_per_chan,
                             wire_to_arch_ipin_switch,
                             delayless_switch,
                             directionality,
                             &Fc_clipped,
                             directs, num_directs, clb_to_clb_directs,
                             is_global_graph);

     /* Update rr_nodes capacities if global routing */
     if (graph_type == GRAPH_GLOBAL) {
         for (size_t i = 0; i < device_ctx.rr_nodes.size(); i++) {
             if (device_ctx.rr_nodes[i].type() == CHANX) {
                 int ylow = device_ctx.rr_nodes[i].ylow();
                 device_ctx.rr_nodes[i].set_capacity(nodes_per_chan.x_list[ylow]);
             }
             if (device_ctx.rr_nodes[i].type() == CHANY) {
                 int xlow = device_ctx.rr_nodes[i].xlow();
                 device_ctx.rr_nodes[i].set_capacity(nodes_per_chan.y_list[xlow]);
             }
         }
     }

     /* Allocate and load routing resource switches, which are derived from the switches from the architecture file,
      * based on their fanin in the rr graph. This routine also adjusts the rr nodes to point to these new rr switches */
     alloc_and_load_rr_switch_inf(num_arch_switches, R_minW_nmos, R_minW_pmos, wire_to_arch_ipin_switch, wire_to_rr_ipin_switch);

     //Partition the rr graph edges for efficient access to configurable/non-configurable
     //edge subsets. Must be done after RR switches have been allocated
     partition_rr_graph_edges(device_ctx);

     //Save the channel widths for the newly constructed graph
     device_ctx.chan_width = nodes_per_chan;

     rr_graph_externals(segment_inf, max_chan_width,
                        *wire_to_rr_ipin_switch, base_cost_type);

     check_rr_graph(graph_type, grid, types);

     /* Free all temp structs */
     if (seg_details) {
         delete[] seg_details;
         seg_details = nullptr;
     }
     if (!chan_details_x.empty() || !chan_details_y.empty()) {
         free_chan_details(chan_details_x, chan_details_y);
     }
     if (sb_conn_map) {
         free_switchblock_permutations(sb_conn_map);
         sb_conn_map = nullptr;
     }
     if (sets_per_seg_type) {
         free(sets_per_seg_type);
         sets_per_seg_type = nullptr;
     }

     free_type_track_to_pin_map(track_to_pin_lookup, types, max_chan_width);
     if (clb_to_clb_directs != nullptr) {
         free(clb_to_clb_directs);
     }
 }

 /* Allocates and loads the global rr_switch_inf array based on the global
  * arch_switch_inf array and the fan-ins used by the rr nodes.
  * Also changes switch indices of rr_nodes to index into rr_switch_inf
  * instead of arch_switch_inf.
  *
  * Returns the number of rr switches created.
  * Also returns, through a pointer, the index of a representative ipin cblock switch.
  * - Currently we're not allowing a designer to specify an ipin cblock switch with
  * multiple fan-ins, so there's just one of these switches in the device_ctx.rr_switch_inf array.
  * But in the future if we allow this, we can return an index to a representative switch
  *
  * The rr_switch_inf switches are derived from the arch_switch_inf switches
  * (which were read-in from the architecture file) based on fan-in. The delays of
  * the rr switches depend on their fan-in, so we first go through the rr_nodes
  * and count how many different fan-ins exist for each arch switch.
  * Then we create these rr switches and update the switch indices
  * of rr_nodes to index into the rr_switch_inf array. */
 static void alloc_and_load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, const int wire_to_arch_ipin_switch, int* wire_to_rr_ipin_switch) {
     /* we will potentially be creating a couple of versions of each arch switch where
      * each version corresponds to a different fan-in. We will need to fill device_ctx.rr_switch_inf
      * with this expanded list of switches.
      *
      * To do this we will use arch_switch_fanins, which is indexed as:
      *      arch_switch_fanins[i_arch_switch][fanin] -> new_switch_id
      */
     t_arch_switch_fanin arch_switch_fanins(num_arch_switches);

     /* Determine what the different fan-ins are for each arch switch, and also
      * how many entries the rr_switch_inf array should have */
     alloc_rr_switch_inf(arch_switch_fanins);

     /* create the rr switches. also keep track of, for each arch switch, what index of the rr_switch_inf
      * array each version of its fanin has been mapped to */
     load_rr_switch_inf(num_arch_switches, R_minW_nmos, R_minW_pmos, arch_switch_fanins);

     /* next, walk through rr nodes again and remap their switch indices to rr_switch_inf */
     remap_rr_node_switch_indices(arch_switch_fanins);

     /* now we need to set the wire_to_rr_ipin_switch variable which points the detailed routing architecture
      * to the representative ipin cblock switch. currently we're not allowing the specification of an ipin cblock switch
      * with multiple fan-ins, so right now there's just one. May change in the future, in which case we'd need to
      * return a representative switch */
     if (arch_switch_fanins[wire_to_arch_ipin_switch].count(UNDEFINED)) {
         /* only have one ipin cblock switch. OK. */
         (*wire_to_rr_ipin_switch) = arch_switch_fanins[wire_to_arch_ipin_switch][UNDEFINED];
     } else if (arch_switch_fanins[wire_to_arch_ipin_switch].size() != 0) {
         VPR_FATAL_ERROR(VPR_ERROR_ARCH,
                         "Not currently allowing an ipin cblock switch to have multiple fan-ins");
     } else {
         //This likely indicates that no connection block has been constructed, indicating significant issues with
         //the generated RR graph.
         //
         //Instead of throwing an error we issue a warning. This means that check_rr_graph() etc. will run to give more information
         //and allow graphics to be brought up for users to debug their architectures.
         (*wire_to_rr_ipin_switch) = OPEN;
         VTR_LOG_WARN("No switch found for the ipin cblock in RR graph. Check if there is an error in arch file, or if no connection blocks are being built in RR graph\n");
     }
 }

 /* Allocates space for the global device_ctx.rr_switch_inf variable and returns the
  * number of rr switches that were allocated */
 static void alloc_rr_switch_inf(t_arch_switch_fanin& arch_switch_fanins) {
     auto& device_ctx = g_vpr_ctx.mutable_device();

     int num_rr_switches = 0;
     {
         //Collect the fan-in per switch type for each node in the graph
         //
         //Note that since we don't store backward edge info in the RR graph we need to walk
         //the whole graph to get the per-switch-type fanin info
         std::vector<vtr::flat_map<int, int>> inward_switch_inf(device_ctx.rr_nodes.size()); //[to_node][arch_switch] -> fanin
         for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) {
             for (auto iedge : device_ctx.rr_nodes[inode].edges()) {
                 int iswitch = device_ctx.rr_nodes[inode].edge_switch(iedge);
                 int to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);

                 if (inward_switch_inf[to_node].count(iswitch) == 0) {
                     inward_switch_inf[to_node][iswitch] = 0;
                 }
                 inward_switch_inf[to_node][iswitch]++;
             }
         }

         //Record the unique switch type/fanin combinations
         for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) {
             for (auto& switch_fanin : inward_switch_inf[inode]) {
                 int iswitch, fanin;
                 std::tie(iswitch, fanin) = switch_fanin;

                 if (device_ctx.arch_switch_inf[iswitch].fixed_Tdel()) {
                     //If delay is independent of fanin drop the unique fanin info
                     fanin = UNDEFINED;
                 }

                 if (arch_switch_fanins[iswitch].count(fanin) == 0) {        //New fanin for this switch
                     arch_switch_fanins[iswitch][fanin] = num_rr_switches++; //Assign it a unique index
                 }
             }
         }
     }

     /* allocate space for the rr_switch_inf array */
     device_ctx.rr_switch_inf.resize(num_rr_switches);
 }

 /* load the global device_ctx.rr_switch_inf variable. also keep track of, for each arch switch, what
  * index of the rr_switch_inf array each version of its fanin has been mapped to (through switch_fanin map) */
 static void load_rr_switch_inf(const int num_arch_switches, const float R_minW_nmos, const float R_minW_pmos, const t_arch_switch_fanin& arch_switch_fanins) {
     auto& device_ctx = g_vpr_ctx.mutable_device();

     if (!device_ctx.switch_fanin_remap.empty()) {
         // at this stage, we rebuild the rr_graph (probably in binary search)
         // so old device_ctx.switch_fanin_remap is obsolete
         device_ctx.switch_fanin_remap.clear();
     }

     device_ctx.switch_fanin_remap.resize(num_arch_switches);
     for (int i_arch_switch = 0; i_arch_switch < num_arch_switches; i_arch_switch++) {
         std::map<int, int>::iterator it;
         for (auto fanin_rrswitch : arch_switch_fanins[i_arch_switch]) {
             /* the fanin value is in it->first, and we'll need to set what index this i_arch_switch/fanin
              * combination maps to (within rr_switch_inf) in it->second) */
             int fanin;
             int i_rr_switch;
             std::tie(fanin, i_rr_switch) = fanin_rrswitch;

             // setup device_ctx.switch_fanin_remap, for future swich usage analysis
             device_ctx.switch_fanin_remap[i_arch_switch][fanin] = i_rr_switch;

             load_rr_switch_from_arch_switch(i_arch_switch, i_rr_switch, fanin, R_minW_nmos, R_minW_pmos);
         }
     }
 }

 void load_rr_switch_from_arch_switch(int arch_switch_idx,
                                      int rr_switch_idx,
                                      int fanin,
                                      const float R_minW_nmos,
                                      const float R_minW_pmos) {
     auto& device_ctx = g_vpr_ctx.mutable_device();

     /* figure out, by looking at the arch switch's Tdel map, what the delay of the new
      * rr switch should be */
     double rr_switch_Tdel = device_ctx.arch_switch_inf[arch_switch_idx].Tdel(fanin);

     /* copy over the arch switch to rr_switch_inf[rr_switch_idx], but with the changed Tdel value */
     device_ctx.rr_switch_inf[rr_switch_idx].set_type(device_ctx.arch_switch_inf[arch_switch_idx].type());
     device_ctx.rr_switch_inf[rr_switch_idx].R = device_ctx.arch_switch_inf[arch_switch_idx].R;
     device_ctx.rr_switch_inf[rr_switch_idx].Cin = device_ctx.arch_switch_inf[arch_switch_idx].Cin;
     device_ctx.rr_switch_inf[rr_switch_idx].Cinternal = device_ctx.arch_switch_inf[arch_switch_idx].Cinternal;
     device_ctx.rr_switch_inf[rr_switch_idx].Cout = device_ctx.arch_switch_inf[arch_switch_idx].Cout;
     device_ctx.rr_switch_inf[rr_switch_idx].Tdel = rr_switch_Tdel;
     device_ctx.rr_switch_inf[rr_switch_idx].mux_trans_size = device_ctx.arch_switch_inf[arch_switch_idx].mux_trans_size;
     if (device_ctx.arch_switch_inf[arch_switch_idx].buf_size_type == BufferSize::AUTO) {
         //Size based on resistance
         device_ctx.rr_switch_inf[rr_switch_idx].buf_size = trans_per_buf(device_ctx.arch_switch_inf[arch_switch_idx].R, R_minW_nmos, R_minW_pmos);
     } else {
         VTR_ASSERT(device_ctx.arch_switch_inf[arch_switch_idx].buf_size_type == BufferSize::ABSOLUTE);
         //Use the specified size
         device_ctx.rr_switch_inf[rr_switch_idx].buf_size = device_ctx.arch_switch_inf[arch_switch_idx].buf_size;
     }
     device_ctx.rr_switch_inf[rr_switch_idx].name = device_ctx.arch_switch_inf[arch_switch_idx].name;
     device_ctx.rr_switch_inf[rr_switch_idx].power_buffer_type = device_ctx.arch_switch_inf[arch_switch_idx].power_buffer_type;
     device_ctx.rr_switch_inf[rr_switch_idx].power_buffer_size = device_ctx.arch_switch_inf[arch_switch_idx].power_buffer_size;
 }

 /* switch indices of each rr_node original point into the global device_ctx.arch_switch_inf array.
  * now we want to remap these indices to point into the global device_ctx.rr_switch_inf array
  * which contains switch info at different fan-in values */
 static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin) {
     auto& device_ctx = g_vpr_ctx.mutable_device();

     for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
         auto& from_node = device_ctx.rr_nodes[inode];
         int num_edges = from_node.num_edges();
         for (int iedge = 0; iedge < num_edges; iedge++) {
             const t_rr_node& to_node = device_ctx.rr_nodes[from_node.edge_sink_node(iedge)];
             /* get the switch which this edge uses and its fanin */
             int switch_index = from_node.edge_switch(iedge);
             int fanin = to_node.fan_in();

             if (switch_fanin[switch_index].count(UNDEFINED) == 1) {
                 fanin = UNDEFINED;
             }

             auto itr = switch_fanin[switch_index].find(fanin);
             VTR_ASSERT(itr != switch_fanin[switch_index].end());

             int rr_switch_index = itr->second;

             from_node.set_edge_switch(iedge, rr_switch_index);
         }
     }
 }

 static void rr_graph_externals(const std::vector<t_segment_inf>& segment_inf,
                                int max_chan_width,
                                int wire_to_rr_ipin_switch,
                                enum e_base_cost_type base_cost_type) {
     auto& device_ctx = g_vpr_ctx.device();

     add_rr_graph_C_from_switches(device_ctx.rr_switch_inf[wire_to_rr_ipin_switch].Cin);
     alloc_and_load_rr_indexed_data(segment_inf, device_ctx.rr_node_indices,
                                    max_chan_width, wire_to_rr_ipin_switch, base_cost_type);
     load_rr_index_segments(segment_inf.size());
 }

 static std::vector<std::vector<bool>> alloc_and_load_perturb_ipins(const int L_num_types,
                                                                    const int num_seg_types,
                                                                    const int* sets_per_seg_type,
                                                                    const std::vector<vtr::Matrix<int>>& Fc_in,
                                                                    const std::vector<vtr::Matrix<int>>& Fc_out,
                                                                    const enum e_directionality directionality) {
     std::vector<std::vector<bool>> result(L_num_types);
     for (auto& seg_type_bools : result) {
         seg_type_bools.resize(num_seg_types, false);
     }

     /* factor to account for unidir vs bidir */
     int fac = 1;
     if (directionality == UNI_DIRECTIONAL) {
         fac = 2;
     }

     if (BI_DIRECTIONAL == directionality) {
         for (int iseg = 0; iseg < num_seg_types; ++iseg) {
             result[0][iseg] = false;

             int tracks_in_seg_type = sets_per_seg_type[iseg] * fac;

             for (int itype = 1; itype < L_num_types; ++itype) {
                 result[itype][iseg] = false;

                 float Fc_ratio;
                 if (Fc_in[itype][0][iseg] > Fc_out[itype][0][iseg]) {
                     Fc_ratio = (float)Fc_in[itype][0][iseg] / (float)Fc_out[itype][0][iseg];
                 } else {
                     Fc_ratio = (float)Fc_out[itype][0][iseg] / (float)Fc_in[itype][0][iseg];
                 }

                 if ((Fc_in[itype][0][iseg] <= tracks_in_seg_type - 2)
                     && (fabs(Fc_ratio - vtr::nint(Fc_ratio))
                         < (0.5 / (float)tracks_in_seg_type))) {
                     result[itype][iseg] = true;
                 }
             }
         }
     } else {
         /* Unidirectional routing uses mux balancing patterns and
          * thus shouldn't need perturbation. */
         VTR_ASSERT(UNI_DIRECTIONAL == directionality);
         for (int itype = 0; itype < L_num_types; ++itype) {
             for (int iseg = 0; iseg < num_seg_types; ++iseg) {
                 result[itype][iseg] = false;
             }
         }
     }
     return result;
 }

 static t_seg_details* alloc_and_load_global_route_seg_details(const int global_route_switch,
                                                               int* num_seg_details) {
     t_seg_details* seg_details = new t_seg_details[1];

     seg_details->index = 0;
     seg_details->length = 1;
     seg_details->arch_wire_switch = global_route_switch;
     seg_details->arch_opin_switch = global_route_switch;
     seg_details->longline = false;
     seg_details->direction = BI_DIRECTION;
     seg_details->Cmetal = 0.0;
     seg_details->Rmetal = 0.0;
     seg_details->start = 1;
     seg_details->cb = std::make_unique<bool[]>(1);
     seg_details->cb[0] = true;
     seg_details->sb = std::make_unique<bool[]>(2);
     seg_details->sb[0] = true;
     seg_details->sb[1] = true;
     seg_details->group_size = 1;
     seg_details->group_start = 0;
     seg_details->seg_start = -1;
     seg_details->seg_end = -1;

     if (num_seg_details) {
         *num_seg_details = 1;
     }
     return seg_details;
 }

 /* Calculates the number of track connections from each block pin to each segment type */
 static std::vector<vtr::Matrix<int>> alloc_and_load_actual_fc(const std::vector<t_physical_tile_type>& types,
                                                               const int max_pins,
                                                               const std::vector<t_segment_inf>& segment_inf,
                                                               const int* sets_per_seg_type,
                                                               const int max_chan_width,
                                                               const e_fc_type fc_type,
                                                               const enum e_directionality directionality,
                                                               bool* Fc_clipped) {
     //Initialize Fc of all blocks to zero
     auto zeros = vtr::Matrix<int>({size_t(max_pins), segment_inf.size()}, 0);
     std::vector<vtr::Matrix<int>> Fc(types.size(), zeros);

     *Fc_clipped = false;

     /* Unidir tracks formed in pairs, otherwise no effect. */
     int fac = 1;
     if (UNI_DIRECTIONAL == directionality) {
         fac = 2;
     }

     VTR_ASSERT((max_chan_width % fac) == 0);

     for (const auto& type : types) { //Skip EMPTY
         int itype = type.index;

         for (const t_fc_specification& fc_spec : type.fc_specs) {
             if (fc_type != fc_spec.fc_type) continue;

             VTR_ASSERT(fc_spec.pins.size() > 0);

             int iseg = fc_spec.seg_index;

             if (fc_spec.fc_value == 0) {
                 /* Special case indicating that this pin does not connect to general-purpose routing */
                 for (int ipin : fc_spec.pins) {
                     Fc[itype][ipin][iseg] = 0;
                 }
             } else {
                 /* General case indicating that this pin connects to general-purpose routing */

                 //Calculate how many connections there should be accross all the pins in this fc_spec
                 int total_connections = 0;
                 if (fc_spec.fc_value_type == e_fc_value_type::FRACTIONAL) {
                     float conns_per_pin = fac * sets_per_seg_type[iseg] * fc_spec.fc_value;
                     float flt_total_connections = conns_per_pin * fc_spec.pins.size();
                     total_connections = vtr::nint(flt_total_connections); //Round to integer
                 } else {
                     VTR_ASSERT(fc_spec.fc_value_type == e_fc_value_type::ABSOLUTE);

                     if (std::fmod(fc_spec.fc_value, fac) != 0.) {
                         VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Absolute Fc value must be a multiple of %d (was %f) between block pin '%s' and wire segment '%s'",
                                         fac, fc_spec.fc_value,
                                         block_type_pin_index_to_name(&type, fc_spec.pins[0]).c_str(),
                                         segment_inf[iseg].name.c_str());
                     }

                     if (fc_spec.fc_value < fac) {
                         VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Absolute Fc value must be at least %d (was %f) between block pin '%s' to wire segment %s",
                                         fac, fc_spec.fc_value,
                                         block_type_pin_index_to_name(&type, fc_spec.pins[0]).c_str(),
                                         segment_inf[iseg].name.c_str());
                     }

                     total_connections = vtr::nint(fc_spec.fc_value) * fc_spec.pins.size();
                 }

                 //Ensure that there are at least fac connections, this ensures that low Fc ports
                 //targeting small sets of segs get connection(s), even if flt_total_connections < fac.
                 total_connections = std::max(total_connections, fac);

                 //Ensure total evenly divides fac by adding the remainder
                 total_connections += (total_connections % fac);

                 VTR_ASSERT(total_connections > 0);
                 VTR_ASSERT(total_connections % fac == 0);

                 //We walk through all the pins this fc_spec applies to, adding fac connections
                 //to each pin, until we run out of connections. This should distribute the connections
                 //as evenly as possible (if total_connections % pins.size() != 0, there will be
                 //some inevitable imbalance).
                 int connections_remaining = total_connections;
                 while (connections_remaining != 0) {
                     //Add one set of connections to each pin
                     for (int ipin : fc_spec.pins) {
                         if (connections_remaining >= fac) {
                             Fc[itype][ipin][iseg] += fac;
                             connections_remaining -= fac;
                         } else {
                             VTR_ASSERT(connections_remaining == 0);
                             break;
                         }
                     }
                 }

                 for (int ipin : fc_spec.pins) {
                     //It is possible that we may want more connections that wires of this type exist;
                     //clip to the maximum number of wires
                     if (Fc[itype][ipin][iseg] > sets_per_seg_type[iseg] * fac) {
                         *Fc_clipped = true;
                         Fc[itype][ipin][iseg] = sets_per_seg_type[iseg] * fac;
                     }

                     VTR_ASSERT_MSG(Fc[itype][ipin][iseg] >= 0, "Calculated absolute Fc must be positive");
                     VTR_ASSERT_MSG(Fc[itype][ipin][iseg] % fac == 0, "Calculated absolute Fc must be divisible by 1 (bidir architecture) or 2 (unidir architecture)"); //Required by connection block construction code
                 }
             }
         }
     }

     return Fc;
 }

 /* frees the track to ipin mapping for each physical grid type */
 static void free_type_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map,
                                        const std::vector<t_physical_tile_type>& types,
                                        int max_chan_width) {
     auto& device_ctx = g_vpr_ctx.device();

     for (unsigned int i = 0; i < device_ctx.physical_tile_types.size(); i++) {
         if (!track_to_pin_map[i].empty()) {
             for (int track = 0; track < max_chan_width; ++track) {
                 for (int width = 0; width < types[i].width; ++width) {
                     for (int height = 0; height < types[i].height; ++height) {
                         for (int side = 0; side < 4; ++side) {
                             if (!track_to_pin_map[i][track][width][height][side].empty()) {
                                 track_to_pin_map[i][track][width][height][side].clear();
                             }
                         }
                     }
                 }
             }
         }
     }
 }

 /* Does the actual work of allocating the rr_graph and filling all the *
  * appropriate values.  Everything up to this was just a prelude!      */
 static void alloc_and_load_rr_graph(const int num_nodes,
                                     std::vector<t_rr_node>& L_rr_node,
                                     const int num_seg_types,
                                     const t_chan_details& chan_details_x,
                                     const t_chan_details& chan_details_y,
                                     const t_track_to_pin_lookup& track_to_pin_lookup,
                                     const t_pin_to_track_lookup& opin_to_track_map,
                                     const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn,
                                     t_sb_connection_map* sb_conn_map,
                                     const DeviceGrid& grid,
                                     const int Fs,
                                     t_sblock_pattern& sblock_pattern,
                                     const std::vector<vtr::Matrix<int>>& Fc_out,
                                     vtr::NdMatrix<int, 3>& Fc_xofs,
                                     vtr::NdMatrix<int, 3>& Fc_yofs,
                                     const t_rr_node_indices& L_rr_node_indices,
                                     const int max_chan_width,
                                     const t_chan_width& chan_width,
                                     const int wire_to_ipin_switch,
                                     const int delayless_switch,
                                     const enum e_directionality directionality,
                                     bool* Fc_clipped,
                                     const t_direct_inf* directs,
                                     const int num_directs,
                                     const t_clb_to_clb_directs* clb_to_clb_directs,
                                     bool is_global_graph) {
     //We take special care when creating RR graph edges (there are typically many more
     //edges than nodes in an RR graph).
     //
     //In particular, all the following build_*() functions do not create the edges, but
     //instead record the edges they wish to create in rr_edges_to_create.
     //
     //We uniquify the edges to be created (avoiding any duplicates), and create
     //the edges in alloc_and_load_edges().
     //
     //By doing things in this manner we ensure we know exactly how many edges leave each RR
     //node, which avoids resizing the RR edge arrays (which can cause significant memory
     //fragmentation, and significantly increasing peak memory usage). This is important since
     //RR graph creation is the high-watermark of VPR's memory use.
     t_rr_edge_info_set rr_edges_to_create;

     /* If Fc gets clipped, this will be flagged to true */
     *Fc_clipped = false;

     /* Connection SINKS and SOURCES to their pins. */
     for (size_t i = 0; i < grid.width(); ++i) {
         for (size_t j = 0; j < grid.height(); ++j) {
             build_rr_sinks_sources(i, j, L_rr_node, rr_edges_to_create, L_rr_node_indices,
                                    delayless_switch, grid);

             //Create the actual SOURCE->OPIN, IPIN->SINK edges
             uniquify_edges(rr_edges_to_create);
             alloc_and_load_edges(L_rr_node, rr_edges_to_create);
             rr_edges_to_create.clear();
         }
     }

     /* Build opins */
     for (size_t i = 0; i < grid.width(); ++i) {
         for (size_t j = 0; j < grid.height(); ++j) {
             for (e_side side : SIDES) {
                 if (BI_DIRECTIONAL == directionality) {
                     build_bidir_rr_opins(i, j, side, L_rr_node_indices, L_rr_node,
                                          opin_to_track_map, Fc_out, rr_edges_to_create, chan_details_x, chan_details_y,
                                          grid,
                                          directs, num_directs, clb_to_clb_directs, num_seg_types);
                 } else {
                     VTR_ASSERT(UNI_DIRECTIONAL == directionality);
                     bool clipped;
                     build_unidir_rr_opins(i, j, side, grid, Fc_out, max_chan_width,
                                           chan_details_x, chan_details_y, Fc_xofs, Fc_yofs,
                                           rr_edges_to_create, &clipped, L_rr_node_indices, L_rr_node,
                                           directs, num_directs, clb_to_clb_directs, num_seg_types);
                     if (clipped) {
                         *Fc_clipped = true;
                     }
                 }

                 //Create the actual OPIN->CHANX/CHANY edges
                 uniquify_edges(rr_edges_to_create);
                 alloc_and_load_edges(L_rr_node, rr_edges_to_create);
                 rr_edges_to_create.clear();
             }
         }
     }

     /* Build channels */
     VTR_ASSERT(Fs % 3 == 0);
     for (size_t i = 0; i < grid.width() - 1; ++i) {
         for (size_t j = 0; j < grid.height() - 1; ++j) {
             if (i > 0) {
                 int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.x_list[j]);
                 build_rr_chan(i, j, CHANX, track_to_pin_lookup, sb_conn_map, switch_block_conn,
                               CHANX_COST_INDEX_START,
                               max_chan_width, grid, tracks_per_chan,
                               sblock_pattern, Fs / 3, chan_details_x, chan_details_y,
                               L_rr_node_indices, rr_edges_to_create, L_rr_node,
                               wire_to_ipin_switch,
                               directionality);

                 //Create the actual CHAN->CHAN edges
                 uniquify_edges(rr_edges_to_create);
                 alloc_and_load_edges(L_rr_node, rr_edges_to_create);
                 rr_edges_to_create.clear();
             }
             if (j > 0) {
                 int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.y_list[i]);
                 build_rr_chan(i, j, CHANY, track_to_pin_lookup, sb_conn_map, switch_block_conn,
                               CHANX_COST_INDEX_START + num_seg_types,
                               max_chan_width, grid, tracks_per_chan,
                               sblock_pattern, Fs / 3, chan_details_x, chan_details_y,
                               L_rr_node_indices, rr_edges_to_create, L_rr_node,
                               wire_to_ipin_switch,
                               directionality);

                 //Create the actual CHAN->CHAN edges
                 uniquify_edges(rr_edges_to_create);
                 alloc_and_load_edges(L_rr_node, rr_edges_to_create);
                 rr_edges_to_create.clear();
             }
         }
     }

     init_fan_in(L_rr_node, num_nodes);
 }

 static void build_bidir_rr_opins(const int i,
                                  const int j,
                                  const e_side side,
                                  const t_rr_node_indices& L_rr_node_indices,
                                  const std::vector<t_rr_node>& rr_nodes,
                                  const t_pin_to_track_lookup& opin_to_track_map,
                                  const std::vector<vtr::Matrix<int>>& Fc_out,
                                  t_rr_edge_info_set& rr_edges_to_create,
                                  const t_chan_details& chan_details_x,
                                  const t_chan_details& chan_details_y,
                                  const DeviceGrid& grid,
                                  const t_direct_inf* directs,
                                  const int num_directs,
                                  const t_clb_to_clb_directs* clb_to_clb_directs,
                                  const int num_seg_types) {
     //Don't connect pins which are not adjacent to channels around the perimeter
     if ((i == 0 && side != RIGHT)
         || (i == int(grid.width() - 1) && side != LEFT)
         || (j == 0 && side != TOP)
         || (j == int(grid.width() - 1) && side != BOTTOM)) {
         return;
     }

     auto type = grid[i][j].type;
     int width_offset = grid[i][j].width_offset;
     int height_offset = grid[i][j].height_offset;

     const vtr::Matrix<int>& Fc = Fc_out[type->index];

     for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) {
         /* We only are working with opins so skip non-drivers */
         if (type->class_inf[type->pin_class[pin_index]].type != DRIVER) {
             continue;
         }

         /* Can't do anything if pin isn't at this location */
         if (0 == type->pinloc[width_offset][height_offset][side][pin_index]) {
             continue;
         }

         /* get number of tracks that this pin connects to */
         int total_pin_Fc = 0;
         for (int iseg = 0; iseg < num_seg_types; iseg++) {
             total_pin_Fc += Fc[pin_index][iseg];
         }

         int node_index = get_rr_node_index(L_rr_node_indices, i, j, OPIN, pin_index, side);
         VTR_ASSERT(node_index >= 0);

         if (total_pin_Fc > 0) {
             get_bidir_opin_connections(i, j, pin_index,
                                        node_index, rr_edges_to_create, opin_to_track_map, L_rr_node_indices,
                                        chan_details_x,
                                        chan_details_y);
         }

         /* Add in direct connections */
         get_opin_direct_connecions(i, j, side, pin_index,
                                    node_index, rr_edges_to_create, L_rr_node_indices, rr_nodes,
                                    directs, num_directs, clb_to_clb_directs);
     }
 }

 void free_rr_graph() {
     /* Frees all the routing graph data structures, if they have been       *
      * allocated.  I use rr_mem_chunk_list_head as a flag to indicate       *
      * whether or not the graph has been allocated -- if it is not NULL,    *
      * a routing graph exists and can be freed.  Hence, you can call this   *
      * routine even if you're not sure of whether a rr_graph exists or not. */

     /* Before adding any more free calls here, be sure the data is NOT chunk *
      * allocated, as ALL the chunk allocated data is already free!           */
     auto& device_ctx = g_vpr_ctx.mutable_device();

     device_ctx.read_rr_graph_filename.clear();

     device_ctx.rr_node_indices.clear();

     device_ctx.rr_nodes.clear();

     device_ctx.rr_node_indices.clear();

     device_ctx.rr_indexed_data.clear();

     device_ctx.rr_switch_inf.clear();

     device_ctx.switch_fanin_remap.clear();

     device_ctx.rr_node_metadata.clear();

     device_ctx.rr_edge_metadata.clear();

     invalidate_router_lookahead_cache();
 }

 static void build_rr_sinks_sources(const int i,
                                    const int j,
                                    std::vector<t_rr_node>& L_rr_node,
                                    t_rr_edge_info_set& rr_edges_to_create,
                                    const t_rr_node_indices& L_rr_node_indices,
                                    const int delayless_switch,
                                    const DeviceGrid& grid) {
     /* Loads IPIN, SINK, SOURCE, and OPIN.
      * Loads IPIN to SINK edges, and SOURCE to OPIN edges */

     /* Since we share nodes within a large block, only
      * start tile can initialize sinks, sources, and pins */
     if (grid[i][j].width_offset > 0 || grid[i][j].height_offset > 0)
         return;

     auto type = grid[i][j].type;
     int num_class = type->num_class;
     t_class* class_inf = type->class_inf;
     int num_pins = type->num_pins;
     int* pin_class = type->pin_class;

     /* SINK and SOURCE-to-OPIN edges */
     for (int iclass = 0; iclass < num_class; ++iclass) {
         int inode = 0;
         if (class_inf[iclass].type == DRIVER) { /* SOURCE */
             inode = get_rr_node_index(L_rr_node_indices, i, j, SOURCE, iclass);

             //Retrieve all the physical OPINs associated with this source, this includes
             //those at different grid tiles of this block
             std::vector<int> opin_nodes;
             for (int width_offset = 0; width_offset < type->width; ++width_offset) {
                 for (int height_offset = 0; height_offset < type->height; ++height_offset) {
                     for (int ipin = 0; ipin < class_inf[iclass].num_pins; ++ipin) {
                         int pin_num = class_inf[iclass].pinlist[ipin];
                         auto physical_pins = get_rr_node_indices(L_rr_node_indices, i + width_offset, j + height_offset, OPIN, pin_num);
                         opin_nodes.insert(opin_nodes.end(), physical_pins.begin(), physical_pins.end());
                     }
                 }
             }

             //Connect the SOURCE to each OPIN
             for (size_t iedge = 0; iedge < opin_nodes.size(); ++iedge) {
                 rr_edges_to_create.emplace_back(inode, opin_nodes[iedge], delayless_switch);
             }

             L_rr_node[inode].set_cost_index(SOURCE_COST_INDEX);
             L_rr_node[inode].set_type(SOURCE);
         } else { /* SINK */
             VTR_ASSERT(class_inf[iclass].type == RECEIVER);
             inode = get_rr_node_index(L_rr_node_indices, i, j, SINK, iclass);

             /* NOTE:  To allow route throughs through clbs, change the lines below to  *
              * make an edge from the input SINK to the output SOURCE.  Do for just the *
              * special case of INPUTS = class 0 and OUTPUTS = class 1 and see what it  *
              * leads to.  If route throughs are allowed, you may want to increase the  *
              * base cost of OPINs and/or SOURCES so they aren't used excessively.      */

             /* Initialize to unconnected */
             L_rr_node[inode].set_num_edges(0);

             L_rr_node[inode].set_cost_index(SINK_COST_INDEX);
             L_rr_node[inode].set_type(SINK);
         }

         /* Things common to both SOURCEs and SINKs.   */
         L_rr_node[inode].set_capacity(class_inf[iclass].num_pins);
         L_rr_node[inode].set_coordinates(i, j, i + type->width - 1, j + type->height - 1);
         float R = 0.;
         float C = 0.;
         L_rr_node[inode].set_rc_index(find_create_rr_rc_data(R, C));
         L_rr_node[inode].set_ptc_num(iclass);
     }

     /* Connect IPINS to SINKS and initialize OPINS */
     //We loop through all the pin locations on the block to initialize the IPINs/OPINs,
     //and hook-up the IPINs to sinks.
     for (int width_offset = 0; width_offset < type->width; ++width_offset) {
         for (int height_offset = 0; height_offset < type->height; ++height_offset) {
             for (e_side side : {TOP, BOTTOM, LEFT, RIGHT}) {
                 for (int ipin = 0; ipin < num_pins; ++ipin) {
                     if (type->pinloc[width_offset][height_offset][side][ipin]) {
                         int inode;
                         int iclass = pin_class[ipin];

                         if (class_inf[iclass].type == RECEIVER) {
                             //Connect the input pin to the sink
                             inode = get_rr_node_index(L_rr_node_indices, i + width_offset, j + height_offset, IPIN, ipin, side);

                             int to_node = get_rr_node_index(L_rr_node_indices, i, j, SINK, iclass);

                             //Add info about the edge to be created
                             rr_edges_to_create.emplace_back(inode, to_node, delayless_switch);

                             VTR_ASSERT(inode >= 0);
                             L_rr_node[inode].set_cost_index(IPIN_COST_INDEX);
                             L_rr_node[inode].set_type(IPIN);

                         } else {
                             VTR_ASSERT(class_inf[iclass].type == DRIVER);
                             //Initialize the output pin
                             // Note that we leave it's out-going edges unconnected (they will be hooked up to global routing later)
                             inode = get_rr_node_index(L_rr_node_indices, i + width_offset, j + height_offset, OPIN, ipin, side);

                             //Initially left unconnected

                             L_rr_node[inode].set_cost_index(OPIN_COST_INDEX);
                             L_rr_node[inode].set_type(OPIN);
                         }

                         /* Common to both DRIVERs and RECEIVERs */
                         L_rr_node[inode].set_capacity(1);
                         float R = 0.;
                         float C = 0.;
                         L_rr_node[inode].set_rc_index(find_create_rr_rc_data(R, C));
                         L_rr_node[inode].set_ptc_num(ipin);

                         //Note that we store the grid tile location and side where the pin is located,
                         //which greatly simplifies the drawing code
                         L_rr_node[inode].set_coordinates(i + width_offset, j + height_offset, i + width_offset, j + height_offset);
                         L_rr_node[inode].set_side(side);

                         VTR_ASSERT(type->pinloc[width_offset][height_offset][L_rr_node[inode].side()][L_rr_node[inode].pin_num()]);
                     }
                 }
             }
         }
     }

     //Create the actual edges
 }

 void init_fan_in(std::vector<t_rr_node>& L_rr_node, const int num_rr_nodes) {
     //Loads fan-ins for all nodes

     //Reset all fan-ins to zero
     for (int i = 0; i < num_rr_nodes; i++) {
         L_rr_node[i].set_fan_in(0);
     }

     //Walk the graph and increment fanin on all downstream nodes
     for (int i = 0; i < num_rr_nodes; i++) {
         for (t_edge_size iedge = 0; iedge < L_rr_node[i].num_edges(); iedge++) {
             int to_node = L_rr_node[i].edge_sink_node(iedge);

             L_rr_node[to_node].set_fan_in(L_rr_node[to_node].fan_in() + 1);
         }
     }
 }

 /* Allocates/loads edges for nodes belonging to specified channel segment and initializes
  * node properties such as cost, occupancy and capacity */
 static void build_rr_chan(const int x_coord,
                           const int y_coord,
                           const t_rr_type chan_type,
                           const t_track_to_pin_lookup& track_to_pin_lookup,
                           t_sb_connection_map* sb_conn_map,
                           const vtr::NdMatrix<std::vector<int>, 3>& switch_block_conn,
                           const int cost_index_offset,
                           const int max_chan_width,
                           const DeviceGrid& grid,
                           const int tracks_per_chan,
                           t_sblock_pattern& sblock_pattern,
                           const int Fs_per_side,
                           const t_chan_details& chan_details_x,
                           const t_chan_details& chan_details_y,
                           const t_rr_node_indices& L_rr_node_indices,
                           t_rr_edge_info_set& rr_edges_to_create,
                           std::vector<t_rr_node>& L_rr_node,
                           const int wire_to_ipin_switch,
                           const enum e_directionality directionality) {
     /* this function builds both x and y-directed channel segments, so set up our
      * coordinates based on channel type */

     auto& device_ctx = g_vpr_ctx.device();

     //Initally a assumes CHANX
     int seg_coord = x_coord;                           //The absolute coordinate of this segment within the channel
     int chan_coord = y_coord;                          //The absolute coordinate of this channel within the device
     int seg_dimension = device_ctx.grid.width() - 2;   //-2 for no perim channels
     int chan_dimension = device_ctx.grid.height() - 2; //-2 for no perim channels
     const t_chan_details& from_chan_details = (chan_type == CHANX) ? chan_details_x : chan_details_y;
     const t_chan_details& opposite_chan_details = (chan_type == CHANX) ? chan_details_y : chan_details_x;
     t_rr_type opposite_chan_type = CHANY;
     if (chan_type == CHANY) {
         //Swap values since CHANX was assumed above
         std::swap(seg_coord, chan_coord);
         std::swap(seg_dimension, chan_dimension);
         opposite_chan_type = CHANX;
     }

     const t_chan_seg_details* seg_details = from_chan_details[x_coord][y_coord].data();

     /* figure out if we're generating switch block edges based on a custom switch block
      * description */
     bool custom_switch_block = false;
     if (sb_conn_map != nullptr) {
         VTR_ASSERT(sblock_pattern.empty() && switch_block_conn.empty());
         custom_switch_block = true;
     }

     /* Loads up all the routing resource nodes in the current channel segment */
     for (int track = 0; track < tracks_per_chan; ++track) {
         if (seg_details[track].length() == 0)
             continue;

         //Start and end coordinates of this segment along the length of the channel
         //Note that these values are in the VPR coordinate system (and do not consider
         //wire directionality), so start correspond to left/bottom and end corresponds to right/top
         int start = get_seg_start(seg_details, track, chan_coord, seg_coord);
         int end = get_seg_end(seg_details, track, start, chan_coord, seg_dimension);

         if (seg_coord > start)
             continue; /* Only process segments which start at this location */
         VTR_ASSERT(seg_coord == start);

         const t_chan_seg_details* from_seg_details = nullptr;
         if (chan_type == CHANY) {
             from_seg_details = chan_details_y[x_coord][start].data();
         } else {
             from_seg_details = chan_details_x[start][y_coord].data();
         }

         int node = get_rr_node_index(L_rr_node_indices, x_coord, y_coord, chan_type, track);

         if (node == OPEN) {
             continue;
         }

         /* Add the edges from this track to all it's connected pins into the list */
         int num_edges = 0;
         num_edges += get_track_to_pins(start, chan_coord, track, tracks_per_chan, node, rr_edges_to_create,
                                        L_rr_node_indices, track_to_pin_lookup, seg_details, chan_type, seg_dimension,
                                        wire_to_ipin_switch, directionality);

         /* get edges going from the current track into channel segments which are perpendicular to it */
         if (chan_coord > 0) {
             const t_chan_seg_details* to_seg_details;
             if (chan_type == CHANX) {
                 to_seg_details = chan_details_y[start][y_coord].data();
             } else {
                 VTR_ASSERT(chan_type == CHANY);
                 to_seg_details = chan_details_x[x_coord][start].data();
             }
             if (to_seg_details->length() > 0) {
                 num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord,
                                                  opposite_chan_type, seg_dimension, max_chan_width, grid,
                                                  Fs_per_side, sblock_pattern, node, rr_edges_to_create,
                                                  from_seg_details, to_seg_details, opposite_chan_details,
                                                  directionality,
                                                  L_rr_node_indices,
                                                  switch_block_conn, sb_conn_map);
             }
         }
         if (chan_coord < chan_dimension) {
             const t_chan_seg_details* to_seg_details;
             if (chan_type == CHANX) {
                 to_seg_details = chan_details_y[start][y_coord + 1].data();
             } else {
                 VTR_ASSERT(chan_type == CHANY);
                 to_seg_details = chan_details_x[x_coord + 1][start].data();
             }
             if (to_seg_details->length() > 0) {
                 num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, chan_coord + 1,
                                                  opposite_chan_type, seg_dimension, max_chan_width, grid,
                                                  Fs_per_side, sblock_pattern, node, rr_edges_to_create,
                                                  from_seg_details, to_seg_details, opposite_chan_details,
                                                  directionality,
                                                  L_rr_node_indices,
                                                  switch_block_conn, sb_conn_map);
             }
         }

         /* walk over the switch blocks along the source track and implement edges from this track to other tracks
          * in the same channel (i.e. straight-through connections) */
         for (int target_seg = start - 1; target_seg <= end + 1; target_seg++) {
             if (target_seg != start - 1 && target_seg != end + 1) {
                 /* skip straight-through connections from midpoint if non-custom switch block.
                  * currently non-custom switch blocks don't properly describe connections from the mid-point of a wire segment
                  * to other segments in the same channel (i.e. straight-through connections) */
                 if (!custom_switch_block) {
                     continue;
                 }
             }
             if (target_seg > 0 && target_seg < seg_dimension + 1) {
                 const t_chan_seg_details* to_seg_details;
                 if (chan_type == CHANX) {
                     to_seg_details = chan_details_x[target_seg][y_coord].data();
                 } else {
                     VTR_ASSERT(chan_type == CHANY);
                     to_seg_details = chan_details_y[x_coord][target_seg].data();
                 }
                 if (to_seg_details->length() > 0) {
                     num_edges += get_track_to_tracks(chan_coord, start, track, chan_type, target_seg,
                                                      chan_type, seg_dimension, max_chan_width, grid,
                                                      Fs_per_side, sblock_pattern, node, rr_edges_to_create,
                                                      from_seg_details, to_seg_details, from_chan_details,
                                                      directionality,
                                                      L_rr_node_indices,
                                                      switch_block_conn, sb_conn_map);
                 }
             }
         }

         /* Edge arrays have now been built up.  Do everything else.  */
         L_rr_node[node].set_cost_index(cost_index_offset + seg_details[track].index());
         L_rr_node[node].set_capacity(1); /* GLOBAL routing handled elsewhere */

         if (chan_type == CHANX) {
             L_rr_node[node].set_coordinates(start, y_coord, end, y_coord);
         } else {
             VTR_ASSERT(chan_type == CHANY);
             L_rr_node[node].set_coordinates(x_coord, start, x_coord, end);
         }

         int length = end - start + 1;
         float R = length * seg_details[track].Rmetal();
         float C = length * seg_details[track].Cmetal();
         L_rr_node[node].set_rc_index(find_create_rr_rc_data(R, C));

         L_rr_node[node].set_ptc_num(track);
         L_rr_node[node].set_type(chan_type);
         L_rr_node[node].set_direction(seg_details[track].direction());
     }
 }

 void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create) {
     std::sort(rr_edges_to_create.begin(), rr_edges_to_create.end());
     rr_edges_to_create.erase(std::unique(rr_edges_to_create.begin(), rr_edges_to_create.end()), rr_edges_to_create.end());
 }

 void alloc_and_load_edges(std::vector<t_rr_node>& L_rr_node,
                           const t_rr_edge_info_set& rr_edges_to_create) {
     /* Sets up all the edge related information for rr_node */

     struct compare_from_node {
         auto operator()(const t_rr_edge_info& lhs, const int from_node) {
             return lhs.from_node < from_node;
         }
         auto operator()(const int from_node, const t_rr_edge_info& rhs) {
             return from_node < rhs.from_node;
         }
     };

     std::set<int> from_nodes;
     for (auto& edge : rr_edges_to_create) {
         from_nodes.insert(edge.from_node);
     }

     VTR_ASSERT_SAFE(std::is_sorted(rr_edges_to_create.begin(), rr_edges_to_create.end()));

     for (int inode : from_nodes) {
         auto edge_range = std::equal_range(rr_edges_to_create.begin(), rr_edges_to_create.end(), inode, compare_from_node());

         size_t edge_count = std::distance(edge_range.first, edge_range.second);

         if (L_rr_node[inode].num_edges() == 0) {
             //Create initial edges
             //
             //Note that we do this in bulk instead of via add_edge() to reduce
             //memory fragmentation

             L_rr_node[inode].set_num_edges(edge_count);

             int iedge = 0;
             for (auto itr = edge_range.first; itr != edge_range.second; ++itr) {
                 VTR_ASSERT(itr->from_node == inode);

                 L_rr_node[inode].set_edge_sink_node(iedge, itr->to_node);
                 L_rr_node[inode].set_edge_switch(iedge, itr->switch_type);
                 ++iedge;
             }
         } else {
             //Add new edge incrementally
             //
             //This should occur relatively rarely (e.g. a backward bidir edge) so memory fragmentation shouldn't be a big problem
             for (auto itr = edge_range.first; itr != edge_range.second; ++itr) {
                 L_rr_node[inode].add_edge(itr->to_node, itr->switch_type);
             }
         }
     }
 }

 /* allocate pin to track map for each segment type individually and then combine into a single
  * vector */
 static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_pin_to_track_map(const e_pin_type pin_type,
                                                                           const vtr::Matrix<int>& Fc,
                                                                           const t_physical_tile_type_ptr Type,
                                                                           const std::vector<bool>& perturb_switch_pattern,
                                                                           const e_directionality directionality,
                                                                           const int num_seg_types,
                                                                           const int* sets_per_seg_type) {
     /* get the maximum number of tracks that any pin can connect to */
     size_t max_pin_tracks = 0;
     for (int iseg = 0; iseg < num_seg_types; iseg++) {
         /* determine the maximum Fc to this segment type across all pins */
         int max_Fc = 0;
         for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) {
             int pin_class = Type->pin_class[pin_index];
             if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) {
                 max_Fc = Fc[pin_index][iseg];
             }
         }

         max_pin_tracks += max_Fc;
     }

     /* allocate 'result' matrix and initialize entries to OPEN. also allocate and intialize matrix which will be
      * used to index into the correct entries when loading up 'result' */

     auto result = vtr::NdMatrix<std::vector<int>, 4>({
         size_t(Type->num_pins), //[0..num_pins-1]
         size_t(Type->width),    //[0..width-1]
         size_t(Type->height),   //[0..height-1]
         4,                      //[0..sides-1]
     });

     /* multiplier for unidirectional vs bidirectional architectures */
     int fac = 1;
     if (directionality == UNI_DIRECTIONAL) {
         fac = 2;
     }

     /* load the pin to track matrix by looking at each segment type in turn */
     int seg_type_start_track = 0;
     for (int iseg = 0; iseg < num_seg_types; iseg++) {
         int num_seg_type_tracks = fac * sets_per_seg_type[iseg];

         /* determine the maximum Fc to this segment type across all pins */
         int max_Fc = 0;
         for (int pin_index = 0; pin_index < Type->num_pins; ++pin_index) {
             int pin_class = Type->pin_class[pin_index];
             if (Fc[pin_index][iseg] > max_Fc && Type->class_inf[pin_class].type == pin_type) {
                 max_Fc = Fc[pin_index][iseg];
             }
         }

         /* get pin connections to tracks of the current segment type */
         auto pin_to_seg_type_map = alloc_and_load_pin_to_seg_type(pin_type,
                                                                   num_seg_type_tracks, max_Fc, Type, perturb_switch_pattern[iseg], directionality);

         /* connections in pin_to_seg_type_map are within that seg type -- i.e. in the [0,num_seg_type_tracks-1] range.
          * now load up 'result' array with these connections, but offset them so they are relative to the channel
          * as a whole */
         for (int ipin = 0; ipin < Type->num_pins; ipin++) {
             for (int iwidth = 0; iwidth < Type->width; iwidth++) {
                 for (int iheight = 0; iheight < Type->height; iheight++) {
                     for (int iside = 0; iside < 4; iside++) {
                         for (int iconn = 0; iconn < max_Fc; iconn++) {
                             int relative_track_ind = pin_to_seg_type_map[ipin][iwidth][iheight][iside][iconn];

                             if (relative_track_ind == OPEN) continue;

                             int absolute_track_ind = relative_track_ind + seg_type_start_track;

                             VTR_ASSERT(absolute_track_ind >= 0);
                             result[ipin][iwidth][iheight][iside].push_back(absolute_track_ind);
                         }
                     }
                 }
             }
         }

         /* next seg type will start at this track index */
         seg_type_start_track += num_seg_type_tracks;
     }

     return result;
 }

 static vtr::NdMatrix<int, 5> alloc_and_load_pin_to_seg_type(const e_pin_type pin_type,
                                                             const int num_seg_type_tracks,
                                                             const int Fc,
                                                             const t_physical_tile_type_ptr Type,
                                                             const bool perturb_switch_pattern,
                                                             const e_directionality directionality) {
     /* Note: currently a single value of Fc is used across each pin. In the future
      * the looping below will have to be modified if we want to account for pin-based
      * Fc values */

     /* NB:  This wastes some space.  Could set tracks_..._pin[ipin][ioff][iside] =
      * NULL if there is no pin on that side, or that pin is of the wrong type.
      * Probably not enough memory to worry about, esp. as it's temporary.
      * If pin ipin on side iside does not exist or is of the wrong type,
      * tracks_connected_to_pin[ipin][iside][0] = OPEN.                               */

     if (Type->num_pins < 1) {
         return vtr::NdMatrix<int, 5>();
     }

     auto tracks_connected_to_pin = vtr::NdMatrix<int, 5>({
                                                              size_t(Type->num_pins), //[0..num_pins-1]
                                                              size_t(Type->width),    //[0..width-1]
                                                              size_t(Type->height),   //[0..height-1]
                                                              NUM_SIDES,              //[0..NUM_SIDES-1]
                                                              size_t(Fc)              //[0..Fc-1]
                                                          },
                                                          OPEN); //Unconnected

     //Number of *physical* pins on each side.
     //Note that his may be more than the logical number of pins (i.e.
     //Type->num_pins) if a logical pin has multiple specified physical
     //pinlocations (i.e. appears on multiple sides of the block)
     auto num_dir = vtr::NdMatrix<int, 3>({
                                              size_t(Type->width),  //[0..width-1]
                                              size_t(Type->height), //[0..height-1]
                                              NUM_SIDES             //[0..NUM_SIDES-1]
                                          },
                                          0);

     //List of *physical* pins of the correct type on each side of the current
     //block type. For a specific width/height/side the valid enteries in the
     //last dimension are [0 .. num_dir[width][height][side]-1]
     //
     //Max possible space alloced for simplicity
     auto dir_list = vtr::NdMatrix<int, 4>({
                                               size_t(Type->width),   //[0..width-1]
                                               size_t(Type->height),  //[0..height-1]
                                               NUM_SIDES,             //[0..NUM_SIDES-1]
                                               size_t(Type->num_pins) //[0..num_pins-1]
                                           },
                                           -1); //Defensive coding: Initialize to invalid

     //Number of currently assigned physical pins
     auto num_done_per_dir = vtr::NdMatrix<int, 3>({
                                                       size_t(Type->width),  //[0..width-1]
                                                       size_t(Type->height), //[0..height-1]
                                                       NUM_SIDES             //[0..NUM_SIDES-1]
                                                   },
                                                   0);

     //Record the physical pin locations and counts per side/offsets combination
     for (int pin = 0; pin < Type->num_pins; ++pin) {
         int pin_class = Type->pin_class[pin];
         if (Type->class_inf[pin_class].type != pin_type) /* Doing either ipins OR opins */
             continue;

         /* Pins connecting only to global resources get no switches -> keeps area model accurate. */
         if (Type->is_ignored_pin[pin])
             continue;

         for (int width = 0; width < Type->width; ++width) {
             for (int height = 0; height < Type->height; ++height) {
                 for (e_side side : SIDES) {
                     if (Type->pinloc[width][height][side][pin] == 1) {
                         dir_list[width][height][side][num_dir[width][height][side]] = pin;
                         num_dir[width][height][side]++;
                     }
                 }
             }
         }
     }

     //Total the number of physical pins
     int num_phys_pins = 0;
     for (int width = 0; width < Type->width; ++width) {
         for (int height = 0; height < Type->height; ++height) {
             for (e_side side : SIDES) {
                 num_phys_pins += num_dir[width][height][side]; /* Num. physical pins per type */
             }
         }
     }

     std::vector<t_pin_loc> pin_ordering;

     /* Connection block I use distributes pins evenly across the tracks      *
      * of ALL sides of the clb at once.  Ensures that each pin connects      *
      * to spaced out tracks in its connection block, and that the other      *
      * pins (potentially in other C blocks) connect to the remaining tracks  *
      * first.  Doesn't matter for large Fc, but should make a fairly         *
      * good low Fc block that leverages the fact that usually lots of pins   *
      * are logically equivalent.                                             */

     const e_side init_side = LEFT;
     const int init_width = 0;
     const int init_height = 0;

     e_side side = init_side;
     int width = init_width;
     int height = init_height;
     int pin = 0;
     int pin_index = -1;

     //Determine the order in which physical pins will be considered while building
     //the connection block. This generally tries to order the pins so they are 'spread'
     //out (in hopes of yielding good connection diversity)
     while (pin < num_phys_pins) {
         if (height == init_height && width == init_width && side == init_side) {
             //Completed one loop through all the possible offsets/side combinations
             pin_index++;
         }

         advance_to_next_block_side(Type, width, height, side);

         VTR_ASSERT_MSG(pin_index < num_phys_pins, "Physical block pins bound number of logical block pins");

         if (num_done_per_dir[width][height][side] >= num_dir[width][height][side]) {
             continue;
         }

         int pin_num = dir_list[width][height][side][pin_index];
         VTR_ASSERT(pin_num >= 0);
         VTR_ASSERT(Type->pinloc[width][height][side][pin_num]);

         t_pin_loc pin_loc;
         pin_loc.pin_index = pin_num;
         pin_loc.width_offset = width;
         ;
         pin_loc.height_offset = height;
         pin_loc.side = side;

         pin_ordering.push_back(pin_loc);

         num_done_per_dir[width][height][side]++;
         pin++;
     }

     VTR_ASSERT(pin == num_phys_pins);
     VTR_ASSERT(pin_ordering.size() == size_t(num_phys_pins));

     if (perturb_switch_pattern) {
         load_perturbed_connection_block_pattern(tracks_connected_to_pin,
                                                 pin_ordering,
                                                 num_seg_type_tracks, num_seg_type_tracks, Fc, directionality);
     } else {
         load_uniform_connection_block_pattern(tracks_connected_to_pin,
                                               pin_ordering,
                                               num_seg_type_tracks, num_seg_type_tracks, Fc, directionality);
     }

 #ifdef ENABLE_CHECK_ALL_TRACKS
     check_all_tracks_reach_pins(Type, tracks_connected_to_pin, num_seg_type_tracks,
                                 Fc, pin_type);
 #endif

     return tracks_connected_to_pin;
 }

 static void advance_to_next_block_side(t_physical_tile_type_ptr Type, int& width_offset, int& height_offset, e_side& side) {
     //State-machine transitions for advancing around all sides of a block

     //This state-machine transitions in the following order:
     //
     //                           TOP
     //                           --->
     //
     //            **********************************
     //            *    10    |    11    |    12    *
     //            * 3     25 | 6     22 | 9     19 *
     //            *    36    |    35    |    34    *
     //            *----------|----------|----------*
     //       ^    *    13    |    14    |    15    *    |
     //  LEFT |    * 2     26 | 5     23 | 8     20 *    | RIGHT
     //       |    *    33    |    32    |    31    *    v
     //            *----------|----------|----------*
     //            *    16    |    17    |    18    *
     //            * 1     27 | 4     24 | 7     21 *
     //            *    30    |    29    |    28    *
     //            **********************************
     //
     //                           <---
     //                          BOTTOM
     //
     // where each 'square' in the above diagram corresponds to a grid tile of
     // the block of width=3 and height=3. The numbers correspond to the visiting order starting
     // from '1' (width_offset=0, height_offset=0, side=LEFT).
     //
     // Note that for blocks of width == 1 and height == 1 this iterates through the sides
     // in clock-wise order:
     //
     //      ***********
     //      *    2    *
     //      * 1     3 *
     //      *    4    *
     //      ***********
     //

     //Validate current state
     VTR_ASSERT(width_offset >= 0 && width_offset < Type->width);
     VTR_ASSERT(height_offset >= 0 && height_offset < Type->height);
     VTR_ASSERT(side == LEFT || side == RIGHT || side == BOTTOM || side == TOP);

     if (side == LEFT) {
         if (height_offset == Type->height - 1 && width_offset == Type->width - 1) {
             //Finished the last left edge column
             side = TOP; //Turn clockwise
             width_offset = 0;
             height_offset = Type->height - 1;
         } else if (height_offset == Type->height - 1) {
             //Finished the current left edge column
             VTR_ASSERT(width_offset != Type->width - 1);

             //Move right to the next left edge column
             width_offset++;
             height_offset = 0;
         } else {
             height_offset++; //Advance up current left edge column
         }
     } else if (side == TOP) {
         if (height_offset == 0 && width_offset == Type->width - 1) {
             //Finished the last top edge row
             side = RIGHT; //Turn clockwise
             width_offset = Type->width - 1;
             height_offset = Type->height - 1;
         } else if (width_offset == Type->width - 1) {
             //Finished the current top edge row
             VTR_ASSERT(height_offset != 0);

             //Move down to the next top edge row
             height_offset--;
             width_offset = 0;
         } else {
             width_offset++; //Advance right along current top edge row
         }
     } else if (side == RIGHT) {
         if (height_offset == 0 && width_offset == 0) {
             //Finished the last right edge column
             side = BOTTOM; //Turn clockwise
             width_offset = Type->width - 1;
             height_offset = 0;
         } else if (height_offset == 0) {
             //Finished the current right edge column
             VTR_ASSERT(width_offset != 0);

             //Move left to the next right edge column
             width_offset--;
             height_offset = Type->height - 1;
         } else {
             height_offset--; //Advance down current right edge column
         }
     } else {
         VTR_ASSERT(side == BOTTOM);
         if (height_offset == Type->height - 1 && width_offset == 0) {
             //Finished the last bottom edge row
             side = LEFT; //Turn clockwise
             width_offset = 0;
             height_offset = 0;
         } else if (width_offset == 0) {
             //Finished the current bottom edge row
             VTR_ASSERT(height_offset != Type->height - 1);

             //Move up to the next bottom edge row
             height_offset++;
             width_offset = Type->width - 1;
         } else {
             width_offset--; //Advance left along current bottom edge row
         }
     }

     //Validate next state
     VTR_ASSERT(width_offset >= 0 && width_offset < Type->width);
     VTR_ASSERT(height_offset >= 0 && height_offset < Type->height);
     VTR_ASSERT(side == LEFT || side == RIGHT || side == BOTTOM || side == TOP);
 }

 static float pattern_fmod(float a, float b) {
     /* Compute a modulo b. */
     float raw_result = a - (int)(a / b) * b;

     if (raw_result < 0.0f) {
         return 0.0f;
     }

     if (raw_result >= b) {
         return 0.0f;
     }

     return raw_result;
 }

 static void load_uniform_connection_block_pattern(vtr::NdMatrix<int, 5>& tracks_connected_to_pin,
                                                   const std::vector<t_pin_loc>& pin_locations,
                                                   const int x_chan_width,
                                                   const int y_chan_width,
                                                   const int Fc,
                                                   enum e_directionality directionality) {
     /* Loads the tracks_connected_to_pin array with an even distribution of     *
      * switches across the tracks for each pin.  For example, each pin connects *
      * to every 4.3rd track in a channel, with exactly which tracks a pin       *
      * connects to staggered from pin to pin.                                   */

     /* Uni-directional drive is implemented to ensure no directional bias and this means
      * two important comments noted below                                                */
     /* 1. Spacing should be (W/2)/(Fc/2), and step_size should be spacing/(num_phys_pins),
      *    and lay down 2 switches on an adjacent pair of tracks at a time to ensure
      *    no directional bias. Basically, treat W (even) as W/2 pairs of tracks, and
      *    assign switches to a pair at a time. Can do this because W is guaranteed to
      *    be even-numbered; however same approach cannot be applied to Fc_out pattern
      *    when L > 1 and W <> 2L multiple.
      *
      * 2. This generic pattern should be considered the tileable physical layout,
      *    meaning all track # here are physical #'s,
      *    so later must use vpr_to_phy conversion to find actual logical #'s to connect.
      *    This also means I will not use get_output_block_companion_track to ensure
      *    no bias, since that describes a logical # -> that would confuse people.  */

     int max_width = 0;
     int max_height = 0;

     int num_phys_pins = pin_locations.size();

     /* Keep a record of how many times each track is selected at each
      * (side, dx, dy) of the block. This is used to ensure a diversity of tracks are
      * assigned to pins that might be related. For a given (side, dx, dy), the counts will be
      * decremented after all the tracks have been selected at least once, so the
      * counts will not get too big.
      */
     std::vector<std::vector<std::vector<std::vector<char>>>> excess_tracks_selected;
     excess_tracks_selected.resize(NUM_SIDES);

     for (int i = 0; i < num_phys_pins; ++i) {
         int width = pin_locations[i].width_offset;
         int height = pin_locations[i].height_offset;

         max_width = std::max(max_width, width);
         max_height = std::max(max_height, height);
     }

     for (int iside = 0; iside < NUM_SIDES; iside++) {
         excess_tracks_selected[iside].resize(max_width + 1);

         for (int dx = 0; dx <= max_width; dx++) {
             excess_tracks_selected[iside][dx].resize(max_height + 1);

             for (int dy = 0; dy <= max_height; dy++) {
                 int max_chan_width = (((iside == TOP) || (iside == BOTTOM)) ? x_chan_width : y_chan_width);
                 excess_tracks_selected[iside][dx][dy].resize(max_chan_width);
             }
         }
     }

     for (int i = 0; i < num_phys_pins; ++i) {
         e_side side = pin_locations[i].side;
         int width = pin_locations[i].width_offset;
         int height = pin_locations[i].height_offset;

         int max_chan_width = (((side == TOP) || (side == BOTTOM)) ? x_chan_width : y_chan_width);

         for (int j = 0; j < max_chan_width; j++) {
             excess_tracks_selected[side][width][height][j] = 0;
         }
     }

     int group_size;

     if (directionality == BI_DIRECTIONAL) {
         group_size = 1;
     } else {
         VTR_ASSERT(directionality == UNI_DIRECTIONAL);
         group_size = 2;
     }

     VTR_ASSERT((x_chan_width % group_size == 0) && (y_chan_width % group_size == 0) && (Fc % group_size == 0));

     /* offset is used to move to a different point in the track space if it is detected that
      * the tracks being assigned overlap recently assigned tracks, with the goal of increasing
      * track diversity.
      */
     int offset = 0;

     for (int i = 0; i < num_phys_pins; ++i) {
         int pin = pin_locations[i].pin_index;
         e_side side = pin_locations[i].side;
         int width = pin_locations[i].width_offset;
         int height = pin_locations[i].height_offset;

         /* Bi-directional treats each track separately, uni-directional works with pairs of tracks */
         for (int j = 0; j < (Fc / group_size); ++j) {
             int max_chan_width = (((side == TOP) || (side == BOTTOM)) ? x_chan_width : y_chan_width);
             float step_size = (float)max_chan_width / (float)(Fc * num_phys_pins);

             VTR_ASSERT(Fc > 0);
             float fc_step = (float)max_chan_width / (float)Fc;

             /* We may go outside the track ID space, because of offset, so use modulo arithmetic below. */

             float ftrack = pattern_fmod((i + offset) * step_size, fc_step) + (j * fc_step);
             int itrack = ((int)ftrack) * group_size;

             if (j == 0) {
                 /* Check if tracks to be assigned by the algorithm were recently assigned to pins at this
                  * (side, dx, dy). If so, loop through all possible alternative track
                  * assignments to find ones that include the most tracks that haven't been selected recently.
                  */
                 for (;;) {
                     int saved_offset_increment = 0;
                     int max_num_unassigned_tracks = 0;

                     /* Across all potential track assignments, determine the maximum number of recently
                      * unassigned tracks that can be assigned this iteration. offset_increment is used to
                      * increment through the potential track assignments. The nested loops inside the
                      * offset_increment loop, iterate through all the tracks associated with a particular
                      * track assignment.
                      */

                     for (int offset_increment = 0; offset_increment < num_phys_pins; offset_increment++) {
                         int num_unassigned_tracks = 0;
                         int num_total_tracks = 0;

                         for (int j2 = 0; j2 < (Fc / group_size); ++j2) {
                             ftrack = pattern_fmod((i + offset + offset_increment) * step_size, fc_step) + (j2 * fc_step);
                             itrack = ((int)ftrack) * group_size;

                             for (int k = 0; k < group_size; ++k) {
                                 if (excess_tracks_selected[side][width][height][(itrack + k) % max_chan_width] == 0) {
                                     num_unassigned_tracks++;
                                 }

                                 num_total_tracks++;
                             }
                         }

                         if (num_unassigned_tracks > max_num_unassigned_tracks) {
                             max_num_unassigned_tracks = num_unassigned_tracks;
                             saved_offset_increment = offset_increment;
                         }

                         if (num_unassigned_tracks == num_total_tracks) {
                             /* We can't do better than this, so end search. */
                             break;
                         }
                     }

                     if (max_num_unassigned_tracks > 0) {
                         /* Use the minimum offset increment that achieves the desired goal of track diversity,
                          * so the patterns produced are similar to the old algorithm (which doesn't explicitly
                          * monitor track diversity).
                          */

                         offset += saved_offset_increment;

                         ftrack = pattern_fmod((i + offset) * step_size, fc_step);
                         itrack = ((int)ftrack) * group_size;

                         break;
                     } else {
                         /* All tracks have been assigned recently. Decrement the excess_tracks_selected counts for
                          * this location (side, dx, dy), modifying the memory of recently assigned
                          * tracks. A decrement is done rather than a reset, so if there was some unevenness of track
                          * assignment, that will be factored into the next round of track assignment.
                          */
                         for (int k = 0; k < max_chan_width; k++) {
                             VTR_ASSERT(excess_tracks_selected[side][width][height][k] > 0);
                             excess_tracks_selected[side][width][height][k]--;
                         }
                     }
                 }
             }

             /* Assign the group of tracks for the Fc pattern */
             for (int k = 0; k < group_size; ++k) {
                 tracks_connected_to_pin[pin][width][height][side][group_size * j + k] = (itrack + k) % max_chan_width;

                 excess_tracks_selected[side][width][height][(itrack + k) % max_chan_width]++;
             }
         }
     }
 }

 static void load_perturbed_connection_block_pattern(vtr::NdMatrix<int, 5>& tracks_connected_to_pin,
                                                     const std::vector<t_pin_loc>& pin_locations,
                                                     const int x_chan_width,
                                                     const int y_chan_width,
                                                     const int Fc,
                                                     enum e_directionality directionality) {
     /* Loads the tracks_connected_to_pin array with an unevenly distributed     *
      * set of switches across the channel.  This is done for inputs when        *
      * Fc_input = Fc_output to avoid creating "pin domains" -- certain output   *
      * pins being able to talk only to certain input pins because their switch  *
      * patterns exactly line up.  Distribute Fc/2 + 1 switches over half the    *
      * channel and Fc/2 - 1 switches over the other half to make the switch     *
      * pattern different from the uniform one of the outputs.  Also, have half  *
      * the pins put the "dense" part of their connections in the first half of  *
      * the channel and the other half put the "dense" part in the second half,  *
      * to make sure each track can connect to about the same number of ipins.   */

     VTR_ASSERT(directionality == BI_DIRECTIONAL);

     int Fc_dense = (Fc / 2) + 1;
     int Fc_sparse = Fc - Fc_dense; /* Works for even or odd Fc */
     int Fc_half[2];

     int num_phys_pins = pin_locations.size();

     for (int i = 0; i < num_phys_pins; ++i) {
         int pin = pin_locations[i].pin_index;
         e_side side = pin_locations[i].side;
         int width = pin_locations[i].width_offset;
         int height = pin_locations[i].height_offset;

         int max_chan_width = (((side == TOP) || (side == BOTTOM)) ? x_chan_width : y_chan_width);
         float step_size = (float)max_chan_width / (float)(Fc * num_phys_pins);

         float spacing_dense = (float)max_chan_width / (float)(2 * Fc_dense);
         float spacing_sparse = (float)max_chan_width / (float)(2 * Fc_sparse);
         float spacing[2];

         /* Flip every pin to balance switch density */
         spacing[i % 2] = spacing_dense;
         Fc_half[i % 2] = Fc_dense;
         spacing[(i + 1) % 2] = spacing_sparse;
         Fc_half[(i + 1) % 2] = Fc_sparse;

         float ftrack = i * step_size; /* Start point.  Staggered from pin to pin */
         int iconn = 0;

         for (int ihalf = 0; ihalf < 2; ihalf++) { /* For both dense and sparse halves. */
             for (int j = 0; j < Fc_half[ihalf]; ++j) {
                 /* Can occasionally get wraparound due to floating point rounding.
                  * This is okay because the starting position > 0 when this occurs
                  * so connection is valid and fine */
                 int itrack = (int)ftrack;
                 itrack = itrack % max_chan_width;
                 tracks_connected_to_pin[pin][width][height][side][iconn] = itrack;

                 ftrack += spacing[ihalf];
                 iconn++;
             }
         }
     } /* End for all physical pins. */
 }

 #ifdef ENABLE_CHECK_ALL_TRACKS

 static void check_all_tracks_reach_pins(t_logical_block_type_ptr type,
                                         int***** tracks_connected_to_pin,
                                         int max_chan_width,
                                         int Fc,
                                         enum e_pin_type ipin_or_opin) {
     /* Checks that all tracks can be reached by some pin.   */
     VTR_ASSERT(max_chan_width > 0);

     int* num_conns_to_track; /* [0..max_chan_width-1] */
     num_conns_to_track = (int*)vtr::calloc(max_chan_width, sizeof(int));

     for (int pin = 0; pin < type->num_pins; ++pin) {
         for (int width = 0; width < type->width; ++width) {
             for (int height = 0; height < type->height; ++height) {
                 for (int side = 0; side < 4; ++side) {
                     if (tracks_connected_to_pin[pin][width][height][side][0] != OPEN) { /* Pin exists */
                         for (int conn = 0; conn < Fc; ++conn) {
                             int track = tracks_connected_to_pin[pin][width][height][side][conn];
                             num_conns_to_track[track]++;
                         }
                     }
                 }
             }
         }
     }

     for (int track = 0; track < max_chan_width; ++track) {
         if (num_conns_to_track[track] <= 0) {
             VTR_LOG_ERROR("check_all_tracks_reach_pins: Track %d does not connect to any CLB %ss.\n",
                           track, (ipin_or_opin == DRIVER ? "OPIN" : "IPIN"));
         }
     }
     free(num_conns_to_track);
 }
 #endif

 /* Allocates and loads the track to ipin lookup for each physical grid type. This
  * is the same information as the ipin_to_track map but accessed in a different way. */

 static vtr::NdMatrix<std::vector<int>, 4> alloc_and_load_track_to_pin_lookup(vtr::NdMatrix<std::vector<int>, 4> pin_to_track_map,
                                                                              const vtr::Matrix<int>& Fc,
                                                                              const int type_width,
                                                                              const int type_height,
                                                                              const int num_pins,
                                                                              const int max_chan_width,
                                                                              const int num_seg_types) {
     /* [0..max_chan_width-1][0..width][0..height][0..3].  For each track number
      * it stores a vector for each of the four sides.  x-directed channels will
      * use the TOP and   BOTTOM vectors to figure out what clb input pins they
      * connect to above  and below them, respectively, while y-directed channels
      * use the LEFT and RIGHT vectors.  Each vector contains an nelem field
      * saying how many ipins it connects to.  The list[0..nelem-1] array then
      * gives the pin numbers.                                                  */

     /* Note that a clb pin that connects to a channel on its RIGHT means that  *
      * that channel connects to a clb pin on its LEFT.  The convention used    *
      * here is always in the perspective of the CLB                            */

     if (num_pins < 1) {
         return vtr::NdMatrix<std::vector<int>, 4>();
     }

     /* Alloc and zero the the lookup table */
     auto track_to_pin_lookup = vtr::NdMatrix<std::vector<int>, 4>({size_t(max_chan_width), size_t(type_width), size_t(type_height), 4});

     /* Count number of pins to which each track connects  */
     for (int pin = 0; pin < num_pins; ++pin) {
         for (int width = 0; width < type_width; ++width) {
             for (int height = 0; height < type_height; ++height) {
                 for (int side = 0; side < 4; ++side) {
                     if (pin_to_track_map[pin][width][height][side].empty())
                         continue;

                     /* get number of tracks to which this pin connects */
                     int num_tracks = 0;
                     for (int iseg = 0; iseg < num_seg_types; iseg++) {
                         num_tracks += Fc[pin][iseg];
                     }

                     for (int conn = 0; conn < num_tracks; ++conn) {
                         int track = pin_to_track_map[pin][width][height][side][conn];
                         VTR_ASSERT(track < max_chan_width);
                         VTR_ASSERT(track >= 0);
                         track_to_pin_lookup[track][width][height][side].push_back(pin);
                     }
                 }
             }
         }
     }

     return track_to_pin_lookup;
 }

 std::string describe_rr_node(int inode) {
     auto& device_ctx = g_vpr_ctx.device();

     std::string msg = vtr::string_fmt("RR node: %d", inode);

     const auto& rr_node = device_ctx.rr_nodes[inode];

     msg += vtr::string_fmt(" type: %s", rr_node.type_string());

     msg += vtr::string_fmt(" location: (%d,%d)", rr_node.xlow(), rr_node.ylow());
     if (rr_node.xlow() != rr_node.xhigh() || rr_node.ylow() != rr_node.yhigh()) {
         msg += vtr::string_fmt(" <-> (%d,%d)", rr_node.xhigh(), rr_node.yhigh());
     }

     if (rr_node.type() == CHANX || rr_node.type() == CHANY) {
         int cost_index = rr_node.cost_index();

         int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index;

         if (seg_index < (int)device_ctx.arch->Segments.size()) {
             msg += vtr::string_fmt(" track: %d len: %d longline: %d seg_type: %s dir: %s",
                                    rr_node.track_num(),
                                    rr_node.length(),
                                    device_ctx.arch->Segments[seg_index].longline,
                                    device_ctx.arch->Segments[seg_index].name.c_str(),
                                    rr_node.direction_string());
         } else {
             msg += vtr::string_fmt(" track: %d len: %d seg_type: ILLEGAL_SEG_INDEX dir: %s",
                                    rr_node.track_num(),
                                    rr_node.length(),
                                    rr_node.direction_string());
         }
     } else if (rr_node.type() == IPIN || rr_node.type() == OPIN) {
         auto type = device_ctx.grid[rr_node.xlow()][rr_node.ylow()].type;
         std::string pin_name = block_type_pin_index_to_name(type, rr_node.pin_num());

         msg += vtr::string_fmt(" pin: %d pin_name: %s",
                                rr_node.pin_num(),
                                pin_name.c_str());
     } else {
         VTR_ASSERT(rr_node.type() == SOURCE || rr_node.type() == SINK);

         msg += vtr::string_fmt(" class: %d", rr_node.class_num());
     }

     return msg;
 }
 static void build_unidir_rr_opins(const int i, const int j, const e_side side, const DeviceGrid& grid, const std::vector<vtr::Matrix<int>>& Fc_out, const int max_chan_width, const t_chan_details& chan_details_x, const t_chan_details& chan_details_y, vtr::NdMatrix<int, 3>& Fc_xofs, vtr::NdMatrix<int, 3>& Fc_yofs, t_rr_edge_info_set& rr_edges_to_create, bool* Fc_clipped, const t_rr_node_indices& L_rr_node_indices, const std::vector<t_rr_node>& rr_nodes, const t_direct_inf* directs, const int num_directs, const t_clb_to_clb_directs* clb_to_clb_directs, const int num_seg_types) {
     /*
      * This routine adds the edges from opins to channels at the specified
      * grid location (i,j) and grid tile side
      */
     *Fc_clipped = false;

     auto type = grid[i][j].type;

     int width_offset = grid[i][j].width_offset;
     int height_offset = grid[i][j].height_offset;

     /* Go through each pin and find its fanout. */
     for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) {
         /* Skip global pins and pins that are not of DRIVER type */
         int class_index = type->pin_class[pin_index];
         if (type->class_inf[class_index].type != DRIVER) {
             continue;
         }
         if (type->is_ignored_pin[pin_index]) {
             continue;
         }

         int opin_node_index = get_rr_node_index(L_rr_node_indices, i, j, OPIN, pin_index, side);
         if (opin_node_index < 0) continue; //No valid from node

         for (int iseg = 0; iseg < num_seg_types; iseg++) {
             /* get Fc for this segment type */
             int seg_type_Fc = Fc_out[type->index][pin_index][iseg];
             VTR_ASSERT(seg_type_Fc >= 0);
             if (seg_type_Fc == 0) {
                 continue;
             }

             /* Can't do anything if pin isn't at this location */
             if (0 == type->pinloc[width_offset][height_offset][side][pin_index]) {
                 continue;
             }

             /* Figure out the chan seg at that side.
              * side is the side of the logic or io block. */
             bool vert = ((side == TOP) || (side == BOTTOM));
             bool pos_dir = ((side == TOP) || (side == RIGHT));
             t_rr_type chan_type = (vert ? CHANX : CHANY);
             int chan = (vert ? (j) : (i));
             int seg = (vert ? (i) : (j));
             int max_len = (vert ? grid.width() : grid.height());
             vtr::NdMatrix<int, 3>& Fc_ofs = (vert ? Fc_xofs : Fc_yofs);
             if (false == pos_dir) {
                 --chan;
             }

             /* Skip the location if there is no channel. */
             if (chan < 0) {
                 continue;
             }
             if (seg < 1) {
                 continue;
             }
             if (seg > int(vert ? grid.width() : grid.height()) - 2) { //-2 since no channels around perim
                 continue;
             }
             if (chan > int(vert ? grid.height() : grid.width()) - 2) { //-2 since no channels around perim
                 continue;
             }

             const t_chan_seg_details* seg_details = (chan_type == CHANX ? chan_details_x[seg][chan] : chan_details_y[chan][seg]).data();
             if (seg_details[0].length() == 0)
                 continue;

             /* Get the list of opin to mux connections for that chan seg. */
             bool clipped;
             get_unidir_opin_connections(chan, seg,
                                         seg_type_Fc, iseg, chan_type, seg_details,
                                         opin_node_index,
                                         rr_edges_to_create,
                                         Fc_ofs, max_len, max_chan_width,
                                         L_rr_node_indices, &clipped);
             if (clipped) {
                 *Fc_clipped = true;
             }
         }

         /* Add in direct connections */
         get_opin_direct_connecions(i, j, side, pin_index, opin_node_index, rr_edges_to_create, L_rr_node_indices, rr_nodes,
                                    directs, num_directs, clb_to_clb_directs);
     }
 }

 /**
  * Parse out which CLB pins should connect directly to which other CLB pins then store that in a clb_to_clb_directs data structure
  * This data structure supplements the the info in the "directs" data structure
  * TODO: The function that does this parsing in placement is poorly done because it lacks generality on heterogeniety, should replace with this one
  */
 static t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_inf* directs, const int num_directs, int delayless_switch) {
     int i, j;
     unsigned int itype;
     t_clb_to_clb_directs* clb_to_clb_directs;
     char *pb_type_name, *port_name;
     int start_pin_index, end_pin_index;
     t_pb_type* pb_type;

     auto& device_ctx = g_vpr_ctx.device();

     clb_to_clb_directs = (t_clb_to_clb_directs*)vtr::calloc(num_directs, sizeof(t_clb_to_clb_directs));

     pb_type_name = nullptr;
     port_name = nullptr;

     for (i = 0; i < num_directs; i++) {
         pb_type_name = (char*)vtr::malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));
         port_name = (char*)vtr::malloc((strlen(directs[i].from_pin) + strlen(directs[i].to_pin)) * sizeof(char));

         // Load from pins
         // Parse out the pb_type name, port name, and pin range
         parse_direct_pin_name(directs[i].from_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);

         // Figure out which type, port, and pin is used
         for (itype = 0; itype < device_ctx.logical_block_types.size(); ++itype) {
             if (strcmp(device_ctx.logical_block_types[itype].name, pb_type_name) == 0) {
                 break;
             }
         }

         if (itype >= device_ctx.logical_block_types.size()) {
             vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, "Unable to find block %s.\n", pb_type_name);
         }

         clb_to_clb_directs[i].from_clb_type = &device_ctx.logical_block_types[itype];
         pb_type = clb_to_clb_directs[i].from_clb_type->pb_type;

         for (j = 0; j < pb_type->num_ports; j++) {
             if (strcmp(pb_type->ports[j].name, port_name) == 0) {
                 break;
             }
         }
         if (j >= pb_type->num_ports) {
             vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, "Unable to find port %s (on block %s).\n", port_name, pb_type_name);
         }

         if (start_pin_index == OPEN) {
             VTR_ASSERT(start_pin_index == end_pin_index);
             start_pin_index = 0;
             end_pin_index = pb_type->ports[j].num_pins - 1;
         }
         get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].from_clb_pin_start_index);
         get_blk_pin_from_port_pin(clb_to_clb_directs[i].from_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].from_clb_pin_end_index);

         // Load to pins
         // Parse out the pb_type name, port name, and pin range
         parse_direct_pin_name(directs[i].to_pin, directs[i].line, &start_pin_index, &end_pin_index, pb_type_name, port_name);

         // Figure out which type, port, and pin is used
         for (itype = 0; itype < device_ctx.logical_block_types.size(); ++itype) {
             if (strcmp(device_ctx.logical_block_types[itype].name, pb_type_name) == 0) {
                 break;
             }
         }

         if (itype >= device_ctx.logical_block_types.size()) {
             vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, "Unable to find block %s.\n", pb_type_name);
         }

         clb_to_clb_directs[i].to_clb_type = &device_ctx.logical_block_types[itype];
         pb_type = clb_to_clb_directs[i].to_clb_type->pb_type;

         for (j = 0; j < pb_type->num_ports; j++) {
             if (strcmp(pb_type->ports[j].name, port_name) == 0) {
                 break;
             }
         }
         if (j >= pb_type->num_ports) {
             vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line, "Unable to find port %s (on block %s).\n", port_name, pb_type_name);
         }

         if (start_pin_index == OPEN) {
             VTR_ASSERT(start_pin_index == end_pin_index);
             start_pin_index = 0;
             end_pin_index = pb_type->ports[j].num_pins - 1;
         }

         get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, start_pin_index, &clb_to_clb_directs[i].to_clb_pin_start_index);
         get_blk_pin_from_port_pin(clb_to_clb_directs[i].to_clb_type->index, j, end_pin_index, &clb_to_clb_directs[i].to_clb_pin_end_index);

         if (abs(clb_to_clb_directs[i].from_clb_pin_start_index - clb_to_clb_directs[i].from_clb_pin_end_index) != abs(clb_to_clb_directs[i].to_clb_pin_start_index - clb_to_clb_directs[i].to_clb_pin_end_index)) {
             vpr_throw(VPR_ERROR_ARCH, get_arch_file_name(), directs[i].line,
                       "Range mismatch from %s to %s.\n", directs[i].from_pin, directs[i].to_pin);
         }

         //Set the switch index
         if (directs[i].switch_type > 0) {
             //Use the specified switch
             clb_to_clb_directs[i].switch_index = directs[i].switch_type;
         } else {
             //Use the delayless switch by default
             clb_to_clb_directs[i].switch_index = delayless_switch;
         }
         free(pb_type_name);
         free(port_name);

         //We must be careful to clean-up anything that we may have incidentally allocated.
         //Specifically, we can be called while generating the dummy architecture
         //for placer delay estimation.  Since the delay estimation occurs on a
         //'different' architecture it is almost certain that the f_blk_pin_from_port_pin allocated
         //by calling get_blk_pin_from_port_pin() will later be invalid.
         //We therefore must free it now.
         free_blk_pin_from_port_pin();
     }
     return clb_to_clb_directs;
 }

 /* Add all direct clb-pin-to-clb-pin edges to given opin
  *
  * The current opin is located at (x,y) along the specified side
  */
 static int get_opin_direct_connecions(int x,
                                       int y,
                                       e_side side,
                                       int opin,
                                       int from_rr_node,
                                       t_rr_edge_info_set& rr_edges_to_create,
                                       const t_rr_node_indices& L_rr_node_indices,
                                       const std::vector<t_rr_node>& rr_nodes,
                                       const t_direct_inf* directs,
                                       const int num_directs,
                                       const t_clb_to_clb_directs* clb_to_clb_directs) {
     auto& device_ctx = g_vpr_ctx.device();

     t_physical_tile_type_ptr curr_type = device_ctx.grid[x][y].type;

     int num_pins = 0;

     int width_offset = device_ctx.grid[x][y].width_offset;
     int height_offset = device_ctx.grid[x][y].height_offset;
     if (!curr_type->pinloc[width_offset][height_offset][side][opin]) {
         return num_pins; //No source pin on this side
     }

     //Capacity location determined by pin number relative to pins per capacity instance
     int z = opin / (curr_type->num_pins / curr_type->capacity);
     VTR_ASSERT(z >= 0 && z < curr_type->capacity);

     /* Iterate through all direct connections */
     for (int i = 0; i < num_directs; i++) {
         /* Find matching direct clb-to-clb connections with the same type as current grid location */
         if (clb_to_clb_directs[i].from_clb_type == logical_block_type(curr_type)) { //We are at a valid starting point

             if (directs[i].from_side != NUM_SIDES && directs[i].from_side != side) continue;

             //Offset must be in range
             if (x + directs[i].x_offset < int(device_ctx.grid.width() - 1)
                 && x + directs[i].x_offset > 0
                 && y + directs[i].y_offset < int(device_ctx.grid.height() - 1)
                 && y + directs[i].y_offset > 0) {
                 //Only add connections if the target clb type matches the type in the direct specification
                 t_physical_tile_type_ptr target_type = device_ctx.grid[x + directs[i].x_offset][y + directs[i].y_offset].type;
                 if (clb_to_clb_directs[i].to_clb_type == logical_block_type(target_type)
                     && z + directs[i].z_offset < int(target_type->capacity)
                     && z + directs[i].z_offset >= 0) {
                     /* Compute index of opin with regards to given pins */
                     int max_index = OPEN, min_index = OPEN;
                     bool swap = false;
                     if (clb_to_clb_directs[i].from_clb_pin_start_index > clb_to_clb_directs[i].from_clb_pin_end_index) {
                         swap = true;
                         max_index = clb_to_clb_directs[i].from_clb_pin_start_index;
                         min_index = clb_to_clb_directs[i].from_clb_pin_end_index;
                     } else {
                         swap = false;
                         min_index = clb_to_clb_directs[i].from_clb_pin_start_index;
                         max_index = clb_to_clb_directs[i].from_clb_pin_end_index;
                     }
                     int logical_opin = opin % (curr_type->num_pins / curr_type->capacity);

                     if (max_index >= logical_opin && min_index <= logical_opin) {
                         int offset = logical_opin - min_index;
                         /* This opin is specified to connect directly to an ipin, now compute which ipin to connect to */
                         int logical_ipin = OPEN;
                         if (clb_to_clb_directs[i].to_clb_pin_start_index > clb_to_clb_directs[i].to_clb_pin_end_index) {
                             if (swap) {
                                 logical_ipin = clb_to_clb_directs[i].to_clb_pin_end_index + offset;
                             } else {
                                 logical_ipin = clb_to_clb_directs[i].to_clb_pin_start_index - offset;
                             }
                         } else {
                             if (swap) {
                                 logical_ipin = clb_to_clb_directs[i].to_clb_pin_end_index - offset;
                             } else {
                                 logical_ipin = clb_to_clb_directs[i].to_clb_pin_start_index + offset;
                             }
                         }

                         VTR_ASSERT(logical_ipin < (target_type->num_pins / target_type->capacity));

                         //If this block has capacity > 1 then the pins of z position > 0 are offset
                         //by the number of pins per capacity instance
                         int ipin = logical_ipin + (target_type->num_pins / target_type->capacity) * (z + directs[i].z_offset);

                         //if (ipin > target_type->num_pins) continue; //Invalid z-offset

                         /* Add new ipin edge to list of edges */
                         std::vector<int> inodes;

                         if (directs[i].to_side != NUM_SIDES) {
                             //Explicit side specified, only create if pin exists on that side
                             int inode = get_rr_node_index(L_rr_node_indices, x + directs[i].x_offset, y + directs[i].y_offset,
                                                           IPIN, ipin, directs[i].to_side);
                             if (inode != OPEN) {
                                 inodes.push_back(inode);
                             }
                         } else {
                             //No side specified, get all candidates
                             inodes = get_rr_node_indices(L_rr_node_indices, x + directs[i].x_offset, y + directs[i].y_offset,
                                                          IPIN, ipin);
                         }

                         if (inodes.size() > 0) {
                             //There may be multiple physical pins corresponding to the logical
                             //target ipin. We only need to connect to one of them (since the physical pins
                             //are logically equivalent). This also ensures the graphics look reasonable and map
                             //back fairly directly to the architecture file in the case of pin equivalence
                             int inode = pick_best_direct_connect_target_rr_node(rr_nodes, from_rr_node, inodes);

                             rr_edges_to_create.emplace_back(from_rr_node, inode, clb_to_clb_directs[i].switch_index);
                             ++num_pins;
                         }
                     }
                 }
             }
         }
     }
     return num_pins;
 }

 /* Determines whether the output pins of the specified block type should be perturbed.	*
  *  This is to prevent pathological cases where the output pin connections are		*
  *  spaced such that the connection pattern always skips some types of wire (w.r.t.	*
  *  starting points)									*/
 static std::vector<bool> alloc_and_load_perturb_opins(const t_physical_tile_type_ptr type,
                                                       const vtr::Matrix<int>& Fc_out,
                                                       const int max_chan_width,
                                                       const std::vector<t_segment_inf>& segment_inf) {
     int i, Fc_max, iclass, num_wire_types;
     int num, max_primes, factor, num_factors;
     int* prime_factors;
     float step_size = 0;
     float n = 0;
     float threshold = 0.07;

     std::vector<bool> perturb_opins(segment_inf.size(), false);

     i = Fc_max = iclass = 0;
     if (segment_inf.size() > 1) {
         /* Segments of one length are grouped together in the channel.	*
          *  In the future we can determine if any of these segments will	*
          *  encounter the pathological step size case, and determine if	*
          *  we need to perturb based on the segment's frequency (if 	*
          *  frequency is small we should not perturb - testing has found	*
          *  that perturbing a channel when unnecessary increases needed	*
          *  W to achieve the same delay); but for now we just return.	*/
         return perturb_opins;
     } else {
         /* There are as many wire start points as the value of L */
         num_wire_types = segment_inf[0].length;
     }

     /* get Fc_max */
     for (i = 0; i < type->num_pins; ++i) {
         iclass = type->pin_class[i];
         if (Fc_out[i][0] > Fc_max && type->class_inf[iclass].type == DRIVER) {
             Fc_max = Fc_out[i][0];
         }
     }
     /* Nothing to perturb if Fc=0; no need to perturb if Fc = 1 */
     if (Fc_max == 0 || Fc_max == max_chan_width) {
         return perturb_opins;
     }

     /* Pathological cases occur when the step size, W/Fc, is a multiple of	*
      *  the number of wire starting points, L. Specifically, when the step 	*
      *  size is a multiple of a prime factor of L, the connection pattern	*
      *  will always skip some wires. Thus, we perturb pins if we detect this	*
      *  case.								*/

     /* get an upper bound on the number of prime factors of num_wire_types	*/
     max_primes = (int)floor(log((float)num_wire_types) / log(2.0));
     max_primes = std::max(max_primes, 1); //Minimum of 1 to ensure we allocate space for at least one prime_factor

     prime_factors = (int*)vtr::malloc(max_primes * sizeof(int));
     for (i = 0; i < max_primes; i++) {
         prime_factors[i] = 0;
     }

     /* Find the prime factors of num_wire_types */
     num = num_wire_types;
     factor = 2;
     num_factors = 0;
     while (pow((float)factor, 2) <= num) {
         if (num % factor == 0) {
             num /= factor;
             if (factor != prime_factors[num_factors]) {
                 prime_factors[num_factors] = factor;
                 num_factors++;
             }
         } else {
             factor++;
         }
     }
     if (num_factors == 0) {
         prime_factors[num_factors++] = num_wire_types; /* covers cases when num_wire_types is prime */
     }

     /* Now see if step size is an approximate multiple of one of the factors. A 	*
      *  threshold is used because step size may not be an integer.			*/
     step_size = (float)max_chan_width / Fc_max;
     for (i = 0; i < num_factors; i++) {
         if (vtr::nint(step_size) < prime_factors[i]) {
             perturb_opins[0] = false;
             break;
         }

         n = step_size / prime_factors[i];
         n = n - (float)vtr::nint(n); /* fractinal part */
         if (fabs(n) < threshold) {
             perturb_opins[0] = true;
             break;
         } else {
             perturb_opins[0] = false;
         }
     }
     free(prime_factors);

     return perturb_opins;
 }

 static int pick_best_direct_connect_target_rr_node(const std::vector<t_rr_node>& rr_nodes,
                                                    int from_rr,
                                                    const std::vector<int>& candidate_rr_nodes) {
     //With physically equivalent pins there may be multiple candidate rr nodes (which are equivalent)
     //to connect the direct edge to.
     //As a result it does not matter (from a correctness standpoint) which is picked.
     //
     //However intuitively we would expect (e.g. when visualizing the drawn RR graph) that the 'closest'
     //candidate would be picked (i.e. to minimize the drawn edge length).
     //
     //This function attempts to pick the 'best/closest' of the candidates.

     VTR_ASSERT(rr_nodes[from_rr].type() == OPIN);
     e_side from_side = rr_nodes[from_rr].side();

     float best_dist = std::numeric_limits<float>::infinity();
     int best_rr = OPEN;

     for (int to_rr : candidate_rr_nodes) {
         VTR_ASSERT(rr_nodes[to_rr].type() == IPIN);
         float to_dist = std::abs(rr_nodes[from_rr].xlow() - rr_nodes[to_rr].xlow())
                         + std::abs(rr_nodes[from_rr].ylow() - rr_nodes[to_rr].ylow());

         e_side to_side = rr_nodes[to_rr].side();

         //Include a partial unit of distance based on side alignment to ensure
         //we preferr facing sides
         if ((from_side == RIGHT && to_side == LEFT)
             || (from_side == LEFT && to_side == RIGHT)
             || (from_side == TOP && to_side == BOTTOM)
             || (from_side == BOTTOM && to_side == TOP)) {
             //Facing sides
             to_dist += 0.25;
         } else if (((from_side == RIGHT || from_side == LEFT) && (to_side == TOP || to_side == BOTTOM))
                    || ((from_side == TOP || from_side == BOTTOM) && (to_side == RIGHT || to_side == LEFT))) {
             //Perpendicular sides
             to_dist += 0.5;

         } else {
             //Opposite sides
             to_dist += 0.75;
         }

         if (to_dist < best_dist) {
             best_dist = to_dist;
             best_rr = to_rr;
         }
     }

     VTR_ASSERT(best_rr != OPEN);

     return best_rr;
 }

 //Collects the sets of connected non-configurable edges in the RR graph
 t_non_configurable_rr_sets identify_non_configurable_rr_sets() {
     std::set<std::set<t_node_edge>> edge_sets;

     //Walk through the RR graph and recursively expand non-configurable edges
     //to collect the sets of non-configurably connected nodes
     auto& device_ctx = g_vpr_ctx.device();
     for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) {
         std::set<t_node_edge> edge_set;

         expand_non_configurable(inode, edge_set);

         if (!edge_set.empty()) {
             edge_sets.insert(edge_set);
         }
     }

     std::set<std::set<int>> node_sets;
     for (auto& edge_set : edge_sets) {
         std::set<int> node_set;

         for (const auto& edge : edge_set) {
             node_set.insert(edge.from_node);
             node_set.insert(edge.to_node);
         }

         VTR_ASSERT(!node_set.empty());

         node_sets.insert(node_set);
     }

     t_non_configurable_rr_sets non_configurable_rr_sets;
     non_configurable_rr_sets.edge_sets = edge_sets;
     non_configurable_rr_sets.node_sets = node_sets;

     return non_configurable_rr_sets;
 }

 //Builds a set of non-configurably connected RR graph edges
 static void expand_non_configurable(int inode, std::set<t_node_edge>& edge_set) {
     auto& device_ctx = g_vpr_ctx.device();

     for (t_edge_size iedge = 0; iedge < device_ctx.rr_nodes[inode].num_edges(); ++iedge) {
         bool edge_non_configurable = !device_ctx.rr_nodes[inode].edge_is_configurable(iedge);

         if (edge_non_configurable) {
             int to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);

             t_node_edge edge = {inode, to_node};

             if (edge_set.count(edge)) {
                 continue; //Already seen don't re-expand to avoid loops
             }

             edge_set.emplace(edge);

             expand_non_configurable(to_node, edge_set);
         }
     }
 }

 static void process_non_config_sets(const t_non_configurable_rr_sets& non_config_rr_sets) {
     std::vector<std::vector<int>> non_config_rr_node_sets;
     std::unordered_map<int, int> rr_node_non_config_node_set;

     for (const auto& node_set : non_config_rr_sets.node_sets) {
         //Convert node sets to vectors
         non_config_rr_node_sets.push_back(std::vector<int>(node_set.begin(), node_set.end()));

         //Record reverse look-ups
         for (int inode : node_set) {
             rr_node_non_config_node_set.emplace(inode, non_config_rr_node_sets.size() - 1);
         }
     }

     auto& device_ctx = g_vpr_ctx.mutable_device();
     device_ctx.rr_non_config_node_sets = std::move(non_config_rr_node_sets);
     device_ctx.rr_node_to_non_config_node_set = std::move(rr_node_non_config_node_set);
 }