vpr/src/base/vpr_api.cpp - third_party/vtr-verilog-to-routing - Git at Google

 /**
  General API for VPR
  Other software tools should generally call just the functions defined here
  Advanced/power users may call functions defined elsewhere in VPR or modify the data structures directly at their own discretion, but be aware that doing so can break the correctness of VPR

  Author: Jason Luu
  June 21, 2012
  */

 #include <cstdio>
 #include <cstring>
 #include <ctime>
 #include <chrono>
 #include <cmath>
 using namespace std;


 #include "vtr_assert.h"
 #include "vtr_list.h"
 #include "vtr_matrix.h"
 #include "vtr_math.h"
 #include "vtr_log.h"
 #include "vtr_version.h"
 #include "vtr_time.h"
 #include "vtr_cilk.h"

 #include "vpr_types.h"
 #include "vpr_utils.h"
 #include "globals.h"
 #include "atom_netlist.h"
 #include "graphics.h"
 #include "read_netlist.h"
 #include "check_netlist.h"
 #include "read_blif.h"
 #include "draw.h"
 #include "place_and_route.h"
 #include "pack.h"
 #include "place.h"
 #include "SetupGrid.h"
 #include "stats.h"
 #include "path_delay.h"
 #include "read_options.h"
 #include "echo_files.h"
 #include "read_xml_arch_file.h"
 #include "SetupVPR.h"
 #include "ShowSetup.h"
 #include "CheckArch.h"
 #include "CheckSetup.h"
 #include "rr_graph.h"
 #include "pb_type_graph.h"
 #include "route_common.h"
 #include "timing_place_lookup.h"
 #include "route_export.h"
 #include "vpr_api.h"
 #include "read_sdc.h"
 #include "read_sdc2.h"
 #include "power.h"
 #include "pack_types.h"
 #include "lb_type_rr_graph.h"
 #include "output_blif.h"
 #include "read_activity.h"
 #include "net_delay.h"
 #include "AnalysisDelayCalculator.h"
 #include "timing_info.h"
 #include "netlist_writer.h"
 #include "net_delay.h"
 #include "RoutingDelayCalculator.h"
 #include "check_route.h"
 #include "constant_nets.h"

 #include "timing_graph_builder.h"
 #include "timing_reports.h"
 #include "tatum/echo_writer.hpp"

 #include "read_route.h"
 #include "read_blif.h"
 #include "read_place.h"

 #include "arch_util.h"

 #include "log.h"

 #if defined(TBB_INTERFACE_VERSION)
 # include <tbb/task_scheduler_init.h>

 //We need to store the scheduler object so any concurrency
 //setting is persistent
 //(vpr_init() assigns this with std::make_unique once the worker count is known)
 std::unique_ptr<tbb::task_scheduler_init> tbb_scheduler;
 #endif

 /* Local subroutines */

 /* Frees the block type descriptors and pb graph edges of the device context */
 static void free_complex_block_types();

 /* Frees the channel width lists, architecture switch info and block types of the device context */
 static void free_device(const t_det_routing_arch& routing_arch);
 /* Removes all blocks from the clustered netlist and resets it to an empty netlist */
 static void free_circuit();

 /* Estimates the fan-in of the opin->wire, wire->wire and wire->ipin switches;
    results are written through the three output pointers */
 static void get_intercluster_switch_fanin_estimates(const t_vpr_setup& vpr_setup, const t_arch& arch, const int wire_segment_length,
         int *opin_switch_fanin, int *wire_switch_fanin, int *ipin_switch_fanin);
 /* Local subroutines end */

 /* Print the VPR banner: tool name, build identification and contact/license details */
 void vpr_print_title() {
     //Tool name
     vtr::printf_info("\n");
     vtr::printf_info("VPR FPGA Placement and Routing.\n");

     //Build identification (values come from the vtr version information)
     vtr::printf_info("Version: %s\n", vtr::VERSION);
     vtr::printf_info("Revision: %s\n", vtr::VCS_REVISION);
     vtr::printf_info("Compiled: %s\n", vtr::BUILD_TIMESTAMP);
     vtr::printf_info("Compiler: %s\n", vtr::COMPILER);

     //Origin, contact and license
     vtr::printf_info("University of Toronto\n");
     vtr::printf_info("vtr-users@googlegroups.com\n");
     vtr::printf_info("This is free open source code under MIT license.\n");
     vtr::printf_info("\n");
 }

 /* Echo the command-line VPR was invoked with (arguments separated by single spaces) */
 void vpr_print_args(int argc, const char** argv) {
     vtr::printf_info("VPR was run with the following command-line:\n");

     for (int iarg = 0; iarg < argc; ++iarg) {
         //A separator goes before every argument except the first
         const bool needs_separator = (iarg > 0);
         if (needs_separator) {
             vtr::printf_info(" ");
         }
         vtr::printf_info("%s", argv[iarg]);
     }

     vtr::printf_info("\n\n");
 }

 /* Initialize VPR
  1. Read Options
  2. Read Arch
  3. Read Circuit
  4. Sanity check all three
  */
 void vpr_init(const int argc, const char **argv,
         t_options *options,
         t_vpr_setup *vpr_setup,
         t_arch *arch) {

     vtr::set_log_file("vpr_stdout.log");

     /* Print title message */
     vpr_print_title();

     /* Read in user options */
     *options = read_options(argc, argv);

     //Print out the arguments passed to VPR.
     //This provides a reference in the log file to exactly
     //how VPR was run, aiding in re-producibility
     vpr_print_args(argc, argv);

     //Set the number of parallel workers
     // We determine the number of workers in the following order:
     //  1. An explicitly specified command-line argument
     //  2. An environment variable
     //  3. The default value
     size_t num_workers;
     if (options->num_workers.provenance() == argparse::Provenance::SPECIFIED) {
         //Explicit command-line
         num_workers = options->num_workers.value();
     } else {
         const char* env_value = std::getenv("VPR_NUM_WORKERS");
         if (env_value != nullptr) {
             //VPR specific environment variable
             num_workers = vtr::atou(env_value);
         } else {
             //Command-line default value
             VTR_ASSERT(options->num_workers.provenance() == argparse::Provenance::DEFAULT);
             num_workers = options->num_workers.value();
         }
     }

 #if defined(TBB_INTERFACE_VERSION)
     //Using Thread Building Blocks
     if (num_workers == 0) {
         //Use default concurrency (i.e. maximum conccurency)
         num_workers = tbb::task_scheduler_init::default_num_threads();
     }

     vtr::printf("Using up to %zu parallel worker(s)\n", num_workers);
     tbb_scheduler = std::make_unique<tbb::task_scheduler_init>(num_workers);

 #elif defined(__cilk)
     //Using cilk, set the number of workers for the run-time

     if (num_workers == 0) {
         //Use default concurrency (i.e. maximum conccurency)
         num_workers = __cilkrts_get_nworkers();
     }

     std::string num_workers_str = std::to_string(num_workers);
     vtr::printf("Using up to %zu parallel worker(s)\n", num_workers);
     if (__cilkrts_set_param("nworkers", num_workers_str.c_str()) != 0) {
         VPR_THROW(VPR_ERROR_OTHER, "Failed to set the number of workers for cilkrts");
     }
 #else
     //No parallel execution support
     if (num_workers != 1) {
         vtr::printf_warning(__FILE__, __LINE__,
             "VPR was compiled without parallel execution support, ignoring the specified number of workers (%zu)",
             options->num_workers.value());
     }
 #endif

     vpr_setup->TimingEnabled = options->timing_analysis;
     vpr_setup->device_layout = options->device_layout;
     vpr_setup->constant_net_method = options->constant_net_method;

     vtr::printf_info("\n");
     vtr::printf_info("Architecture file: %s\n", options->ArchFile.value().c_str());
     vtr::printf_info("Circuit name: %s\n", options->CircuitName.value().c_str());
     vtr::printf_info("\n");

     /* Determine whether echo is on or off */
     setEchoEnabled(options->CreateEchoFile);

     /* Read in arch and circuit */
     SetupVPR(options,
             vpr_setup->TimingEnabled,
             true,
             &vpr_setup->FileNameOpts,
             arch,
             &vpr_setup->user_models,
             &vpr_setup->library_models,
             &vpr_setup->NetlistOpts,
             &vpr_setup->PackerOpts,
             &vpr_setup->PlacerOpts,
             &vpr_setup->AnnealSched,
             &vpr_setup->RouterOpts,
             &vpr_setup->AnalysisOpts,
             &vpr_setup->RoutingArch,
             &vpr_setup->PackerRRGraph,
             &vpr_setup->Segments,
             &vpr_setup->Timing,
             &vpr_setup->ShowGraphics,
             &vpr_setup->GraphPause,
             &vpr_setup->PowerOpts);

     /* Check inputs are reasonable */
     CheckArch(*arch);

     /* Verify settings don't conflict or otherwise not make sense */
     CheckSetup(
             vpr_setup->PackerOpts,
             vpr_setup->PlacerOpts,
             vpr_setup->RouterOpts,
             vpr_setup->RoutingArch, vpr_setup->Segments, vpr_setup->Timing,
             arch->Chans);

     /* flush any messages to user still in stdout that hasn't gotten displayed */
     fflush(stdout);

     /* Read blif file and sweep unused components */
     auto& atom_ctx = g_vpr_ctx.mutable_atom();
     atom_ctx.nlist = read_and_process_circuit(
             options->circuit_format,
             vpr_setup->PackerOpts.blif_file_name.c_str(),
             vpr_setup->user_models,
             vpr_setup->library_models,
             vpr_setup->NetlistOpts.absorb_buffer_luts,
             vpr_setup->NetlistOpts.sweep_dangling_primary_ios,
             vpr_setup->NetlistOpts.sweep_dangling_nets,
             vpr_setup->NetlistOpts.sweep_dangling_blocks,
             vpr_setup->NetlistOpts.sweep_constant_primary_outputs);


     if (vpr_setup->PowerOpts.do_power) {
         //Load the net activity file for power estimation
         vtr::ScopedPrintTimer t("Load Activity File");
         auto& power_ctx = g_vpr_ctx.mutable_power();
         power_ctx.atom_net_power = read_activity(atom_ctx.nlist, vpr_setup->FileNameOpts.ActFile.c_str());
     }

     //Initialize timing graph and constraints
     if (vpr_setup->TimingEnabled) {
         auto& timing_ctx = g_vpr_ctx.mutable_timing();
         {
             vtr::ScopedPrintTimer t("Build Timing Graph");
             timing_ctx.graph = TimingGraphBuilder(atom_ctx.nlist, atom_ctx.lookup).timing_graph();
             vtr::printf("  Timing Graph Nodes: %zu\n", timing_ctx.graph->nodes().size());
             vtr::printf("  Timing Graph Edges: %zu\n", timing_ctx.graph->edges().size());
         }
         {
             vtr::ScopedPrintTimer t("Load Timing Constraints");
             timing_ctx.constraints = read_sdc2(vpr_setup->Timing, atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph);
         }
     }

     fflush(stdout);

     ShowSetup(*vpr_setup);
 }


 /* Run the full flow: pack, build the device grid, place, route and analyse.
    Returns false as soon as the pack, place or route stage reports failure,
    true once analysis has run and the graphics have been closed. */
 bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) {
     //Pack
     if (!vpr_pack_flow(vpr_setup, arch)) {
         return false; //Unimplementable
     }

     //The device grid and graphics are set up once packing has completed
     vpr_create_device_grid(vpr_setup, arch);
     vpr_init_graphics(vpr_setup, arch);

     //Place
     if (!vpr_place_flow(vpr_setup, arch)) {
         return false; //Unimplementable
     }

     //Route
     if (!vpr_route_flow(vpr_setup, arch).success()) {
         return false; //Unimplementable
     }

     //Analysis
     vpr_analysis(vpr_setup, arch);

     vpr_close_graphics(vpr_setup);
     return true;
 }

 /*
  * Builds the device grid for the clustered netlist and reports its utilization.
  *
  * Sets in the device context: arch (copy of Arch), grid, and the chan_width
  * min/max values (zeroed) and x_list/y_list arrays (allocated, not filled in here).
  * Depends on the clustered netlist (block types) already being loaded.
  */
 void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& device_ctx = g_vpr_ctx.mutable_device();

     /*
      * Keep a copy of the architecture
      */
     device_ctx.arch = Arch;

     /*
      *Load the device grid
      */

     //Record the resource requirement (number of netlist blocks of each type)
     std::map<t_type_ptr,size_t> num_type_instances;
     for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
         num_type_instances[cluster_ctx.clb_nlist.block_type(blk_id)]++;
     }

     //Build the device
     float target_device_utilization = vpr_setup.PackerOpts.target_device_utilization;
     device_ctx.grid = create_device_grid(vpr_setup.device_layout, Arch.grid_layouts, num_type_instances, target_device_utilization);

     /*
      *Report on the device
      */
     vtr::printf_info("FPGA sized to %zu x %zu (%s)\n", device_ctx.grid.width(), device_ctx.grid.height(), device_ctx.grid.name().c_str());

     vtr::printf_info("\n");
     vtr::printf_info("Resource usage...\n");
     for (int i = 0; i < device_ctx.num_block_types; ++i) {
         auto type = &device_ctx.block_types[i];
         //The counts are size_t, so they must be printed with %zu (not %d)
         vtr::printf_info("\tNetlist      %zu\tblocks of type: %s\n",
                 num_type_instances[type], type->name);
         vtr::printf_info("\tArchitecture %zu\tblocks of type: %s\n",
                 static_cast<size_t>(device_ctx.grid.num_instances(type)), type->name);
     }
     vtr::printf_info("\n");

     float device_utilization = calculate_device_utilization(device_ctx.grid, num_type_instances);
     vtr::printf_info("Device Utilization: %.2f (target %.2f)\n", device_utilization, target_device_utilization);
     for (int i = 0; i < device_ctx.num_block_types; ++i) {
         auto type = &device_ctx.block_types[i];
         //Types which are unused by the netlist are reported as 0 utilization
         float util = 0.;
         if (num_type_instances[type] != 0) {
             util = float(num_type_instances[type]) / device_ctx.grid.num_instances(type);
         }
         vtr::printf("\tBlock Utilization: %.2f Type: %s\n", util, type->name);
     }
     vtr::printf_info("\n");


     /*
      * Channel setup
      */
     device_ctx.chan_width.x_max = device_ctx.chan_width.y_max = 0;
     device_ctx.chan_width.x_min = device_ctx.chan_width.y_min = 0;
     //x_list has one entry per grid row, y_list one entry per grid column
     device_ctx.chan_width.x_list = (int *) vtr::malloc(device_ctx.grid.height() * sizeof (int));
     device_ctx.chan_width.y_list = (int *) vtr::malloc(device_ctx.grid.width() * sizeof (int));
 }

 /* Run the packing stage according to PackerOpts.doPacking:
      STAGE_SKIP: nothing is done
      STAGE_DO:   pack, then load the resulting .net file
      STAGE_LOAD: load a previously generated .net file
    After packing/loading the clustered netlist is checked and its statistics
    printed; if vpr_setup.gen_netlist_as_blif is set it is also written to
    "<CircuitName>.preplace.blif".
    Always returns true. */
 bool vpr_pack_flow(t_vpr_setup& vpr_setup, const t_arch& arch) {
     auto& packer_opts = vpr_setup.PackerOpts;

     if (packer_opts.doPacking == STAGE_SKIP) {
         //pass
     } else {
         if (packer_opts.doPacking == STAGE_DO) {
             //Do the actual packing
             vpr_pack(vpr_setup, arch);

             //TODO: to be consistent with placement/routing vpr_pack should really
             //      load the netlist data structures itself, instead of re-loading
             //      the netlist from the .net file

             //Load the result from the .net file
             vpr_load_packing(vpr_setup, arch);
         } else {
             VTR_ASSERT(packer_opts.doPacking == STAGE_LOAD);
             //Load a previous packing from the .net file
             vpr_load_packing(vpr_setup, arch);
         }

         /* Sanity check the resulting netlist */
         check_netlist();

         /* Output the netlist stats to console. */
         printClusteredNetlistStats();

         if(vpr_setup.gen_netlist_as_blif) {
             //Build the file name in a std::string so no manual buffer sizing is
             //needed and nothing leaks if output_blif() throws
             const std::string blif_name = vpr_setup.FileNameOpts.CircuitName + ".preplace.blif";
             output_blif(&arch, blif_name.c_str());
         }
     }

     return true;
 }

 /* Pack the atom netlist into clusters.
    When the packer is timing driven and asked to auto-compute the inter-cluster
    net delay, that delay is first estimated from the architecture's switches and
    first segment type; otherwise UNDEFINED is passed to the packer. */
 void vpr_pack(t_vpr_setup& vpr_setup, const t_arch& arch) {
     vtr::ScopedPrintTimer timer("Packing");

     /* The estimate assumes the average routing hop leaves a block through an opin
      switch onto a length-4 wire, goes through a wire switch onto another length-4
      wire, and finally enters another block through a wire-to-ipin switch. */
     int wire_segment_length = 4;
     float inter_cluster_delay = UNDEFINED;

     const bool estimate_inter_cluster_delay = vpr_setup.PackerOpts.timing_driven
             && vpr_setup.PackerOpts.auto_compute_inter_cluster_net_delay;

     if (estimate_inter_cluster_delay) {
         /* Switch delay is influenced by switch fan-in, which in turn depends on the
            architecture (unidirectional/bidirectional) as well as Fc_in/out and Fs,
            so reasonable fan-ins are estimated first. */
         int opin_fanin, wire_fanin, ipin_fanin;
         get_intercluster_switch_fanin_estimates(vpr_setup, arch, wire_segment_length,
                 &opin_fanin, &wire_fanin, &ipin_fanin);

         //Output pin -> wire switch
         float opin_Tdel, opin_R, opin_Cout;
         float opin_delay = get_arch_switch_info(arch.Segments[0].arch_opin_switch, opin_fanin,
                 opin_Tdel, opin_R, opin_Cout);

         //Wire -> wire switch
         float wire_Tdel, wire_R, wire_Cout;
         float wire_delay = get_arch_switch_info(arch.Segments[0].arch_wire_switch, wire_fanin,
                 wire_Tdel, wire_R, wire_Cout);

         //Wire -> input pin switch
         float ipin_Tdel, ipin_R, ipin_Cout;
         float ipin_delay = get_arch_switch_info(
                 vpr_setup.RoutingArch.wire_to_arch_ipin_switch, ipin_fanin,
                 ipin_Tdel, ipin_R, ipin_Cout);

         float Rmetal = arch.Segments[0].Rmetal;
         float Cmetal = arch.Segments[0].Cmetal;

         /* A wire together with its driving switch costs the switch delay plus the
          product of the equivalent resistance and capacitance seen by the wire. */
         float first_hop_delay = opin_delay
                 + (opin_R + Rmetal * (float) wire_segment_length / 2)
                 * (opin_Cout + Cmetal * (float) wire_segment_length);
         float second_hop_delay = wire_delay
                 + (wire_R + Rmetal * (float) wire_segment_length / 2)
                 * (wire_Cout + Cmetal * (float) wire_segment_length);

         /* multiply by 4 to get a more conservative estimate */
         inter_cluster_delay = 4
                 * (first_hop_delay + second_hop_delay
                 + ipin_delay);
     }

     try_pack(&vpr_setup.PackerOpts, &arch, vpr_setup.user_models,
             vpr_setup.library_models, inter_cluster_delay, vpr_setup.PackerRRGraph
 #ifdef ENABLE_CLASSIC_VPR_STA
             , vpr_setup.Timing
 #endif
             );
 }

 /* Load a packing from the .net file into the clustered netlist, then process
    its constant nets using the configured constant net method */
 void vpr_load_packing(t_vpr_setup& vpr_setup, const t_arch& arch) {
     vtr::ScopedPrintTimer timer("Load Packing");

     VTR_ASSERT_MSG(!vpr_setup.FileNameOpts.NetFile.empty(),
             "Must have valid .net filename to load packing");

     const auto& files = vpr_setup.FileNameOpts;
     auto& clb_nlist = g_vpr_ctx.mutable_clustering().clb_nlist;

     //Replace the current clustered netlist with the one read from file
     clb_nlist = read_netlist(files.NetFile.c_str(), &arch, files.verify_file_digests);

     process_constant_nets(clb_nlist, vpr_setup.constant_net_method);
 }

 /* Run the placement stage according to PlacerOpts.doPlacement (skip, do or load).
    Unless skipped, the grid and blocks are synchronized afterwards.
    Always returns true. */
 bool vpr_place_flow(t_vpr_setup& vpr_setup, const t_arch& arch) {
     const auto& placer_opts = vpr_setup.PlacerOpts;

     if (placer_opts.doPlacement == STAGE_SKIP) {
         //Nothing to do
         return true;
     }

     if (placer_opts.doPlacement == STAGE_DO) {
         //Do the actual placement
         vpr_place(vpr_setup, arch);
     } else {
         VTR_ASSERT(placer_opts.doPlacement == STAGE_LOAD);

         //Load a previous placement
         vpr_load_placement(vpr_setup, arch);
     }

     sync_grid_to_blocks();
     post_place_sync();

     return true;
 }

 /* Place the clustered netlist and write the result to the placement file */
 void vpr_place(t_vpr_setup& vpr_setup, const t_arch& arch) {
     vtr::ScopedPrintTimer timer("Placement");

     try_place(vpr_setup.PlacerOpts,
               vpr_setup.AnnealSched,
               arch.Chans,
               vpr_setup.RouterOpts,
               &vpr_setup.RoutingArch,
               vpr_setup.Segments,
 #ifdef ENABLE_CLASSIC_VPR_STA
               vpr_setup.Timing,
 #endif
               arch.Directs,
               arch.num_directs);

     //Save the placement, tagged with the .net file name and the netlist's id
     auto& files = vpr_setup.FileNameOpts;
     auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;

     print_place(files.NetFile.c_str(),
                 clb_nlist.netlist_id().c_str(),
                 files.PlaceFile.c_str());
 }

 /* Load a previously generated placement from the placement file */
 void vpr_load_placement(t_vpr_setup& vpr_setup, const t_arch& /*arch*/) {
     vtr::ScopedPrintTimer timer("Load Placement");

     const auto& grid = g_vpr_ctx.device().grid;
     const auto& files = vpr_setup.FileNameOpts;

     read_place(files.NetFile.c_str(),
                files.PlaceFile.c_str(),
                files.verify_file_digests,
                grid);
 }

 /* Run the routing stage according to RouterOpts.doRouting:
      STAGE_SKIP: routing is assumed successful (RouteStatus(true, -1))
      STAGE_DO:   route at the fixed channel width if one is specified, otherwise
                  search for the minimum channel width; the result is written to
                  the .route file
      STAGE_LOAD: load a previous routing from the .route file
    For the do/load cases the routing is checked (if successful), the outcome is
    reported, optional echo files and switch usage are written, and the graphics
    are updated.
    Returns the status of the routing. */
 RouteStatus vpr_route_flow(t_vpr_setup& vpr_setup, const t_arch& arch) {

     RouteStatus route_status;

     const auto& router_opts = vpr_setup.RouterOpts;
     const auto& filename_opts = vpr_setup.FileNameOpts;

     if (router_opts.doRouting == STAGE_SKIP) {
         //Assume successful
         route_status = RouteStatus(true, -1);
     } else { //Do or load
         int chan_width = router_opts.fixed_channel_width;

         //Initialize the delay calculator
         //(net_delay is released again by free_net_delay() at the end of this branch)
         vtr::t_chunk net_delay_ch;
         vtr::vector_map<ClusterNetId, float *> net_delay = alloc_net_delay(&net_delay_ch);

         //Both remain null when timing analysis is disabled
         std::shared_ptr<SetupHoldTimingInfo> timing_info = nullptr;
         std::shared_ptr<RoutingDelayCalculator> routing_delay_calc = nullptr;
         if (vpr_setup.Timing.timing_analysis_enabled) {
             auto& atom_ctx = g_vpr_ctx.atom();

             routing_delay_calc = std::make_shared<RoutingDelayCalculator>(atom_ctx.nlist, atom_ctx.lookup, net_delay);

             timing_info = make_setup_hold_timing_info(routing_delay_calc);
         }

         if (router_opts.doRouting == STAGE_DO) {
             //Do the actual routing
             if (NO_FIXED_CHANNEL_WIDTH == chan_width) {
                 //Find minimum channel width
                 route_status = vpr_route_min_W(vpr_setup, arch, timing_info, net_delay);
             } else {
                 //Route at specified channel width
                 route_status = vpr_route_fixed_W(vpr_setup, arch, chan_width, timing_info, net_delay);
             }

             //Save the routing in the .route file
             //(this happens whether or not the routing succeeded)
             print_route(filename_opts.PlaceFile.c_str(), filename_opts.RouteFile.c_str());
         } else {
             VTR_ASSERT(router_opts.doRouting == STAGE_LOAD);

             //Load a previous routing
             route_status = vpr_load_routing(vpr_setup, arch, chan_width);
         }

         //Post-implementation

         //Status message handed to update_screen() below
         std::string graphics_msg;
         if (route_status.success()) {
             //Sanity check the routing
             auto& device_ctx = g_vpr_ctx.device();
             check_route(router_opts.route_type, device_ctx.num_rr_switches);
             get_serial_num();

             //Update status
             vtr::printf_info("Circuit successfully routed with a channel width factor of %d.\n", route_status.chan_width());
             graphics_msg = vtr::string_fmt("Routing succeeded with a channel width factor of %d.", route_status.chan_width());
         } else {
             //Update status
             vtr::printf_info("Circuit is unroutable with a channel width factor of %d.\n", route_status.chan_width());
             graphics_msg = vtr::string_fmt("Routing failed with a channel width factor of %d. ILLEGAL routing shown.", route_status.chan_width());
         }

         //Echo files
         //(guarded by timing_analysis_enabled, the same condition under which
         // routing_delay_calc and timing_info were created above)
         if (vpr_setup.Timing.timing_analysis_enabled) {
             if (isEchoFileEnabled(E_ECHO_FINAL_ROUTING_TIMING_GRAPH)) {
                 auto& timing_ctx = g_vpr_ctx.timing();
                 tatum::write_echo(getEchoFileName(E_ECHO_FINAL_ROUTING_TIMING_GRAPH),
                         *timing_ctx.graph, *timing_ctx.constraints, *routing_delay_calc, timing_info->analyzer());
             }

             if (isEchoFileEnabled(E_ECHO_ROUTING_SINK_DELAYS)) {
                 //TODO: implement
             }
         }

         if (router_opts.switch_usage_analysis) {
             print_switch_usage();
         }

         //Update interactive graphics
         update_screen(ScreenUpdatePriority::MAJOR, graphics_msg.c_str(), ROUTING, timing_info);

         free_net_delay(net_delay, &net_delay_ch);
     }

     return route_status;
 }

 /* Route at the given (positive) fixed channel width.
    Throws a VPR_ERROR_ROUTE error if no valid fixed channel width was supplied.
    Returns the routing outcome together with the channel width used. */
 RouteStatus vpr_route_fixed_W(t_vpr_setup& vpr_setup, const t_arch& arch, int fixed_channel_width, std::shared_ptr<SetupHoldTimingInfo> timing_info, vtr::vector_map<ClusterNetId, float *>& net_delay) {
     vtr::ScopedPrintTimer timer("Routing");

     const bool width_is_valid = (fixed_channel_width != NO_FIXED_CHANNEL_WIDTH)
                                 && (fixed_channel_width > 0);
     if (!width_is_valid) {
         VPR_THROW(VPR_ERROR_ROUTE, "Fixed channel width must be specified when routing at fixed channel width (was %d)", fixed_channel_width);
     }

 #ifdef ENABLE_CLASSIC_VPR_STA
     t_slack *slacks = alloc_and_load_timing_graph(vpr_setup.Timing);
 #endif

     bool routing_succeeded = try_route(fixed_channel_width,
                                        vpr_setup.RouterOpts,
                                        &vpr_setup.RoutingArch,
                                        vpr_setup.Segments,
                                        net_delay,
 #ifdef ENABLE_CLASSIC_VPR_STA
                                        slacks,
                                        vpr_setup.Timing,
 #endif
                                        timing_info,
                                        arch.Chans,
                                        arch.Directs, arch.num_directs,
                                        ScreenUpdatePriority::MAJOR);

     return RouteStatus(routing_succeeded, fixed_channel_width);
 }

 /* Search for the minimum routable channel width.
    The returned status is successful when the search result is positive, and
    carries the channel width found by the search. */
 RouteStatus vpr_route_min_W(t_vpr_setup& vpr_setup, const t_arch& arch, std::shared_ptr<SetupHoldTimingInfo> timing_info, vtr::vector_map<ClusterNetId, float *>& net_delay) {
     vtr::ScopedPrintTimer timer("Routing");

     auto& router_opts = vpr_setup.RouterOpts;

     int found_width = binary_search_place_and_route(vpr_setup.PlacerOpts,
                                                     vpr_setup.FileNameOpts,
                                                     &arch,
                                                     router_opts.verify_binary_search,
                                                     router_opts.min_channel_width_hint,
                                                     vpr_setup.AnnealSched,
                                                     router_opts,
                                                     &vpr_setup.RoutingArch,
                                                     vpr_setup.Segments,
                                                     net_delay,
 #ifdef ENABLE_CLASSIC_VPR_STA
                                                     vpr_setup.Timing,
 #endif
                                                     timing_info);

     //A non-positive width means no routable channel width was found
     return RouteStatus(found_width > 0, found_width);
 }

 /* Load a previously generated routing from the .route file.
    The routing resource graph is (re-)built at fixed_channel_width before the
    routing is read.
    Throws a VPR_ERROR_ROUTE error if no fixed channel width was specified.
    Returns a successful RouteStatus carrying fixed_channel_width. */
 RouteStatus vpr_load_routing(t_vpr_setup& vpr_setup, const t_arch& arch, int fixed_channel_width) {
     vtr::ScopedPrintTimer timer("Load Routing");
     if (NO_FIXED_CHANNEL_WIDTH == fixed_channel_width) {
         //The message contains a %d conversion, so the offending width must be supplied
         VPR_THROW(VPR_ERROR_ROUTE, "Fixed channel width must be specified when loading routing (was %d)", fixed_channel_width);
     }

     //Create the routing resource graph
     vpr_create_rr_graph(vpr_setup, arch, fixed_channel_width);

     auto& filename_opts = vpr_setup.FileNameOpts;

     //Load the routing from a file
     read_route(filename_opts.RouteFile.c_str(), vpr_setup.RouterOpts, filename_opts.verify_file_digests);

     return RouteStatus(true, fixed_channel_width);
 }


 /* (Re-)build the routing resource graph for the given channel width.
    Any existing RR graph is freed first; the drawing coordinates are
    initialized once the new graph exists. */
 void vpr_create_rr_graph(t_vpr_setup& vpr_setup, const t_arch& arch, int chan_width) {
     auto& device_ctx = g_vpr_ctx.mutable_device();
     auto det_routing_arch = &vpr_setup.RoutingArch;
     auto& router_opts = vpr_setup.RouterOpts;

     init_chan(chan_width, arch.Chans);

     //Global routing uses the global graph; detailed routing picks the graph
     //type matching the directionality of the routing architecture
     t_graph_type graph_type = GRAPH_GLOBAL;
     if (router_opts.route_type != GLOBAL) {
         if (det_routing_arch->directionality == BI_DIRECTIONAL) {
             graph_type = GRAPH_BIDIR;
         } else {
             graph_type = GRAPH_UNIDIR;
         }
     }

     //Clean-up any previous RR graph
     free_rr_graph();

     //Create the RR graph
     int warnings = 0;
     create_rr_graph(graph_type,
                     device_ctx.num_block_types, device_ctx.block_types,
                     device_ctx.grid,
                     &device_ctx.chan_width,
                     device_ctx.num_arch_switches,
                     det_routing_arch,
                     vpr_setup.Segments,
                     router_opts.base_cost_type,
                     router_opts.trim_empty_channels,
                     router_opts.trim_obs_channels,
                     arch.Directs, arch.num_directs,
                     &device_ctx.num_rr_switches,
                     &warnings);

     //Initialize drawing, now that we have an RR graph
     init_draw_coords(chan_width);
 }

 /* Set up the graphics state and, when graphics are enabled, open the display
    and allocate the drawing structures */
 void vpr_init_graphics(const t_vpr_setup& vpr_setup, const t_arch& arch) {
     //The graphics state is recorded even when graphics are disabled
     init_graphics_state(vpr_setup.ShowGraphics, vpr_setup.GraphPause,
             vpr_setup.RouterOpts.route_type);

     if (!vpr_setup.ShowGraphics) {
         return;
     }

     /* Startup X graphics */
     init_graphics("VPR: Versatile Place and Route for FPGAs", WHITE);
     alloc_draw_structs(&arch);
 }

 /* Close the display (when graphics are enabled) and free the drawing structures */
 void vpr_close_graphics(const t_vpr_setup& vpr_setup) {
     /* Close down X Display */
     if (vpr_setup.ShowGraphics) {
         close_graphics();
     }

     //The drawing structures are freed regardless of the graphics setting
     free_draw_structs();
 }

 /* The parameters of a switch may vary with its fan-in, so estimating inter-cluster
    delays requires a reasonable guess at the fan-in of the switches which connect
    clusters together:
         1) opin to wire switch
         2) wire to wire switch
         3) wire to ipin switch
    The guesses are derived from the Fc_in/Fc_out of the most common block type and
    the switch block Fs value, and are returned through the three output pointers. */
 static void get_intercluster_switch_fanin_estimates(const t_vpr_setup& vpr_setup, const t_arch& arch, const int wire_segment_length,
         int *opin_switch_fanin, int *wire_switch_fanin, int *ipin_switch_fanin) {
     //W is unknown pre-packing, so *if* we need W here, we will assume a value of 100
     int W = 100;

     e_directionality directionality = vpr_setup.RoutingArch.directionality;
     int Fs = vpr_setup.RoutingArch.Fs;

     //Largest fractional Fc seen over the receiver / driver pins respectively
     float Fc_in = 0;
     float Fc_out = 0;

     //Build a dummy 10x10 device to determine the 'best' block type to use
     auto grid = create_device_grid(vpr_setup.device_layout, arch.grid_layouts, 10, 10);

     /* Fc_in/out are taken from the most common block (e.g. logic blocks) */
     auto type = find_most_common_block_type(grid);
     VTR_ASSERT(type->fc_specs.size() > 0);

     //Estimate the maximum Fc_in/Fc_out
     for (const t_fc_specification& fc_spec : type->fc_specs) {
         float Fc = fc_spec.fc_value;

         if (fc_spec.fc_value_type == e_fc_value_type::ABSOLUTE) {
             //Convert to estimated fractional
             Fc /= W;
         }
         VTR_ASSERT_MSG(Fc >= 0 && Fc <= 1., "Fc should be fractional");

         for (int ipin : fc_spec.pins) {
             //Classify the pin through its pin class
             e_pin_type pin_type = type->class_inf[type->pin_class[ipin]].type;

             if (pin_type == DRIVER) {
                 Fc_out = std::max(Fc, Fc_out);
             } else {
                 VTR_ASSERT(pin_type == RECEIVER);
                 Fc_in = std::max(Fc, Fc_in);
             }
         }
     }

     /* How the switch fan-ins are estimated:
        1) opin to wire switch:
             2 CLBs connect to a channel, each with #opins/4 pins. Each pin has Fc_out*W
             switches, and the switches are assumed to be spread evenly over the W wires.
             In the unidirectional case, all these switches are then crammed down to
             W/wire_segment_length wires.

                     Unidirectional: 2 * #opins_per_side * Fc_out * wire_segment_length
                     Bidirectional:  2 * #opins_per_side * Fc_out

        2) wire to wire switch
             A wire segment in a switchblock connects to Fs other wires. Assuming these
             connections are evenly distributed, each target wire receives Fs connections
             as well. In the unidirectional case, source wires can only connect to
             W/wire_segment_length wires.

                     Unidirectional: Fs * wire_segment_length
                     Bidirectional:  Fs

        3) wire to ipin switch
             An input pin of a CLB simply receives Fc_in connections.

                     Unidirectional: Fc_in
                     Bidirectional:  Fc_in
      */

     /* Start from the bidirectional values... */
     (*opin_switch_fanin) = 2 * type->num_drivers / 4 * Fc_out;
     (*wire_switch_fanin) = Fs;
     (*ipin_switch_fanin) = Fc_in;

     /* ...and adjust the opin-to-wire and wire-to-wire fan-ins for unidirectional architectures */
     switch (directionality) {
         case UNI_DIRECTIONAL:
             (*opin_switch_fanin) *= wire_segment_length;
             (*wire_switch_fanin) *= wire_segment_length;
             break;
         case BI_DIRECTIONAL:
             /* no adjustments need to be made here */
             break;
         default:
             vpr_throw(VPR_ERROR_PACK, __FILE__, __LINE__, "Unrecognized directionality: %d\n", (int) directionality);
             break;
     }
 }

 /* Free architecture data structures
  *
  * Releases the device-level data held in the global device context:
  *   - the per-channel width lists (x_list/y_list), resetting the width summary fields to 0
  *   - the names of the delayless and global-route architecture switches
  *   - the architecture switch array itself
  *   - the complex block type descriptors and the trace chunk memory
  *
  * routing_arch supplies the indices (delayless_switch, global_route_switch) of the
  * switches whose names are released here.
  * NOTE(review): presumably only these two names are owned by VPR (the others being
  * released with the architecture) -- confirm against where they are allocated.
  */
 void free_device(const t_det_routing_arch& routing_arch) {
     auto& device_ctx = g_vpr_ctx.mutable_device();

     vtr::free(device_ctx.chan_width.x_list);
     vtr::free(device_ctx.chan_width.y_list);

     device_ctx.chan_width.x_list = device_ctx.chan_width.y_list = nullptr;
     device_ctx.chan_width.max = device_ctx.chan_width.x_max = device_ctx.chan_width.y_max = device_ctx.chan_width.x_min = device_ctx.chan_width.y_min = 0;

     /* This function leaves arch_switch_inf as nullptr when it finishes, so guard
      * against indexing a null array if it is called again (or before the switch
      * array was ever allocated). */
     if (device_ctx.arch_switch_inf != nullptr) {
         for (int iswitch : {routing_arch.delayless_switch, routing_arch.global_route_switch}) {
             if (device_ctx.arch_switch_inf[iswitch].name) {
                 vtr::free(device_ctx.arch_switch_inf[iswitch].name);
                 /* Cleared so the name is not freed twice if both indices refer to the same switch */
                 device_ctx.arch_switch_inf[iswitch].name = nullptr;
             }
         }
         delete[] device_ctx.arch_switch_inf;
         device_ctx.arch_switch_inf = nullptr;
     }
     free_complex_block_types();
     free_chunk_memory_trace();
 }

 /* Releases the block type descriptors stored in the global device context,
  * followed by the pb graph edges. */
 static void free_complex_block_types() {
     auto& dev = g_vpr_ctx.mutable_device();
     free_type_descriptors(dev.block_types, dev.num_block_types);

     free_pb_graph_edges();
 }

 /* Empties the clustered netlist held by the global clustering context. */
 void free_circuit() {
     auto& clustering = g_vpr_ctx.mutable_clustering();

     //Remove each block individually ...
     for (auto block : clustering.clb_nlist.blocks()) {
         clustering.clb_nlist.remove_block(block);
     }

     //... then replace the netlist with a default-constructed one
     clustering.clb_nlist = ClusteredNetlist();
 }

 /* Frees the core VPR data structures by calling, in order, the release routines for:
  * the packer's logic block rr-graphs, the circuit (clustered netlist), the architecture,
  * the device, the echo file info, the timing stats and the SDC-related structures.
  *
  *  Arch:      architecture to free (passed to free_arch)
  *  vpr_setup: supplies the PackerRRGraph and RoutingArch that are released
  */
 void vpr_free_vpr_data_structures(t_arch& Arch,
         t_vpr_setup& vpr_setup) {

     free_all_lb_type_rr_graph(vpr_setup.PackerRRGraph);
     free_circuit();
     free_arch(&Arch);
     free_device(vpr_setup.RoutingArch);
     free_echo_file_info();
     free_timing_stats();
     free_sdc_related_structs();
 }

 /* Frees the routing-related structures (rr graph, route structures, traces) and
  * then everything released by vpr_free_vpr_data_structures(). */
 void vpr_free_all(t_arch& Arch, t_vpr_setup& vpr_setup) {
     free_rr_graph();

     //The route structures are only released when RouterOpts.doRouting is set
     if (vpr_setup.RouterOpts.doRouting) {
         free_route_structs();
     }

     free_trace_structs();

     vpr_free_vpr_data_structures(Arch, vpr_setup);
 }


 /****************************************************************************************************
  * Advanced functions
  *  Used when you need fine-grained control over VPR that the main VPR operations do not enable
  ****************************************************************************************************/

 /* Read in user options
  *
  * Passes argc/argv to read_options() and stores the returned options in *options.
  * options is dereferenced unconditionally, so it must not be null. */
 void vpr_read_options(const int argc, const char **argv, t_options * options) {
     *options = read_options(argc, argv);
 }

 /* Read in arch and circuit */
 void vpr_setup_vpr(t_options *Options, const bool TimingEnabled,
         const bool readArchFile, t_file_name_opts *FileNameOpts,
         t_arch * Arch,
         t_model ** user_models, t_model ** library_models,
         t_netlist_opts* NetlistOpts,
         t_packer_opts *PackerOpts,
         t_placer_opts *PlacerOpts,
         t_annealing_sched *AnnealSched,
         t_router_opts *RouterOpts,
         t_analysis_opts* AnalysisOpts,
         t_det_routing_arch *RoutingArch,
         vector <t_lb_type_rr_node> **PackerRRGraph,
         t_segment_inf ** Segments, t_timing_inf * Timing,
         bool * ShowGraphics, int *GraphPause,
         t_power_opts * PowerOpts) {
     SetupVPR(Options, TimingEnabled, readArchFile, FileNameOpts, Arch,
             user_models, library_models, NetlistOpts, PackerOpts, PlacerOpts,
             AnnealSched, RouterOpts, AnalysisOpts, RoutingArch, PackerRRGraph, Segments, Timing,
             ShowGraphics, GraphPause, PowerOpts);
 }

 /* Checks the architecture by forwarding Arch to CheckArch(). */
 void vpr_check_arch(const t_arch& Arch) {
     CheckArch(Arch);
 }

 /* Verify settings don't conflict or otherwise not make sense
  *
  * Thin wrapper: all arguments are handed straight to CheckSetup() in the same order.
  * The option structures are taken by value here. */
 void vpr_check_setup(const t_packer_opts PackerOpts,
         const t_placer_opts PlacerOpts,
         const t_router_opts RouterOpts,
         const t_det_routing_arch RoutingArch,
         const t_segment_inf * Segments,
         const t_timing_inf Timing,
         const t_chan_width_dist Chans) {
     CheckSetup(PackerOpts,
             PlacerOpts,
             RouterOpts,
             RoutingArch,
             Segments,
             Timing,
             Chans);
 }

 /* Show current setup
  *
  * Forwards vpr_setup to ShowSetup(). */
 void vpr_show_setup(const t_vpr_setup& vpr_setup) {
     ShowSetup(vpr_setup);
 }

 /* Performs post-routing analysis of the current implementation.
  *
  * Does nothing when AnalysisOpts.doAnalysis is STAGE_SKIP. Otherwise:
  *   1) throws a VPR_ERROR_ANALYSIS error if no routing traces are loaded
  *   2) if timing is enabled, allocates the net delays and loads them from the routing
  *   3) reports routing statistics (routing_stats)
  *   4) if timing is enabled: runs the final setup/hold timing analysis, optionally echoes
  *      the timing graph, prints hold and setup timing stats, optionally writes the
  *      post-synthesis netlist, optionally runs power estimation, and finally frees
  *      the net delays
  *
  * Note that the post-synthesis netlist and power estimation steps are only reached
  * when vpr_setup.TimingEnabled is set.
  *
  * The blocks guarded by ENABLE_CLASSIC_VPR_STA additionally build, analyze and free
  * the classic timing graph/slacks.
  */
 void vpr_analysis(t_vpr_setup& vpr_setup, const t_arch& Arch) {
     //Nothing to do if analysis was not requested; any other value than STAGE_DO is unexpected
     if (vpr_setup.AnalysisOpts.doAnalysis == STAGE_SKIP) return;
     VTR_ASSERT(vpr_setup.AnalysisOpts.doAnalysis == STAGE_DO);

     auto& route_ctx = g_vpr_ctx.routing();
     auto& device_ctx = g_vpr_ctx.mutable_device();
     auto& atom_ctx = g_vpr_ctx.atom();

     //An empty trace_head is treated as 'no routing has been loaded'
     //TODO: Implement a better error check
     if (route_ctx.trace_head.size() == 0) {
         VPR_THROW(VPR_ERROR_ANALYSIS, "No routing loaded -- can not perform post-routing analysis");
     }


 	vtr::vector_map<ClusterNetId, float *> net_delay;
     vtr::t_chunk net_delay_ch;
 #ifdef ENABLE_CLASSIC_VPR_STA
     t_slack* slacks = nullptr;
 #endif
     if (vpr_setup.TimingEnabled) {
         //Load the net delays
         net_delay = alloc_net_delay(&net_delay_ch);
         load_net_delay_from_routing(net_delay);

 #ifdef ENABLE_CLASSIC_VPR_STA
         slacks = alloc_and_load_timing_graph(vpr_setup.Timing);
 #endif
     }

     //Report routing statistics (called whether or not timing is enabled;
     //net_delay is left default-constructed when timing is disabled)
     routing_stats(vpr_setup.RouterOpts.full_stats, vpr_setup.RouterOpts.route_type,
             device_ctx.num_rr_switches, vpr_setup.Segments,
             vpr_setup.RoutingArch.num_segment,
             vpr_setup.RoutingArch.R_minW_nmos,
             vpr_setup.RoutingArch.R_minW_pmos,
             Arch.grid_logic_tile_area,
             vpr_setup.RoutingArch.directionality,
             vpr_setup.RoutingArch.wire_to_rr_ipin_switch,
             vpr_setup.TimingEnabled, net_delay
 #ifdef ENABLE_CLASSIC_VPR_STA
             , slacks, vpr_setup.Timing
 #endif
             );

     if (vpr_setup.TimingEnabled) {

         //Do final timing analysis
         auto analysis_delay_calc = std::make_shared<AnalysisDelayCalculator>(atom_ctx.nlist, atom_ctx.lookup, net_delay);
         auto timing_info = make_setup_hold_timing_info(analysis_delay_calc);
         timing_info->update();

         //Dump the analysis timing graph if the corresponding echo file is enabled
         if (isEchoFileEnabled(E_ECHO_ANALYSIS_TIMING_GRAPH)) {
             auto& timing_ctx = g_vpr_ctx.timing();
             tatum::write_echo(getEchoFileName(E_ECHO_ANALYSIS_TIMING_GRAPH),
                     *timing_ctx.graph, *timing_ctx.constraints, *analysis_delay_calc, timing_info->analyzer());
         }

 #ifdef ENABLE_CLASSIC_VPR_STA
         do_timing_analysis(slacks, vpr_setup.Timing, false, true);
 #endif

         //Timing stats
         vtr::printf("\n");
         generate_hold_timing_stats(*timing_info);
         generate_setup_timing_stats(*timing_info);

         //Write the post-synthesis netlist
         if (vpr_setup.AnalysisOpts.gen_post_synthesis_netlist) {
             netlist_writer(atom_ctx.nlist.netlist_name().c_str(), analysis_delay_calc);
         }

         //Do power analysis
         if (vpr_setup.PowerOpts.do_power) {

             vpr_power_estimation(vpr_setup, Arch, *timing_info);
         }

         //Clean-up the net delays
         free_net_delay(net_delay, &net_delay_ch);

 #ifdef ENABLE_CLASSIC_VPR_STA
         free_timing_graph(slacks);
 #endif
     }
 }

 /* This function performs power estimation, and must be called
  * after packing, placement AND routing. Currently, this
  * will not work when running a partial flow (ex. only routing). */
 void vpr_power_estimation(const t_vpr_setup& vpr_setup, const t_arch& Arch, const SetupTimingInfo& timing_info) {
 	/* Ensure we are only using 1 clock */
 	if(timing_info.critical_paths().size() != 1) {
         VPR_THROW(VPR_ERROR_POWER, "Power analysis only supported on single-clock circuits");
     }

     auto& power_ctx = g_vpr_ctx.mutable_power();

     /* Get the critical path of this clock */
     power_ctx.solution_inf.T_crit = timing_info.least_slack_critical_path().delay();
     VTR_ASSERT(power_ctx.solution_inf.T_crit > 0.);

     vtr::printf_info("\n\nPower Estimation:\n");
     vtr::printf_info("-----------------\n");

     vtr::printf_info("Initializing power module\n");

     /* Initialize the power module */
     bool power_error = power_init(vpr_setup.FileNameOpts.PowerFile.c_str(),
             vpr_setup.FileNameOpts.CmosTechFile.c_str(), &Arch, &vpr_setup.RoutingArch);
     if (power_error) {
         vtr::printf_error(__FILE__, __LINE__,
                 "Power initialization failed.\n");
     }

     if (!power_error) {
         float power_runtime_s;

         vtr::printf_info("Running power estimation\n");

         /* Run power estimation */
         e_power_ret_code power_ret_code = power_total(&power_runtime_s, vpr_setup,
                 &Arch, &vpr_setup.RoutingArch);

         /* Check for errors/warnings */
         if (power_ret_code == POWER_RET_CODE_ERRORS) {
             vtr::printf_error(__FILE__, __LINE__,
                     "Power estimation failed. See power output for error details.\n");
         } else if (power_ret_code == POWER_RET_CODE_WARNINGS) {
             vtr::printf_warning(__FILE__, __LINE__,
                     "Power estimation completed with warnings. See power output for more details.\n");
         } else if (power_ret_code == POWER_RET_CODE_SUCCESS) {
         }
         vtr::printf_info("Power estimation took %g seconds\n", power_runtime_s);
     }

     /* Uninitialize power module */
     if (!power_error) {
         vtr::printf_info("Uninitializing power module\n");
         power_error = power_uninit();
         if (power_error) {
             vtr::printf_error(__FILE__, __LINE__,
                     "Power uninitialization failed.\n");
         } else {

         }
     }

     vtr::printf_info("\n");
 }

 /* Prints a description of vpr_error through vtr::printf_error: the error category
  * (derived from vpr_error.type()), the file and line recorded in the error, and its message. */
 void vpr_print_error(const VprError& vpr_error){
     /* Determine the type of VPR error, To-do: can use some enum-to-string mechanism
      *
      * The category names are string literals, so nothing is allocated here: there is
      * no buffer to leak and the pointer handed to the %s conversion below can never be null. */
     const char* error_type = "Unrecognized Error";
     switch (vpr_error.type()) {
         case VPR_ERROR_UNKNOWN:
             error_type = "Unknown";
             break;
         case VPR_ERROR_ARCH:
             error_type = "Architecture file";
             break;
         case VPR_ERROR_PACK:
             error_type = "Packing";
             break;
         case VPR_ERROR_PLACE:
             error_type = "Placement";
             break;
         case VPR_ERROR_ROUTE:
             error_type = "Routing";
             break;
         case VPR_ERROR_TIMING:
             error_type = "Timing";
             break;
         case VPR_ERROR_SDC:
             error_type = "SDC file";
             break;
         case VPR_ERROR_NET_F:
             error_type = "Netlist file";
             break;
         case VPR_ERROR_BLIF_F:
             error_type = "Blif file";
             break;
         case VPR_ERROR_PLACE_F:
             error_type = "Placement file";
             break;
         case VPR_ERROR_IMPL_NETLIST_WRITER:
             error_type = "Implementation Netlist Writer";
             break;
         case VPR_ERROR_ATOM_NETLIST:
             error_type = "Atom Netlist";
             break;
         case VPR_ERROR_POWER:
             error_type = "Power";
             break;
         case VPR_ERROR_ANALYSIS:
             error_type = "Analysis";
             break;
         case VPR_ERROR_OTHER:
             error_type = "Other";
             break;
         case VPR_ERROR_INTERRUPTED:
             error_type = "Interrupted";
             break;
         default:
             /* Keep the "Unrecognized Error" default */
             break;
     }

     //We can't pass std::string's through va_args functions,
     //so we need to copy them and pass via c_str()
     std::string msg = vpr_error.what();
     std::string filename = vpr_error.filename();

     vtr::printf_error(__FILE__, __LINE__,
             "\nType: %s\nFile: %s\nLine: %d\nMessage: %s\n",
             error_type, filename.c_str(), vpr_error.line(),
             msg.c_str());
 }