vpr/src/route/rr_graph_timing_params.cpp - third_party/vtr-verilog-to-routing - Git at Google

 #include <cstdio>

 #include "vtr_memory.h"

 #include "vpr_types.h"
 #include "vpr_error.h"

 #include "globals.h"
 #include "rr_graph.h"
 #include "rr_graph_util.h"
 #include "rr_graph2.h"
 #include "rr_graph_timing_params.h"

 /****************** Subroutine definitions *********************************/

 void add_rr_graph_C_from_switches(float C_ipin_cblock) {
     /* This routine finishes loading the C elements of the rr_graph. It assumes *
      * that when you call it the CHANX and CHANY nodes have had their C set to  *
      * their metal capacitance, and everything else has C set to 0.  The graph  *
      * connectivity (edges, switch types etc.) must all be loaded too.  This    *
      * routine will add in the capacitance on the CHANX and CHANY nodes due to: *
      *                                                                          *
      * 1) The output capacitance of the switches coming from OPINs;             *
      * 2) The input and output capacitance of the switches between the various  *
      *    wiring (CHANX and CHANY) segments; and                                *
      * 3) The input capacitance of the input connection block (or buffers       *
      *    separating tracks from the input connection block, if enabled by      *
      *    INCLUDE_TRACK_BUFFERS)                                    	    */

     int switch_index, maxlen;
     size_t to_node;
     int icblock, isblock, iseg_low, iseg_high;
     float Cin, Cout;
     t_rr_type from_rr_type, to_rr_type;
     bool* cblock_counted; /* [0..maxlen-1] -- 0th element unused. */
     float* buffer_Cin;    /* [0..maxlen-1] */
     bool buffered;
     float* Couts_to_add; /* UDSD */

     auto& device_ctx = g_vpr_ctx.device();
     auto& mutable_device_ctx = g_vpr_ctx.mutable_device();

     maxlen = std::max(device_ctx.grid.width(), device_ctx.grid.height());
     cblock_counted = (bool*)vtr::calloc(maxlen, sizeof(bool));
     buffer_Cin = (float*)vtr::calloc(maxlen, sizeof(float));

     std::vector<float> rr_node_C(device_ctx.rr_nodes.size(), 0.); //Stores the final C

     for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
         //The C may have already been partly initialized (e.g. with metal capacitance)
         rr_node_C[inode] += device_ctx.rr_nodes[inode].C();

         from_rr_type = device_ctx.rr_nodes[inode].type();

         if (from_rr_type == CHANX || from_rr_type == CHANY) {
             for (t_edge_size iedge = 0; iedge < device_ctx.rr_nodes[inode].num_edges(); iedge++) {
                 to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);
                 to_rr_type = device_ctx.rr_nodes[to_node].type();

                 if (to_rr_type == CHANX || to_rr_type == CHANY) {
                     switch_index = device_ctx.rr_nodes[inode].edge_switch(iedge);
                     Cin = device_ctx.rr_switch_inf[switch_index].Cin;
                     Cout = device_ctx.rr_switch_inf[switch_index].Cout;
                     buffered = device_ctx.rr_switch_inf[switch_index].buffered();

                     /* If both the switch from inode to to_node and the switch from *
                      * to_node back to inode use bidirectional switches (i.e. pass  *
                      * transistors), there will only be one physical switch for     *
                      * both edges.  Hence, I only want to count the capacitance of  *
                      * that switch for one of the two edges.  (Note:  if there is   *
                      * a pass transistor edge from x to y, I always build the graph *
                      * so that there is a corresponding edge using the same switch  *
                      * type from y to x.) So, I arbitrarily choose to add in the    *
                      * capacitance in that case of a pass transistor only when      *
                      * processing the lower inode number.                           *
                      * If an edge uses a buffer I always have to add in the output  *
                      * capacitance.  I assume that buffers are shared at the same   *
                      * (i,j) location, so only one input capacitance needs to be    *
                      * added for all the buffered switches at that location.  If    *
                      * the buffers at that location have different sizes, I use the *
                      * input capacitance of the largest one.                        */

                     if (!buffered && inode < to_node) { /* Pass transistor. */
                         rr_node_C[inode] += Cin;
                         rr_node_C[to_node] += Cout;
                     }

                     else if (buffered) {
                         /* Prevent double counting of capacitance for UDSD */
                         if (device_ctx.rr_nodes[to_node].direction() == BI_DIRECTION) {
                             /* For multiple-driver architectures the output capacitance can
                              * be added now since each edge is actually a driver */
                             rr_node_C[to_node] += Cout;
                         }
                         isblock = seg_index_of_sblock(inode, to_node);
                         buffer_Cin[isblock] = std::max(buffer_Cin[isblock], Cin);
                     }

                 }
                 /* End edge to CHANX or CHANY node. */
                 else if (to_rr_type == IPIN) {
                     if (INCLUDE_TRACK_BUFFERS) {
                         /* Implements sharing of the track to connection box buffer.
                          * Such a buffer exists at every segment of the wire at which
                          * at least one logic block input connects. */
                         icblock = seg_index_of_cblock(from_rr_type, to_node);
                         if (cblock_counted[icblock] == false) {
                             rr_node_C[inode] += C_ipin_cblock;
                             cblock_counted[icblock] = true;
                         }
                     } else {
                         /* No track buffer. Simply add the capacitance onto the wire */
                         rr_node_C[inode] += C_ipin_cblock;
                     }
                 }
             } /* End loop over all edges of a node. */

             /* Reset the cblock_counted and buffer_Cin arrays, and add buf Cin. */

             /* Method below would be faster for very unpopulated segments, but I  *
              * think it would be slower overall for most FPGAs, so commented out. */

             /*   for (iedge=0;iedge<device_ctx.rr_nodes[inode].num_edges();iedge++) {
              * to_node = device_ctx.rr_nodes[inode].edges[iedge];
              * if (device_ctx.rr_nodes[to_node].type() == IPIN) {
              * icblock = seg_index_of_cblock (from_rr_type, to_node);
              * cblock_counted[icblock] = false;
              * }
              * }     */

             if (from_rr_type == CHANX) {
                 iseg_low = device_ctx.rr_nodes[inode].xlow();
                 iseg_high = device_ctx.rr_nodes[inode].xhigh();
             } else { /* CHANY */
                 iseg_low = device_ctx.rr_nodes[inode].ylow();
                 iseg_high = device_ctx.rr_nodes[inode].yhigh();
             }

             for (icblock = iseg_low; icblock <= iseg_high; icblock++) {
                 cblock_counted[icblock] = false;
             }

             for (isblock = iseg_low - 1; isblock <= iseg_high; isblock++) {
                 rr_node_C[inode] += buffer_Cin[isblock]; /* Biggest buf Cin at loc */
                 buffer_Cin[isblock] = 0.;
             }

         }
         /* End node is CHANX or CHANY */
         else if (from_rr_type == OPIN) {
             for (t_edge_size iedge = 0; iedge < device_ctx.rr_nodes[inode].num_edges(); iedge++) {
                 switch_index = device_ctx.rr_nodes[inode].edge_switch(iedge);
                 to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);
                 to_rr_type = device_ctx.rr_nodes[to_node].type();

                 if (to_rr_type != CHANX && to_rr_type != CHANY)
                     continue;

                 if (device_ctx.rr_nodes[to_node].direction() == BI_DIRECTION) {
                     Cout = device_ctx.rr_switch_inf[switch_index].Cout;
                     to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge); /* Will be CHANX or CHANY */
                     rr_node_C[to_node] += Cout;
                 }
             }
         }
         /* End node is OPIN. */
     } /* End for all nodes. */

     /* Now we need to add any Cout loads for nets that we previously didn't process
      * Current structures only keep switch information from a node to the next node and
      * not the reverse.  Therefore I need to go through all the possible edges to figure
      * out what the Cout's should be */
     Couts_to_add = (float*)vtr::calloc(device_ctx.rr_nodes.size(), sizeof(float));
     for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
         for (t_edge_size iedge = 0; iedge < device_ctx.rr_nodes[inode].num_edges(); iedge++) {
             switch_index = device_ctx.rr_nodes[inode].edge_switch(iedge);
             to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);
             to_rr_type = device_ctx.rr_nodes[to_node].type();
             if (to_rr_type == CHANX || to_rr_type == CHANY) {
                 if (device_ctx.rr_nodes[to_node].direction() != BI_DIRECTION) {
                     /* Cout was not added in these cases */
                     Couts_to_add[to_node] = std::max(Couts_to_add[to_node], device_ctx.rr_switch_inf[switch_index].Cout);
                 }
             }
         }
     }
     for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
         rr_node_C[inode] += Couts_to_add[inode];
     }

     //Create the final flywieghted t_rr_rc_data
     for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
         mutable_device_ctx.rr_nodes[inode].set_rc_index(find_create_rr_rc_data(device_ctx.rr_nodes[inode].R(), rr_node_C[inode]));
     }

     free(Couts_to_add);
     free(cblock_counted);
     free(buffer_Cin);
 }
	#include <cstdio>

	#include "vtr_memory.h"

	#include "vpr_types.h"
	#include "vpr_error.h"

	#include "globals.h"
	#include "rr_graph.h"
	#include "rr_graph_util.h"
	#include "rr_graph2.h"
	#include "rr_graph_timing_params.h"

	/**************** Subroutine definitions *******************************/

	void add_rr_graph_C_from_switches(float C_ipin_cblock) {
	/* This routine finishes loading the C elements of the rr_graph. It assumes *
	* that when you call it the CHANX and CHANY nodes have had their C set to *
	* their metal capacitance, and everything else has C set to 0. The graph *
	* connectivity (edges, switch types etc.) must all be loaded too. This *
	* routine will add in the capacitance on the CHANX and CHANY nodes due to: *
	* *
	* 1) The output capacitance of the switches coming from OPINs; *
	* 2) The input and output capacitance of the switches between the various *
	* wiring (CHANX and CHANY) segments; and *
	* 3) The input capacitance of the input connection block (or buffers *
	* separating tracks from the input connection block, if enabled by *
	* INCLUDE_TRACK_BUFFERS) */

	int switch_index, maxlen;
	size_t to_node;
	int icblock, isblock, iseg_low, iseg_high;
	float Cin, Cout;
	t_rr_type from_rr_type, to_rr_type;
	bool* cblock_counted; /* [0..maxlen-1] -- 0th element unused. */
	float* buffer_Cin; /* [0..maxlen-1] */
	bool buffered;
	float* Couts_to_add; /* UDSD */

	auto& device_ctx = g_vpr_ctx.device();
	auto& mutable_device_ctx = g_vpr_ctx.mutable_device();

	maxlen = std::max(device_ctx.grid.width(), device_ctx.grid.height());
	cblock_counted = (bool*)vtr::calloc(maxlen, sizeof(bool));
	buffer_Cin = (float*)vtr::calloc(maxlen, sizeof(float));

	std::vector<float> rr_node_C(device_ctx.rr_nodes.size(), 0.); //Stores the final C

	for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
	//The C may have already been partly initialized (e.g. with metal capacitance)
	rr_node_C[inode] += device_ctx.rr_nodes[inode].C();

	from_rr_type = device_ctx.rr_nodes[inode].type();

	if (from_rr_type == CHANX \|\| from_rr_type == CHANY) {
	for (t_edge_size iedge = 0; iedge < device_ctx.rr_nodes[inode].num_edges(); iedge++) {
	to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);
	to_rr_type = device_ctx.rr_nodes[to_node].type();

	if (to_rr_type == CHANX \|\| to_rr_type == CHANY) {
	switch_index = device_ctx.rr_nodes[inode].edge_switch(iedge);
	Cin = device_ctx.rr_switch_inf[switch_index].Cin;
	Cout = device_ctx.rr_switch_inf[switch_index].Cout;
	buffered = device_ctx.rr_switch_inf[switch_index].buffered();

	/* If both the switch from inode to to_node and the switch from *
	* to_node back to inode use bidirectional switches (i.e. pass *
	* transistors), there will only be one physical switch for *
	* both edges. Hence, I only want to count the capacitance of *
	* that switch for one of the two edges. (Note: if there is *
	* a pass transistor edge from x to y, I always build the graph *
	* so that there is a corresponding edge using the same switch *
	* type from y to x.) So, I arbitrarily choose to add in the *
	* capacitance in that case of a pass transistor only when *
	* processing the lower inode number. *
	* If an edge uses a buffer I always have to add in the output *
	* capacitance. I assume that buffers are shared at the same *
	* (i,j) location, so only one input capacitance needs to be *
	* added for all the buffered switches at that location. If *
	* the buffers at that location have different sizes, I use the *
	* input capacitance of the largest one. */

	if (!buffered && inode < to_node) { /* Pass transistor. */
	rr_node_C[inode] += Cin;
	rr_node_C[to_node] += Cout;
	}

	else if (buffered) {
	/* Prevent double counting of capacitance for UDSD */
	if (device_ctx.rr_nodes[to_node].direction() == BI_DIRECTION) {
	/* For multiple-driver architectures the output capacitance can
	* be added now since each edge is actually a driver */
	rr_node_C[to_node] += Cout;
	}
	isblock = seg_index_of_sblock(inode, to_node);
	buffer_Cin[isblock] = std::max(buffer_Cin[isblock], Cin);
	}

	}
	/* End edge to CHANX or CHANY node. */
	else if (to_rr_type == IPIN) {
	if (INCLUDE_TRACK_BUFFERS) {
	/* Implements sharing of the track to connection box buffer.
	* Such a buffer exists at every segment of the wire at which
	* at least one logic block input connects. */
	icblock = seg_index_of_cblock(from_rr_type, to_node);
	if (cblock_counted[icblock] == false) {
	rr_node_C[inode] += C_ipin_cblock;
	cblock_counted[icblock] = true;
	}
	} else {
	/* No track buffer. Simply add the capacitance onto the wire */
	rr_node_C[inode] += C_ipin_cblock;
	}
	}
	} /* End loop over all edges of a node. */

	/* Reset the cblock_counted and buffer_Cin arrays, and add buf Cin. */

	/* Method below would be faster for very unpopulated segments, but I *
	* think it would be slower overall for most FPGAs, so commented out. */

	/* for (iedge=0;iedge<device_ctx.rr_nodes[inode].num_edges();iedge++) {
	* to_node = device_ctx.rr_nodes[inode].edges[iedge];
	* if (device_ctx.rr_nodes[to_node].type() == IPIN) {
	* icblock = seg_index_of_cblock (from_rr_type, to_node);
	* cblock_counted[icblock] = false;
	* }
	* } */

	if (from_rr_type == CHANX) {
	iseg_low = device_ctx.rr_nodes[inode].xlow();
	iseg_high = device_ctx.rr_nodes[inode].xhigh();
	} else { /* CHANY */
	iseg_low = device_ctx.rr_nodes[inode].ylow();
	iseg_high = device_ctx.rr_nodes[inode].yhigh();
	}

	for (icblock = iseg_low; icblock <= iseg_high; icblock++) {
	cblock_counted[icblock] = false;
	}

	for (isblock = iseg_low - 1; isblock <= iseg_high; isblock++) {
	rr_node_C[inode] += buffer_Cin[isblock]; /* Biggest buf Cin at loc */
	buffer_Cin[isblock] = 0.;
	}

	}
	/* End node is CHANX or CHANY */
	else if (from_rr_type == OPIN) {
	for (t_edge_size iedge = 0; iedge < device_ctx.rr_nodes[inode].num_edges(); iedge++) {
	switch_index = device_ctx.rr_nodes[inode].edge_switch(iedge);
	to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);
	to_rr_type = device_ctx.rr_nodes[to_node].type();

	if (to_rr_type != CHANX && to_rr_type != CHANY)
	continue;

	if (device_ctx.rr_nodes[to_node].direction() == BI_DIRECTION) {
	Cout = device_ctx.rr_switch_inf[switch_index].Cout;
	to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge); /* Will be CHANX or CHANY */
	rr_node_C[to_node] += Cout;
	}
	}
	}
	/* End node is OPIN. */
	} /* End for all nodes. */

	/* Now we need to add any Cout loads for nets that we previously didn't process
	* Current structures only keep switch information from a node to the next node and
	* not the reverse. Therefore I need to go through all the possible edges to figure
	* out what the Cout's should be */
	Couts_to_add = (float*)vtr::calloc(device_ctx.rr_nodes.size(), sizeof(float));
	for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
	for (t_edge_size iedge = 0; iedge < device_ctx.rr_nodes[inode].num_edges(); iedge++) {
	switch_index = device_ctx.rr_nodes[inode].edge_switch(iedge);
	to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);
	to_rr_type = device_ctx.rr_nodes[to_node].type();
	if (to_rr_type == CHANX \|\| to_rr_type == CHANY) {
	if (device_ctx.rr_nodes[to_node].direction() != BI_DIRECTION) {
	/* Cout was not added in these cases */
	Couts_to_add[to_node] = std::max(Couts_to_add[to_node], device_ctx.rr_switch_inf[switch_index].Cout);
	}
	}
	}
	}
	for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
	rr_node_C[inode] += Couts_to_add[inode];
	}

	//Create the final flywieghted t_rr_rc_data
	for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
	mutable_device_ctx.rr_nodes[inode].set_rc_index(find_create_rr_rc_data(device_ctx.rr_nodes[inode].R(), rr_node_C[inode]));
	}

	free(Couts_to_add);
	free(cblock_counted);
	free(buffer_Cin);
	}