| #include <stdio.h> | |
| #include <string.h> | |
| #include <limits.h> | |
| #include <math.h> | |
| #include "util.h" | |
| #include "vpr_types.h" | |
| #include "globals.h" | |
| #include "path_delay.h" | |
| #include "path_delay2.h" | |
| #include "net_delay.h" | |
| #include "vpr_utils.h" | |
| #include <assert.h> | |
| #include "read_xml_arch_file.h" | |
| #include "ReadOptions.h" | |
| #include "read_sdc.h" | |
| #include "stats.h" | |
| /**************************** Top-level summary ****************************** | |
| Timing analysis by Vaughn Betz, Jason Luu, and Michael Wainberg. | |
| Timing analysis is a three-step process: | |
| 1. Interpret the constraints specified by the user in an SDC (Synopsys Design | |
| Constraints) file or, if none is specified, use default constraints. | |
| 2. Convert the pre- or post-packed netlist (depending on the stage of the | |
| flow) into a timing graph, where nodes represent pins and edges represent | |
| dependencies and delays between pins (see "Timing graph structure", below). | |
| 3. Traverse the timing graph to obtain information about which connections | |
| to optimize, as well as statistics for the user. | |
| Steps 1 and 2 are performed through one of two helper functions: | |
| alloc_and_load_timing_graph and alloc_and_load_pre_packing_timing_graph. | |
| The first step is to create the timing graph, which is stored in the array | |
| tnode ("timing node"). This is done through alloc_and_load_tnodes (post- | |
| packed) or alloc_and_load_tnodes_from_prepacked_netlist. Then, the timing | |
| graph is topologically sorted ("levelized") in | |
| alloc_and_load_timing_graph_levels, to allow for faster traversals later. | |
| read_sdc reads the SDC file, interprets its contents and stores them in | |
| the data structure g_sdc. (This data structure does not need to remain | |
| global but it is probably easier, since its information is used in both | |
| netlists and only needs to be read in once.) | |
| load_clock_domain_and_clock_and_io_delay then gives each flip-flop and I/O | |
| the index of a constrained clock from the SDC file in g_sdc->constrained_ | |
| clocks, or -1 if an I/O is unconstrained. | |
| process_constraints does a pre-traversal through the timing graph and prunes | |
| all constraints between domains that never intersect so they are not analysed. | |
| Step 3 is performed through do_timing_analysis. For each constraint between | |
| a pair of clock domains we do a forward traversal from the "source" domain | |
| to the "sink" domain to compute each tnode's arrival time, the time when the | |
| latest signal would arrive at the node. We also do a backward traversal to | |
| compute required time, the time when the earliest signal has to leave the | |
| node to meet the constraint. The "slack" of each sink pin on each net is | |
| basically the difference between the two times. | |
| If path counting is on, we calculate a forward and backward path weight in | |
| do_path_counting. These represent the importance of paths fanning, | |
| respectively, into and out of this pin, in such a way that paths with a | |
| higher slack are discounted exponentially in importance. | |
| If path counting is off and we are using the pre-packed netlist, we also | |
| calculate normalized costs for the clusterer (normalized arrival time, slack | |
| and number of critical paths) in normalized_costs. The clusterer uses these | |
| to calculate a criticality for each block. | |
| Finally, in update_slacks, we calculate the slack for each sink pin on each net | |
| for printing, as well as a derived metric, timing criticality, which the | |
| optimizers actually use. If path counting is on, we calculate a path criticality | |
| from the forward and backward weights on each tnode. */ | |
| /************************* Timing graph structure **************************** | |
| Author: V. Betz | |
| We can build timing graphs that match either the primitive (logical_block) | |
| netlist (of basic elements before clustering, like FFs and LUTs) or that | |
| match the clustered netlist (block). You pass in the is_pre_packed flag to | |
| say which kind of netlist and timing graph you are working with. | |
| Every used (not OPEN) block pin becomes a timing node, both on primitive | |
blocks and (if you're building the timing graph that matches a clustered
| netlist) on clustered blocks. For the clustered (not pre_packed) timing | |
| graph, every used pin within a clustered (pb_type) block also becomes a timing | |
| node. So a CLB that contains ALMs that contains LUTs will have nodes created | |
| for the CLB pins, ALM pins and LUT pins, with edges that connect them as | |
| specified in the clustered netlist. Unused (OPEN) pins don't create any | |
| timing nodes. | |
| The primitive blocks have edges from the tnodes that represent input pins and | |
| output pins that represent the timing dependencies within these lowest level | |
| blocks (e.g. LUTs and FFs and IOs). The exact edges created come from reading | |
| the architecture file. A LUT for example will have delays specified from all | |
| its inputs to its output, and hence we will create a tedge from each tnode | |
| corresponding to an input pin of the LUT to the tnode that represents the LUT | |
| output pin. The delay marked on each edge is read from the architecture file. | |
| A FF has nodes representing its pins, but also two extra nodes, representing | |
| the TN_FF_SINK and TN_FF_SOURCE. Those two nodes represent the internal storage | |
| node of the FF. The TN_FF_SINK has edges going to it from the regular FF inputs | |
| (but not the clock input), with appropriate delays. It has no outgoing edges, | |
| since it terminates timing paths. The TN_FF_SOURCE has an edge going from it to | |
the FF output pin, with the appropriate delay. TN_FF_SOURCES have no incoming edges; they
| start timing paths. FF clock pins have no outgoing edges, but the delay to | |
| them can be looked up, and is used in the timing traversals to compute clock | |
| delay for slack computations. | |
| In the timing graph I create, input pads and constant generators have no | |
| inputs (incoming edges), just like TN_FF_SOURCES. Every input pad and output | |
| pad is represented by two tnodes -- an input pin/source and an output pin/ | |
| sink. For an input pad the input source comes from off chip and has no fanin, | |
| while the output pin drives signals within the chip. For output pads, the | |
| input pin is driven by signal (net) within the chip, and the output sink node | |
goes off chip and has no fanout (out-edges). I need two nodes to represent
| things like pads because I mark all delay on tedges, not on tnodes. | |
| One other subblock that needs special attention is a constant generator. | |
| This has no used inputs, but its output is used. I create an extra tnode, | |
| a dummy input, in addition to the output pin tnode. The dummy tnode has | |
| no fanin. Since constant generators really generate their outputs at T = | |
| -infinity, I set the delay from the input tnode to the output to a large- | |
| magnitude negative number. This guarantees every block that needs the | |
| output of a constant generator sees it available very early. | |
| The main structure of the timing graph is given by the nodes and the edges | |
| that connect them. We also store some extra information on tnodes that | |
| (1) lets us figure out how to map from a tnode back to the netlist pin (or | |
| other item) it represents and (2) figure out what clock domain it is on, if | |
| it is a timing path start point (no incoming edges) or end point (no outgoing | |
| edges) and (3) lets us figure out the delay of the clock to that node, if it | |
| is a timing path start or end point. | |
| To map efficiently from tedges back to the netlist pins, we create the tedge | |
array driven by a tnode that represents a netlist output pin *in the same order
| as the netlist net pins*. That means the edge index for the tedge array from | |
such a tnode guarantees iedge = net_pin_index - 1. The code to map slacks
| from the timing graph back to the netlist relies on this. */ | |
| /*************************** Global variables *******************************/ | |
| t_tnode *tnode = NULL; /* [0..num_tnodes - 1] */ | |
| int num_tnodes = 0; /* Number of nodes (pins) in the timing graph */ | |
| /******************** Variables local to this module ************************/ | |
| #define NUM_BUCKETS 5 /* Used when printing slack and criticality. */ | |
| /* Variables for "chunking" the tedge memory. If the head pointer in tedge_ch is NULL, * | |
| * no timing graph exists now. */ | |
| static t_chunk tedge_ch = {NULL, 0, NULL}; | |
| static struct s_net *timing_nets = NULL; | |
| static int num_timing_nets = 0; | |
| static t_timing_stats * f_timing_stats = NULL; /* Critical path delay and worst-case slack per constraint. */ | |
| static int * f_net_to_driver_tnode; | |
| /* [0..num_nets - 1]. Gives the index of the tnode that drives each net. | |
| Used for both pre- and post-packed netlists. If you just want the number | |
| of edges on the driver tnode, use: | |
| num_edges = timing_nets[inet].num_sinks; | |
| instead of the equivalent but more convoluted: | |
| driver_tnode = f_net_to_driver_tnode[inet]; | |
| num_edges = tnode[driver_tnode].num_edges; | |
| Only use this array if you want the actual edges themselves or the index | |
| of the driver tnode. */ | |
| /***************** Subroutines local to this module *************************/ | |
| static t_slack * alloc_slacks(void); | |
| static void update_slacks(t_slack * slacks, int source_clock_domain, int sink_clock_domain, float criticality_denom, | |
| boolean update_slack); | |
| static void alloc_and_load_tnodes(t_timing_inf timing_inf); | |
| static void alloc_and_load_tnodes_from_prepacked_netlist(float block_delay, | |
| float inter_cluster_net_delay); | |
| static void alloc_timing_stats(void); | |
| static float do_timing_analysis_for_constraint(int source_clock_domain, int sink_clock_domain, | |
| boolean is_prepacked, boolean is_final_analysis, long * max_critical_input_paths_ptr, | |
| long * max_critical_output_paths_ptr); | |
| #ifdef PATH_COUNTING | |
| static void do_path_counting(float criticality_denom); | |
| #endif | |
| static void do_lut_rebalancing(); | |
| static void load_tnode(INP t_pb_graph_pin *pb_graph_pin, INP int iblock, | |
| INOUTP int *inode, INP t_timing_inf timing_inf); | |
| #ifndef PATH_COUNTING | |
| static void update_normalized_costs(float T_arr_max_this_domain, long max_critical_input_paths, | |
| long max_critical_output_paths); | |
| #endif | |
| static void print_primitive_as_blif (FILE *fpout, int iblk); | |
| static void set_and_balance_arrival_time(int to_node, int from_node, float Tdel, boolean do_lut_input_balancing); | |
| static void load_clock_domain_and_clock_and_io_delay(boolean is_prepacked); | |
| static char * find_tnode_net_name(int inode, boolean is_prepacked); | |
| static t_tnode * find_ff_clock_tnode(int inode, boolean is_prepacked); | |
| static inline int get_tnode_index(t_tnode * node); | |
| static inline boolean has_valid_T_arr(int inode); | |
| static inline boolean has_valid_T_req(int inode); | |
| static int find_clock(char * net_name); | |
| static int find_input(char * net_name); | |
| static int find_output(char * net_name); | |
| static int find_cf_constraint(char * source_clock_name, char * sink_ff_name); | |
| static void propagate_clock_domain_and_skew(int inode); | |
| static void process_constraints(void); | |
| static void print_global_criticality_stats(FILE * fp, float ** criticality, const char * singular_name, const char * capitalized_plural_name); | |
| static void print_timing_constraint_info(const char *fname); | |
| static void print_spaces(FILE * fp, int num_spaces); | |
| /********************* Subroutine definitions *******************************/ | |
| t_slack * alloc_and_load_timing_graph(t_timing_inf timing_inf) { | |
| /* This routine builds the graph used for timing analysis. Every cb pin is a | |
| * timing node (tnode). The connectivity between pins is * | |
| * represented by timing edges (tedges). All delay is marked on edges, not * | |
| * on nodes. Returns two arrays that will store slack values: * | |
| * slack and criticality ([0..num_nets-1][1..num_pins]). */ | |
| /* For pads, only the first two pin locations are used (input to pad is first, | |
| * output of pad is second). For CLBs, all OPEN pins on the cb have their | |
| * mapping set to OPEN so I won't use it by mistake. */ | |
| int num_sinks; | |
| t_slack * slacks = NULL; | |
| boolean do_process_constraints = FALSE; | |
| if (tedge_ch.chunk_ptr_head != NULL) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "in alloc_and_load_timing_graph: An old timing graph still exists.\n"); | |
| exit(1); | |
| } | |
| num_timing_nets = num_nets; | |
| timing_nets = clb_net; | |
| alloc_and_load_tnodes(timing_inf); | |
| num_sinks = alloc_and_load_timing_graph_levels(); | |
| check_timing_graph(num_sinks); | |
| slacks = alloc_slacks(); | |
| if (g_sdc == NULL) { | |
| /* the SDC timing constraints only need to be read in once; * | |
| * if they haven't been already, do it now */ | |
| read_sdc(timing_inf); | |
| do_process_constraints = TRUE; | |
| } | |
| load_clock_domain_and_clock_and_io_delay(FALSE); | |
| if (do_process_constraints) | |
| process_constraints(); | |
| if (f_timing_stats == NULL) | |
| alloc_timing_stats(); | |
| return slacks; | |
| } | |
| t_slack * alloc_and_load_pre_packing_timing_graph(float block_delay, | |
| float inter_cluster_net_delay, t_model *models, t_timing_inf timing_inf) { | |
| /* This routine builds the graph used for timing analysis. Every technology- | |
| * mapped netlist pin is a timing node (tnode). The connectivity between pins is * | |
| * represented by timing edges (tedges). All delay is marked on edges, not * | |
| * on nodes. Returns two arrays that will store slack values: * | |
| * slack and criticality ([0..num_nets-1][1..num_pins]). */ | |
| /* For pads, only the first two pin locations are used (input to pad is first, | |
| * output of pad is second). For CLBs, all OPEN pins on the cb have their | |
| * mapping set to OPEN so I won't use it by mistake. */ | |
| int num_sinks; | |
| t_slack * slacks = NULL; | |
| boolean do_process_constraints = FALSE; | |
| if (tedge_ch.chunk_ptr_head != NULL) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "in alloc_and_load_timing_graph: An old timing graph still exists.\n"); | |
| exit(1); | |
| } | |
| num_timing_nets = num_logical_nets; | |
| timing_nets = vpack_net; | |
| alloc_and_load_tnodes_from_prepacked_netlist(block_delay, | |
| inter_cluster_net_delay); | |
| num_sinks = alloc_and_load_timing_graph_levels(); | |
| slacks = alloc_slacks(); | |
| check_timing_graph(num_sinks); | |
| if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_PRE_PACKING_TIMING_GRAPH_AS_BLIF)) { | |
| print_timing_graph_as_blif(getEchoFileName(E_ECHO_PRE_PACKING_TIMING_GRAPH_AS_BLIF), models); | |
| } | |
| if (g_sdc == NULL) { | |
| /* the SDC timing constraints only need to be read in once; * | |
| * if they haven't been already, do it now */ | |
| read_sdc(timing_inf); | |
| do_process_constraints = TRUE; | |
| } | |
| load_clock_domain_and_clock_and_io_delay(TRUE); | |
| if (do_process_constraints) | |
| process_constraints(); | |
| if (f_timing_stats == NULL) | |
| alloc_timing_stats(); | |
| return slacks; | |
| } | |
| static t_slack * alloc_slacks(void) { | |
| /* Allocates the slack, criticality and path_criticality structures | |
| ([0..num_nets-1][1..num_pins-1]). Chunk allocated to save space. */ | |
| int inet; | |
| t_slack * slacks = (t_slack *) my_malloc(sizeof(t_slack)); | |
| slacks->slack = (float **) my_malloc(num_timing_nets * sizeof(float *)); | |
| slacks->timing_criticality = (float **) my_malloc(num_timing_nets * sizeof(float *)); | |
| #ifdef PATH_COUNTING | |
| slacks->path_criticality = (float **) my_malloc(num_timing_nets * sizeof(float *)); | |
| #endif | |
| for (inet = 0; inet < num_timing_nets; inet++) { | |
| slacks->slack[inet] = (float *) my_chunk_malloc((timing_nets[inet].num_sinks + 1) * sizeof(float), &tedge_ch); | |
| slacks->timing_criticality[inet] = (float *) my_chunk_malloc((timing_nets[inet].num_sinks + 1) * sizeof(float), &tedge_ch); | |
| #ifdef PATH_COUNTING | |
| slacks->path_criticality[inet] = (float *) my_chunk_malloc((timing_nets[inet].num_sinks + 1) * sizeof(float), &tedge_ch); | |
| #endif | |
| } | |
| return slacks; | |
| } | |
| void load_timing_graph_net_delays(float **net_delay) { | |
| /* Sets the delays of the inter-CLB nets to the values specified by * | |
| * net_delay[0..num_nets-1][1..num_pins-1]. These net delays should have * | |
| * been allocated and loaded with the net_delay routines. This routine * | |
| * marks the corresponding edges in the timing graph with the proper delay. */ | |
| int inet, ipin, inode; | |
| t_tedge *tedge; | |
| for (inet = 0; inet < num_timing_nets; inet++) { | |
| inode = f_net_to_driver_tnode[inet]; | |
| tedge = tnode[inode].out_edges; | |
| /* Note that the edges of a tnode corresponding to a CLB or INPAD opin must * | |
| * be in the same order as the pins of the net driven by the tnode. */ | |
| for (ipin = 1; ipin < (timing_nets[inet].num_sinks + 1); ipin++) | |
| tedge[ipin - 1].Tdel = net_delay[inet][ipin]; | |
| } | |
| } | |
| void free_timing_graph(t_slack * slacks) { | |
| int inode; | |
| if (tedge_ch.chunk_ptr_head == NULL) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "in free_timing_graph: No timing graph to free.\n"); | |
| exit(1); | |
| } | |
| free_chunk_memory(&tedge_ch); | |
| if (tnode[0].prepacked_data) { | |
| /* If we allocated prepacked_data for the first node, | |
| it must be allocated for all other nodes too. */ | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| free(tnode[inode].prepacked_data); | |
| } | |
| } | |
| free(tnode); | |
| free(f_net_to_driver_tnode); | |
| free_ivec_vector(tnodes_at_level, 0, num_tnode_levels - 1); | |
| free(slacks->slack); | |
| free(slacks->timing_criticality); | |
| #ifdef PATH_COUNTING | |
| free(slacks->path_criticality); | |
| #endif | |
| free(slacks); | |
| tnode = NULL; | |
| num_tnodes = 0; | |
| f_net_to_driver_tnode = NULL; | |
| tnodes_at_level = NULL; | |
| num_tnode_levels = 0; | |
| slacks = NULL; | |
| } | |
| void free_timing_stats(void) { | |
| int i; | |
| if(f_timing_stats != NULL) { | |
| for (i = 0; i < g_sdc->num_constrained_clocks; i++) { | |
| free(f_timing_stats->cpd[i]); | |
| free(f_timing_stats->least_slack[i]); | |
| } | |
| free(f_timing_stats->cpd); | |
| free(f_timing_stats->least_slack); | |
| free(f_timing_stats); | |
| } | |
| f_timing_stats = NULL; | |
| } | |
void print_slack(float ** slack, boolean slack_is_normalized, const char *fname) {

	/* Prints slacks into a file.
	 * slack: [0..num_timing_nets-1][1..num_sinks] per-connection slack values;
	 *        entries still at HUGE_POSITIVE_FLOAT were never analysed.
	 * slack_is_normalized: if TRUE, a note is printed that slacks were shifted
	 *        to be non-negative by relaxing required times.
	 * fname: name of the output file (opened for writing, closed on return).
	 * Output: design-wide max/min/total/total-negative slack, a histogram of
	 * slacks in NUM_BUCKETS linearly-spaced buckets, then every slack by net. */

	int inet, iedge, ibucket, driver_tnode, num_edges, num_unused_slacks = 0;
	t_tedge * tedge;
	FILE *fp;
	float max_slack = HUGE_NEGATIVE_FLOAT, min_slack = HUGE_POSITIVE_FLOAT,
		total_slack = 0, total_negative_slack = 0, bucket_size, slk;
	int slacks_in_bucket[NUM_BUCKETS];

	fp = my_fopen(fname, "w", 0);

	if (slack_is_normalized) {
		fprintf(fp, "The following slacks have been normalized to be non-negative by "
			"relaxing the required times to the maximum arrival time.\n\n");
	}

	/* Go through slack once to get the largest and smallest slack, both for reporting and
	   so that we can delimit the buckets. Also calculate the total negative slack in the design. */
	for (inet = 0; inet < num_timing_nets; inet++) {
		num_edges = timing_nets[inet].num_sinks;
		for (iedge = 0; iedge < num_edges; iedge++) {
			slk = slack[inet][iedge + 1]; /* sink pins start at index 1; 0 is the driver */
			if (slk < HUGE_POSITIVE_FLOAT - 1) { /* if slack was analysed */
				max_slack = std::max(max_slack, slk);
				min_slack = std::min(min_slack, slk);
				total_slack += slk;
				if (slk < NEGATIVE_EPSILON) {
					total_negative_slack -= slk; /* By convention, we'll have total_negative_slack be a positive number. */
				}
			} else { /* slack was never analysed */
				num_unused_slacks++;
			}
		}
	}

	/* max_slack still at its HUGE_NEGATIVE_FLOAT sentinel means nothing was
	 * analysed; print "--" instead of the sentinel. Same for min_slack below. */
	if (max_slack > HUGE_NEGATIVE_FLOAT + 1) {
		fprintf(fp, "Largest slack in design: %g\n", max_slack);
	} else {
		fprintf(fp, "Largest slack in design: --\n");
	}

	if (min_slack < HUGE_POSITIVE_FLOAT - 1) {
		fprintf(fp, "Smallest slack in design: %g\n", min_slack);
	} else {
		fprintf(fp, "Smallest slack in design: --\n");
	}

	fprintf(fp, "Total slack in design: %g\n", total_slack);
	fprintf(fp, "Total negative slack: %g\n", total_negative_slack);

	if (max_slack - min_slack > EPSILON) { /* Only sort the slacks into buckets if not all slacks are the same (if they are identical, no need to sort). */
		/* Initialize slacks_in_bucket, an array counting how many slacks are within certain linearly-spaced ranges (buckets). */
		for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {
			slacks_in_bucket[ibucket] = 0;
		}

		/* The size of each bucket is the range of slacks, divided by the number of buckets. */
		bucket_size = (max_slack - min_slack)/NUM_BUCKETS;

		/* Now, pass through again, incrementing the number of slacks in the nth bucket
			for each slack between (min_slack + n*bucket_size) and (min_slack + (n+1)*bucket_size). */
		for (inet = 0; inet < num_timing_nets; inet++) {
			num_edges = timing_nets[inet].num_sinks;
			for (iedge = 0; iedge < num_edges; iedge++) {
				slk = slack[inet][iedge + 1];
				if (slk < HUGE_POSITIVE_FLOAT - 1) {
					/* We have to watch out for the special case where slack = max_slack, in which case ibucket = NUM_BUCKETS and we go out of bounds of the array. */
					ibucket = std::min(NUM_BUCKETS - 1, (int) ((slk - min_slack)/bucket_size));
					assert(ibucket >= 0 && ibucket < NUM_BUCKETS);
					slacks_in_bucket[ibucket]++;
				}
			}
		}

		/* Now print how many slacks are in each bucket.
		 * Note: min_slack is advanced in place while printing the range row,
		 * so it no longer holds the true minimum after this loop. */
		fprintf(fp, "\n\nRange\t\t");
		for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {
			fprintf(fp, "%.1e to ", min_slack);
			min_slack += bucket_size;
			fprintf(fp, "%.1e\t", min_slack);
		}
		fprintf(fp, "Not analysed");
		fprintf(fp, "\nSlacks in range\t\t");
		for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {
			fprintf(fp, "%d\t\t\t", slacks_in_bucket[ibucket]);
		}
		fprintf(fp, "%d", num_unused_slacks);
	}

	/* Finally, print all the slacks, organized by net. */
	fprintf(fp, "\n\nNet #\tDriver_tnode\tto_node\tSlack\n\n");

	for (inet = 0; inet < num_timing_nets; inet++) {
		driver_tnode = f_net_to_driver_tnode[inet];
		num_edges = tnode[driver_tnode].num_edges;
		tedge = tnode[driver_tnode].out_edges;
		slk = slack[inet][1];
		if (slk < HUGE_POSITIVE_FLOAT - 1) {
			fprintf(fp, "%5d\t%5d\t\t%5d\t%g\n", inet, driver_tnode, tedge[0].to_node, slk);
		} else { /* Slack is meaningless, so replace with --. */
			fprintf(fp, "%5d\t%5d\t\t%5d\t--\n", inet, driver_tnode, tedge[0].to_node);
		}
		for (iedge = 1; iedge < num_edges; iedge++) { /* newline and indent subsequent edges after the first */
			slk = slack[inet][iedge+1];
			if (slk < HUGE_POSITIVE_FLOAT - 1) {
				fprintf(fp, "\t\t\t%5d\t%g\n", tedge[iedge].to_node, slk);
			} else { /* Slack is meaningless, so replace with --. */
				fprintf(fp, "\t\t\t%5d\t--\n", tedge[iedge].to_node);
			}
		}
	}

	fclose(fp);
}
void print_criticality(t_slack * slacks, boolean criticality_is_normalized, const char *fname) {

	/* Prints timing criticalities (and path criticalities if enabled) into a file.
	 * slacks: holds timing_criticality (and, with PATH_COUNTING, path_criticality),
	 *         both indexed [0..num_timing_nets-1][1..num_sinks].
	 * criticality_is_normalized: if TRUE, a note about normalization is printed.
	 * fname: name of the output file (opened for writing, closed on return).
	 * Prints global bucket statistics first, then every criticality by net. */

	int inet, iedge, driver_tnode, num_edges;
	t_tedge * tedge;
	FILE *fp;

	fp = my_fopen(fname, "w", 0);

	if (criticality_is_normalized) {
		fprintf(fp, "Timing criticalities have been normalized to be non-negative by "
			"relaxing the required times to the maximum arrival time.\n\n");
	}

	/* Global min/max/total and histogram for each criticality metric. */
	print_global_criticality_stats(fp, slacks->timing_criticality, "timing criticality", "Timing criticalities");
#ifdef PATH_COUNTING
	print_global_criticality_stats(fp, slacks->path_criticality, "path criticality", "Path criticalities");
#endif

	/* Finally, print all the criticalities, organized by net. */
	fprintf(fp, "\n\nNet #\tDriver_tnode\t to_node\tTiming criticality"
#ifdef PATH_COUNTING
		"\tPath criticality"
#endif
		"\n");

	for (inet = 0; inet < num_timing_nets; inet++) {
		driver_tnode = f_net_to_driver_tnode[inet];
		num_edges = tnode[driver_tnode].num_edges;
		tedge = tnode[driver_tnode].out_edges;

		/* First edge carries the net number and driver tnode columns... */
		fprintf(fp, "\n%5d\t%5d\t\t%5d\t\t%.6f", inet, driver_tnode, tedge[0].to_node, slacks->timing_criticality[inet][1]);
#ifdef PATH_COUNTING
		fprintf(fp, "\t\t%g", slacks->path_criticality[inet][1]);
#endif
		for (iedge = 1; iedge < num_edges; iedge++) { /* newline and indent subsequent edges after the first */
			fprintf(fp, "\n\t\t\t%5d\t\t%.6f", tedge[iedge].to_node, slacks->timing_criticality[inet][iedge+1]);
#ifdef PATH_COUNTING
			fprintf(fp, "\t\t%g", slacks->path_criticality[inet][iedge+1]);
#endif
		}
	}

	fclose(fp);
}
static void print_global_criticality_stats(FILE * fp, float ** criticality, const char * singular_name, const char * capitalized_plural_name) {

	/* Prints global stats for timing or path criticality to the file pointed to by fp,
	   including maximum criticality, minimum criticality, total criticality in the design,
	   and the number of criticalities within various ranges, or buckets.
	   criticality: [0..num_timing_nets-1][1..num_sinks] values to summarize.
	   singular_name / capitalized_plural_name: strings used in the report labels. */

	int inet, iedge, num_edges, ibucket, criticalities_in_bucket[NUM_BUCKETS];
	float crit, max_criticality = HUGE_NEGATIVE_FLOAT, min_criticality = HUGE_POSITIVE_FLOAT,
		total_criticality = 0, bucket_size;

	/* Go through criticality once to get the largest and smallest timing criticality,
	   both for reporting and so that we can delimit the buckets. */
	for (inet = 0; inet < num_timing_nets; inet++) {
		num_edges = timing_nets[inet].num_sinks;
		for (iedge = 0; iedge < num_edges; iedge++) {
			crit = criticality[inet][iedge + 1]; /* sink pins start at index 1 */
			max_criticality = std::max(max_criticality, crit);
			min_criticality = std::min(min_criticality, crit);
			total_criticality += crit;
		}
	}

	fprintf(fp, "Largest %s in design: %g\n", singular_name, max_criticality);
	fprintf(fp, "Smallest %s in design: %g\n", singular_name, min_criticality);
	fprintf(fp, "Total %s in design: %g\n", singular_name, total_criticality);

	if (max_criticality - min_criticality > EPSILON) { /* Only sort the criticalities into buckets if not all criticalities are the same (if they are identical, no need to sort). */
		/* Initialize criticalities_in_bucket, an array counting how many criticalities are within certain linearly-spaced ranges (buckets). */
		for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {
			criticalities_in_bucket[ibucket] = 0;
		}

		/* The size of each bucket is the range of criticalities, divided by the number of buckets. */
		bucket_size = (max_criticality - min_criticality)/NUM_BUCKETS;

		/* Now, pass through again, incrementing the number of criticalities in the nth bucket
			for each criticality between (min_criticality + n*bucket_size) and (min_criticality + (n+1)*bucket_size). */
		for (inet = 0; inet < num_timing_nets; inet++) {
			num_edges = timing_nets[inet].num_sinks;
			for (iedge = 0; iedge < num_edges; iedge++) {
				crit = criticality[inet][iedge + 1];
				/* We have to watch out for the special case where criticality = max_criticality, in which case ibucket = NUM_BUCKETS and we go out of bounds of the array. */
				ibucket = std::min(NUM_BUCKETS - 1, (int) ((crit - min_criticality)/bucket_size));
				assert(ibucket >= 0 && ibucket < NUM_BUCKETS);
				criticalities_in_bucket[ibucket]++;
			}
		}

		/* Now print how many criticalities are in each bucket.
		 * Note: min_criticality is advanced in place while printing the range
		 * row, so it no longer holds the true minimum afterwards. */
		fprintf(fp, "\nRange\t\t");
		for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {
			fprintf(fp, "%.1e to ", min_criticality);
			min_criticality += bucket_size;
			fprintf(fp, "%.1e\t", min_criticality);
		}
		fprintf(fp, "\n%s in range\t\t", capitalized_plural_name);
		for (ibucket = 0; ibucket < NUM_BUCKETS; ibucket++) {
			fprintf(fp, "%d\t\t\t", criticalities_in_bucket[ibucket]);
		}
	}
	fprintf(fp, "\n\n");
}
| void print_net_delay(float **net_delay, const char *fname) { | |
| /* Prints the net delays into a file. */ | |
| int inet, iedge, driver_tnode, num_edges; | |
| t_tedge * tedge; | |
| FILE *fp; | |
| fp = my_fopen(fname, "w", 0); | |
| fprintf(fp, "Net #\tDriver_tnode\tto_node\tDelay\n\n"); | |
| for (inet = 0; inet < num_timing_nets; inet++) { | |
| driver_tnode = f_net_to_driver_tnode[inet]; | |
| num_edges = tnode[driver_tnode].num_edges; | |
| tedge = tnode[driver_tnode].out_edges; | |
| fprintf(fp, "%5d\t%5d\t\t%5d\t%g\n", inet, driver_tnode, tedge[0].to_node, net_delay[inet][1]); | |
| for (iedge = 1; iedge < num_edges; iedge++) { /* newline and indent subsequent edges after the first */ | |
| fprintf(fp, "\t\t\t%5d\t%g\n", tedge[iedge].to_node, net_delay[inet][iedge+1]); | |
| } | |
| } | |
| fclose(fp); | |
| } | |
| #ifndef PATH_COUNTING | |
| void print_clustering_timing_info(const char *fname) { | |
| /* Print information from tnodes which is used by the clusterer. */ | |
| int inode; | |
| FILE *fp; | |
| fp = my_fopen(fname, "w", 0); | |
| fprintf(fp, "inode "); | |
| if (g_sdc->num_constrained_clocks <= 1) { | |
| /* These values are from the last constraint analysed, | |
| so they're not meaningful unless there was only one constraint. */ | |
| fprintf(fp, "Critical input paths Critical output paths "); | |
| } | |
| fprintf(fp, "Normalized slack Normalized Tarr Normalized total crit paths\n"); | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| fprintf(fp, "%d\t", inode); | |
| /* Only print normalized values for tnodes which have valid normalized values. (If normalized_T_arr is valid, the others will be too.) */ | |
| if (has_valid_normalized_T_arr(inode)) { | |
| if (g_sdc->num_constrained_clocks <= 1) { | |
| fprintf(fp, "%ld\t\t\t%ld\t\t\t", tnode[inode].prepacked_data->num_critical_input_paths, tnode[inode].prepacked_data->num_critical_output_paths); | |
| } | |
| fprintf(fp, "%f\t%f\t%f\n", tnode[inode].prepacked_data->normalized_slack, tnode[inode].prepacked_data->normalized_T_arr, tnode[inode].prepacked_data->normalized_total_critical_paths); | |
| } else { | |
| if (g_sdc->num_constrained_clocks <= 1) { | |
| fprintf(fp, "--\t\t\t--\t\t\t"); | |
| } | |
| fprintf(fp, "--\t\t--\t\t--\n"); | |
| } | |
| } | |
| fclose(fp); | |
| } | |
| #endif | |
| /* Count # of tnodes, allocates space, and loads the tnodes and its associated edges */ | |
static void alloc_and_load_tnodes(t_timing_inf timing_inf) {
	/* Builds the post-packed timing graph.  Pass 1 counts the tnodes needed
	 * (I/O-pad and sequential pins are modeled as TWO tnodes each so that the
	 * pad/setup/clock-to-Q delay can sit on the edge between the pair; every
	 * other used pin is one tnode).  Pass 2 loads each tnode via load_tnode(),
	 * which also points the cluster rr_graph entries back at their tnodes.
	 * Pass 3 creates the fanout edges with their delays, fills in
	 * f_net_to_driver_tnode for every clb net, and counts dangling pins. */
	int i, j, k;
	int inode;
	int num_nodes_in_block;
	int count;
	int iblock, irr_node;
	int inet, dport, dpin, dblock, dnode;
	int normalized_pin, normalization;
	t_pb_graph_pin *ipb_graph_pin;
	t_rr_node *local_rr_graph, *d_rr_graph;
	int num_dangling_pins;

	f_net_to_driver_tnode = (int*)my_malloc(num_timing_nets * sizeof(int));

	for (i = 0; i < num_timing_nets; i++) {
		f_net_to_driver_tnode[i] = OPEN;
	}

	/* allocate space for tnodes */
	num_tnodes = 0;
	for (i = 0; i < num_blocks; i++) {
		num_nodes_in_block = 0;
		for (j = 0; j < block[i].pb->pb_graph_node->total_pb_pins; j++) {
			if (block[i].pb->rr_graph[j].net_num != OPEN) {
				/* Pad and sequential pins become a pin tnode plus a
				   companion source/sink tnode (see load_tnode). */
				if (block[i].pb->rr_graph[j].pb_graph_pin->type == PB_PIN_INPAD
						|| block[i].pb->rr_graph[j].pb_graph_pin->type
								== PB_PIN_OUTPAD
						|| block[i].pb->rr_graph[j].pb_graph_pin->type
								== PB_PIN_SEQUENTIAL) {
					num_nodes_in_block += 2;
				} else {
					num_nodes_in_block++;
				}
			}
		}
		num_tnodes += num_nodes_in_block;
	}
	tnode = (t_tnode*)my_calloc(num_tnodes, sizeof(t_tnode));

	/* load tnodes with all info except edge info */
	/* populate tnode lookups for edge info */
	inode = 0;
	for (i = 0; i < num_blocks; i++) {
		for (j = 0; j < block[i].pb->pb_graph_node->total_pb_pins; j++) {
			if (block[i].pb->rr_graph[j].net_num != OPEN) {
				assert(tnode[inode].pb_graph_pin == NULL);
				/* load_tnode advances inode by 1 or 2, matching the
				   counting rule in pass 1 above. */
				load_tnode(block[i].pb->rr_graph[j].pb_graph_pin, i, &inode,
						timing_inf);
			}
		}
	}
	assert(inode == num_tnodes);
	num_dangling_pins = 0;

	/* load edge delays and initialize clock domains to OPEN
	 and prepacked_data (which is not used post-packing) to NULL. */
	for (i = 0; i < num_tnodes; i++) {
		tnode[i].clock_domain = OPEN;
		tnode[i].prepacked_data = NULL;

		/* 3 primary scenarios for edge delays:
		 1. Point-to-point delays inside a block (intra-cluster routing:
		    the OPIN/INTERMEDIATE/CB_IPIN cases below).
		 2. Primitive pin-to-pin timing arcs (TN_PRIMITIVE_IPIN case).
		 3. Inter-cluster net connections (TN_CB_OPIN case); their real
		    delays are assigned post-routing, so they are loaded as 0 here. */
		count = 0;
		iblock = tnode[i].block;
		switch (tnode[i].type) {
		case TN_INPAD_OPIN:
		case TN_INTERMEDIATE_NODE:
		case TN_PRIMITIVE_OPIN:
		case TN_FF_OPIN:
		case TN_CB_IPIN:
			/* fanout is determined by intra-cluster connections */
			/* Allocate space for edges */
			irr_node = tnode[i].pb_graph_pin->pin_count_in_cluster;
			local_rr_graph = block[iblock].pb->rr_graph;
			ipb_graph_pin = local_rr_graph[irr_node].pb_graph_pin;

			/* Track the largest pb_type max_internal_delay seen so far
			   (and which pb_type it came from) in the file-scope globals. */
			if (ipb_graph_pin->parent_node->pb_type->max_internal_delay
					!= UNDEFINED) {
				if (pb_max_internal_delay == UNDEFINED) {
					pb_max_internal_delay =
							ipb_graph_pin->parent_node->pb_type->max_internal_delay;
					pbtype_max_internal_delay =
							ipb_graph_pin->parent_node->pb_type;
				} else if (pb_max_internal_delay
						< ipb_graph_pin->parent_node->pb_type->max_internal_delay) {
					pb_max_internal_delay =
							ipb_graph_pin->parent_node->pb_type->max_internal_delay;
					pbtype_max_internal_delay =
							ipb_graph_pin->parent_node->pb_type;
				}
			}

			/* First pass: count real fanout edges.  An rr edge is a real
			   connection only if the destination's traceback (prev_node and
			   prev_edge) points back at this node through this edge. */
			for (j = 0; j < block[iblock].pb->rr_graph[irr_node].num_edges;
					j++) {
				dnode = local_rr_graph[irr_node].edges[j];
				if ((local_rr_graph[dnode].prev_node == irr_node)
						&& (j == local_rr_graph[dnode].prev_edge)) {
					count++;
				}
			}
			assert(count > 0);
			tnode[i].num_edges = count;
			tnode[i].out_edges = (t_tedge *) my_chunk_malloc(
					count * sizeof(t_tedge), &tedge_ch);

			/* Load edges (same traceback filter as the counting pass). */
			count = 0;
			for (j = 0; j < local_rr_graph[irr_node].num_edges; j++) {
				dnode = local_rr_graph[irr_node].edges[j];
				if ((local_rr_graph[dnode].prev_node == irr_node)
						&& (j == local_rr_graph[dnode].prev_edge)) {
					assert(
							(ipb_graph_pin->output_edges[j]->num_output_pins == 1) && (local_rr_graph[ipb_graph_pin->output_edges[j]->output_pins[0]->pin_count_in_cluster].net_num == local_rr_graph[irr_node].net_num));

					tnode[i].out_edges[count].Tdel =
							ipb_graph_pin->output_edges[j]->delay_max;
					tnode[i].out_edges[count].to_node =
							get_tnode_index(local_rr_graph[dnode].tnode);

					/* Constant-generator outputs get a hugely negative
					   delay and are retyped to TN_CONSTANT_GEN_SOURCE. */
					if (vpack_net[local_rr_graph[irr_node].net_num].is_const_gen
							== TRUE && tnode[i].type == TN_PRIMITIVE_OPIN) {
						tnode[i].out_edges[count].Tdel = HUGE_NEGATIVE_FLOAT;
						tnode[i].type = TN_CONSTANT_GEN_SOURCE;
					}

					count++;
				}
			}
			assert(count > 0);

			break;
		case TN_PRIMITIVE_IPIN:
			/* Pin info comes from pb_graph block delays. */
			/* there would be no slack information if timing analysis is off */
			if (timing_inf.timing_analysis_enabled)
			{
				irr_node = tnode[i].pb_graph_pin->pin_count_in_cluster;
				local_rr_graph = block[iblock].pb->rr_graph;
				ipb_graph_pin = local_rr_graph[irr_node].pb_graph_pin;
				/* Allocate for all timing arcs, then compact (k) to only
				   those whose destination pin is actually used. */
				tnode[i].num_edges = ipb_graph_pin->num_pin_timing;
				tnode[i].out_edges = (t_tedge *) my_chunk_malloc(
						ipb_graph_pin->num_pin_timing * sizeof(t_tedge),
						&tedge_ch);
				k = 0;

				for (j = 0; j < tnode[i].num_edges; j++) {
					/* Some outpins aren't used, ignore these. Only consider output pins that are used */
					if (local_rr_graph[ipb_graph_pin->pin_timing[j]->pin_count_in_cluster].net_num
							!= OPEN) {
						tnode[i].out_edges[k].Tdel =
								ipb_graph_pin->pin_timing_del_max[j];
						tnode[i].out_edges[k].to_node =
								get_tnode_index(local_rr_graph[ipb_graph_pin->pin_timing[j]->pin_count_in_cluster].tnode);
						assert(tnode[i].out_edges[k].to_node != OPEN);
						k++;
					}
				}
				tnode[i].num_edges -= (j - k); /* remove unused edges */
				if (tnode[i].num_edges == 0) {
					/* Dangling pin */
					num_dangling_pins++;
				}
			}
			break;
		case TN_CB_OPIN:
			/* load up net info */
			irr_node = tnode[i].pb_graph_pin->pin_count_in_cluster;
			local_rr_graph = block[iblock].pb->rr_graph;
			ipb_graph_pin = local_rr_graph[irr_node].pb_graph_pin;

			assert(local_rr_graph[irr_node].net_num != OPEN);
			inet = vpack_to_clb_net_mapping[local_rr_graph[irr_node].net_num];
			assert(inet != OPEN);
			f_net_to_driver_tnode[inet] = i;
			tnode[i].num_edges = clb_net[inet].num_sinks;
			tnode[i].out_edges = (t_tedge *) my_chunk_malloc(
					clb_net[inet].num_sinks * sizeof(t_tedge),
					&tedge_ch);
			/* Net sinks are 1-indexed; node_block[0] is the driver. */
			for (j = 1; j <= clb_net[inet].num_sinks; j++) {
				dblock = clb_net[inet].node_block[j];
				/* Fold the physical pin index down to one sub-block's pin
				   range for block types with capacity > 1. */
				normalization = block[dblock].type->num_pins
						/ block[dblock].type->capacity;
				normalized_pin = clb_net[inet].node_block_pin[j]
						% normalization;
				d_rr_graph = block[dblock].pb->rr_graph;
				dpin = OPEN;
				dport = OPEN;
				count = 0;

				/* Find the (port, pin) pair matching normalized_pin,
				   scanning input ports first. */
				for (k = 0;
						k < block[dblock].pb->pb_graph_node->num_input_ports
								&& dpin == OPEN; k++) {
					if (normalized_pin >= count
							&& (count
									+ block[dblock].pb->pb_graph_node->num_input_pins[k]
									> normalized_pin)) {
						dpin = normalized_pin - count;
						dport = k;
						break;
					}
					count += block[dblock].pb->pb_graph_node->num_input_pins[k];
				}
				if (dpin == OPEN) {
					/* Not an input pin: skip over the output-pin range,
					   then scan the clock ports. */
					for (k = 0;
							k
									< block[dblock].pb->pb_graph_node->num_output_ports
									&& dpin == OPEN; k++) {
						count +=
								block[dblock].pb->pb_graph_node->num_output_pins[k];
					}
					for (k = 0;
							k < block[dblock].pb->pb_graph_node->num_clock_ports
									&& dpin == OPEN; k++) {
						if (normalized_pin >= count
								&& (count
										+ block[dblock].pb->pb_graph_node->num_clock_pins[k]
										> normalized_pin)) {
							dpin = normalized_pin - count;
							dport = k;
						}
						count +=
								block[dblock].pb->pb_graph_node->num_clock_pins[k];
					}
					assert(dpin != OPEN);
					assert(
							inet == vpack_to_clb_net_mapping[d_rr_graph[block[dblock].pb->pb_graph_node->clock_pins[dport][dpin].pin_count_in_cluster].net_num]);
					tnode[i].out_edges[j - 1].to_node =
							get_tnode_index(d_rr_graph[block[dblock].pb->pb_graph_node->clock_pins[dport][dpin].pin_count_in_cluster].tnode);
				} else {
					assert(dpin != OPEN);
					assert(
							inet == vpack_to_clb_net_mapping[d_rr_graph[block[dblock].pb->pb_graph_node->input_pins[dport][dpin].pin_count_in_cluster].net_num]);
					/* delays are assigned post routing */
					tnode[i].out_edges[j - 1].to_node =
							get_tnode_index(d_rr_graph[block[dblock].pb->pb_graph_node->input_pins[dport][dpin].pin_count_in_cluster].tnode);
				}
				tnode[i].out_edges[j - 1].Tdel = 0;
				assert(inet != OPEN);
			}
			break;
		case TN_OUTPAD_IPIN:
		case TN_INPAD_SOURCE:
		case TN_OUTPAD_SINK:
		case TN_FF_SINK:
		case TN_FF_SOURCE:
		case TN_FF_IPIN:
		case TN_FF_CLOCK:
			/* No fanout to load here: these are sinks, or their single
			   outgoing edge was already created in load_tnode. */
			break;
		default:
			vpr_printf(TIO_MESSAGE_ERROR, "Consistency check failed: Unknown tnode type %d.\n", tnode[i].type);
			assert(0);
			break;
		}
	}

	if(num_dangling_pins > 0) {
		vpr_printf(TIO_MESSAGE_WARNING, "Unconnected logic in design, number of dangling tnodes = %d\n", num_dangling_pins);
	}
}
/* Allocates the timing graph for the pre-packed netlist:
   first counts the number of tnodes needed,
   then loads the tnodes and connects them with edges. */
static void alloc_and_load_tnodes_from_prepacked_netlist(float block_delay,
		float inter_cluster_net_delay) {
	/* Builds the timing graph from the pre-packed (logical-block) netlist.
	 * Pass 1 counts the tnodes needed; pass 2 allocates and loads them
	 * (outputs first, then inputs, so combinational arcs can point at the
	 * already-created output tnodes); pass 3 loads the inter-block edges,
	 * using inter_cluster_net_delay as the estimated delay of every net
	 * connection.  NOTE(review): block_delay is not referenced in this body
	 * — confirm whether it is still needed in the signature. */
	int i, j, k;
	t_model *model;
	t_model_ports *model_port;
	t_pb_graph_pin *from_pb_graph_pin, *to_pb_graph_pin;
	int inode, inet;
	int incr;
	int count;

	f_net_to_driver_tnode = (int*)my_malloc(num_logical_nets * sizeof(int));

	for (i = 0; i < num_logical_nets; i++) {
		f_net_to_driver_tnode[i] = OPEN;
	}

	/* allocate space for tnodes */
	num_tnodes = 0;
	for (i = 0; i < num_logical_blocks; i++) {
		model = logical_block[i].model;
		logical_block[i].clock_net_tnode = NULL;
		if (logical_block[i].type == VPACK_INPAD) {
			logical_block[i].output_net_tnodes = (t_tnode***)my_calloc(1,
					sizeof(t_tnode**));
			num_tnodes += 2; /* TN_INPAD_SOURCE + TN_INPAD_OPIN pair */
		} else if (logical_block[i].type == VPACK_OUTPAD) {
			logical_block[i].input_net_tnodes = (t_tnode***)my_calloc(1, sizeof(t_tnode**));
			num_tnodes += 2; /* TN_OUTPAD_IPIN + TN_OUTPAD_SINK pair */
		} else {
			/* Sequential blocks (clock_net set) need a companion FF
			   source/sink tnode per used data pin; combinational need one. */
			if (logical_block[i].clock_net == OPEN) {
				incr = 1;
			} else {
				incr = 2;
			}
			j = 0;
			model_port = model->inputs;
			while (model_port) {
				if (model_port->is_clock == FALSE) {
					for (k = 0; k < model_port->size; k++) {
						if (logical_block[i].input_nets[j][k] != OPEN) {
							num_tnodes += incr;
						}
					}
					j++; /* j counts non-clock input ports only, matching
					        the input_net_tnodes indexing in the load pass. */
				} else {
					/* One TN_FF_CLOCK tnode per clock port.
					   NOTE(review): counted unconditionally here, but the
					   load pass below only creates it when clock_net != OPEN
					   — confirm a clock port always implies a clock net,
					   else assert(inode == num_tnodes) would fire. */
					num_tnodes++;
				}
				model_port = model_port->next;
			}
			logical_block[i].input_net_tnodes = (t_tnode ***)my_calloc(j, sizeof(t_tnode**));

			j = 0;
			model_port = model->outputs;
			while (model_port) {
				for (k = 0; k < model_port->size; k++) {
					if (logical_block[i].output_nets[j][k] != OPEN) {
						num_tnodes += incr;
					}
				}
				j++;
				model_port = model_port->next;
			}
			logical_block[i].output_net_tnodes = (t_tnode ***)my_calloc(j,
					sizeof(t_tnode**));
		}
	}
	tnode = (t_tnode *)my_calloc(num_tnodes, sizeof(t_tnode));

	/* Allocate space for prepacked_data, which is only used pre-packing. */
	for (inode = 0; inode < num_tnodes; inode++) {
		tnode[inode].prepacked_data = (t_prepacked_tnode_data *) my_malloc(sizeof(t_prepacked_tnode_data));
	}

	/* load tnodes, alloc edges for tnodes, load all known tnodes */
	inode = 0;
	for (i = 0; i < num_logical_blocks; i++) {
		model = logical_block[i].model;
		if (logical_block[i].type == VPACK_INPAD) {
			logical_block[i].output_net_tnodes[0] = (t_tnode **)my_calloc(1,
					sizeof(t_tnode*));
			logical_block[i].output_net_tnodes[0][0] = &tnode[inode];
			f_net_to_driver_tnode[logical_block[i].output_nets[0][0]] = inode;
			tnode[inode].prepacked_data->model_pin = 0;
			tnode[inode].prepacked_data->model_port = 0;
			tnode[inode].prepacked_data->model_port_ptr = model->outputs;
			tnode[inode].block = i;
			tnode[inode].type = TN_INPAD_OPIN;

			tnode[inode].num_edges =
					vpack_net[logical_block[i].output_nets[0][0]].num_sinks;
			tnode[inode].out_edges = (t_tedge *) my_chunk_malloc(
					tnode[inode].num_edges * sizeof(t_tedge),
					&tedge_ch);
			/* Companion TN_INPAD_SOURCE feeds the OPIN with zero delay. */
			tnode[inode + 1].num_edges = 1;
			tnode[inode + 1].out_edges = (t_tedge *) my_chunk_malloc(
					1 * sizeof(t_tedge), &tedge_ch);
			tnode[inode + 1].out_edges->Tdel = 0;
			tnode[inode + 1].out_edges->to_node = inode;
			tnode[inode + 1].type = TN_INPAD_SOURCE;
			tnode[inode + 1].block = i;
			inode += 2;
		} else if (logical_block[i].type == VPACK_OUTPAD) {
			logical_block[i].input_net_tnodes[0] = (t_tnode **)my_calloc(1,
					sizeof(t_tnode*));
			logical_block[i].input_net_tnodes[0][0] = &tnode[inode];
			tnode[inode].prepacked_data->model_pin = 0;
			tnode[inode].prepacked_data->model_port = 0;
			tnode[inode].prepacked_data->model_port_ptr = model->inputs;
			tnode[inode].block = i;
			tnode[inode].type = TN_OUTPAD_IPIN;

			/* IPIN feeds the companion TN_OUTPAD_SINK with zero delay. */
			tnode[inode].num_edges = 1;
			tnode[inode].out_edges = (t_tedge *) my_chunk_malloc(
					1 * sizeof(t_tedge), &tedge_ch);
			tnode[inode].out_edges->Tdel = 0;
			tnode[inode].out_edges->to_node = inode + 1;
			tnode[inode + 1].type = TN_OUTPAD_SINK;
			tnode[inode + 1].block = i;
			tnode[inode + 1].num_edges = 0;
			tnode[inode + 1].out_edges = NULL;
			inode += 2;
		} else {
			/* Regular primitive: create the output-pin tnodes first so the
			   input pins' combinational arcs below can reference them. */
			j = 0;
			model_port = model->outputs;
			while (model_port) {
				logical_block[i].output_net_tnodes[j] = (t_tnode **)my_calloc(
						model_port->size, sizeof(t_tnode*));
				for (k = 0; k < model_port->size; k++) {
					if (logical_block[i].output_nets[j][k] != OPEN) {
						tnode[inode].prepacked_data->model_pin = k;
						tnode[inode].prepacked_data->model_port = j;
						tnode[inode].prepacked_data->model_port_ptr = model_port;
						tnode[inode].block = i;
						f_net_to_driver_tnode[logical_block[i].output_nets[j][k]] =
								inode;
						logical_block[i].output_net_tnodes[j][k] =
								&tnode[inode];

						tnode[inode].num_edges =
								vpack_net[logical_block[i].output_nets[j][k]].num_sinks;
						tnode[inode].out_edges = (t_tedge *) my_chunk_malloc(
								tnode[inode].num_edges * sizeof(t_tedge),
								&tedge_ch);

						if (logical_block[i].clock_net == OPEN) {
							tnode[inode].type = TN_PRIMITIVE_OPIN;
							inode++;
						} else {
							/* load delays from predicted clock-to-Q time */
							from_pb_graph_pin = get_pb_graph_node_pin_from_model_port_pin(model_port, k, logical_block[i].expected_lowest_cost_primitive);
							tnode[inode].type = TN_FF_OPIN;
							/* Companion TN_FF_SOURCE drives the OPIN with
							   the pin's tsu_tco value as the edge delay. */
							tnode[inode + 1].num_edges = 1;
							tnode[inode + 1].out_edges =
									(t_tedge *) my_chunk_malloc(
											1 * sizeof(t_tedge),
											&tedge_ch);
							tnode[inode + 1].out_edges->to_node = inode;
							tnode[inode + 1].out_edges->Tdel = from_pb_graph_pin->tsu_tco;
							tnode[inode + 1].type = TN_FF_SOURCE;
							tnode[inode + 1].block = i;
							inode += 2;
						}
					}
				}
				j++;
				model_port = model_port->next;
			}

			/* Now the input-pin tnodes. */
			j = 0;
			model_port = model->inputs;
			while (model_port) {
				if (model_port->is_clock == FALSE) {
					logical_block[i].input_net_tnodes[j] = (t_tnode **)my_calloc(
							model_port->size, sizeof(t_tnode*));
					for (k = 0; k < model_port->size; k++) {
						if (logical_block[i].input_nets[j][k] != OPEN) {
							tnode[inode].prepacked_data->model_pin = k;
							tnode[inode].prepacked_data->model_port = j;
							tnode[inode].prepacked_data->model_port_ptr = model_port;
							tnode[inode].block = i;
							logical_block[i].input_net_tnodes[j][k] =
									&tnode[inode];
							from_pb_graph_pin = get_pb_graph_node_pin_from_model_port_pin(model_port, k, logical_block[i].expected_lowest_cost_primitive);
							if (logical_block[i].clock_net == OPEN) {
								/* load predicted combinational delays to predicted edges */
								tnode[inode].type = TN_PRIMITIVE_IPIN;
								tnode[inode].out_edges =
										(t_tedge *) my_chunk_malloc(
												from_pb_graph_pin->num_pin_timing * sizeof(t_tedge),
												&tedge_ch);
								count = 0;
								for(int m = 0; m < from_pb_graph_pin->num_pin_timing; m++) {
									to_pb_graph_pin = from_pb_graph_pin->pin_timing[m];
									/* Skip timing arcs whose destination
									   output pin is not connected to a net. */
									if(logical_block[i].output_nets[to_pb_graph_pin->port->model_port->index][to_pb_graph_pin->pin_number] == OPEN) {
										continue;
									}
									tnode[inode].out_edges[count].Tdel = from_pb_graph_pin->pin_timing_del_max[m];
									tnode[inode].out_edges[count].to_node =
											get_tnode_index(logical_block[i].output_net_tnodes[to_pb_graph_pin->port->model_port->index][to_pb_graph_pin->pin_number]);
									count++;
								}
								tnode[inode].num_edges = count;
								inode++;
							} else {
								/* load predicted setup time */
								tnode[inode].type = TN_FF_IPIN;
								/* IPIN feeds the companion TN_FF_SINK with
								   the pin's tsu_tco value as the edge delay. */
								tnode[inode].num_edges = 1;
								tnode[inode].out_edges =
										(t_tedge *) my_chunk_malloc(
												1 * sizeof(t_tedge),
												&tedge_ch);
								tnode[inode].out_edges->to_node = inode + 1;
								tnode[inode].out_edges->Tdel = from_pb_graph_pin->tsu_tco;
								tnode[inode + 1].type = TN_FF_SINK;
								tnode[inode + 1].num_edges = 0;
								tnode[inode + 1].out_edges = NULL;
								tnode[inode + 1].block = i;
								inode += 2;
							}
						}
					}
					j++;
				} else {
					if (logical_block[i].clock_net != OPEN) {
						assert(logical_block[i].clock_net_tnode == NULL);
						logical_block[i].clock_net_tnode = &tnode[inode];
						tnode[inode].block = i;
						tnode[inode].prepacked_data->model_pin = 0;
						tnode[inode].prepacked_data->model_port = 0;
						tnode[inode].prepacked_data->model_port_ptr = model_port;
						tnode[inode].num_edges = 0;
						tnode[inode].out_edges = NULL;
						tnode[inode].type = TN_FF_CLOCK;
						inode++;
					}
				}
				model_port = model_port->next;
			}
		}
	}
	assert(inode == num_tnodes);

	/* load edge delays and initialize clock domains to OPEN. */
	for (i = 0; i < num_tnodes; i++) {
		tnode[i].clock_domain = OPEN;

		/* 3 primary scenarios for edge delays:
		 1. Point-to-point delays inside a block (loaded above).
		 2. Driver-pin fanout over a net (the OPIN cases below), estimated
		    as inter_cluster_net_delay per connection.
		 3. Constant generators, flagged with a huge negative delay. */
		count = 0;
		switch (tnode[i].type) {
		case TN_INPAD_OPIN:
		case TN_PRIMITIVE_OPIN:
		case TN_FF_OPIN:
			/* fanout is determined by intra-cluster connections */
			/* Allocate space for edges */
			inet =
					logical_block[tnode[i].block].output_nets[tnode[i].prepacked_data->model_port][tnode[i].prepacked_data->model_pin];
			assert(inet != OPEN);

			for (j = 1; j <= vpack_net[inet].num_sinks; j++) {
				if (vpack_net[inet].is_const_gen) {
					tnode[i].out_edges[j - 1].Tdel = HUGE_NEGATIVE_FLOAT;
					tnode[i].type = TN_CONSTANT_GEN_SOURCE;
				} else {
					tnode[i].out_edges[j - 1].Tdel = inter_cluster_net_delay;
				}
				if (vpack_net[inet].is_global) {
					/* Global (clock) nets terminate at the sink block's
					   TN_FF_CLOCK tnode rather than a data input pin. */
					assert(
							logical_block[vpack_net[inet].node_block[j]].clock_net == inet);
					tnode[i].out_edges[j - 1].to_node =
							get_tnode_index(logical_block[vpack_net[inet].node_block[j]].clock_net_tnode);
				} else {
					assert(
							logical_block[vpack_net[inet].node_block[j]].input_net_tnodes[vpack_net[inet].node_block_port[j]][vpack_net[inet].node_block_pin[j]] != NULL);
					tnode[i].out_edges[j - 1].to_node =
							get_tnode_index(logical_block[vpack_net[inet].node_block[j]].input_net_tnodes[vpack_net[inet].node_block_port[j]][vpack_net[inet].node_block_pin[j]]);
				}
			}
			assert(tnode[i].num_edges == vpack_net[inet].num_sinks);
			break;
		case TN_PRIMITIVE_IPIN:
		case TN_OUTPAD_IPIN:
		case TN_INPAD_SOURCE:
		case TN_OUTPAD_SINK:
		case TN_FF_SINK:
		case TN_FF_SOURCE:
		case TN_FF_IPIN:
		case TN_FF_CLOCK:
			/* Edges (if any) were already created in the load pass above. */
			break;
		default:
			vpr_printf(TIO_MESSAGE_ERROR, "Consistency check failed: Unknown tnode type %d.\n", tnode[i].type);
			assert(0);
			break;
		}
	}

	/* Sanity check: every logical net must have had a driver tnode recorded. */
	for (i = 0; i < num_logical_nets; i++) {
		assert(f_net_to_driver_tnode[i] != OPEN);
	}
}
static void load_tnode(INP t_pb_graph_pin *pb_graph_pin, INP int iblock,
		INOUTP int *inode, INP t_timing_inf timing_inf) {
	/* Loads tnode[*inode] (and, for pins modeled as a two-tnode pair,
	 * tnode[*inode + 1]) from pb_graph_pin, choosing the tnode type from the
	 * pin's position in the pb hierarchy.  Also records a back-pointer from
	 * the cluster's rr_graph entry to the tnode, so edge loading can map rr
	 * nodes to tnodes.  Advances *inode past every tnode written (1 or 2). */
	int i;

	i = *inode;
	tnode[i].pb_graph_pin = pb_graph_pin;
	tnode[i].block = iblock;
	block[iblock].pb->rr_graph[pb_graph_pin->pin_count_in_cluster].tnode =
			&tnode[i];

	if (tnode[i].pb_graph_pin->parent_node->pb_type->blif_model == NULL) {
		/* Not a primitive pin: either a top-level cluster (CB) pin or an
		   intermediate routing node inside the cluster. */
		assert(tnode[i].pb_graph_pin->type == PB_PIN_NORMAL);
		if (tnode[i].pb_graph_pin->parent_node->parent_pb_graph_node == NULL) {
			if (tnode[i].pb_graph_pin->port->type == IN_PORT) {
				tnode[i].type = TN_CB_IPIN;
			} else {
				assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);
				tnode[i].type = TN_CB_OPIN;
			}
		} else {
			tnode[i].type = TN_INTERMEDIATE_NODE;
		}
	} else {
		if (tnode[i].pb_graph_pin->type == PB_PIN_INPAD) {
			/* Input pad: TN_INPAD_SOURCE -> TN_INPAD_OPIN pair, joined by
			   a zero-delay edge. */
			assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);
			tnode[i].type = TN_INPAD_OPIN;

			tnode[i + 1].num_edges = 1;
			tnode[i + 1].out_edges = (t_tedge *) my_chunk_malloc(
					1 * sizeof(t_tedge), &tedge_ch);
			tnode[i + 1].out_edges->Tdel = 0;
			tnode[i + 1].out_edges->to_node = i;
			tnode[i + 1].pb_graph_pin = pb_graph_pin; /* Necessary for propagate_clock_domain_and_skew(). */
			tnode[i + 1].type = TN_INPAD_SOURCE;
			tnode[i + 1].block = iblock;
			(*inode)++;
		} else if (tnode[i].pb_graph_pin->type == PB_PIN_OUTPAD) {
			/* Output pad: TN_OUTPAD_IPIN -> TN_OUTPAD_SINK pair, joined by
			   a zero-delay edge. */
			assert(tnode[i].pb_graph_pin->port->type == IN_PORT);
			tnode[i].type = TN_OUTPAD_IPIN;

			tnode[i].num_edges = 1;
			tnode[i].out_edges = (t_tedge *) my_chunk_malloc(
					1 * sizeof(t_tedge), &tedge_ch);
			tnode[i].out_edges->Tdel = 0;
			tnode[i].out_edges->to_node = i + 1;
			tnode[i + 1].pb_graph_pin = pb_graph_pin; /* Necessary for find_tnode_net_name(). */
			tnode[i + 1].type = TN_OUTPAD_SINK;
			tnode[i + 1].block = iblock;
			tnode[i + 1].num_edges = 0;
			tnode[i + 1].out_edges = NULL;
			(*inode)++;
		} else if (tnode[i].pb_graph_pin->type == PB_PIN_SEQUENTIAL) {
			/* Sequential pin: pair the pin with a TN_FF_SINK (for inputs)
			   or TN_FF_SOURCE (for outputs); the pin's tsu_tco value is
			   placed on the edge between the pair. */
			if (tnode[i].pb_graph_pin->port->type == IN_PORT) {
				tnode[i].type = TN_FF_IPIN;

				tnode[i].num_edges = 1;
				tnode[i].out_edges = (t_tedge *) my_chunk_malloc(
						1 * sizeof(t_tedge), &tedge_ch);
				tnode[i].out_edges->Tdel = pb_graph_pin->tsu_tco;
				tnode[i].out_edges->to_node = i + 1;
				tnode[i + 1].pb_graph_pin = pb_graph_pin;
				tnode[i + 1].type = TN_FF_SINK;
				tnode[i + 1].block = iblock;
				tnode[i + 1].num_edges = 0;
				tnode[i + 1].out_edges = NULL;
			} else {
				assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);
				tnode[i].type = TN_FF_OPIN;

				tnode[i + 1].num_edges = 1;
				tnode[i + 1].out_edges = (t_tedge *) my_chunk_malloc(
						1 * sizeof(t_tedge), &tedge_ch);
				tnode[i + 1].out_edges->Tdel = pb_graph_pin->tsu_tco;
				tnode[i + 1].out_edges->to_node = i;
				tnode[i + 1].pb_graph_pin = pb_graph_pin;
				tnode[i + 1].type = TN_FF_SOURCE;
				tnode[i + 1].block = iblock;
			}
			/* Both sequential branches consumed an extra tnode. */
			(*inode)++;
		} else if (tnode[i].pb_graph_pin->type == PB_PIN_CLOCK) {
			/* Clock input of a primitive: a sink with no fanout edges. */
			tnode[i].type = TN_FF_CLOCK;
			tnode[i].num_edges = 0;
			tnode[i].out_edges = NULL;
		} else {
			/* Combinational primitive pin; edges are loaded later by
			   alloc_and_load_tnodes. */
			if (tnode[i].pb_graph_pin->port->type == IN_PORT) {
				assert(tnode[i].pb_graph_pin->type == PB_PIN_TERMINAL);
				tnode[i].type = TN_PRIMITIVE_IPIN;
			} else {
				assert(tnode[i].pb_graph_pin->port->type == OUT_PORT);
				assert(tnode[i].pb_graph_pin->type == PB_PIN_TERMINAL);
				tnode[i].type = TN_PRIMITIVE_OPIN;
			}
		}
	}
	(*inode)++;
}
| void print_timing_graph(const char *fname) { | |
| /* Prints the timing graph into a file. */ | |
| FILE *fp; | |
| int inode, iedge, ilevel, i; | |
| t_tedge *tedge; | |
| e_tnode_type itype; | |
| const char *tnode_type_names[] = { "TN_INPAD_SOURCE", "TN_INPAD_OPIN", "TN_OUTPAD_IPIN", | |
| "TN_OUTPAD_SINK", "TN_CB_IPIN", "TN_CB_OPIN", "TN_INTERMEDIATE_NODE", | |
| "TN_PRIMITIVE_IPIN", "TN_PRIMITIVE_OPIN", "TN_FF_IPIN", "TN_FF_OPIN", "TN_FF_SINK", | |
| "TN_FF_SOURCE", "TN_FF_CLOCK", "TN_CONSTANT_GEN_SOURCE" }; | |
| fp = my_fopen(fname, "w", 0); | |
| fprintf(fp, "num_tnodes: %d\n", num_tnodes); | |
| fprintf(fp, "Node #\tType\t\tipin\tiblk\tDomain\tSkew\tI/O Delay\t# edges\t" | |
| "to_node Tdel\n\n"); | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| fprintf(fp, "%d\t", inode); | |
| itype = tnode[inode].type; | |
| fprintf(fp, "%-15.15s\t", tnode_type_names[itype]); | |
| if (tnode[inode].pb_graph_pin != NULL) { | |
| fprintf(fp, "%d\t%d\t", | |
| tnode[inode].pb_graph_pin->pin_count_in_cluster, | |
| tnode[inode].block); | |
| } else { | |
| fprintf(fp, "\t%d\t", tnode[inode].block); | |
| } | |
| if (itype == TN_FF_CLOCK || itype == TN_FF_SOURCE || itype == TN_FF_SINK) { | |
| fprintf(fp, "%d\t%.3e\t\t", tnode[inode].clock_domain, tnode[inode].clock_delay); | |
| } else if (itype == TN_INPAD_SOURCE) { | |
| fprintf(fp, "%d\t\t%.3e\t", tnode[inode].clock_domain, tnode[inode].out_edges[0].Tdel); | |
| } else if (itype == TN_OUTPAD_SINK) { | |
| assert(tnode[inode-1].type == TN_OUTPAD_IPIN); /* Outpad ipins should be one prior in the tnode array */ | |
| fprintf(fp, "%d\t\t%.3e\t", tnode[inode].clock_domain, tnode[inode-1].out_edges[0].Tdel); | |
| } else { | |
| fprintf(fp, "\t\t\t\t"); | |
| } | |
| fprintf(fp, "%d", tnode[inode].num_edges); | |
| /* Print all edges after edge 0 on separate lines */ | |
| tedge = tnode[inode].out_edges; | |
| if (tnode[inode].num_edges > 0) { | |
| fprintf(fp, "\t%4d\t%7.3g", tedge[0].to_node, tedge[0].Tdel); | |
| for (iedge = 1; iedge < tnode[inode].num_edges; iedge++) { | |
| fprintf(fp, "\n\t\t\t\t\t\t\t\t\t\t%4d\t%7.3g", tedge[iedge].to_node, tedge[iedge].Tdel); | |
| } | |
| } | |
| fprintf(fp, "\n"); | |
| } | |
| fprintf(fp, "\n\nnum_tnode_levels: %d\n", num_tnode_levels); | |
| for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) { | |
| fprintf(fp, "\n\nLevel: %d Num_nodes: %d\nNodes:", ilevel, | |
| tnodes_at_level[ilevel].nelem); | |
| for (i = 0; i < tnodes_at_level[ilevel].nelem; i++) | |
| fprintf(fp, "\t%d", tnodes_at_level[ilevel].list[i]); | |
| } | |
| fprintf(fp, "\n"); | |
| fprintf(fp, "\n\nNet #\tNet_to_driver_tnode\n"); | |
| for (i = 0; i < num_nets; i++) | |
| fprintf(fp, "%4d\t%6d\n", i, f_net_to_driver_tnode[i]); | |
| if (g_sdc->num_constrained_clocks == 1) { | |
| /* Arrival and required times, and forward and backward weights, will be meaningless for multiclock | |
| designs, since the values currently on the graph will only correspond to the most recent traversal. */ | |
| fprintf(fp, "\n\nNode #\t\tT_arr\t\tT_req" | |
| #ifdef PATH_COUNTING | |
| "\tForward weight\tBackward weight" | |
| #endif | |
| "\n\n"); | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| if (tnode[inode].T_arr > HUGE_NEGATIVE_FLOAT + 1) { | |
| fprintf(fp, "%d\t%12g", inode, tnode[inode].T_arr); | |
| } else { | |
| fprintf(fp, "%d\t\t -", inode); | |
| } | |
| if (tnode[inode].T_req < HUGE_POSITIVE_FLOAT - 1) { | |
| fprintf(fp, "\t%12g", tnode[inode].T_req); | |
| } else { | |
| fprintf(fp, "\t\t -"); | |
| } | |
| #ifdef PATH_COUNTING | |
| fprintf(fp, "\t%12g\t%12g\n", tnode[inode].forward_weight, tnode[inode].backward_weight); | |
| #endif | |
| } | |
| } | |
| fclose(fp); | |
| } | |
| static void process_constraints(void) { | |
| /* Removes all constraints between domains which never intersect. We need to do this | |
| so that criticality_denom in do_timing_analysis is not affected by unused constraints. | |
| BFS through the levelized graph once for each source domain. Whenever we get to a sink, | |
| mark off that we've used that sink clock domain. After each traversal, set all unused | |
| constraints to DO_NOT_ANALYSE. | |
| Also, print g_sdc->domain_constraints, constrained I/Os and override constraints, | |
| and convert g_sdc->domain_constraints and flip-flop-level override constraints | |
| to be in seconds rather than nanoseconds. We don't need to normalize g_sdc->cc_constraints | |
| because they're already on the g_sdc->domain_constraints matrix, and we don't need | |
| to normalize constrained_ios because we already did the normalization when | |
| we put the delays onto the timing graph in load_clock_domain_and_clock_and_io_delay. */ | |
| int source_clock_domain, sink_clock_domain, inode, ilevel, num_at_level, i, | |
| num_edges, iedge, to_node, icf, ifc, iff; | |
| t_tedge * tedge; | |
| float constraint; | |
| boolean * constraint_used = (boolean *) my_malloc(g_sdc->num_constrained_clocks * sizeof(boolean)); | |
| for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) { | |
| /* We're going to use arrival time to flag which nodes we've reached, | |
| even though the values we put in will not correspond to actual arrival times. | |
| Nodes which are reached on this traversal will get an arrival time of 0. | |
| Reset arrival times now to an invalid number. */ | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| tnode[inode].T_arr = HUGE_NEGATIVE_FLOAT; | |
| } | |
| /* Reset all constraint_used entries. */ | |
| for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) { | |
| constraint_used[sink_clock_domain] = FALSE; | |
| } | |
| /* Set arrival times for each top-level tnode on this clock domain. */ | |
| num_at_level = tnodes_at_level[0].nelem; | |
| for (i = 0; i < num_at_level; i++) { | |
| inode = tnodes_at_level[0].list[i]; | |
| if (tnode[inode].clock_domain == source_clock_domain) { | |
| tnode[inode].T_arr = 0.; | |
| } | |
| } | |
| for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) { /* Go down one level at a time. */ | |
| num_at_level = tnodes_at_level[ilevel].nelem; | |
| for (i = 0; i < num_at_level; i++) { | |
| inode = tnodes_at_level[ilevel].list[i]; /* Go through each of the tnodes at the level we're on. */ | |
| if (has_valid_T_arr(inode)) { /* If this tnode has been used */ | |
| num_edges = tnode[inode].num_edges; | |
| if (num_edges == 0) { /* sink */ | |
| /* We've reached the sink domain of this tnode, so set constraint_used | |
| to true for this tnode's clock domain (if it has a valid one). */ | |
| sink_clock_domain = tnode[inode].clock_domain; | |
| if (sink_clock_domain != -1) { | |
| constraint_used[sink_clock_domain] = TRUE; | |
| } | |
| } else { | |
| /* Set arrival time to a valid value (0.) for each tnode in this tnode's fanout. */ | |
| tedge = tnode[inode].out_edges; | |
| for (iedge = 0; iedge < num_edges; iedge++) { | |
| to_node = tedge[iedge].to_node; | |
| tnode[to_node].T_arr = 0.; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| /* At the end of the source domain traversal, see which sink domains haven't been hit, | |
| and set the constraint for the pair of source and sink domains to DO_NOT_ANALYSE */ | |
| for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) { | |
| if (!constraint_used[sink_clock_domain]) { | |
| g_sdc->domain_constraint[source_clock_domain][sink_clock_domain] = DO_NOT_ANALYSE; | |
| } | |
| } | |
| } | |
| free(constraint_used); | |
| /* Print constraints */ | |
| if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_TIMING_CONSTRAINTS)) { | |
| print_timing_constraint_info(getEchoFileName(E_ECHO_TIMING_CONSTRAINTS)); | |
| } | |
| /* Convert g_sdc->domain_constraint and ff-level override constraints to be in seconds, not nanoseconds. */ | |
| for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) { | |
| for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) { | |
| constraint = g_sdc->domain_constraint[source_clock_domain][sink_clock_domain]; | |
| if (constraint > NEGATIVE_EPSILON) { /* if constraint does not equal DO_NOT_ANALYSE */ | |
| g_sdc->domain_constraint[source_clock_domain][sink_clock_domain] = constraint * 1e-9; | |
| } | |
| } | |
| } | |
| for (icf = 0; icf < g_sdc->num_cf_constraints; icf++) { | |
| g_sdc->cf_constraints[icf].constraint *= 1e-9; | |
| } | |
| for (ifc = 0; ifc < g_sdc->num_fc_constraints; ifc++) { | |
| g_sdc->fc_constraints[ifc].constraint *= 1e-9; | |
| } | |
| for (iff = 0; iff < g_sdc->num_ff_constraints; iff++) { | |
| g_sdc->ff_constraints[iff].constraint *= 1e-9; | |
| } | |
| /* Finally, free g_sdc->cc_constraints since all of its info is contained in g_sdc->domain_constraint. */ | |
| free_override_constraint(g_sdc->cc_constraints, g_sdc->num_cc_constraints); | |
| } | |
| static void alloc_timing_stats(void) { | |
| /* Allocate f_timing_stats data structure. */ | |
| int i; | |
| f_timing_stats = (t_timing_stats *) my_malloc(sizeof(t_timing_stats)); | |
| f_timing_stats->cpd = (float **) my_malloc(g_sdc->num_constrained_clocks * sizeof(float *)); | |
| f_timing_stats->least_slack = (float **) my_malloc(g_sdc->num_constrained_clocks * sizeof(float *)); | |
| for (i = 0; i < g_sdc->num_constrained_clocks; i++) { | |
| f_timing_stats->cpd[i] = (float *) my_malloc(g_sdc->num_constrained_clocks * sizeof(float)); | |
| f_timing_stats->least_slack[i] = (float *) my_malloc(g_sdc->num_constrained_clocks * sizeof(float)); | |
| } | |
| } | |
void do_timing_analysis(t_slack * slacks, boolean is_prepacked, boolean do_lut_input_balancing, boolean is_final_analysis) {
	/* Performs timing analysis on the circuit.  Before this routine is called, t_slack * slacks
	must have been allocated, and the circuit must have been converted into a timing graph.
	The nodes of the timing graph represent pins and the edges between them represent delays
	and dependencies from one pin to another.  Most elements are modeled as a pair of nodes so
	that the delay through the element can be marked on the edge between them (e.g.
	TN_INPAD_SOURCE->TN_INPAD_OPIN, TN_OUTPAD_IPIN->TN_OUTPAD_SINK, TN_PRIMITIVE_OPIN->
	TN_PRIMITIVE_OPIN, etc.).

	The timing graph nodes are stored as an array, tnode [0..num_tnodes - 1]. Each tnode
	includes an array of all edges, tedge, which fan out from it. Each tedge includes the
	index of the node on its far end (in the tnode array), and the delay to that node.

	The timing graph has sources at each TN_FF_SOURCE (Q output), TN_INPAD_SOURCE (input I/O pad)
	and TN_CONSTANT_GEN_SOURCE (constant 1 or 0 generator) node and sinks at TN_FF_SINK (D input)
	and TN_OUTPAD_SINK (output I/O pad) nodes. Two traversals, one forward (sources to sinks)
	and one backward, are performed for each valid constraint (one which is not DO_NOT_ANALYSE)
	between a source and a sink clock domain in the matrix g_sdc->domain_constraint
	[0..g_sdc->num_constrained_clocks - 1][0..g_sdc->num_constrained_clocks - 1]. This matrix has been
	pruned so that all domain pairs with no paths between them have been set to DO_NOT_ANALYSE.

	During the traversal pair for each constraint, all nodes in the fanout of sources on the
	source clock domain are assigned a T_arr, the arrival time of the last input signal to the node.
	All nodes in the fanin of sinks on the sink clock domain are assigned a T_req, the required
	arrival time of the last input signal to the node if the critical path for this constraint is
	not to be lengthened.  Nodes which receive both a valid T_arr and T_req are flagged with
	used_on_this_traversal, and nodes which are used on at least one traversal pair are flagged
	with has_valid_slack so that later functions know the slack is valid.

	After each traversal pair, a slack is calculated for each sink pin on each net (or equivalently,
	each connection or tedge fanning out from that net's driver tnode). Slack is calculated as:
		T_req (dest node) - T_arr (source node) - Tdel (edge)
	and represents the amount of delay which could be added to this connection before the critical
	path delay for this constraint would worsen.  Edges on the critical path have a slack of 0.
	Slacks which are never used are set to HUGE_POSITIVE_FLOAT.

	The optimizers actually use a metric called timing_criticality.  Timing criticality is defined
	as 1 - slack / criticality_denom, where the normalization factor criticality_denom is the max
	of all arrival times in the constraint and the constraint itself (T_req-relaxed slacks) or
	all arrival times and constraints in the design (shifted slacks).  See below for a further
	discussion of these two regimes.  Timing criticality is always between 0 (not critical) and 1
	(very critical).  Unlike slack, which is set to HUGE_POSITIVE_FLOAT for unanalysed connections,
	timing criticality is 0 for these, meaning no special check has to be made for which connections
	have been analysed.

	If path counting is on (PATH_COUNTING is defined in vpr_types.h), the optimizers use a weighted
	sum of timing_criticality and path_criticality, the latter of which is an estimate of the
	importance of the number of paths using a particular connection.  As a path's timing_criticality
	decreases, it will become exponentially less important to the path_criticality of any connection
	which this path uses.  Path criticality also goes from 0 (not critical or unanalysed) to 1.

	Slack and criticalities are only calculated if both the driver of the net and the sink pin were
	used_on_this_traversal, and are only overwritten if lower than previously-obtained values.
	The optimizers actually use criticality rather than slack, but slack is useful for human
	designers and so we calculate it only if we need to print it.

	This routine outputs slack and criticality to t_slack * slacks. It also stores least slack and
	critical path delay per constraint [0..g_sdc->num_constrained_clocks - 1][0..g_sdc->num_constrained_clocks - 1]
	in the file-scope variable f_timing_stats.

	Is_prepacked flags whether to calculate normalized costs for the clusterer (normalized_slack,
	normalized_Tarr, normalized_total_critical_paths). Setting this to FALSE saves time in post-
	packed timing analyses.

	Do_lut_input_balancing flags whether to rebalance LUT inputs.  LUT rebalancing takes advantage of
	the fact that different LUT inputs often have different delays. Since we can freely permute which
	LUT inputs are used by just changing the logic in the LUT, these LUT permutations can be performed
	late into the routing stage of the flow.

	Is_final_analysis flags whether this is the final, analysis pass.  If it is, the analyser will
	compute actual slacks instead of relaxed ones. We "relax" slacks by setting the required time to
	the maximum arrival time for tight constraints so that no slacks are negative (which confuses
	the optimizers). This is called "T_req-relaxed" slack.  However, human designers want to see
	actual slack values, so we report those in the final analysis.  The alternative way of making
	slacks positive is shifting them upwards by the value of the largest negative slack, after
	all traversals are complete ("shifted slacks"), which can be enabled by changing SLACK_DEFINITION
	from 'R' to 'S' in path_delay.h.

	To do: flip-flop to flip-flop and flip-flop to clock domain constraints (set_false_path, set_max_delay,
	and especially set_multicycle_path). All the info for these constraints is contained in g_sdc->fc_constraints and
	g_sdc->ff_constraints, but graph traversals are not included yet. Probably, an entire traversal will be needed for
	each constraint. Clock domain to flip-flop constraints are coded but not tested, and are done within
	existing traversals. */

	int i, j, source_clock_domain, sink_clock_domain, inode, inet, ipin;

#if defined PATH_COUNTING || SLACK_DEFINITION == 'S'
	int iedge, num_edges;
#endif

#ifdef PATH_COUNTING
	float max_path_criticality = HUGE_NEGATIVE_FLOAT /* used to normalize path_criticalities */;
#endif

	boolean update_slack = (boolean) (is_final_analysis || getEchoEnabled());
	/* Only update slack values if we need to print it, i.e.
	for the final output file (is_final_analysis) or echo files. */

	float criticality_denom; /* (SLACK_DEFINITION == 'R' only) For a particular constraint, the maximum of
							 the constraint and all arrival times for the constraint's traversal. Used to
							 normalize the clusterer's normalized_slack and, more importantly, criticality. */

	long max_critical_output_paths, max_critical_input_paths;
	t_pb *pb;

#if SLACK_DEFINITION == 'S'
	float smallest_slack_in_design = HUGE_POSITIVE_FLOAT;
	/* Shift all slacks upwards by this number if it is negative. */

	float criticality_denom_global = HUGE_NEGATIVE_FLOAT;
	/* Denominator of criticality for shifted - max of all arrival times and all constraints. */
#endif

	/* Reset LUT input rebalancing: clear any pin remapping left over from a
	previous analysis so this pass starts from an unpermuted assignment. */
	for (inode = 0; inode < num_tnodes; inode++) {
		if (tnode[inode].type == TN_PRIMITIVE_OPIN && tnode[inode].pb_graph_pin != NULL) {
			pb = block[tnode[inode].block].pb->rr_node_to_pb_mapping[tnode[inode].pb_graph_pin->pin_count_in_cluster];
			if (pb != NULL && pb->lut_pin_remap != NULL) {
				/* this is a LUT primitive, do pin swapping */
				assert(pb->pb_graph_node->pb_type->num_output_pins == 1 && pb->pb_graph_node->pb_type->num_clock_pins == 0); /* ensure LUT properties are valid */
				assert(pb->pb_graph_node->num_input_ports == 1);
				/* If all input pins are known, perform LUT input delay rebalancing, do nothing otherwise */
				for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) {
					pb->lut_pin_remap[i] = OPEN;
				}
			}
		}
	}

	/* Reset all values which need to be reset once per
	timing analysis, rather than once per traversal pair. */

	/* Reset slack and criticality.  Note: sink pins are numbered 1..num_sinks;
	pin 0 is the net's driver and has no slack of its own. */
	for (inet = 0; inet < num_timing_nets; inet++) {
		for (ipin = 1; ipin <= timing_nets[inet].num_sinks; ipin++) {
			slacks->slack[inet][ipin]			   = HUGE_POSITIVE_FLOAT;
			slacks->timing_criticality[inet][ipin] = 0.;
#ifdef PATH_COUNTING
			slacks->path_criticality[inet][ipin] = 0.;
#endif
		}
	}

	/* Reset f_timing_stats. */
	for (i = 0; i < g_sdc->num_constrained_clocks; i++) {
		for (j = 0; j < g_sdc->num_constrained_clocks; j++) {
			f_timing_stats->cpd[i][j]		  = HUGE_NEGATIVE_FLOAT;
			f_timing_stats->least_slack[i][j] = HUGE_POSITIVE_FLOAT;
		}
	}

#ifndef PATH_COUNTING
	/* Reset normalized values for clusterer. */
	if (is_prepacked) {
		for (inode = 0; inode < num_tnodes; inode++) {
			tnode[inode].prepacked_data->normalized_slack = HUGE_POSITIVE_FLOAT;
			tnode[inode].prepacked_data->normalized_T_arr = HUGE_NEGATIVE_FLOAT;
			tnode[inode].prepacked_data->normalized_total_critical_paths = HUGE_NEGATIVE_FLOAT;
		}
	}
#endif

	if (do_lut_input_balancing) {
		do_lut_rebalancing();
	}

	/* For each valid constraint (pair of source and sink clock domains), we do one
	forward and one backward topological traversal to find arrival and required times,
	in do_timing_analysis_for_constraint. If path counting is on, we then do another,
	simpler traversal to find forward and backward weights, relying on the largest
	required time we found from the first traversal.  After each constraint's traversals,
	we update the slacks, timing criticalities and (if necessary) path criticalities or
	normalized costs used by the clusterer. */

	for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {
		for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {
			if (g_sdc->domain_constraint[source_clock_domain][sink_clock_domain] > NEGATIVE_EPSILON) { /* i.e. != DO_NOT_ANALYSE */

				/* Perform the forward and backward traversal for this constraint. */
				criticality_denom = do_timing_analysis_for_constraint(source_clock_domain, sink_clock_domain,
					is_prepacked, is_final_analysis, &max_critical_input_paths, &max_critical_output_paths);
#ifdef PATH_COUNTING
				/* Weight the importance of each net, used in slack calculation. */
				do_path_counting(criticality_denom);
#endif

				/* Update the slack and criticality for each edge of each net which was
				analysed on the most recent traversal and has a lower (slack) or
				higher (criticality) value than before. */
				update_slacks(slacks, source_clock_domain, sink_clock_domain, criticality_denom, update_slack);

#ifndef PATH_COUNTING
				/* Update the normalized costs used by the clusterer. */
				if (is_prepacked) {
					update_normalized_costs(criticality_denom, max_critical_input_paths, max_critical_output_paths);
				}
#endif

#if SLACK_DEFINITION == 'S'
				/* Set criticality_denom_global to the max of criticality_denom over all traversals. */
				criticality_denom_global = std::max(criticality_denom_global, criticality_denom);
#endif
			}
		}
	}

#ifdef PATH_COUNTING
	/* Normalize path criticalities by the largest value in the
	circuit. Otherwise, path criticalities would be unbounded. */
	for (inet = 0; inet < num_timing_nets; inet++) {
		num_edges = timing_nets[inet].num_sinks;
		for (iedge = 0; iedge < num_edges; iedge++) {
			max_path_criticality = std::max(max_path_criticality, slacks->path_criticality[inet][iedge + 1]);
		}
	}

	for (inet = 0; inet < num_timing_nets; inet++) {
		num_edges = timing_nets[inet].num_sinks;
		for (iedge = 0; iedge < num_edges; iedge++) {
			slacks->path_criticality[inet][iedge + 1] /= max_path_criticality;
		}
	}
#endif

#if SLACK_DEFINITION == 'S'
	if (!is_final_analysis) {
		/* Find the smallest slack in the design. */
		for (i = 0; i < g_sdc->num_constrained_clocks; i++) {
			for (j = 0; j < g_sdc->num_constrained_clocks; j++) {
				smallest_slack_in_design = std::min(smallest_slack_in_design, f_timing_stats->least_slack[i][j]);
			}
		}

		/* Increase all slacks by the value of the smallest slack in the design, if it's negative. */
		if (smallest_slack_in_design < 0) {
			for (inet = 0; inet < num_timing_nets; inet++) {
				num_edges = timing_nets[inet].num_sinks;
				for (iedge = 0; iedge < num_edges; iedge++) {
					slacks->slack[inet][iedge + 1] -= smallest_slack_in_design;
					/* Remember, smallest_slack_in_design is negative, so we're INCREASING all the slacks. */
					/* Note that if slack was equal to HUGE_POSITIVE_FLOAT, it will still be equal to more than this,
					so it will still be ignored when we calculate criticalities. */
				}
			}
		}
	}

	/* We can now calculate criticalities, only after we normalize slacks. */
	for (inet = 0; inet < num_timing_nets; inet++) {
		num_edges = timing_nets[inet].num_sinks;
		for (iedge = 0; iedge < num_edges; iedge++) {
			if (slacks->slack[inet][iedge + 1] < HUGE_POSITIVE_FLOAT - 1) { /* if the slack is valid */
				slacks->timing_criticality[inet][iedge + 1] = 1 - slacks->slack[inet][iedge + 1]/criticality_denom_global;
			}
			/* otherwise, criticality remains 0, as it was initialized */
		}
	}
#endif
}
| static void do_lut_rebalancing() { | |
| int inode, num_at_level, i, ilevel, num_edges, iedge, to_node; | |
| t_tedge * tedge; | |
| /* Reset all arrival times to a very large negative number. */ | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| tnode[inode].T_arr = HUGE_NEGATIVE_FLOAT; | |
| } | |
| /* Set arrival times for each top-level tnode. */ | |
| num_at_level = tnodes_at_level[0].nelem; | |
| for (i = 0; i < num_at_level; i++) { | |
| inode = tnodes_at_level[0].list[i]; | |
| if (tnode[inode].type == TN_FF_SOURCE) { | |
| /* Set the arrival time of this flip-flop tnode to its clock skew. */ | |
| tnode[inode].T_arr = tnode[inode].clock_delay; | |
| } else if (tnode[inode].type == TN_INPAD_SOURCE) { | |
| /* There's no such thing as clock skew for external clocks. The closest equivalent, | |
| input delay, is already marked on the edge coming out from this node. | |
| As a result, the signal can be said to arrive at t = 0. */ | |
| tnode[inode].T_arr = 0.; | |
| } | |
| } | |
| /* Now we actually start the forward topological traversal, to compute arrival times. */ | |
| for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) { /* For each level of our levelized timing graph... */ | |
| num_at_level = tnodes_at_level[ilevel].nelem; /* ...there are num_at_level tnodes at that level. */ | |
| for (i = 0; i < num_at_level; i++) { | |
| inode = tnodes_at_level[ilevel].list[i]; /* Go through each of the tnodes at the level we're on. */ | |
| num_edges = tnode[inode].num_edges; /* Get the number of edges fanning out from the node we're visiting */ | |
| tedge = tnode[inode].out_edges; /* Get the list of edges from the node we're visiting */ | |
| for (iedge = 0; iedge < num_edges; iedge++) { /* Now go through each edge coming out from this tnode */ | |
| to_node = tedge[iedge].to_node; /* Get the index of the destination tnode of this edge. */ | |
| /* The arrival time T_arr at the destination node is set to the maximum of all the | |
| possible arrival times from all edges fanning in to the node. | |
| The arrival time represents the latest time that all inputs must arrive at a node. | |
| LUT input rebalancing also occurs at this step. */ | |
| set_and_balance_arrival_time(to_node, inode, tedge[iedge].Tdel, TRUE); | |
| } | |
| } | |
| } | |
| } | |
| static float do_timing_analysis_for_constraint(int source_clock_domain, int sink_clock_domain, | |
| boolean is_prepacked, boolean is_final_analysis, long * max_critical_input_paths_ptr, | |
| long * max_critical_output_paths_ptr) { | |
| /* Performs a single forward and backward traversal for the domain pair | |
| source_clock_domain and sink_clock_domain. Returns the denominator that | |
| will be later used to normalize criticality - the maximum of all arrival | |
| times from this traversal and the constraint for this pair of domains. | |
| Also returns the maximum number of critical input and output paths of any | |
| node analysed for this constraint, passed by reference from do_timing_analysis. */ | |
| int inode, num_at_level, i, total, ilevel, num_edges, iedge, to_node, icf; | |
| float constraint, Tdel, T_req, max_Tarr = HUGE_NEGATIVE_FLOAT; | |
| /* Max of all arrival times for this constraint - | |
| used to relax required times. */ | |
| t_tedge * tedge; | |
| int num_dangling_nodes; | |
| boolean found; | |
| long max_critical_input_paths = 0, max_critical_output_paths = 0; | |
| /* Reset all values which need to be reset once per | |
| traversal pair, rather than once per timing analysis. */ | |
| /* Reset all arrival and required times. */ | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| tnode[inode].T_arr = HUGE_NEGATIVE_FLOAT; | |
| tnode[inode].T_req = HUGE_POSITIVE_FLOAT; | |
| } | |
| #ifndef PATH_COUNTING | |
| /* Reset num_critical_output_paths. */ | |
| if (is_prepacked) { | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| tnode[inode].prepacked_data->num_critical_output_paths = 0; | |
| } | |
| } | |
| #endif | |
| /* Set arrival times for each top-level tnode on this source domain. */ | |
| num_at_level = tnodes_at_level[0].nelem; | |
| for (i = 0; i < num_at_level; i++) { | |
| inode = tnodes_at_level[0].list[i]; | |
| if (tnode[inode].clock_domain == source_clock_domain) { | |
| if (tnode[inode].type == TN_FF_SOURCE) { | |
| /* Set the arrival time of this flip-flop tnode to its clock skew. */ | |
| tnode[inode].T_arr = tnode[inode].clock_delay; | |
| } else if (tnode[inode].type == TN_INPAD_SOURCE) { | |
| /* There's no such thing as clock skew for external clocks, and | |
| input delay is already marked on the edge coming out from this node. | |
| As a result, the signal can be said to arrive at t = 0. */ | |
| tnode[inode].T_arr = 0.; | |
| } | |
| } | |
| } | |
| /* Compute arrival times with a forward topological traversal from sources | |
| (TN_FF_SOURCE, TN_INPAD_SOURCE, TN_CONSTANT_GEN_SOURCE) to sinks (TN_FF_SINK, TN_OUTPAD_SINK). */ | |
| total = 0; /* We count up all tnodes to error-check at the end. */ | |
| for (ilevel = 0; ilevel < num_tnode_levels; ilevel++) { /* For each level of our levelized timing graph... */ | |
| num_at_level = tnodes_at_level[ilevel].nelem; /* ...there are num_at_level tnodes at that level. */ | |
| total += num_at_level; | |
| for (i = 0; i < num_at_level; i++) { | |
| inode = tnodes_at_level[ilevel].list[i]; /* Go through each of the tnodes at the level we're on. */ | |
| if (tnode[inode].T_arr < NEGATIVE_EPSILON) { /* If the arrival time is less than 0 (i.e. HUGE_NEGATIVE_FLOAT)... */ | |
| continue; /* End this iteration of the num_at_level for loop since | |
| this node is not part of the clock domain we're analyzing. | |
| (If it were, it would have received an arrival time already.) */ | |
| } | |
| num_edges = tnode[inode].num_edges; /* Get the number of edges fanning out from the node we're visiting */ | |
| tedge = tnode[inode].out_edges; /* Get the list of edges from the node we're visiting */ | |
| #ifndef PATH_COUNTING | |
| if (is_prepacked && ilevel == 0) { | |
| tnode[inode].prepacked_data->num_critical_input_paths = 1; /* Top-level tnodes have one locally-critical input path. */ | |
| } | |
| /* Using a somewhat convoluted procedure inherited from T-VPack, | |
| count how many locally-critical input paths fan into each tnode, | |
| and also find the maximum number over all tnodes. */ | |
| if (is_prepacked) { | |
| for (iedge = 0; iedge < num_edges; iedge++) { | |
| to_node = tedge[iedge].to_node; | |
| if (fabs(tnode[to_node].T_arr - (tnode[inode].T_arr + tedge[iedge].Tdel)) < EPSILON) { | |
| /* If the "local forward slack" (T_arr(to_node) - T_arr(inode) - T_del) for this edge | |
| is 0 (i.e. the path from inode to to_node is locally as critical as any other path to | |
| to_node), add to_node's num critical input paths to inode's number. */ | |
| tnode[to_node].prepacked_data->num_critical_input_paths += tnode[inode].prepacked_data->num_critical_input_paths; | |
| } else if (tnode[to_node].T_arr < (tnode[inode].T_arr + tedge[iedge].Tdel)) { | |
| /* If the "local forward slack" for this edge is negative, | |
| reset to_node's num critical input paths to inode's number. */ | |
| tnode[to_node].prepacked_data->num_critical_input_paths = tnode[inode].prepacked_data->num_critical_input_paths; | |
| } | |
| /* Set max_critical_input_paths to the maximum number of critical | |
| input paths for all tnodes analysed on this traversal. */ | |
| if (tnode[to_node].prepacked_data->num_critical_input_paths > max_critical_input_paths) { | |
| max_critical_input_paths = tnode[to_node].prepacked_data->num_critical_input_paths; | |
| } | |
| } | |
| } | |
| #endif | |
| for (iedge = 0; iedge < num_edges; iedge++) { /* Now go through each edge coming out from this tnode */ | |
| to_node = tedge[iedge].to_node; /* Get the index of the destination tnode of this edge. */ | |
| /* The arrival time T_arr at the destination node is set to the maximum of all | |
| the possible arrival times from all edges fanning in to the node. The arrival | |
| time represents the latest time that all inputs must arrive at a node. LUT input | |
| rebalancing also occurs at this step. */ | |
| set_and_balance_arrival_time(to_node, inode, tedge[iedge].Tdel, FALSE); | |
| /* Since we updated the destination node (to_node), change the max arrival | |
| time for the forward traversal if to_node's arrival time is greater than | |
| the existing maximum. */ | |
| max_Tarr = std::max(max_Tarr, tnode[to_node].T_arr); | |
| } | |
| } | |
| } | |
| assert(total == num_tnodes); | |
| num_dangling_nodes = 0; | |
| /* Compute required times with a backward topological traversal from sinks to sources. */ | |
| for (ilevel = num_tnode_levels - 1; ilevel >= 0; ilevel--) { | |
| num_at_level = tnodes_at_level[ilevel].nelem; | |
| for (i = 0; i < num_at_level; i++) { | |
| inode = tnodes_at_level[ilevel].list[i]; | |
| num_edges = tnode[inode].num_edges; | |
| if (ilevel == 0) { | |
| if (!(tnode[inode].type == TN_INPAD_SOURCE || tnode[inode].type == TN_FF_SOURCE || tnode[inode].type == TN_CONSTANT_GEN_SOURCE)) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "Timing graph started on unexpected node %s.%s[%d].\n", | |
| tnode[inode].pb_graph_pin->parent_node->pb_type->name, | |
| tnode[inode].pb_graph_pin->port->name, | |
| tnode[inode].pb_graph_pin->pin_number); | |
| vpr_printf(TIO_MESSAGE_ERROR, "This is a VPR internal error, contact VPR development team.\n"); | |
| exit(1); | |
| } | |
| } else { | |
| if ((tnode[inode].type == TN_INPAD_SOURCE || tnode[inode].type == TN_FF_SOURCE || tnode[inode].type == TN_CONSTANT_GEN_SOURCE)) { | |
| vpr_printf(TIO_MESSAGE_ERROR, "Timing graph discovered unexpected edge to node %s.%s[%d].\n", | |
| tnode[inode].pb_graph_pin->parent_node->pb_type->name, | |
| tnode[inode].pb_graph_pin->port->name, | |
| tnode[inode].pb_graph_pin->pin_number); | |
| vpr_printf(TIO_MESSAGE_ERROR, "This is a VPR internal error, contact VPR development team.\n"); | |
| exit(1); | |
| } | |
| } | |
| /* Unlike the forward traversal, the sinks are all on different levels, so we always have to | |
| check whether a node is a sink. We give every sink on the sink clock domain we're considering | |
| a valid required time. Every non-sink node in the fanin of one of these sinks and the fanout of | |
| some source from the forward traversal also gets a valid required time. */ | |
| if (num_edges == 0) { /* sink */ | |
| if (tnode[inode].type == TN_FF_CLOCK || tnode[inode].T_arr < HUGE_NEGATIVE_FLOAT + 1) { | |
| continue; /* Skip nodes on the clock net itself, and nodes with unset arrival times. */ | |
| } | |
| if (!(tnode[inode].type == TN_OUTPAD_SINK || tnode[inode].type == TN_FF_SINK)) { | |
| if(is_prepacked) { | |
| vpr_printf(TIO_MESSAGE_WARNING, "Pin on block %s.%s[%d] not used\n", | |
| logical_block[tnode[inode].block].name, | |
| tnode[inode].prepacked_data->model_port_ptr->name, | |
| tnode[inode].prepacked_data->model_pin); | |
| } | |
| num_dangling_nodes++; | |
| /* Note: Still need to do standard traversals with dangling pins so that algorithm runs properly, but T_arr and T_Req to values such that it dangling nodes do not affect actual timing values */ | |
| } | |
| /* Skip nodes not on the sink clock domain of the | |
| constraint we're currently considering */ | |
| if (tnode[inode].clock_domain != sink_clock_domain) { | |
| continue; | |
| } | |
| /* See if there's an override constraint between the source clock domain (name is | |
| g_sdc->constrained_clocks[source_clock_domain].name) and the flip-flop or outpad we're at | |
| now (name is find_tnode_net_name(inode, is_prepacked)). We test if | |
| g_sdc->num_cf_constraints > 0 first so that we can save time looking up names in the vast | |
| majority of cases where there are no such constraints. */ | |
| if (g_sdc->num_cf_constraints > 0 && (icf = find_cf_constraint(g_sdc->constrained_clocks[source_clock_domain].name, find_tnode_net_name(inode, is_prepacked))) != -1) { | |
| constraint = g_sdc->cf_constraints[icf].constraint; | |
| if (constraint < NEGATIVE_EPSILON) { | |
| /* Constraint is DO_NOT_ANALYSE (-1) for this particular sink. */ | |
| continue; | |
| } | |
| } else { /* Use the default constraint from g_sdc->domain_constraint. */ | |
| constraint = g_sdc->domain_constraint[source_clock_domain][sink_clock_domain]; | |
| /* Constraint is guaranteed to be valid since we checked for it at the very beginning. */ | |
| } | |
| /* Now we know we should analyse this tnode. */ | |
| #if SLACK_DEFINITION == 'R' | |
| /* Assign the required time T_req for this leaf node, taking into account clock skew. T_req is the | |
| time all inputs to a tnode must arrive by before it would degrade this constraint's critical path delay. | |
| Relax the required time at the sink node to be non-negative by taking the max of the "real" required | |
| time (constraint + tnode[inode].clock_delay) and the max arrival time in this domain (max_Tarr), except | |
| for the final analysis where we report actual slack. We do this to prevent any slacks from being | |
| negative, since negative slacks are not used effectively by the optimizers. | |
| E.g. if we have a 10 ns constraint and it takes 14 ns to get here, we'll have a slack of at most -4 ns | |
| for any edge along the path that got us here. If we say the required time is 14 ns (no less than the | |
| arrival time), we don't have a negative slack anymore. However, in the final timing analysis, the real | |
| slacks are computed (that's what human designers care about), not the relaxed ones. */ | |
| if (is_final_analysis) { | |
| tnode[inode].T_req = constraint + tnode[inode].clock_delay; | |
| } else { | |
| tnode[inode].T_req = std::max(constraint + tnode[inode].clock_delay, max_Tarr); | |
| } | |
| #else | |
| /* Don't do the relaxation and always set T_req equal to the "real" required time. */ | |
| tnode[inode].T_req = constraint + tnode[inode].clock_delay; | |
| #endif | |
| /* Store the largest critical path delay for this constraint (source domain AND sink domain) | |
| in the matrix critical_path_delay. C.P.D. = T_arr at destination - clock skew at destination | |
| = (datapath delay + clock delay to source) - clock delay to destination. | |
| Critical path delay is really telling us how fast we can run the source clock before we can no | |
| longer meet this constraint. e.g. If the datapath delay is 10 ns, the clock delay at source is | |
| 2 ns and the clock delay at destination is 5 ns, then C.P.D. is 7 ns by the above formula. We | |
| can run the source clock at 7 ns because the clock skew gives us 3 ns extra to meet the 10 ns | |
| datapath delay. */ | |
| f_timing_stats->cpd[source_clock_domain][sink_clock_domain] = | |
| std::max(f_timing_stats->cpd[source_clock_domain][sink_clock_domain], | |
| (tnode[inode].T_arr - tnode[inode].clock_delay)); | |
| #ifndef PATH_COUNTING | |
| if (is_prepacked) { | |
| tnode[inode].prepacked_data->num_critical_output_paths = 1; /* Bottom-level tnodes have one locally-critical input path. */ | |
| } | |
| #endif | |
| } else { /* not a sink */ | |
| assert(!(tnode[inode].type == TN_OUTPAD_SINK || tnode[inode].type == TN_FF_SINK || tnode[inode].type == TN_FF_CLOCK)); | |
| /* We need to skip this node unless it is on a path from source_clock_domain to | |
| sink_clock_domain. We need to skip all nodes which: | |
| 1. Fan out to the sink domain but do not fan in from the source domain. | |
| 2. Fan in from the source domain but do not fan out to the sink domain. | |
| 3. Do not fan in or out to either domain. | |
| If a node does not fan in from the source domain, it will not have a valid arrival time. | |
| So cases 1 and 3 can be skipped by continuing if T_arr = HUGE_NEGATIVE_FLOAT. We cannot | |
| treat case 2 as simply since the required time for this node has not yet been assigned, | |
| so we have to look at the required time for every node in its immediate fanout instead. */ | |
| /* Cases 1 and 3 */ | |
| if (tnode[inode].T_arr < HUGE_NEGATIVE_FLOAT + 1) { | |
| continue; /* Skip nodes with unset arrival times. */ | |
| } | |
| /* Case 2 */ | |
| found = FALSE; | |
| tedge = tnode[inode].out_edges; | |
| for (iedge = 0; iedge < num_edges && !found; iedge++) { | |
| to_node = tedge[iedge].to_node; | |
| if (tnode[to_node].T_req < HUGE_POSITIVE_FLOAT) { | |
| found = TRUE; | |
| } | |
| } | |
| if (!found) { | |
| continue; | |
| } | |
| /* Now we know this node is on a path from source_clock_domain to | |
| sink_clock_domain, and needs to be analyzed. */ | |
| /* Opposite to T_arr, set T_req to the MINIMUM of the | |
| required times of all edges fanning OUT from this node. */ | |
| for (iedge = 0; iedge < num_edges; iedge++) { | |
| to_node = tedge[iedge].to_node; | |
| Tdel = tedge[iedge].Tdel; | |
| T_req = tnode[to_node].T_req; | |
| tnode[inode].T_req = std::min(tnode[inode].T_req, T_req - Tdel); | |
| /* Update least slack per constraint. This is NOT the same as the minimum slack we will | |
| calculate on this traversal for post-packed netlists, which only count inter-cluster | |
| slacks. We only look at edges adjacent to sink nodes on the sink clock domain since | |
| all paths go through one of these edges. */ | |
| if (tnode[to_node].num_edges == 0 && tnode[to_node].clock_domain == sink_clock_domain) { | |
| f_timing_stats->least_slack[source_clock_domain][sink_clock_domain] = | |
| std::min(f_timing_stats->least_slack[source_clock_domain][sink_clock_domain], | |
| (T_req - Tdel - tnode[inode].T_arr)); | |
| } | |
| } | |
| #ifndef PATH_COUNTING | |
| /* Similar to before, we count how many locally-critical output paths fan out from each tnode, | |
| and also find the maximum number over all tnodes. Unlike for input paths, where we haven't set | |
| the arrival time at to_node before analysing it, here the required time is set at both nodes, | |
| so the "local backward slack" (T_req(to_node) - T_req(inode) - T_del) will never be negative. | |
| Hence, we only have to test if the "local backward slack" is 0. */ | |
| if (is_prepacked) { | |
| for (iedge = 0; iedge < num_edges; iedge++) { | |
| to_node = tedge[iedge].to_node; | |
| /* If the "local backward slack" (T_arr(to_node) - T_arr(inode) - T_del) for this edge | |
| is 0 (i.e. the path from inode to to_node is locally as critical as any other path to | |
| to_node), add to_node's num critical output paths to inode's number. */ | |
| if (fabs(tnode[to_node].T_req - (tnode[inode].T_req + tedge[iedge].Tdel)) < EPSILON) { | |
| tnode[inode].prepacked_data->num_critical_output_paths += tnode[to_node].prepacked_data->num_critical_output_paths; | |
| } | |
| /* Set max_critical_output_paths to the maximum number of critical | |
| output paths for all tnodes analysed on this traversal. */ | |
| if (tnode[to_node].prepacked_data->num_critical_output_paths > max_critical_output_paths) { | |
| max_critical_output_paths = tnode[to_node].prepacked_data->num_critical_output_paths; | |
| } | |
| } | |
| } | |
| #endif | |
| } | |
| } | |
| } | |
| /* Return max critical input/output paths for this constraint through | |
| the pointers we passed in. */ | |
| if (max_critical_input_paths_ptr && max_critical_output_paths_ptr) { | |
| *max_critical_input_paths_ptr = max_critical_input_paths; | |
| *max_critical_output_paths_ptr = max_critical_output_paths; | |
| } | |
| if(num_dangling_nodes > 0 && (is_final_analysis || is_prepacked)) { | |
| vpr_printf(TIO_MESSAGE_WARNING, "%d unused pins \n", num_dangling_nodes); | |
| } | |
| /* The criticality denominator is the maximum of the max | |
| arrival time and the constraint for this domain pair. */ | |
| return std::max(max_Tarr, g_sdc->domain_constraint[source_clock_domain][sink_clock_domain]); | |
| } | |
#ifdef PATH_COUNTING
static void do_path_counting(float criticality_denom) {
	/* First step of the net-weighting scheme from "A Novel Net Weighting
	 * Algorithm for Timing-Driven Placement" (Kong, 2002): give every tnode a
	 * forward and a backward path weight measuring the importance of the paths
	 * passing through it.  Must run after do_timing_analysis_for_constraints,
	 * since only tnodes with valid T_arr and T_req are visited. */

	int node, neighbour, edge, level, member, count, fanout;
	t_tedge *fanout_edges;
	float slack_on_edge, scale;

	/* Clear any weights left over from a previous constraint. */
	for (node = 0; node < num_tnodes; node++) {
		tnode[node].forward_weight = 0;
		tnode[node].backward_weight = 0;
	}

	/* Seed every level-0 (source) tnode with unit forward weight. */
	count = tnodes_at_level[0].nelem;
	for (member = 0; member < count; member++) {
		node = tnodes_at_level[0].list[member];
		tnode[node].forward_weight = 1.;
	}

	/* Forward topological sweep: push each tnode's weight onto its fanout,
	 * discounted exponentially by the local slack on the connecting edge. */
	for (level = 0; level < num_tnode_levels; level++) {
		count = tnodes_at_level[level].nelem;
		for (member = 0; member < count; member++) {
			node = tnodes_at_level[level].list[member];
			if (!has_valid_T_arr(node) || !has_valid_T_req(node)) {
				continue; /* Not on a path for the current constraint. */
			}
			fanout_edges = tnode[node].out_edges;
			fanout = tnode[node].num_edges;
			for (edge = 0; edge < fanout; edge++) {
				neighbour = fanout_edges[edge].to_node;
				if (!has_valid_T_arr(neighbour) || !has_valid_T_req(neighbour)) {
					continue;
				}
				slack_on_edge = tnode[neighbour].T_arr - tnode[node].T_arr - fanout_edges[edge].Tdel;
				scale = pow((float) DISCOUNT_FUNCTION_BASE, -1 * slack_on_edge / criticality_denom);
				tnode[neighbour].forward_weight += scale * tnode[node].forward_weight;
			}
		}
	}

	/* Backward topological sweep: sinks (no fanout) get unit weight; every
	 * other tnode pulls discounted weight back from its fanout.  Sinks can sit
	 * on any level, so detect them as we go. */
	for (level = num_tnode_levels - 1; level >= 0; level--) {
		count = tnodes_at_level[level].nelem;
		for (member = 0; member < count; member++) {
			node = tnodes_at_level[level].list[member];
			if (!has_valid_T_arr(node) || !has_valid_T_req(node)) {
				continue;
			}
			fanout = tnode[node].num_edges;
			if (fanout == 0) { /* sink */
				tnode[node].backward_weight = 1.;
				continue;
			}
			fanout_edges = tnode[node].out_edges;
			for (edge = 0; edge < fanout; edge++) {
				neighbour = fanout_edges[edge].to_node;
				if (!has_valid_T_arr(neighbour) || !has_valid_T_req(neighbour)) {
					continue;
				}
				slack_on_edge = tnode[neighbour].T_req - tnode[node].T_req - fanout_edges[edge].Tdel;
				scale = pow((float) DISCOUNT_FUNCTION_BASE, -1 * slack_on_edge / criticality_denom);
				tnode[node].backward_weight += scale * tnode[neighbour].backward_weight;
			}
		}
	}
}
#endif
static void update_slacks(t_slack * slacks, int source_clock_domain, int sink_clock_domain, float criticality_denom,
		boolean update_slack) {

	/* Updates the slack and criticality of each sink pin, or equivalently
	each edge, of each net. Go through the list of nets. If the net's
	driver tnode has been used, go through each tedge of that tnode and
	take the minimum of the slack/criticality for this traversal and the
	existing values. Since the criticality_denom can vary greatly between
	traversals, we have to update slack and criticality separately.
	Only update slack if we need to print it later (update_slack == TRUE).

	Note: there is a correspondence in indexing between out_edges and the
	net data structure: out_edges[iedge] = net[inet].node_block[iedge + 1]
	There is an offset of 1 because net[inet].node_block includes the driver
	node at index 0, while out_edges is part of the driver node and does
	not bother to refer to itself. */

	int inet, iedge, inode, to_node, num_edges;
	t_tedge *tedge;
	float T_arr, Tdel, T_req, slk, timing_criticality;

	for (inet = 0; inet < num_timing_nets; inet++) {
		inode = f_net_to_driver_tnode[inet];
		T_arr = tnode[inode].T_arr;

		if (!(has_valid_T_arr(inode) && has_valid_T_req(inode))) {
			continue; /* Only update this net on this traversal if its
					   driver node has been updated on this traversal. */
		}

		num_edges = tnode[inode].num_edges;
		tedge = tnode[inode].out_edges;

		/* Each out-edge of the driver corresponds to one sink pin of the net
		(with the +1 index offset described in the header comment). */
		for (iedge = 0; iedge < num_edges; iedge++) {
			to_node = tedge[iedge].to_node;
			if (!(has_valid_T_arr(to_node) && has_valid_T_req(to_node))) {
				continue; /* Only update this edge on this traversal if this
						   particular sink node has been updated on this traversal. */
			}
			Tdel = tedge[iedge].Tdel;
			T_req = tnode[to_node].T_req;

			if (update_slack) {
				/* Update the slack for this edge. */
				slk = T_req - T_arr - Tdel;
				if (slk < slacks->slack[inet][iedge + 1]) {
					/* Only update on this traversal if this edge would have
					lower slack from this traversal than its current value. */
					slacks->slack[inet][iedge + 1] = slk;
				}
			}

#if SLACK_DEFINITION == 'R'
			/* NOTE(review): presumably 'R' selects the relaxed slack
			definition - confirm against where SLACK_DEFINITION is defined. */
			/* Since criticality_denom is not the same on each traversal,
			we have to update criticality separately. */
			timing_criticality = 1 - (T_req - T_arr - Tdel)/criticality_denom;
			/* Keep the MAXIMUM criticality seen over all traversals. */
			if (timing_criticality > slacks->timing_criticality[inet][iedge + 1]) {
				slacks->timing_criticality[inet][iedge + 1] = timing_criticality;
			}

#ifdef PATH_COUNTING
			/* Also update path criticality separately.  Kong uses slack / T_crit for the exponent,
			which is equivalent to criticality - 1.  Depending on how PATH_COUNTING is defined,
			different functional forms are used. */
			/* NOTE(review): these branches call unqualified max() while the rest of the
			file uses std::max - verify a max macro/using-declaration is in scope when
			PATH_COUNTING is enabled. */
#if PATH_COUNTING == 'S' /* Use sum of forward and backward weights. */
			slacks->path_criticality[inet][iedge + 1] = max(slacks->path_criticality[inet][iedge + 1],
				(tnode[inode].forward_weight + tnode[to_node].backward_weight) *
				pow((float) FINAL_DISCOUNT_FUNCTION_BASE, timing_criticality - 1));
#elif PATH_COUNTING == 'P' /* Use product of forward and backward weights. */
			slacks->path_criticality[inet][iedge + 1] = max(slacks->path_criticality[inet][iedge + 1],
				tnode[inode].forward_weight * tnode[to_node].backward_weight *
				pow((float) FINAL_DISCOUNT_FUNCTION_BASE, timing_criticality - 1));
#elif PATH_COUNTING == 'L' /* Use natural log of product of forward and backward weights. */
			slacks->path_criticality[inet][iedge + 1] = max(slacks->path_criticality[inet][iedge + 1],
				log(tnode[inode].forward_weight * tnode[to_node].backward_weight) *
				pow((float) FINAL_DISCOUNT_FUNCTION_BASE, timing_criticality - 1));
#elif PATH_COUNTING == 'R' /* Use product of natural logs of forward and backward weights. */
			slacks->path_criticality[inet][iedge + 1] = max(slacks->path_criticality[inet][iedge + 1],
				log(tnode[inode].forward_weight) * log(tnode[to_node].backward_weight) *
				pow((float) FINAL_DISCOUNT_FUNCTION_BASE, timing_criticality - 1));
#endif
#endif
#endif
		}
	}
}
| void print_lut_remapping(const char *fname) { | |
| FILE *fp; | |
| int inode, i; | |
| t_pb *pb; | |
| fp = my_fopen(fname, "w", 0); | |
| fprintf(fp, "# LUT_Name\tinput_pin_mapping\n"); | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| /* Print LUT input rebalancing */ | |
| if (tnode[inode].type == TN_PRIMITIVE_OPIN && tnode[inode].pb_graph_pin != NULL) { | |
| pb = block[tnode[inode].block].pb->rr_node_to_pb_mapping[tnode[inode].pb_graph_pin->pin_count_in_cluster]; | |
| if (pb != NULL && pb->lut_pin_remap != NULL) { | |
| assert(pb->pb_graph_node->pb_type->num_output_pins == 1 && pb->pb_graph_node->pb_type->num_clock_pins == 0); /* ensure LUT properties are valid */ | |
| assert(pb->pb_graph_node->num_input_ports == 1); | |
| fprintf(fp, "%s", pb->name); | |
| for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) { | |
| fprintf(fp, "\t%d", pb->lut_pin_remap[i]); | |
| } | |
| fprintf(fp, "\n"); | |
| } | |
| } | |
| } | |
| fclose(fp); | |
| } | |
| void print_critical_path(const char *fname) { | |
| /* Prints the critical path to a file. */ | |
| t_linked_int *critical_path_head, *critical_path_node; | |
| FILE *fp; | |
| int non_global_nets_on_crit_path, global_nets_on_crit_path; | |
| int tnodes_on_crit_path, inode, iblk, inet; | |
| e_tnode_type type; | |
| float total_net_delay, total_logic_delay, Tdel; | |
| critical_path_head = allocate_and_load_critical_path(); | |
| critical_path_node = critical_path_head; | |
| fp = my_fopen(fname, "w", 0); | |
| non_global_nets_on_crit_path = 0; | |
| global_nets_on_crit_path = 0; | |
| tnodes_on_crit_path = 0; | |
| total_net_delay = 0.; | |
| total_logic_delay = 0.; | |
| while (critical_path_node != NULL) { | |
| Tdel = print_critical_path_node(fp, critical_path_node); | |
| inode = critical_path_node->data; | |
| type = tnode[inode].type; | |
| tnodes_on_crit_path++; | |
| if (type == TN_CB_OPIN) { | |
| get_tnode_block_and_output_net(inode, &iblk, &inet); | |
| if (!timing_nets[inet].is_global) | |
| non_global_nets_on_crit_path++; | |
| else | |
| global_nets_on_crit_path++; | |
| total_net_delay += Tdel; | |
| } else { | |
| total_logic_delay += Tdel; | |
| } | |
| critical_path_node = critical_path_node->next; | |
| } | |
| fprintf(fp, "\nTnodes on critical path: %d Non-global nets on critical path: %d." | |
| "\n", tnodes_on_crit_path, non_global_nets_on_crit_path); | |
| fprintf(fp, "Global nets on critical path: %d.\n", global_nets_on_crit_path); | |
| fprintf(fp, "Total logic delay: %g (s) Total net delay: %g (s)\n", | |
| total_logic_delay, total_net_delay); | |
| vpr_printf(TIO_MESSAGE_INFO, "Nets on critical path: %d normal, %d global.\n", | |
| non_global_nets_on_crit_path, global_nets_on_crit_path); | |
| vpr_printf(TIO_MESSAGE_INFO, "Total logic delay: %g (s), total net delay: %g (s)\n", | |
| total_logic_delay, total_net_delay); | |
| /* Make sure total_logic_delay and total_net_delay add | |
| up to critical path delay,within 5 decimal places. */ | |
| assert(total_logic_delay + total_net_delay - get_critical_path_delay()/1e9 < 1e-5); | |
| fclose(fp); | |
| free_int_list(&critical_path_head); | |
| } | |
t_linked_int * allocate_and_load_critical_path(void) {

	/* Finds the critical path and returns a list of the tnodes on the critical *
	 * path in a linked list, from the path SOURCE to the path SINK.			*/

	t_linked_int *critical_path_head, *curr_crit_node, *prev_crit_node;
	int inode, iedge, to_node, num_at_level_zero, i, j, crit_node = OPEN, num_edges;
	int source_clock_domain = UNDEFINED, sink_clock_domain = UNDEFINED;
	float min_slack = HUGE_POSITIVE_FLOAT, slack;
	t_tedge *tedge;

	/* If there's only one clock, we can use the arrival and required times
	currently on the timing graph to find the critical path. If there are multiple
	clocks, however, the values currently on the timing graph correspond to the
	last constraint (pair of clock domains) analysed, which may not be the constraint
	with the critical path. In this case, we have to find the constraint with the
	least slack and redo the timing analysis for this constraint so we get the right
	values onto the timing graph. */

	if (g_sdc->num_constrained_clocks > 1) {
		/* The critical path belongs to the source and sink clock domains
		with the least slack. Find these clock domains now. */

		for (i = 0; i < g_sdc->num_constrained_clocks; i++) {
			for (j = 0; j < g_sdc->num_constrained_clocks; j++) {
				/* Scan the per-constraint least-slack table filled in by
				do_timing_analysis_for_constraint. */
				if (min_slack > f_timing_stats->least_slack[i][j]) {
					min_slack = f_timing_stats->least_slack[i][j];
					source_clock_domain = i;
					sink_clock_domain = j;
				}
			}
		}

		/* Do a timing analysis for this clock domain pair only.
		Set is_prepacked to FALSE since we don't care about the clusterer's normalized values.
		Set is_final_analysis to FALSE to get actual, rather than relaxed, slacks.
		Set max critical input/output paths to NULL since they aren't used unless is_prepacked is TRUE. */
		do_timing_analysis_for_constraint(source_clock_domain, sink_clock_domain, FALSE, FALSE, (long*)NULL, (long*)NULL);
	}

	/* Start at the source (level-0) tnode with the least slack (T_req-T_arr).
	This will form the head of our linked list of tnodes on the critical path. */
	min_slack = HUGE_POSITIVE_FLOAT;
	num_at_level_zero = tnodes_at_level[0].nelem;
	for (i = 0; i < num_at_level_zero; i++) {
		inode = tnodes_at_level[0].list[i];
		if (has_valid_T_arr(inode) && has_valid_T_req(inode)) { /* Valid arrival and required times */
			slack = tnode[inode].T_req - tnode[inode].T_arr;
			if (slack < min_slack) {
				crit_node = inode;
				min_slack = slack;
			}
		}
	}
	critical_path_head = (t_linked_int *) my_malloc(sizeof(t_linked_int));
	critical_path_head->data = crit_node;
	/* At least one level-0 tnode must have had valid times, or the timing
	analysis above produced nothing usable. */
	assert(crit_node != OPEN);
	prev_crit_node = critical_path_head;
	num_edges = tnode[crit_node].num_edges;

	/* Keep adding the tnode in this tnode's fanout which has the least slack
	to our critical path linked list, then jump to that tnode and repeat, until
	we hit a tnode with no edges, which is the sink of the critical path. */
	/* NOTE(review): assumes every non-sink tnode on the path has at least one
	fanout tnode with valid times; otherwise crit_node would not advance and
	this loop would not terminate - confirm this invariant holds. */
	while (num_edges != 0) {
		curr_crit_node = (t_linked_int *) my_malloc(sizeof(t_linked_int));
		prev_crit_node->next = curr_crit_node;
		tedge = tnode[crit_node].out_edges;
		min_slack = HUGE_POSITIVE_FLOAT;

		for (iedge = 0; iedge < num_edges; iedge++) {
			to_node = tedge[iedge].to_node;
			if (has_valid_T_arr(to_node) && has_valid_T_req(to_node)) { /* Valid arrival and required times */
				slack = tnode[to_node].T_req - tnode[to_node].T_arr;
				if (slack < min_slack) {
					crit_node = to_node;
					min_slack = slack;
				}
			}
		}

		curr_crit_node->data = crit_node;
		prev_crit_node = curr_crit_node;
		num_edges = tnode[crit_node].num_edges;
	}

	prev_crit_node->next = NULL;
	return (critical_path_head);
}
| void get_tnode_block_and_output_net(int inode, int *iblk_ptr, int *inet_ptr) { | |
| /* Returns the index of the block that this tnode is part of. If the tnode * | |
| * is a TN_CB_OPIN or TN_INPAD_OPIN (i.e. if it drives a net), the net index is * | |
| * returned via inet_ptr. Otherwise inet_ptr points at OPEN. */ | |
| int inet, ipin, iblk; | |
| e_tnode_type tnode_type; | |
| iblk = tnode[inode].block; | |
| tnode_type = tnode[inode].type; | |
| if (tnode_type == TN_CB_OPIN) { | |
| ipin = tnode[inode].pb_graph_pin->pin_count_in_cluster; | |
| inet = block[iblk].pb->rr_graph[ipin].net_num; | |
| assert(inet != OPEN); | |
| inet = vpack_to_clb_net_mapping[inet]; | |
| } else { | |
| inet = OPEN; | |
| } | |
| *iblk_ptr = iblk; | |
| *inet_ptr = inet; | |
| } | |
| void do_constant_net_delay_timing_analysis(t_timing_inf timing_inf, | |
| float constant_net_delay_value) { | |
| /* Does a timing analysis (simple) where it assumes that each net has a * | |
| * constant delay value. Used only when operation == TIMING_ANALYSIS_ONLY. */ | |
| /*struct s_linked_vptr *net_delay_chunk_list_head;*/ | |
| t_chunk net_delay_ch = {NULL, 0, NULL}; | |
| t_slack * slacks = NULL; | |
| float **net_delay = NULL; | |
| slacks = alloc_and_load_timing_graph(timing_inf); | |
| net_delay = alloc_net_delay(&net_delay_ch, timing_nets, | |
| num_timing_nets); | |
| load_constant_net_delay(net_delay, constant_net_delay_value, timing_nets, | |
| num_timing_nets); | |
| load_timing_graph_net_delays(net_delay); | |
| do_timing_analysis(slacks, FALSE, FALSE, TRUE); | |
| if (getEchoEnabled()) { | |
| if(isEchoFileEnabled(E_ECHO_CRITICAL_PATH)) | |
| print_critical_path("critical_path.echo"); | |
| if(isEchoFileEnabled(E_ECHO_TIMING_GRAPH)) | |
| print_timing_graph(getEchoFileName(E_ECHO_TIMING_GRAPH)); | |
| if(isEchoFileEnabled(E_ECHO_SLACK)) | |
| print_slack(slacks->slack, TRUE, getEchoFileName(E_ECHO_SLACK)); | |
| if(isEchoFileEnabled(E_ECHO_CRITICALITY)) | |
| print_criticality(slacks, TRUE, getEchoFileName(E_ECHO_CRITICALITY)); | |
| if(isEchoFileEnabled(E_ECHO_NET_DELAY)) | |
| print_net_delay(net_delay, getEchoFileName(E_ECHO_NET_DELAY)); | |
| } | |
| print_timing_stats(); | |
| free_timing_graph(slacks); | |
| free_net_delay(net_delay, &net_delay_ch); | |
| } | |
| #ifndef PATH_COUNTING | |
| static void update_normalized_costs(float criticality_denom, long max_critical_input_paths, | |
| long max_critical_output_paths) { | |
| int inode; | |
| /* Update the normalized costs for the clusterer. On each traversal, each cost is | |
| updated for tnodes analysed on this traversal if it would give this tnode a higher | |
| criticality when calculating block criticality for the clusterer. */ | |
| assert(criticality_denom != 0); /* Possible if timing analysis is being run pre-packing | |
| with all delays set to 0. This is not currently done, | |
| but if you're going to do it, you need to decide how | |
| best to normalize these values to suit your purposes. */ | |
| for (inode = 0; inode < num_tnodes; inode++) { | |
| /* Only calculate for tnodes which have valid arrival and required times. */ | |
| if (has_valid_T_arr(inode) && has_valid_T_req(inode)) { | |
| tnode[inode].prepacked_data->normalized_slack = std::min(tnode[inode].prepacked_data->normalized_slack, | |
| (tnode[inode].T_req - tnode[inode].T_arr)/criticality_denom); | |
| tnode[inode].prepacked_data->normalized_T_arr = std::max(tnode[inode].prepacked_data->normalized_T_arr, | |
| tnode[inode].T_arr/criticality_denom); | |
| tnode[inode].prepacked_data->normalized_total_critical_paths = std::max(tnode[inode].prepacked_data->normalized_total_critical_paths, | |
| ((float) tnode[inode].prepacked_data->num_critical_input_paths + tnode[inode].prepacked_data->num_critical_output_paths) / | |
| ((float) max_critical_input_paths + max_critical_output_paths)); | |
| } | |
| } | |
| } | |
| #endif | |
/* Set new arrival time
	Special code for LUTs to enable LUT input delay balancing
*/
static void set_and_balance_arrival_time(int to_node, int from_node, float Tdel, boolean do_lut_input_balancing) {
	/* Propagates arrival time across the edge from_node -> to_node.  When
	do_lut_input_balancing is TRUE and to_node is a LUT output, additionally
	remaps the LUT's logical input pins onto physical pins so that the most
	timing-critical signals get the fastest physical inputs, then recomputes
	the LUT output's arrival time from the balanced assignment. */
	int i, j;
	t_pb *pb;
	boolean rebalance;
	t_tnode *input_tnode;

	boolean *assigned = NULL;
	int fastest_unassigned_pin, most_crit_tnode, most_crit_pin;
	float min_delay, highest_T_arr, balanced_T_arr;

	/* Normal case for determining arrival time */
	tnode[to_node].T_arr = std::max(tnode[to_node].T_arr, tnode[from_node].T_arr + Tdel);

	/* Do LUT input rebalancing for LUTs */
	if (do_lut_input_balancing && tnode[to_node].type == TN_PRIMITIVE_OPIN && tnode[to_node].pb_graph_pin != NULL) {
		pb = block[tnode[to_node].block].pb->rr_node_to_pb_mapping[tnode[to_node].pb_graph_pin->pin_count_in_cluster];
		if (pb != NULL && pb->lut_pin_remap != NULL) {
			/* this is a LUT primitive, do pin swapping */
			assert(pb->pb_graph_node->pb_type->num_output_pins == 1 && pb->pb_graph_node->pb_type->num_clock_pins == 0); /* ensure LUT properties are valid */
			assert(pb->pb_graph_node->num_input_ports == 1);
			assert(tnode[from_node].block == tnode[to_node].block);

			/* assign from_node to default location (identity mapping); this pin
			must not have been assigned before on this traversal */
			assert(pb->lut_pin_remap[tnode[from_node].pb_graph_pin->pin_number] == OPEN);
			pb->lut_pin_remap[tnode[from_node].pb_graph_pin->pin_number] = tnode[from_node].pb_graph_pin->pin_number;

			/* If all input pins are known, perform LUT input delay rebalancing, do nothing otherwise */
			rebalance = TRUE;
			for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) {
				input_tnode = block[tnode[to_node].block].pb->rr_graph[pb->pb_graph_node->input_pins[0][i].pin_count_in_cluster].tnode;
				if (input_tnode != NULL && pb->lut_pin_remap[i] == OPEN) {
					rebalance = FALSE; /* a used input has not arrived yet */
				}
			}
			if (rebalance == TRUE) {
				/* Rebalance LUT inputs so that the most critical paths get the fastest inputs */
				/* OPEN (-1) is used as an "unset" sentinel for balanced_T_arr
				and min_delay even though they are floats. */
				balanced_T_arr = OPEN;
				assigned = (boolean*)my_calloc(pb->pb_graph_node->num_input_pins[0], sizeof(boolean));
				/* Clear pin remapping */
				for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) {
					pb->lut_pin_remap[i] = OPEN;
				}
				/* load new T_arr and pin mapping: greedily pair the fastest
				remaining physical pin with the most critical remaining signal */
				for (i = 0; i < pb->pb_graph_node->num_input_pins[0]; i++) {
					/* Find fastest physical input pin of LUT */
					fastest_unassigned_pin = OPEN;
					min_delay = OPEN;
					for (j = 0; j < pb->pb_graph_node->num_input_pins[0]; j++) {
						if (pb->lut_pin_remap[j] == OPEN) {
							if (fastest_unassigned_pin == OPEN) {
								fastest_unassigned_pin = j;
								min_delay = pb->pb_graph_node->input_pins[0][j].pin_timing_del_max[0];
							} else if (min_delay > pb->pb_graph_node->input_pins[0][j].pin_timing_del_max[0]) {
								fastest_unassigned_pin = j;
								min_delay = pb->pb_graph_node->input_pins[0][j].pin_timing_del_max[0];
							}
						}
					}
					assert(fastest_unassigned_pin != OPEN);

					/* Find most critical LUT input pin in user circuit
					(highest arrival time among unassigned, used inputs) */
					most_crit_tnode = OPEN;
					highest_T_arr = OPEN;
					most_crit_pin = OPEN;
					for (j = 0; j < pb->pb_graph_node->num_input_pins[0]; j++) {
						input_tnode = block[tnode[to_node].block].pb->rr_graph[pb->pb_graph_node->input_pins[0][j].pin_count_in_cluster].tnode;
						if (input_tnode != NULL && assigned[j] == FALSE) {
							if (most_crit_tnode == OPEN) {
								most_crit_tnode = get_tnode_index(input_tnode);
								highest_T_arr = input_tnode->T_arr;
								most_crit_pin = j;
							} else if (highest_T_arr < input_tnode->T_arr) {
								most_crit_tnode = get_tnode_index(input_tnode);
								highest_T_arr = input_tnode->T_arr;
								most_crit_pin = j;
							}
						}
					}

					if (most_crit_tnode == OPEN) {
						break; /* No used inputs left to assign. */
					} else {
						/* Overwrite the edge delay of this input with the
						physical pin's delay, record the pairing, and track the
						worst (input arrival + pin delay) as the new T_arr. */
						assert(tnode[most_crit_tnode].num_edges == 1);
						tnode[most_crit_tnode].out_edges[0].Tdel = min_delay;
						pb->lut_pin_remap[fastest_unassigned_pin] = most_crit_pin;
						assigned[most_crit_pin] = TRUE;
						if (balanced_T_arr < min_delay + highest_T_arr) {
							balanced_T_arr = min_delay + highest_T_arr;
						}
					}
				}
				free(assigned);
				if (balanced_T_arr != OPEN) {
					tnode[to_node].T_arr = balanced_T_arr;
				}
			}
		}
	}
}
static void load_clock_domain_and_clock_and_io_delay(boolean is_prepacked) {
/* Loads clock domain and clock delay onto TN_FF_SOURCE and TN_FF_SINK tnodes.
The clock domain of each clock is its index in g_sdc->constrained_clocks.
We do this by matching each clock input pad to a constrained clock name, then
propagating forward its domain index to all flip-flops fed by it (TN_FF_CLOCK
tnodes), then later looking up the TN_FF_CLOCK tnode corresponding to every
TN_FF_SOURCE and TN_FF_SINK tnode. We also add up the delays along the clock net
to each TN_FF_CLOCK tnode to give it (and the SOURCE/SINK nodes) a clock delay.

Also loads input delay/output delay (from set_input_delay or set_output_delay SDC
constraints) onto the tedges between TN_INPAD_SOURCE/OPIN and TN_OUTPAD_IPIN/SINK
tnodes.  Finds fanout of each clock domain, including virtual (external) clocks.
Marks unconstrained I/Os with a dummy clock domain (-1). */

	int i, iclock, inode, num_at_level, clock_index, input_index, output_index;
	char * net_name;
	t_tnode * clock_node;

	/* Wipe fanout of each clock domain in g_sdc->constrained_clocks. */
	for (iclock = 0; iclock < g_sdc->num_constrained_clocks; iclock++) {
		g_sdc->constrained_clocks[iclock].fanout = 0;
	}

	/* First, visit all TN_INPAD_SOURCE tnodes */
	num_at_level = tnodes_at_level[0].nelem;	/* There are num_at_level top-level tnodes. */
	for (i = 0; i < num_at_level; i++) {
		inode = tnodes_at_level[0].list[i];	/* Iterate through each tnode. inode is the index of the tnode in the array tnode. */
		if (tnode[inode].type == TN_INPAD_SOURCE) {	/* See if this node is the start of an I/O pad (as opposed to a flip-flop source). */
			net_name = find_tnode_net_name(inode, is_prepacked);
			if ((clock_index = find_clock(net_name)) != -1) { /* We have a clock inpad. */
				/* Set clock skew to 0 at the source and propagate skew
				recursively to all connected nodes, adding delay as we go.
				Set the clock domain to the index of the clock in the
				g_sdc->constrained_clocks array and propagate unchanged. */
				tnode[inode].clock_delay = 0.;
				tnode[inode].clock_domain = clock_index;
				propagate_clock_domain_and_skew(inode);

				/* Set the clock domain of this clock inpad to -1, so that we do not analyse it.
				If we did not do this, the clock net would be analysed on the same iteration of
				the timing analyzer as the flip-flops it drives! */
				tnode[inode].clock_domain = -1;

			} else if ((input_index = find_input(net_name)) != -1) {
				/* We have a constrained non-clock inpad - find the clock it's constrained on. */
				clock_index = find_clock(g_sdc->constrained_inputs[input_index].clock_name);
				assert(clock_index != -1);
				/* The clock domain for this input is that of its virtual clock */
				tnode[inode].clock_domain = clock_index;
				/* Increment the fanout of this virtual clock domain. */
				g_sdc->constrained_clocks[clock_index].fanout++;
				/* Mark input delay specified in SDC file on the timing graph edge leading out from the TN_INPAD_SOURCE node. */
				tnode[inode].out_edges[0].Tdel = g_sdc->constrained_inputs[input_index].delay * 1e-9; /* normalize to be in seconds not ns */
			} else { /* We have an unconstrained input - mark with dummy clock domain and do not analyze. */
				tnode[inode].clock_domain = -1;
			}
		}
	}

    /* Second, visit all TN_OUTPAD_SINK tnodes. Unlike for TN_INPAD_SOURCE tnodes,
	we have to search the entire tnode array since these are all on different levels. */
	for (inode = 0; inode < num_tnodes; inode++) {
		if (tnode[inode].type == TN_OUTPAD_SINK) {
			/*  Since the pb_graph_pin of TN_OUTPAD_SINK tnodes points to NULL, we have to find the net
			from the pb_graph_pin of the corresponding TN_OUTPAD_IPIN node.
			Exploit the fact that the TN_OUTPAD_IPIN node will always be one prior in the tnode array. */
			assert(tnode[inode - 1].type == TN_OUTPAD_IPIN);
			net_name = find_tnode_net_name(inode, is_prepacked);
			output_index = find_output(net_name + 4); /* the + 4 removes the prefix "out:" automatically prepended to outputs */
			if (output_index != -1) {
				/* We have a constrained outpad, find the clock it's constrained on. */
				clock_index = find_clock(g_sdc->constrained_outputs[output_index].clock_name);
				assert(clock_index != -1);
				/* The clock domain for this output is that of its virtual clock */
				tnode[inode].clock_domain = clock_index;
				/* Increment the fanout of this virtual clock domain. */
				g_sdc->constrained_clocks[clock_index].fanout++;
				/* Mark output delay specified in SDC file on the timing graph edge leading into the TN_OUTPAD_SINK node.
				However, this edge is part of the corresponding TN_OUTPAD_IPIN node.
				Exploit the fact that the TN_OUTPAD_IPIN node will always be one prior in the tnode array. */
				tnode[inode - 1].out_edges[0].Tdel = g_sdc->constrained_outputs[output_index].delay * 1e-9; /* normalize to be in seconds not ns */
			} else { /* We have an unconstrained output - mark with dummy clock domain and do not analyze. */
				tnode[inode].clock_domain = -1;
			}
		}
	}

	/* Third, visit all TN_FF_SOURCE and TN_FF_SINK tnodes, and transfer the clock domain and skew from their corresponding TN_FF_CLOCK tnodes */
	for (inode = 0; inode < num_tnodes; inode++) {
		if (tnode[inode].type == TN_FF_SOURCE || tnode[inode].type == TN_FF_SINK) {
			clock_node = find_ff_clock_tnode(inode, is_prepacked);
			tnode[inode].clock_domain = clock_node->clock_domain;
			tnode[inode].clock_delay = clock_node->clock_delay;
		}
	}
}
| static void propagate_clock_domain_and_skew(int inode) { | |
| /* Given a tnode indexed by inode (which is part of a clock net), | |
| * propagate forward the clock domain (unchanged) and skew (adding the delay of edges) to all nodes in its fanout. | |
| * We then call recursively on all children in a depth-first search. If num_edges is 0, we should be at an TN_FF_CLOCK tnode; we then set the | |
| * TN_FF_SOURCE and TN_FF_SINK nodes to have the same clock domain and skew as the TN_FF_CLOCK node. We implicitly rely on a tnode not being | |
| * part of two separate clock nets, since undefined behaviour would result if one DFS overwrote the results of another. This may | |
| * be problematic in cases of multiplexed or locally-generated clocks. */ | |
| int iedge, to_node; | |
| t_tedge * tedge; | |
| tedge = tnode[inode].out_edges; /* Get the list of edges from the node we're visiting. */ | |
| if (!tedge) { /* Leaf/sink node; base case of the recursion. */ | |
| assert(tnode[inode].type == TN_FF_CLOCK); | |
| assert(tnode[inode].clock_domain != -1); /* Make sure clock domain is valid. */ | |
| g_sdc->constrained_clocks[tnode[inode].clock_domain].fanout++; | |
| return; | |
| } | |
| for (iedge = 0; iedge < tnode[inode].num_edges; iedge++) { /* Go through each edge coming out from this tnode */ | |
| to_node = tedge[iedge].to_node; | |
| /* Propagate clock skew forward along this clock net, adding the delay of the wires (edges) of the clock network. */ | |
| tnode[to_node].clock_delay = tnode[inode].clock_delay + tedge[iedge].Tdel; | |
| /* Propagate clock domain forward unchanged */ | |
| tnode[to_node].clock_domain = tnode[inode].clock_domain; | |
| /* Finally, call recursively on the destination tnode. */ | |
| propagate_clock_domain_and_skew(to_node); | |
| } | |
| } | |
| static char * find_tnode_net_name(int inode, boolean is_prepacked) { | |
| /* Finds the name of the net which a tnode (inode) is on (different for pre-/post-packed netlists). */ | |
| int logic_block; /* Name chosen not to conflict with the array logical_block */ | |
| char * net_name; | |
| t_pb * physical_block; | |
| t_pb_graph_pin * pb_graph_pin; | |
| logic_block = tnode[inode].block; | |
| if (is_prepacked) { | |
| net_name = logical_block[logic_block].name; | |
| } else { | |
| physical_block = block[logic_block].pb; | |
| pb_graph_pin = tnode[inode].pb_graph_pin; | |
| net_name = physical_block->rr_node_to_pb_mapping[pb_graph_pin->pin_count_in_cluster]->name; | |
| } | |
| return net_name; | |
| } | |
| static t_tnode * find_ff_clock_tnode(int inode, boolean is_prepacked) { | |
| /* Finds the TN_FF_CLOCK tnode on the same flipflop as an TN_FF_SOURCE or TN_FF_SINK tnode. */ | |
| int logic_block; /* Name chosen not to conflict with the array logical_block */ | |
| t_tnode * ff_clock_tnode; | |
| t_rr_node * rr_graph; | |
| t_pb_graph_node * parent_pb_graph_node; | |
| t_pb_graph_pin * ff_source_or_sink_pb_graph_pin, * clock_pb_graph_pin; | |
| logic_block = tnode[inode].block; | |
| if (is_prepacked) { | |
| ff_clock_tnode = logical_block[logic_block].clock_net_tnode; | |
| } else { | |
| rr_graph = block[logic_block].pb->rr_graph; | |
| ff_source_or_sink_pb_graph_pin = tnode[inode].pb_graph_pin; | |
| parent_pb_graph_node = ff_source_or_sink_pb_graph_pin->parent_node; | |
| /* Make sure there's only one clock port and only one clock pin in that port */ | |
| assert(parent_pb_graph_node->num_clock_ports == 1); | |
| assert(parent_pb_graph_node->num_clock_pins[0] == 1); | |
| clock_pb_graph_pin = &parent_pb_graph_node->clock_pins[0][0]; | |
| ff_clock_tnode = rr_graph[clock_pb_graph_pin->pin_count_in_cluster].tnode; | |
| } | |
| assert(ff_clock_tnode != NULL); | |
| assert(ff_clock_tnode->type == TN_FF_CLOCK); | |
| return ff_clock_tnode; | |
| } | |
| static int find_clock(char * net_name) { | |
| /* Given a string net_name, find whether it's the name of a clock in the array g_sdc->constrained_clocks. * | |
| * if it is, return the clock's index in g_sdc->constrained_clocks; if it's not, return -1. */ | |
| int index; | |
| for (index = 0; index < g_sdc->num_constrained_clocks; index++) { | |
| if (strcmp(net_name, g_sdc->constrained_clocks[index].name) == 0) { | |
| return index; | |
| } | |
| } | |
| return -1; | |
| } | |
| static int find_input(char * net_name) { | |
| /* Given a string net_name, find whether it's the name of a constrained input in the array g_sdc->constrained_inputs. * | |
| * if it is, return its index in g_sdc->constrained_inputs; if it's not, return -1. */ | |
| int index; | |
| for (index = 0; index < g_sdc->num_constrained_inputs; index++) { | |
| if (strcmp(net_name, g_sdc->constrained_inputs[index].name) == 0) { | |
| return index; | |
| } | |
| } | |
| return -1; | |
| } | |
| static int find_output(char * net_name) { | |
| /* Given a string net_name, find whether it's the name of a constrained output in the array g_sdc->constrained_outputs. * | |
| * if it is, return its index in g_sdc->constrained_outputs; if it's not, return -1. */ | |
| int index; | |
| for (index = 0; index < g_sdc->num_constrained_outputs; index++) { | |
| if (strcmp(net_name, g_sdc->constrained_outputs[index].name) == 0) { | |
| return index; | |
| } | |
| } | |
| return -1; | |
| } | |
| static int find_cf_constraint(char * source_clock_name, char * sink_ff_name) { | |
| /* Given a source clock domain and a sink flip-flop, find out if there's an override constraint between them. | |
| If there is, return the index in g_sdc->cf_constraints; if there is not, return -1. */ | |
| int icf, isource, isink; | |
| for (icf = 0; icf < g_sdc->num_cf_constraints; icf++) { | |
| for (isource = 0; isource < g_sdc->cf_constraints[icf].num_source; isource++) { | |
| if (strcmp(g_sdc->cf_constraints[icf].source_list[isource], source_clock_name) == 0) { | |
| for (isink = 0; isink < g_sdc->cf_constraints[icf].num_sink; isink++) { | |
| if (strcmp(g_sdc->cf_constraints[icf].sink_list[isink], sink_ff_name) == 0) { | |
| return icf; | |
| } | |
| } | |
| } | |
| } | |
| } | |
| return -1; | |
| } | |
| static inline int get_tnode_index(t_tnode * node) { | |
| /* Returns the index of pointer_to_tnode in the array tnode [0..num_tnodes - 1] | |
| using pointer arithmetic. */ | |
| return node - tnode; | |
| } | |
| static inline boolean has_valid_T_arr(int inode) { | |
| /* Has this tnode's arrival time been changed from its original value of HUGE_NEGATIVE_FLOAT? */ | |
| return (boolean) (tnode[inode].T_arr > HUGE_NEGATIVE_FLOAT + 1); | |
| } | |
| static inline boolean has_valid_T_req(int inode) { | |
| /* Has this tnode's required time been changed from its original value of HUGE_POSITIVE_FLOAT? */ | |
| return (boolean) (tnode[inode].T_req < HUGE_POSITIVE_FLOAT - 1); | |
| } | |
| #ifndef PATH_COUNTING | |
| boolean has_valid_normalized_T_arr(int inode) { | |
| /* Has this tnode's normalized_T_arr been changed from its original value of HUGE_NEGATIVE_FLOAT? */ | |
| return (boolean) (tnode[inode].prepacked_data->normalized_T_arr > HUGE_NEGATIVE_FLOAT + 1); | |
| } | |
| #endif | |
| float get_critical_path_delay(void) { | |
| /* Finds the critical path delay, which is the minimum clock period required to meet the constraint | |
| corresponding to the pair of source and sink clock domains with the least slack in the design. */ | |
| int source_clock_domain, sink_clock_domain; | |
| float least_slack_in_design = HUGE_POSITIVE_FLOAT, critical_path_delay = UNDEFINED; | |
| if (!g_sdc) return UNDEFINED; /* If timing analysis is off, for instance. */ | |
| for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) { | |
| for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) { | |
| if (least_slack_in_design > f_timing_stats->least_slack[source_clock_domain][sink_clock_domain]) { | |
| least_slack_in_design = f_timing_stats->least_slack[source_clock_domain][sink_clock_domain]; | |
| critical_path_delay = f_timing_stats->cpd[source_clock_domain][sink_clock_domain]; | |
| } | |
| } | |
| } | |
| return critical_path_delay * 1e9; /* Convert to nanoseconds */ | |
| } | |
void print_timing_stats(void) {

	/* Prints critical path delay/fmax, least slack in design, and, for multiple-clock designs,
	minimum required clock period to meet each constraint, least slack per constraint,
	geometric average clock frequency, and fanout-weighted geometric average clock frequency.
	Reads the globals f_timing_stats (per-domain-pair slack/cpd) and g_sdc (SDC constraints);
	all output goes through vpr_printf. */

	int source_clock_domain, sink_clock_domain, clock_domain, fanout, total_fanout = 0,
		num_netlist_clocks_with_intra_domain_paths = 0;
	float geomean_period = 1., least_slack_in_design = HUGE_POSITIVE_FLOAT, critical_path_delay = UNDEFINED;
	double fanout_weighted_geomean_period = 1.;
	boolean found;

	/* Find critical path delay. If the pb_max_internal_delay is greater than this, it becomes
	the limiting factor on critical path delay, so print that instead, with a special message. */
	/* The domain pair with the least slack anywhere in the design sets the critical path. */
	for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {
		for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {
			if (least_slack_in_design > f_timing_stats->least_slack[source_clock_domain][sink_clock_domain]) {
				least_slack_in_design = f_timing_stats->least_slack[source_clock_domain][sink_clock_domain];
				critical_path_delay = f_timing_stats->cpd[source_clock_domain][sink_clock_domain];
			}
		}
	}

	if (pb_max_internal_delay != UNDEFINED && pb_max_internal_delay > critical_path_delay) {
		/* A block-internal path is slower than any analysed path; it caps fmax. */
		critical_path_delay = pb_max_internal_delay;
		vpr_printf(TIO_MESSAGE_INFO, "Final critical path: %g ns\n", 1e9 * critical_path_delay);
		vpr_printf(TIO_MESSAGE_INFO, "\t(capped by fmax of block type %s)\n", pbtype_max_internal_delay->name);
	} else {
		vpr_printf(TIO_MESSAGE_INFO, "Final critical path: %g ns\n", 1e9 * critical_path_delay);
	}

	if (g_sdc->num_constrained_clocks <= 1) {
		/* Although critical path delay is always well-defined, it doesn't make sense to talk about fmax for multi-clock circuits */
		vpr_printf(TIO_MESSAGE_INFO, "f_max: %g MHz\n", 1e-6 / critical_path_delay);
	}

	/* Also print the least slack in the design */
	vpr_printf(TIO_MESSAGE_INFO, "\n");
	vpr_printf(TIO_MESSAGE_INFO, "Least slack in design: %g ns\n", 1e9 * least_slack_in_design);
	vpr_printf(TIO_MESSAGE_INFO, "\n");

	if (g_sdc->num_constrained_clocks > 1) { /* Multiple-clock design */

		/* Print minimum possible clock period to meet each constraint. Convert to nanoseconds. */
		vpr_printf(TIO_MESSAGE_INFO, "Minimum possible clock period to meet each constraint (including skew effects):\n");
		for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {
			/* Print the intra-domain constraint if it was analysed.
			(A constraint above NEGATIVE_EPSILON means the pair was analysed;
			DO_NOT_ANALYSE pairs print "--".) */
			if (g_sdc->domain_constraint[source_clock_domain][source_clock_domain] > NEGATIVE_EPSILON) {
				vpr_printf(TIO_MESSAGE_INFO, "%s to %s: %g ns (%g MHz)\n",
						g_sdc->constrained_clocks[source_clock_domain].name,
						g_sdc->constrained_clocks[source_clock_domain].name,
						1e9 * f_timing_stats->cpd[source_clock_domain][source_clock_domain],
						1e-6 / f_timing_stats->cpd[source_clock_domain][source_clock_domain]);
			} else {
				vpr_printf(TIO_MESSAGE_INFO, "%s to %s: --\n",
						g_sdc->constrained_clocks[source_clock_domain].name,
						g_sdc->constrained_clocks[source_clock_domain].name);
			}
			/* Then, print all other constraints on separate lines, indented. We re-print
			the source clock domain's name so there's no ambiguity when parsing. */
			for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {
				if (source_clock_domain == sink_clock_domain) continue; /* already done that */
				if (g_sdc->domain_constraint[source_clock_domain][sink_clock_domain] > NEGATIVE_EPSILON) {
					/* If this domain pair was analysed */
					vpr_printf(TIO_MESSAGE_INFO, "\t%s to %s: %g ns (%g MHz)\n",
							g_sdc->constrained_clocks[source_clock_domain].name,
							g_sdc->constrained_clocks[sink_clock_domain].name,
							1e9 * f_timing_stats->cpd[source_clock_domain][sink_clock_domain],
							1e-6 / f_timing_stats->cpd[source_clock_domain][sink_clock_domain]);
				} else {
					vpr_printf(TIO_MESSAGE_INFO, "\t%s to %s: --\n",
							g_sdc->constrained_clocks[source_clock_domain].name,
							g_sdc->constrained_clocks[sink_clock_domain].name);
				}
			}
		}

		/* Print least slack per constraint. */
		vpr_printf(TIO_MESSAGE_INFO, "\n");
		vpr_printf(TIO_MESSAGE_INFO, "Least slack per constraint:\n");
		for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {
			/* Print the intra-domain slack if valid (i.e. it was lowered from its
			HUGE_POSITIVE_FLOAT initial value). */
			if (f_timing_stats->least_slack[source_clock_domain][source_clock_domain] < HUGE_POSITIVE_FLOAT - 1) {
				vpr_printf(TIO_MESSAGE_INFO, "%s to %s: %g ns\n",
						g_sdc->constrained_clocks[source_clock_domain].name,
						g_sdc->constrained_clocks[source_clock_domain].name,
						1e9 * f_timing_stats->least_slack[source_clock_domain][source_clock_domain]);
			} else {
				vpr_printf(TIO_MESSAGE_INFO, "%s to %s: --\n",
						g_sdc->constrained_clocks[source_clock_domain].name,
						g_sdc->constrained_clocks[source_clock_domain].name);
			}
			/* Then, print all other slacks on separate lines. */
			for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {
				if (source_clock_domain == sink_clock_domain) continue; /* already done that */
				if (f_timing_stats->least_slack[source_clock_domain][sink_clock_domain] < HUGE_POSITIVE_FLOAT - 1) {
					/* If this domain pair was analysed and has a valid slack */
					vpr_printf(TIO_MESSAGE_INFO, "\t%s to %s: %g ns\n",
							g_sdc->constrained_clocks[source_clock_domain].name,
							g_sdc->constrained_clocks[sink_clock_domain].name,
							1e9 * f_timing_stats->least_slack[source_clock_domain][sink_clock_domain]);
				} else {
					vpr_printf(TIO_MESSAGE_INFO, "\t%s to %s: --\n",
							g_sdc->constrained_clocks[source_clock_domain].name,
							g_sdc->constrained_clocks[sink_clock_domain].name);
				}
			}
		}

		/* Calculate geometric mean f_max (fanout-weighted and unweighted) from the diagonal (intra-domain) entries of critical_path_delay,
		excluding domains without intra-domain paths (for which the timing constraint is DO_NOT_ANALYSE) and virtual clocks. */
		found = FALSE;
		for (clock_domain = 0; clock_domain < g_sdc->num_constrained_clocks; clock_domain++) {
			/* Accumulate products of intra-domain periods; only real netlist clocks
			with analysed intra-domain paths contribute. */
			if (g_sdc->domain_constraint[clock_domain][clock_domain] > NEGATIVE_EPSILON && g_sdc->constrained_clocks[clock_domain].is_netlist_clock) {
				geomean_period *= f_timing_stats->cpd[clock_domain][clock_domain];
				fanout = g_sdc->constrained_clocks[clock_domain].fanout;
				fanout_weighted_geomean_period *= pow((double) f_timing_stats->cpd[clock_domain][clock_domain], fanout);
				total_fanout += fanout;
				num_netlist_clocks_with_intra_domain_paths++;
				found = TRUE;
			}
		}
		if (found) { /* Only print these if we found at least one clock domain with intra-domain paths. */
			/* Take the n-th (resp. total-fanout-th) root of the accumulated products
			to finish the geometric means. */
			geomean_period = pow(geomean_period, (float) 1/num_netlist_clocks_with_intra_domain_paths);
			fanout_weighted_geomean_period = pow(fanout_weighted_geomean_period, (double) 1/total_fanout);
			/* Convert to MHz */
			vpr_printf(TIO_MESSAGE_INFO, "\n");
			vpr_printf(TIO_MESSAGE_INFO, "Geometric mean intra-domain period: %g ns (%g MHz)\n",
					1e9 * geomean_period, 1e-6 / geomean_period);
			vpr_printf(TIO_MESSAGE_INFO, "Fanout-weighted geomean intra-domain period: %g ns (%g MHz)\n",
					1e9 * fanout_weighted_geomean_period, 1e-6 / fanout_weighted_geomean_period);
		}

		vpr_printf(TIO_MESSAGE_INFO, "\n");
	}
}
static void print_timing_constraint_info(const char *fname) {
	/* Prints the contents of g_sdc->domain_constraint, g_sdc->constrained_clocks, constrained_ios and g_sdc->cc_constraints to a file.
	The file fname is created/overwritten; the caller supplies only the path. */

	FILE * fp;
	int source_clock_domain, sink_clock_domain, i, j;
	/* Per-clock name lengths, used to align the constraint matrix columns. */
	int * clock_name_length = (int *) my_malloc(g_sdc->num_constrained_clocks * sizeof(int)); /* Array of clock name lengths */
	int max_clock_name_length = INT_MIN;
	char * clock_name;

	fp = my_fopen(fname, "w", 0);

	/* Get lengths of each clock name and max length so we can space the columns properly. */
	for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {
		clock_name = g_sdc->constrained_clocks[sink_clock_domain].name;
		clock_name_length[sink_clock_domain] = strlen(clock_name);
		if (clock_name_length[sink_clock_domain] > max_clock_name_length) {
			max_clock_name_length = clock_name_length[sink_clock_domain];
		}
	}

	/* First, combine info from g_sdc->domain_constraint and g_sdc->constrained_clocks into a matrix
	(they're indexed the same as each other). */

	fprintf(fp, "Timing constraints in ns (source clock domains down left side, sink along top).\n"
				"A value of -1.00 means the pair of source and sink domains will not be analysed.\n\n");

	/* Header row: sink clock names, padded so each column is at least 8 wide. */
	print_spaces(fp, max_clock_name_length + 4);

	for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {
		fprintf(fp, "%s", g_sdc->constrained_clocks[sink_clock_domain].name);
		/* Minimum column width of 8 */
		print_spaces(fp, std::max(8 - clock_name_length[sink_clock_domain], 4));
	}
	fprintf(fp, "\n");

	/* One matrix row per source clock domain. */
	for (source_clock_domain = 0; source_clock_domain < g_sdc->num_constrained_clocks; source_clock_domain++) {
		fprintf(fp, "%s", g_sdc->constrained_clocks[source_clock_domain].name);
		print_spaces(fp, max_clock_name_length + 4 - clock_name_length[source_clock_domain]);
		for (sink_clock_domain = 0; sink_clock_domain < g_sdc->num_constrained_clocks; sink_clock_domain++) {
			fprintf(fp, "%5.2f", g_sdc->domain_constraint[source_clock_domain][sink_clock_domain]);
			/* Minimum column width of 8 */
			print_spaces(fp, std::max(clock_name_length[sink_clock_domain] - 1, 3));
		}
		fprintf(fp, "\n");
	}

	free(clock_name_length);

	/* Second, print I/O constraints. */
	fprintf(fp, "\nList of constrained inputs (note: constraining a clock net input has no effect):\n");
	for (i = 0; i < g_sdc->num_constrained_inputs; i++) {
		fprintf(fp, "Input name %s on clock %s with input delay %.2f ns\n",
			g_sdc->constrained_inputs[i].name, g_sdc->constrained_inputs[i].clock_name, g_sdc->constrained_inputs[i].delay);
	}

	fprintf(fp, "\nList of constrained outputs:\n");
	for (i = 0; i < g_sdc->num_constrained_outputs; i++) {
		fprintf(fp, "Output name %s on clock %s with output delay %.2f ns\n",
			g_sdc->constrained_outputs[i].name, g_sdc->constrained_outputs[i].clock_name, g_sdc->constrained_outputs[i].delay);
	}

	/* Third, print override constraints.
	The four loops below handle the four override-constraint lists
	(clock->clock, clock->flipflop, flipflop->clock, flipflop->flipflop)
	with identical formatting.  NOTE(review): each relies on num_sink >= 1 —
	sink_list[j] after a loop to num_sink - 1 reads sink_list[-1] otherwise. */
	fprintf(fp, "\nList of override constraints (non-default constraints created by set_false_path, set_clock_groups, \nset_max_delay, and set_multicycle_path):\n");

	for (i = 0; i < g_sdc->num_cc_constraints; i++) {
		fprintf(fp, "Clock domain");
		for (j = 0; j < g_sdc->cc_constraints[i].num_source; j++) {
			fprintf(fp, " %s,", g_sdc->cc_constraints[i].source_list[j]);
		}
		fprintf(fp, " to clock domain");
		for (j = 0; j < g_sdc->cc_constraints[i].num_sink - 1; j++) {
			fprintf(fp, " %s,", g_sdc->cc_constraints[i].sink_list[j]);
		} /* We have to print the last one separately because we don't want a comma after it. */
		if (g_sdc->cc_constraints[i].num_multicycles == 0) { /* not a multicycle constraint */
			fprintf(fp, " %s: %.2f ns\n", g_sdc->cc_constraints[i].sink_list[j], g_sdc->cc_constraints[i].constraint);
		} else { /* multicycle constraint */
			fprintf(fp, " %s: %d multicycles\n", g_sdc->cc_constraints[i].sink_list[j], g_sdc->cc_constraints[i].num_multicycles);
		}
	}

	for (i = 0; i < g_sdc->num_cf_constraints; i++) {
		fprintf(fp, "Clock domain");
		for (j = 0; j < g_sdc->cf_constraints[i].num_source; j++) {
			fprintf(fp, " %s,", g_sdc->cf_constraints[i].source_list[j]);
		}
		fprintf(fp, " to flip-flop");
		for (j = 0; j < g_sdc->cf_constraints[i].num_sink - 1; j++) {
			fprintf(fp, " %s,", g_sdc->cf_constraints[i].sink_list[j]);
		} /* We have to print the last one separately because we don't want a comma after it. */
		if (g_sdc->cf_constraints[i].num_multicycles == 0) { /* not a multicycle constraint */
			fprintf(fp, " %s: %.2f ns\n", g_sdc->cf_constraints[i].sink_list[j], g_sdc->cf_constraints[i].constraint);
		} else { /* multicycle constraint */
			fprintf(fp, " %s: %d multicycles\n", g_sdc->cf_constraints[i].sink_list[j], g_sdc->cf_constraints[i].num_multicycles);
		}
	}

	for (i = 0; i < g_sdc->num_fc_constraints; i++) {
		fprintf(fp, "Flip-flop");
		for (j = 0; j < g_sdc->fc_constraints[i].num_source; j++) {
			fprintf(fp, " %s,", g_sdc->fc_constraints[i].source_list[j]);
		}
		fprintf(fp, " to clock domain");
		for (j = 0; j < g_sdc->fc_constraints[i].num_sink - 1; j++) {
			fprintf(fp, " %s,", g_sdc->fc_constraints[i].sink_list[j]);
		} /* We have to print the last one separately because we don't want a comma after it. */
		if (g_sdc->fc_constraints[i].num_multicycles == 0) { /* not a multicycle constraint */
			fprintf(fp, " %s: %.2f ns\n", g_sdc->fc_constraints[i].sink_list[j], g_sdc->fc_constraints[i].constraint);
		} else { /* multicycle constraint */
			fprintf(fp, " %s: %d multicycles\n", g_sdc->fc_constraints[i].sink_list[j], g_sdc->fc_constraints[i].num_multicycles);
		}
	}

	for (i = 0; i < g_sdc->num_ff_constraints; i++) {
		fprintf(fp, "Flip-flop");
		for (j = 0; j < g_sdc->ff_constraints[i].num_source; j++) {
			fprintf(fp, " %s,", g_sdc->ff_constraints[i].source_list[j]);
		}
		fprintf(fp, " to flip-flop");
		for (j = 0; j < g_sdc->ff_constraints[i].num_sink - 1; j++) {
			fprintf(fp, " %s,", g_sdc->ff_constraints[i].sink_list[j]);
		} /* We have to print the last one separately because we don't want a comma after it. */
		if (g_sdc->ff_constraints[i].num_multicycles == 0) { /* not a multicycle constraint */
			fprintf(fp, " %s: %.2f ns\n", g_sdc->ff_constraints[i].sink_list[j], g_sdc->ff_constraints[i].constraint);
		} else { /* multicycle constraint */
			fprintf(fp, " %s: %d multicycles\n", g_sdc->ff_constraints[i].sink_list[j], g_sdc->ff_constraints[i].num_multicycles);
		}
	}

	fclose(fp);
}
static void print_spaces(FILE * fp, int num_spaces) {
	/* Writes num_spaces space characters to fp (no-op if num_spaces <= 0). */
	int i;
	for (i = 0; i < num_spaces; i++) {
		fputc(' ', fp);
	}
}
void print_timing_graph_as_blif (const char *fname, t_model *models) {
	/* Dumps the timing graph to file fname in BLIF format: the circuit's
	primary I/Os, one primitive per logical block, one 2-input buffer
	(".names a b / 1 1") per timing-graph edge, and a black-box .model
	for every entry in the models list. */
	struct s_model_ports *port;
	struct s_linked_vptr *p_io_removed;
	/* Prints out the critical path to a file. */

	FILE *fp;
	int i, j;

	fp = my_fopen(fname, "w", 0);

	fprintf(fp, ".model %s\n", blif_circuit_name);

	/* Primary inputs: every VPACK_INPAD logical block... */
	fprintf(fp, ".inputs ");
	for (i = 0; i < num_logical_blocks; i++) {
		if (logical_block[i].type == VPACK_INPAD) {
			fprintf(fp, "\\\n%s ", logical_block[i].name);
		}
	}
	/* ...plus any I/Os that were removed from the circuit but must still
	appear in the BLIF interface. */
	p_io_removed = circuit_p_io_removed;
	while (p_io_removed) {
		fprintf(fp, "\\\n%s ", (char *) p_io_removed->data_vptr);
		p_io_removed = p_io_removed->next;
	}

	fprintf(fp, "\n");

	/* Primary outputs: every VPACK_OUTPAD logical block. */
	fprintf(fp, ".outputs ");

	for (i = 0; i < num_logical_blocks; i++) {
		if (logical_block[i].type == VPACK_OUTPAD) {
			/* Outputs have a "out:" prepended to them, must remove */
			fprintf(fp, "\\\n%s ", &logical_block[i].name[4]);
		}
	}
	fprintf(fp, "\n");

	/* A constant-0 net named "unconn" for dangling primitive inputs. */
	fprintf(fp, ".names unconn\n");
	fprintf(fp, " 0\n\n");

	/* Print out primitives */
	for (i = 0; i < num_logical_blocks; i++) {
		print_primitive_as_blif (fp, i);
	}

	/* Print out tnode connections: each timing-graph edge becomes a buffer.
	Source-side node types whose edges are emitted by the primitives above
	are skipped to avoid duplication. */
	for (i = 0; i < num_tnodes; i++) {
		if (tnode[i].type != TN_PRIMITIVE_IPIN && tnode[i].type != TN_FF_SOURCE
				&& tnode[i].type != TN_INPAD_SOURCE
				&& tnode[i].type != TN_OUTPAD_IPIN) {
			for (j = 0; j < tnode[i].num_edges; j++) {
				fprintf(fp, ".names tnode_%d tnode_%d\n", i,
						tnode[i].out_edges[j].to_node);
				fprintf(fp, "1 1\n\n");
			}
		}
	}

	fprintf(fp, ".end\n\n");

	/* Print out .subckt models: one black-box .model per architecture model,
	expanding multi-bit ports into indexed single-bit pins. */
	while (models) {
		fprintf(fp, ".model %s\n", models->name);
		fprintf(fp, ".inputs ");
		port = models->inputs;
		while (port) {
			if (port->size > 1) {
				for (j = 0; j < port->size; j++) {
					fprintf(fp, "%s[%d] ", port->name, j);
				}
			} else {
				fprintf(fp, "%s ", port->name);
			}
			port = port->next;
		}
		fprintf(fp, "\n");
		fprintf(fp, ".outputs ");
		port = models->outputs;
		while (port) {
			if (port->size > 1) {
				for (j = 0; j < port->size; j++) {
					fprintf(fp, "%s[%d] ", port->name, j);
				}
			} else {
				fprintf(fp, "%s ", port->name);
			}
			port = port->next;
		}
		fprintf(fp, "\n.blackbox\n.end\n\n");
		fprintf(fp, "\n\n");
		models = models->next;
	}
	fclose(fp);
}
| static void print_primitive_as_blif (FILE *fpout, int iblk) { | |
| int i, j; | |
| struct s_model_ports *port; | |
| struct s_linked_vptr *truth_table; | |
| t_rr_node *irr_graph; | |
| t_pb_graph_node *pb_graph_node; | |
| int node; | |
| /* Print primitives found in timing graph in blif format based on whether this is a logical primitive or a physical primitive */ | |
| if (logical_block[iblk].type == VPACK_INPAD) { | |
| if (logical_block[iblk].pb == NULL) { | |
| fprintf(fpout, ".names %s tnode_%d\n", logical_block[iblk].name, | |
| get_tnode_index(logical_block[iblk].output_net_tnodes[0][0])); | |
| } else { | |
| fprintf(fpout, ".names %s tnode_%d\n", logical_block[iblk].name, | |
| get_tnode_index(logical_block[iblk].pb->rr_graph[logical_block[iblk].pb->pb_graph_node->output_pins[0][0].pin_count_in_cluster].tnode)); | |
| } | |
| fprintf(fpout, "1 1\n\n"); | |
| } else if (logical_block[iblk].type == VPACK_OUTPAD) { | |
| /* outputs have the symbol out: automatically prepended to it, must remove */ | |
| if (logical_block[iblk].pb == NULL) { | |
| fprintf(fpout, ".names tnode_%d %s\n", | |
| get_tnode_index(logical_block[iblk].input_net_tnodes[0][0]), | |
| &logical_block[iblk].name[4]); | |
| } else { | |
| /* avoid the out: from output pad naming */ | |
| fprintf(fpout, ".names tnode_%d %s\n", | |
| get_tnode_index(logical_block[iblk].pb->rr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][0].pin_count_in_cluster].tnode), | |
| (logical_block[iblk].name + 4)); | |
| } | |
| fprintf(fpout, "1 1\n\n"); | |
| } else if (strcmp(logical_block[iblk].model->name, "latch") == 0) { | |
| fprintf(fpout, ".latch "); | |
| node = OPEN; | |
| if (logical_block[iblk].pb == NULL) { | |
| i = 0; | |
| port = logical_block[iblk].model->inputs; | |
| while (port) { | |
| if (!port->is_clock) { | |
| assert(port->size == 1); | |
| for (j = 0; j < port->size; j++) { | |
| if (logical_block[iblk].input_net_tnodes[i][j] != NULL) { | |
| fprintf(fpout, "tnode_%d ", | |
| get_tnode_index(logical_block[iblk].input_net_tnodes[i][j])); | |
| } else { | |
| assert(0); | |
| } | |
| } | |
| i++; | |
| } | |
| port = port->next; | |
| } | |
| assert(i == 1); | |
| i = 0; | |
| port = logical_block[iblk].model->outputs; | |
| while (port) { | |
| assert(port->size == 1); | |
| for (j = 0; j < port->size; j++) { | |
| if (logical_block[iblk].output_net_tnodes[i][j] != NULL) { | |
| node = | |
| get_tnode_index(logical_block[iblk].output_net_tnodes[i][j]); | |
| fprintf(fpout, "latch_%s re ", | |
| logical_block[iblk].name); | |
| } else { | |
| assert(0); | |
| } | |
| } | |
| i++; | |
| port = port->next; | |
| } | |
| assert(i == 1); | |
| i = 0; | |
| port = logical_block[iblk].model->inputs; | |
| while (port) { | |
| if (port->is_clock) { | |
| for (j = 0; j < port->size; j++) { | |
| if (logical_block[iblk].clock_net_tnode != NULL) { | |
| fprintf(fpout, "tnode_%d 0\n\n", | |
| get_tnode_index(logical_block[iblk].clock_net_tnode)); | |
| } else { | |
| assert(0); | |
| } | |
| } | |
| i++; | |
| } | |
| port = port->next; | |
| } | |
| assert(i == 1); | |
| } else { | |
| irr_graph = logical_block[iblk].pb->rr_graph; | |
| assert( | |
| irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][0].pin_count_in_cluster].net_num != OPEN); | |
| fprintf(fpout, "tnode_%d ", | |
| get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][0].pin_count_in_cluster].tnode)); | |
| node = | |
| get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->output_pins[0][0].pin_count_in_cluster].tnode); | |
| fprintf(fpout, "latch_%s re ", logical_block[iblk].name); | |
| assert( | |
| irr_graph[logical_block[iblk].pb->pb_graph_node->clock_pins[0][0].pin_count_in_cluster].net_num != OPEN); | |
| fprintf(fpout, "tnode_%d 0\n\n", | |
| get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->clock_pins[0][0].pin_count_in_cluster].tnode)); | |
| } | |
| assert(node != OPEN); | |
| fprintf(fpout, ".names latch_%s tnode_%d\n", logical_block[iblk].name, | |
| node); | |
| fprintf(fpout, "1 1\n\n"); | |
| } else if (strcmp(logical_block[iblk].model->name, "names") == 0) { | |
| fprintf(fpout, ".names "); | |
| node = OPEN; | |
| if (logical_block[iblk].pb == NULL) { | |
| i = 0; | |
| port = logical_block[iblk].model->inputs; | |
| while (port) { | |
| assert(!port->is_clock); | |
| for (j = 0; j < port->size; j++) { | |
| if (logical_block[iblk].input_net_tnodes[i][j] != NULL) { | |
| fprintf(fpout, "tnode_%d ", | |
| get_tnode_index(logical_block[iblk].input_net_tnodes[i][j])); | |
						} else {
							/* NOTE(review): input nets appear to be packed
							 * left-to-right, so the first OPEN entry ends the
							 * used pins -- confirm against the packer. */
							break;
						}
					}
					i++;
					port = port->next;
				}
				/* This branch expects exactly one input port on the model. */
				assert(i == 1);
				i = 0;
				port = logical_block[iblk].model->outputs;
				while (port) {
					/* Single single-bit output port: print the LUT's output
					 * name and record its tnode for the buffer emitted below. */
					assert(port->size == 1);
					fprintf(fpout, "lut_%s\n", logical_block[iblk].name);
					node = get_tnode_index(logical_block[iblk].output_net_tnodes[0][0]);
					assert(node != OPEN);
					i++;
					port = port->next;
				}
				assert(i == 1);
			} else {
				/* Clustered (pb != NULL) case: pin connectivity comes from the
				 * physical block's rr_graph instead of the logical netlist. */
				irr_graph = logical_block[iblk].pb->rr_graph;
				assert(logical_block[iblk].pb->pb_graph_node->num_input_ports == 1);
				for (i = 0;
						i < logical_block[iblk].pb->pb_graph_node->num_input_pins[0];
						i++) {
					if(logical_block[iblk].input_nets[0][i] != OPEN) {
						/* Search the cluster's input pins for the one carrying
						 * this logical input net; cluster pin order may differ
						 * from logical pin order, hence the inner scan. */
						for (j = 0; j < logical_block[iblk].pb->pb_graph_node->num_input_pins[0]; j++) {
							if (irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][j].pin_count_in_cluster].net_num
									!= OPEN) {
								if (irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][j].pin_count_in_cluster].net_num == logical_block[iblk].input_nets[0][i]) {
									fprintf(fpout, "tnode_%d ",
											get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->input_pins[0][j].pin_count_in_cluster].tnode));
									break;
								}
							}
						}
						/* Fires if the net was not found on any cluster pin
						 * (j only stays < num_input_pins[0] after a break). */
						assert(j < logical_block[iblk].pb->pb_graph_node->num_input_pins[0]);
					}
				}
				/* Exactly one single-bit output port is expected here. */
				assert(
						logical_block[iblk].pb->pb_graph_node->num_output_ports == 1);
				assert(
						logical_block[iblk].pb->pb_graph_node->num_output_pins[0] == 1);
				fprintf(fpout, "lut_%s\n", logical_block[iblk].name);
				node =
						get_tnode_index(irr_graph[logical_block[iblk].pb->pb_graph_node->output_pins[0][0].pin_count_in_cluster].tnode);
			}
			assert(node != OPEN);
			/* Dump the stored truth-table rows verbatim, one per line. */
			truth_table = logical_block[iblk].truth_table;
			while (truth_table) {
				fprintf(fpout, "%s\n", (char*) truth_table->data_vptr);
				truth_table = truth_table->next;
			}
			fprintf(fpout, "\n");
			/* Buffer the LUT's output onto its timing-graph node name so
			 * downstream references can use tnode_<index>. */
			fprintf(fpout, ".names lut_%s tnode_%d\n", logical_block[iblk].name,
					node);
			fprintf(fpout, "1 1\n\n");
		} else {
			/* This is a standard .subckt blif structure */
			fprintf(fpout, ".subckt %s ", logical_block[iblk].model->name);
			if (logical_block[iblk].pb == NULL) {
				/* Unclustered case: formal port pins are wired to tnode_<i>
				 * names taken from the logical block's net tnodes. */
				i = 0;
				port = logical_block[iblk].model->inputs;
				while (port) {
					if (!port->is_clock) {
						/* i counts only non-clock input ports, matching the
						 * first index of input_net_tnodes. */
						for (j = 0; j < port->size; j++) {
							if (logical_block[iblk].input_net_tnodes[i][j] != NULL) {
								/* Multi-bit ports get [j] suffixes; single-bit
								 * ports use the bare port name. */
								if (port->size > 1) {
									fprintf(fpout, "\\\n%s[%d]=tnode_%d ",
											port->name, j,
											get_tnode_index(logical_block[iblk].input_net_tnodes[i][j]));
								} else {
									fprintf(fpout, "\\\n%s=tnode_%d ", port->name,
											get_tnode_index(logical_block[iblk].input_net_tnodes[i][j]));
								}
							} else {
								/* Unconnected input pin. */
								if (port->size > 1) {
									fprintf(fpout, "\\\n%s[%d]=unconn ", port->name,
											j);
								} else {
									fprintf(fpout, "\\\n%s=unconn ", port->name);
								}
							}
						}
						i++;
					}
					port = port->next;
				}
				i = 0;
				port = logical_block[iblk].model->outputs;
				while (port) {
					for (j = 0; j < port->size; j++) {
						if (logical_block[iblk].output_net_tnodes[i][j] != NULL) {
							/* Connected output: use the driven net's name. */
							if (port->size > 1) {
								fprintf(fpout, "\\\n%s[%d]=%s ", port->name, j,
										vpack_net[logical_block[iblk].output_nets[i][j]].name);
							} else {
								fprintf(fpout, "\\\n%s=%s ", port->name,
										vpack_net[logical_block[iblk].output_nets[i][j]].name);
							}
						} else {
							/* Unused output: synthesize a per-block, per-pin
							 * unique dummy name so the blif stays well formed. */
							if (port->size > 1) {
								fprintf(fpout, "\\\n%s[%d]=unconn_%d_%s_%d ",
										port->name, j, iblk, port->name, j);
							} else {
								fprintf(fpout, "\\\n%s=unconn_%d_%s ", port->name,
										iblk, port->name);
							}
						}
					}
					i++;
					port = port->next;
				}
				/* Clock inputs: single-bit by assertion, wired to the block's
				 * clock tnode if one exists. */
				i = 0;
				port = logical_block[iblk].model->inputs;
				while (port) {
					if (port->is_clock) {
						assert(port->size == 1);
						if (logical_block[iblk].clock_net_tnode != NULL) {
							fprintf(fpout, "\\\n%s=tnode_%d ", port->name,
									get_tnode_index(logical_block[iblk].clock_net_tnode));
						} else {
							fprintf(fpout, "\\\n%s=unconn ", port->name);
						}
						i++;
					}
					port = port->next;
				}
				fprintf(fpout, "\n\n");
				/* Emit a buffer (.names <net> tnode_<n>; "1 1") for every
				 * driven output so net names alias their timing nodes. */
				i = 0;
				port = logical_block[iblk].model->outputs;
				while (port) {
					for (j = 0; j < port->size; j++) {
						if (logical_block[iblk].output_net_tnodes[i][j] != NULL) {
							fprintf(fpout, ".names %s tnode_%d\n",
									vpack_net[logical_block[iblk].output_nets[i][j]].name,
									get_tnode_index(logical_block[iblk].output_net_tnodes[i][j]));
							fprintf(fpout, "1 1\n\n");
						}
					}
					i++;
					port = port->next;
				}
			} else {
				/* Clustered case: same .subckt structure, but pins, nets and
				 * tnodes are read out of the cluster's rr_graph. */
				irr_graph = logical_block[iblk].pb->rr_graph;
				pb_graph_node = logical_block[iblk].pb->pb_graph_node;
				/* Input pins. */
				for (i = 0; i < pb_graph_node->num_input_ports; i++) {
					for (j = 0; j < pb_graph_node->num_input_pins[i]; j++) {
						if (irr_graph[pb_graph_node->input_pins[i][j].pin_count_in_cluster].net_num
								!= OPEN) {
							if (pb_graph_node->num_input_pins[i] > 1) {
								fprintf(fpout, "\\\n%s[%d]=tnode_%d ",
										pb_graph_node->input_pins[i][j].port->name,
										j,
										get_tnode_index(irr_graph[pb_graph_node->input_pins[i][j].pin_count_in_cluster].tnode));
							} else {
								fprintf(fpout, "\\\n%s=tnode_%d ",
										pb_graph_node->input_pins[i][j].port->name,
										get_tnode_index(irr_graph[pb_graph_node->input_pins[i][j].pin_count_in_cluster].tnode));
							}
						} else {
							/* Unconnected input pin. */
							if (pb_graph_node->num_input_pins[i] > 1) {
								fprintf(fpout, "\\\n%s[%d]=unconn ",
										pb_graph_node->input_pins[i][j].port->name,
										j);
							} else {
								fprintf(fpout, "\\\n%s=unconn ",
										pb_graph_node->input_pins[i][j].port->name);
							}
						}
					}
				}
				/* Output pins: connected pins use the driven net's name. */
				for (i = 0; i < pb_graph_node->num_output_ports; i++) {
					for (j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
						if (irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num
								!= OPEN) {
							if (pb_graph_node->num_output_pins[i] > 1) {
								fprintf(fpout, "\\\n%s[%d]=%s ",
										pb_graph_node->output_pins[i][j].port->name,
										j,
										vpack_net[irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num].name);
							} else {
								/* Locals only break up the long chained lookup;
								 * same data path as the branch above. */
								char* port_name =
										pb_graph_node->output_pins[i][j].port->name;
								int pin_count =
										pb_graph_node->output_pins[i][j].pin_count_in_cluster;
								int node_index = irr_graph[pin_count].net_num;
								char* node_name = vpack_net[node_index].name;
								fprintf(fpout, "\\\n%s=%s ", port_name, node_name);
							}
						} else {
							/* Unconnected output pin. */
							if (pb_graph_node->num_output_pins[i] > 1) {
								fprintf(fpout, "\\\n%s[%d]=unconn ",
										pb_graph_node->output_pins[i][j].port->name,
										j);
							} else {
								fprintf(fpout, "\\\n%s=unconn ",
										pb_graph_node->output_pins[i][j].port->name);
							}
						}
					}
				}
				/* Clock pins, wired to their tnodes like the inputs. */
				for (i = 0; i < pb_graph_node->num_clock_ports; i++) {
					for (j = 0; j < pb_graph_node->num_clock_pins[i]; j++) {
						if (irr_graph[pb_graph_node->clock_pins[i][j].pin_count_in_cluster].net_num
								!= OPEN) {
							if (pb_graph_node->num_clock_pins[i] > 1) {
								fprintf(fpout, "\\\n%s[%d]=tnode_%d ",
										pb_graph_node->clock_pins[i][j].port->name,
										j,
										get_tnode_index(irr_graph[pb_graph_node->clock_pins[i][j].pin_count_in_cluster].tnode));
							} else {
								fprintf(fpout, "\\\n%s=tnode_%d ",
										pb_graph_node->clock_pins[i][j].port->name,
										get_tnode_index(irr_graph[pb_graph_node->clock_pins[i][j].pin_count_in_cluster].tnode));
							}
						} else {
							/* Unconnected clock pin. */
							if (pb_graph_node->num_clock_pins[i] > 1) {
								fprintf(fpout, "\\\n%s[%d]=unconn ",
										pb_graph_node->clock_pins[i][j].port->name,
										j);
							} else {
								fprintf(fpout, "\\\n%s=unconn ",
										pb_graph_node->clock_pins[i][j].port->name);
							}
						}
					}
				}
				fprintf(fpout, "\n\n");
				/* connect up output port names to output tnodes: one buffer
				 * (.names <net> tnode_<n>; "1 1") per driven output pin. */
				for (i = 0; i < pb_graph_node->num_output_ports; i++) {
					for (j = 0; j < pb_graph_node->num_output_pins[i]; j++) {
						if (irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num
								!= OPEN) {
							fprintf(fpout, ".names %s tnode_%d\n",
									vpack_net[irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].net_num].name,
									get_tnode_index(irr_graph[pb_graph_node->output_pins[i][j].pin_count_in_cluster].tnode));
							fprintf(fpout, "1 1\n\n");
						}
					}
				}
			}
		}
	}