| #include <stdio.h> |
| #include "util.h" |
| #include "vpr_types.h" |
| #include "globals.h" |
| #include "path_delay.h" |
| #include "path_delay2.h" |
| #include "net_delay.h" |
| #include "vpr_utils.h" |
| #include <assert.h> |
| |
| /* TODO: Add an option for registered inputs and outputs once this works; currently, outputs only. */ |
| |
| /****************** Timing graph Structure ************************************ |
| * * |
| * In the timing graph I create, input pads and constant generators have no * |
| * inputs; everything else has inputs. Every input pad and output pad is * |
| * represented by two tnodes -- an input pin and an output pin. For an input * |
| * pad the input pin comes from off chip and has no fanin, while the output * |
| * pin drives outpads and/or CLBs. For output pads, the input node is driven * |
| * by a FB or input pad, and the output node goes off chip and has no * |
| * fanout (out-edges). I need two nodes to represent things like pads * |
| * because I mark all delay on tedges, not on tnodes. * |
| * * |
| * Every used (not OPEN) FB pin becomes a timing node. As well, every used * |
| * subblock pin within a FB also becomes a timing node. Unused (OPEN) pins * |
| * don't create any timing nodes. If a subblock is used in combinational mode * |
| * (i.e. its clock pin is open), I just hook the subblock input tnodes to the * |
| * subblock output tnode. If the subblock is used in sequential mode, I * |
| * create two extra tnodes. One is just the subblock clock pin, which is * |
| * connected to the subblock output. This means that FFs don't generate * |
| * their output until their clock arrives. For global clocks coming from an * |
| * input pad, the delay of the clock is 0, so the FFs generate their outputs * |
| * at T = 0, as usual. For locally-generated or gated clocks, however, the * |
| * clock will arrive later, and the FF output will be generated later. This * |
| * lets me properly model things like ripple counters and gated clocks. The * |
| * other extra node is the FF storage node (i.e. a sink), which connects to * |
| * the subblock inputs and has no fanout. * |
| * * |
| * One other subblock that needs special attention is a constant generator. * |
| * This has no used inputs, but its output is used. I create an extra tnode, * |
| * a dummy input, in addition to the output pin tnode. The dummy tnode has * |
| * no fanin. Since constant generators really generate their outputs at T = * |
| * -infinity, I set the delay from the input tnode to the output to a large- * |
| * magnitude negative number. This guarantees every block that needs the * |
| * output of a constant generator sees it available very early. * |
| * * |
| * For this routine to work properly, subblocks whose outputs are unused must * |
| * be completely empty -- all their input pins and their clock pin must be * |
| * OPEN. Check_netlist checks the input netlist to guarantee this -- don't * |
| * disable that check. * |
| * * |
| * NB: The discussion below is only relevant for circuits with multiple * |
| * clocks. For circuits with a single clock, everything I do is exactly * |
| * correct. * |
| * * |
| * A note about how I handle FFs: By hooking the clock pin up to the FF * |
| * output, I properly model the time at which the FF generates its output. * |
| * I don't do a completely rigorous job of modelling required arrival time at * |
| * the FF input, however. I assume every FF and outpad needs its input at * |
| * T = 0, which is when the earliest clock arrives. This can be conservative * |
| * -- a fuller analysis would be to do a fast path analysis of the clock * |
| * feeding each FF and subtract its earliest arrival time from the delay of * |
| * the D signal to the FF input. This is too much work, so I'm not doing it. * |
| * Alternatively, when one has N clocks, it might be better to just do N * |
| * separate timing analyses, with only signals from FFs clocked on clock i * |
| * being propagated forward on analysis i, and only FFs clocked on i being * |
| * considered as sinks. This gives all the critical paths within clock * |
| * domains, but ignores interactions. Instead, I assume all the clocks are * |
| * more-or-less synchronized (they might be gated or locally-generated, but * |
| * they all have the same frequency) and explore all interactions. Tough to * |
| * say what's the better way. Since multiple clocks aren't important for my * |
| * work, it's not worth bothering about much. * |
| * * |
| ******************************************************************************/ |
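| |
| /* Illustration (comment only) of the graph built below for one clocked, |
| * non-IO subblock with a single used input and output: |
| * |
| * FB_IPIN --T_fb_ipin_to_sblk_ipin--> SUBBLK_IPIN --T_comb+T_seq_in--> FF_SINK |
| * FB_IPIN (clock) --0--> FF_SOURCE --T_seq_out--> SUBBLK_OPIN |
| * SUBBLK_OPIN --T_sblk_opin_to_fb_opin--> FB_OPIN --> (net sinks) |
| * |
| * A constant generator instead gets a dummy CONSTANT_GEN_SOURCE node whose |
| * single edge into its output pin carries T_CONSTANT_GENERATOR. */ |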
| |
| #define T_CONSTANT_GENERATOR -1000 /* Essentially -ve infinity */ |
| |
| /***************** Types local to this module ***************************/ |
| |
| enum e_subblock_pin_type |
| { SUB_INPUT = 0, SUB_OUTPUT, SUB_CLOCK, NUM_SUB_PIN_TYPES }; |
| |
| /***************** Variables local to this module ***************************/ |
| |
| /* Variables for "chunking" the tedge memory. If the head pointer is NULL, * |
| * no timing graph exists now. */ |
| |
| static struct s_linked_vptr *tedge_ch_list_head = NULL; |
| static int tedge_ch_bytes_avail = 0; |
| static char *tedge_ch_next_avail = NULL; |
| |
| |
| /***************** Subroutines local to this module *************************/ |
| |
| static int alloc_and_load_pin_mappings(int ***block_pin_to_tnode_ptr, |
| int *****snode_block_pin_to_tnode_ptr, |
| t_subblock_data subblock_data, |
| int ***num_uses_of_sblk_opin); |
| |
| static void free_pin_mappings(int **block_pin_to_tnode, |
| int ****snode_block_pin_to_tnode, |
| int *num_subblocks_per_block); |
| |
| static void alloc_and_load_fanout_counts(int ***num_uses_of_fb_ipin_ptr, |
| int ****num_uses_of_sblk_opin_ptr, |
| t_subblock_data subblock_data); |
| |
| static void free_fanout_counts(int **num_uses_of_fb_ipin, |
| int ***num_uses_of_sblk_opin); |
| |
| static float **alloc_net_slack(void); |
| |
| static void compute_net_slacks(float **net_slack); |
| |
| static void alloc_and_load_tnodes_and_net_mapping(int **num_uses_of_fb_ipin, |
| int ***num_uses_of_sblk_opin, |
| int **block_pin_to_tnode, |
| int ****snode_block_pin_to_tnode, |
| t_subblock_data subblock_data, |
| t_timing_inf timing_inf); |
| |
| static void build_fb_tnodes(int iblk, |
| int *n_uses_of_fb_ipin, |
| int **block_pin_to_tnode, |
| int ***sub_pin_to_tnode, |
| int num_subs, |
| t_subblock * sub_inf, |
| float T_fb_ipin_to_sblk_ipin); |
| |
| static void build_subblock_tnodes(int **n_uses_of_sblk_opin, |
| int *node_block_pin_to_tnode, |
| int ***sub_pin_to_tnode, |
| int *num_subblocks_per_block, |
| t_subblock ** subblock_inf, |
| t_timing_inf timing_inf, |
| int iblk); |
| |
| |
| static boolean is_global_clock(int iblk, |
| int sub, |
| int subpin, |
| int *num_subblocks_per_block, |
| t_subblock ** subblock_inf); |
| |
| static void build_block_output_tnode(int inode, |
| int iblk, |
| int ipin, |
| int **block_pin_to_tnode); |
| |
| |
| /********************* Subroutine definitions *******************************/ |
| |
| |
| float ** |
| alloc_and_load_timing_graph(t_timing_inf timing_inf, |
| t_subblock_data subblock_data) |
| { |
| |
| /* This routine builds the graph used for timing analysis. Every fb or * |
| * subblock pin is a timing node (tnode). The connectivity between pins is * |
| * represented by timing edges (tedges). All delay is marked on edges, not * |
| * on nodes. This routine returns an array that will store slack values: * |
| * net_slack[0..num_nets-1][1..num_pins-1]. */ |
| |
| /* The two arrays below are valid only for FBs, not pads. */ |
| int i; |
| int **num_uses_of_fb_ipin; /* [0..num_blocks-1][0..type->num_pins-1] */ |
| int ***num_uses_of_sblk_opin; /* [0..num_blocks-1][0..type->max_subblocks-1][0..type->max_subblock_outputs-1] */ |
| |
| /* Array for mapping from a pin on a block to a tnode index. For pads, only * |
| * the first two pin locations are used (input to pad is first, output of * |
| * pad is second). For fbs, all OPEN pins on the fb have their mapping * |
| * set to OPEN so I won't use it by mistake. */ |
| |
| int **block_pin_to_tnode; /* [0..num_blocks-1][0..num_pins-1] */ |
| |
| |
| /* Array for mapping from a pin on a subblock to a tnode index. Unused * |
| * or nonexistent subblock pins have their mapping set to OPEN. * |
| * [0..num_blocks-1][0..num_subblocks_per_block-1][0..NUM_SUB_PIN_TYPES-1][0..num_pins_of_that_type-1] */ |
| |
| int ****snode_block_pin_to_tnode; |
| |
| int num_sinks; |
| float **net_slack; /* [0..num_nets-1][1..num_pins-1]. */ |
| |
| /************* End of variable declarations ********************************/ |
| |
| if(tedge_ch_list_head != NULL) |
| { |
| printf("Error in alloc_and_load_timing_graph:\n" |
| "\tAn old timing graph still exists.\n"); |
| exit(1); |
| } |
| |
| /* If either of the checks below ever fails, change the definition of * |
| * tnode_descript to use ints instead of shorts for isubblk or ipin. */ |
| |
| for(i = 0; i < num_types; i++) |
| { |
| if(type_descriptors[i].num_pins > MAX_SHORT) |
| { |
| printf |
| ("Error in alloc_and_load_timing_graph: type %s has %d pins.\n" |
| "\tThis will cause a short overflow in tnode_descript.\n", |
| type_descriptors[i].name, |
| type_descriptors[i].num_pins); |
| exit(1); |
| } |
| |
| if(type_descriptors[i].max_subblocks > MAX_SHORT) |
| { |
| printf |
| ("Error in alloc_and_load_timing_graph: max_subblocks_per_block is %d.\n" |
| "\tThis will cause a short overflow in tnode_descript.\n", |
| type_descriptors[i].max_subblocks); |
| exit(1); |
| } |
| } |
| |
| alloc_and_load_fanout_counts(&num_uses_of_fb_ipin, |
| &num_uses_of_sblk_opin, subblock_data); |
| |
| num_tnodes = alloc_and_load_pin_mappings(&block_pin_to_tnode, |
| &snode_block_pin_to_tnode, |
| subblock_data, |
| num_uses_of_sblk_opin); |
| |
| alloc_and_load_tnodes_and_net_mapping(num_uses_of_fb_ipin, |
| num_uses_of_sblk_opin, |
| block_pin_to_tnode, |
| snode_block_pin_to_tnode, |
| subblock_data, timing_inf); |
| |
| num_sinks = alloc_and_load_timing_graph_levels(); |
| |
| check_timing_graph(subblock_data.num_const_gen, subblock_data.num_ff, |
| num_sinks); |
| |
| free_fanout_counts(num_uses_of_fb_ipin, num_uses_of_sblk_opin); |
| free_pin_mappings(block_pin_to_tnode, snode_block_pin_to_tnode, |
| subblock_data.num_subblocks_per_block); |
| |
| net_slack = alloc_net_slack(); |
| return (net_slack); |
| } |
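| |
| /* Typical call sequence (see do_constant_net_delay_timing_analysis at the |
| * bottom of this file for a complete example): alloc_and_load_timing_graph, |
| * then load_timing_graph_net_delays, then load_net_slack, and finally |
| * free_timing_graph. */ |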
| |
| |
| static float ** |
| alloc_net_slack(void) |
| { |
| |
| /* Allocates the net_slack structure. Chunk allocated to save space. */ |
| |
| float **net_slack; /* [0..num_nets-1][1..num_pins-1] */ |
| float *tmp_ptr; |
| int inet; |
| |
| net_slack = (float **)my_malloc(num_nets * sizeof(float *)); |
| |
| for(inet = 0; inet < num_nets; inet++) |
| { |
| tmp_ptr = |
| (float *)my_chunk_malloc(((net[inet].num_sinks + 1) - 1) * |
| sizeof(float), &tedge_ch_list_head, |
| &tedge_ch_bytes_avail, |
| &tedge_ch_next_avail); |
| net_slack[inet] = tmp_ptr - 1; /* [1..num_pins-1] */ |
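| /* Shifting tmp_ptr down by one makes net_slack[inet][1] alias tmp_ptr[0]: |
| * pin 0 of a net is its driver, so slacks exist only for sink pins |
| * 1..num_sinks. */ |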
| } |
| |
| return (net_slack); |
| } |
| |
| |
| static int |
| alloc_and_load_pin_mappings(int ***block_pin_to_tnode_ptr, |
| int *****snode_block_pin_to_tnode_ptr, |
| t_subblock_data subblock_data, |
| int ***num_uses_of_sblk_opin) |
| { |
| |
| /* Allocates and loads the block_pin_to_tnode and snode_block_pin_to_tnode * |
| * structures, and computes num_tnodes. */ |
| |
| int iblk, isub, ipin, num_subblocks, opin, clk_pin; |
| int curr_tnode; |
| int ****snode_block_pin_to_tnode, **block_pin_to_tnode; |
| int *num_subblocks_per_block; |
| t_type_ptr type; |
| t_subblock **subblock_inf; |
| boolean has_inputs; |
| |
| num_subblocks_per_block = subblock_data.num_subblocks_per_block; |
| subblock_inf = subblock_data.subblock_inf; |
| |
| |
| block_pin_to_tnode = (int **)my_malloc(num_blocks * sizeof(int *)); |
| |
| snode_block_pin_to_tnode = |
| (int ****)my_malloc(num_blocks * sizeof(int ***)); |
| |
| curr_tnode = 0; |
| |
| for(iblk = 0; iblk < num_blocks; iblk++) |
| { |
| type = block[iblk].type; |
| block_pin_to_tnode[iblk] = |
| (int *)my_malloc(type->num_pins * sizeof(int)); |
| /* First do the block mapping */ |
| for(ipin = 0; ipin < block[iblk].type->num_pins; ipin++) |
| { |
| if(block[iblk].nets[ipin] == OPEN) |
| { |
| block_pin_to_tnode[iblk][ipin] = OPEN; |
| } |
| else |
| { |
| block_pin_to_tnode[iblk][ipin] = curr_tnode; |
| curr_tnode++; |
| } |
| } |
| |
| /* Now do the subblock mapping. */ |
| |
| num_subblocks = num_subblocks_per_block[iblk]; |
| snode_block_pin_to_tnode[iblk] = (int ***)alloc_matrix(0, num_subblocks - 1, 0, NUM_SUB_PIN_TYPES - 1, sizeof(int *)); /* [0..num_subblocks-1][0..NUM_SUB_PIN_TYPES-1] */ |
| |
| for(isub = 0; isub < num_subblocks; isub++) |
| { |
| /* Allocate space for each type of subblock pin */ |
| snode_block_pin_to_tnode[iblk][isub][SUB_INPUT] = |
| (int *)my_malloc(type->max_subblock_inputs * |
| sizeof(int)); |
| snode_block_pin_to_tnode[iblk][isub][SUB_OUTPUT] = |
| (int *)my_malloc(type->max_subblock_outputs * |
| sizeof(int)); |
| snode_block_pin_to_tnode[iblk][isub][SUB_CLOCK] = |
| (int *)my_malloc(sizeof(int)); |
| |
| /* Pin ordering: inputs, outputs, clock. */ |
| |
| has_inputs = FALSE; |
| for(ipin = 0; ipin < type->max_subblock_inputs; ipin++) |
| { |
| if(subblock_inf[iblk][isub].inputs[ipin] != OPEN) |
| { |
| has_inputs = TRUE; |
| snode_block_pin_to_tnode[iblk][isub] |
| [SUB_INPUT][ipin] = curr_tnode; |
| curr_tnode++; |
| if(type == IO_TYPE) |
| curr_tnode++; /* Output pad needs additional dummy sink node */ |
| } |
| else |
| { |
| snode_block_pin_to_tnode[iblk][isub] |
| [SUB_INPUT][ipin] = OPEN; |
| } |
| } |
| |
| /* subblock output */ |
| |
| /* If the subblock opin is unused the subblock is empty and we * |
| * shouldn't count it. */ |
| for(opin = 0; opin < type->max_subblock_outputs; opin++) |
| { |
| |
| if(num_uses_of_sblk_opin[iblk][isub][opin] != 0) |
| { |
| snode_block_pin_to_tnode[iblk][isub] |
| [SUB_OUTPUT][opin] = curr_tnode; |
| |
| if(type == IO_TYPE) |
| curr_tnode += 2; /* Input pad needs a dummy source node */ |
| else if(has_inputs) /* Regular sblk */ |
| curr_tnode++; |
| else /* Constant generator. Make room for dummy input */ |
| curr_tnode += 2; |
| } |
| else |
| { |
| snode_block_pin_to_tnode[iblk][isub] |
| [SUB_OUTPUT][opin] = OPEN; |
| } |
| } |
| |
| clk_pin = 0; |
| |
| if(subblock_inf[iblk][isub].clock != OPEN) |
| { |
| |
| /* If the subblock is sequential, we create two extra tnodes per used output: |
| * (1) the clock pin node (the FF source), which drives the subblock output, |
| * and (2) the sequential sink (FF input), which the subblock inputs connect to. */ |
| snode_block_pin_to_tnode[iblk][isub][SUB_CLOCK] |
| [clk_pin] = curr_tnode; |
| |
| for(opin = 0; opin < type->max_subblock_outputs; |
| opin++) |
| { |
| if(subblock_inf[iblk][isub]. |
| outputs[opin] != OPEN) |
| curr_tnode += 2; |
| } |
| } |
| else |
| { |
| snode_block_pin_to_tnode[iblk][isub][SUB_CLOCK] |
| [clk_pin] = OPEN; |
| } |
| } |
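| /* Worked example of the numbering above (assumed: a non-IO subblock with |
| * two used inputs, one used output, and a clock): the inputs map to tnodes |
| * t and t+1, the output to t+2, and the clock to t+3 (the FF source), with |
| * t+4 reserved for the matching FF sink. */ |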
| |
| } /* End for all blocks */ |
| |
| *snode_block_pin_to_tnode_ptr = snode_block_pin_to_tnode; |
| *block_pin_to_tnode_ptr = block_pin_to_tnode; |
| return (curr_tnode); |
| } |
| |
| |
| static void |
| free_pin_mappings(int **block_pin_to_tnode, |
| int ****snode_block_pin_to_tnode, |
| int *num_subblocks_per_block) |
| { |
| |
| /* Frees the arrays that map from pins to tnode coordinates. */ |
| |
| int isub, iblk, isubtype, num_subblocks; |
| |
| for(iblk = 0; iblk < num_blocks; iblk++) |
| { |
| num_subblocks = num_subblocks_per_block[iblk]; |
| for(isub = 0; isub < num_subblocks; isub++) |
| { |
| for(isubtype = 0; isubtype < NUM_SUB_PIN_TYPES; |
| isubtype++) |
| { |
| free(snode_block_pin_to_tnode[iblk][isub] |
| [isubtype]); |
| } |
| } |
| free_matrix(snode_block_pin_to_tnode[iblk], 0, |
| num_subblocks_per_block[iblk] - 1, 0, sizeof(int *)); |
| free(block_pin_to_tnode[iblk]); |
| } |
| free(block_pin_to_tnode); |
| free(snode_block_pin_to_tnode); |
| } |
| |
| |
| static void |
| alloc_and_load_fanout_counts(int ***num_uses_of_fb_ipin_ptr, |
| int ****num_uses_of_sblk_opin_ptr, |
| t_subblock_data subblock_data) |
| { |
| |
| /* Allocates and loads two arrays that say how many places each fb input * |
| * pin and each subblock output pin fan out to. */ |
| |
| int iblk; |
| int **num_uses_of_fb_ipin, ***num_uses_of_sblk_opin; |
| int *num_subblocks_per_block; |
| t_subblock **subblock_inf; |
| |
| num_subblocks_per_block = subblock_data.num_subblocks_per_block; |
| subblock_inf = subblock_data.subblock_inf; |
| |
| num_uses_of_fb_ipin = (int **)my_malloc(num_blocks * sizeof(int *)); |
| |
| num_uses_of_sblk_opin = (int ***)my_malloc(num_blocks * sizeof(int **)); |
| |
| for(iblk = 0; iblk < num_blocks; iblk++) |
| { |
| num_uses_of_fb_ipin[iblk] = |
| (int *)my_calloc(block[iblk].type->num_pins, sizeof(int)); |
| num_uses_of_sblk_opin[iblk] = |
| (int **)alloc_matrix(0, block[iblk].type->max_subblocks - 1, |
| 0, |
| block[iblk].type->max_subblock_outputs - |
| 1, sizeof(int)); |
| |
| load_one_fb_fanout_count(subblock_inf[iblk], |
| num_subblocks_per_block[iblk], |
| num_uses_of_fb_ipin[iblk], |
| num_uses_of_sblk_opin[iblk], iblk); |
| |
| } /* End for all blocks */ |
| |
| *num_uses_of_fb_ipin_ptr = num_uses_of_fb_ipin; |
| *num_uses_of_sblk_opin_ptr = num_uses_of_sblk_opin; |
| } |
| |
| |
| static void |
| free_fanout_counts(int **num_uses_of_fb_ipin, |
| int ***num_uses_of_sblk_opin) |
| { |
| |
| /* Frees the fanout count arrays. */ |
| |
| int iblk; |
| t_type_ptr type; |
| |
| for(iblk = 0; iblk < num_blocks; iblk++) |
| { |
| type = block[iblk].type; |
| free(num_uses_of_fb_ipin[iblk]); |
| free_matrix(num_uses_of_sblk_opin[iblk], 0, |
| type->max_subblocks - 1, 0, sizeof(int)); |
| } |
| |
| free(num_uses_of_fb_ipin); |
| free(num_uses_of_sblk_opin); |
| } |
| |
| |
| static void |
| alloc_and_load_tnodes_and_net_mapping(int **num_uses_of_fb_ipin, |
| int ***num_uses_of_sblk_opin, |
| int **block_pin_to_tnode, |
| int ****snode_block_pin_to_tnode, |
| t_subblock_data subblock_data, |
| t_timing_inf timing_inf) |
| { |
| |
| int iblk; |
| int *num_subblocks_per_block; |
| t_subblock **subblock_inf; |
| |
| |
| tnode = (t_tnode *) my_malloc(num_tnodes * sizeof(t_tnode)); |
| tnode_descript = (t_tnode_descript *) my_malloc(num_tnodes * |
| sizeof(t_tnode_descript)); |
| |
| net_to_driver_tnode = (int *)my_malloc(num_nets * sizeof(int)); |
| |
| subblock_inf = subblock_data.subblock_inf; |
| num_subblocks_per_block = subblock_data.num_subblocks_per_block; |
| |
| |
| for(iblk = 0; iblk < num_blocks; iblk++) |
| { |
| build_fb_tnodes(iblk, num_uses_of_fb_ipin[iblk], |
| block_pin_to_tnode, |
| snode_block_pin_to_tnode[iblk], |
| num_subblocks_per_block[iblk], subblock_inf[iblk], |
| block[iblk].type->type_timing_inf. |
| T_fb_ipin_to_sblk_ipin); |
| |
| build_subblock_tnodes(num_uses_of_sblk_opin[iblk], |
| block_pin_to_tnode[iblk], |
| snode_block_pin_to_tnode[iblk], |
| num_subblocks_per_block, subblock_inf, |
| timing_inf, iblk); |
| } |
| } |
| |
| |
| static void |
| build_fb_tnodes(int iblk, |
| int *n_uses_of_fb_ipin, |
| int **block_pin_to_tnode, |
| int ***sub_pin_to_tnode, |
| int num_subs, |
| t_subblock * sub_inf, |
| float T_fb_ipin_to_sblk_ipin) |
| { |
| |
| /* This routine builds the tnodes corresponding to the fb pins of this |
| * block, and properly hooks them up to the rest of the graph. Note that |
| * only the snode_block_pin_to_tnode, etc. element for this block is passed in. |
| * Assumes that pins are ordered as [inputs][outputs][clk] |
| */ |
| |
| int isub, ipin, iedge, from_pin, opin; |
| int inode, to_node, num_edges; |
| t_tedge *tedge; |
| int clk_pin; |
| t_type_ptr type; |
| int *next_ipin_edge; |
| |
| type = block[iblk].type; |
| next_ipin_edge = (int *)my_malloc(type->num_pins * sizeof(int)); |
| clk_pin = 0; |
| |
| /* Start by allocating the edge arrays, and for opins, loading them. */ |
| |
| for(ipin = 0; ipin < block[iblk].type->num_pins; ipin++) |
| { |
| inode = block_pin_to_tnode[iblk][ipin]; |
| |
| if(inode != OPEN) |
| { /* Pin is used -> put in graph */ |
| if(is_opin(ipin, block[iblk].type)) |
| { |
| build_block_output_tnode(inode, iblk, ipin, |
| block_pin_to_tnode); |
| tnode_descript[inode].type = FB_OPIN; |
| } |
| else |
| { /* FB ipin */ |
| next_ipin_edge[ipin] = 0; /* Reset */ |
| num_edges = n_uses_of_fb_ipin[ipin]; |
| |
| /* if clock pin, timing edges go to each subblock output used */ |
| for(isub = 0; isub < num_subs; isub++) |
| { |
| if(sub_inf[isub].clock == ipin) |
| { |
| for(opin = 0; opin < type->max_subblock_outputs; opin++) |
| { |
| if(sub_inf[isub].outputs[opin] != OPEN) |
| { |
| num_edges++; |
| } |
| } |
| num_edges--; /* Remove clock_pin count, replaced by outputs */ |
| } |
| } |
| |
| tnode[inode].num_edges = num_edges; |
| |
| tnode[inode].out_edges = |
| (t_tedge *) my_chunk_malloc(num_edges * |
| sizeof(t_tedge), |
| &tedge_ch_list_head, |
| &tedge_ch_bytes_avail, |
| &tedge_ch_next_avail); |
| |
| tnode_descript[inode].type = FB_IPIN; |
| } |
| |
| tnode_descript[inode].ipin = ipin; |
| tnode_descript[inode].isubblk = OPEN; |
| tnode_descript[inode].iblk = iblk; |
| } |
| } |
| |
| /* Now load the edge arrays for the FB input pins. Do this by looking at * |
| * where the subblock input and clock pins are driven from. */ |
| |
| for(isub = 0; isub < num_subs; isub++) |
| { |
| for(ipin = 0; ipin < type->max_subblock_inputs; ipin++) |
| { |
| from_pin = sub_inf[isub].inputs[ipin]; |
| |
| /* Not OPEN and comes from fb ipin? */ |
| |
| if(from_pin != OPEN |
| && from_pin < block[iblk].type->num_pins) |
| { |
| inode = block_pin_to_tnode[iblk][from_pin]; |
| assert(inode != OPEN); |
| to_node = sub_pin_to_tnode[isub][SUB_INPUT][ipin]; |
| tedge = tnode[inode].out_edges; |
| iedge = next_ipin_edge[from_pin]++; |
| tedge[iedge].to_node = to_node; |
| tedge[iedge].Tdel = T_fb_ipin_to_sblk_ipin; |
| } |
| } |
| |
| from_pin = sub_inf[isub].clock; |
| |
| if(from_pin != OPEN && from_pin < block[iblk].type->num_pins) |
| { |
| inode = block_pin_to_tnode[iblk][from_pin]; |
| to_node = sub_pin_to_tnode[isub][SUB_CLOCK][clk_pin]; /* Feeds seq. output */ |
| /* connect to each output flip flop */ |
| for(opin = 0; opin < type->max_subblock_outputs; opin++) |
| { |
| if(sub_inf[isub].outputs[opin] != OPEN) |
| { |
| tedge = tnode[inode].out_edges; |
| iedge = next_ipin_edge[from_pin]++; |
| tedge[iedge].to_node = to_node; |
| |
| /* For the earliest possible clock I want this delay to be zero, so it * |
| * arrives at flip flops at T = 0. For later clocks or locally generated * |
| * clocks that may accumulate delay (like the clocks in a ripple counter), * |
| * I might want to make this delay nonzero. Not worth bothering about now. */ |
| |
| tedge[iedge].Tdel = 0.; |
| to_node += 2; |
| } |
| } |
| } |
| } |
| free(next_ipin_edge); |
| } |
| |
| |
| static void |
| build_block_output_tnode(int inode, |
| int iblk, |
| int ipin, |
| int **block_pin_to_tnode) |
| { |
| |
| /* Sets the number of edges and the edge array for an output pin from a * |
| * block. This pin must be hooked to something -- i.e. not OPEN. */ |
| |
| int iedge, to_blk, to_pin, to_node, num_edges, inet; |
| t_tedge *tedge; |
| |
| inet = block[iblk].nets[ipin]; /* Won't be OPEN, as inode exists */ |
| assert(inet != OPEN); /* Sanity check. */ |
| |
| net_to_driver_tnode[inet] = inode; |
| |
| num_edges = (net[inet].num_sinks + 1) - 1; |
| tnode[inode].num_edges = num_edges; |
| |
| tnode[inode].out_edges = (t_tedge *) my_chunk_malloc(num_edges * |
| sizeof(t_tedge), |
| &tedge_ch_list_head, |
| &tedge_ch_bytes_avail, |
| &tedge_ch_next_avail); |
| |
| tedge = tnode[inode].out_edges; |
| |
| for(iedge = 0; iedge < (net[inet].num_sinks + 1) - 1; iedge++) |
| { |
| to_blk = net[inet].node_block[iedge + 1]; |
| |
| to_pin = net[inet].node_block_pin[iedge + 1]; |
| |
| to_node = block_pin_to_tnode[to_blk][to_pin]; |
| tedge[iedge].to_node = to_node; |
| /* Set delay from net delays with a later call */ |
| } |
| } |
| |
| |
| static void |
| build_subblock_tnodes(int **n_uses_of_sblk_opin, |
| int *node_block_pin_to_tnode, |
| int ***sub_pin_to_tnode, |
| int *num_subblocks_per_block, |
| t_subblock ** subblock_inf, |
| t_timing_inf timing_inf, |
| int iblk) |
| { |
| |
| /* This routine builds the tnodes of the subblock pins within one FB. Note * |
| * that only the block_pin_to_tnode, etc. data for *this* block are passed * |
| * in. */ |
| |
| int isub, ipin, inode, to_node, from_pin, to_pin, opin, from_opin, |
| clk_pin, used_opin_count; |
| int num_subs, from_sub; |
| t_subblock *sub_inf; |
| int iedge, num_edges; |
| float sink_delay; |
| t_tedge *tedge; |
| boolean has_inputs, has_outputs; |
| int **next_sblk_opin_edge; /* [0..max_subblocks-1][0..max_subblock_outputs-1] */ |
| int *num_opin_used_in_sblk; /* [0..max_subblocks-1] */ |
| t_type_ptr type = block[iblk].type; |
| |
| sub_inf = subblock_inf[iblk]; |
| num_subs = num_subblocks_per_block[iblk]; |
| |
| next_sblk_opin_edge = |
| (int **)alloc_matrix(0, type->max_subblocks - 1, 0, |
| type->max_subblock_outputs - 1, sizeof(int)); |
| num_opin_used_in_sblk = |
| (int *)my_malloc(type->max_subblocks * sizeof(int)); |
| |
| clk_pin = 0; |
| |
| /* Allocate memory for output pins first. */ |
| for(isub = 0; isub < num_subs; isub++) |
| { |
| num_opin_used_in_sblk[isub] = 0; |
| for(opin = 0; opin < type->max_subblock_outputs; opin++) |
| { |
| inode = sub_pin_to_tnode[isub][SUB_OUTPUT][opin]; |
| |
| if(inode != OPEN) |
| { /* Output is used -> timing node exists. */ |
| num_opin_used_in_sblk[isub]++; |
| next_sblk_opin_edge[isub][opin] = 0; /* Reset */ |
| num_edges = n_uses_of_sblk_opin[isub][opin]; |
| tnode[inode].num_edges = num_edges; |
| |
| tnode[inode].out_edges = |
| (t_tedge *) my_chunk_malloc(num_edges * |
| sizeof(t_tedge), |
| &tedge_ch_list_head, |
| &tedge_ch_bytes_avail, |
| &tedge_ch_next_avail); |
| |
| if(IO_TYPE == type) |
| { |
| tnode_descript[inode].type = INPAD_OPIN; |
| tnode[inode + 1].num_edges = 1; |
| tnode[inode + 1].out_edges = (t_tedge *) |
| my_chunk_malloc(sizeof(t_tedge), |
| &tedge_ch_list_head, |
| &tedge_ch_bytes_avail, |
| &tedge_ch_next_avail); |
| tedge = tnode[inode + 1].out_edges; |
| tedge[0].to_node = inode; |
| |
| /* For input pads, use sequential output for source timing delay (treat as register) */ |
| if(is_global_clock |
| (iblk, isub, opin, |
| num_subblocks_per_block, |
| subblock_inf)) |
| tedge[0].Tdel = 0.; |
| else |
| tedge[0].Tdel = |
| type->type_timing_inf. |
| T_subblock[isub].T_seq_out[opin]; |
| |
| tnode_descript[inode + 1].type = |
| INPAD_SOURCE; |
| tnode_descript[inode + 1].ipin = OPEN; |
| tnode_descript[inode + 1].isubblk = isub; |
| tnode_descript[inode + 1].iblk = iblk; |
| } |
| else |
| { |
| tnode_descript[inode].type = SUBBLK_OPIN; |
| } |
| |
| tnode_descript[inode].ipin = opin; |
| tnode_descript[inode].isubblk = isub; |
| tnode_descript[inode].iblk = iblk; |
| } |
| } |
| } |
| |
| /* First pass: build the tnodes for the subblock input pins and, for subblocks |
| * used in sequential mode (i.e. clocked), the FF source and sink nodes for |
| * each used output. The edges out of the output pins, and constant |
| * generators, are handled in later passes. |
| */ |
| for(isub = 0; isub < num_subs; isub++) |
| { |
| has_outputs = FALSE; |
| |
| for(opin = 0; opin < type->max_subblock_outputs; opin++) |
| { |
| if(sub_pin_to_tnode[isub][SUB_OUTPUT][opin] != OPEN) |
| { |
| has_outputs = TRUE; |
| } |
| } |
| |
| if(!has_outputs && type != IO_TYPE) |
| { /* Empty, so skip */ |
| continue; |
| } |
| |
| if(sub_inf[isub].clock != OPEN) |
| { /* Sequential mode */ |
| inode = sub_pin_to_tnode[isub][SUB_CLOCK][clk_pin]; |
| /* Each output of a subblock has a flip-flop sink and source */ |
| for(opin = 0; opin < type->max_subblock_outputs; opin++) |
| { |
| if(sub_inf[isub].outputs[opin] != OPEN) |
| { |
| /* First node is the clock input pin; it feeds the sequential output */ |
| tnode[inode].num_edges = 1; |
| tnode[inode].out_edges = (t_tedge *) |
| my_chunk_malloc(sizeof(t_tedge), |
| &tedge_ch_list_head, |
| &tedge_ch_bytes_avail, |
| &tedge_ch_next_avail); |
| |
| tnode_descript[inode].type = FF_SOURCE; |
| tnode_descript[inode].ipin = OPEN; |
| tnode_descript[inode].isubblk = isub; |
| tnode_descript[inode].iblk = iblk; |
| |
| /* Now create the "sequential sink" -- i.e. the FF input node. */ |
| |
| inode++; |
| tnode[inode].num_edges = 0; |
| tnode[inode].out_edges = NULL; |
| |
| tnode_descript[inode].type = FF_SINK; |
| tnode_descript[inode].ipin = OPEN; |
| tnode_descript[inode].isubblk = isub; |
| tnode_descript[inode].iblk = iblk; |
| inode++; |
| } |
| } |
| } |
| |
| /* Build and hook up subblock inputs. */ |
| |
| for(ipin = 0; ipin < type->max_subblock_inputs; ipin++) |
| { |
| inode = sub_pin_to_tnode[isub][SUB_INPUT][ipin]; |
| |
| if(inode != OPEN) |
| { /* tnode exists -> pin is used */ |
| if(type == IO_TYPE) |
| { /* Output pad */ |
| tnode[inode].num_edges = 1; |
| opin = 0; |
| tnode[inode].out_edges = (t_tedge *) |
| my_chunk_malloc(sizeof(t_tedge), |
| &tedge_ch_list_head, |
| &tedge_ch_bytes_avail, |
| &tedge_ch_next_avail); |
| tnode_descript[inode].type = OUTPAD_IPIN; |
| tnode[inode + 1].num_edges = 0; |
| tnode[inode + 1].out_edges = NULL; |
| tedge = tnode[inode].out_edges; |
| tedge[0].to_node = inode + 1; |
| |
| /* For output pads, use subblock combinational time |
| * and sequential in for timing (treat as register) */ |
| tedge[0].Tdel = |
| type->type_timing_inf. |
| T_subblock[isub].T_comb[ipin][opin] + |
| type->type_timing_inf. |
| T_subblock[isub].T_seq_in[opin]; |
| |
| tnode_descript[inode + 1].type = |
| OUTPAD_SINK; |
| tnode_descript[inode + 1].ipin = OPEN; |
| tnode_descript[inode + 1].isubblk = isub; |
| tnode_descript[inode + 1].iblk = iblk; |
| } |
| else |
| { |
| tnode[inode].num_edges = |
| num_opin_used_in_sblk[isub]; |
| tnode[inode].out_edges = |
| (t_tedge *) |
| my_chunk_malloc(num_opin_used_in_sblk |
| [isub] * |
| sizeof(t_tedge), |
| &tedge_ch_list_head, |
| &tedge_ch_bytes_avail, |
| &tedge_ch_next_avail); |
| tnode_descript[inode].type = SUBBLK_IPIN; |
| } |
| |
| tnode_descript[inode].ipin = ipin; |
| tnode_descript[inode].isubblk = isub; |
| tnode_descript[inode].iblk = iblk; |
| } |
| } |
| } /* End for each subblock */ |
| |
| /* Second pass: load the edge arrays fanning out of the subblock output pins. */ |
| for(isub = 0; isub < num_subs; isub++) |
| { |
| used_opin_count = 0; |
| for(opin = 0; opin < type->max_subblock_outputs; opin++) |
| { |
| if(sub_pin_to_tnode[isub][SUB_OUTPUT][opin] == OPEN) |
| { /* Empty, so skip */ |
| continue; |
| } |
| for(ipin = 0; ipin < type->max_subblock_inputs; ipin++) |
| { /* sblk opin to sblk ipin */ |
| |
| from_pin = sub_inf[isub].inputs[ipin]; |
| |
| /* Not OPEN and comes from local subblock output? */ |
| |
| if(from_pin >= type->num_pins) |
| { |
| /* Convention for numbering netlist pins: internal connections are numbered |
| * from_sub * max_subblock_outputs + from_opin + num_pins. |
| */ |
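| /* E.g. with num_pins = 10 and max_subblock_outputs = 2, from_pin = 13 |
| * decodes to from_sub = (13 - 10) / 2 = 1 and from_opin = (13 - 10) % 2 = 1. */ |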
| from_sub = |
| (from_pin - |
| type->num_pins) / |
| type->max_subblock_outputs; |
| from_opin = |
| (from_pin - |
| type->num_pins) % |
| type->max_subblock_outputs; |
| inode = |
| sub_pin_to_tnode[from_sub][SUB_OUTPUT] |
| [from_opin]; |
| to_node = |
| sub_pin_to_tnode[isub][SUB_INPUT] |
| [ipin]; |
| tedge = tnode[inode].out_edges; |
| iedge = next_sblk_opin_edge[from_sub] |
| [from_opin]++; |
| tedge[iedge].to_node = to_node; |
| tedge[iedge].Tdel = |
| type->type_timing_inf. |
| T_sblk_opin_to_sblk_ipin; |
| } |
| } |
| from_pin = sub_inf[isub].clock; /* sblk opin to sblk clock */ |
| |
| /* Not OPEN and comes from local subblock output? */ |
| |
| if(from_pin >= type->num_pins) |
| { |
| from_sub = |
| (from_pin - |
| type->num_pins) / type->max_subblock_outputs; |
| from_opin = |
| (from_pin - |
| type->num_pins) % type->max_subblock_outputs; |
| inode = sub_pin_to_tnode[from_sub][SUB_OUTPUT] |
| [from_opin]; |
| /* Feeds seq. output, one ff per output pin */ |
| to_node = |
| sub_pin_to_tnode[isub][SUB_CLOCK][clk_pin] + |
| 2 * used_opin_count; |
| tedge = tnode[inode].out_edges; |
| iedge = |
| next_sblk_opin_edge[from_sub][from_opin]++; |
| tedge[iedge].to_node = to_node; |
| |
| /* NB: Could make sblk opin to clk delay parameter; not worth it right now. */ |
| tedge[iedge].Tdel = |
| type->type_timing_inf. |
| T_sblk_opin_to_sblk_ipin; |
| } |
| |
| to_pin = sub_inf[isub].outputs[opin]; |
| if(to_pin != OPEN) |
| { /* sblk opin goes to fb opin? */ |
| |
| /* Check that FB pin connects to something -> * |
| * not just a mandatory BLE to FB opin connection */ |
| |
| if(block[iblk].nets[to_pin] != OPEN) |
| { |
| to_node = node_block_pin_to_tnode[to_pin]; |
| inode = sub_pin_to_tnode[isub][SUB_OUTPUT] |
| [opin]; |
| tedge = tnode[inode].out_edges; |
| iedge = next_sblk_opin_edge[isub][opin]++; |
| tedge[iedge].to_node = to_node; |
| tedge[iedge].Tdel = |
| type->type_timing_inf. |
| T_sblk_opin_to_fb_opin; |
| } |
| } |
| used_opin_count++; |
| } |
| } |
| |
| /* Third pass: load the edges out of the subblock input pins and, if the * |
| * subblock is used in sequential mode (i.e. is clocked), out of the FF * |
| * source (clock) node for each used output. */ |
| for(isub = 0; isub < num_subs; isub++) |
| { |
| used_opin_count = 0; |
| for(opin = 0; opin < type->max_subblock_outputs; opin++) |
| { |
| |
| if(sub_pin_to_tnode[isub][SUB_OUTPUT][opin] == OPEN) /* Empty, so skip */ |
| continue; |
| /* Begin loading */ |
| if(sub_inf[isub].clock == OPEN) |
| { /* Combinational mode */ |
| to_node = |
| sub_pin_to_tnode[isub][SUB_OUTPUT][opin]; |
| sink_delay = 0; |
| } |
| else |
| { /* Sequential mode. Load two clock nodes. */ |
| inode = |
| sub_pin_to_tnode[isub][SUB_CLOCK][clk_pin] + |
| 2 * used_opin_count; |
| |
| /* First node is the clock input pin; it feeds the sequential output */ |
| tedge = tnode[inode].out_edges; |
| tedge[0].to_node = |
| sub_pin_to_tnode[isub][SUB_OUTPUT][opin]; |
| tedge[0].Tdel = |
| type->type_timing_inf.T_subblock[isub]. |
| T_seq_out[opin]; |
| |
| /* Now create the "sequential sink" -- i.e. the FF input node. */ |
| |
| inode++; |
| to_node = inode; |
| sink_delay = |
| type->type_timing_inf.T_subblock[isub]. |
| T_seq_in[opin]; |
| } |
| |
| /* Build and hook up subblock inputs. */ |
| |
| has_inputs = FALSE; |
| for(ipin = 0; ipin < type->max_subblock_inputs; ipin++) |
| { |
| inode = sub_pin_to_tnode[isub][SUB_INPUT][ipin]; |
| |
| if(inode != OPEN) |
| { /* tnode exists -> pin is used */ |
| has_inputs = TRUE; |
| tedge = tnode[inode].out_edges; |
| tedge[used_opin_count].to_node = to_node; |
| tedge[used_opin_count].Tdel = |
| sink_delay + |
| type->type_timing_inf. |
| T_subblock[isub].T_comb[ipin][opin]; |
| } |
| } |
| |
| if(!has_inputs && type != IO_TYPE) |
| { /* Constant generator. Give fake input. */ |
| |
| inode = |
| sub_pin_to_tnode[isub][SUB_OUTPUT][opin] + 1; |
| tnode[inode].num_edges = 1; |
| tnode[inode].out_edges = |
| (t_tedge *) my_chunk_malloc(sizeof(t_tedge), |
| &tedge_ch_list_head, |
| &tedge_ch_bytes_avail, |
| &tedge_ch_next_avail); |
| tedge = tnode[inode].out_edges; |
| tedge[0].to_node = to_node; /* The dummy source has exactly one edge. */ |
| |
| /* Want constants generated early so they never affect the critical path. */ |
| |
| tedge[0].Tdel = T_CONSTANT_GENERATOR; |
| |
| tnode_descript[inode].type = CONSTANT_GEN_SOURCE; |
| tnode_descript[inode].ipin = OPEN; |
| tnode_descript[inode].isubblk = isub; |
| tnode_descript[inode].iblk = iblk; |
| } |
| used_opin_count++; |
| } |
| } |
| free_matrix(next_sblk_opin_edge, 0, type->max_subblocks - 1, 0, |
| sizeof(int)); |
| free(num_opin_used_in_sblk); |
| } |
| |
| static boolean |
| is_global_clock(int iblk, |
| int sub, |
| int subpin, |
| int *num_subblocks_per_block, |
| t_subblock ** subblock_inf) |
| { |
| |
| /* Returns TRUE if the net driven by this block (which must be an INPAD) is * |
| * (1) a global signal, and (2) used as a clock input to at least one block. * |
| * Assumes that there is only one subblock in an IO. */ |
| |
| int inet, ipin, to_blk, to_pin, isub; |
| t_type_ptr type = block[iblk].type; |
| |
| assert(type == IO_TYPE); |
| |
| inet = block[iblk].nets[subblock_inf[iblk][sub].outputs[subpin]]; |
| assert(inet != OPEN); |
| |
| if(!net[inet].is_global) |
| return (FALSE); |
| |
| for(ipin = 1; ipin < (net[inet].num_sinks + 1); ipin++) |
| { |
| to_blk = net[inet].node_block[ipin]; |
| to_pin = net[inet].node_block_pin[ipin]; |
| |
| for(isub = 0; isub < num_subblocks_per_block[to_blk]; isub++) |
| { |
| if(subblock_inf[to_blk][isub].clock == to_pin) |
| return (TRUE); |
| } |
| } |
| |
| return (FALSE); |
| } |
| |
| void |
| load_timing_graph_net_delays(float **net_delay) |
| { |
| |
| /* Sets the delays of the inter-FB nets to the values specified by * |
| * net_delay[0..num_nets-1][1..num_pins-1]. These net delays should have * |
| * been allocated and loaded with the net_delay routines. This routine * |
| * marks the corresponding edges in the timing graph with the proper delay. */ |
| |
| int inet, ipin, inode; |
| t_tedge *tedge; |
| |
| for(inet = 0; inet < num_nets; inet++) |
| { |
| inode = net_to_driver_tnode[inet]; |
| tedge = tnode[inode].out_edges; |
| |
| /* Note that the edges of a tnode corresponding to a FB or INPAD opin must * |
| * be in the same order as the pins of the net driven by the tnode. */ |
| |
| for(ipin = 1; ipin < (net[inet].num_sinks + 1); ipin++) |
| tedge[ipin - 1].Tdel = net_delay[inet][ipin]; |
| } |
| } |
| |
| |
| void |
| free_timing_graph(float **net_slack) |
| { |
| |
| /* Frees the timing graph data. */ |
| |
| if(tedge_ch_list_head == NULL) |
| { |
| printf("Error in free_timing_graph: No timing graph to free.\n"); |
| exit(1); |
| } |
| |
| free_chunk_memory(tedge_ch_list_head); |
| free(tnode); |
| free(tnode_descript); |
| free(net_to_driver_tnode); |
| free_ivec_vector(tnodes_at_level, 0, num_tnode_levels - 1); |
| free(net_slack); |
| |
| tedge_ch_list_head = NULL; |
| tedge_ch_bytes_avail = 0; |
| tedge_ch_next_avail = NULL; |
| |
| tnode = NULL; |
| tnode_descript = NULL; |
| num_tnodes = 0; |
| net_to_driver_tnode = NULL; |
| tnodes_at_level = NULL; |
| num_tnode_levels = 0; |
| } |
| |
| |
| void |
| print_net_slack(char *fname, |
| float **net_slack) |
| { |
| |
| /* Prints the net slacks into a file. */ |
| |
| int inet, ipin; |
| FILE *fp; |
| |
| fp = my_fopen(fname, "w"); |
| |
| fprintf(fp, "Net #\tSlacks\n\n"); |
| |
| for(inet = 0; inet < num_nets; inet++) |
| { |
| fprintf(fp, "%5d", inet); |
| for(ipin = 1; ipin < (net[inet].num_sinks + 1); ipin++) |
| { |
| fprintf(fp, "\t%g", net_slack[inet][ipin]); |
| } |
| fprintf(fp, "\n"); |
| } |
| |
| fclose(fp); |
| } |
| |
| |
| void |
| print_timing_graph(char *fname) |
| { |
| |
| /* Prints the timing graph into a file. */ |
| |
| FILE *fp; |
| int inode, iedge, ilevel, i; |
| t_tedge *tedge; |
| t_tnode_type itype; |
| char *tnode_type_names[] = { "INPAD_SOURCE", "INPAD_OPIN", |
| "OUTPAD_IPIN", "OUTPAD_SINK", "FB_IPIN", "FB_OPIN", |
| "SUBBLK_IPIN", "SUBBLK_OPIN", "FF_SINK", "FF_SOURCE", |
| "CONSTANT_GEN_SOURCE" |
| }; |
| |
| |
| fp = my_fopen(fname, "w"); |
| |
| fprintf(fp, "num_tnodes: %d\n", num_tnodes); |
| fprintf(fp, "Node #\tType\t\tipin\tisubblk\tiblk\t# edges\t" |
| "Edges (to_node, Tdel)\n\n"); |
| |
| for(inode = 0; inode < num_tnodes; inode++) |
| { |
| fprintf(fp, "%d\t", inode); |
| |
| itype = tnode_descript[inode].type; |
| fprintf(fp, "%-15.15s\t", tnode_type_names[itype]); |
| |
| fprintf(fp, "%d\t%d\t%d\t", tnode_descript[inode].ipin, |
| tnode_descript[inode].isubblk, |
| tnode_descript[inode].iblk); |
| |
| fprintf(fp, "%d\t", tnode[inode].num_edges); |
| tedge = tnode[inode].out_edges; |
| for(iedge = 0; iedge < tnode[inode].num_edges; iedge++) |
| { |
| fprintf(fp, "\t(%4d,%7.3g)", tedge[iedge].to_node, |
| tedge[iedge].Tdel); |
| } |
| fprintf(fp, "\n"); |
| } |
| |
| fprintf(fp, "\n\nnum_tnode_levels: %d\n", num_tnode_levels); |
| |
| for(ilevel = 0; ilevel < num_tnode_levels; ilevel++) |
| { |
| fprintf(fp, "\n\nLevel: %d Num_nodes: %d\nNodes:", ilevel, |
| tnodes_at_level[ilevel].nelem); |
| for(i = 0; i < tnodes_at_level[ilevel].nelem; i++) |
| fprintf(fp, "\t%d", tnodes_at_level[ilevel].list[i]); |
| } |
| |
| fprintf(fp, "\n"); |
| fprintf(fp, "\n\nNet #\tNet_to_driver_tnode\n"); |
| |
| for(i = 0; i < num_nets; i++) |
| fprintf(fp, "%4d\t%6d\n", i, net_to_driver_tnode[i]); |
| |
| fprintf(fp, "\n\nNode #\t\tT_arr\t\tT_req\n\n"); |
| |
| for(inode = 0; inode < num_tnodes; inode++) |
| fprintf(fp, "%d\t%12g\t%12g\n", inode, tnode[inode].T_arr, |
| tnode[inode].T_req); |
| |
| fclose(fp); |
| } |
| |
| |
| float |
| load_net_slack(float **net_slack, |
| float target_cycle_time) |
| { |
| |
| /* Determines the slack of every source-sink pair of block pins in the * |
| * circuit. The timing graph must have already been built. * |
| * target_cycle_time is the target delay for the circuit -- if 0, it is * |
| * set to the critical path found in the timing graph. This routine * |
| * loads net_slack, and returns the current critical path delay. */ |
| |
| float T_crit, T_arr, Tdel, T_cycle, T_req; |
| int inode, ilevel, num_at_level, i, num_edges, iedge, to_node; |
| t_tedge *tedge; |
| |
| /* Reset all arrival times to -ve infinity. Can't just set to zero or the * |
| * constant propagation (constant generators work at -ve infinity) won't * |
| * work. */ |
| |
| for(inode = 0; inode < num_tnodes; inode++) |
| tnode[inode].T_arr = T_CONSTANT_GENERATOR; |
| |
| /* Compute all arrival times with a breadth-first analysis from inputs to * |
| * outputs. Also compute critical path (T_crit). */ |
| |
| T_crit = 0.; |
| |
| /* Primary inputs arrive at T = 0. */ |
| |
| num_at_level = tnodes_at_level[0].nelem; |
| for(i = 0; i < num_at_level; i++) |
| { |
| inode = tnodes_at_level[0].list[i]; |
| tnode[inode].T_arr = 0.; |
| } |
| |
| for(ilevel = 0; ilevel < num_tnode_levels; ilevel++) |
| { |
| num_at_level = tnodes_at_level[ilevel].nelem; |
| |
| for(i = 0; i < num_at_level; i++) |
| { |
| inode = tnodes_at_level[ilevel].list[i]; |
| T_arr = tnode[inode].T_arr; |
| num_edges = tnode[inode].num_edges; |
| tedge = tnode[inode].out_edges; |
| T_crit = max(T_crit, T_arr); |
| |
| for(iedge = 0; iedge < num_edges; iedge++) |
| { |
| to_node = tedge[iedge].to_node; |
| Tdel = tedge[iedge].Tdel; |
| tnode[to_node].T_arr = |
| max(tnode[to_node].T_arr, T_arr + Tdel); |
| } |
| } |
| |
| } |
| |
| if(target_cycle_time > 0.) /* User specified target cycle time */ |
| T_cycle = target_cycle_time; |
| else /* Otherwise, target = critical path */ |
| T_cycle = T_crit; |
| |
| /* Compute the required arrival times with a backward breadth-first analysis * |
| * from sinks (output pads, etc.) to primary inputs. */ |
| |
| for(ilevel = num_tnode_levels - 1; ilevel >= 0; ilevel--) |
| { |
| num_at_level = tnodes_at_level[ilevel].nelem; |
| |
| for(i = 0; i < num_at_level; i++) |
| { |
| inode = tnodes_at_level[ilevel].list[i]; |
| num_edges = tnode[inode].num_edges; |
| |
| if(num_edges == 0) |
| { /* sink */ |
| tnode[inode].T_req = T_cycle; |
| } |
| else |
| { |
| tedge = tnode[inode].out_edges; |
| to_node = tedge[0].to_node; |
| Tdel = tedge[0].Tdel; |
| T_req = tnode[to_node].T_req - Tdel; |
| |
| for(iedge = 1; iedge < num_edges; iedge++) |
| { |
| to_node = tedge[iedge].to_node; |
| Tdel = tedge[iedge].Tdel; |
| T_req = |
| min(T_req, |
| tnode[to_node].T_req - Tdel); |
| } |
| |
| tnode[inode].T_req = T_req; |
| } |
| } |
| } |
| |
| compute_net_slacks(net_slack); |
| |
| return (T_crit); |
| } |
| |
| |
| static void |
| compute_net_slacks(float **net_slack) |
| { |
| |
| /* Puts the slack of each source-sink pair of block pins in net_slack. */ |
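| /* For each tedge leaving a net's driver tnode: * |
| * slack(driver -> sink) = T_req(sink) - T_arr(driver) - Tdel(edge). */ |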
| |
| int inet, iedge, inode, to_node, num_edges; |
| t_tedge *tedge; |
| float T_arr, Tdel, T_req; |
| |
| for(inet = 0; inet < num_nets; inet++) |
| { |
| inode = net_to_driver_tnode[inet]; |
| T_arr = tnode[inode].T_arr; |
| num_edges = tnode[inode].num_edges; |
| tedge = tnode[inode].out_edges; |
| |
| for(iedge = 0; iedge < num_edges; iedge++) |
| { |
| to_node = tedge[iedge].to_node; |
| Tdel = tedge[iedge].Tdel; |
| T_req = tnode[to_node].T_req; |
| net_slack[inet][iedge + 1] = T_req - T_arr - Tdel; |
| } |
| } |
| } |
| |
| |
| void |
| print_critical_path(char *fname, |
| t_subblock_data subblock_data) |
| { |
| |
| /* Prints out the critical path to a file. */ |
| |
| t_linked_int *critical_path_head, *critical_path_node; |
| FILE *fp; |
| int non_global_nets_on_crit_path, global_nets_on_crit_path; |
| int tnodes_on_crit_path, inode, iblk, inet; |
| t_tnode_type type; |
| float total_net_delay, total_logic_delay, Tdel; |
| |
| critical_path_head = allocate_and_load_critical_path(); |
| critical_path_node = critical_path_head; |
| |
| fp = my_fopen(fname, "w"); |
| |
| non_global_nets_on_crit_path = 0; |
| global_nets_on_crit_path = 0; |
| tnodes_on_crit_path = 0; |
| total_net_delay = 0.; |
| total_logic_delay = 0.; |
| |
| while(critical_path_node != NULL) |
| { |
| Tdel = |
| print_critical_path_node(fp, critical_path_node, |
| subblock_data); |
| inode = critical_path_node->data; |
| type = tnode_descript[inode].type; |
| tnodes_on_crit_path++; |
| |
| if(type == INPAD_OPIN || type == FB_OPIN) |
| { |
| get_tnode_block_and_output_net(inode, &iblk, &inet); |
| |
| if(!net[inet].is_global) |
| non_global_nets_on_crit_path++; |
| else |
| global_nets_on_crit_path++; |
| |
| total_net_delay += Tdel; |
| } |
| else |
| { |
| total_logic_delay += Tdel; |
| } |
| |
| critical_path_node = critical_path_node->next; |
| } |
| |
| fprintf(fp, |
| "\nTnodes on crit. path: %d Non-global nets on crit. path: %d." |
| "\n", tnodes_on_crit_path, non_global_nets_on_crit_path); |
| fprintf(fp, "Global nets on crit. path: %d.\n", global_nets_on_crit_path); |
| fprintf(fp, "Total logic delay: %g (s) Total net delay: %g (s)\n", |
| total_logic_delay, total_net_delay); |
| |
| printf("Nets on crit. path: %d normal, %d global.\n", |
| non_global_nets_on_crit_path, global_nets_on_crit_path); |
| |
| printf("Total logic delay: %g (s) Total net delay: %g (s)\n", |
| total_logic_delay, total_net_delay); |
| |
| fclose(fp); |
| free_int_list(&critical_path_head); |
| } |
| |
| |
| t_linked_int * |
| allocate_and_load_critical_path(void) |
| { |
| |
| /* Finds the critical path and puts a list of the tnodes on the critical * |
| * path in a linked list, from the path SOURCE to the path SINK. */ |
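| /* The walk below is greedy: from the minimum-slack source it repeatedly * |
| * follows the out-edge whose sink has minimum slack; the slack is the same * |
| * at every node on the critical path, so this traces one most-critical path. */ |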
| |
| t_linked_int *critical_path_head, *curr_crit_node, *prev_crit_node; |
| int inode, iedge, to_node, num_at_level, i, crit_node, num_edges; |
| float min_slack, slack; |
| t_tedge *tedge; |
| |
| num_at_level = tnodes_at_level[0].nelem; |
| min_slack = HUGE_FLOAT; |
| crit_node = OPEN; /* Stops compiler warnings. */ |
| |
| for(i = 0; i < num_at_level; i++) |
| { /* Path must start at SOURCE (no inputs) */ |
| inode = tnodes_at_level[0].list[i]; |
| slack = tnode[inode].T_req - tnode[inode].T_arr; |
| |
| if(slack < min_slack) |
| { |
| crit_node = inode; |
| min_slack = slack; |
| } |
| } |
| |
| critical_path_head = (t_linked_int *) my_malloc(sizeof(t_linked_int)); |
| critical_path_head->data = crit_node; |
| prev_crit_node = critical_path_head; |
| num_edges = tnode[crit_node].num_edges; |
| |
| while(num_edges != 0) |
| { /* Path will end at SINK (no fanout) */ |
| curr_crit_node = (t_linked_int *) my_malloc(sizeof(t_linked_int)); |
| prev_crit_node->next = curr_crit_node; |
| tedge = tnode[crit_node].out_edges; |
| min_slack = HUGE_FLOAT; |
| |
| for(iedge = 0; iedge < num_edges; iedge++) |
| { |
| to_node = tedge[iedge].to_node; |
| slack = tnode[to_node].T_req - tnode[to_node].T_arr; |
| |
| if(slack < min_slack) |
| { |
| crit_node = to_node; |
| min_slack = slack; |
| } |
| } |
| |
| curr_crit_node->data = crit_node; |
| prev_crit_node = curr_crit_node; |
| num_edges = tnode[crit_node].num_edges; |
| } |
| |
| prev_crit_node->next = NULL; |
| return (critical_path_head); |
| } |
| |
| |
| void |
| get_tnode_block_and_output_net(int inode, |
| int *iblk_ptr, |
| int *inet_ptr) |
| { |
| |
| /* Returns the index of the block that this tnode is part of. If the tnode * |
| * is a FB_OPIN or INPAD_OPIN (i.e. if it drives a net), the net index is * |
| * returned via inet_ptr. Otherwise inet_ptr points at OPEN. */ |
| |
| int inet, ipin, iblk; |
| t_tnode_type tnode_type; |
| |
| iblk = tnode_descript[inode].iblk; |
| tnode_type = tnode_descript[inode].type; |
| |
| if(tnode_type == FB_OPIN || tnode_type == INPAD_OPIN) |
| { |
| ipin = tnode_descript[inode].ipin; |
| inet = block[iblk].nets[ipin]; |
| } |
| else |
| { |
| inet = OPEN; |
| } |
| |
| *iblk_ptr = iblk; |
| *inet_ptr = inet; |
| } |
| |
| |
| void |
| do_constant_net_delay_timing_analysis(t_timing_inf timing_inf, |
| t_subblock_data subblock_data, |
| float constant_net_delay_value) |
| { |
| |
| /* Does a simple timing analysis that assumes each net has a constant * |
| * delay value. Used only when operation == TIMING_ANALYSIS_ONLY. */ |
| |
| struct s_linked_vptr *net_delay_chunk_list_head; |
| float **net_delay, **net_slack; |
| |
| float T_crit; |
| |
| net_slack = alloc_and_load_timing_graph(timing_inf, subblock_data); |
| net_delay = alloc_net_delay(&net_delay_chunk_list_head); |
| |
| load_constant_net_delay(net_delay, constant_net_delay_value); |
| load_timing_graph_net_delays(net_delay); |
| T_crit = load_net_slack(net_slack, 0); |
| |
| printf("\n"); |
| printf("\nCritical Path: %g (s)\n", T_crit); |
| |
| #ifdef CREATE_ECHO_FILES |
| print_critical_path("critical_path.echo", subblock_data); |
| print_timing_graph("timing_graph.echo"); |
| print_net_slack("net_slack.echo", net_slack); |
| print_net_delay(net_delay, "net_delay.echo"); |
| #endif /* CREATE_ECHO_FILES */ |
| |
| free_timing_graph(net_slack); |
| free_net_delay(net_delay, &net_delay_chunk_list_head); |
| } |