| /* |
| Permission is hereby granted, free of charge, to any person |
| obtaining a copy of this software and associated documentation |
| files (the "Software"), to deal in the Software without |
| restriction, including without limitation the rights to use, |
| copy, modify, merge, publish, distribute, sublicense, and/or sell |
| copies of the Software, and to permit persons to whom the |
| Software is furnished to do so, subject to the following |
| conditions: |
| |
| The above copyright notice and this permission notice shall be |
| included in all copies or substantial portions of the Software. |
| |
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <algorithm> |
| #include <cmath> |
| #include <string> |
| #include "odin_types.h" |
| #include "odin_util.h" |
| #include "node_creation_library.h" |
| #include "multipliers.h" |
| #include "netlist_utils.h" |
| #include "partial_map.h" |
| #include "read_xml_arch_file.h" |
| #include "odin_globals.h" |
| |
| #include "adders.h" |
| |
| #include "vtr_memory.h" |
| #include "vtr_list.h" |
| #include "vtr_util.h" |
| |
| |
| using vtr::t_linked_vptr; |
| using vtr::insert_in_vptr_list; |
| |
| t_model *hard_multipliers = NULL; |
| t_linked_vptr *mult_list = NULL; |
| int min_mult = 0; |
| int *mults = NULL; |
| |
| void record_mult_distribution(nnode_t *node); |
| void terminate_mult_distribution(); |
| void init_split_multiplier(nnode_t *node, nnode_t *ptr, int offa, int a, int offb, int b, nnode_t *node_a, nnode_t *node_b); |
| void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b); |
| void split_multiplier_a(nnode_t *node, int a0, int a1, int b); |
| void split_multiplier_b(nnode_t *node, int a, int b1, int b0); |
| void pad_multiplier(nnode_t *node, netlist_t *netlist); |
| void split_soft_multiplier(nnode_t *node, netlist_t *netlist); |
| |
| // data structure representing a row of bits an adder tree |
| struct AdderTreeRow { |
| // the shift of this row from the least significant bit of the multiplier output |
| int shift; |
| // array representing the bits in the row, each bit is a node |
| // pointer and the index of this bit in this node output array. |
| std::vector<std::pair<nnode_t*, int>> bits; |
| }; |
| |
| /*--------------------------------------------------------------------------- |
| * (function: instantiate_simple_soft_multiplier ) |
| * Sample 4x4 multiplier to help understand logic. |
| * |
| * a3 a2 a1 a0 |
| * b3 b2 b1 b0 |
| * --------------------------- |
| * c03 c02 c01 c00 |
| * + c13 c12 c11 c10 |
| * ----------------------------------- |
| * r14 r13 r12 r11 r10 |
| * + c23 c22 c21 c20 |
| * ----------------------------------- |
| * r24 r23 r22 r21 r20 |
| * + c33 c32 c31 c30 |
| * ------------------------------------ |
| * o7 o6 o5 o4 o3 o2 o1 o0 |
| * |
| * In the first case will be c01 |
| *-------------------------------------------------------------------------*/ |
| void instantiate_simple_soft_multiplier(nnode_t *node, short mark, netlist_t *netlist) |
| { |
| int width_a; |
| int width_b; |
| int width; |
| int multiplier_width; |
| int multiplicand_width; |
| nnode_t **adders_for_partial_products; |
| nnode_t ***partial_products; |
| int multiplicand_offset_index; |
| int multiplier_offset_index; |
| int current_index; |
| int i, j; |
| |
| /* need for an carry-ripple-adder for each of the bits of port B. */ |
| /* good question of which is better to put on the bottom of multiplier. Larger means more smaller adds, or small is |
| * less large adds */ |
| oassert(node->num_output_pins > 0); |
| oassert(node->num_input_pins > 0); |
| oassert(node->num_input_port_sizes == 2); |
| oassert(node->num_output_port_sizes == 1); |
| width_a = node->input_port_sizes[0]; |
| width_b = node->input_port_sizes[1]; |
| width = node->output_port_sizes[0]; |
| multiplicand_width = width_b; |
| multiplier_width = width_a; |
| /* offset is related to which multport is chosen as the multiplicand */ |
| multiplicand_offset_index = width_a; |
| multiplier_offset_index = 0; |
| |
| adders_for_partial_products = (nnode_t**)vtr::malloc(sizeof(nnode_t*)*multiplicand_width-1); |
| |
| /* need to generate partial products for each bit in width B. */ |
| partial_products = (nnode_t***)vtr::malloc(sizeof(nnode_t**)*multiplicand_width); |
| |
| /* generate the AND partial products */ |
| for (i = 0; i < multiplicand_width; i++) |
| { |
| /* create the memory for each AND gate needed for the levels of partial products */ |
| partial_products[i] = (nnode_t**)vtr::malloc(sizeof(nnode_t*)*multiplier_width); |
| |
| if (i < multiplicand_width - 1) |
| { |
| adders_for_partial_products[i] = make_2port_gate(ADD, multiplier_width+1, multiplier_width+1, multiplier_width+1, node, mark); |
| } |
| |
| for (j = 0; j < multiplier_width; j++) |
| { |
| /* create each one of the partial products */ |
| partial_products[i][j] = make_1port_logic_gate(LOGICAL_AND, 2, node, mark); |
| } |
| } |
| |
| /* generate the connections to the AND gates */ |
| for (i = 0; i < multiplicand_width; i++) |
| { |
| for (j = 0; j < multiplier_width; j++) |
| { |
| /* hookup the input of B to each AND gate */ |
| if (j == 0) |
| { |
| /* IF - this is the first time we are mapping multiplicand port then can remap */ |
| remap_pin_to_new_node(node->input_pins[i+multiplicand_offset_index], partial_products[i][j], 0); |
| } |
| else |
| { |
| /* ELSE - this needs to be a new output of the multiplicand port */ |
| add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[i][0]->input_pins[0]), 0); |
| } |
| |
| /* hookup the input of the multiplier to each AND gate */ |
| if (i == 0) |
| { |
| /* IF - this is the first time we are mapping multiplier port then can remap */ |
| remap_pin_to_new_node(node->input_pins[j+multiplier_offset_index], partial_products[i][j], 1); |
| } |
| else |
| { |
| /* ELSE - this needs to be a new output of the multiplier port */ |
| add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[0][j]->input_pins[1]), 1); |
| } |
| } |
| } |
| |
| /* hookup each of the adders */ |
| for (i = 0; i < multiplicand_width-1; i++) // -1 since the first stage is a combo of partial products while all others are part of tree |
| { |
| for (j = 0; j < multiplier_width+1; j++) // +1 since adders are one greater than multwidth to pass carry |
| { |
| /* join to port 1 of the add one of the partial products. */ |
| if (i == 0) |
| { |
| /* IF - this is the first addition row, then adding two sets of partial products and first set is from the c0* */ |
| if (j < multiplier_width-1) |
| { |
| /* IF - we just take an element of the first list c[0][j+1]. */ |
| connect_nodes(partial_products[i][j+1], 0, adders_for_partial_products[i], j); |
| } |
| else |
| { |
| /* ELSE - this is the last input to the first adder, then we pass in 0 since no carry yet */ |
| add_input_pin_to_node(adders_for_partial_products[i], get_zero_pin(netlist), j); |
| } |
| } |
| else if (j < multiplier_width) |
| { |
| /* ELSE - this is the standard situation when we need to hookup this adder with a previous adder, r[i-1][j+1] */ |
| connect_nodes(adders_for_partial_products[i-1], j+1, adders_for_partial_products[i], j); |
| } |
| else |
| { |
| add_input_pin_to_node(adders_for_partial_products[i], get_zero_pin(netlist), j); |
| } |
| |
| if (j < multiplier_width) |
| { |
| /* IF - this is not most significant bit then just add current partial product */ |
| connect_nodes(partial_products[i+1][j], 0, adders_for_partial_products[i], j+multiplier_width+1); |
| } |
| else |
| { |
| add_input_pin_to_node(adders_for_partial_products[i], get_zero_pin(netlist), j+multiplier_width+1); |
| } |
| } |
| } |
| |
| current_index = 0; |
| /* hookup the outputs */ |
| for (i = 0; i < width; i++) |
| { |
| if (multiplicand_width == 1) |
| { |
| // this is undealt with |
| error_message(PARSE_ERROR,-1,-1, "%s", "Cannot create soft multiplier with multiplicand width of 1.\n"); |
| } |
| else if (i == 0) |
| { |
| /* IF - this is the LSbit, then we use a pass through from the partial product */ |
| remap_pin_to_new_node(node->output_pins[i], partial_products[0][0], 0); |
| } |
| else if (i < multiplicand_width - 1) |
| { |
| /* ELSE IF - these are the middle values that come from the LSbit of partial adders */ |
| remap_pin_to_new_node(node->output_pins[i], adders_for_partial_products[i-1], 0); |
| } |
| else |
| { |
| /* ELSE - the final outputs are straight from the outputs of the last adder */ |
| remap_pin_to_new_node(node->output_pins[i], adders_for_partial_products[multiplicand_width-2], current_index); |
| current_index++; |
| } |
| } |
| |
| /* soft map the adders if they need to be mapped */ |
| for (i = 0; i < multiplicand_width - 1; i++) |
| { |
| instantiate_add_w_carry(adders_for_partial_products[i], mark, netlist); |
| } |
| |
| /* Cleanup everything */ |
| if (adders_for_partial_products != NULL) |
| { |
| for (i = 0; i < multiplicand_width-1; i++) { |
| free_nnode(adders_for_partial_products[i]); |
| } |
| vtr::free(adders_for_partial_products); |
| } |
| /* generate the AND partial products */ |
| for (i = 0; i < multiplicand_width; i++) |
| { |
| /* create the memory for each AND gate needed for the levels of partial products */ |
| if (partial_products[i] != NULL) |
| { |
| vtr::free(partial_products[i]); |
| } |
| } |
| if (partial_products != NULL) |
| { |
| vtr::free(partial_products); |
| } |
| } |
| |
| /*--------------------------------------------------------------------------- |
| * (function: init_mult_distribution) |
| *-------------------------------------------------------------------------*/ |
| void init_mult_distribution() |
| { |
| oassert(hard_multipliers != NULL); |
| int len = ( 1 + hard_multipliers->inputs->size ) * ( 1 + hard_multipliers->inputs->next->size ); |
| mults = (int *)vtr::calloc(len, sizeof(int)); |
| } |
| |
| /*--------------------------------------------------------------------------- |
| * (function: record_mult_distribution) |
| *-------------------------------------------------------------------------*/ |
| void record_mult_distribution(nnode_t *node) |
| { |
| int a, b; |
| |
| oassert(hard_multipliers != NULL); |
| oassert(node != NULL); |
| |
| a = node->input_port_sizes[0]; |
| b = node->input_port_sizes[1]; |
| |
| mults[a * hard_multipliers->inputs->size + b] += 1; |
| return; |
| } |
| |
| /*--------------------------------------------------------------------------- |
| * (function: report_mult_distribution) |
| *-------------------------------------------------------------------------*/ |
| void report_mult_distribution() |
| { |
| long num_total = 0; |
| |
| if(hard_multipliers == NULL) |
| return; |
| |
| printf("\nHard Multiplier Distribution\n"); |
| printf("============================\n"); |
| for (long i = 0; i <= hard_multipliers->inputs->size; i++) |
| { |
| for (long j = 1; j <= hard_multipliers->inputs->next->size; j++) |
| { |
| if (mults[i * hard_multipliers->inputs->size + j] != 0) |
| { |
| num_total += mults[i * hard_multipliers->inputs->size + j]; |
| printf("%ld X %ld => %d\n", i, j, mults[i * hard_multipliers->inputs->size + j]); |
| } |
| } |
| } |
| printf("\n"); |
| printf("\nTotal # of multipliers = %ld\n", num_total); |
| vtr::free(mults); |
| } |
| |
| /*--------------------------------------------------------------------------- |
| * (function: find_hard_multipliers) |
| *-------------------------------------------------------------------------*/ |
| void find_hard_multipliers() |
| { |
| hard_multipliers = Arch.models; |
| min_mult = configuration.min_hard_multiplier; |
| while (hard_multipliers != NULL) |
| { |
| if (strcmp(hard_multipliers->name, "multiply") == 0) |
| { |
| init_mult_distribution(); |
| return; |
| } |
| else |
| { |
| hard_multipliers = hard_multipliers->next; |
| } |
| } |
| |
| return; |
| } |
| |
| /*--------------------------------------------------------------------------- |
| * (function: declare_hard_multiplier) |
| *-------------------------------------------------------------------------*/ |
| void declare_hard_multiplier(nnode_t *node) |
| { |
| t_multiplier *tmp; |
| int width_a, width_b, width, swap; |
| |
| /* See if this size instance of multiplier exists? */ |
| if (hard_multipliers == NULL) |
| warning_message(NETLIST_ERROR, node->related_ast_node->line_number, node->related_ast_node->file_number, "%s\n", "Instantiating Mulitpliers where hard multipliers do not exist"); |
| |
| tmp = (t_multiplier *)hard_multipliers->instances; |
| width_a = node->input_port_sizes[0]; |
| width_b = node->input_port_sizes[1]; |
| width = node->output_port_sizes[0]; |
| if (width_a < width_b) /* Make sure a is bigger than b */ |
| { |
| swap = width_b; |
| width_b = width_a; |
| width_a = swap; |
| } |
| while (tmp != NULL) |
| { |
| if ((tmp->size_a == width_a) && (tmp->size_b == width_b) && (tmp->size_out == width)) |
| return; |
| else |
| tmp = tmp->next; |
| } |
| |
| /* Does not exist - must create an instance */ |
| tmp = (t_multiplier *)vtr::malloc(sizeof(t_multiplier)); |
| tmp->next = (t_multiplier *)hard_multipliers->instances; |
| hard_multipliers->instances = tmp; |
| tmp->size_a = width_a; |
| tmp->size_b = width_b; |
| tmp->size_out = width; |
| return; |
| } |
| |
| /*--------------------------------------------------------------------------- |
| * (function: instantiate_hard_multiplier ) |
| *-------------------------------------------------------------------------*/ |
| void instantiate_hard_multiplier(nnode_t *node, short mark, netlist_t * /*netlist*/) |
| { |
| oassert(node |
| && "node is NULL to instanciate hard multiplier"); |
| |
| declare_hard_multiplier(node); |
| |
| std::string node_name = ""; |
| if( node->name ) |
| { |
| node_name = node->name; |
| vtr::free(node->name); |
| node->name = NULL; |
| } |
| |
| if(node->num_output_pins <= 0) |
| { |
| /* wide input first :) */ |
| int portA = 0; |
| int portB = 1; |
| if (node->input_port_sizes[1] > node->input_port_sizes[0]) |
| { |
| portA = 1; |
| portB = 0; |
| } |
| std::string tmp( |
| node_name + |
| "_" + std::to_string(node->input_port_sizes[portA]) + |
| "_" + std::to_string(node->input_port_sizes[portB]) + |
| "_" + std::to_string(node->output_port_sizes[0]) |
| ); |
| node->name = vtr::strdup(tmp.c_str()); |
| } |
| else |
| { |
| /* Give names to the output pins */ |
| for (int i = 0; i < node->num_output_pins; i++) |
| { |
| if (node->output_pins[i]->name) |
| { |
| vtr::free(node->output_pins[i]->name); |
| } |
| //build the output string |
| std::string tmp( |
| node_name + |
| "[" + std::to_string(node->output_pins[i]->pin_node_idx) + "]" |
| ); |
| node->output_pins[i]->name = vtr::strdup(tmp.c_str()); |
| } |
| node->name = vtr::strdup(node->output_pins[node->num_output_pins-1]->name); |
| } |
| node->traverse_visited = mark; |
| return; |
| } |
| |
| |
| /*---------------------------------------------------------------------------- |
| * function: add_the_blackbox_for_mults() |
| *--------------------------------------------------------------------------*/ |
| void add_the_blackbox_for_mults(FILE *out) |
| { |
| long i; |
| int count; |
| int hard_mult_inputs; |
| t_multiplier *muls; |
| t_model_ports *ports; |
| char buffer[MAX_BUF]; |
| char *pa, *pb, *po; |
| |
| /* Check to make sure this target architecture has hard multipliers */ |
| if (hard_multipliers == NULL) |
| return; |
| |
| /* Get the names of the ports for the multiplier */ |
| ports = hard_multipliers->inputs; |
| pb = ports->name; |
| ports = ports->next; |
| pa = ports->name; |
| po = hard_multipliers->outputs->name; |
| |
| /* find the multiplier devices in the tech library */ |
| muls = (t_multiplier *)(hard_multipliers->instances); |
| if (muls == NULL) /* No multipliers instantiated */ |
| return; |
| |
| /* simplified way of getting the multsize, but fine for quick example */ |
| while (muls != NULL) |
| { |
| /* write out this multiplier model */ |
| if (configuration.fixed_hard_multiplier != 0) |
| count = fprintf(out, ".model multiply\n"); |
| else |
| count = fprintf(out, ".model mult_%d_%d_%d\n", muls->size_a, muls->size_b, muls->size_out); |
| |
| /* add the inputs */ |
| count = count + fprintf(out, ".inputs"); |
| hard_mult_inputs = muls->size_a + muls->size_b; |
| for (i = 0; i < hard_mult_inputs; i++) |
| { |
| if (i < muls->size_a) |
| { |
| count = count + odin_sprintf(buffer, " %s[%ld]", pa, i); |
| } |
| else |
| { |
| count = count + odin_sprintf(buffer, " %s[%ld]", pb, i - muls->size_a); |
| } |
| |
| if (count > 78) |
| count = fprintf(out, " \\\n %s", buffer) - 3; |
| else |
| fprintf(out, " %s", buffer); |
| } |
| fprintf(out, "\n"); |
| |
| /* add the outputs */ |
| count = fprintf(out, ".outputs"); |
| for (i = 0; i < muls->size_out; i++) |
| { |
| count = count + odin_sprintf(buffer, " %s[%ld]", po, i); |
| if (count > 78) |
| { |
| fprintf(out, " \\\n%s", buffer); |
| count = strlen(buffer); |
| } |
| else |
| fprintf(out, "%s", buffer); |
| } |
| fprintf(out, "\n"); |
| fprintf(out, ".blackbox\n"); |
| fprintf(out, ".end\n"); |
| fprintf(out, "\n"); |
| |
| muls = muls->next; |
| } |
| |
| free_multipliers(); |
| } |
| |
| /*------------------------------------------------------------------------- |
| * (function: define_mult_function) |
| *-----------------------------------------------------------------------*/ |
| void define_mult_function(nnode_t *node, FILE *out) |
| { |
| long i, j; |
| int count; |
| char buffer[MAX_BUF]; |
| |
| count = fprintf(out, "\n.subckt"); |
| count--; |
| oassert(node->input_port_sizes[0] > 0); |
| oassert(node->input_port_sizes[1] > 0); |
| oassert(node->output_port_sizes[0] > 0); |
| |
| int flip = false; |
| |
| if (configuration.fixed_hard_multiplier != 0) |
| { |
| count += fprintf(out, " multiply"); |
| } |
| else |
| { |
| if (node->input_port_sizes[0] > node->input_port_sizes[1]) |
| { |
| count += fprintf(out, " mult_%d_%d_%d", node->input_port_sizes[0], |
| node->input_port_sizes[1], node->output_port_sizes[0]); |
| |
| flip = false; |
| } |
| else |
| { |
| count += fprintf(out, " mult_%d_%d_%d", node->input_port_sizes[1], |
| node->input_port_sizes[0], node->output_port_sizes[0]); |
| |
| flip = true; |
| } |
| } |
| |
| for (i = 0; i < node->num_input_pins; i++) |
| { |
| if (i < node->input_port_sizes[flip?1:0]) |
| { |
| npin_t *driver_pin = flip |
| ?node->input_pins[i+node->input_port_sizes[0]]->net->driver_pin |
| :node->input_pins[i ]->net->driver_pin; |
| |
| if (!driver_pin->name) |
| j = odin_sprintf(buffer, " %s[%ld]=%s", hard_multipliers->inputs->next->name, i, driver_pin->node->name); |
| else |
| j = odin_sprintf(buffer, " %s[%ld]=%s", hard_multipliers->inputs->next->name, i, driver_pin->name); |
| } |
| else |
| { |
| npin_t *driver_pin = flip |
| ?node->input_pins[i-node->input_port_sizes[1]]->net->driver_pin |
| :node->input_pins[i ]->net->driver_pin; |
| |
| long index = flip |
| ?i - node->input_port_sizes[1] |
| :i - node->input_port_sizes[0]; |
| |
| if (!driver_pin->name) |
| j = odin_sprintf(buffer, " %s[%ld]=%s", hard_multipliers->inputs->name, index, driver_pin->node->name); |
| else |
| j = odin_sprintf(buffer, " %s[%ld]=%s", hard_multipliers->inputs->name, index, driver_pin->name); |
| } |
| |
| if (count + j > 79) |
| { |
| fprintf(out, "\\\n"); |
| count = 0; |
| } |
| count += fprintf(out, "%s", buffer); |
| } |
| |
| |
| for (i = 0; i < node->num_output_pins; i++) |
| { |
| j = odin_sprintf(buffer, " %s[%ld]=%s", hard_multipliers->outputs->name, i, node->output_pins[i]->name); |
| if (count + j > 79) |
| { |
| fprintf(out, "\\\n"); |
| count = 0; |
| } |
| count += fprintf(out, "%s", buffer); |
| } |
| |
| fprintf(out, "\n\n"); |
| return; |
| } |
| |
| /*----------------------------------------------------------------------- |
| * (function: init_split_multiplier) |
| * Create a dummy multiplier when spliting. Inputs are connected |
| * to original pins, output pins are set to NULL for later connecting |
| * with temp pins to connect cascading multipliers/adders. |
| *---------------------------------------------------------------------*/ |
| void init_split_multiplier(nnode_t *node, nnode_t *ptr, int offa, int a, int offb, int b, nnode_t *node_a, nnode_t *node_b) |
| { |
| int i; |
| |
| /* Copy properties from original node */ |
| ptr->type = node->type; |
| ptr->related_ast_node = node->related_ast_node; |
| ptr->traverse_visited = node->traverse_visited; |
| ptr->node_data = NULL; |
| |
| /* Set new port sizes and parameters */ |
| ptr->num_input_port_sizes = 2; |
| ptr->input_port_sizes = (int *)vtr::malloc(2 * sizeof(int)); |
| ptr->input_port_sizes[0] = a; |
| ptr->input_port_sizes[1] = b; |
| ptr->num_output_port_sizes = 1; |
| ptr->output_port_sizes = (int *)vtr::malloc(sizeof(int)); |
| ptr->output_port_sizes[0] = a + b; |
| |
| /* Set the number of pins and re-locate previous pin entries */ |
| ptr->num_input_pins = a + b; |
| ptr->input_pins = (npin_t**)vtr::malloc(sizeof(void *) * (a + b)); |
| for (i = 0; i < a; i++) |
| { |
| if (node_a) |
| add_input_pin_to_node(ptr, copy_input_npin(node_a->input_pins[i]), i); |
| else |
| remap_pin_to_new_node(node->input_pins[i+offa], ptr, i); |
| } |
| |
| for (i = 0; i < b; i++) |
| { |
| if (node_b) |
| add_input_pin_to_node(ptr, copy_input_npin(node_b->input_pins[i + node_b->input_port_sizes[0]]), i + a); |
| else |
| remap_pin_to_new_node(node->input_pins[i + node->input_port_sizes[0] + offb], ptr, i + a); |
| } |
| |
| /* Prep output pins for connecting to cascaded multipliers */ |
| ptr->num_output_pins = a + b; |
| ptr->output_pins = (npin_t**)vtr::malloc(sizeof(void *) * (a + b)); |
| for (i = 0; i < a + b; i++) |
| ptr->output_pins[i] = NULL; |
| |
| return; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * (function: init_multiplier_adder) |
| * |
| * This function is used to initialize an adder that is within |
| * a split multiplier or a multiplier addition tree. |
| *-----------------------------------------------------------------------*/ |
| void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b) { |
| |
| int i, size; |
| |
| node->type = ADD; |
| node->related_ast_node = parent->related_ast_node; |
| node->traverse_visited = parent->traverse_visited; |
| node->node_data = NULL; |
| |
| /* Set size to be the maximum input size */ |
| size = a; |
| size = (size < b) ? b : size; |
| |
| /* Set new port sizes and parameters */ |
| node->num_input_port_sizes = 2; |
| node->input_port_sizes = (int *)vtr::malloc(2 * sizeof(int)); |
| node->input_port_sizes[0] = a; |
| node->input_port_sizes[1] = b; |
| node->num_output_port_sizes = 1; |
| node->output_port_sizes = (int *)vtr::malloc(sizeof(int)); |
| node->output_port_sizes[0] = size; |
| |
| /* Set the number of input pins and clear pin entries */ |
| node->num_input_pins = a + b; |
| node->input_pins = (npin_t**)vtr::malloc(sizeof(void *) * (a + b)); |
| for (i = 0; i < a + b; i++) |
| node->input_pins[i] = NULL; |
| |
| /* Set the number of output pins and clear pin entries */ |
| node->num_output_pins = size; |
| node->output_pins = (npin_t**)vtr::malloc(sizeof(void *) * size); |
| for (i = 0; i < size; i++) |
| node->output_pins[i] = NULL; |
| |
| add_list = insert_in_vptr_list(add_list, node); |
| return; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * (function: split_multiplier) |
| * |
| * This function works to split a multiplier into several smaller |
| * multipliers to better "fit" with the available resources in a |
| * targeted FPGA architecture. |
| * |
| * This function is at the lowest level since it simply receives |
| * a multiplier and is told how to split it. The end result is: |
| * |
| * a1a0 * b1b0 => a0 * b0 + a0 * b1 + a1 * b0 + a1 * b1 => c1c0 => c |
| * |
| * If we "balance" the additions, we can actually remove one of the |
| * addition operations since we know that a0 * b0 and a1 * b1 will |
| * not overlap in bits. This allows us to skip the addition between |
| * these two terms and simply concat the results together. Giving us |
| * the resulting logic: |
| * |
| * ((a1 * b1) . (a0 * b0)) + ((a0 * b1) + (a1 * b0)) ==> Result |
| * |
| * Note that for some of the additions we need to perform sign extensions, |
| * but this should not be a problem since the sign extension is always |
| * extending NOT contracting. |
| * |
| *-----------------------------------------------------------------------*/ |
| void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *netlist) |
| { |
| nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addbig; |
| int i, size; |
| |
| /* Check for a legitimate split */ |
| oassert(node->input_port_sizes[0] == (a0 + a1)); |
| oassert(node->input_port_sizes[1] == (b0 + b1)); |
| |
| /* New node for small multiply */ |
| a0b0 = allocate_nnode(); |
| a0b0->name = (char *)vtr::malloc(strlen(node->name) + 3); |
| strcpy(a0b0->name, node->name); |
| strcat(a0b0->name, "-0"); |
| init_split_multiplier(node, a0b0, 0, a0, 0, b0, nullptr, nullptr); |
| mult_list = insert_in_vptr_list(mult_list, a0b0); |
| |
| /* New node for big multiply */ |
| a1b1 = allocate_nnode(); |
| a1b1->name = (char *)vtr::malloc(strlen(node->name) + 3); |
| strcpy(a1b1->name, node->name); |
| strcat(a1b1->name, "-3"); |
| init_split_multiplier(node, a1b1, a0, a1, b0, b1, nullptr, nullptr); |
| mult_list = insert_in_vptr_list(mult_list, a1b1); |
| |
| /* New node for 2nd multiply */ |
| a0b1 = allocate_nnode(); |
| a0b1->name = (char *)vtr::malloc(strlen(node->name) + 3); |
| strcpy(a0b1->name, node->name); |
| strcat(a0b1->name, "-1"); |
| init_split_multiplier(node, a0b1, 0, a0, b0, b1, a0b0, a1b1); |
| mult_list = insert_in_vptr_list(mult_list, a0b1); |
| |
| /* New node for 3rd multiply */ |
| a1b0 = allocate_nnode(); |
| a1b0->name = (char *)vtr::malloc(strlen(node->name) + 3); |
| strcpy(a1b0->name, node->name); |
| strcat(a1b0->name, "-2"); |
| init_split_multiplier(node, a1b0, a0, a1, 0, b0, a1b1, a0b0); |
| mult_list = insert_in_vptr_list(mult_list, a1b0); |
| |
| /* New node for the initial add */ |
| addsmall = allocate_nnode(); |
| addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); |
| strcpy(addsmall->name, node->name); |
| strcat(addsmall->name, "-add0"); |
| // this addition will have a carry out in the worst case, add to input pins and connect then to gnd |
| init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + 1, a0b1->num_output_pins + 1); |
| |
| /* New node for the BIG add */ |
| addbig = allocate_nnode(); |
| addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); |
| strcpy(addbig->name, node->name); |
| strcat(addbig->name, "-add1"); |
| init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, a0b0->num_output_pins - b0 + a1b1->num_output_pins); |
| |
| // connect inputs to port a of addsmall |
| for (i = 0; i < a1b0->num_output_pins; i++) |
| connect_nodes(a1b0, i, addsmall, i); |
| add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins); |
| // connect inputs to port b of addsmall |
| for (i = 0; i < a0b1->num_output_pins; i++) |
| connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0]); |
| add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0]); |
| |
| // connect inputs to port a of addbig |
| size = addsmall->num_output_pins; |
| for (i = 0; i < size; i++) |
| connect_nodes(addsmall, i, addbig, i); |
| |
| // connect inputs to port b of addbig |
| for (i = b0; i < a0b0->output_port_sizes[0]; i++) |
| connect_nodes(a0b0, i, addbig, i - b0 + size); |
| size = size + a0b0->output_port_sizes[0] - b0; |
| for (i = 0; i < a1b1->output_port_sizes[0]; i++) |
| connect_nodes(a1b1, i, addbig, i + size); |
| |
| // remap the multiplier outputs coming directly from a0b0 |
| for (i = 0; i < b0; i++) { |
| remap_pin_to_new_node(node->output_pins[i], a0b0, i); |
| } |
| |
| // remap the multiplier outputs coming from addbig |
| for (i = 0; i < addbig->num_output_pins; i++) { |
| remap_pin_to_new_node(node->output_pins[i+b0], addbig, i); |
| } |
| |
| /* Probably more to do here in freeing the old node! */ |
| vtr::free(node->name); |
| vtr::free(node->input_port_sizes); |
| vtr::free(node->output_port_sizes); |
| |
| /* Free arrays NOT the pins since relocated! */ |
| vtr::free(node->input_pins); |
| vtr::free(node->output_pins); |
| vtr::free(node); |
| |
| return; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * (function: split_multiplier_a) |
| * |
| * This function works to split the "a" input of a multiplier into |
| * several smaller multipliers to better "fit" with the available |
| * resources in a targeted FPGA architecture. |
| * |
| * This function is at the lowest level since it simply receives |
| * a multiplier and is told how to split it. The end result is: |
| * |
| * a1a0 * b => a0 * b + a1 * b => c |
| * |
| * Note that for the addition we need to perform sign extension, |
| * but this should not be a problem since the sign extension is always |
| * extending NOT contracting. |
| * |
| *-----------------------------------------------------------------------*/ |
| void split_multiplier_a(nnode_t *node, int a0, int a1, int b) |
| { |
| nnode_t *a0b, *a1b, *addsmall; |
| int i; |
| |
| /* Check for a legitimate split */ |
| oassert(node->input_port_sizes[0] == (a0 + a1)); |
| oassert(node->input_port_sizes[1] == b); |
| |
| /* New node for a0b multiply */ |
| a0b = allocate_nnode(); |
| a0b->name = (char *)vtr::malloc(strlen(node->name) + 3); |
| strcpy(a0b->name, node->name); |
| strcat(a0b->name, "-0"); |
| init_split_multiplier(node, a0b, 0, a0, 0, b, nullptr, nullptr); |
| mult_list = insert_in_vptr_list(mult_list, a0b); |
| |
| /* New node for a1b multiply */ |
| a1b = allocate_nnode(); |
| a1b->name = (char *)vtr::malloc(strlen(node->name) + 3); |
| strcpy(a1b->name, node->name); |
| strcat(a1b->name, "-1"); |
| init_split_multiplier(node, a1b, a0, a1, 0, b, nullptr, a0b); |
| mult_list = insert_in_vptr_list(mult_list, a1b); |
| |
| /* New node for the add */ |
| addsmall = allocate_nnode(); |
| addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); |
| strcpy(addsmall->name, node->name); |
| strcat(addsmall->name, "-add0"); |
| init_multiplier_adder(addsmall, a0b, b, a1b->num_output_pins); |
| |
| /* Connect pins for addsmall */ |
| for (i = a0; i < a0b->num_output_pins; i++) |
| connect_nodes(a0b, i, addsmall, i-a0); |
| for (i = 0; i < a1b->num_output_pins; i++) |
| connect_nodes(a1b, i, addsmall, i + addsmall->input_port_sizes[0]); |
| |
| /* Move original output pins for multiply to new outputs */ |
| for (i = 0; i < a0; i++) |
| remap_pin_to_new_node(node->output_pins[i], a0b, i); |
| |
| for (i = 0; i < addsmall->num_output_pins; i++) |
| remap_pin_to_new_node(node->output_pins[i+a0], addsmall, i); |
| |
| /* Probably more to do here in freeing the old node! */ |
| vtr::free(node->name); |
| vtr::free(node->input_port_sizes); |
| vtr::free(node->output_port_sizes); |
| |
| /* Free arrays NOT the pins since relocated! */ |
| vtr::free(node->input_pins); |
| vtr::free(node->output_pins); |
| vtr::free(node); |
| return; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * (function: split_multiplier_b) |
| * |
| * This function works to split the "b" input of a multiplier into |
| * several smaller multipliers to better "fit" with the available |
| * resources in a targeted FPGA architecture. |
| * |
| * This function is at the lowest level since it simply receives |
| * a multiplier and is told how to split it. The end result is: |
| * |
| * a * b1b0 => a * b1 + a * b0 => c |
| * |
| * Note that for the addition we need to perform sign extension, |
| * but this should not be a problem since the sign extension is always |
| * extending NOT contracting. |
| * |
| *-----------------------------------------------------------------------*/ |
| void split_multiplier_b(nnode_t *node, int a, int b1, int b0) |
| { |
| nnode_t *ab0, *ab1, *addsmall; |
| int i; |
| |
| /* Check for a legitimate split */ |
| oassert(node->input_port_sizes[0] == a); |
| oassert(node->input_port_sizes[1] == (b0 + b1)); |
| |
| /* New node for ab0 multiply */ |
| ab0 = allocate_nnode(); |
| ab0->name = (char *)vtr::malloc(strlen(node->name) + 3); |
| strcpy(ab0->name, node->name); |
| strcat(ab0->name, "-0"); |
| init_split_multiplier(node, ab0, 0, a, 0, b0, nullptr, nullptr); |
| mult_list = insert_in_vptr_list(mult_list, ab0); |
| |
| /* New node for ab1 multiply */ |
| ab1 = allocate_nnode(); |
| ab1->name = (char *)vtr::malloc(strlen(node->name) + 3); |
| strcpy(ab1->name, node->name); |
| strcat(ab1->name, "-1"); |
| init_split_multiplier(node, ab1, 0, a, b0, b1, ab0, nullptr); |
| mult_list = insert_in_vptr_list(mult_list, ab1); |
| |
| /* New node for the add */ |
| addsmall = allocate_nnode(); |
| addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); |
| strcpy(addsmall->name, node->name); |
| strcat(addsmall->name, "-add0"); |
| init_multiplier_adder(addsmall, ab1, ab1->num_output_pins, a + b1); |
| |
| /* Connect pins for addsmall */ |
| for (i = b0; i < ab0->output_port_sizes[0]; i++) |
| connect_nodes(ab0, i, addsmall, i-b0); |
| for (i = ab0->output_port_sizes[0] - b0; i < a+b1; i++) /* Sign extend */ |
| connect_nodes(ab0, ab0->output_port_sizes[0]-1, addsmall, i); |
| for (i = b1+a; i < (2 * (a + b1)); i++) |
| connect_nodes(ab1, i-(b1+a), addsmall, i); |
| |
| /* Move original output pins for multiply to new outputs */ |
| for (i = 0; i < b0; i++) |
| remap_pin_to_new_node(node->output_pins[i], ab0, i); |
| |
| for (i = b0; i < node->num_output_pins; i++) |
| remap_pin_to_new_node(node->output_pins[i], addsmall, i-b0); |
| |
| /* Probably more to do here in freeing the old node! */ |
| vtr::free(node->name); |
| vtr::free(node->input_port_sizes); |
| vtr::free(node->output_port_sizes); |
| |
| /* Free arrays NOT the pins since relocated! */ |
| vtr::free(node->input_pins); |
| vtr::free(node->output_pins); |
| vtr::free(node); |
| return; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * (function: pad_multiplier) |
| * |
| * Fill out a multiplier to a fixed size. Size is retrieved from global |
| * hard_multipliers data. |
| * |
| * NOTE: The inputs are extended based on multiplier padding setting. |
| *-----------------------------------------------------------------------*/ |
| void pad_multiplier(nnode_t *node, netlist_t *netlist) |
| { |
| int diffa, diffb, diffout, i; |
| int sizea, sizeb, sizeout; |
| int ina, inb; |
| |
| int testa, testb; |
| |
| static int pad_pin_number = 0; |
| |
| oassert(node->type == MULTIPLY); |
| oassert(hard_multipliers != NULL); |
| |
| sizea = node->input_port_sizes[0]; |
| sizeb = node->input_port_sizes[1]; |
| sizeout = node->output_port_sizes[0]; |
| record_mult_distribution(node); |
| |
| /* Calculate the BEST fit hard multiplier to use */ |
| ina = hard_multipliers->inputs->size; |
| inb = hard_multipliers->inputs->next->size; |
| if (ina < inb) |
| { |
| ina = hard_multipliers->inputs->next->size; |
| inb = hard_multipliers->inputs->size; |
| } |
| diffa = ina - sizea; |
| diffb = inb - sizeb; |
| diffout = hard_multipliers->outputs->size - sizeout; |
| |
| if (configuration.split_hard_multiplier == 1) |
| { |
| t_linked_vptr *plist = hard_multipliers->pb_types; |
| while ((diffa + diffb) && plist) |
| { |
| t_pb_type *physical = (t_pb_type *)(plist->data_vptr); |
| plist = plist->next; |
| testa = physical->ports[0].num_pins; |
| testb = physical->ports[1].num_pins; |
| if ((testa >= sizea) && (testb >= sizeb) && |
| ((testa - sizea + testb - sizeb) < (diffa + diffb))) |
| { |
| diffa = testa - sizea; |
| diffb = testb - sizeb; |
| diffout = physical->ports[2].num_pins - sizeout; |
| } |
| } |
| } |
| |
| /* Expand the inputs */ |
| if ((diffa != 0) || (diffb != 0)) |
| { |
| allocate_more_input_pins(node, diffa + diffb); |
| |
| /* Shift pins for expansion of first input pins */ |
| if (diffa != 0) |
| { |
| for (i = 1; i <= sizeb; i++) |
| { |
| move_input_pin(node, sizea + sizeb - i, node->num_input_pins - diffb - i); |
| } |
| |
| /* Connect unused first input pins to zero/pad pin */ |
| for (i = 0; i < diffa; i++) |
| { |
| if (configuration.mult_padding == 0) |
| add_input_pin_to_node(node, get_zero_pin(netlist), i + sizea); |
| else |
| add_input_pin_to_node(node, get_pad_pin(netlist), i + sizea); |
| } |
| |
| node->input_port_sizes[0] = sizea + diffa; |
| } |
| |
| if (diffb != 0) |
| { |
| /* Connect unused second input pins to zero/pad pin */ |
| for (i = 1; i <= diffb; i++) |
| { |
| if (configuration.mult_padding == 0) |
| add_input_pin_to_node(node, get_zero_pin(netlist), node->num_input_pins - i); |
| else |
| add_input_pin_to_node(node, get_pad_pin(netlist), node->num_input_pins - i); |
| } |
| |
| node->input_port_sizes[1] = sizeb + diffb; |
| } |
| } |
| |
| /* Expand the outputs */ |
| if (diffout != 0) |
| { |
| allocate_more_output_pins(node, diffout); |
| for (i = 0; i < diffout; i++) |
| { |
| // Add new pins to the higher order spots. |
| npin_t *new_pin = allocate_npin(); |
| // Pad outputs with a unique and descriptive name to avoid collisions. |
| new_pin->name = append_string("", "unconnected_multiplier_output~%d", pad_pin_number++); |
| add_output_pin_to_node(node, new_pin, i + sizeout); |
| } |
| node->output_port_sizes[0] = sizeout + diffout; |
| } |
| |
| return; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * (function: iterate_multipliers) |
| * |
| * This function will iterate over all of the multiply operations that |
| * exist in the netlist and perform a splitting so that they can |
| * fit into a basic hard multiplier block that exists on the FPGA. |
| * If the proper option is set, then it will be expanded as well |
| * to just use a fixed size hard multiplier. |
| *-----------------------------------------------------------------------*/ |
| void iterate_multipliers(netlist_t *netlist) |
| { |
| int sizea, sizeb, swap; |
| int mula, mulb; |
| int a0, a1, b0, b1; |
| nnode_t *node; |
| |
| /* Can only perform the optimisation if hard multipliers exist! */ |
| if (hard_multipliers == NULL) |
| return; |
| |
| sizea = hard_multipliers->inputs->size; |
| sizeb = hard_multipliers->inputs->next->size; |
| if (sizea < sizeb) |
| { |
| swap = sizea; |
| sizea = sizeb; |
| sizeb = swap; |
| } |
| |
| while (mult_list != NULL) |
| { |
| node = (nnode_t *)mult_list->data_vptr; |
| mult_list = delete_in_vptr_list(mult_list); |
| |
| oassert(node != NULL); |
| |
| if (node->type == HARD_IP) |
| node->type = MULTIPLY; |
| |
| oassert(node->type == MULTIPLY); |
| |
| mula = node->input_port_sizes[0]; |
| mulb = node->input_port_sizes[1]; |
| int mult_size = std::max<int>(mula, mulb); |
| if (mula < mulb) |
| { |
| swap = sizea; |
| sizea = sizeb; |
| sizeb = swap; |
| } |
| |
| /* Do I need to split the multiplier on both inputs? */ |
| if ((mula > sizea) && (mulb > sizeb)) |
| { |
| a0 = sizea; |
| a1 = mula - sizea; |
| b0 = sizeb; |
| b1 = mulb - sizeb; |
| split_multiplier(node, a0, b0, a1, b1, netlist); |
| } |
| else if (mula > sizea) /* split multiplier on a input? */ |
| { |
| a0 = sizea; |
| a1 = mula - sizea; |
| split_multiplier_a(node, a0, a1, mulb); |
| } |
| else if (mulb > sizeb) /* split multiplier on b input? */ |
| { |
| b1 = sizeb; |
| b0 = mulb - sizeb; |
| split_multiplier_b(node, mula, b1, b0); |
| } |
| // if either of the multiplicands is larger than the |
| // minimum hard multiplier size, use hard multiplier |
| // TODO: implement multipliers where one of the operands is |
| // 1 bit wide using soft logic |
| else if (mult_size >= min_mult || mula == 1 || mulb == 1) |
| { |
| /* Check to ensure IF mult needs to be exact size */ |
| if(configuration.fixed_hard_multiplier != 0) |
| pad_multiplier(node, netlist); |
| |
| /* Otherwise, we still want to record the multiplier node for |
| reporting later on (the pad_multiplier function does this for the |
| other case */ |
| else |
| { |
| record_mult_distribution(node); |
| } |
| } else if (hard_adders) { |
| if (configuration.fixed_hard_multiplier != 0) { |
| split_soft_multiplier(node, netlist); |
| } |
| } |
| } |
| return; |
| } |
| |
| /*--------------------------------------------------------------------------- |
| * (function: split_soft_multiplier) |
| * |
| * This function splits the input multiplier (node) into partial products (AND gates) and |
| * adders, as shown below. The partial products starts with "I", and all the partial products |
| * generated are added together by implementing a balanced adder tree to produce the final product |
| * Sample 4x4 multiplier to help understand logic: |
| * |
| * A3 A2 A1 A0 |
| * B3 B2 B1 B0 |
| * ------------------------------- |
| * I03 I02 I01 I00 |
| * + I13 I12 I11 I10 |
| * I23 I22 I21 I20 Level 0 |
| * + I23 I22 I21 I20 |
| * ------------------------------- |
| * C4 C3 C2 C1 C0 |
| * + D4 D3 D2 D1 D0 I20 Level 1 |
| * ------------------------------- |
| * E5 E4 E3 E2 E1 E0 C0 I00 Level 2 |
| * |
| *-------------------------------------------------------------------------*/ |
| void split_soft_multiplier(nnode_t *node, netlist_t *netlist) { |
| |
| oassert(node->num_output_pins > 0); |
| oassert(node->num_input_pins > 0); |
| oassert(node->num_input_port_sizes == 2); |
| oassert(node->num_output_port_sizes == 1); |
| |
| int multiplier_width = node->input_port_sizes[0]; |
| int multiplicand_width = node->input_port_sizes[1]; |
| |
| // ODIN II doesn't work with multiplicand sizes of 1 since it assumes that the |
| // output of the multiplier is still the sum of the operands sizes. However, it |
| // should only be equal to the long operand since its an AND operation in this case. |
| // If this is fixed, this assert statement should be removed and the code will work properly |
| oassert(multiplicand_width > 1); |
| |
| // number of adders in a balanced tree of the partial product rows |
| const int add_levels = std::ceil(std::log((double)multiplicand_width)/std::log(2.)); |
| |
| // data structure holding the rows of output pins to be added in each addition stage |
| // as well as the shift of each row from the position of the first output |
| std::vector<std::vector<AdderTreeRow>> addition_stages(add_levels+1); |
| // 2-D array of adders, indexed by the level of the adder in the tree and the adder id within the level |
| std::vector<std::vector<nnode_t *>> adders(add_levels); |
| // array holding the adder width at each level in the adder tree |
| std::vector<std::vector<int>> adder_widths(add_levels); |
| |
| // 2-D array of partial products. [0..multiplicand_width][0..multiplier_width] |
| std::vector<std::vector<nnode_t *>> partial_products(multiplicand_width); |
| |
| addition_stages[0].resize(multiplicand_width); |
| // initialize all the AND gates needed for the partial products |
| for (int i = 0; i < multiplicand_width; i++) { |
| std::vector<std::pair<nnode_t*, int>> pp_bits(multiplier_width); |
| // resize the ith row of the partial products |
| partial_products[i].resize(multiplier_width); |
| for (int j = 0; j < multiplier_width; j++) { |
| // create each one of the partial products |
| partial_products[i][j] = make_1port_logic_gate(LOGICAL_AND, 2, node, node->traverse_visited); |
| pp_bits[j] = {partial_products[i][j], 0}; |
| } |
| // add the partial product rows the addition stages data structure |
| addition_stages[0][i] = {i, pp_bits}; |
| } |
| |
| // generate the connections to the AND gates that generates the partial products of the multiplication |
| for (int i = 0; i < multiplicand_width; i++) { |
| for (int j = 0; j < multiplier_width; j++) { |
| // hookup the multiplier bits to the AND gates |
| if (i == 0) { |
| // when connecting the input to an AND gate for the first time, remap the input |
| remap_pin_to_new_node(node->input_pins[j], partial_products[i][j], 1); |
| } else { |
| // this input was remapped before, copy from the AND gate input instead |
| add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[0][j]->input_pins[1]), 1); |
| } |
| // hookup the input multiplicand bits the AND gates |
| if (j == 0) { |
| // when connecting the input to an AND gate for the first time, remap the input |
| remap_pin_to_new_node(node->input_pins[i+node->input_port_sizes[0]], partial_products[i][j], 0); |
| } else { |
| // this input was remapped before, copy from the AND gate input instead |
| add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[i][0]->input_pins[0]), 0); |
| } |
| } |
| } |
| |
| // iterate over all the levels of addition |
| for (size_t level = 0; level < adders.size(); level++) { |
| // the number of rows in the next stage is the ceiling of number of rows in this stage divided by 2 |
| addition_stages[level+1].resize(std::ceil(addition_stages[level].size()/2.)); |
| // the number of adders in this stage is the integer division of the number of rows in this stage |
| adder_widths[level].resize(addition_stages[level].size()/2); |
| adders[level].resize(addition_stages[level].size()/2); |
| |
| // iterate over every two rows |
| for (size_t row = 0; row < addition_stages[level].size() - 1; row +=2) { |
| auto& first_row = addition_stages[level][row]; |
| auto& second_row = addition_stages[level][row+1]; |
| auto shift_difference = second_row.shift - first_row.shift; |
| auto add_id = row/2; |
| |
| // get the widths of the adder, by finding the larger operand size |
| adder_widths[level][add_id] = std::max<int>(first_row.bits.size() - shift_difference, second_row.bits.size()); |
| // first level of addition has a carry out that needs to be generated, so increase adder size by 1 |
| if (level == 0) adder_widths[level][add_id]++; |
| // add one bit for carry out if that last bit of the addition is fed by both levels |
| // (was found to be the only case were a carry out will be needed in this multiplier adder tree) |
| if (first_row.bits.size() - shift_difference == second_row.bits.size()) adder_widths[level][add_id]++; |
| |
| // initialize this adder |
| adders[level][add_id] = allocate_nnode(); |
| init_multiplier_adder(adders[level][add_id], node, adder_widths[level][add_id], adder_widths[level][add_id]); |
| adders[level][add_id]->name = node_name(adders[level][add_id], node->name); |
| |
| // initialize the output of this adder in the next stage |
| addition_stages[level+1][add_id].shift = first_row.shift; |
| addition_stages[level+1][add_id].bits.resize(shift_difference + adder_widths[level][add_id]); |
| // copy the bits that weren't fed to adders in the previous stage |
| for (int i = 0; i < shift_difference; i++) { |
| addition_stages[level+1][add_id].bits[i] = first_row.bits[i]; |
| } |
| // copy adder output bits to their row in next stage |
| for (int i = 0; i < adder_widths[level][add_id]; i++) { |
| addition_stages[level+1][add_id].bits[i + shift_difference] = {adders[level][add_id], i}; |
| } |
| |
| // connect the bits in the rows to the adder inputs. |
| for (int bit = 0; bit < adder_widths[level][add_id]; bit++) { |
| // input port a of the adder |
| if (bit < first_row.bits.size() - shift_difference) { |
| auto bit_a = first_row.bits[bit + shift_difference]; |
| connect_nodes(bit_a.first, bit_a.second, adders[level][add_id], bit); |
| } else { |
| // connect additional inputs to gnd |
| add_input_pin_to_node(adders[level][add_id], get_zero_pin(netlist), bit); |
| } |
| // input port b of the adder |
| if (bit < second_row.bits.size()) { |
| connect_nodes(second_row.bits[bit].first, second_row.bits[bit].second, adders[level][add_id], bit + adder_widths[level][add_id]); |
| } else { |
| // connect additional inputs to gnd |
| add_input_pin_to_node(adders[level][add_id], get_zero_pin(netlist), bit + adder_widths[level][add_id]); |
| } |
| } |
| } |
| |
| // if this level have odd number of rows copy the last row to the next level to be added later |
| if (addition_stages[level].size() % 2 == 1) { |
| addition_stages[level+1].back() = addition_stages[level].back(); |
| } |
| } |
| |
| // the size of the last stage of the adder tree should match the output size of the multiplier |
| oassert(addition_stages[add_levels][0].bits.size() == node->num_output_pins); |
| |
| // Remap the outputs of the multiplier |
| for (size_t i = 0; i < addition_stages[add_levels][0].bits.size(); i++) { |
| auto output_bit = addition_stages[add_levels][0].bits[i]; |
| remap_pin_to_new_node(node->output_pins[i], output_bit.first, output_bit.second); |
| } |
| |
| // check that all connections and input/output remapping is done right |
| // meaning all the inputs and outputs of the multiplier that is splitted are nullptrs |
| // and all inputs and outputs of the AND gates and adders are not nullptrs |
| |
| // check that all the inputs/outputs of the multiplier are remapped |
| for (int i = 0; i < node->num_input_pins; i++) { |
| oassert(!node->input_pins[i]); |
| } |
| for (int i = 0; i < node->num_output_pins; i++) { |
| oassert(!node->output_pins[i]); |
| } |
| |
| // check that all the partial product gates have nets connected to their inputs/outputs |
| for (size_t ilevel = 0; ilevel < partial_products.size(); ilevel++) { |
| for (size_t depth = 0; depth < partial_products[ilevel].size(); depth++) { |
| for (int i = 0; i < partial_products[ilevel][depth]->num_input_pins; i++) { |
| oassert(partial_products[ilevel][depth]->input_pins[i]); |
| } |
| for (int i = 0; i < partial_products[ilevel][depth]->num_output_pins; i++) { |
| oassert(partial_products[ilevel][depth]->output_pins[i]); |
| } |
| } |
| } |
| |
| // check that all adders have nets connected to their inputs/outputs |
| for (size_t ilevel = 0; ilevel < adders.size(); ilevel++) { |
| for (size_t iadd = 0; iadd < adders[ilevel].size(); iadd++) { |
| for (int i = 0; i < adders[ilevel][iadd]->num_input_pins; i++) { |
| oassert(adders[ilevel][iadd]->input_pins[i]); |
| } |
| for (int i = 0; i < adders[ilevel][iadd]->num_output_pins; i++) { |
| oassert(adders[ilevel][iadd]->output_pins[i]); |
| } |
| } |
| } |
| |
| // Probably more to do here in freeing the old node! |
| vtr::free(node->name); |
| vtr::free(node->input_port_sizes); |
| vtr::free(node->output_port_sizes); |
| |
| // Free arrays NOT the pins since relocated! |
| vtr::free(node->input_pins); |
| vtr::free(node->output_pins); |
| vtr::free(node); |
| } |
| |
| bool is_ast_multiplier(ast_node_t *node) |
| { |
| bool is_mult; |
| ast_node_t *instance = node->children[1]; |
| is_mult = (!strcmp(node->children[0]->types.identifier, "multiply")) |
| && (instance->children[1]->num_children == 3); |
| |
| ast_node_t *connect_list = instance->children[1]; |
| if (is_mult && connect_list->children[0]->children[0]) |
| { |
| /* port connections were passed by name; verify port names */ |
| for (int i = 0; i < connect_list->num_children && is_mult; i++) |
| { |
| char *id = connect_list->children[i]->children[0]->types.identifier; |
| |
| if ((strcmp(id, "a") != 0) && |
| (strcmp(id, "b") != 0) && |
| (strcmp(id, "out") != 0) |
| ) |
| { |
| is_mult = false; |
| break; |
| } |
| } |
| } |
| |
| return is_mult; |
| } |
| |
| /*------------------------------------------------------------------------- |
| * (function: clean_multipliers) |
| * |
| * Clean up the memory by deleting the list structure of multipliers |
| * during optimization |
| *-----------------------------------------------------------------------*/ |
| void clean_multipliers() |
| { |
| while (mult_list != NULL) |
| mult_list = delete_in_vptr_list(mult_list); |
| return; |
| } |
| |
| void free_multipliers() |
| { |
| if(hard_multipliers && hard_multipliers->instances) |
| { |
| t_multiplier *tmp = (t_multiplier *)hard_multipliers->instances; |
| |
| while(tmp != NULL) |
| { |
| t_multiplier *tmp2 = tmp->next; |
| vtr::free(tmp); |
| tmp = tmp2; |
| } |
| |
| hard_multipliers->instances = NULL; |
| } |
| } |
| |