odin: Added a function that splits a multiplier into AND gates
implementing partial products and a balanced addition tree for reducing the
partial products and producing the multiplication output.
diff --git a/ODIN_II/SRC/multipliers.cpp b/ODIN_II/SRC/multipliers.cpp
index 242afc0..4b05282 100644
--- a/ODIN_II/SRC/multipliers.cpp
+++ b/ODIN_II/SRC/multipliers.cpp
@@ -53,6 +53,16 @@
void split_multiplier_a(nnode_t *node, int a0, int a1, int b);
void split_multiplier_b(nnode_t *node, int a, int b1, int b0);
void pad_multiplier(nnode_t *node, netlist_t *netlist);
+void split_soft_multiplier(nnode_t *node, netlist_t *netlist);
+
+// data structure representing a row of bits an adder tree
+struct AdderTreeRow {
+ // the shift of this row from the least significant bit of the multiplier output
+ int shift = 0;
+ // array representing the bits in the row, each bit is a node
+ // pointer and the index of this bit in this node output array.
+ std::vector<std::pair<nnode_t*, int>> bits;
+};
/*---------------------------------------------------------------------------
* (function: instantiate_simple_soft_multiplier )
@@ -652,7 +662,7 @@
* (function: init_multiplier_adder)
*
* This function is used to initialize an adder that is within
- * a split multiplier.
+ * a split multiplier or a multiplier addition tree.
*-----------------------------------------------------------------------*/
void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b) {
@@ -1090,8 +1100,6 @@
* fit into a basic hard multiplier block that exists on the FPGA.
* If the proper option is set, then it will be expanded as well
* to just use a fixed size hard multiplier.
- * Note: This function will ignore multipliers that are smaller than
- * the minimum hard multiplier size
*-----------------------------------------------------------------------*/
void iterate_multipliers(netlist_t *netlist)
{
@@ -1171,11 +1179,226 @@
{
record_mult_distribution(node);
}
- }
+ } else if (hard_adders) {
+ if (configuration.fixed_hard_multiplier != 0) {
+ split_soft_multiplier(node, netlist);
+ }
+ }
}
return;
}
+/*---------------------------------------------------------------------------
+ * (function: split_soft_multiplier)
+ *
+ * This function splits the input multiplier (node) into partial products (AND gates) and
+ * adders, as shown below. The partial products starts with "I", and all the partial products
+ * generated are added together by implementing a balanced adder tree to produce the final product
+ * Sample 4x4 multiplier to help understand logic:
+ *
+ * A3 A2 A1 A0
+ * B3 B2 B1 B0
+ * -------------------------------
+ * I03 I02 I01 I00
+ * + I13 I12 I11 I10
+ * I23 I22 I21 I20 Level 0
+ * + I23 I22 I21 I20
+ * -------------------------------
+ * C4 C3 C2 C1 C0
+ * + D4 D3 D2 D1 D0 I20 Level 1
+ * -------------------------------
+ * E5 E4 E3 E2 E1 E0 C0 I00 Level 2
+ *
+ *-------------------------------------------------------------------------*/
+void split_soft_multiplier(nnode_t *node, netlist_t *netlist) {
+
+ oassert(node->num_output_pins > 0);
+ oassert(node->num_input_pins > 0);
+ oassert(node->num_input_port_sizes == 2);
+ oassert(node->num_output_port_sizes == 1);
+
+ int multiplier_width = node->input_port_sizes[0];
+ int multiplicand_width = node->input_port_sizes[1];
+
+ // ODIN II doesn't work with multiplicand sizes of 1 since it assumes that the
+ // output of the multiplier is still the sum of the operands sizes. However, it
+ // should only be equal to the long operand since its an AND operation in this case.
+ // If this is fixed, this assert statement should be removed and the code will work properly
+ oassert(multiplicand_width > 1);
+
+ // number of adders in a balanced tree of the partial product rows
+ const int add_levels = std::ceil(std::log((double)multiplicand_width)/std::log(2.));
+
+ // data structure holding the rows of output pins to be added in each addition stage
+ // as well as the shift of each row from the position of the first output
+ std::vector<std::vector<AdderTreeRow>> addition_stages(add_levels+1);
+ // 2-D array of adders, indexed by the level of the adder in the tree and the adder id within the level
+ std::vector<std::vector<nnode_t *>> adders(add_levels);
+ // array holding the adder width at each level in the adder tree
+ std::vector<std::vector<int>> adder_widths(add_levels);
+
+ // 2-D array of partial products. [0..multiplicand_width][0..multiplier_width]
+ std::vector<std::vector<nnode_t *>> partial_products(multiplicand_width);
+
+ addition_stages[0].resize(multiplicand_width);
+ // initialize all the AND gates needed for the partial products
+ for (int i = 0; i < multiplicand_width; i++) {
+ std::vector<std::pair<nnode_t*, int>> pp_bits(multiplier_width);
+ // resize the ith row of the partial products
+ partial_products[i].resize(multiplier_width);
+ for (int j = 0; j < multiplier_width; j++) {
+ // create each one of the partial products
+ partial_products[i][j] = make_1port_logic_gate(LOGICAL_AND, 2, node, node->traverse_visited);
+ pp_bits[j] = {partial_products[i][j], 0};
+ }
+ // add the partial product rows the addition stages data structure
+ addition_stages[0][i] = {i, pp_bits};
+ }
+
+ // generate the connections to the AND gates that generates the partial products of the multiplication
+ for (int i = 0; i < multiplicand_width; i++) {
+ for (int j = 0; j < multiplier_width; j++) {
+ // hookup the multiplier bits to the AND gates
+ if (i == 0) {
+ // when connecting the input to an AND gate for the first time, remap the input
+ remap_pin_to_new_node(node->input_pins[j], partial_products[i][j], 1);
+ } else {
+ // this input was remapped before, copy from the AND gate input instead
+ add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[0][j]->input_pins[1]), 1);
+ }
+ // hookup the input multiplicand bits the AND gates
+ if (j == 0) {
+ // when connecting the input to an AND gate for the first time, remap the input
+ remap_pin_to_new_node(node->input_pins[i+node->input_port_sizes[0]], partial_products[i][j], 0);
+ } else {
+ // this input was remapped before, copy from the AND gate input instead
+ add_input_pin_to_node(partial_products[i][j], copy_input_npin(partial_products[i][0]->input_pins[0]), 0);
+ }
+ }
+ }
+
+ // iterate over all the levels of addition
+ for (size_t level = 0; level < adders.size(); level++) {
+ // the number of rows in the next stage is the ceiling of number of rows in this stage divided by 2
+ addition_stages[level+1].resize(std::ceil(addition_stages[level].size()/2.));
+ // the number of adders in this stage is the integer division of the number of rows in this stage
+ adder_widths[level].resize(addition_stages[level].size()/2);
+ adders[level].resize(addition_stages[level].size()/2);
+
+ // iterate over every two rows
+ for (size_t row = 0; row < addition_stages[level].size() - 1; row +=2) {
+ auto& first_row = addition_stages[level][row];
+ auto& second_row = addition_stages[level][row+1];
+ auto shift_difference = second_row.shift - first_row.shift;
+ auto add_id = row/2;
+
+ // get the widths of the adder, by finding the larger operand size
+ adder_widths[level][add_id] = std::max<int>(first_row.bits.size() - shift_difference, second_row.bits.size());
+ // first level of addition has a carry out that needs to be generated, so increase adder size by 1
+ if (level == 0) adder_widths[level][add_id]++;
+ // add one bit for carry out if that last bit of the addition is fed by both levels
+ // (was found to be the only case were a carry out will be needed in this multiplier adder tree)
+ if (first_row.bits.size() - shift_difference == second_row.bits.size()) adder_widths[level][add_id]++;
+
+ // initialize this adder
+ adders[level][add_id] = allocate_nnode();
+ init_multiplier_adder(adders[level][add_id], node, adder_widths[level][add_id], adder_widths[level][add_id]);
+ adders[level][add_id]->name = node_name(adders[level][add_id], node->name);
+
+ // initialize the output of this adder in the next stage
+ addition_stages[level+1][add_id].shift = first_row.shift;
+ addition_stages[level+1][add_id].bits.resize(shift_difference + adder_widths[level][add_id]);
+ // copy the bits that weren't fed to adders in the previous stage
+ for (int i = 0; i < shift_difference; i++) {
+ addition_stages[level+1][add_id].bits[i] = first_row.bits[i];
+ }
+ // copy adder output bits to their row in next stage
+ for (int i = 0; i < adder_widths[level][add_id]; i++) {
+ addition_stages[level+1][add_id].bits[i + shift_difference] = {adders[level][add_id], i};
+ }
+
+ // connect the bits in the rows to the adder inputs.
+ for (int bit = 0; bit < adder_widths[level][add_id]; bit++) {
+ // input port a of the adder
+ if (bit < first_row.bits.size() - shift_difference) {
+ auto bit_a = first_row.bits[bit + shift_difference];
+ connect_nodes(bit_a.first, bit_a.second, adders[level][add_id], bit);
+ } else {
+ // connect additional inputs to gnd
+ add_input_pin_to_node(adders[level][add_id], get_zero_pin(netlist), bit);
+ }
+ // input port b of the adder
+ if (bit < second_row.bits.size()) {
+ connect_nodes(second_row.bits[bit].first, second_row.bits[bit].second, adders[level][add_id], bit + adder_widths[level][add_id]);
+ } else {
+ // connect additional inputs to gnd
+ add_input_pin_to_node(adders[level][add_id], get_zero_pin(netlist), bit + adder_widths[level][add_id]);
+ }
+ }
+ }
+
+ // if this level have odd number of rows copy the last row to the next level to be added later
+ if (addition_stages[level].size() % 2 == 1) {
+ addition_stages[level+1].back() = addition_stages[level].back();
+ }
+ }
+
+ // the size of the last stage of the adder tree should match the output size of the multiplier
+ oassert(addition_stages[add_levels][0].bits.size() == node->num_output_pins);
+
+ // Remap the outputs of the multiplier
+ for (size_t i = 0; i < addition_stages[add_levels][0].bits.size(); i++) {
+ auto output_bit = addition_stages[add_levels][0].bits[i];
+ remap_pin_to_new_node(node->output_pins[i], output_bit.first, output_bit.second);
+ }
+
+ // check that all connections and input/output remapping is done right
+ // meaning all the inputs and outputs of the multiplier that is splitted are nullptrs
+ // and all inputs and outputs of the AND gates and adders are not nullptrs
+
+ // check that all the inputs/outputs of the multiplier are remapped
+ for (int i = 0; i < node->num_input_pins; i++) {
+ oassert(!node->input_pins[i]);
+ }
+ for (int i = 0; i < node->num_output_pins; i++) {
+ oassert(!node->output_pins[i]);
+ }
+
+ // check that all the partial product gates have nets connected to their inputs/outputs
+ for (size_t ilevel = 0; ilevel < partial_products.size(); ilevel++) {
+ for (size_t depth = 0; depth < partial_products[ilevel].size(); depth++) {
+ for (int i = 0; i < partial_products[ilevel][depth]->num_input_pins; i++) {
+ oassert(partial_products[ilevel][depth]->input_pins[i]);
+ }
+ for (int i = 0; i < partial_products[ilevel][depth]->num_output_pins; i++) {
+ oassert(partial_products[ilevel][depth]->output_pins[i]);
+ }
+ }
+ }
+
+ // check that all adders have nets connected to their inputs/outputs
+ for (size_t ilevel = 0; ilevel < adders.size(); ilevel++) {
+ for (size_t iadd = 0; iadd < adders[ilevel].size(); iadd++) {
+ for (int i = 0; i < adders[ilevel][iadd]->num_input_pins; i++) {
+ oassert(adders[ilevel][iadd]->input_pins[i]);
+ }
+ for (int i = 0; i < adders[ilevel][iadd]->num_output_pins; i++) {
+ oassert(adders[ilevel][iadd]->output_pins[i]);
+ }
+ }
+ }
+
+ // Probably more to do here in freeing the old node!
+ vtr::free(node->name);
+ vtr::free(node->input_port_sizes);
+ vtr::free(node->output_port_sizes);
+
+ // Free arrays NOT the pins since relocated!
+ vtr::free(node->input_pins);
+ vtr::free(node->output_pins);
+ vtr::free(node);
+}
+
/*-------------------------------------------------------------------------
* (function: clean_multipliers)
*
diff --git a/ODIN_II/SRC/partial_map.cpp b/ODIN_II/SRC/partial_map.cpp
index d6fdd41..9b54719 100644
--- a/ODIN_II/SRC/partial_map.cpp
+++ b/ODIN_II/SRC/partial_map.cpp
@@ -258,7 +258,7 @@
int mult_size = std::max<int>(node->input_port_sizes[0], node->input_port_sizes[1]);
if (hard_multipliers && mult_size >= min_mult) {
instantiate_hard_multiplier(node, traverse_number, netlist);
- else {
+ } else if (!hard_adders) {
instantiate_simple_soft_multiplier(node, traverse_number, netlist);
}
break;