Merge pull request #226 from antmicro/k6n10f_mult_inference

qlf_k6n10f DSP multiplier inference
diff --git a/Makefile_test.common b/Makefile_test.common
index 6497321..16e016e 100644
--- a/Makefile_test.common
+++ b/Makefile_test.common
@@ -23,17 +23,17 @@
 $(error "Didn't find 'yosys-config' under '$(YOSYS_PATH)'")
 endif
 
-GTEST_DIR ?= ../../third_party/googletest
+GTEST_DIR ?= $(abspath ../../third_party/googletest)
 CXX ?= $(shell $(YOSYS_CONFIG) --cxx)
 CXXFLAGS ?= $(shell $(YOSYS_CONFIG) --cxxflags) -I.. -I$(GTEST_DIR)/googletest/include
 LDLIBS ?= $(shell $(YOSYS_CONFIG) --ldlibs) -L$(GTEST_DIR)/build/lib -lgtest -lgtest_main -lpthread
 LDFLAGS ?= $(shell $(YOSYS_CONFIG) --ldflags)
-TEST_UTILS ?= ../../../test-utils/test-utils.tcl
+TEST_UTILS ?= $(abspath ../../test-utils/test-utils.tcl)
 
 define test_tpl =
 $(1): $(1)/ok
 	@set +e; \
-	$$($(1)_verify); \
+	$$($$(subst /,-,$(1)_verify)); \
 	if [ $$$$? -eq 0 ]; then \
 		printf "Test %-20s \e[32mPASSED\e[0m @ %s\n" $(1) $(CURDIR); \
 		touch $$<; \
@@ -43,15 +43,15 @@
 		false; \
 	fi
 
-$(1)/ok: $(1)/$(1).v
+$(1)/ok: $(1)/$$(notdir $(1).v)
 	@set +e; \
 	cd $(1); \
-	echo "source $(TEST_UTILS)" > run-$(1).tcl ;\
-	echo "source $(1).tcl" >> run-$(1).tcl ;\
-	DESIGN_TOP=$(1) TEST_OUTPUT_PREFIX=./ \
-	yosys -c "run-$(1).tcl" -q -q -l $(1).log; \
+	echo "source $(TEST_UTILS)" > run-$$(notdir $(1)).tcl ;\
+	echo "source $$(notdir $(1)).tcl" >> run-$$(notdir $(1)).tcl ;\
+	DESIGN_TOP=$$(notdir $(1)) TEST_OUTPUT_PREFIX=./ \
+	yosys -c "run-$$(notdir $(1)).tcl" -q -q -l $$(notdir $(1)).log; \
 	RETVAL=$$$$?; \
-	rm -f run-$(1).tcl; \
+	rm -f run-$$(notdir $(1)).tcl; \
 	if [ ! -z "$$($(1)_negative)" ] && [ $$($(1)_negative) -eq 1 ]; then \
 		if [ $$$$RETVAL -ne 0 ]; then \
 			printf "Negative test %-20s \e[32mPASSED\e[0m @ %s\n" $(1) $(CURDIR); \
diff --git a/ql-qlf-plugin/qlf_k6n10f/cells_sim.v b/ql-qlf-plugin/qlf_k6n10f/cells_sim.v
index fbdeedc..b8c5b38 100644
--- a/ql-qlf-plugin/qlf_k6n10f/cells_sim.v
+++ b/ql-qlf-plugin/qlf_k6n10f/cells_sim.v
@@ -668,10 +668,10 @@
 module QL_DSP1 (
     input  [19:0] a,
     input  [17:0] b,
+    (* clkbuf_sink *)
     input  clk0,
     (* clkbuf_sink *)
     input  clk1,
-    (* clkbuf_sink *)
     input  [ 1:0] feedback0,
     input  [ 1:0] feedback1,
     input  load_acc0,
@@ -683,3 +683,94 @@
     parameter MODE_BITS = 27'b00000000000000000000000000;
 endmodule  /* QL_DSP1 */
 
+(* blackbox *) // Declaration only: the module body below is empty.
+module QL_DSP2 ( // TODO: Name subject to change
+    input  [19:0] a,
+    input  [17:0] b,
+    input  [ 3:0] acc_fir,
+    output [37:0] z,  // 38 bits = 20 + 18, the combined width of a and b
+    output [17:0] dly_b,  // same width as b
+
+    (* clkbuf_sink *)
+    input         clk,  // the clkbuf_sink attribute above applies to this port
+    input         reset,
+
+    input  [1:0]  feedback,
+    input         load_acc,
+    input         unsigned_a,  // NOTE(review): presumably selects unsigned treatment of a - confirm
+    input         unsigned_b,  // NOTE(review): presumably selects unsigned treatment of b - confirm
+
+    input         f_mode,  // only QL_DSP2 has this port; the dsp_t1_* declarations in this file do not
+    input  [2:0]  output_select,
+    input         saturate_enable,
+    input  [5:0]  shift_right,
+    input         round,
+    input         subtract,
+    input         register_inputs,
+    input  [19:0] coeff_0,  // coeff_0..coeff_3 are as wide as a
+    input  [19:0] coeff_1,
+    input  [19:0] coeff_2,
+    input  [19:0] coeff_3
+);
+
+endmodule
+
+(* blackbox *) // TODO: add sim model
+module dsp_t1_20x18x64 (  // cell type instantiated by \$__QL_MUL20X18 in dsp_map.v
+    input  [19:0] a_i,
+    input  [17:0] b_i,
+    input  [ 3:0] acc_fir_i,
+    output [37:0] z_o,  // 38 bits = 20 + 18, the combined width of a_i and b_i
+    output [17:0] dly_b_o,  // same width as b_i
+
+    (* clkbuf_sink *)
+    input         clock_i,  // the clkbuf_sink attribute above applies to this port
+    input         reset_i,
+
+    input  [1:0]  feedback_i,
+    input         load_acc_i,
+    input         unsigned_a_i,  // driven with !A_SIGNED by the dsp_map.v wrapper
+    input         unsigned_b_i,  // driven with !B_SIGNED by the dsp_map.v wrapper
+
+    input  [2:0]  output_select_i,
+    input         saturate_enable_i,
+    input  [5:0]  shift_right_i,
+    input         round_i,
+    input         subtract_i,
+    input         register_inputs_i,
+    input  [19:0] coeff_0_i,  // coeff_0_i..coeff_3_i are as wide as a_i
+    input  [19:0] coeff_1_i,
+    input  [19:0] coeff_2_i,
+    input  [19:0] coeff_3_i
+);
+endmodule
+
+(* blackbox *) // TODO: add sim model
+module dsp_t1_10x9x32 (  // cell type instantiated by \$__QL_MUL10X9 in dsp_map.v
+    input  [ 9:0] a_i,
+    input  [ 8:0] b_i,
+    input  [ 3:0] acc_fir_i,
+    output [18:0] z_o,  // 19 bits = 10 + 9, the combined width of a_i and b_i
+    output [ 8:0] dly_b_o,  // same width as b_i
+
+    (* clkbuf_sink *)
+    input         clock_i,  // the clkbuf_sink attribute above applies to this port
+    input         reset_i,
+
+    input  [1:0]  feedback_i,
+    input         load_acc_i,
+    input         unsigned_a_i,  // driven with !A_SIGNED by the dsp_map.v wrapper
+    input         unsigned_b_i,  // driven with !B_SIGNED by the dsp_map.v wrapper
+
+    input  [2:0]  output_select_i,
+    input         saturate_enable_i,
+    input  [5:0]  shift_right_i,
+    input         round_i,
+    input         subtract_i,
+    input         register_inputs_i,
+    input  [ 9:0] coeff_0_i,  // coeff_0_i..coeff_3_i are as wide as a_i
+    input  [ 9:0] coeff_1_i,
+    input  [ 9:0] coeff_2_i,
+    input  [ 9:0] coeff_3_i
+);
+endmodule
diff --git a/ql-qlf-plugin/qlf_k6n10f/dsp_map.v b/ql-qlf-plugin/qlf_k6n10f/dsp_map.v
index 4b8ae64..f0c5a8b 100644
--- a/ql-qlf-plugin/qlf_k6n10f/dsp_map.v
+++ b/ql-qlf-plugin/qlf_k6n10f/dsp_map.v
@@ -6,22 +6,95 @@
 //
 // SPDX-License-Identifier:ISC
 
-module \$__MUL16X16 (input [15:0] A, input [15:0] B, output [31:0] Y);
-	parameter A_SIGNED = 0;
-	parameter B_SIGNED = 0;
-	parameter A_WIDTH = 0;
-	parameter B_WIDTH = 0;
-	parameter Y_WIDTH = 0;
+module \$__QL_MUL20X18 (input [19:0] A, input [17:0] B, output [37:0] Y);
+    parameter A_SIGNED = 0;
+    parameter B_SIGNED = 0;
+    parameter A_WIDTH = 0;
+    parameter B_WIDTH = 0;
+    parameter Y_WIDTH = 0;
 
-	QL_DSP #(
-		.A_REG(1'b0),
-		.B_REG(1'b0),
-		.C_REG(1'b0),
-		.D_REG(1'b0),
-		.ENABLE_DSP(1'b1),
-	) _TECHMAP_REPLACE_ (
-		.A(A),
-		.B(B),
-		.O(Y),
-	);
+    wire [19:0] a;  // A widened to the 20-bit DSP operand
+    wire [17:0] b;  // B widened to the 18-bit DSP operand
+    wire [37:0] z;  // DSP result, forwarded to Y
+    // Slice the low A_WIDTH bits first: concatenating the full 20-bit A would push the extension bits past bit 19.
+    assign a = (A_WIDTH == 20) ? A :
+               (A_SIGNED) ? {{(20 - A_WIDTH){A[A_WIDTH-1]}}, A[A_WIDTH-1:0]} :
+                            {{(20 - A_WIDTH){1'b0}},         A[A_WIDTH-1:0]};
+    // Same widening for B against the 18-bit port.
+    assign b = (B_WIDTH == 18) ? B :
+               (B_SIGNED) ? {{(18 - B_WIDTH){B[B_WIDTH-1]}}, B[B_WIDTH-1:0]} :
+                            {{(18 - B_WIDTH){1'b0}},         B[B_WIDTH-1:0]};
+
+    dsp_t1_20x18x64 _TECHMAP_REPLACE_ (
+        .a_i                (a),
+        .b_i                (b),
+        .acc_fir_i          (4'd0),
+        .z_o                (z),
+        // dly_b_o, clock_i and reset_i are not connected by this wrapper.
+        .feedback_i         (2'd0),
+        .load_acc_i         (1'b0),
+        .unsigned_a_i       (!A_SIGNED),
+        .unsigned_b_i       (!B_SIGNED),
+
+        .output_select_i    (3'd0),  // literal sized to the 3-bit port
+        .saturate_enable_i  (1'b0),
+        .shift_right_i      (6'd0),
+        .round_i            (1'b0),
+        .subtract_i         (1'b0),
+        .register_inputs_i  (1'b0),
+        .coeff_0_i          (20'd0),
+        .coeff_1_i          (20'd0),
+        .coeff_2_i          (20'd0),
+        .coeff_3_i          (20'd0)
+    );
+
+    assign Y = z;
+
 endmodule
+
+module \$__QL_MUL10X9 (input [9:0] A, input [8:0] B, output [18:0] Y);
+    parameter A_SIGNED = 0;
+    parameter B_SIGNED = 0;
+    parameter A_WIDTH = 0;
+    parameter B_WIDTH = 0;
+    parameter Y_WIDTH = 0;
+    // Techmap wrapper: becomes one dsp_t1_10x9x32 with its auxiliary control and coefficient inputs tied to zero.
+    wire [ 9:0] a;  // A widened to the 10-bit DSP operand
+    wire [ 8:0] b;  // B widened to the 9-bit DSP operand
+    wire [18:0] z;  // DSP result, forwarded to Y
+    // Slice the low A_WIDTH bits first: concatenating the full 10-bit A would push the extension bits past bit 9.
+    assign a = (A_WIDTH == 10) ? A :
+               (A_SIGNED) ? {{(10 - A_WIDTH){A[A_WIDTH-1]}}, A[A_WIDTH-1:0]} :
+                            {{(10 - A_WIDTH){1'b0}},         A[A_WIDTH-1:0]};
+    // Same widening for B against the 9-bit port.
+    assign b = (B_WIDTH ==  9) ? B :
+               (B_SIGNED) ? {{( 9 - B_WIDTH){B[B_WIDTH-1]}}, B[B_WIDTH-1:0]} :
+                            {{( 9 - B_WIDTH){1'b0}},         B[B_WIDTH-1:0]};
+
+    dsp_t1_10x9x32 _TECHMAP_REPLACE_ (
+        .a_i                (a),
+        .b_i                (b),
+        .acc_fir_i          (4'd0),
+        .z_o                (z),
+        // dly_b_o, clock_i and reset_i are not connected by this wrapper.
+        .feedback_i         (2'd0),
+        .load_acc_i         (1'b0),
+        .unsigned_a_i       (!A_SIGNED),
+        .unsigned_b_i       (!B_SIGNED),
+
+        .output_select_i    (3'd0),  // literal sized to the 3-bit port
+        .saturate_enable_i  (1'b0),
+        .shift_right_i      (6'd0),
+        .round_i            (1'b0),
+        .subtract_i         (1'b0),
+        .register_inputs_i  (1'b0),
+        .coeff_0_i          (10'd0),
+        .coeff_1_i          (10'd0),
+        .coeff_2_i          (10'd0),
+        .coeff_3_i          (10'd0)
+    );
+
+    assign Y = z;
+
+endmodule
+
diff --git a/ql-qlf-plugin/synth_quicklogic.cc b/ql-qlf-plugin/synth_quicklogic.cc
index 33433fd..01ce6ef 100644
--- a/ql-qlf-plugin/synth_quicklogic.cc
+++ b/ql-qlf-plugin/synth_quicklogic.cc
@@ -235,21 +235,56 @@
             run("opt_clean");
             run("share");
 
-            if (help_mode || (!nodsp && family == "qlf_k6n10")) {
-                run("memory_dff");
-                run("wreduce t:$mul");
-                run("techmap -map +/mul2dsp.v -map +/quicklogic/" + family +
-                      "/dsp_map.v -D DSP_A_MAXWIDTH=16 -D DSP_B_MAXWIDTH=16 "
-                      "-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_Y_MINWIDTH=11 "
-                      "-D DSP_NAME=$__MUL16X16",
-                    "(if -no_dsp)");
-                run("select a:mul2dsp", "              (if -no_dsp)");
-                run("setattr -unset mul2dsp", "        (if -no_dsp)");
-                run("opt_expr -fine", "                (if -no_dsp)");
-                run("wreduce", "                       (if -no_dsp)");
-                run("select -clear", "                 (if -no_dsp)");
-                run("ql_dsp", "                        (if -no_dsp)");
-                run("chtype -set $mul t:$__soft_mul", "(if -no_dsp)");
+            if (help_mode || family == "qlf_k6n10") { // help_mode: list the steps whatever the current family is
+                if (help_mode || !nodsp) {
+                    run("memory_dff", "                    (for qlf_k6n10 if not -no_dsp)");
+                    run("wreduce t:$mul", "                (for qlf_k6n10 if not -no_dsp)");
+                    run("techmap -map +/mul2dsp.v -map +/quicklogic/qlf_k6n10/dsp_map.v "
+                        "-D DSP_A_MAXWIDTH=16 -D DSP_B_MAXWIDTH=16 "
+                        "-D DSP_A_MINWIDTH=2 -D DSP_B_MINWIDTH=2 -D DSP_Y_MINWIDTH=11 "
+                        "-D DSP_NAME=$__MUL16X16",
+                        "(for qlf_k6n10 if not -no_dsp)");
+                    run("select a:mul2dsp", "              (for qlf_k6n10 if not -no_dsp)");
+                    run("setattr -unset mul2dsp", "        (for qlf_k6n10 if not -no_dsp)");
+                    run("opt_expr -fine", "                (for qlf_k6n10 if not -no_dsp)");
+                    run("wreduce", "                       (for qlf_k6n10 if not -no_dsp)");
+                    run("select -clear", "                 (for qlf_k6n10 if not -no_dsp)");
+                    run("ql_dsp", "                        (for qlf_k6n10 if not -no_dsp)");
+                    run("chtype -set $mul t:$__soft_mul", "(for qlf_k6n10 if not -no_dsp)");
+                }
+            }
+            if (help_mode || family == "qlf_k6n10f") { // separate 'if' so help mode can reach both families
+                struct DspParams {
+                    size_t a_maxwidth;
+                    size_t b_maxwidth;
+                    size_t a_minwidth;
+                    size_t b_minwidth;
+                    std::string type;
+                };
+                // Applied in this order: the 20x18 rule first, then the 10x9 rule.
+                const std::vector<DspParams> dsp_rules = {
+                  {20, 18, 11, 10, "$__QL_MUL20X18"},
+                  {10, 9, 4, 4, "$__QL_MUL10X9"},
+                };
+
+                if (help_mode) {
+                    run("wreduce t:$mul", "                (for qlf_k6n10f if not -no_dsp)");
+                    run("techmap -map +/mul2dsp.v [...]", "(for qlf_k6n10f if not -no_dsp)");
+                    run("chtype -set $mul t:$__soft_mul", "(for qlf_k6n10f if not -no_dsp)");
+                    run("techmap -map +/quicklogic/qlf_k6n10f/dsp_map.v", "(for qlf_k6n10f if not -no_dsp)");
+                } else if (!nodsp) {
+                    // One mul2dsp pass per rule; $__soft_mul cells are turned back into $mul after each pass.
+                    run("wreduce t:$mul");
+                    for (const auto &rule : dsp_rules) {
+                        run(stringf("techmap -map +/mul2dsp.v "
+                                    "-D DSP_A_MAXWIDTH=%zu -D DSP_B_MAXWIDTH=%zu "
+                                    "-D DSP_A_MINWIDTH=%zu -D DSP_B_MINWIDTH=%zu "
+                                    "-D DSP_NAME=%s",
+                                    rule.a_maxwidth, rule.b_maxwidth, rule.a_minwidth, rule.b_minwidth, rule.type.c_str()));
+                        run("chtype -set $mul t:$__soft_mul");
+                    }
+                    run("techmap -map +/quicklogic/" + family + "/dsp_map.v");
+                }
             }
 
             run("techmap -map +/cmp2lut.v -D LUT_WIDTH=4");
diff --git a/ql-qlf-plugin/tests/Makefile b/ql-qlf-plugin/tests/Makefile
index 12fab91..4fc598c 100644
--- a/ql-qlf-plugin/tests/Makefile
+++ b/ql-qlf-plugin/tests/Makefile
@@ -20,7 +20,8 @@
 	mux \
 	tribuf \
 	fsm \
-	pp3_bram #\
+	pp3_bram \
+    qlf_k6n10f/dsp_mult
 #	qlf_k6n10_bram \
 
 include $(shell pwd)/../../Makefile_test.common
@@ -38,4 +39,5 @@
 tribuf_verify = true
 fsm_verify = true
 pp3_bram_verify = true
+qlf_k6n10f-dsp_mult_verify = true
 #qlf_k6n10_bram_verify = true
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.tcl b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.tcl
new file mode 100644
index 0000000..90f591d
--- /dev/null
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.tcl
@@ -0,0 +1,24 @@
+# Import Yosys commands into Tcl; load the ql-qlf plugin unless quicklogic_eqn is already defined.
+yosys -import
+if { [info procs quicklogic_eqn] == {} } { plugin -i ql-qlf}
+yosys -import  ;# ingest plugin commands
+
+read_verilog dsp_mult.v
+design -save read
+
+# Pairs of: top module to synthesize, cell type that must appear exactly once.
+set cases {
+    mult_16x16 dsp_t1_20x18x64
+    mult_20x18 dsp_t1_20x18x64
+    mult_8x8   dsp_t1_10x9x32
+    mult_10x9  dsp_t1_10x9x32
+}
+
+foreach {TOP cell} $cases {
+    design -load read
+    hierarchy -top $TOP
+    synth_quicklogic -family qlf_k6n10f -top $TOP
+    yosys cd $TOP
+    select -assert-count 1 t:$cell
+}
+
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.v b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.v
new file mode 100644
index 0000000..cd07ba3
--- /dev/null
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.v
@@ -0,0 +1,47 @@
+// Copyright (C) 2020-2021  The SymbiFlow Authors.
+//
+// Use of this source code is governed by a ISC-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/ISC
+//
+// SPDX-License-Identifier:ISC
+
+module mult_16x16 (A, B, Z);
+    // Unsigned 16x16 multiply; dsp_mult.tcl expects exactly one dsp_t1_20x18x64.
+    input  [15:0] A;
+    input  [15:0] B;
+    output [31:0] Z;
+
+    assign Z = A * B;
+
+endmodule
+
+module mult_20x18 (A, B, Z);
+    // Unsigned 20x18 multiply; dsp_mult.tcl expects exactly one dsp_t1_20x18x64.
+    input  [19:0] A;
+    input  [17:0] B;
+    output [37:0] Z;
+
+    assign Z = A * B;
+
+endmodule
+
+module mult_8x8 (A, B, Z);
+    // Unsigned 8x8 multiply; dsp_mult.tcl expects exactly one dsp_t1_10x9x32.
+    input  [ 7:0] A;
+    input  [ 7:0] B;
+    output [15:0] Z;
+
+    assign Z = A * B;
+
+endmodule
+
+module mult_10x9 (A, B, Z);
+    // Unsigned 10x9 multiply; dsp_mult.tcl expects exactly one dsp_t1_10x9x32.
+    input  [ 9:0] A;
+    input  [ 8:0] B;
+    output [18:0] Z;
+
+    assign Z = A * B;
+
+endmodule