Merge pull request #252 from antmicro/k6n10f_dsp_macc

k6n10f dsp macc inference
diff --git a/Makefile b/Makefile
index 00e0263..1279f83 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,9 @@
 
 $(foreach plugin,$(PLUGIN_LIST),$(eval $(call install_plugin,$(plugin))))
 
+pmgen.py: # downloaded from upstream Yosys; a failed download is removed so the next run retries it
+	wget -O $@ https://raw.githubusercontent.com/YosysHQ/yosys/master/passes/pmgen/pmgen.py || { rm -f $@; exit 1; }
+
 plugins: $(PLUGINS)
 
 install: $(PLUGINS_INSTALL)
@@ -45,6 +48,7 @@
 plugins_clean: $(PLUGINS_CLEAN)
 
 clean:: plugins_clean
+	rm -rf pmgen.py
 
 CLANG_FORMAT ?= clang-format-8
 format:
diff --git a/Makefile_plugin.common b/Makefile_plugin.common
index 49d6ab5..6fc4492 100644
--- a/Makefile_plugin.common
+++ b/Makefile_plugin.common
@@ -56,15 +56,19 @@
 EXTRA_FLAGS ?=
 
 OBJS := $(patsubst %.cc,%.o,$(SOURCES))
+DEPS ?=
 
 all: $(NAME).so
 
-$(OBJS): %.o: %.cc
-	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(EXTRA_FLAGS) -c -o $@ $^
+$(OBJS): %.o: %.cc $(DEPS) # DEPS: extra prerequisites (e.g. generated headers) needed before compiling
+	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(EXTRA_FLAGS) -c -o $@ $<
 
 $(NAME).so: $(OBJS)
 	$(CXX) $(CXXFLAGS) $(LDFLAGS) -shared -o $@ $^ $(LDLIBS)
 
+../pmgen.py: # delegate to the pmgen.py rule of the Makefile one directory up
+	@$(MAKE) -C $(@D) $(@F)
+
 install_plugin: $(NAME).so
 	install -D $< $(PLUGINS_DIR)/$<
 
diff --git a/ql-qlf-plugin/Makefile b/ql-qlf-plugin/Makefile
index e9de5bd..00a7c4b 100644
--- a/ql-qlf-plugin/Makefile
+++ b/ql-qlf-plugin/Makefile
@@ -12,7 +12,11 @@
           pp3_braminit.cc \
           quicklogic_eqn.cc \
           ql-edif.cc \
-          ql-dsp-simd.cc
+          ql-dsp-simd.cc \
+          ql-dsp-macc.cc
+
+DEPS = pmgen/ql-dsp-pm.h \
+       pmgen/ql-dsp-macc.h
 
 include ../Makefile_plugin.common
 
@@ -57,9 +61,14 @@
                   $(PP3_DIR)/mult_sim.v        \
                   $(PP3_DIR)/qlal3_sim.v       \
 
-retrieve-pmgen:=$(shell mkdir -p pmgen && wget -nc -O pmgen/pmgen.py https://raw.githubusercontent.com/SymbiFlow/yosys/master%2Bwip/passes/pmgen/pmgen.py)
+pmgen: # output directory for the generated pattern-matcher headers
+	mkdir -p $@
 
-pre-build:=$(shell python3 pmgen/pmgen.py -o pmgen/ql-dsp-pm.h -p ql_dsp ql_dsp.pmg)
+pmgen/ql-dsp-pm.h: ../pmgen.py ql_dsp.pmg | pmgen # first prerequisite: generator, second: pattern source
+	python3 $< -o $@ -p ql_dsp $(word 2,$^)
+
+pmgen/ql-dsp-macc.h: ../pmgen.py ql-dsp-macc.pmg | pmgen # first prerequisite: generator, second: pattern source
+	python3 $< -o $@ -p ql_dsp_macc $(word 2,$^)
 
 install_modules: $(VERILOG_MODULES)
 	$(foreach f,$^,install -D $(f) $(DATA_DIR)/quicklogic/$(f);)
@@ -68,4 +77,4 @@
 
 clean:
 	$(MAKE) -f ../Makefile_plugin.common $@
-	rm -f *pm.h
+	rm -rf pmgen
diff --git a/ql-qlf-plugin/ql-dsp-macc.cc b/ql-qlf-plugin/ql-dsp-macc.cc
new file mode 100644
index 0000000..c422d31
--- /dev/null
+++ b/ql-qlf-plugin/ql-dsp-macc.cc
@@ -0,0 +1,237 @@
+#include "kernel/sigtools.h"
+#include "kernel/yosys.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+#include "pmgen/ql-dsp-macc.h"
+
+// ============================================================================
+
+// Callback run for each ql_dsp_macc pattern match. It checks that the matched
+// $mul, $add/$sub, optional $mux and flip-flop form a legal accumulator and, if
+// they fit one of the supported sizes, replaces them with one dsp_t1_* cell.
+//
+// pm - pattern matcher instance; the match is read from pm.st_ql_dsp_macc, the
+//      new cell is added to pm.module and the old cells are marked via autoremove.
+void create_ql_macc_dsp(ql_dsp_macc_pm &pm)
+{
+    auto &st = pm.st_ql_dsp_macc;
+
+    // Reject if multiplier drives anything else than either $add or $add and
+    // $mux
+    if (st.mux == nullptr && st.mul_nusers > 2) {
+        return;
+    }
+
+    // Determine whether the output is taken from before or after the ff
+    bool out_ff;
+    if (st.ff_d_nusers == 2 && st.ff_q_nusers == 3) {
+        out_ff = true;
+    } else if (st.ff_d_nusers == 3 && st.ff_q_nusers == 2) {
+        out_ff = false;
+    } else {
+        // Illegal, cannot take the two outputs simultaneously
+        return;
+    }
+
+    // No mux, the adder can drive either the ff or the ff + output
+    if (st.mux == nullptr) {
+        if (out_ff && st.add_nusers != 2) {
+            return;
+        }
+        if (!out_ff && st.add_nusers != 3) {
+            return;
+        }
+    }
+    // Mux present, the adder cannot drive anything else
+    else {
+        if (st.add_nusers != 2) {
+            return;
+        }
+    }
+
+    // Mux can drive either the ff or the ff + output
+    if (st.mux != nullptr) {
+        if (out_ff && st.mux_nusers != 2) {
+            return;
+        }
+        if (!out_ff && st.mux_nusers != 3) {
+            return;
+        }
+    }
+
+    // Get port widths
+    size_t a_width = GetSize(st.mul->getPort(ID(A)));
+    size_t b_width = GetSize(st.mul->getPort(ID(B)));
+    size_t z_width = GetSize(st.ff->getPort(ID(Q)));
+
+    size_t min_width = std::min(a_width, b_width);
+    size_t max_width = std::max(a_width, b_width);
+
+    // Signed / unsigned
+    bool a_signed = st.mul->getParam(ID(A_SIGNED)).as_bool();
+    bool b_signed = st.mul->getParam(ID(B_SIGNED)).as_bool();
+
+    // Determine DSP type or discard if too narrow / wide
+    RTLIL::IdString type;
+    size_t tgt_a_width;
+    size_t tgt_b_width;
+    size_t tgt_z_width;
+
+    if (min_width <= 2 && max_width <= 2 && z_width <= 4) {
+        // Too narrow
+        return;
+    } else if (min_width <= 9 && max_width <= 10 && z_width <= 19) {
+        type = RTLIL::escape_id("dsp_t1_10x9x32");
+        tgt_a_width = 10;
+        tgt_b_width = 9;
+        tgt_z_width = 19;
+    } else if (min_width <= 18 && max_width <= 20 && z_width <= 38) {
+        type = RTLIL::escape_id("dsp_t1_20x18x64");
+        tgt_a_width = 20;
+        tgt_b_width = 18;
+        tgt_z_width = 38;
+    } else {
+        // Too wide
+        return;
+    }
+
+    log("Inferring MACC %zux%zu->%zu as %s from:\n", a_width, b_width, z_width, RTLIL::unescape_id(type).c_str());
+
+    for (auto cell : {st.mul, st.add, st.mux, st.ff}) {
+        if (cell != nullptr) {
+            log(" %s (%s)\n", RTLIL::unescape_id(cell->name).c_str(), RTLIL::unescape_id(cell->type).c_str());
+        }
+    }
+
+    // Build the DSP cell name
+    std::string name;
+    name += RTLIL::unescape_id(st.mul->name) + "_";
+    name += RTLIL::unescape_id(st.add->name) + "_";
+    if (st.mux != nullptr) {
+        name += RTLIL::unescape_id(st.mux->name) + "_";
+    }
+    name += RTLIL::unescape_id(st.ff->name);
+
+    // Add the DSP cell
+    RTLIL::Cell *cell = pm.module->addCell(RTLIL::escape_id(name), type);
+
+    // Get input/output data signals. The DSP a_i port is the wider one, so the
+    // multiplier operands are exchanged when A is narrower than B; each
+    // signedness flag has to follow its operand through that exchange.
+    const bool swap_ab = a_width < b_width;
+    RTLIL::SigSpec sig_a = st.mul->getPort(swap_ab ? ID(B) : ID(A));
+    RTLIL::SigSpec sig_b = st.mul->getPort(swap_ab ? ID(A) : ID(B));
+    const bool sig_a_signed = swap_ab ? b_signed : a_signed;
+    const bool sig_b_signed = swap_ab ? a_signed : b_signed;
+
+    RTLIL::SigSpec sig_z = out_ff ? st.ff->getPort(ID(Q)) : st.ff->getPort(ID(D));
+
+    // Connect input data ports, sign extend / pad with zeros
+    sig_a.extend_u0(tgt_a_width, sig_a_signed);
+    sig_b.extend_u0(tgt_b_width, sig_b_signed);
+    cell->setPort(RTLIL::escape_id("a_i"), sig_a);
+    cell->setPort(RTLIL::escape_id("b_i"), sig_b);
+
+    // Connect output data port, pad if needed
+    if ((size_t)GetSize(sig_z) < tgt_z_width) {
+        auto *wire = pm.module->addWire(NEW_ID, tgt_z_width - GetSize(sig_z));
+        sig_z.append(wire);
+    }
+    cell->setPort(RTLIL::escape_id("z_o"), sig_z);
+
+    // Connect clock, reset and enable
+    cell->setPort(RTLIL::escape_id("clock_i"), st.ff->getPort(ID(CLK)));
+
+    // A flip-flop without an ARST / EN port gets a constant 0 / constant 1 instead.
+    // NOTE(review): CLK_POLARITY, ARST_POLARITY, ARST_VALUE and EN_POLARITY of the
+    // flip-flop are not inspected here - confirm that only compatible ffs are matched.
+    RTLIL::SigSpec rst = RTLIL::SigSpec(RTLIL::S0);
+    if (st.ff->hasPort(ID(ARST))) {
+        rst = st.ff->getPort(ID(ARST));
+    }
+
+    RTLIL::SigSpec ena = RTLIL::SigSpec(RTLIL::S1);
+    if (st.ff->hasPort(ID(EN))) {
+        ena = st.ff->getPort(ID(EN));
+    }
+
+    cell->setPort(RTLIL::escape_id("reset_i"), rst);
+    cell->setPort(RTLIL::escape_id("load_acc_i"), ena);
+
+    // Insert feedback_i control logic used for clearing / loading the accumulator
+    if (st.mux != nullptr) {
+        RTLIL::SigSpec sig_s = st.mux->getPort(ID(S));
+
+        // Depending on the mux port ordering insert inverter if needed
+        log_assert(st.mux_ab == ID(A) || st.mux_ab == ID(B));
+        if (st.mux_ab == ID(B)) {
+            sig_s = pm.module->Not(NEW_ID, sig_s);
+        }
+
+        // Assemble the full control signal for the feedback_i port
+        RTLIL::SigSpec sig_f;
+        sig_f.append(RTLIL::S0);
+        sig_f.append(sig_s);
+        cell->setPort(RTLIL::escape_id("feedback_i"), sig_f);
+    }
+    // No acc clear/load
+    else {
+        cell->setPort(RTLIL::escape_id("feedback_i"), RTLIL::SigSpec(RTLIL::S0, 2));
+    }
+
+    // Connect control ports
+    cell->setPort(RTLIL::escape_id("unsigned_a_i"), RTLIL::SigSpec(sig_a_signed ? RTLIL::S0 : RTLIL::S1));
+    cell->setPort(RTLIL::escape_id("unsigned_b_i"), RTLIL::SigSpec(sig_b_signed ? RTLIL::S0 : RTLIL::S1));
+
+    // Connect config ports
+    cell->setPort(RTLIL::escape_id("saturate_enable_i"), RTLIL::SigSpec(RTLIL::S0));
+    cell->setPort(RTLIL::escape_id("shift_right_i"), RTLIL::SigSpec(RTLIL::S0, 6));
+    cell->setPort(RTLIL::escape_id("round_i"), RTLIL::SigSpec(RTLIL::S0));
+    cell->setPort(RTLIL::escape_id("register_inputs_i"), RTLIL::SigSpec(RTLIL::S0));
+
+    bool subtract = (st.add->type == RTLIL::escape_id("$sub"));
+    cell->setPort(RTLIL::escape_id("subtract_i"), RTLIL::SigSpec(subtract ? RTLIL::S1 : RTLIL::S0));
+
+    // 3 - output post acc
+    // 1 - output pre acc
+    cell->setPort(RTLIL::escape_id("output_select_i"), out_ff ? RTLIL::Const(3, 3) : RTLIL::Const(1, 3));
+
+    // Mark the cells for removal
+    pm.autoremove(st.mul);
+    pm.autoremove(st.add);
+    if (st.mux != nullptr) {
+        pm.autoremove(st.mux);
+    }
+    pm.autoremove(st.ff);
+}
+
+// Yosys pass wrapper that runs the ql_dsp_macc pattern matcher over the design
+struct QlDspMacc : public Pass {
+    QlDspMacc() : Pass("ql_dsp_macc", "infer QuickLogic DSP multiply-accumulate cells") {}
+
+    void help() override
+    {
+        log("\n");
+        log("    ql_dsp_macc [selection]\n");
+        log("\n");
+        log("Replaces $mul -> $add/$sub [-> $mux] -> flip-flop accumulator structures\n");
+        log("with dsp_t1_10x9x32 or dsp_t1_20x18x64 cells.\n");
+        log("\n");
+    }
+
+    void execute(std::vector<std::string> a_Args, RTLIL::Design *a_Design) override
+    {
+        log_header(a_Design, "Executing QL_DSP_MACC pass.\n");
+
+        // No pass-specific options: arguments after the command name are the selection
+        extra_args(a_Args, 1, a_Design);
+
+        for (auto module : a_Design->selected_modules()) {
+            ql_dsp_macc_pm(module, module->selected_cells()).run_ql_dsp_macc(create_ql_macc_dsp);
+        }
+    }
+} QlDspMacc;
+
+PRIVATE_NAMESPACE_END
diff --git a/ql-qlf-plugin/ql-dsp-macc.pmg b/ql-qlf-plugin/ql-dsp-macc.pmg
new file mode 100644
index 0000000..4cfd15a
--- /dev/null
+++ b/ql-qlf-plugin/ql-dsp-macc.pmg
@@ -0,0 +1,50 @@
+pattern ql_dsp_macc
+// add_ba: adder input not fed by the multiplier; mux_ab: mux input fed by the multiplier
+state <IdString> add_ba
+state <IdString> mux_ab
+// nusers() counts of the matched signals, recorded for the accept callback
+state <int> mul_nusers
+state <int> add_nusers
+state <int> mux_nusers
+state <int> ff_d_nusers
+state <int> ff_q_nusers
+// Multiplier; nusers() of its output is limited to 3
+match mul
+    select mul->type.in($mul)
+    select nusers(port(mul, \Y)) <= 3
+    set mul_nusers nusers(port(mul, \Y))
+endmatch
+// Adder / subtractor with the multiplier output on port A or B (AB); BA is its other input
+match add
+    select add->type.in($add, $sub)
+    choice <IdString> AB {\A, \B}
+    define <IdString> BA (AB == \A ? \B : \A)
+    index <SigSpec> port(add, AB) === port(mul, \Y)
+    select nusers(port(add, \Y)) <= 3
+    set add_nusers nusers(port(add, \Y))
+    set add_ba BA
+endmatch
+// Optional mux with the multiplier output on one data input (AB) and the adder output on the other (BA)
+match mux
+    select mux->type.in($mux)
+    choice <IdString> AB {\A, \B}
+    define <IdString> BA (AB == \A ? \B : \A)
+    index <SigSpec> port(mux, AB) === port(mul, \Y)
+    index <SigSpec> port(mux, BA) === port(add, \Y)
+    select nusers(port(mux, \Y)) <= 3
+    set mux_nusers nusers(port(mux, \Y))
+    set mux_ab AB
+    optional
+endmatch
+// Flip-flop whose D is the mux output (adder output when no mux matched) and whose Q is the adder's other input
+match ff
+    select ff->type.in($dff, $adff, $dffe, $adffe)
+    index <SigSpec> port(ff, \D) === (mux == nullptr ? port(add, \Y) : port(mux, \Y))
+    index <SigSpec> port(ff, \Q) === port(add, add_ba)
+    set ff_d_nusers nusers(port(ff, \D))
+    set ff_q_nusers nusers(port(ff, \Q))
+endmatch
+
+code
+    accept;
+endcode
diff --git a/ql-qlf-plugin/synth_quicklogic.cc b/ql-qlf-plugin/synth_quicklogic.cc
index 3bb44c5..fdb23b0 100644
--- a/ql-qlf-plugin/synth_quicklogic.cc
+++ b/ql-qlf-plugin/synth_quicklogic.cc
@@ -269,6 +269,7 @@
 
                 if (help_mode) {
                     run("wreduce t:$mul", "                (for qlf_k6n10f if not -no_dsp)");
+                    run("ql_dsp_macc", "                   (for qlf_k6n10f if not -no_dsp)");
                     run("techmap -map +/mul2dsp.v [...]", "(for qlf_k6n10f if not -no_dsp)");
                     run("chtype -set $mul t:$__soft_mul", "(for qlf_k6n10f if not -no_dsp)");
                     run("techmap -map +/quicklogic/" + family + "/dsp_map.v", "(for qlf_k6n10f if not -no_dsp)");
@@ -277,6 +278,8 @@
                 } else if (!nodsp) {
 
                     run("wreduce t:$mul");
+                    run("ql_dsp_macc");
+
                     for (const auto &rule : dsp_rules) {
                         run(stringf("techmap -map +/mul2dsp.v "
                                     "-D DSP_A_MAXWIDTH=%zu -D DSP_B_MAXWIDTH=%zu "
diff --git a/ql-qlf-plugin/tests/Makefile b/ql-qlf-plugin/tests/Makefile
index e54aeaf..1cc48ca 100644
--- a/ql-qlf-plugin/tests/Makefile
+++ b/ql-qlf-plugin/tests/Makefile
@@ -22,7 +22,8 @@
 	fsm \
 	pp3_bram \
     qlf_k6n10f/dsp_mult \
-    qlf_k6n10f/dsp_simd
+    qlf_k6n10f/dsp_simd \
+    qlf_k6n10f/dsp_macc
 #	qlf_k6n10_bram \
 
 include $(shell pwd)/../../Makefile_test.common
@@ -42,4 +43,5 @@
 pp3_bram_verify = true
 qlf_k6n10f-dsp_mult_verify = true
 qlf_k6n10f-dsp_simd_verify = true
+qlf_k6n10f-dsp_macc_verify = true
 #qlf_k6n10_bram_verify = true
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl
new file mode 100644
index 0000000..2de3bdc
--- /dev/null
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl
@@ -0,0 +1,66 @@
+yosys -import
+if { [info procs quicklogic_eqn] == {} } { plugin -i ql-qlf}
+yosys -import  ;# ingest plugin commands
+
+read_verilog dsp_macc.v
+design -save read
+
+set TOP "macc_simple"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:*
+
+set TOP "macc_simple_clr"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:\$lut
+select -assert-count 2 t:*
+
+set TOP "macc_simple_arst"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:*
+
+set TOP "macc_simple_ena"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:*
+
+set TOP "macc_simple_arst_clr_ena"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:\$lut
+select -assert-count 2 t:*
+
+set TOP "macc_simple_preacc"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:*
+
+set TOP "macc_simple_preacc_clr"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:\$lut
+select -assert-count 2 t:*
+
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.v b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.v
new file mode 100644
index 0000000..0840210
--- /dev/null
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.v
@@ -0,0 +1,113 @@
+// Copyright (C) 2020-2021  The SymbiFlow Authors.
+//
+// Use of this source code is governed by a ISC-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/ISC
+//
+// SPDX-License-Identifier:ISC
+
+module macc_simple (
+    input  wire        clk,
+    input  wire [ 7:0] A,
+    input  wire [ 7:0] B,
+    output reg  [15:0] Z
+);
+
+    always @(posedge clk)
+        Z <= Z + (A * B);
+
+endmodule
+
+module macc_simple_clr (
+    input  wire        clk,
+    input  wire        clr,
+    input  wire [ 7:0] A,
+    input  wire [ 7:0] B,
+    output reg  [15:0] Z
+);
+
+    always @(posedge clk)
+        if (clr) Z <=     (A * B);
+        else     Z <= Z + (A * B);
+
+endmodule
+
+module macc_simple_arst (
+    input  wire        clk,
+    input  wire        rst,
+    input  wire [ 7:0] A,
+    input  wire [ 7:0] B,
+    output reg  [15:0] Z
+);
+
+    always @(posedge clk or posedge rst)
+        if (rst) Z <= 0;
+        else     Z <= Z + (A * B);
+
+endmodule
+
+module macc_simple_ena (
+    input  wire        clk,
+    input  wire        ena,
+    input  wire [ 7:0] A,
+    input  wire [ 7:0] B,
+    output reg  [15:0] Z
+);
+
+    always @(posedge clk)
+        if (ena) Z <= Z + (A * B);
+
+endmodule
+
+module macc_simple_arst_clr_ena (
+    input  wire        clk,
+    input  wire        rst,
+    input  wire        clr,
+    input  wire        ena,
+    input  wire [ 7:0] A,
+    input  wire [ 7:0] B,
+    output reg  [15:0] Z
+);
+
+    always @(posedge clk or posedge rst)
+        if (rst)     Z <= 0;
+        else if (ena) begin
+            if (clr) Z <=     (A * B);
+            else     Z <= Z + (A * B);
+        end
+
+endmodule
+
+module macc_simple_preacc (
+    input  wire        clk,
+    input  wire [ 7:0] A,
+    input  wire [ 7:0] B,
+    output wire [15:0] Z
+);
+
+    reg [15:0] acc;
+
+    assign Z = acc + (A * B);
+
+    always @(posedge clk)
+        acc <= Z;
+
+endmodule
+
+module macc_simple_preacc_clr (
+    input  wire        clk,
+    input  wire        clr,
+    input  wire [ 7:0] A,
+    input  wire [ 7:0] B,
+    output wire [15:0] Z
+);
+    // Pre-accumulator output with clear: Z is driven by a continuous assignment, so it must be a net
+    reg [15:0] acc;
+
+    assign Z = (clr) ? (A * B) : (acc + (A * B));
+
+    always @(posedge clk)
+        acc <= Z;
+
+endmodule
+