Merge pull request #252 from antmicro/k6n10f_dsp_macc
k6n10f dsp macc inference
diff --git a/Makefile b/Makefile
index 00e0263..1279f83 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,9 @@
$(foreach plugin,$(PLUGIN_LIST),$(eval $(call install_plugin,$(plugin))))
+# Fetch the pmgen generator. Download to a temporary file and rename it on success so that a failed transfer never leaves an empty pmgen.py that looks up to date.
+pmgen.py:
+	wget -O $@.tmp https://raw.githubusercontent.com/YosysHQ/yosys/master/passes/pmgen/pmgen.py && mv -f $@.tmp $@ || { rm -f $@.tmp; exit 1; }
plugins: $(PLUGINS)
install: $(PLUGINS_INSTALL)
@@ -45,6 +48,7 @@
plugins_clean: $(PLUGINS_CLEAN)
clean:: plugins_clean
+ rm -rf pmgen.py
CLANG_FORMAT ?= clang-format-8
format:
diff --git a/Makefile_plugin.common b/Makefile_plugin.common
index 49d6ab5..6fc4492 100644
--- a/Makefile_plugin.common
+++ b/Makefile_plugin.common
@@ -56,15 +56,19 @@
EXTRA_FLAGS ?=
OBJS := $(patsubst %.cc,%.o,$(SOURCES))
+DEPS ?=
all: $(NAME).so
-$(OBJS): %.o: %.cc
- $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(EXTRA_FLAGS) -c -o $@ $^
+$(OBJS): %.o: %.cc $(DEPS)
+ $(CXX) $(CXXFLAGS) $(CPPFLAGS) $(EXTRA_FLAGS) -c -o $@ $(filter %.cc, $^)
$(NAME).so: $(OBJS)
$(CXX) $(CXXFLAGS) $(LDFLAGS) -shared -o $@ $^ $(LDLIBS)
+../pmgen.py:
+ @$(MAKE) -C .. pmgen.py
+
install_plugin: $(NAME).so
install -D $< $(PLUGINS_DIR)/$<
diff --git a/ql-qlf-plugin/Makefile b/ql-qlf-plugin/Makefile
index e9de5bd..00a7c4b 100644
--- a/ql-qlf-plugin/Makefile
+++ b/ql-qlf-plugin/Makefile
@@ -12,7 +12,11 @@
pp3_braminit.cc \
quicklogic_eqn.cc \
ql-edif.cc \
- ql-dsp-simd.cc
+ ql-dsp-simd.cc \
+ ql-dsp-macc.cc
+
+DEPS = pmgen/ql-dsp-pm.h \
+ pmgen/ql-dsp-macc.h
include ../Makefile_plugin.common
@@ -57,9 +61,14 @@
$(PP3_DIR)/mult_sim.v \
$(PP3_DIR)/qlal3_sim.v \
-retrieve-pmgen:=$(shell mkdir -p pmgen && wget -nc -O pmgen/pmgen.py https://raw.githubusercontent.com/SymbiFlow/yosys/master%2Bwip/passes/pmgen/pmgen.py)
+pmgen:
+ mkdir -p pmgen
-pre-build:=$(shell python3 pmgen/pmgen.py -o pmgen/ql-dsp-pm.h -p ql_dsp ql_dsp.pmg)
+pmgen/ql-dsp-pm.h: ../pmgen.py ql_dsp.pmg | pmgen
+ python3 ../pmgen.py -o $@ -p ql_dsp ql_dsp.pmg
+
+pmgen/ql-dsp-macc.h: ../pmgen.py ql-dsp-macc.pmg | pmgen
+ python3 ../pmgen.py -o $@ -p ql_dsp_macc ql-dsp-macc.pmg
install_modules: $(VERILOG_MODULES)
$(foreach f,$^,install -D $(f) $(DATA_DIR)/quicklogic/$(f);)
@@ -68,4 +77,4 @@
clean:
$(MAKE) -f ../Makefile_plugin.common $@
- rm -f *pm.h
+ rm -rf pmgen
diff --git a/ql-qlf-plugin/ql-dsp-macc.cc b/ql-qlf-plugin/ql-dsp-macc.cc
new file mode 100644
index 0000000..c422d31
--- /dev/null
+++ b/ql-qlf-plugin/ql-dsp-macc.cc
@@ -0,0 +1,237 @@
+#include "kernel/sigtools.h"
+#include "kernel/yosys.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+#include "pmgen/ql-dsp-macc.h"
+
+// ============================================================================
+
+// Callback run for every match of the ql_dsp_macc pattern. It checks that the
+// matched $mul, $add / $sub, optional $mux and flip-flop form a
+// multiply-accumulate structure that fits into a single DSP and, if so, adds
+// a dsp_t1_10x9x32 or dsp_t1_20x18x64 cell wired to the same signals and
+// marks the matched cells for removal. Matches that do not qualify are left
+// untouched.
+//
+// pm - pattern matcher instance; pm.st_ql_dsp_macc holds the matched cells
+//      and the net user counts collected by the pattern.
+void create_ql_macc_dsp(ql_dsp_macc_pm &pm)
+{
+    auto &st = pm.st_ql_dsp_macc;
+
+    // Reject if the multiplier drives anything other than the $add alone or
+    // the $add together with the $mux
+    if (st.mux == nullptr && st.mul_nusers > 2) {
+        return;
+    }
+
+    // Determine whether the output is taken from before or after the ff
+    bool out_ff;
+    if (st.ff_d_nusers == 2 && st.ff_q_nusers == 3) {
+        out_ff = true;
+    } else if (st.ff_d_nusers == 3 && st.ff_q_nusers == 2) {
+        out_ff = false;
+    } else {
+        // Illegal, cannot take the two outputs simultaneously
+        return;
+    }
+
+    // Expected user count of the net driving the ff: 2 when it feeds only the
+    // ff, 3 when it feeds the ff + output
+    const int drv_nusers = out_ff ? 2 : 3;
+
+    // No mux, the adder can drive either the ff or the ff + output
+    if (st.mux == nullptr) {
+        if (st.add_nusers != drv_nusers) {
+            return;
+        }
+    }
+    // Mux present, the adder cannot drive anything else and the mux can
+    // drive either the ff or the ff + output
+    else {
+        if (st.add_nusers != 2 || st.mux_nusers != drv_nusers) {
+            return;
+        }
+    }
+
+    // Get port widths
+    size_t a_width = GetSize(st.mul->getPort(ID(A)));
+    size_t b_width = GetSize(st.mul->getPort(ID(B)));
+    size_t z_width = GetSize(st.ff->getPort(ID(Q)));
+
+    size_t min_width = std::min(a_width, b_width);
+    size_t max_width = std::max(a_width, b_width);
+
+    // Signed / unsigned
+    bool a_signed = st.mul->getParam(ID(A_SIGNED)).as_bool();
+    bool b_signed = st.mul->getParam(ID(B_SIGNED)).as_bool();
+
+    // Determine DSP type or discard if too narrow / wide
+    RTLIL::IdString type;
+    size_t tgt_a_width;
+    size_t tgt_b_width;
+    size_t tgt_z_width;
+
+    if (min_width <= 2 && max_width <= 2 && z_width <= 4) {
+        // Too narrow
+        return;
+    } else if (min_width <= 9 && max_width <= 10 && z_width <= 19) {
+        type = RTLIL::escape_id("dsp_t1_10x9x32");
+        tgt_a_width = 10;
+        tgt_b_width = 9;
+        tgt_z_width = 19;
+    } else if (min_width <= 18 && max_width <= 20 && z_width <= 38) {
+        type = RTLIL::escape_id("dsp_t1_20x18x64");
+        tgt_a_width = 20;
+        tgt_b_width = 18;
+        tgt_z_width = 38;
+    } else {
+        // Too wide
+        return;
+    }
+
+    log("Inferring MACC %zux%zu->%zu as %s from:\n", a_width, b_width, z_width, RTLIL::unescape_id(type).c_str());
+
+    for (auto cell : {st.mul, st.add, st.mux, st.ff}) {
+        if (cell != nullptr) {
+            log(" %s (%s)\n", RTLIL::unescape_id(cell->name).c_str(), RTLIL::unescape_id(cell->type).c_str());
+        }
+    }
+
+    // Build the DSP cell name
+    std::string name;
+    name += RTLIL::unescape_id(st.mul->name) + "_";
+    name += RTLIL::unescape_id(st.add->name) + "_";
+    if (st.mux != nullptr) {
+        name += RTLIL::unescape_id(st.mux->name) + "_";
+    }
+    name += RTLIL::unescape_id(st.ff->name);
+
+    // Add the DSP cell
+    RTLIL::Cell *cell = pm.module->addCell(RTLIL::escape_id(name), type);
+
+    // Get input/output data signals
+    RTLIL::SigSpec sig_a = st.mul->getPort(ID(A));
+    RTLIL::SigSpec sig_b = st.mul->getPort(ID(B));
+    RTLIL::SigSpec sig_z = out_ff ? st.ff->getPort(ID(Q)) : st.ff->getPort(ID(D));
+
+    // The wider operand has to go to the a_i port. When the operands get
+    // swapped their signedness has to be swapped too, otherwise the extension
+    // below and the unsigned_a_i / unsigned_b_i ports would describe the
+    // wrong operand.
+    if (a_width < b_width) {
+        std::swap(sig_a, sig_b);
+        std::swap(a_signed, b_signed);
+    }
+
+    // Connect input data ports, sign extend / pad with zeros
+    sig_a.extend_u0(tgt_a_width, a_signed);
+    sig_b.extend_u0(tgt_b_width, b_signed);
+    cell->setPort(RTLIL::escape_id("a_i"), sig_a);
+    cell->setPort(RTLIL::escape_id("b_i"), sig_b);
+
+    // Connect output data port, pad if needed
+    if ((size_t)GetSize(sig_z) < tgt_z_width) {
+        auto *wire = pm.module->addWire(NEW_ID, tgt_z_width - GetSize(sig_z));
+        sig_z.append(wire);
+    }
+    cell->setPort(RTLIL::escape_id("z_o"), sig_z);
+
+    // Connect clock, reset and enable
+    cell->setPort(RTLIL::escape_id("clock_i"), st.ff->getPort(ID(CLK)));
+
+    RTLIL::SigSpec rst;
+    RTLIL::SigSpec ena;
+
+    if (st.ff->hasPort(ID(ARST))) {
+        rst = st.ff->getPort(ID(ARST));
+    } else {
+        rst = RTLIL::SigSpec(RTLIL::S0);
+    }
+
+    if (st.ff->hasPort(ID(EN))) {
+        ena = st.ff->getPort(ID(EN));
+    } else {
+        ena = RTLIL::SigSpec(RTLIL::S1);
+    }
+
+    cell->setPort(RTLIL::escape_id("reset_i"), rst);
+    cell->setPort(RTLIL::escape_id("load_acc_i"), ena);
+
+    // Insert feedback_i control logic used for clearing / loading the accumulator
+    if (st.mux != nullptr) {
+        RTLIL::SigSpec sig_s = st.mux->getPort(ID(S));
+
+        // Depending on the mux port ordering insert inverter if needed
+        log_assert(st.mux_ab == ID(A) || st.mux_ab == ID(B));
+        if (st.mux_ab == ID(B)) {
+            sig_s = pm.module->Not(NEW_ID, sig_s);
+        }
+
+        // Assemble the full control signal for the feedback_i port
+        RTLIL::SigSpec sig_f;
+        sig_f.append(RTLIL::S0);
+        sig_f.append(sig_s);
+        cell->setPort(RTLIL::escape_id("feedback_i"), sig_f);
+    }
+    // No acc clear/load
+    else {
+        cell->setPort(RTLIL::escape_id("feedback_i"), RTLIL::SigSpec(RTLIL::S0, 2));
+    }
+
+    // Connect control ports
+    cell->setPort(RTLIL::escape_id("unsigned_a_i"), RTLIL::SigSpec(a_signed ? RTLIL::S0 : RTLIL::S1));
+    cell->setPort(RTLIL::escape_id("unsigned_b_i"), RTLIL::SigSpec(b_signed ? RTLIL::S0 : RTLIL::S1));
+
+    // Connect config ports
+    cell->setPort(RTLIL::escape_id("saturate_enable_i"), RTLIL::SigSpec(RTLIL::S0));
+    cell->setPort(RTLIL::escape_id("shift_right_i"), RTLIL::SigSpec(RTLIL::S0, 6));
+    cell->setPort(RTLIL::escape_id("round_i"), RTLIL::SigSpec(RTLIL::S0));
+    cell->setPort(RTLIL::escape_id("register_inputs_i"), RTLIL::SigSpec(RTLIL::S0));
+
+    bool subtract = (st.add->type == RTLIL::escape_id("$sub"));
+    cell->setPort(RTLIL::escape_id("subtract_i"), RTLIL::SigSpec(subtract ? RTLIL::S1 : RTLIL::S0));
+
+    // 3 - output post acc
+    // 1 - output pre acc
+    cell->setPort(RTLIL::escape_id("output_select_i"), out_ff ? RTLIL::Const(3, 3) : RTLIL::Const(1, 3));
+
+    // Mark the cells for removal
+    pm.autoremove(st.mul);
+    pm.autoremove(st.add);
+    if (st.mux != nullptr) {
+        pm.autoremove(st.mux);
+    }
+    pm.autoremove(st.ff);
+}
+
+struct QlDspMacc : public Pass {
+
+    QlDspMacc() : Pass("ql_dsp_macc", "infer multiply-accumulate structures into QuickLogic DSP cells") {}
+
+    void help() override
+    {
+        log("\n");
+        log("    ql_dsp_macc [selection]\n");
+        log("\n");
+        log("This pass replaces $mul + $add / $sub + (optional $mux) + flip-flop\n");
+        log("structures with dsp_t1_* DSP cells working as multiply-accumulators.\n");
+        log("\n");
+    }
+
+    void execute(std::vector<std::string> a_Args, RTLIL::Design *a_Design) override
+    {
+        log_header(a_Design, "Executing QL_DSP_MACC pass.\n");
+
+        // The pass has no options of its own; everything after the name is a selection
+        extra_args(a_Args, 1, a_Design);
+
+        for (auto module : a_Design->selected_modules()) {
+            ql_dsp_macc_pm(module, module->selected_cells()).run_ql_dsp_macc(create_ql_macc_dsp);
+        }
+    }
+} QlDspMacc;
+
+PRIVATE_NAMESPACE_END
diff --git a/ql-qlf-plugin/ql-dsp-macc.pmg b/ql-qlf-plugin/ql-dsp-macc.pmg
new file mode 100644
index 0000000..4cfd15a
--- /dev/null
+++ b/ql-qlf-plugin/ql-dsp-macc.pmg
@@ -0,0 +1,50 @@
+pattern ql_dsp_macc
+
+state <IdString> add_ba
+state <IdString> mux_ab
+
+state <int> mul_nusers
+state <int> add_nusers
+state <int> mux_nusers
+state <int> ff_d_nusers
+state <int> ff_q_nusers
+
+match mul
+ select mul->type.in($mul)
+ select nusers(port(mul, \Y)) <= 3
+ set mul_nusers nusers(port(mul, \Y))
+endmatch
+
+match add
+ select add->type.in($add, $sub)
+ choice <IdString> AB {\A, \B}
+ define <IdString> BA (AB == \A ? \B : \A)
+ index <SigSpec> port(add, AB) === port(mul, \Y)
+ select nusers(port(add, \Y)) <= 3
+ set add_nusers nusers(port(add, \Y))
+ set add_ba BA
+endmatch
+
+match mux
+ select mux->type.in($mux)
+ choice <IdString> AB {\A, \B}
+ define <IdString> BA (AB == \A ? \B : \A)
+ index <SigSpec> port(mux, AB) === port(mul, \Y)
+ index <SigSpec> port(mux, BA) === port(add, \Y)
+ select nusers(port(mux, \Y)) <= 3
+ set mux_nusers nusers(port(mux, \Y))
+ set mux_ab AB
+ optional
+endmatch
+
+match ff
+ select ff->type.in($dff, $adff, $dffe, $adffe)
+ index <SigSpec> port(ff, \D) === (mux == nullptr ? port(add, \Y) : port(mux, \Y))
+ index <SigSpec> port(ff, \Q) === port(add, add_ba)
+ set ff_d_nusers nusers(port(ff, \D))
+ set ff_q_nusers nusers(port(ff, \Q))
+endmatch
+
+code
+ accept;
+endcode
diff --git a/ql-qlf-plugin/synth_quicklogic.cc b/ql-qlf-plugin/synth_quicklogic.cc
index 3bb44c5..fdb23b0 100644
--- a/ql-qlf-plugin/synth_quicklogic.cc
+++ b/ql-qlf-plugin/synth_quicklogic.cc
@@ -269,6 +269,7 @@
if (help_mode) {
run("wreduce t:$mul", " (for qlf_k6n10f if not -no_dsp)");
+ run("ql_dsp_macc", " (for qlf_k6n10f if not -no_dsp)");
run("techmap -map +/mul2dsp.v [...]", "(for qlf_k6n10f if not -no_dsp)");
run("chtype -set $mul t:$__soft_mul", "(for qlf_k6n10f if not -no_dsp)");
run("techmap -map +/quicklogic/" + family + "/dsp_map.v", "(for qlf_k6n10f if not -no_dsp)");
@@ -277,6 +278,8 @@
} else if (!nodsp) {
run("wreduce t:$mul");
+ run("ql_dsp_macc");
+
for (const auto &rule : dsp_rules) {
run(stringf("techmap -map +/mul2dsp.v "
"-D DSP_A_MAXWIDTH=%zu -D DSP_B_MAXWIDTH=%zu "
diff --git a/ql-qlf-plugin/tests/Makefile b/ql-qlf-plugin/tests/Makefile
index e54aeaf..1cc48ca 100644
--- a/ql-qlf-plugin/tests/Makefile
+++ b/ql-qlf-plugin/tests/Makefile
@@ -22,7 +22,8 @@
fsm \
pp3_bram \
qlf_k6n10f/dsp_mult \
- qlf_k6n10f/dsp_simd
+ qlf_k6n10f/dsp_simd \
+ qlf_k6n10f/dsp_macc
# qlf_k6n10_bram \
include $(shell pwd)/../../Makefile_test.common
@@ -42,4 +43,5 @@
pp3_bram_verify = true
qlf_k6n10f-dsp_mult_verify = true
qlf_k6n10f-dsp_simd_verify = true
+qlf_k6n10f-dsp_macc_verify = true
#qlf_k6n10_bram_verify = true
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl
new file mode 100644
index 0000000..2de3bdc
--- /dev/null
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl
@@ -0,0 +1,66 @@
+yosys -import
+if { [info procs quicklogic_eqn] == {} } { plugin -i ql-qlf}
+yosys -import ;# ingest plugin commands
+
+read_verilog dsp_macc.v
+design -save read
+
+set TOP "macc_simple"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:*
+
+set TOP "macc_simple_clr"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:\$lut
+select -assert-count 2 t:*
+
+set TOP "macc_simple_arst"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:*
+
+set TOP "macc_simple_ena"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:*
+
+set TOP "macc_simple_arst_clr_ena"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:\$lut
+select -assert-count 2 t:*
+
+set TOP "macc_simple_preacc"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:*
+
+set TOP "macc_simple_preacc_clr"
+design -load read
+hierarchy -top $TOP
+synth_quicklogic -family qlf_k6n10f -top $TOP
+yosys cd $TOP
+select -assert-count 1 t:QL_DSP2
+select -assert-count 1 t:\$lut
+select -assert-count 2 t:*
+
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.v b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.v
new file mode 100644
index 0000000..0840210
--- /dev/null
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.v
@@ -0,0 +1,113 @@
+// Copyright (C) 2020-2021 The SymbiFlow Authors.
+//
+// Use of this source code is governed by a ISC-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/ISC
+//
+// SPDX-License-Identifier:ISC
+
+module macc_simple (
+ input wire clk,
+ input wire [ 7:0] A,
+ input wire [ 7:0] B,
+ output reg [15:0] Z
+);
+
+ always @(posedge clk)
+ Z <= Z + (A * B);
+
+endmodule
+
+module macc_simple_clr (
+ input wire clk,
+ input wire clr,
+ input wire [ 7:0] A,
+ input wire [ 7:0] B,
+ output reg [15:0] Z
+);
+
+ always @(posedge clk)
+ if (clr) Z <= (A * B);
+ else Z <= Z + (A * B);
+
+endmodule
+
+module macc_simple_arst (
+ input wire clk,
+ input wire rst,
+ input wire [ 7:0] A,
+ input wire [ 7:0] B,
+ output reg [15:0] Z
+);
+
+ always @(posedge clk or posedge rst)
+ if (rst) Z <= 0;
+ else Z <= Z + (A * B);
+
+endmodule
+
+module macc_simple_ena (
+ input wire clk,
+ input wire ena,
+ input wire [ 7:0] A,
+ input wire [ 7:0] B,
+ output reg [15:0] Z
+);
+
+ always @(posedge clk)
+ if (ena) Z <= Z + (A * B);
+
+endmodule
+
+module macc_simple_arst_clr_ena (
+ input wire clk,
+ input wire rst,
+ input wire clr,
+ input wire ena,
+ input wire [ 7:0] A,
+ input wire [ 7:0] B,
+ output reg [15:0] Z
+);
+
+ always @(posedge clk or posedge rst)
+ if (rst) Z <= 0;
+ else if (ena) begin
+ if (clr) Z <= (A * B);
+ else Z <= Z + (A * B);
+ end
+
+endmodule
+
+module macc_simple_preacc (
+ input wire clk,
+ input wire [ 7:0] A,
+ input wire [ 7:0] B,
+ output wire [15:0] Z
+);
+
+ reg [15:0] acc;
+
+ assign Z = acc + (A * B);
+
+ always @(posedge clk)
+ acc <= Z;
+
+endmodule
+
+module macc_simple_preacc_clr (
+    input  wire        clk,
+    input  wire        clr,
+    input  wire [ 7:0] A,
+    input  wire [ 7:0] B,
+    output wire [15:0] Z
+);
+    // Z is driven by a continuous assignment (taken before the acc register), so it must be a wire
+    reg [15:0] acc;
+
+    assign Z = (clr) ? (A * B) : (acc + (A * B));
+
+    always @(posedge clk)
+        acc <= Z;
+
+endmodule
+