Merge pull request #298 from antmicro/k6n10f_ram_dsp_mode_bits

Merge k6n10f RAM and DSP parameters into MODE_BITS
diff --git a/ql-qlf-plugin/ql-dsp-macc.cc b/ql-qlf-plugin/ql-dsp-macc.cc
index f95ca87..5275024 100644
--- a/ql-qlf-plugin/ql-dsp-macc.cc
+++ b/ql-qlf-plugin/ql-dsp-macc.cc
@@ -151,13 +151,13 @@
     RTLIL::SigSpec ena;
 
     if (st.ff->hasPort(ID(ARST))) {
-        if (st.ff->getParam(ID(ARST_POLARITY)) != RTLIL::S0) {
+        if (st.ff->getParam(ID(ARST_POLARITY)) != RTLIL::S1) {
             rst = pm.module->Not(NEW_ID, st.ff->getPort(ID(ARST)));
         } else {
             rst = st.ff->getPort(ID(ARST));
         }
     } else {
-        rst = RTLIL::SigSpec(RTLIL::S1);
+        rst = RTLIL::SigSpec(RTLIL::S0);
     }
 
     if (st.ff->hasPort(ID(EN))) {
diff --git a/ql-qlf-plugin/qlf_k6n10f/cells_sim.v b/ql-qlf-plugin/qlf_k6n10f/cells_sim.v
index 6c85937..bf1648f 100644
--- a/ql-qlf-plugin/qlf_k6n10f/cells_sim.v
+++ b/ql-qlf-plugin/qlf_k6n10f/cells_sim.v
@@ -1274,7 +1274,7 @@
 module QL_DSP2 ( // TODO: Name subject to change
       input  [19:0] a,
       input  [17:0] b,
-      input  [3:0] acc_fir,
+      input  [ 5:0] acc_fir,
       output [37:0] z,
       output [17:0] dly_b,
 
@@ -1307,8 +1307,6 @@
       localparam NBITS_A = 20;
       localparam NBITS_B = 18;
       localparam NBITS_Z = 38;
-      localparam NBITS_COEF = 20;
-      localparam NBITS_AF = 4;
 
       wire [NBITS_Z-1:0] dsp_full_z;
       wire [(NBITS_Z/2)-1:0] dsp_frac0_z;
@@ -1326,16 +1324,14 @@
         .NBITS_A(NBITS_A/2),
             .NBITS_B(NBITS_B/2),
             .NBITS_ACC(NBITS_ACC/2),
-            .NBITS_Z(NBITS_Z/2),
-            .NBITS_COEF(NBITS_COEF/2),
-            .NBITS_AF(NBITS_AF/2)
+            .NBITS_Z(NBITS_Z/2)
         ) dsp_frac0 (
             .a_i(a[(NBITS_A/2)-1:0]),
             .b_i(b[(NBITS_B/2)-1:0]),
             .z_o(dsp_frac0_z),
             .dly_b_o(dsp_frac0_dly_b),
 
-            .acc_fir_i(acc_fir[(NBITS_AF/2)-1:0]),
+            .acc_fir_i(acc_fir),
             .feedback_i(feedback),
             .load_acc_i(load_acc),
 
@@ -1343,7 +1339,7 @@
             .unsigned_b_i(unsigned_b),
 
             .clock_i(clk),
-            .reset_n_i(reset),
+            .s_reset(reset),
 
             .saturate_enable_i(saturate_enable),
             .output_select_i(output_select),
@@ -1351,10 +1347,10 @@
             .shift_right_i(shift_right),
             .subtract_i(subtract),
             .register_inputs_i(register_inputs),
-            .coef_0_i(COEFF_0[(NBITS_COEF/2)-1:0]),
-            .coef_1_i(COEFF_1[(NBITS_COEF/2)-1:0]),
-            .coef_2_i(COEFF_2[(NBITS_COEF/2)-1:0]),
-            .coef_3_i(COEFF_3[(NBITS_COEF/2)-1:0])
+            .coef_0_i(COEFF_0[(NBITS_A/2)-1:0]),
+            .coef_1_i(COEFF_1[(NBITS_A/2)-1:0]),
+            .coef_2_i(COEFF_2[(NBITS_A/2)-1:0]),
+            .coef_3_i(COEFF_3[(NBITS_A/2)-1:0])
         );
 
     // Output used when fmode == 1
@@ -1362,16 +1358,14 @@
         .NBITS_A(NBITS_A/2),
             .NBITS_B(NBITS_B/2),
             .NBITS_ACC(NBITS_ACC/2),
-            .NBITS_Z(NBITS_Z/2),
-            .NBITS_COEF(NBITS_COEF/2),
-            .NBITS_AF(NBITS_AF/2)
+            .NBITS_Z(NBITS_Z/2)
         ) dsp_frac1 (
             .a_i(a[NBITS_A-1:NBITS_A/2]),
             .b_i(b[NBITS_B-1:NBITS_B/2]),
             .z_o(dsp_frac1_z),
             .dly_b_o(dsp_frac1_dly_b),
 
-            .acc_fir_i(acc_fir[NBITS_AF-1:NBITS_AF/2]),
+            .acc_fir_i(acc_fir),
             .feedback_i(feedback),
             .load_acc_i(load_acc),
 
@@ -1379,7 +1373,7 @@
             .unsigned_b_i(unsigned_b),
 
             .clock_i(clk),
-            .reset_n_i(reset),
+            .s_reset(reset),
 
             .saturate_enable_i(saturate_enable),
             .output_select_i(output_select),
@@ -1387,10 +1381,10 @@
             .shift_right_i(shift_right),
             .subtract_i(subtract),
             .register_inputs_i(register_inputs),
-            .coef_0_i(COEFF_0[NBITS_COEF-1:NBITS_COEF/2]),
-            .coef_1_i(COEFF_1[NBITS_COEF-1:NBITS_COEF/2]),
-            .coef_2_i(COEFF_2[NBITS_COEF-1:NBITS_COEF/2]),
-            .coef_3_i(COEFF_3[NBITS_COEF-1:NBITS_COEF/2])
+            .coef_0_i(COEFF_0[NBITS_A-1:NBITS_A/2]),
+            .coef_1_i(COEFF_1[NBITS_A-1:NBITS_A/2]),
+            .coef_2_i(COEFF_2[NBITS_A-1:NBITS_A/2]),
+            .coef_3_i(COEFF_3[NBITS_A-1:NBITS_A/2])
         );
 
     // Output used when fmode == 0
@@ -1398,9 +1392,7 @@
              .NBITS_A(NBITS_A),
              .NBITS_B(NBITS_B),
              .NBITS_ACC(NBITS_ACC),
-             .NBITS_Z(NBITS_Z),
-             .NBITS_COEF(NBITS_COEF),
-             .NBITS_AF(NBITS_AF)
+             .NBITS_Z(NBITS_Z)
         ) dsp_full (
             .a_i(a),
             .b_i(b),
@@ -1415,7 +1407,7 @@
             .unsigned_b_i(unsigned_b),
 
             .clock_i(clk),
-            .reset_n_i(reset),
+            .s_reset(reset),
 
             .saturate_enable_i(saturate_enable),
             .output_select_i(output_select),
@@ -1434,16 +1426,14 @@
     parameter NBITS_ACC  = 64,
     parameter NBITS_A    = 20,
     parameter NBITS_B    = 18,
-    parameter NBITS_Z    = 38,
-    parameter NBITS_COEF = 20,
-    parameter NBITS_AF   = 4
+    parameter NBITS_Z    = 38
 )(
     input [NBITS_A-1:0] a_i,
     input [NBITS_B-1:0] b_i,
     output [NBITS_Z-1:0] z_o,
     output reg [NBITS_B-1:0] dly_b_o,
 
-    input [NBITS_AF-1:0] acc_fir_i,
+    input [5:0] acc_fir_i,
     input [2:0] feedback_i,
     input load_acc_i,
 
@@ -1451,7 +1441,7 @@
     input unsigned_b_i,
 
     input clock_i,
-    input reset_n_i,
+    input s_reset,
 
     input saturate_enable_i,
     input [2:0] output_select_i,
@@ -1459,10 +1449,10 @@
     input [5:0] shift_right_i,
     input subtract_i,
     input register_inputs_i,
-    input [NBITS_COEF-1:0] coef_0_i,
-    input [NBITS_COEF-1:0] coef_1_i,
-    input [NBITS_COEF-1:0] coef_2_i,
-    input [NBITS_COEF-1:0] coef_3_i
+    input [NBITS_A-1:0] coef_0_i,
+    input [NBITS_A-1:0] coef_1_i,
+    input [NBITS_A-1:0] coef_2_i,
+    input [NBITS_A-1:0] coef_3_i
 );
 
 // FIXME: The version of Icarus Verilog from Conda seems not to recognize the
@@ -1475,7 +1465,7 @@
     // Input registers
     reg  [NBITS_A-1:0]  r_a;
     reg  [NBITS_B-1:0]  r_b;
-    reg  [NBITS_AF-1:0] r_acc_fir;
+    reg  [5:0]          r_acc_fir;
     reg                 r_unsigned_a;
     reg                 r_unsigned_b;
     reg                 r_load_acc;
@@ -1503,8 +1493,8 @@
         r_rnd        <= 0;
     end
 
-    always @(posedge clock_i or negedge reset_n_i) begin
-        if (~reset_n_i) begin
+    always @(posedge clock_i or posedge s_reset) begin
+        if (s_reset) begin
 
             r_a <= 'h0;
             r_b <= 'h0;
@@ -1543,7 +1533,7 @@
     wire [NBITS_A-1:0]  a = register_inputs_i ? r_a : a_i;
     wire [NBITS_B-1:0]  b = register_inputs_i ? r_b : b_i;
 
-    wire [NBITS_AF-1:0] acc_fir = register_inputs_i ? r_acc_fir : acc_fir_i;
+    wire [5:0] acc_fir = register_inputs_i ? r_acc_fir : acc_fir_i;
     wire       unsigned_a = register_inputs_i ? r_unsigned_a : unsigned_a_i;
     wire       unsigned_b = register_inputs_i ? r_unsigned_b : unsigned_b_i;
     wire [2:0] feedback   = register_inputs_i ? r_feedback   : feedback_i;
@@ -1571,12 +1561,12 @@
     wire [NBITS_B-1:0] mult_b = (feedback == 2'h2) ? {NBITS_B{1'b0}}  : b;
 
     wire [NBITS_A-1:0] mult_sgn_a = mult_a[NBITS_A-1];
-    wire [NBITS_A-1:0] mult_mag_a = (mult_sgn_a) ? (~mult_a + 1) : mult_a;
+    wire [NBITS_A-1:0] mult_mag_a = (mult_sgn_a && !unsigned_a) ? (~mult_a + 1) : mult_a;
     wire [NBITS_B-1:0] mult_sgn_b = mult_b[NBITS_B-1];
-    wire [NBITS_B-1:0] mult_mag_b = (mult_sgn_b) ? (~mult_b + 1) : mult_b;
+    wire [NBITS_B-1:0] mult_mag_b = (mult_sgn_b && !unsigned_b) ? (~mult_b + 1) : mult_b;
 
     wire [NBITS_A+NBITS_B-1:0] mult_mag = mult_mag_a * mult_mag_b;
-    wire mult_sgn = mult_sgn_a ^ mult_sgn_b;
+    wire mult_sgn = (mult_sgn_a && !unsigned_a) ^ (mult_sgn_b && !unsigned_b);
 
     wire [NBITS_A+NBITS_B-1:0] mult = (unsigned_a && unsigned_b) ?
         (mult_a * mult_b) : (mult_sgn ? (~mult_mag + 1) : mult_mag);
@@ -1586,22 +1576,21 @@
         {{(NBITS_ACC-NBITS_A-NBITS_B){1'b0}},                    mult[NBITS_A+NBITS_B-1:0]} :
         {{(NBITS_ACC-NBITS_A-NBITS_B){mult[NBITS_A+NBITS_B-1]}}, mult[NBITS_A+NBITS_B-1:0]};
 
-    wire [NBITS_ACC-1:0] a_xtnd = (unsigned_a) ?
-                  { {(NBITS_ACC - NBITS_A - NBITS_AF){1'b0}}, acc_fir, {a} } :
-                  { {(NBITS_ACC - NBITS_A - NBITS_AF){acc_fir[NBITS_AF-1]}}, acc_fir, {a[NBITS_A-1:0]} };
-
     // Adder
-    wire [NBITS_ACC-1:0] add_a = (subtract_i) ? (~mult_xtnd + 1) : mult_xtnd;
+    wire [NBITS_ACC-1:0] acc_fir_int = unsigned_a ? {{(NBITS_ACC-NBITS_A){1'b0}},         a} :
+                                                    {{(NBITS_ACC-NBITS_A){a[NBITS_A-1]}}, a} ;
+
+    wire [NBITS_ACC-1:0] add_a = (subtract) ? (~mult_xtnd + 1) : mult_xtnd;
     wire [NBITS_ACC-1:0] add_b = (feedback_i == 3'h0) ? acc :
-                                 (feedback_i == 3'h1) ? {{NBITS_ACC}{1'b0}} : a_xtnd;
+                                 (feedback_i == 3'h1) ? {{NBITS_ACC}{1'b0}} : (acc_fir_int << acc_fir);
 
     wire [NBITS_ACC-1:0] add_o = add_a + add_b;
 
     // Accumulator
     initial acc <= 0;
 
-    always @(posedge clock_i or negedge reset_n_i)
-        if (~reset_n_i) acc <= 'h0;
+    always @(posedge clock_i or posedge s_reset)
+        if (s_reset) acc <= 'h0;
         else begin
             if (load_acc)
                 acc <= add_o;
@@ -1638,8 +1627,8 @@
 
     initial z1 <= 0;
 
-    always @(posedge clock_i or negedge reset_n_i)
-        if (!reset_n_i)
+    always @(posedge clock_i or posedge s_reset)
+        if (s_reset)
             z1 <= 0;
         else begin
             z1 <= (output_select_i == 3'b100) ? z0 : z2;
@@ -1658,8 +1647,8 @@
     // B input delayed passthrough
     initial dly_b_o <= 0;
 
-    always @(posedge clock_i or negedge reset_n_i)
-        if (!reset_n_i)
+    always @(posedge clock_i or posedge s_reset)
+        if (s_reset)
             dly_b_o <= 0;
         else
             dly_b_o <= b_i;
@@ -1669,7 +1658,7 @@
 module dsp_t1_20x18x64 (
     input  [19:0] a_i,
     input  [17:0] b_i,
-    input  [ 3:0] acc_fir_i,
+    input  [ 5:0] acc_fir_i,
     output [37:0] z_o,
     output [17:0] dly_b_o,
 
@@ -1727,7 +1716,7 @@
 module dsp_t1_10x9x32 (
     input  [ 9:0] a_i,
     input  [ 8:0] b_i,
-    input  [ 1:0] acc_fir_i,
+    input  [ 5:0] acc_fir_i,
     output [18:0] z_o,
     output [ 8:0] dly_b_o,
 
@@ -1769,7 +1758,7 @@
 
     .f_mode(1'b1),  // 10x9x32 DSP
 
-    .acc_fir({2'd0, acc_fir_i}),
+    .acc_fir(acc_fir_i),
     .feedback(feedback_i),
     .load_acc(load_acc_i),
 
diff --git a/ql-qlf-plugin/qlf_k6n10f/dsp_final_map.v b/ql-qlf-plugin/qlf_k6n10f/dsp_final_map.v
index 1c56eed..e2ab7a7 100644
--- a/ql-qlf-plugin/qlf_k6n10f/dsp_final_map.v
+++ b/ql-qlf-plugin/qlf_k6n10f/dsp_final_map.v
@@ -17,7 +17,7 @@
 module dsp_t1_20x18x64 (
     input  [19:0] a_i,
     input  [17:0] b_i,
-    input  [ 3:0] acc_fir_i,
+    input  [ 5:0] acc_fir_i,
     output [37:0] z_o,
     output [17:0] dly_b_o,
 
@@ -73,7 +73,7 @@
 module dsp_t1_10x9x32 (
     input  [ 9:0] a_i,
     input  [ 8:0] b_i,
-    input  [ 1:0] acc_fir_i,
+    input  [ 5:0] acc_fir_i,
     output [18:0] z_o,
     output [ 8:0] dly_b_o,
 
@@ -110,7 +110,7 @@
     ) _TECHMAP_REPLACE_ (
         .a                  ({10'd0, a_i}),
         .b                  ({ 9'd0, b_i}),
-        .acc_fir            ({ 2'd0, acc_fir_i}),
+        .acc_fir            (acc_fir_i),
         .z                  (z),
         .dly_b              (dly_b),
 
diff --git a/ql-qlf-plugin/qlf_k6n10f/dsp_map.v b/ql-qlf-plugin/qlf_k6n10f/dsp_map.v
index fbde7fd..3c16b60 100644
--- a/ql-qlf-plugin/qlf_k6n10f/dsp_map.v
+++ b/ql-qlf-plugin/qlf_k6n10f/dsp_map.v
@@ -36,7 +36,7 @@
     dsp_t1_20x18x64 _TECHMAP_REPLACE_ (
         .a_i                (a),
         .b_i                (b),
-        .acc_fir_i          (4'd0),
+        .acc_fir_i          (6'd0),
         .z_o                (z),
 
         .feedback_i         (3'd0),
@@ -78,7 +78,7 @@
     dsp_t1_10x9x32 _TECHMAP_REPLACE_ (
         .a_i                (a),
         .b_i                (b),
-        .acc_fir_i          (2'd0),
+        .acc_fir_i          (6'd0),
         .z_o                (z),
 
         .feedback_i         (3'd0),
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl
index 468f421..9e47729 100644
--- a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_macc/dsp_macc.tcl
@@ -56,7 +56,7 @@
 design -load postopt
 yosys cd $TOP
 select -assert-count 1 t:QL_DSP2
-select -assert-count 2 t:*
+select -assert-count 1 t:*
 
 #FIXME: DSP not inferred (got $mux instead of $dffe)
 #set TOP "macc_simple_ena"
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.tcl b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.tcl
index b653836..cd8ec40 100644
--- a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.tcl
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.tcl
@@ -14,7 +14,7 @@
     techmap -wb -autoproc -map +/quicklogic/qlf_k6n10f/cells_sim.v
     yosys proc
     opt_expr
-    opt_clean
+    opt_clean -purge
 
     async2sync
     equiv_make gold gate equiv
@@ -59,3 +59,10 @@
 yosys cd ${TOP}
 select -assert-count 1 t:QL_DSP2
 
+set TOP "mult_8x8_s"
+design -load read
+check_equiv ${TOP}
+design -load postopt
+yosys cd ${TOP}
+select -assert-count 1 t:QL_DSP2
+
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.v b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.v
index 1318c3b..8baf45d 100644
--- a/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.v
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/dsp_mult/dsp_mult.v
@@ -53,3 +53,13 @@
     assign Z = A * B;
 
 endmodule
+
+module mult_8x8_s (  // Test design: purely combinational signed 8x8 multiplier
+    input  wire signed [ 7:0] A,  // signed 8-bit operand
+    input  wire signed [ 7:0] B,  // signed 8-bit operand
+    output wire signed [15:0] Z   // signed 16-bit product
+);
+
+    assign Z = A * B;  // A and B are both declared signed, so the multiply is signed
+
+endmodule
diff --git a/ql-qlf-plugin/tests/qlf_k6n10f/sim_dsp_fir/sim_dsp_fir.v b/ql-qlf-plugin/tests/qlf_k6n10f/sim_dsp_fir/sim_dsp_fir.v
index cf32147..65e519a 100644
--- a/ql-qlf-plugin/tests/qlf_k6n10f/sim_dsp_fir/sim_dsp_fir.v
+++ b/ql-qlf-plugin/tests/qlf_k6n10f/sim_dsp_fir/sim_dsp_fir.v
@@ -84,7 +84,7 @@
     endcase
 
     // UUT
-    wire signed [3:0] acc_fir_i = 4'h0;
+    wire signed [5:0] acc_fir_i = 6'h0;
     wire signed [19:0] A = coeff;
     wire signed [17:0] B = data;
     wire signed [37:0] Z;
@@ -92,7 +92,7 @@
     dsp_t1_sim # (
     ) uut (
         .clock_i		(clk),
-        .reset_n_i		(~rst),
+        .s_reset		(rst),
         .a_i			((!stb) ? A : 20'h0),
         .b_i			((!stb) ? B : 18'h0),
         .acc_fir_i		((!stb) ? acc_fir_i : 4'h0),