Merge pull request #1449 from pepijndevos/gowin

Improvements for gowin support
diff --git a/Makefile b/Makefile
index 845a97b..6e7681c 100644
--- a/Makefile
+++ b/Makefile
@@ -718,6 +718,7 @@
 	+cd tests/arch/ecp5 && bash run-test.sh $(SEEDOPT)
 	+cd tests/arch/efinix && bash run-test.sh $(SEEDOPT)
 	+cd tests/arch/anlogic && bash run-test.sh $(SEEDOPT)
+	+cd tests/arch/gowin && bash run-test.sh $(SEEDOPT)
 	+cd tests/rpc && bash run-test.sh
 	@echo ""
 	@echo "  Passed \"make test\"."
diff --git a/examples/gowin/demo.cst b/examples/gowin/demo.cst
index 22d7eb6..c8f89dc 100644
--- a/examples/gowin/demo.cst
+++ b/examples/gowin/demo.cst
@@ -1,41 +1,10 @@
-// 50 MHz Clock
-IO_LOC "clk" D11;
-
-// LEDs
-IO_LOC "leds[0]"  D22;
-IO_LOC "leds[1]"  E22;
-IO_LOC "leds[2]"  G22;
-IO_LOC "leds[3]"  J22;
-IO_LOC "leds[4]"  L22;
-IO_LOC "leds[5]"  L19;
-IO_LOC "leds[6]"  L20;
-IO_LOC "leds[7]"  M21;
-IO_LOC "leds[8]"  N19;
-IO_LOC "leds[9]"  R19;
-IO_LOC "leds[10]" T18;
-IO_LOC "leds[11]" AA22;
-IO_LOC "leds[12]" U18;
-IO_LOC "leds[13]" V20;
-IO_LOC "leds[14]" AA21;
-IO_LOC "leds[15]" AB21;
-
-
-// 7-Segment Display
-IO_LOC "seg7dig[0]" E20;
-IO_LOC "seg7dig[1]" G18;
-IO_LOC "seg7dig[2]" G20;
-IO_LOC "seg7dig[3]" F21;
-IO_LOC "seg7dig[4]" J20;
-IO_LOC "seg7dig[5]" H21;
-IO_LOC "seg7dig[6]" H18;
-IO_LOC "seg7dig[7]" D20;
-IO_LOC "seg7sel[0]" C19;
-IO_LOC "seg7sel[1]" B22;
-IO_LOC "seg7sel[2]" C20;
-IO_LOC "seg7sel[3]" C21;
-
-// Switches
-IO_LOC "sw[0]" AB20;
-IO_LOC "sw[1]" AB19;
-IO_LOC "sw[2]" AB18;
-IO_LOC "sw[3]" AB17;
+IO_LOC "clk" 35;
+//IO_LOC "rst_n" 77;
+IO_LOC "leds[0]" 79;
+IO_LOC "leds[1]" 80;
+IO_LOC "leds[2]" 81;
+IO_LOC "leds[3]" 82;
+IO_LOC "leds[4]" 83;
+IO_LOC "leds[5]" 84;
+IO_LOC "leds[6]" 85;
+IO_LOC "leds[7]" 86;
\ No newline at end of file
diff --git a/examples/gowin/demo.v b/examples/gowin/demo.v
index 6ea1083..485fec9 100644
--- a/examples/gowin/demo.v
+++ b/examples/gowin/demo.v
@@ -1,9 +1,7 @@
 module demo (
 	input clk,
-	input [3:0] sw,
 	output [15:0] leds,
-	output [7:0] seg7dig,
-	output [3:0] seg7sel
+	output unused
 );
 	localparam PRESCALE = 20;
 	reg [PRESCALE+3:0] counter = 0;
diff --git a/examples/gowin/device.cfg b/examples/gowin/device.cfg
new file mode 100644
index 0000000..f6ab821
--- /dev/null
+++ b/examples/gowin/device.cfg
@@ -0,0 +1,16 @@
+set JTAG regular_io = false
+set SSPI regular_io = false
+set MSPI regular_io = false
+set READY regular_io = false
+set DONE regular_io = false
+set RECONFIG_N regular_io = false
+set MODE regular_io = false
+set CRC_check = true
+set compress = false
+set encryption = false
+set security_bit_enable = true
+set bsram_init_fuse_print = true
+set download_speed = 250/100
+set spi_flash_address = 0x00FFF000
+set format = txt
+set background_programming = false
diff --git a/examples/gowin/pnr.cfg b/examples/gowin/pnr.cfg
new file mode 100644
index 0000000..a1b43cc
--- /dev/null
+++ b/examples/gowin/pnr.cfg
@@ -0,0 +1,8 @@
+-sdf
+-oc
+-ibs
+-posp
+-o
+-warning_all
+-tt
+-timing
diff --git a/examples/gowin/run.sh b/examples/gowin/run.sh
index 33a7b5c..cd26010 100644
--- a/examples/gowin/run.sh
+++ b/examples/gowin/run.sh
@@ -1,8 +1,7 @@
 #!/bin/bash
 set -ex
 yosys -p "synth_gowin -top demo -vout demo_syn.v" demo.v
-$GOWIN_HOME/bin/gowin -d demo_syn.v -cst demo.cst -sdc demo.sdc -p GW2A55-PBGA484-6 \
-		-warning_all -out demo_out.v -rpt demo.rpt -tr demo_tr.html -bit demo.bit
+$GOWIN_HOME/bin/gowin -d demo_syn.v -cst demo.cst -sdc demo.sdc -p GW1NR-9-QFN88-6 -pn GW1NR-LV9QN88C6/I5 -cfg device.cfg -bit -tr -ph -timing -gpa -rpt -warning_all
 
 # post place&route simulation (icarus verilog)
 if false; then
diff --git a/examples/gowin/run.tcl b/examples/gowin/run.tcl
new file mode 100644
index 0000000..39da11c
--- /dev/null
+++ b/examples/gowin/run.tcl
@@ -0,0 +1,9 @@
+# gw_sh run.tcl
+exec yosys -p "synth_gowin -top demo -vout demo_syn.v" demo.v
+add_file -cst demo.cst
+add_file -sdc demo.sdc
+add_file -vm demo_syn.v
+add_file -cfg device.cfg
+set_option -device GW1NR-9-QFN88-6
+set_option -pn GW1NR-LV9QN88C6/I5
+run_pnr -opt pnr.cfg
diff --git a/techlibs/gowin/Makefile.inc b/techlibs/gowin/Makefile.inc
index 6f21593..d285370 100644
--- a/techlibs/gowin/Makefile.inc
+++ b/techlibs/gowin/Makefile.inc
@@ -15,3 +15,13 @@
 
 $(eval $(call add_share_file,share/gowin,techlibs/gowin/brams_init3.vh))
 
+EXTRA_OBJS += techlibs/gowin/brams_init.mk
+.SECONDARY: techlibs/gowin/brams_init.mk
+
+techlibs/gowin/brams_init.mk: techlibs/gowin/brams_init.py
+	$(Q) mkdir -p techlibs/gowin
+	$(P) python3 $<
+	$(Q) touch $@
+
+techlibs/gowin/bram_init_16.vh: techlibs/gowin/brams_init.mk
+$(eval $(call add_gen_share_file,share/gowin,techlibs/gowin/bram_init_16.vh))
diff --git a/techlibs/gowin/arith_map.v b/techlibs/gowin/arith_map.v
index e15de64..b6f9e8c 100644
--- a/techlibs/gowin/arith_map.v
+++ b/techlibs/gowin/arith_map.v
@@ -40,15 +40,15 @@
    \$pos #(.A_SIGNED(B_SIGNED), .A_WIDTH(B_WIDTH), .Y_WIDTH(Y_WIDTH)) B_conv (.A(B), .Y(B_buf));
 
    wire [Y_WIDTH-1:0] 	AA = A_buf;
-   wire [Y_WIDTH-1:0] 	BB = BI ? ~B_buf : B_buf;
+   wire [Y_WIDTH-1:0] 	BB = B_buf;
    wire [Y_WIDTH-1:0] 	C = {CO, CI};
 
    genvar 		i;
    generate for (i = 0; i < Y_WIDTH; i = i + 1) begin:slice
-      ALU #(.ALU_MODE(32'b0))
+      ALU #(.ALU_MODE(2)) // ADDSUB I3 ? add : sub
       alu(.I0(AA[i]),
 	  .I1(BB[i]),
-	  .I3(1'b0),
+	  .I3(~BI),
 	  .CIN(C[i]),
 	  .COUT(CO[i]),
 	  .SUM(Y[i])
diff --git a/techlibs/gowin/bram.txt b/techlibs/gowin/bram.txt
index b5f9a98..e406f9c 100644
--- a/techlibs/gowin/bram.txt
+++ b/techlibs/gowin/bram.txt
@@ -1,6 +1,7 @@
 bram $__GW1NR_SDP
-# uncomment when done
-#  init 1
+  init 1
+  abits 9 @a9d36
+  dbits 32 @a9d36
   abits 10 @a10d18
   dbits 16 @a10d18
   abits 11 @a11d9
@@ -14,7 +15,8 @@
   groups 2
   ports  1 1
   wrmode 1 0
-  enable 1 1 @a10d18
+  enable 4 1 @a9d36
+  enable 2 1 @a10d18
   enable 1 1 @a11d9 @a12d4 @a13d2 @a14d1
   transp 0 0
   clocks 2 3
@@ -24,6 +26,6 @@
 match $__GW1NR_SDP
   min bits 2048
   min efficiency 5
-  shuffle_enable B
+  shuffle_enable A
   make_transp
 endmatch
diff --git a/techlibs/gowin/brams_init.py b/techlibs/gowin/brams_init.py
new file mode 100755
index 0000000..b78eb8d
--- /dev/null
+++ b/techlibs/gowin/brams_init.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+
+with open("techlibs/gowin/bram_init_16.vh", "w") as f:
+    for i in range(0, 0x40):
+        low = i << 8
+        hi = ((i+1) << 8)-1
+        snippet = "INIT[%d:%d]" % (hi, low)
+        print(".INIT_RAM_%02X({%s})," % (i, snippet), file=f)
diff --git a/techlibs/gowin/brams_map.v b/techlibs/gowin/brams_map.v
index e963cfa..fbebc4a 100644
--- a/techlibs/gowin/brams_map.v
+++ b/techlibs/gowin/brams_map.v
@@ -8,26 +8,28 @@
 module \$__GW1NR_SDP (CLK2, CLK3, A1ADDR, A1DATA, A1EN, B1ADDR, B1DATA, B1EN);
 	parameter CFG_ABITS = 10;
 	parameter CFG_DBITS = 16;
-	parameter CFG_ENABLE_A = 3;
-
-        parameter [16383:0] INIT = 16384'hx;
-        parameter CLKPOL2 = 1;
-        parameter CLKPOL3 = 1;
+	parameter CFG_ENABLE_A = 1;
+	parameter [16383:0] INIT = 16384'hx;
+	parameter CLKPOL2 = 1;
+	parameter CLKPOL3 = 1;
 
 	input CLK2;
 	input CLK3;
 
 	input [CFG_ABITS-1:0] A1ADDR;
 	input [CFG_DBITS-1:0] A1DATA;   
-        input [CFG_ENABLE_A-1:0] A1EN;
+	input [CFG_ENABLE_A-1:0] A1EN;
 
 	input [CFG_ABITS-1:0] B1ADDR;
 	output [CFG_DBITS-1:0] B1DATA;
 	input B1EN;
 
+	wire [31-CFG_DBITS:0] open;
+
 	
 	generate if (CFG_DBITS == 1) begin
 		SDP   #(
+      `include "bram_init_16.vh"
 			.READ_MODE(0),
 			.BIT_WIDTH_0(1),
 			.BIT_WIDTH_1(1),
@@ -38,10 +40,14 @@
 			.WREA(A1EN),   .OCE(1'b0), .CEA(1'b1),
 			.WREB(1'b0),   .CEB(B1EN),
 			.RESETA(1'b0), .RESETB(1'b0), .BLKSEL(3'b000),
-			.DI(A1DATA), .DO(B1DATA), .ADA(A1ADDR), .ADB(B1ADDR)
+			.DI({{(32-CFG_DBITS){1'b0}}, A1DATA}),
+			.DO({open, B1DATA}),
+			.ADA({A1ADDR, {(14-CFG_ABITS){1'b0}}}),
+			.ADB({B1ADDR, {(14-CFG_ABITS){1'b0}}})
 		);
 	end else if (CFG_DBITS == 2) begin
 		SDP    #(
+      `include "bram_init_16.vh"
 			.READ_MODE(0),
 			.BIT_WIDTH_0(2),
 			.BIT_WIDTH_1(2),
@@ -52,10 +58,14 @@
 			.WREA(A1EN),   .OCE(1'b0), .CEA(1'b1),
 			.WREB(1'b0),   .CEB(B1EN),
 			.RESETA(1'b0), .RESETB(1'b0), .BLKSEL(3'b000),
-                        .DI(A1DATA), .DO(B1DATA), .ADA(A1ADDR), .ADB(B1ADDR)
+			.DI({{(32-CFG_DBITS){1'b0}}, A1DATA}),
+			.DO({open, B1DATA}),
+			.ADA({A1ADDR, {(14-CFG_ABITS){1'b0}}}),
+			.ADB({B1ADDR, {(14-CFG_ABITS){1'b0}}})
 		);
 	end else if (CFG_DBITS <= 4) begin
 		SDP    #(
+      `include "bram_init_16.vh"
 			.READ_MODE(0),
 			.BIT_WIDTH_0(4),
 			.BIT_WIDTH_1(4),
@@ -66,10 +76,14 @@
 			.WREA(A1EN),   .OCE(1'b0),
 			.WREB(1'b0),   .CEB(B1EN), .CEA(1'b1),
 			.RESETA(1'b0), .RESETB(1'b0), .BLKSEL(3'b000),
-                        .DI(A1DATA), .DO(B1DATA), .ADA(A1ADDR), .ADB(B1ADDR)
+			.DI({{(32-CFG_DBITS){1'b0}}, A1DATA}),
+			.DO({open, B1DATA}),
+			.ADA({A1ADDR, {(14-CFG_ABITS){1'b0}}}),
+			.ADB({B1ADDR, {(14-CFG_ABITS){1'b0}}})
 		);
 	end else if (CFG_DBITS <= 8) begin
 		SDP    #(
+      `include "bram_init_16.vh"
 			.READ_MODE(0),
 			.BIT_WIDTH_0(8),
 			.BIT_WIDTH_1(8),
@@ -80,10 +94,14 @@
 			.WREA(A1EN),   .OCE(1'b0), .CEA(1'b1),
 			.WREB(1'b0),   .CEB(B1EN),
 			.RESETA(1'b0), .RESETB(1'b0), .BLKSEL(3'b000),
-                        .DI(A1DATA), .DO(B1DATA), .ADA(A1ADDR), .ADB(B1ADDR)
+			.DI({{(32-CFG_DBITS){1'b0}}, A1DATA}),
+			.DO({open, B1DATA}),
+			.ADA({A1ADDR, {(14-CFG_ABITS){1'b0}}}),
+			.ADB({B1ADDR, {(14-CFG_ABITS){1'b0}}})
 		);
 	end else if (CFG_DBITS <= 16) begin
 		SDP    #(
+      `include "bram_init_16.vh"
 			.READ_MODE(0),
 			.BIT_WIDTH_0(16),
 			.BIT_WIDTH_1(16),
@@ -91,10 +109,31 @@
 			.RESET_MODE("SYNC")
 		) _TECHMAP_REPLACE_ (
 			.CLKA(CLK2),   .CLKB(CLK3),
-			.WREA(A1EN),   .OCE(1'b0),
+			.WREA(|A1EN),   .OCE(1'b0),
 			.WREB(1'b0),   .CEB(B1EN), .CEA(1'b1),
 			.RESETA(1'b0), .RESETB(1'b0), .BLKSEL(3'b000),
-                        .DI(A1DATA), .DO(B1DATA), .ADA(A1ADDR), .ADB(B1ADDR)
+			.DI({{(32-CFG_DBITS){1'b0}}, A1DATA}),
+			.DO({open, B1DATA}),
+			.ADA({A1ADDR, {(12-CFG_ABITS){1'b0}}, A1EN}),
+			.ADB({B1ADDR, {(14-CFG_ABITS){1'b0}}})
+		);
+	end else if (CFG_DBITS <= 32) begin
+		SDP    #(
+      `include "bram_init_16.vh"
+			.READ_MODE(0),
+			.BIT_WIDTH_0(32),
+			.BIT_WIDTH_1(32),
+			.BLK_SEL(3'b000),
+			.RESET_MODE("SYNC")
+		) _TECHMAP_REPLACE_ (
+			.CLKA(CLK2),   .CLKB(CLK3),
+			.WREA(|A1EN),   .OCE(1'b0),
+			.WREB(1'b0),   .CEB(B1EN), .CEA(1'b1),
+			.RESETA(1'b0), .RESETB(1'b0), .BLKSEL(3'b000),
+			.DI(A1DATA),
+			.DO(B1DATA),
+			.ADA({A1ADDR, {(10-CFG_ABITS){1'b0}}, A1EN}),
+			.ADB({B1ADDR, {(14-CFG_ABITS){1'b0}}})
 		);
 	end else begin
 		wire TECHMAP_FAIL = 1'b1;
diff --git a/techlibs/gowin/cells_map.v b/techlibs/gowin/cells_map.v
index ebdc88a..9845e56 100644
--- a/techlibs/gowin/cells_map.v
+++ b/techlibs/gowin/cells_map.v
@@ -1,9 +1,83 @@
-module  \$_DFF_N_ (input D, C, output Q); DFFN _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C)); endmodule
-module  \$_DFF_P_ #(parameter INIT = 1'b0) (input D, C, output Q); DFF  #(.INIT(INIT)) _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C)); endmodule
+//All DFF* have INIT, but the hardware is always initialised to the reset
+//value regardless. The parameter is ignored.
 
+// DFFN      D Flip-Flop with Negative-Edge Clock
+module  \$_DFF_N_ (input D, C, output Q); DFFN _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C)); endmodule
+// DFF       D Flip-Flop
+module  \$_DFF_P_ (input D, C, output Q); DFF _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C)); endmodule
+
+// DFFE      D Flip-Flop with Clock Enable
+module  \$_DFFE_PP_ (input D, C, E, output Q); DFFE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CE(E)); endmodule
+module  \$_DFFE_PN_ (input D, C, E, output Q); DFFE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CE(!E)); endmodule
+
+// DFFNE     D Flip-Flop with Negative-Edge Clock and Clock Enable
+module  \$_DFFE_NP_ (input D, C, E, output Q); DFFNE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CE(E)); endmodule
+module  \$_DFFE_NN_ (input D, C, E, output Q); DFFNE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CE(!E)); endmodule
+
+// DFFR      D Flip-Flop with Synchronous Reset
 module  \$__DFFS_PN0_ (input D, C, R, output Q); DFFR _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .RESET(!R)); endmodule
 module  \$__DFFS_PP0_ (input D, C, R, output Q); DFFR _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .RESET(R)); endmodule
-module  \$__DFFS_PP1_ (input D, C, R, output Q); DFFR  _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .RESET(R)); endmodule
+
+// DFFNR     D Flip-Flop with Negative-Edge Clock and Synchronous Reset
+module  \$__DFFS_NN0_ (input D, C, R, output Q); DFFNR _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .RESET(!R)); endmodule
+module  \$__DFFS_NP0_ (input D, C, R, output Q); DFFNR _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .RESET(R)); endmodule
+
+// DFFRE     D Flip-Flop with Clock Enable and Synchronous Reset
+module  \$__DFFSE_PN0 (input D, C, R, E, output Q); DFFRE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .RESET(!R), .CE(E)); endmodule
+module  \$__DFFSE_PP0 (input D, C, R, E, output Q); DFFRE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .RESET(R), .CE(E)); endmodule
+
+// DFFNRE    D Flip-Flop with Negative-Edge Clock,Clock Enable, and Synchronous Reset
+module  \$__DFFNSE_PN0 (input D, C, R, E, output Q); DFFNRE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .RESET(!R), .CE(E)); endmodule
+module  \$__DFFNSE_PP0 (input D, C, R, E, output Q); DFFNRE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .RESET(R), .CE(E)); endmodule
+
+// DFFS      D Flip-Flop with Synchronous Set
+module  \$__DFFS_PN1_ (input D, C, R, output Q); DFFS _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .SET(!R)); endmodule
+module  \$__DFFS_PP1_ (input D, C, R, output Q); DFFS _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .SET(R)); endmodule
+
+// DFFNS     D Flip-Flop with Negative-Edge Clock and Synchronous Set
+module  \$__DFFS_NN1_ (input D, C, R, output Q); DFFNS _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .SET(!R)); endmodule
+module  \$__DFFS_NP1_ (input D, C, R, output Q); DFFNS _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .SET(R)); endmodule
+
+// DFFSE     D Flip-Flop with Clock Enable and Synchronous Set
+module  \$__DFFSE_PN1 (input D, C, R, E, output Q); DFFSE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .SET(!R), .CE(E)); endmodule
+module  \$__DFFSE_PP1 (input D, C, R, E, output Q); DFFSE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .SET(R), .CE(E)); endmodule
+
+// DFFNSE    D Flip-Flop with Negative-Edge Clock,Clock Enable,and Synchronous Set
+module  \$__DFFSE_NN1 (input D, C, R, E, output Q); DFFNSE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .SET(!R), .CE(E)); endmodule
+module  \$__DFFSE_NP1 (input D, C, R, E, output Q); DFFNSE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .SET(R), .CE(E)); endmodule
+
+// DFFP      D Flip-Flop with Asynchronous Preset
+module  \$_DFF_PP1_ (input D, C, R, output Q); DFFP _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .PRESET(R)); endmodule
+module  \$_DFF_PN1_ (input D, C, R, output Q); DFFP _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .PRESET(!R)); endmodule
+
+// DFFNP     D Flip-Flop with Negative-Edge Clock and Asynchronous Preset
+module  \$_DFF_NP1_ (input D, C, R, output Q); DFFNP _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .PRESET(R)); endmodule
+module  \$_DFF_NN1_ (input D, C, R, output Q); DFFNP _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .PRESET(!R)); endmodule
+
+// DFFC      D Flip-Flop with Asynchronous Clear
+module  \$_DFF_PP0_ (input D, C, R, output Q); DFFC _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CLEAR(R)); endmodule
+module  \$_DFF_PN0_ (input D, C, R, output Q); DFFC _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CLEAR(!R)); endmodule
+
+// DFFNC     D Flip-Flop with Negative-Edge Clock and Asynchronous Clear
+module  \$_DFF_NP0_ (input D, C, R, output Q); DFFNC _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CLEAR(R)); endmodule
+module  \$_DFF_NN0_ (input D, C, R, output Q); DFFNC _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CLEAR(!R)); endmodule
+
+// DFFPE     D Flip-Flop with Clock Enable and Asynchronous Preset
+module  \$__DFFE_PP1 (input D, C, R, E, output Q); DFFPE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .PRESET(R), .CE(E)); endmodule
+module  \$__DFFE_PN1 (input D, C, R, E, output Q); DFFPE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .PRESET(!R), .CE(E)); endmodule
+
+// DFFNPE    D Flip-Flop with Negative-Edge Clock,Clock Enable, and Asynchronous Preset
+module  \$__DFFE_NP1 (input D, C, R, E, output Q); DFFNPE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .PRESET(R), .CE(E)); endmodule
+module  \$__DFFE_NN1 (input D, C, R, E, output Q); DFFNPE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .PRESET(!R), .CE(E)); endmodule
+
+// DFFCE     D Flip-Flop with Clock Enable and Asynchronous Clear
+module  \$__DFFE_PP0 (input D, C, R, E, output Q); DFFCE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CLEAR(R), .CE(E)); endmodule
+module  \$__DFFE_PN0 (input D, C, R, E, output Q); DFFCE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CLEAR(!R), .CE(E)); endmodule
+
+// DFFNCE    D Flip-Flop with Negative-Edge Clock,Clock Enable and Asynchronous Clear
+module  \$__DFFE_NP0 (input D, C, R, E, output Q); DFFNCE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CLEAR(R), .CE(E)); endmodule
+module  \$__DFFE_NN0 (input D, C, R, E, output Q); DFFNCE _TECHMAP_REPLACE_ (.D(D), .Q(Q), .CLK(C), .CLEAR(!R), .CE(E)); endmodule
+
 
 module \$lut (A, Y);
   parameter WIDTH = 0;
@@ -28,6 +102,30 @@
     if (WIDTH == 4) begin
       LUT4 #(.INIT(LUT)) _TECHMAP_REPLACE_ (.F(Y),
         .I0(A[0]), .I1(A[1]), .I2(A[2]), .I3(A[3]));
+    end else
+    if (WIDTH == 5) begin
+      wire f0, f1;
+      \$lut #(.LUT(LUT[15: 0]), .WIDTH(4)) lut0 (.A(A[3:0]), .Y(f0));
+      \$lut #(.LUT(LUT[31:16]), .WIDTH(4)) lut1 (.A(A[3:0]), .Y(f1));
+      MUX2_LUT5 mux5(.I0(f0), .I1(f1), .S0(A[4]), .O(Y));
+    end else
+    if (WIDTH == 6) begin
+      wire f0, f1;
+      \$lut #(.LUT(LUT[31: 0]), .WIDTH(5)) lut0 (.A(A[4:0]), .Y(f0));
+      \$lut #(.LUT(LUT[63:32]), .WIDTH(5)) lut1 (.A(A[4:0]), .Y(f1));
+      MUX2_LUT6 mux6(.I0(f0), .I1(f1), .S0(A[5]), .O(Y));
+    end else
+    if (WIDTH == 7) begin
+      wire f0, f1;
+      \$lut #(.LUT(LUT[63: 0]), .WIDTH(6)) lut0 (.A(A[5:0]), .Y(f0));
+      \$lut #(.LUT(LUT[127:64]), .WIDTH(6)) lut1 (.A(A[5:0]), .Y(f1));
+      MUX2_LUT7 mux7(.I0(f0), .I1(f1), .S0(A[6]), .O(Y));
+    end else
+    if (WIDTH == 8) begin
+      wire f0, f1;
+      \$lut #(.LUT(LUT[127: 0]), .WIDTH(7)) lut0 (.A(A[6:0]), .Y(f0));
+      \$lut #(.LUT(LUT[255:128]), .WIDTH(7)) lut1 (.A(A[6:0]), .Y(f1));
+      MUX2_LUT8 mux8(.I0(f0), .I1(f1), .S0(A[7]), .O(Y));
     end else begin
       wire _TECHMAP_FAIL_ = 1;
     end
diff --git a/techlibs/gowin/cells_sim.v b/techlibs/gowin/cells_sim.v
index ebb238b..a67855d 100644
--- a/techlibs/gowin/cells_sim.v
+++ b/techlibs/gowin/cells_sim.v
@@ -24,6 +24,41 @@
 	assign F = I0 ? s1[1] : s1[0];
 endmodule
 
+module MUX2 (O, I0, I1, S0);
+  input I0,I1;
+  input S0;
+  output O;
+  assign O = S0 ? I1 : I0;
+endmodule
+
+module MUX2_LUT5 (O, I0, I1, S0);
+  input I0,I1;
+  input S0;
+  output O;
+  MUX2 mux2_lut5 (O, I0, I1, S0);
+endmodule
+
+module MUX2_LUT6 (O, I0, I1, S0);
+  input I0,I1;
+  input S0;
+  output O;
+  MUX2 mux2_lut6 (O, I0, I1, S0);
+endmodule
+
+module MUX2_LUT7 (O, I0, I1, S0);
+  input I0,I1;
+  input S0;
+  output O;
+  MUX2 mux2_lut7 (O, I0, I1, S0);
+endmodule
+
+module MUX2_LUT8 (O, I0, I1, S0);
+  input I0,I1;
+  input S0;
+  output O;
+  MUX2 mux2_lut8 (O, I0, I1, S0);
+endmodule
+
 module DFF (output reg Q, input CLK, D);
 	parameter [0:0] INIT = 1'b0;
 	initial Q = INIT;
@@ -31,6 +66,112 @@
 		Q <= D;
 endmodule
 
+module DFFE (output reg Q, input D, CLK, CE);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(posedge CLK) begin
+    if (CE)
+      Q <= D;
+  end
+endmodule // DFFE (positive clock edge; clock enable)
+
+
+module DFFS (output reg Q, input D, CLK, SET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(posedge CLK) begin
+    if (SET)
+      Q <= 1'b1;
+    else
+      Q <= D;	
+  end
+endmodule // DFFS (positive clock edge; synchronous set)
+
+
+module DFFSE (output reg Q, input D, CLK, CE, SET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(posedge CLK) begin
+    if (SET)
+      Q <= 1'b1;
+    else if (CE)
+      Q <= D;
+end
+endmodule // DFFSE (positive clock edge; synchronous set takes precedence over clock enable)
+
+
+module DFFR (output reg Q, input D, CLK, RESET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(posedge CLK) begin
+    if (RESET)
+      Q <= 1'b0;
+    else
+      Q <= D;
+  end
+endmodule // DFFR (positive clock edge; synchronous reset)
+
+
+module DFFRE (output reg Q, input D, CLK, CE, RESET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(posedge CLK) begin
+    if (RESET)
+      Q <= 1'b0;
+    else if (CE)
+      Q <= D;
+  end
+endmodule // DFFRE (positive clock edge; synchronous reset takes precedence over clock enable)
+
+
+module DFFP (output reg Q, input D, CLK, PRESET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(posedge CLK or posedge PRESET) begin
+    if(PRESET)
+      Q <= 1'b1;
+    else
+      Q <= D;
+  end
+endmodule // DFFP (positive clock edge; asynchronous preset)
+
+
+module DFFPE (output reg Q, input D, CLK, CE, PRESET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(posedge CLK or posedge PRESET) begin
+    if(PRESET)
+      Q <= 1'b1;
+    else if (CE)
+      Q <= D;
+  end
+endmodule // DFFPE (positive clock edge; asynchronous preset; clock enable)
+
+
+module DFFC (output reg Q, input D, CLK, CLEAR);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(posedge CLK or posedge CLEAR) begin
+    if(CLEAR)
+      Q <= 1'b0;
+    else
+      Q <= D;
+  end
+endmodule // DFFC (positive clock edge; asynchronous clear)
+
+
+module DFFCE (output reg Q, input D, CLK, CE, CLEAR);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(posedge CLK or posedge CLEAR) begin
+    if(CLEAR)
+      Q <= 1'b0;
+    else if (CE)
+      Q <= D;
+  end
+endmodule // DFFCE (positive clock edge; asynchronous clear; clock enable)
+
+
 module DFFN (output reg Q, input CLK, D);
 	parameter [0:0] INIT = 1'b0;
 	initial Q = INIT;
@@ -38,16 +179,112 @@
 		Q <= D;
 endmodule
 
-module DFFR (output reg Q, input D, CLK, RESET);
-	parameter [0:0] INIT = 1'b0;
-	initial Q = INIT;
-	always @(posedge CLK) begin
-        if (RESET)
-                Q <= 1'b0;
-        else
-                Q <= D;
-	end
-endmodule // DFFR (positive clock edge; synchronous reset)
+module DFFNE (output reg Q, input D, CLK, CE);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(negedge CLK) begin
+    if (CE)
+      Q <= D;
+  end
+endmodule // DFFNE (negative clock edge; clock enable)
+
+
+module DFFNS (output reg Q, input D, CLK, SET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(negedge CLK) begin
+    if (SET)
+      Q <= 1'b1;
+    else
+      Q <= D;	
+  end
+endmodule // DFFNS (negative clock edge; synchronous set)
+
+
+module DFFNSE (output reg Q, input D, CLK, CE, SET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(negedge CLK) begin
+    if (SET)
+      Q <= 1'b1;
+    else if (CE)
+      Q <= D;
+end
+endmodule // DFFNSE (negative clock edge; synchronous set takes precedence over clock enable)
+
+
+module DFFNR (output reg Q, input D, CLK, RESET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(negedge CLK) begin
+    if (RESET)
+      Q <= 1'b0;
+    else
+      Q <= D;
+  end
+endmodule // DFFNR (negative clock edge; synchronous reset)
+
+
+module DFFNRE (output reg Q, input D, CLK, CE, RESET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(negedge CLK) begin
+    if (RESET)
+      Q <= 1'b0;
+    else if (CE)
+      Q <= D;
+  end
+endmodule // DFFNRE (negative clock edge; synchronous reset takes precedence over clock enable)
+
+
+module DFFNP (output reg Q, input D, CLK, PRESET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(negedge CLK or posedge PRESET) begin
+    if(PRESET)
+      Q <= 1'b1;
+    else
+      Q <= D;
+  end
+endmodule // DFFNP (negative clock edge; asynchronous preset)
+
+
+module DFFNPE (output reg Q, input D, CLK, CE, PRESET);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(negedge CLK or posedge PRESET) begin
+    if(PRESET)
+      Q <= 1'b1;
+    else if (CE)
+      Q <= D;
+  end
+endmodule // DFFNPE (negative clock edge; asynchronous preset; clock enable)
+
+
+module DFFNC (output reg Q, input D, CLK, CLEAR);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(negedge CLK or posedge CLEAR) begin
+    if(CLEAR)
+      Q <= 1'b0;
+    else
+      Q <= D;
+  end
+endmodule // DFFNC (negative clock edge; asynchronous clear)
+
+
+module DFFNCE (output reg Q, input D, CLK, CE, CLEAR);
+  parameter [0:0] INIT = 1'b0;
+  initial Q = INIT;
+  always @(negedge CLK or posedge CLEAR) begin
+    if(CLEAR)
+      Q <= 1'b0;
+    else if (CE)
+      Q <= D;
+  end
+endmodule // DFFNCE (negative clock edge; asynchronous clear; clock enable)
+
+// TODO add more DFF sim cells
 
 module VCC(output V);
 	assign V = 1;
@@ -65,14 +302,98 @@
 	assign O = I;
 endmodule
 
+module TBUF (O, I, OEN);
+  input I, OEN;
+  output O;
+  assign O = OEN ? I : 1'bz;
+endmodule
+
+module IOBUF (O, IO, I, OEN);
+  input I,OEN;
+  output O;
+  inout IO;
+  assign IO = OEN ? I : 1'bz;
+  assign I = IO;
+endmodule
+
 module GSR (input GSRI);
 	wire GSRO = GSRI;
 endmodule
 
-module ALU (input I0, input I1, input I3, input CIN, output COUT, output SUM);
-   parameter [3:0] ALU_MODE = 0; // default 0 = ADD
-   assign  {COUT, SUM} = CIN + I1 + I0;
-endmodule // alu
+module ALU (SUM, COUT, I0, I1, I3, CIN);
+
+input I0;
+input I1;
+input I3;
+input CIN;
+output SUM;
+output COUT;
+
+localparam ADD = 0;
+localparam SUB = 1;
+localparam ADDSUB = 2;
+localparam NE = 3;
+localparam GE = 4;
+localparam LE = 5;
+localparam CUP = 6;
+localparam CDN = 7;
+localparam CUPCDN = 8;
+localparam MULT = 9;
+
+parameter ALU_MODE = 0;
+
+reg S, C;
+
+assign SUM = S ^ CIN;
+assign COUT = S? CIN : C;
+
+always @* begin
+	case (ALU_MODE)
+		ADD: begin
+			S = I0 ^ I1;
+			C = I0;
+		end
+		SUB: begin
+			S = I0 ^ ~I1;
+			C = I0;
+		end
+		ADDSUB: begin
+			S = I3? I0 ^ I1 : I0 ^ ~I1;
+			C = I0;
+		end
+		NE: begin
+			S = I0 ^ ~I1;
+			C = 1'b1;
+		end
+		GE: begin
+			S = I0 ^ ~I1;
+			C = I0;
+		end
+		LE: begin
+			S = ~I0 ^ I1;
+			C = I1;
+		end
+		CUP: begin
+			S = I0;
+			C = 1'b0;
+		end
+		CDN: begin
+			S = ~I0;
+			C = 1'b1;
+		end
+		CUPCDN: begin
+			S = I3? I0 : ~I0;
+			C = I0;
+		end
+		MULT: begin
+			S = I0 & I1;
+			C = I0 & I1;
+		end
+	endcase
+end
+
+endmodule
+
 
 module RAM16S4 (DO, DI, AD, WRE, CLK);
    parameter WIDTH  = 4;
diff --git a/techlibs/gowin/synth_gowin.cc b/techlibs/gowin/synth_gowin.cc
index ac3dbfb..3c14264 100644
--- a/techlibs/gowin/synth_gowin.cc
+++ b/techlibs/gowin/synth_gowin.cc
@@ -64,6 +64,12 @@
 		log("    -retime\n");
 		log("        run 'abc' with -dff option\n");
 		log("\n");
+		log("    -nowidelut\n");
+		log("        do not use muxes to implement LUTs larger than LUT4s\n");
+		log("\n");
+		log("    -abc9\n");
+		log("        use new ABC9 flow (EXPERIMENTAL)\n");
+		log("\n");
 		log("\n");
 		log("The following commands are executed by this synthesis command:\n");
 		help_script();
@@ -71,7 +77,7 @@
 	}
 
 	string top_opt, vout_file;
-	bool retime, nobram, nodram, flatten, nodffe;
+	bool retime, nobram, nodram, flatten, nodffe, nowidelut, abc9;
 
 	void clear_flags() YS_OVERRIDE
 	{
@@ -82,6 +88,8 @@
 		nobram = false;
 		nodffe = false;
 		nodram = false;
+		nowidelut = false;
+		abc9 = false;
 	}
 
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
@@ -128,6 +136,14 @@
 				flatten = false;
 				continue;
 			}
+			if (args[argidx] == "-nowidelut") {
+				nowidelut = true;
+				continue;
+			}
+			if (args[argidx] == "-abc9") {
+				abc9 = true;
+				continue;
+			}
 			break;
 		}
 		extra_args(args, argidx, design);
@@ -163,8 +179,8 @@
 		{
 			run("synth -run coarse");
 		}
-		
-                if (!nobram && check_label("bram", "(skip if -nobram)"))
+
+		if (!nobram && check_label("bram", "(skip if -nobram)"))
 		{
 			run("memory_bram -rules +/gowin/bram.txt");
 			run("techmap -map +/gowin/brams_map.v -map +/gowin/cells_sim.v");
@@ -186,6 +202,7 @@
 			run("techmap -map +/techmap.v");
 			if (retime || help_mode)
 				run("abc -dff", "(only if -retime)");
+			run("splitnets");
 		}
 
 		if (check_label("map_ffs"))
@@ -202,16 +219,25 @@
 
 		if (check_label("map_luts"))
 		{
-			run("abc -lut 4");
+			if (nowidelut && abc9) {
+				run("abc9 -lut 4");
+			} else if (nowidelut && !abc9) {
+				run("abc -lut 4");
+			} else if (!nowidelut && abc9) {
+				run("abc9 -lut 4:8");
+			} else if (!nowidelut && !abc9) {
+				run("abc -lut 4:8");
+			}
 			run("clean");
 		}
 
 		if (check_label("map_cells"))
 		{
 			run("techmap -map +/gowin/cells_map.v");
-			run("hilomap -hicell VCC V -locell GND G");
-			run("iopadmap -bits -inpad IBUF O:I -outpad OBUF I:O", "(unless -noiopads)");
-			run("dffinit  -ff DFF Q INIT");
+			run("setundef -undriven -params -zero");
+			run("hilomap -singleton -hicell VCC V -locell GND G");
+			run("iopadmap -bits -inpad IBUF O:I -outpad OBUF I:O "
+				"-toutpad TBUF OEN:I:O -tinoutpad IOBUF OEN:O:I:IO", "(unless -noiopads)");
 			run("clean");
 
 		}
@@ -226,7 +252,7 @@
 		if (check_label("vout"))
 		{
 			if (!vout_file.empty() || help_mode)
-				run(stringf("write_verilog -nodec -attr2comment -defparam -renameprefix gen %s",
+				 run(stringf("write_verilog -decimal -attr2comment -defparam -renameprefix gen %s",
 						help_mode ? "<file-name>" : vout_file.c_str()));
 		}
 	}
diff --git a/tests/arch/gowin/.gitignore b/tests/arch/gowin/.gitignore
new file mode 100644
index 0000000..b48f808
--- /dev/null
+++ b/tests/arch/gowin/.gitignore
@@ -0,0 +1,3 @@
+/*.log
+/*.out
+/run-test.mk
diff --git a/tests/arch/gowin/add_sub.ys b/tests/arch/gowin/add_sub.ys
new file mode 100644
index 0000000..9b53dc0
--- /dev/null
+++ b/tests/arch/gowin/add_sub.ys
@@ -0,0 +1,13 @@
+read_verilog ../common/add_sub.v
+hierarchy -top top
+proc
+equiv_opt -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd top # Constrain all select calls below inside the top module
+select -assert-count 8 t:ALU
+select -assert-count 8 t:OBUF
+select -assert-count 8 t:IBUF
+select -assert-count 1 t:GND
+select -assert-count 1 t:VCC
+select -assert-none t:ALU t:OBUF t:IBUF t:GND t:VCC %% t:* %D
+
diff --git a/tests/arch/gowin/adffs.ys b/tests/arch/gowin/adffs.ys
new file mode 100644
index 0000000..fc7ee01
--- /dev/null
+++ b/tests/arch/gowin/adffs.ys
@@ -0,0 +1,55 @@
+read_verilog ../common/adffs.v
+design -save read
+
+hierarchy -top adff
+proc
+equiv_opt -async2sync -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd adff # Constrain all select calls below inside the top module
+stat
+select -assert-count 1 t:DFFC
+select -assert-count 3 t:IBUF
+select -assert-count 1 t:OBUF
+
+select -assert-none t:DFFC t:IBUF t:OBUF %% t:* %D
+
+
+design -load read
+hierarchy -top adffn
+proc
+equiv_opt -async2sync -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd adffn # Constrain all select calls below inside the top module
+select -assert-count 1 t:DFFC
+select -assert-count 1 t:LUT1
+select -assert-count 3 t:IBUF
+select -assert-count 1 t:OBUF
+
+select -assert-none t:DFFC t:IBUF t:OBUF t:LUT1 %% t:* %D
+
+
+design -load read
+hierarchy -top dffs
+proc
+equiv_opt -async2sync -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd dffs # Constrain all select calls below inside the top module
+select -assert-count 1 t:DFFS
+select -assert-count 4 t:IBUF
+select -assert-count 1 t:OBUF
+
+select -assert-none t:DFFS t:IBUF t:OBUF %% t:* %D
+
+
+design -load read
+hierarchy -top ndffnr
+proc
+equiv_opt -async2sync -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd ndffnr # Constrain all select calls below inside the top module
+select -assert-count 1 t:DFFNR
+select -assert-count 1 t:LUT1
+select -assert-count 4 t:IBUF
+select -assert-count 1 t:OBUF
+
+select -assert-none t:DFFNR t:IBUF t:OBUF t:LUT1 %% t:* %D
diff --git a/tests/arch/gowin/counter.ys b/tests/arch/gowin/counter.ys
new file mode 100644
index 0000000..920479d
--- /dev/null
+++ b/tests/arch/gowin/counter.ys
@@ -0,0 +1,15 @@
+read_verilog ../common/counter.v
+hierarchy -top top
+proc
+flatten
+equiv_opt -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd top # Constrain all select calls below inside the top module
+
+select -assert-count 8 t:DFFC
+select -assert-count 8 t:ALU
+select -assert-count 1 t:GND
+select -assert-count 1 t:VCC
+select -assert-count 2 t:IBUF
+select -assert-count 8 t:OBUF
+select -assert-none t:DFFC t:ALU t:GND t:VCC t:IBUF t:OBUF %% t:* %D
diff --git a/tests/arch/gowin/dffs.ys b/tests/arch/gowin/dffs.ys
new file mode 100644
index 0000000..9c01221
--- /dev/null
+++ b/tests/arch/gowin/dffs.ys
@@ -0,0 +1,25 @@
+read_verilog ../common/dffs.v
+design -save read
+
+hierarchy -top dff
+proc
+equiv_opt -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd dff # Constrain all select calls below inside the top module
+select -assert-count 1 t:DFF
+select -assert-count 2 t:IBUF
+select -assert-count 1 t:OBUF
+
+select -assert-none t:DFF t:IBUF t:OBUF %% t:* %D
+
+design -load read
+hierarchy -top dffe
+proc
+equiv_opt -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd dffe # Constrain all select calls below inside the top module
+select -assert-count 1 t:DFFE
+select -assert-count 3 t:IBUF
+select -assert-count 1 t:OBUF
+
+select -assert-none t:DFFE t:IBUF t:OBUF %% t:* %D
diff --git a/tests/arch/gowin/fsm.ys b/tests/arch/gowin/fsm.ys
new file mode 100644
index 0000000..ce45045
--- /dev/null
+++ b/tests/arch/gowin/fsm.ys
@@ -0,0 +1,11 @@
+read_verilog ../common/fsm.v
+hierarchy -top fsm
+proc
+flatten
+
+equiv_opt -run :prove -map +/gowin/cells_sim.v synth_gowin # equivalency check
+miter -equiv -make_assert -flatten gold gate miter
+sat -verify -show-all -dump_vcd x.vcd -prove-asserts -set-at 1 in_reset 1 -seq 20 -prove-skip 1 miter
+
+#design -load postopt
+#shell
diff --git a/tests/arch/gowin/logic.ys b/tests/arch/gowin/logic.ys
new file mode 100644
index 0000000..d2b9e45
--- /dev/null
+++ b/tests/arch/gowin/logic.ys
@@ -0,0 +1,13 @@
+read_verilog ../common/logic.v
+hierarchy -top top
+proc
+equiv_opt -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd top # Constrain all select calls below inside the top module
+
+select -assert-count 1 t:LUT1
+select -assert-count 6 t:LUT2
+select -assert-count 2 t:LUT4
+select -assert-count 8 t:IBUF
+select -assert-count 10 t:OBUF
+select -assert-none t:LUT1 t:LUT2 t:LUT4 t:IBUF t:OBUF %% t:* %D
diff --git a/tests/arch/gowin/memory.ys b/tests/arch/gowin/memory.ys
new file mode 100644
index 0000000..8f88cdd
--- /dev/null
+++ b/tests/arch/gowin/memory.ys
@@ -0,0 +1,18 @@
+read_verilog ../common/memory.v
+hierarchy -top top
+proc
+memory -nomap
+equiv_opt -run :prove -map +/gowin/cells_sim.v synth_gowin
+memory
+opt -full
+
+miter -equiv -flatten -make_assert -make_outputs gold gate miter
+#ERROR: Called with -verify and proof did fail!
+#sat -verify -prove-asserts -seq 5 -set-init-zero -show-inputs -show-outputs miter
+sat -prove-asserts -seq 5 -set-init-zero -show-inputs -show-outputs miter
+
+design -load postopt
+cd top
+select -assert-count 8 t:RAM16S4
+# other logic present that is not simple
+#select -assert-none t:RAM16S4 %% t:* %D
diff --git a/tests/arch/gowin/mux.ys b/tests/arch/gowin/mux.ys
new file mode 100644
index 0000000..4990be4
--- /dev/null
+++ b/tests/arch/gowin/mux.ys
@@ -0,0 +1,50 @@
+read_verilog ../common/mux.v
+design -save read
+
+hierarchy -top mux2
+proc
+equiv_opt -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd mux2 # Constrain all select calls below inside the top module
+select -assert-count 1 t:LUT3
+select -assert-count 3 t:IBUF
+select -assert-count 1 t:OBUF
+
+select -assert-none t:LUT3 t:IBUF t:OBUF %% t:* %D
+
+design -load read
+hierarchy -top mux4
+proc
+equiv_opt -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd mux4 # Constrain all select calls below inside the top module
+select -assert-count 4 t:LUT4
+select -assert-count 2 t:MUX2_LUT5
+select -assert-count 1 t:MUX2_LUT6
+select -assert-count 6 t:IBUF
+select -assert-count 1 t:OBUF
+
+select -assert-none t:LUT4 t:MUX2_LUT6 t:MUX2_LUT5 t:IBUF t:OBUF %% t:* %D
+
+design -load read
+hierarchy -top mux8
+proc
+equiv_opt -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd mux8 # Constrain all select calls below inside the top module
+select -assert-count 11 t:IBUF
+select -assert-count 1 t:OBUF
+
+select -assert-none t:LUT4 t:MUX2_LUT6 t:MUX2_LUT5 t:IBUF t:OBUF %% t:* %D
+
+design -load read
+hierarchy -top mux16
+proc
+equiv_opt -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd mux16 # Constrain all select calls below inside the top module
+select -assert-count 20 t:IBUF
+select -assert-count 1 t:OBUF
+show
+
+select -assert-none t:LUT4 t:MUX2_LUT6 t:MUX2_LUT5 t:MUX2_LUT6 t:MUX2_LUT7 t:MUX2_LUT8 t:IBUF t:OBUF %% t:* %D
diff --git a/tests/arch/gowin/run-test.sh b/tests/arch/gowin/run-test.sh
new file mode 100755
index 0000000..bf19b88
--- /dev/null
+++ b/tests/arch/gowin/run-test.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+set -e
+{
+echo "all::"
+for x in *.ys; do
+	echo "all:: run-$x"
+	echo "run-$x:"
+	echo "	@echo 'Running $x..'"
+	echo "	@../../../yosys -ql ${x%.ys}.log -w 'Yosys has only limited support for tri-state logic at the moment.' $x"
+done
+for s in *.sh; do
+	if [ "$s" != "run-test.sh" ]; then
+		echo "all:: run-$s"
+		echo "run-$s:"
+		echo "	@echo 'Running $s..'"
+		echo "	@bash $s"
+	fi
+done
+} > run-test.mk
+exec ${MAKE:-make} -f run-test.mk
diff --git a/tests/arch/gowin/shifter.ys b/tests/arch/gowin/shifter.ys
new file mode 100644
index 0000000..b43b1e8
--- /dev/null
+++ b/tests/arch/gowin/shifter.ys
@@ -0,0 +1,12 @@
+read_verilog ../common/shifter.v
+hierarchy -top top
+proc
+flatten
+equiv_opt -assert -map +/gowin/cells_sim.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd top # Constrain all select calls below inside the top module
+
+select -assert-count 8  t:DFF
+select -assert-count 2  t:IBUF
+select -assert-count 8  t:OBUF
+select -assert-none t:DFF t:IBUF t:OBUF %% t:* %D
diff --git a/tests/arch/gowin/tribuf.ys b/tests/arch/gowin/tribuf.ys
new file mode 100644
index 0000000..5855b9d
--- /dev/null
+++ b/tests/arch/gowin/tribuf.ys
@@ -0,0 +1,13 @@
+read_verilog ../common/tribuf.v
+hierarchy -top tristate
+proc
+tribuf
+flatten
+synth
+equiv_opt -assert -map +/gowin/cells_sim.v -map +/simcells.v synth_gowin # equivalency check
+design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
+cd tristate # Constrain all select calls below inside the top module
+#Internal cell type used. Need support it.
+select -assert-count 1 t:TBUF
+select -assert-count 2 t:IBUF
+select -assert-none t:TBUF t:IBUF %% t:* %D