gowin: add 32-bit wide SDP BRAM configuration (a9d36) and byte-enables for the 16- and 32-bit modes
diff --git a/techlibs/gowin/bram.txt b/techlibs/gowin/bram.txt
index 366a710..e406f9c 100644
--- a/techlibs/gowin/bram.txt
+++ b/techlibs/gowin/bram.txt
@@ -1,5 +1,7 @@
 bram $__GW1NR_SDP
   init 1
+  abits 9 @a9d36
+  dbits 32 @a9d36
   abits 10 @a10d18
   dbits 16 @a10d18
   abits 11 @a11d9
@@ -13,7 +15,8 @@
   groups 2
   ports  1 1
   wrmode 1 0
-  enable 1 1 @a10d18
+  enable 4 1 @a9d36
+  enable 2 1 @a10d18
   enable 1 1 @a11d9 @a12d4 @a13d2 @a14d1
   transp 0 0
   clocks 2 3
@@ -23,6 +26,6 @@
 match $__GW1NR_SDP
   min bits 2048
   min efficiency 5
-  shuffle_enable B
+  shuffle_enable A
   make_transp
 endmatch
diff --git a/techlibs/gowin/brams_map.v b/techlibs/gowin/brams_map.v
index 6c5e473..fbebc4a 100644
--- a/techlibs/gowin/brams_map.v
+++ b/techlibs/gowin/brams_map.v
@@ -109,12 +109,30 @@
 			.RESET_MODE("SYNC")
 		) _TECHMAP_REPLACE_ (
 			.CLKA(CLK2),   .CLKB(CLK3),
-			.WREA(A1EN),   .OCE(1'b0),
+			.WREA(|A1EN),   .OCE(1'b0),
 			.WREB(1'b0),   .CEB(B1EN), .CEA(1'b1),
 			.RESETA(1'b0), .RESETB(1'b0), .BLKSEL(3'b000),
 			.DI({{(32-CFG_DBITS){1'b0}}, A1DATA}),
 			.DO({open, B1DATA}),
-			.ADA({A1ADDR, {(12-CFG_ABITS){1'b0}}, 2'b11}),
+			.ADA({A1ADDR, {(12-CFG_ABITS){1'b0}}, A1EN}),
+			.ADB({B1ADDR, {(14-CFG_ABITS){1'b0}}})
+		);
+	end else if (CFG_DBITS <= 32) begin
+		SDP    #(
+      `include "bram_init_16.vh"
+			.READ_MODE(0),
+			.BIT_WIDTH_0(32),
+			.BIT_WIDTH_1(32),
+			.BLK_SEL(3'b000),
+			.RESET_MODE("SYNC")
+		) _TECHMAP_REPLACE_ (
+			.CLKA(CLK2),   .CLKB(CLK3),
+			.WREA(|A1EN),   .OCE(1'b0),
+			.WREB(1'b0),   .CEB(B1EN), .CEA(1'b1),
+			.RESETA(1'b0), .RESETB(1'b0), .BLKSEL(3'b000),
+			.DI(A1DATA),
+			.DO(B1DATA),
+			.ADA({A1ADDR, {(10-CFG_ABITS){1'b0}}, A1EN}),
 			.ADB({B1ADDR, {(14-CFG_ABITS){1'b0}}})
 		);
 	end else begin