Merge branch 'diego/memattr' of https://github.com/dh73/Yosys into diego/memattr
diff --git a/README.md b/README.md
index e469715..1ce5d47 100644
--- a/README.md
+++ b/README.md
@@ -343,6 +343,13 @@
 - The ``clkbuf_sink`` attribute can be set on an input port of a module to
   request clock buffer insertion by the ``clkbufmap`` pass.
 
+- The ``clkbuf_inv`` attribute can be set on an output port of a module
+  with the value set to the name of an input port of that module.  When
+  the ``clkbufmap`` pass would otherwise insert a clock buffer on this
+  output, it will instead try inserting the clock buffer on the input port
+  (this is used to implement clock inverter cells that clock buffer
+  insertion will "see through").
+
 - The ``clkbuf_inhibit`` is the default attribute to set on a wire to prevent
   automatic clock buffer insertion by ``clkbufmap``. This behaviour can be
   overridden by providing a custom selection to ``clkbufmap``.
diff --git a/backends/aiger/xaiger.cc b/backends/aiger/xaiger.cc
index 46890b0..6271333 100644
--- a/backends/aiger/xaiger.cc
+++ b/backends/aiger/xaiger.cc
@@ -153,11 +153,6 @@
 			if (wire->port_input)
 				sigmap.add(wire);
 
-		// promote output wires
-		for (auto wire : module->wires())
-			if (wire->port_output)
-				sigmap.add(wire);
-
 		for (auto wire : module->wires())
 		{
 			bool keep = wire->attributes.count("\\keep");
@@ -173,7 +168,7 @@
 				}
 
 				if (keep)
-					keep_bits.insert(bit);
+					keep_bits.insert(wirebit);
 
 				if (wire->port_input || keep) {
 					if (bit != wirebit)
@@ -824,7 +819,7 @@
 		log("        write ASCII version of AIGER format\n");
 		log("\n");
 		log("    -map <filename>\n");
-		log("        write an extra file with port and latch symbols\n");
+		log("        write an extra file with port and box symbols\n");
 		log("\n");
 		log("    -vmap <filename>\n");
 		log("        like -map, but more verbose\n");
diff --git a/passes/opt/opt_share.cc b/passes/opt/opt_share.cc
index 2c45670..f59f978 100644
--- a/passes/opt/opt_share.cc
+++ b/passes/opt/opt_share.cc
@@ -83,7 +83,9 @@
 	bool operator==(const ExtSigSpec &other) const { return is_signed == other.is_signed && sign == other.sign && sig == other.sig && semantics == other.semantics; }
 };
 
-#define BITWISE_OPS ID($_AND_), ID($_NAND_), ID($_OR_), ID($_NOR_), ID($_XOR_), ID($_XNOR_), ID($_ANDNOT_), ID($_ORNOT_), ID($and), ID($or), ID($xor), ID($xnor)
+#define FINE_BITWISE_OPS ID($_AND_), ID($_NAND_), ID($_OR_), ID($_NOR_), ID($_XOR_), ID($_XNOR_), ID($_ANDNOT_), ID($_ORNOT_)
+
+#define BITWISE_OPS FINE_BITWISE_OPS, ID($and), ID($or), ID($xor), ID($xnor)
 
 #define REDUCTION_OPS ID($reduce_and), ID($reduce_or), ID($reduce_xor), ID($reduce_xnor), ID($reduce_bool), ID($reduce_nand)
 
@@ -250,14 +252,19 @@
 		shared_op->setPort(ID(CO), alu_co.extract(0, conn_width));
 	}
 
-	shared_op->setParam(ID(Y_WIDTH), conn_width);
+	bool is_fine = shared_op->type.in(FINE_BITWISE_OPS);
+
+	if (!is_fine)
+		shared_op->setParam(ID(Y_WIDTH), conn_width);
 
 	if (decode_port(shared_op, ID::A, &assign_map) == operand) {
 		shared_op->setPort(ID::B, mux_to_oper);
-		shared_op->setParam(ID(B_WIDTH), max_width);
+		if (!is_fine)
+			shared_op->setParam(ID(B_WIDTH), max_width);
 	} else {
 		shared_op->setPort(ID::A, mux_to_oper);
-		shared_op->setParam(ID(A_WIDTH), max_width);
+		if (!is_fine)
+			shared_op->setParam(ID(A_WIDTH), max_width);
 	}
 }
 
diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg
index 0ba5290..5d3b9c2 100644
--- a/passes/pmgen/xilinx_dsp.pmg
+++ b/passes/pmgen/xilinx_dsp.pmg
@@ -347,9 +347,9 @@
 	index <SigBit> port(postAdd, AB)[0] === sigP[0]
 	filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
 	filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
-	// Check that remainder of AB is a sign-extension
-	define <bool> AB_SIGNED (param(postAdd, AB == \A ? \A_SIGNED : \B_SIGNED).as_bool())
-	filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(AB_SIGNED ? sigP[GetSize(sigP)-1] : State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
+	// Check that remainder of AB is a sign- or zero-extension
+	filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
+
 	set postAddAB AB
 	optional
 endmatch
diff --git a/passes/techmap/clkbufmap.cc b/passes/techmap/clkbufmap.cc
index 246932d..b9cd688 100644
--- a/passes/techmap/clkbufmap.cc
+++ b/passes/techmap/clkbufmap.cc
@@ -115,6 +115,8 @@
 		// Cell type, port name, bit index.
 		pool<pair<IdString, pair<IdString, int>>> sink_ports;
 		pool<pair<IdString, pair<IdString, int>>> buf_ports;
+		dict<pair<IdString, pair<IdString, int>>, pair<IdString, int>> inv_ports_out;
+		dict<pair<IdString, pair<IdString, int>>, pair<IdString, int>> inv_ports_in;
 
 		// Process submodules before module using them.
 		std::vector<Module *> modules_sorted;
@@ -133,6 +135,14 @@
 					if (wire->get_bool_attribute("\\clkbuf_sink"))
 						for (int i = 0; i < GetSize(wire); i++)
 							sink_ports.insert(make_pair(module->name, make_pair(wire->name, i)));
+					auto it = wire->attributes.find("\\clkbuf_inv");
+					if (it != wire->attributes.end()) {
+						IdString in_name = RTLIL::escape_id(it->second.decode_string());
+						for (int i = 0; i < GetSize(wire); i++) {
+							inv_ports_out[make_pair(module->name, make_pair(wire->name, i))] = make_pair(in_name, i);
+							inv_ports_in[make_pair(module->name, make_pair(in_name, i))] = make_pair(wire->name, i);
+						}
+					}
 				}
 				continue;
 			}
@@ -157,6 +167,37 @@
 				if (buf_ports.count(make_pair(cell->type, make_pair(port.first, i))))
 					buf_wire_bits.insert(sigmap(port.second[i]));
 
+			// Third, propagate tags through clkbuf_inv cells; repeat until no tag changes.
+			bool retry = true;
+			while (retry) {
+				retry = false;
+				for (auto cell : module->cells())
+				for (const auto &port : cell->connections())
+				for (int i = 0; i < port.second.size(); i++) {
+					auto key = make_pair(cell->type, make_pair(port.first, i));
+					auto it = inv_ports_out.find(key);
+					auto bit = sigmap(port.second[i]);
+					// If output of an inverter is connected to a sink, mark it as buffered,
+					// and request a buffer on the inverter's input instead.
+					if (it != inv_ports_out.end() && !buf_wire_bits.count(bit) && sink_wire_bits.count(bit)) {
+						buf_wire_bits.insert(bit);
+						auto other_bit = sigmap(cell->getPort(it->second.first)[it->second.second]);
+						sink_wire_bits.insert(other_bit);
+						retry = true;
+					}
+					// If input of an inverter is marked as already-buffered,
+					// mark its output already-buffered as well.
+					auto it2 = inv_ports_in.find(key);
+					if (it2 != inv_ports_in.end() && buf_wire_bits.count(bit)) {
+						auto other_bit = sigmap(cell->getPort(it2->second.first)[it2->second.second]);
+						if (!buf_wire_bits.count(other_bit)) {
+							buf_wire_bits.insert(other_bit);
+							retry = true;
+						}
+					}
+				}
+			}
+
 			// Collect all driven bits.
 			for (auto cell : module->cells())
 			for (auto port : cell->connections())
diff --git a/techlibs/xilinx/cells_sim.v b/techlibs/xilinx/cells_sim.v
index 5faddcd..1be43f9 100644
--- a/techlibs/xilinx/cells_sim.v
+++ b/techlibs/xilinx/cells_sim.v
@@ -59,6 +59,34 @@
   assign O = I;
 endmodule
 
+module IOBUF ( // Simulation model: IO is driven from I unless T is high; O follows IO.
+    (* iopad_external_pin *)
+    inout IO, // bidirectional pin, tagged with the iopad_external_pin attribute
+    output O, // continuously assigned from IO
+    input I, // value driven onto IO while T is low
+    input T // when high, IO is driven with 1'bz instead of I
+);
+    parameter integer DRIVE = 12; // the parameters below are declared but not referenced by the assigns
+    parameter IBUF_LOW_PWR = "TRUE";
+    parameter IOSTANDARD = "DEFAULT";
+    parameter SLEW = "SLOW";
+    assign IO = T ? 1'bz : I; // tri-state driver: high-impedance when T is set
+    assign O = IO; // read the pin back onto O
+endmodule
+
+module OBUFT ( // Simulation model: O is driven from I unless T is high.
+    (* iopad_external_pin *)
+    output O, // output pin, tagged with the iopad_external_pin attribute
+    input I, // value driven onto O while T is low
+    input T // when high, O is driven with 1'bz instead of I
+);
+    parameter CAPACITANCE = "DONT_CARE"; // the parameters below are declared but not referenced by the assign
+    parameter integer DRIVE = 12;
+    parameter IOSTANDARD = "DEFAULT";
+    parameter SLEW = "SLOW";
+    assign O = T ? 1'bz : I; // tri-state driver: high-impedance when T is set
+endmodule
+
 module BUFG(
     (* clkbuf_driver *)
     output O,
@@ -126,7 +154,11 @@
 //   assign O = IO, IO = T ? 1'bz : I;
 // endmodule
 
-module INV(output O, input I);
+module INV(
+    (* clkbuf_inv = "I" *)
+    output O,
+    input I
+);
   assign O = !I;
 endmodule
 
diff --git a/techlibs/xilinx/cells_xtra.py b/techlibs/xilinx/cells_xtra.py
index f401ebe..82e403f 100644
--- a/techlibs/xilinx/cells_xtra.py
+++ b/techlibs/xilinx/cells_xtra.py
@@ -326,7 +326,7 @@
     Cell('IBUFGDS', port_attrs={'I': ['iopad_external_pin'], 'IB': ['iopad_external_pin']}),
     Cell('IBUFGDS_DIFF_OUT', port_attrs={'I': ['iopad_external_pin'], 'IB': ['iopad_external_pin']}),
     # I/O.
-    Cell('IOBUF', port_attrs={'IO': ['iopad_external_pin']}),
+    # Cell('IOBUF', port_attrs={'IO': ['iopad_external_pin']}),
     Cell('IOBUF_DCIEN', port_attrs={'IO': ['iopad_external_pin']}),
     Cell('IOBUF_INTERMDISABLE', port_attrs={'IO': ['iopad_external_pin']}),
     Cell('IOBUFE3', port_attrs={'IO': ['iopad_external_pin']}),
@@ -342,7 +342,7 @@
     Cell('OBUFDS', port_attrs={'O': ['iopad_external_pin'], 'OB': ['iopad_external_pin']}),
     Cell('OBUFDS_DPHY', port_attrs={'O': ['iopad_external_pin'], 'OB': ['iopad_external_pin']}),
     # Output + tristate.
-    Cell('OBUFT', port_attrs={'O': ['iopad_external_pin']}),
+    # Cell('OBUFT', port_attrs={'O': ['iopad_external_pin']}),
     Cell('OBUFTDS', port_attrs={'O': ['iopad_external_pin'], 'OB': ['iopad_external_pin']}),
     # Pulls.
     Cell('KEEPER'),
diff --git a/techlibs/xilinx/cells_xtra.v b/techlibs/xilinx/cells_xtra.v
index ce0949f..671d16e 100644
--- a/techlibs/xilinx/cells_xtra.v
+++ b/techlibs/xilinx/cells_xtra.v
@@ -8160,18 +8160,6 @@
     input IB;
 endmodule
 
-module IOBUF (...);
-    parameter integer DRIVE = 12;
-    parameter IBUF_LOW_PWR = "TRUE";
-    parameter IOSTANDARD = "DEFAULT";
-    parameter SLEW = "SLOW";
-    output O;
-    (* iopad_external_pin *)
-    inout IO;
-    input I;
-    input T;
-endmodule
-
 module IOBUF_DCIEN (...);
     parameter integer DRIVE = 12;
     parameter IBUF_LOW_PWR = "TRUE";
@@ -8373,17 +8361,6 @@
     input LPTX_T;
 endmodule
 
-module OBUFT (...);
-    parameter CAPACITANCE = "DONT_CARE";
-    parameter integer DRIVE = 12;
-    parameter IOSTANDARD = "DEFAULT";
-    parameter SLEW = "SLOW";
-    (* iopad_external_pin *)
-    output O;
-    input I;
-    input T;
-endmodule
-
 module OBUFTDS (...);
     parameter CAPACITANCE = "DONT_CARE";
     parameter IOSTANDARD = "DEFAULT";
diff --git a/techlibs/xilinx/lut_map.v b/techlibs/xilinx/lut_map.v
index 13d3c32..62d5016 100644
--- a/techlibs/xilinx/lut_map.v
+++ b/techlibs/xilinx/lut_map.v
@@ -56,8 +56,12 @@
 
   generate
     if (WIDTH == 1) begin
-      LUT1 #(.INIT(P_LUT)) _TECHMAP_REPLACE_ (.O(Y),
-        .I0(A[0]));
+      if (P_LUT == 2'b01) begin
+        INV _TECHMAP_REPLACE_ (.O(Y), .I(A[0]));
+      end else begin
+        LUT1 #(.INIT(P_LUT)) _TECHMAP_REPLACE_ (.O(Y),
+          .I0(A[0]));
+      end
     end else
     if (WIDTH == 2) begin
       LUT2 #(.INIT(P_LUT)) _TECHMAP_REPLACE_ (.O(Y),
diff --git a/tests/arch/xilinx/adffs.ys b/tests/arch/xilinx/adffs.ys
index 12c3441..e73bfe0 100644
--- a/tests/arch/xilinx/adffs.ys
+++ b/tests/arch/xilinx/adffs.ys
@@ -20,9 +20,9 @@
 cd adffn # Constrain all select calls below inside the top module
 select -assert-count 1 t:BUFG
 select -assert-count 1 t:FDCE
-select -assert-count 1 t:LUT1
+select -assert-count 1 t:INV
 
-select -assert-none t:BUFG t:FDCE t:LUT1 %% t:* %D
+select -assert-none t:BUFG t:FDCE t:INV %% t:* %D
 
 
 design -load read
diff --git a/tests/arch/xilinx/counter.ys b/tests/arch/xilinx/counter.ys
index 57b645d..604acdb 100644
--- a/tests/arch/xilinx/counter.ys
+++ b/tests/arch/xilinx/counter.ys
@@ -8,7 +8,7 @@
 
 select -assert-count 1 t:BUFG
 select -assert-count 8 t:FDCE
-select -assert-count 1 t:LUT1
+select -assert-count 1 t:INV
 select -assert-count 7 t:MUXCY
 select -assert-count 8 t:XORCY
-select -assert-none t:BUFG t:FDCE t:LUT1 t:MUXCY t:XORCY %% t:* %D
+select -assert-none t:BUFG t:FDCE t:INV t:MUXCY t:XORCY %% t:* %D
diff --git a/tests/arch/xilinx/dsp_fastfir.ys b/tests/arch/xilinx/dsp_fastfir.ys
new file mode 100644
index 0000000..0067a82
--- /dev/null
+++ b/tests/arch/xilinx/dsp_fastfir.ys
@@ -0,0 +1,69 @@
+read_verilog <<EOT
+// Citation https://github.com/ZipCPU/dspfilters/blob/master/rtl/fastfir.v
+module fastfir_dynamictaps(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_result);
+  wire [30:0] _00_;
+  wire [23:0] _01_;
+  wire [11:0] _02_;
+  wire [30:0] _03_;
+  wire [23:0] _04_;
+  wire [30:0] _05_;
+  wire [23:0] _06_;
+  wire [30:0] _07_;
+  wire [23:0] _08_;
+  wire [11:0] _09_;
+  wire [30:0] _10_;
+  wire [23:0] _11_;
+  wire [30:0] _12_;
+  wire [23:0] _13_;
+  wire [11:0] \fir.FILTER[0].tapk.delayed_sample ;
+  reg [30:0] \fir.FILTER[0].tapk.o_acc  = 31'h00000000;
+  wire [11:0] \fir.FILTER[0].tapk.o_sample ;
+  reg [23:0] \fir.FILTER[0].tapk.product ;
+  reg [11:0] \fir.FILTER[0].tapk.tap  = 12'h000;
+  wire [11:0] \fir.FILTER[1].tapk.delayed_sample ;
+  wire [30:0] \fir.FILTER[1].tapk.o_acc ;
+  wire [11:0] \fir.FILTER[1].tapk.o_sample ;
+  reg [23:0] \fir.FILTER[1].tapk.product ;
+  reg [11:0] \fir.FILTER[1].tapk.tap  = 12'h000;
+  input i_ce;
+  input i_clk;
+  input i_reset;
+  input [11:0] i_sample;
+  input [11:0] i_tap;
+  input i_tap_wr;
+  output [30:0] o_result;
+  reg [30:0] o_result;
+  assign _03_ = 31'h00000000 + { \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product  };
+  assign _04_ = $signed(\fir.FILTER[0].tapk.tap ) * $signed(i_sample);
+  always @(posedge i_clk)
+      \fir.FILTER[0].tapk.tap  <= _02_;
+  always @(posedge i_clk)
+      \fir.FILTER[0].tapk.o_acc  <= _00_;
+  always @(posedge i_clk)
+      \fir.FILTER[0].tapk.product  <= _01_;
+  assign _02_ = i_tap_wr ? i_tap : \fir.FILTER[0].tapk.tap ;
+  assign _05_ = i_ce ? _03_ : \fir.FILTER[0].tapk.o_acc ;
+  assign _00_ = i_reset ? 31'h00000000 : _05_;
+  assign _06_ = i_ce ? _04_ : \fir.FILTER[0].tapk.product ;
+  assign _01_ = i_reset ? 24'h000000 : _06_;
+  assign _10_ = \fir.FILTER[0].tapk.o_acc  + { \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product  };
+  assign _11_ = $signed(\fir.FILTER[1].tapk.tap ) * $signed(i_sample);
+  always @(posedge i_clk)
+      \fir.FILTER[1].tapk.tap  <= _09_;
+  always @(posedge i_clk)
+      o_result <= _07_;
+  always @(posedge i_clk)
+      \fir.FILTER[1].tapk.product  <= _08_;
+  assign _09_ = i_tap_wr ? \fir.FILTER[0].tapk.tap  : \fir.FILTER[1].tapk.tap ;
+  assign _12_ = i_ce ? _10_ : o_result;
+  assign _07_ = i_reset ? 31'h00000000 : _12_;
+  assign _13_ = i_ce ? _11_ : \fir.FILTER[1].tapk.product ;
+  assign _08_ = i_reset ? 24'h000000 : _13_;
+  assign \fir.FILTER[1].tapk.o_acc  = o_result;
+endmodule
+EOT
+
+synth_xilinx
+cd fastfir_dynamictaps
+select -assert-count 2 t:DSP48E1
+select -assert-none t:* t:DSP48E1 %d t:BUFG %d
diff --git a/tests/arch/xilinx/latches.ys b/tests/arch/xilinx/latches.ys
index fe7887e..c87a8e3 100644
--- a/tests/arch/xilinx/latches.ys
+++ b/tests/arch/xilinx/latches.ys
@@ -18,9 +18,9 @@
 design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
 cd latchn # Constrain all select calls below inside the top module
 select -assert-count 1 t:LDCE
-select -assert-count 1 t:LUT1
+select -assert-count 1 t:INV
 
-select -assert-none t:LDCE t:LUT1 %% t:* %D
+select -assert-none t:LDCE t:INV %% t:* %D
 
 
 design -load read
diff --git a/tests/arch/xilinx/logic.ys b/tests/arch/xilinx/logic.ys
index c0f6da3..d5b5c1a 100644
--- a/tests/arch/xilinx/logic.ys
+++ b/tests/arch/xilinx/logic.ys
@@ -5,7 +5,7 @@
 design -load postopt # load the post-opt design (otherwise equiv_opt loads the pre-opt design)
 cd top # Constrain all select calls below inside the top module
 
-select -assert-count 1 t:LUT1
+select -assert-count 1 t:INV
 select -assert-count 6 t:LUT2
 select -assert-count 2 t:LUT4
-select -assert-none t:LUT1 t:LUT2 t:LUT4 %% t:* %D
+select -assert-none t:INV t:LUT2 t:LUT4 %% t:* %D
diff --git a/tests/opt/bug1525.ys b/tests/opt/bug1525.ys
new file mode 100644
index 0000000..972bc0a
--- /dev/null
+++ b/tests/opt/bug1525.ys
@@ -0,0 +1,13 @@
+read_verilog << EOF
+module top(...);
+input A1, A2, B, S;
+output O;
+
+assign O = S ? (A1 & B) : (A2 & B);
+
+endmodule
+EOF
+
+simplemap
+opt_share
+dump
diff --git a/tests/simple_abc9/abc9.v b/tests/simple_abc9/abc9.v
index 64b625e..4d5879e 100644
--- a/tests/simple_abc9/abc9.v
+++ b/tests/simple_abc9/abc9.v
@@ -218,12 +218,6 @@
 endmodule
 
 // Citation: https://github.com/alexforencich/verilog-ethernet
-// TODO: yosys -p "synth_xilinx -abc9 -top abc9_test022" abc9.v -q
-// returns before b4321a31
-//   Warning: Wire abc9_test022.\m_eth_payload_axis_tkeep [7] is used but has no
-//   driver.
-//   Warning: Wire abc9_test022.\m_eth_payload_axis_tkeep [3] is used but has no
-//   driver.
 module abc9_test022
 (
     input  wire        clk,
@@ -237,9 +231,6 @@
 endmodule
 
 // Citation: https://github.com/riscv/riscv-bitmanip
-// TODO: yosys -p "synth_xilinx -abc9 -top abc9_test023" abc9.v -q
-// returns before 14233843
-//   Warning: Wire abc9_test023.\dout [1] is used but has no driver.
 module abc9_test023 #(
 	parameter integer N = 2,
 	parameter integer M = 2
diff --git a/tests/techmap/clkbufmap.ys b/tests/techmap/clkbufmap.ys
index f127786..b81a35e 100644
--- a/tests/techmap/clkbufmap.ys
+++ b/tests/techmap/clkbufmap.ys
@@ -4,6 +4,7 @@
 module dffe ((* clkbuf_sink *) input c, input d, e, output q); endmodule
 module latch (input e, d, output q); endmodule
 module clkgen (output o); endmodule
+module inv ((* clkbuf_inv = "i" *) output o, input i); endmodule
 
 module top(input clk1, clk2, clk3, d, e, output [4:0] q);
 wire clk4, clk5, clk6;
@@ -17,12 +18,18 @@
 endmodule
 
 module sub(output sclk4, output sclk5, output sclk6, input sd, output sq);
+wire sclk7, sclk8, sclk9;
+wire siq;
 wire tmp;
 clkgen s7(.o(sclk4));
 clkgen s8(.o(sclk5));
 clkgen s9(.o(tmp));
-clkbuf s10(.i(tmp), .o(sclk6));
-dff s11(.clk(sclk4), .d(sd), .q(sq));
+clkbuf s10(.i(tmp), .o(sclk7));
+dff s11(.clk(sclk4), .d(sd), .q(siq));
+inv s15(.i(sclk7), .o(sclk6));
+clkgen s12(.o(sclk8));
+inv s13(.o(sclk9), .i(sclk8));
+dff s14(.clk(sclk9), .d(siq), .q(sq));
 endmodule
 EOT
 
@@ -34,7 +41,7 @@
 design -load ref
 clkbufmap -buf clkbuf o:i
 select -assert-count 3 top/t:clkbuf
-select -assert-count 2 sub/t:clkbuf
+select -assert-count 3 sub/t:clkbuf
 select -set clk1 w:clk1 %a %co t:clkbuf %i          # Find 'clk1' fanouts that are 'clkbuf'
 select -assert-count 1 @clk1                        # Check there is one such fanout
 select -assert-count 1 @clk1 %x:+[o] %co c:s* %i    # Check that the 'o' of that clkbuf drives one fanout
@@ -51,6 +58,10 @@
 select -assert-count 1 @sclk4
 select -assert-count 1 @sclk4 %x:+[o] %co c:s11 %i
 select -assert-count 1 @sclk4 %x:+[i] %ci c:s7 %i
+select -set sclk8 w:sclk8 %a %ci t:clkbuf %i
+select -assert-count 1 @sclk8
+select -assert-count 1 @sclk8 %x:+[o] %co c:s13 %i
+select -assert-count 1 @sclk8 %x:+[i] %ci c:s12 %i
 
 # ----------------------
 
@@ -72,7 +83,7 @@
 setattr -set buffer_type "bufg" w:clk2
 clkbufmap -buf clkbuf o:i w:* a:buffer_type=none a:buffer_type=bufr %u %d
 select -assert-count 3 top/t:clkbuf
-select -assert-count 2 sub/t:clkbuf
+select -assert-count 3 sub/t:clkbuf
 select -set clk1 w:clk1 %a %co t:clkbuf %i          # Find 'clk1' fanouts that are 'clkbuf'
 select -assert-count 1 @clk1                        # Check there is one such fanout
 select -assert-count 1 @clk1 %x:+[o] %co c:s* %i    # Check that the 'o' of that clkbuf drives one fanout
@@ -93,4 +104,4 @@
 select -assert-count 0 w:clk1 %a %co t:clkbuf %i
 select -assert-count 0 w:clk2 %a %co t:clkbuf %i
 select -assert-count 0 top/t:clkbuf
-select -assert-count 1 sub/t:clkbuf
+select -assert-count 2 sub/t:clkbuf