Merge remote-tracking branch 'origin/mwk/xilinx-dff-improvements' into eddie/exp
diff --git a/CHANGELOG b/CHANGELOG
index cb2b7bf..2f79f57 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -55,6 +55,7 @@
     - Added "check -mapped"
     - Added checking of SystemVerilog always block types (always_comb,
       always_latch and always_ff)
+    - Added "clkpart" pass
 
 Yosys 0.8 .. Yosys 0.9
 ----------------------
diff --git a/backends/aiger/xaiger.cc b/backends/aiger/xaiger.cc
index 46890b0..e05b6cc 100644
--- a/backends/aiger/xaiger.cc
+++ b/backends/aiger/xaiger.cc
@@ -78,11 +78,13 @@
 	Module *module;
 	SigMap sigmap;
 
+	dict<SigBit, bool> init_map;
 	pool<SigBit> input_bits, output_bits;
 	dict<SigBit, SigBit> not_map, alias_map;
 	dict<SigBit, pair<SigBit, SigBit>> and_map;
 	vector<std::tuple<SigBit,RTLIL::Cell*,RTLIL::IdString,int>> ci_bits;
 	vector<std::tuple<SigBit,RTLIL::Cell*,RTLIL::IdString,int,int>> co_bits;
+	dict<SigBit, int> ff_bits;
 	dict<SigBit, float> arrival_times;
 
 	vector<pair<int, int>> aig_gates;
@@ -153,13 +155,16 @@
 			if (wire->port_input)
 				sigmap.add(wire);
 
-		// promote output wires
-		for (auto wire : module->wires())
-			if (wire->port_output)
-				sigmap.add(wire);
-
 		for (auto wire : module->wires())
 		{
+			if (wire->attributes.count("\\init")) {
+				SigSpec initsig = sigmap(wire);
+				Const initval = wire->attributes.at("\\init");
+				for (int i = 0; i < GetSize(wire) && i < GetSize(initval); i++)
+					if (initval[i] == State::S0 || initval[i] == State::S1)
+						init_map[initsig[i]] = initval[i] == State::S1;
+			}
+
 			bool keep = wire->attributes.count("\\keep");
 
 			for (int i = 0; i < GetSize(wire); i++)
@@ -173,7 +178,7 @@
 				}
 
 				if (keep)
-					keep_bits.insert(bit);
+					keep_bits.insert(wirebit);
 
 				if (wire->port_input || keep) {
 					if (bit != wirebit)
@@ -193,17 +198,18 @@
 			}
 		}
 
+		// Cannot fold into above due to use of sigmap
 		for (auto bit : input_bits)
 			undriven_bits.erase(sigmap(bit));
 		for (auto bit : output_bits)
-			if (!bit.wire->port_input)
-				unused_bits.erase(bit);
+			unused_bits.erase(sigmap(bit));
 
 		// TODO: Speed up toposort -- ultimately we care about
 		//       box ordering, but not individual AIG cells
 		dict<SigBit, pool<IdString>> bit_drivers, bit_users;
 		TopoSort<IdString, RTLIL::sort_by_id_str> toposort;
 		bool abc9_box_seen = false;
+		std::vector<Cell*> flop_boxes;
 
 		for (auto cell : module->selected_cells()) {
 			if (cell->type == "$_NOT_")
@@ -241,76 +247,90 @@
 
 			log_assert(!holes_mode);
 
+			if (cell->type == "$__ABC9_FF_")
+			{
+				SigBit D = sigmap(cell->getPort("\\D").as_bit());
+				SigBit Q = sigmap(cell->getPort("\\Q").as_bit());
+				unused_bits.erase(D);
+				undriven_bits.erase(Q);
+				alias_map[Q] = D;
+				auto r = ff_bits.insert(std::make_pair(D, 0));
+				log_assert(r.second);
+				continue;
+			}
+
 			RTLIL::Module* inst_module = module->design->module(cell->type);
 			if (inst_module && inst_module->attributes.count("\\abc9_box_id")) {
 				abc9_box_seen = true;
 
-				if (!holes_mode) {
-					toposort.node(cell->name);
-					for (const auto &conn : cell->connections()) {
-						auto port_wire = inst_module->wire(conn.first);
-						if (port_wire->port_input) {
-							// Ignore inout for the sake of topographical ordering
-							if (port_wire->port_output) continue;
-							for (auto bit : sigmap(conn.second))
-								bit_users[bit].insert(cell->name);
-						}
+				toposort.node(cell->name);
 
-						if (port_wire->port_output)
-							for (auto bit : sigmap(conn.second))
-								bit_drivers[bit].insert(cell->name);
+				for (const auto &conn : cell->connections()) {
+					auto port_wire = inst_module->wire(conn.first);
+					if (port_wire->port_input) {
+						// Ignore inout for the sake of topological ordering
+						if (port_wire->port_output) continue;
+						for (auto bit : sigmap(conn.second))
+							bit_users[bit].insert(cell->name);
+					}
+
+					if (port_wire->port_output)
+						for (auto bit : sigmap(conn.second))
+							bit_drivers[bit].insert(cell->name);
+				}
+
+				if (inst_module->attributes.count("\\abc9_flop"))
+					flop_boxes.push_back(cell);
+				continue;
+			}
+
+			bool cell_known = inst_module || cell->known();
+			for (const auto &c : cell->connections()) {
+				if (c.second.is_fully_const()) continue;
+				auto port_wire = inst_module ? inst_module->wire(c.first) : nullptr;
+				auto is_input = (port_wire && port_wire->port_input) || !cell_known || cell->input(c.first);
+				auto is_output = (port_wire && port_wire->port_output) || !cell_known || cell->output(c.first);
+				if (!is_input && !is_output)
+					log_error("Connection '%s' on cell '%s' (type '%s') not recognised!\n", log_id(c.first), log_id(cell), log_id(cell->type));
+
+				if (is_input) {
+					for (auto b : c.second) {
+						Wire *w = b.wire;
+						if (!w) continue;
+						if (!w->port_output || !cell_known) {
+							SigBit I = sigmap(b);
+							if (I != b)
+								alias_map[b] = I;
+							output_bits.insert(b);
+							unused_bits.erase(b);
+
+							if (!cell_known)
+								keep_bits.insert(b);
+						}
 					}
 				}
-			}
-			else {
-				bool cell_known = inst_module || cell->known();
-				for (const auto &c : cell->connections()) {
-					if (c.second.is_fully_const()) continue;
-					auto port_wire = inst_module ? inst_module->wire(c.first) : nullptr;
-					auto is_input = (port_wire && port_wire->port_input) || !cell_known || cell->input(c.first);
-					auto is_output = (port_wire && port_wire->port_output) || !cell_known || cell->output(c.first);
-					if (!is_input && !is_output)
-						log_error("Connection '%s' on cell '%s' (type '%s') not recognised!\n", log_id(c.first), log_id(cell), log_id(cell->type));
-
-					if (is_input) {
-						for (auto b : c.second) {
-							Wire *w = b.wire;
-							if (!w) continue;
-							if (!w->port_output || !cell_known) {
-								SigBit I = sigmap(b);
-								if (I != b)
-									alias_map[b] = I;
-								output_bits.insert(b);
-								unused_bits.erase(b);
-
-								if (!cell_known)
-									keep_bits.insert(b);
-							}
+				if (is_output) {
+					int arrival = 0;
+					if (port_wire) {
+						auto it = port_wire->attributes.find("\\abc9_arrival");
+						if (it != port_wire->attributes.end()) {
+							if (it->second.flags != 0)
+								log_error("Attribute 'abc9_arrival' on port '%s' of module '%s' is not an integer.\n", log_id(port_wire), log_id(cell->type));
+							arrival = it->second.as_int();
 						}
 					}
-					if (is_output) {
-						int arrival = 0;
-						if (port_wire) {
-							auto it = port_wire->attributes.find("\\abc9_arrival");
-							if (it != port_wire->attributes.end()) {
-								if (it->second.flags != 0)
-									log_error("Attribute 'abc9_arrival' on port '%s' of module '%s' is not an integer.\n", log_id(port_wire), log_id(cell->type));
-								arrival = it->second.as_int();
-							}
-						}
 
-						for (auto b : c.second) {
-							Wire *w = b.wire;
-							if (!w) continue;
-							input_bits.insert(b);
-							SigBit O = sigmap(b);
-							if (O != b)
-								alias_map[O] = b;
-							undriven_bits.erase(O);
+					for (auto b : c.second) {
+						Wire *w = b.wire;
+						if (!w) continue;
+						input_bits.insert(b);
+						SigBit O = sigmap(b);
+						if (O != b)
+							alias_map[O] = b;
+						undriven_bits.erase(O);
 
-							if (arrival)
-								arrival_times[b] = arrival;
-						}
+						if (arrival)
+							arrival_times[b] = arrival;
 					}
 				}
 			}
@@ -319,6 +339,45 @@
 		}
 
 		if (abc9_box_seen) {
+			dict<IdString, std::pair<IdString,int>> flop_q;
+			for (auto cell : flop_boxes) {
+				auto r = flop_q.insert(std::make_pair(cell->type, std::make_pair(IdString(), 0)));
+				SigBit d;
+				if (r.second) {
+					for (const auto &conn : cell->connections()) {
+						const SigSpec &rhs = conn.second;
+						if (!rhs.is_bit())
+							continue;
+						if (!ff_bits.count(rhs))
+							continue;
+						r.first->second.first = conn.first;
+						Module *inst_module = module->design->module(cell->type);
+						Wire *wire = inst_module->wire(conn.first);
+						log_assert(wire);
+						auto jt = wire->attributes.find("\\abc9_arrival");
+						if (jt != wire->attributes.end()) {
+							if (jt->second.flags != 0)
+								log_error("Attribute 'abc9_arrival' on port '%s' of module '%s' is not an integer.\n", log_id(wire), log_id(cell->type));
+							r.first->second.second = jt->second.as_int();
+						}
+						d = rhs;
+						log_assert(d == sigmap(d));
+						break;
+					}
+				}
+				else
+					d = cell->getPort(r.first->second.first);
+
+				auto it = cell->attributes.find(ID(abc9_mergeability));
+				log_assert(it != cell->attributes.end());
+				ff_bits.at(d) = it->second.as_int();
+				cell->attributes.erase(it);
+
+				auto arrival = r.first->second.second;
+				if (arrival)
+					arrival_times[d] = arrival;
+			}
+
 			for (auto &it : bit_users)
 				if (bit_drivers.count(it.first))
 					for (auto driver_cell : bit_drivers.at(it.first))
@@ -414,6 +473,29 @@
 						}
 					}
 				}
+
+				// Connect <cell>.$abc9_currQ (inserted by abc9_map.v) as an input to the flop box
+				if (box_module->get_bool_attribute("\\abc9_flop")) {
+					SigSpec rhs = module->wire(stringf("%s.$abc9_currQ", cell->name.c_str()));
+					if (rhs.empty())
+						log_error("'%s.$abc9_currQ' is not a wire present in module '%s'.\n", log_id(cell), log_id(module));
+
+					int offset = 0;
+					for (auto b : rhs) {
+						SigBit I = sigmap(b);
+						if (b == RTLIL::Sx)
+							b = State::S0;
+						else if (I != b) {
+							if (I == RTLIL::Sx)
+								alias_map[b] = State::S0;
+							else
+								alias_map[b] = I;
+						}
+						co_bits.emplace_back(b, cell, "\\$abc9_currQ", offset++, 0);
+						unused_bits.erase(b);
+					}
+				}
+
 				box_list.emplace_back(cell);
 			}
 
@@ -492,10 +574,20 @@
 			aig_map[bit] = 2*aig_m;
 		}
 
+		for (const auto &i : ff_bits) {
+			const SigBit &bit = i.first;
+			aig_m++, aig_i++;
+			log_assert(!aig_map.count(bit));
+			aig_map[bit] = 2*aig_m;
+		}
+
+		dict<SigBit, int> ff_aig_map;
 		for (auto &c : ci_bits) {
 			RTLIL::SigBit bit = std::get<0>(c);
 			aig_m++, aig_i++;
-			aig_map[bit] = 2*aig_m;
+			auto r = aig_map.insert(std::make_pair(bit, 2*aig_m));
+			if (!r.second)
+				ff_aig_map[bit] = 2*aig_m;
 		}
 
 		for (auto &c : co_bits) {
@@ -514,6 +606,17 @@
 			aig_outputs.push_back(bit2aig(bit));
 		}
 
+		for (auto &i : ff_bits) {
+			const SigBit &bit = i.first;
+			aig_o++;
+			aig_outputs.push_back(ff_aig_map.at(bit));
+		}
+
+		if (output_bits.empty()) {
+			aig_o++;
+			aig_outputs.push_back(0);
+			omode = true;
+		}
 	}
 
 	void write_aiger(std::ostream &f, bool ascii_mode)
@@ -583,14 +686,14 @@
 		std::stringstream h_buffer;
 		auto write_h_buffer = std::bind(write_buffer, std::ref(h_buffer), std::placeholders::_1);
 		write_h_buffer(1);
-		log_debug("ciNum = %d\n", GetSize(input_bits) + GetSize(ci_bits));
-		write_h_buffer(input_bits.size() + ci_bits.size());
-		log_debug("coNum = %d\n", GetSize(output_bits) + GetSize(co_bits));
-		write_h_buffer(output_bits.size() + GetSize(co_bits));
-		log_debug("piNum = %d\n", GetSize(input_bits));
-		write_h_buffer(input_bits.size());
-		log_debug("poNum = %d\n", GetSize(output_bits));
-		write_h_buffer(output_bits.size());
+		log_debug("ciNum = %d\n", GetSize(input_bits) + GetSize(ff_bits) + GetSize(ci_bits));
+		write_h_buffer(input_bits.size() + ff_bits.size() + ci_bits.size());
+		log_debug("coNum = %d\n", GetSize(output_bits) + GetSize(ff_bits) + GetSize(co_bits));
+		write_h_buffer(output_bits.size() + GetSize(ff_bits) + GetSize(co_bits));
+		log_debug("piNum = %d\n", GetSize(input_bits) + GetSize(ff_bits));
+		write_h_buffer(input_bits.size() + ff_bits.size());
+		log_debug("poNum = %d\n", GetSize(output_bits) + GetSize(ff_bits));
+		write_h_buffer(output_bits.size() + ff_bits.size());
 		log_debug("boxNum = %d\n", GetSize(box_list));
 		write_h_buffer(box_list.size());
 
@@ -606,19 +709,29 @@
 		//for (auto bit : output_bits)
 		//	write_o_buffer(0);
 
-		if (!box_list.empty()) {
+		if (!box_list.empty() || !ff_bits.empty()) {
 			RTLIL::Module *holes_module = module->design->addModule("$__holes__");
 			log_assert(holes_module);
 
+			dict<IdString, Cell*> cell_cache;
+
 			int port_id = 1;
 			int box_count = 0;
 			for (auto cell : box_list) {
 				RTLIL::Module* box_module = module->design->module(cell->type);
+				log_assert(box_module);
+				IdString derived_name = box_module->derive(module->design, cell->parameters);
+				box_module = module->design->module(derived_name);
+				if (box_module->has_processes())
+					Pass::call_on_module(module->design, box_module, "proc");
+
 				int box_inputs = 0, box_outputs = 0;
-				Cell *holes_cell = nullptr;
-				if (box_module->get_bool_attribute("\\whitebox")) {
+				auto r = cell_cache.insert(std::make_pair(derived_name, nullptr));
+				Cell *holes_cell = r.first->second;
+				if (r.second && !holes_cell && box_module->get_bool_attribute("\\whitebox")) {
 					holes_cell = holes_module->addCell(cell->name, cell->type);
 					holes_cell->parameters = cell->parameters;
+					r.first->second = holes_cell;
 				}
 
 				// NB: Assume box_module->ports are sorted alphabetically
@@ -627,8 +740,8 @@
 					RTLIL::Wire *w = box_module->wire(port_name);
 					log_assert(w);
 					RTLIL::Wire *holes_wire;
-					RTLIL::SigSpec port_wire;
-					if (w->port_input) {
+					RTLIL::SigSpec port_sig;
+					if (w->port_input)
 						for (int i = 0; i < GetSize(w); i++) {
 							box_inputs++;
 							holes_wire = holes_module->wire(stringf("\\i%d", box_inputs));
@@ -639,29 +752,47 @@
 								holes_module->ports.push_back(holes_wire->name);
 							}
 							if (holes_cell)
-								port_wire.append(holes_wire);
+								port_sig.append(holes_wire);
 						}
-						if (!port_wire.empty())
-							holes_cell->setPort(w->name, port_wire);
-					}
 					if (w->port_output) {
 						box_outputs += GetSize(w);
 						for (int i = 0; i < GetSize(w); i++) {
 							if (GetSize(w) == 1)
-								holes_wire = holes_module->addWire(stringf("%s.%s", cell->name.c_str(), w->name.c_str()));
+								holes_wire = holes_module->addWire(stringf("$abc%s.%s", cell->name.c_str(), log_id(w->name)));
 							else
-								holes_wire = holes_module->addWire(stringf("%s.%s[%d]", cell->name.c_str(), w->name.c_str(), i));
+								holes_wire = holes_module->addWire(stringf("$abc%s.%s[%d]", cell->name.c_str(), log_id(w->name), i));
 							holes_wire->port_output = true;
 							holes_wire->port_id = port_id++;
 							holes_module->ports.push_back(holes_wire->name);
 							if (holes_cell)
-								port_wire.append(holes_wire);
+								port_sig.append(holes_wire);
 							else
 								holes_module->connect(holes_wire, State::S0);
 						}
-						if (!port_wire.empty())
-							holes_cell->setPort(w->name, port_wire);
 					}
+					if (!port_sig.empty()) {
+						if (r.second)
+							holes_cell->setPort(w->name, port_sig);
+						else
+							holes_module->connect(holes_cell->getPort(w->name), port_sig);
+					}
+				}
+
+				// For flops only, create an extra 1-bit input that drives a new wire
+				//   called "<cell>.$abc9_currQ" that is used below
+				if (box_module->get_bool_attribute("\\abc9_flop")) {
+					log_assert(holes_cell);
+
+					box_inputs++;
+					Wire *holes_wire = holes_module->wire(stringf("\\i%d", box_inputs));
+					if (!holes_wire) {
+						holes_wire = holes_module->addWire(stringf("\\i%d", box_inputs));
+						holes_wire->port_input = true;
+						holes_wire->port_id = port_id++;
+						holes_module->ports.push_back(holes_wire->name);
+					}
+					Wire *w = holes_module->addWire(stringf("%s.$abc9_currQ", cell->name.c_str()));
+					holes_module->connect(w, holes_wire);
 				}
 
 				write_h_buffer(box_inputs);
@@ -672,13 +803,44 @@
 
 			std::stringstream r_buffer;
 			auto write_r_buffer = std::bind(write_buffer, std::ref(r_buffer), std::placeholders::_1);
-			write_r_buffer(0);
+			log_debug("flopNum = %d\n", GetSize(ff_bits));
+			write_r_buffer(ff_bits.size());
+			for (const auto &i : ff_bits) {
+				log_assert(i.second > 0);
+				write_r_buffer(i.second);
+				const SigBit &bit = i.first;
+				write_i_buffer(arrival_times.at(bit, 0));
+				//write_o_buffer(0);
+			}
+
 			f << "r";
 			std::string buffer_str = r_buffer.str();
 			int32_t buffer_size_be = to_big_endian(buffer_str.size());
 			f.write(reinterpret_cast<const char*>(&buffer_size_be), sizeof(buffer_size_be));
 			f.write(buffer_str.data(), buffer_str.size());
 
+			std::stringstream s_buffer;
+			auto write_s_buffer = std::bind(write_buffer, std::ref(s_buffer), std::placeholders::_1);
+			write_s_buffer(ff_bits.size());
+			for (const auto &i : ff_bits) {
+				const SigBit &bit = i.first;
+				auto it = bit.wire->attributes.find("\\init");
+				if (it != bit.wire->attributes.end()) {
+					auto init = it->second[bit.offset];
+					if (init == RTLIL::S1) {
+						write_s_buffer(1);
+						continue;
+					}
+				}
+				// Default flop init is zero
+				write_s_buffer(0);
+			}
+			f << "s";
+			buffer_str = s_buffer.str();
+			buffer_size_be = to_big_endian(buffer_str.size());
+			f.write(reinterpret_cast<const char*>(&buffer_size_be), sizeof(buffer_size_be));
+			f.write(buffer_str.data(), buffer_str.size());
+
 			if (holes_module) {
 				log_push();
 
@@ -686,37 +848,63 @@
 				//holes_module->fixup_ports();
 				holes_module->check();
 
-				holes_module->design->selection_stack.emplace_back(false);
-				RTLIL::Selection& sel = holes_module->design->selection_stack.back();
-				sel.select(holes_module);
-
-				// TODO: Should not need to opt_merge if we only instantiate
-				//       each box type once...
-				Pass::call(holes_module->design, "opt_merge -share_all");
-
-				Pass::call(holes_module->design, "flatten -wb");
-
 				// TODO: Should techmap/aigmap/check all lib_whitebox-es just once,
 				//       instead of per write_xaiger call
-				Pass::call(holes_module->design, "techmap");
-				Pass::call(holes_module->design, "aigmap");
-				for (auto cell : holes_module->cells())
-					if (!cell->type.in("$_NOT_", "$_AND_"))
-						log_error("Whitebox contents cannot be represented as AIG. Please verify whiteboxes are synthesisable.\n");
+				Pass::call_on_module(holes_module->design, holes_module, "flatten -wb; techmap; aigmap");
 
-				holes_module->design->selection_stack.pop_back();
+				dict<SigSig, SigSig> replace;
+				for (auto it = holes_module->cells_.begin(); it != holes_module->cells_.end(); ) {
+					auto cell = it->second;
+					if (cell->type.in("$_DFF_N_", "$_DFF_NN0_", "$_DFF_NN1_", "$_DFF_NP0_", "$_DFF_NP1_",
+											"$_DFF_P_", "$_DFF_PN0_", "$_DFF_PN1_", "$_DFF_PP0_", "$_DFF_PP1_")) {
+						SigBit D = cell->getPort("\\D");
+						SigBit Q = cell->getPort("\\Q");
+						// Remove the DFF cell from what needs to be a combinatorial box
+						it = holes_module->cells_.erase(it);
+						Wire *port;
+						if (GetSize(Q.wire) == 1)
+							port = holes_module->wire(stringf("$abc%s", Q.wire->name.c_str()));
+						else
+							port = holes_module->wire(stringf("$abc%s[%d]", Q.wire->name.c_str(), Q.offset));
+						log_assert(port);
+						// Prepare to replace "assign <port> = DFF.Q;" with "assign <port> = DFF.D;"
+						//   in order to extract the combinatorial control logic that feeds the box
+						//   (i.e. clock enable, synchronous reset, etc.)
+						replace.insert(std::make_pair(SigSig(port,Q), SigSig(port,D)));
+						// Since `flatten` above would have created wires named "<cell>.Q",
+						//   extract the pre-techmap cell name
+						auto pos = Q.wire->name.str().rfind(".");
+						log_assert(pos != std::string::npos);
+						IdString driver = Q.wire->name.substr(0, pos);
+						// And drive the signal that was previously driven by "DFF.Q" (typically
+						//   used to implement clock-enable functionality) with the "<cell>.$abc9_currQ"
+						//   wire (which itself is driven by an input port) we inserted above
+						Wire *currQ = holes_module->wire(stringf("%s.$abc9_currQ", driver.c_str()));
+						log_assert(currQ);
+						holes_module->connect(Q, currQ);
+						continue;
+					}
+					else if (!cell->type.in("$_NOT_", "$_AND_"))
+						log_error("Whitebox contents cannot be represented as AIG. Please verify whiteboxes are synthesisable.\n");
+					++it;
+				}
+
+				for (auto &conn : holes_module->connections_) {
+					auto it = replace.find(conn);
+					if (it != replace.end())
+						conn = it->second;
+				}
 
 				// Move into a new (temporary) design so that "clean" will only
 				// operate (and run checks on) this one module
 				RTLIL::Design *holes_design = new RTLIL::Design;
-				holes_module->design->modules_.erase(holes_module->name);
+				module->design->modules_.erase(holes_module->name);
 				holes_design->add(holes_module);
 				Pass::call(holes_design, "clean -purge");
 
 				std::stringstream a_buffer;
 				XAigerWriter writer(holes_module, true /* holes_mode */);
 				writer.write_aiger(a_buffer, false /*ascii_mode*/);
-
 				delete holes_design;
 
 				f << "a";
@@ -752,6 +940,7 @@
 	void write_map(std::ostream &f, bool verbose_map)
 	{
 		dict<int, string> input_lines;
+		dict<int, string> init_lines;
 		dict<int, string> output_lines;
 		dict<int, string> wire_lines;
 
@@ -773,7 +962,11 @@
 
 				if (output_bits.count(b)) {
 					int o = ordered_outputs.at(b);
-					output_lines[o] += stringf("output %d %d %s\n", o - GetSize(co_bits), i, log_id(wire));
+					int init = 0;
+					auto it = init_map.find(b);
+					if (it != init_map.end() && it->second)
+						init = 1;
+					output_lines[o] += stringf("output %d %d %s %d\n", o - GetSize(co_bits), i, log_id(wire), init);
 					continue;
 				}
 
@@ -792,6 +985,10 @@
 			f << it.second;
 		log_assert(input_lines.size() == input_bits.size());
 
+		init_lines.sort();
+		for (auto &it : init_lines)
+			f << it.second;
+
 		int box_count = 0;
 		for (auto cell : box_list)
 			f << stringf("box %d %d %s\n", box_count++, 0, log_id(cell->name));
@@ -802,6 +999,8 @@
 		for (auto &it : output_lines)
 			f << it.second;
 		log_assert(output_lines.size() == output_bits.size());
+		if (omode && output_bits.empty())
+			f << "output " << output_lines.size() << " 0 $__dummy__\n";
 
 		wire_lines.sort();
 		for (auto &it : wire_lines)
@@ -824,7 +1023,7 @@
 		log("        write ASCII version of AIGER format\n");
 		log("\n");
 		log("    -map <filename>\n");
-		log("        write an extra file with port and latch symbols\n");
+		log("        write an extra file with port and box symbols\n");
 		log("\n");
 		log("    -vmap <filename>\n");
 		log("        like -map, but more verbose\n");
diff --git a/frontends/aiger/aigerparse.cc b/frontends/aiger/aigerparse.cc
index cf06019..9374f1a 100644
--- a/frontends/aiger/aigerparse.cc
+++ b/frontends/aiger/aigerparse.cc
@@ -432,7 +432,7 @@
 			else if (c == 'r') {
 				uint32_t dataSize YS_ATTRIBUTE(unused) = parse_xaiger_literal(f);
 				flopNum = parse_xaiger_literal(f);
-				log_debug("flopNum: %u\n", flopNum);
+				log_debug("flopNum = %u\n", flopNum);
 				log_assert(dataSize == (flopNum+1) * sizeof(uint32_t));
 				f.ignore(flopNum * sizeof(uint32_t));
 			}
@@ -464,9 +464,10 @@
 					boxes.emplace_back(cell);
 				}
 			}
-			else if (c == 'a' || c == 'i' || c == 'o') {
+			else if (c == 'a' || c == 'i' || c == 'o' || c == 's') {
 				uint32_t dataSize = parse_xaiger_literal(f);
 				f.ignore(dataSize);
+				log_debug("ignoring '%c'\n", c);
 			}
 			else {
 				break;
@@ -734,12 +735,19 @@
 void AigerReader::post_process()
 {
 	pool<IdString> seen_boxes;
-	unsigned ci_count = 0, co_count = 0;
+	pool<IdString> flops;
+	unsigned ci_count = 0, co_count = 0, flop_count = 0;
 	for (auto cell : boxes) {
 		RTLIL::Module* box_module = design->module(cell->type);
 		log_assert(box_module);
 
+		bool is_flop = false;
 		if (seen_boxes.insert(cell->type).second) {
+			if (box_module->attributes.count("\\abc9_flop")) {
+				log_assert(flop_count < flopNum);
+				flops.insert(cell->type);
+				is_flop = true;
+			}
 			auto it = box_module->attributes.find("\\abc9_carry");
 			if (it != box_module->attributes.end()) {
 				RTLIL::Wire *carry_in = nullptr, *carry_out = nullptr;
@@ -779,6 +787,8 @@
 				carry_out->port_id = ports.size();
 			}
 		}
+		else
+			is_flop = flops.count(cell->type);
 
 		// NB: Assume box_module->ports are sorted alphabetically
 		//     (as RTLIL::Module::fixup_ports() would do)
@@ -804,9 +814,32 @@
 				}
 				rhs.append(wire);
 			}
-
 			cell->setPort(port_name, rhs);
 		}
+
+		if (is_flop) {
+			log_assert(co_count < outputs.size());
+			Wire *wire = outputs[co_count++];
+			log_assert(wire);
+			log_assert(wire->port_output);
+			wire->port_output = false;
+
+			RTLIL::Wire *d = outputs[outputs.size() - flopNum + flop_count];
+			log_assert(d);
+			log_assert(d->port_output);
+			d->port_output = false;
+
+			RTLIL::Wire *q = inputs[piNum - flopNum + flop_count];
+			log_assert(q);
+			log_assert(q->port_input);
+			q->port_input = false;
+
+			auto ff = module->addCell(NEW_ID, "$__ABC9_FF_");
+			ff->setPort("\\D", d);
+			ff->setPort("\\Q", q);
+			flop_count++;
+			continue;
+		}
 	}
 
 	dict<RTLIL::IdString, int> wideports_cache;
@@ -909,6 +942,10 @@
 					}
 				}
 				log_debug(" -> %s\n", log_id(wire));
+				int init;
+				mf >> init;
+				if (init < 2)
+					wire->attributes["\\init"] = init;
 			}
 			else if (type == "box") {
 				RTLIL::Cell* cell = module->cell(stringf("$__box%d__", variable));
diff --git a/passes/hierarchy/Makefile.inc b/passes/hierarchy/Makefile.inc
index b3f139b..ea809ec 100644
--- a/passes/hierarchy/Makefile.inc
+++ b/passes/hierarchy/Makefile.inc
@@ -2,4 +2,5 @@
 OBJS += passes/hierarchy/hierarchy.o
 OBJS += passes/hierarchy/uniquify.o
 OBJS += passes/hierarchy/submod.o
+OBJS += passes/hierarchy/clkpart.o
 
diff --git a/passes/hierarchy/clkpart.cc b/passes/hierarchy/clkpart.cc
new file mode 100644
index 0000000..81983e2
--- /dev/null
+++ b/passes/hierarchy/clkpart.cc
@@ -0,0 +1,308 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2012  Clifford Wolf <clifford@clifford.at>
+ *                2019  Eddie Hung <eddie@fpgeh.com>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "kernel/register.h"
+#include "kernel/sigtools.h"
+#include "kernel/celltypes.h"
+#include "kernel/rtlil.h"
+#include "kernel/log.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+struct ClkPartPass : public Pass {
+	ClkPartPass() : Pass("clkpart", "partition design according to clock/enable domain") { }
+	void help() YS_OVERRIDE // logs the usage text for the "clkpart" command and its options
+	{
+		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("\n");
+		log("    clkpart [options] [selection]\n");
+		log("\n");
+		log("Partition the contents of selected modules according to the clock (and optionally\n");
+		log("the enable) domains of its $_DFF* cells by extracting them into sub-modules,\n");
+		log("using the `submod` command.\n");
+		log("\n");
+		log("    -set_attr <name> <value>\n");
+		log("        set the specified attribute on all sub-modules created.\n");
+		log("\n");
+		log("    -unpart <name>\n");
+		log("        undo this operation within the selected modules, by flattening those\n");
+		log("        attached with an <name> attribute into those modules without this\n");
+		log("        attribute.\n");
+		log("\n");
+		log("    -enable\n");
+		log("        also consider enable domains.\n");
+		log("\n");
+	}
+
+	bool unpart_mode, enable_mode;
+	IdString attr_name;
+	Const attr_value;
+
+	void clear_flags() YS_OVERRIDE // resets every option member to its default; execute() calls this before parsing args
+	{
+		unpart_mode = false; // default: partition (execute() calls part() unless -unpart is given)
+		enable_mode = false; // default: -enable not given
+		attr_name = IdString(); // empty until -set_attr / -unpart supplies a name
+		attr_value = Const(); // empty until -set_attr supplies a value
+	}
+	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE // parse options, then run part() or unpart()
+	{
+		log_header(design, "Executing CLKPART pass (partition design according to clock/enable domain).\n");
+		log_push();
+
+		clear_flags(); // start from default option values on every invocation
+
+		size_t argidx;
+		for (argidx = 1; argidx < args.size(); argidx++)
+		{
+			if (args[argidx] == "-set_attr" && argidx+2 < args.size()) { // needs both <name> and <value>
+				attr_name = RTLIL::escape_id(args[++argidx]);
+				attr_value = args[++argidx]; // advance to <value>; the loop increment then steps past it
+				continue;
+			}
+			if (args[argidx] == "-unpart" && argidx+1 < args.size()) { // needs <name>
+				unpart_mode = true;
+				attr_name = RTLIL::escape_id(args[++argidx]);
+				continue;
+			}
+			if (args[argidx] == "-enable") {
+				enable_mode = true;
+				continue;
+			}
+			break; // first unrecognised argument ends option parsing
+		}
+		extra_args(args, argidx, design); // remaining arguments are handed to extra_args()
+
+		if (unpart_mode)
+			unpart(design);
+		else
+			part(design);
+
+		log_pop(); // balances the log_push() above
+	}
+
+	void part(RTLIL::Design *design)
+	{
+		CellTypes ct(design);
+		SigMap assign_map;
+		std::vector<std::string> new_submods;
+
+		log_header(design, "Summary of detected clock domains:\n");
+		for (auto mod : design->selected_modules())
+		{
+			if (mod->processes.size() > 0) {
+				log("Skipping module %s as it contains processes.\n", log_id(mod));
+				continue;
+			}
+
+			assign_map.set(mod);
+
+			std::vector<RTLIL::Cell*> all_cells = mod->selected_cells();
+			std::set<RTLIL::Cell*> unassigned_cells(all_cells.begin(), all_cells.end());
+
+			std::set<RTLIL::Cell*> expand_queue, next_expand_queue;
+			std::set<RTLIL::Cell*> expand_queue_up, next_expand_queue_up;
+			std::set<RTLIL::Cell*> expand_queue_down, next_expand_queue_down;
+
+			typedef tuple<bool, RTLIL::SigSpec, bool, RTLIL::SigSpec> clkdomain_t;
+			std::map<clkdomain_t, vector<Cell*>> assigned_cells;
+			std::map<RTLIL::Cell*, clkdomain_t> assigned_cells_reverse;
+
+			std::map<RTLIL::Cell*, std::set<RTLIL::SigBit>> cell_to_bit, cell_to_bit_up, cell_to_bit_down;
+			std::map<RTLIL::SigBit, std::set<RTLIL::Cell*>> bit_to_cell, bit_to_cell_up, bit_to_cell_down;
+
+			for (auto cell : all_cells)
+			{
+				clkdomain_t key;
+
+				for (auto &conn : cell->connections())
+				for (auto bit : conn.second) {
+					bit = assign_map(bit);
+					if (bit.wire != nullptr) {
+						cell_to_bit[cell].insert(bit);
+						bit_to_cell[bit].insert(cell);
+						if (ct.cell_input(cell->type, conn.first)) {
+							cell_to_bit_up[cell].insert(bit);
+							bit_to_cell_down[bit].insert(cell);
+						}
+						if (ct.cell_output(cell->type, conn.first)) {
+							cell_to_bit_down[cell].insert(bit);
+							bit_to_cell_up[bit].insert(cell);
+						}
+					}
+				}
+
+				if (cell->type.in(ID($_DFF_N_), ID($_DFF_P_)))
+				{
+					key = clkdomain_t(cell->type == ID($_DFF_P_), assign_map(cell->getPort(ID(C))), true, RTLIL::SigSpec());
+				}
+				else
+				if (cell->type.in(ID($_DFFE_NN_), ID($_DFFE_NP_), ID($_DFFE_PN_), ID($_DFFE_PP_)))
+				{
+					bool this_clk_pol = cell->type.in(ID($_DFFE_PN_), ID($_DFFE_PP_));
+					bool this_en_pol = !enable_mode || cell->type.in(ID($_DFFE_NP_), ID($_DFFE_PP_));
+					key = clkdomain_t(this_clk_pol, assign_map(cell->getPort(ID(C))), this_en_pol, enable_mode ? assign_map(cell->getPort(ID(E))) : RTLIL::SigSpec());
+				}
+				else
+				if (cell->type.in(ID($_DFF_NN0_), ID($_DFF_NN1_), ID($_DFF_NP0_), ID($_DFF_NP1_),
+							ID($_DFF_PN0_), ID($_DFF_PN1_), ID($_DFF_PP0_), ID($_DFF_PP1_)))
+				{
+					bool this_clk_pol = cell->type.in(ID($_DFF_PN0_), ID($_DFF_PN1_), ID($_DFF_PP0_), ID($_DFF_PP1_));
+					log_assert(!enable_mode); // TODO
+					key = clkdomain_t(this_clk_pol, assign_map(cell->getPort(ID(C))), true, RTLIL::SigSpec());
+				}
+				else
+					continue;
+
+				unassigned_cells.erase(cell);
+				expand_queue.insert(cell);
+				expand_queue_up.insert(cell);
+				expand_queue_down.insert(cell);
+
+				assigned_cells[key].push_back(cell);
+				assigned_cells_reverse[cell] = key;
+			}
+
+			while (!expand_queue_up.empty() || !expand_queue_down.empty())
+			{ // Grow each clock domain outward from its seed flip-flops, one frontier per queue swap.
+				if (!expand_queue_up.empty())
+				{
+					RTLIL::Cell *cell = *expand_queue_up.begin();
+					clkdomain_t key = assigned_cells_reverse.at(cell);
+					expand_queue_up.erase(cell);
+					// Upward step: claim the unassigned cells whose outputs drive this cell's inputs.
+					for (auto bit : cell_to_bit_up[cell])
+					for (auto c : bit_to_cell_up[bit])
+						if (unassigned_cells.count(c)) {
+							unassigned_cells.erase(c);
+							next_expand_queue_up.insert(c);
+							assigned_cells[key].push_back(c);
+							assigned_cells_reverse[c] = key;
+							expand_queue.insert(c);
+						}
+				}
+
+				if (!expand_queue_down.empty())
+				{
+					RTLIL::Cell *cell = *expand_queue_down.begin();
+					clkdomain_t key = assigned_cells_reverse.at(cell);
+					expand_queue_down.erase(cell);
+					// Downward step: claim the unassigned cells whose inputs read this cell's outputs.
+					for (auto bit : cell_to_bit_down[cell])
+					for (auto c : bit_to_cell_down[bit])
+						if (unassigned_cells.count(c)) {
+							unassigned_cells.erase(c);
+							next_expand_queue_down.insert(c); // was next_expand_queue_up, so the downward frontier never advanced
+							assigned_cells[key].push_back(c);
+							assigned_cells_reverse[c] = key;
+							expand_queue.insert(c);
+						}
+				}
+				// Both frontiers exhausted: continue with the cells claimed during this round.
+				if (expand_queue_up.empty() && expand_queue_down.empty()) {
+					expand_queue_up.swap(next_expand_queue_up);
+					expand_queue_down.swap(next_expand_queue_down);
+				}
+			}
+
+			while (!expand_queue.empty())
+			{
+				RTLIL::Cell *cell = *expand_queue.begin();
+				clkdomain_t key = assigned_cells_reverse.at(cell);
+				expand_queue.erase(cell);
+
+				for (auto bit : cell_to_bit.at(cell)) {
+					for (auto c : bit_to_cell[bit])
+						if (unassigned_cells.count(c)) {
+							unassigned_cells.erase(c);
+							next_expand_queue.insert(c);
+							assigned_cells[key].push_back(c);
+							assigned_cells_reverse[c] = key;
+						}
+					bit_to_cell[bit].clear();
+				}
+
+				if (expand_queue.empty())
+					expand_queue.swap(next_expand_queue);
+			}
+
+			clkdomain_t key(true, RTLIL::SigSpec(), true, RTLIL::SigSpec());
+			for (auto cell : unassigned_cells) {
+				assigned_cells[key].push_back(cell);
+				assigned_cells_reverse[cell] = key;
+			}
+
+			clkdomain_t largest_domain;
+			int largest_domain_size = 0;
+			log("  module %s\n", mod->name.c_str());
+			for (auto &it : assigned_cells) {
+				log("    %d cells in clk=%s%s, en=%s%s\n", GetSize(it.second),
+						std::get<0>(it.first) ? "" : "!", log_signal(std::get<1>(it.first)),
+						std::get<2>(it.first) ? "" : "!", log_signal(std::get<3>(it.first)));
+				if (GetSize(it.second) > largest_domain_size) {
+					largest_domain = it.first;
+					largest_domain_size = GetSize(it.second);
+				}
+			}
+
+			for (auto &it : assigned_cells) { // every domain except the largest is split off into its own submodule
+				if (it.first == largest_domain)
+					continue;
+				// Name is "clk=[!]<clk>[.en=[!]<en>]"; the enable's '!' used to be emitted before ".en=".
+				auto clk = std::get<1>(it.first);
+				auto en = std::get<3>(it.first);
+				std::string submod = stringf("clk=%s%s%s%s%s",
+						std::get<0>(it.first) ? "" : "!", clk.empty() ? "" : log_signal(clk),
+						en.empty() ? "" : ".en=", std::get<2>(it.first) ? "" : "!", en.empty() ? "" : log_signal(en));
+				for (auto c : it.second)
+					c->attributes[ID(submod)] = submod;
+				new_submods.push_back(stringf("%s_%s", mod->name.c_str(), submod.c_str()));
+			}
+		}
+
+		Pass::call(design, "submod -hidden");
+
+		if (!attr_name.empty())
+			for (auto m : new_submods)
+				design->module(m)->attributes[attr_name] = attr_value;
+	}
+
+	void unpart(RTLIL::Design *design)
+	{
+		// Selected modules lacking the attr_name attribute get a temporary keep_hierarchy
+		// attribute around the 'flatten' call (presumably so that flatten leaves them intact).
+		vector<Module*> shielded;
+		for (auto mod : design->selected_modules()) {
+			const bool has_part_attr = mod->get_bool_attribute(attr_name);
+			if (has_part_attr || mod->get_bool_attribute(ID(keep_hierarchy)))
+				continue;
+			shielded.push_back(mod);
+			mod->set_bool_attribute(ID(keep_hierarchy));
+		}
+
+		Pass::call(design, "flatten");
+		// Clear the temporary attribute again; modules that already carried it were never recorded.
+		for (auto mod : shielded)
+			mod->set_bool_attribute(ID(keep_hierarchy), false);
+	}
+} ClkPartPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/passes/hierarchy/submod.cc b/passes/hierarchy/submod.cc
index ec242aa..211f961 100644
--- a/passes/hierarchy/submod.cc
+++ b/passes/hierarchy/submod.cc
@@ -20,6 +20,7 @@
 #include "kernel/register.h"
 #include "kernel/celltypes.h"
 #include "kernel/log.h"
+#include "kernel/sigtools.h"
 #include <stdlib.h>
 #include <stdio.h>
 #include <set>
@@ -32,8 +33,10 @@
 	CellTypes ct;
 	RTLIL::Design *design;
 	RTLIL::Module *module;
+	SigMap sigmap;
 
 	bool copy_mode;
+	bool hidden_mode;
 	std::string opt_name;
 
 	struct SubModule
@@ -46,35 +49,40 @@
 
 	struct wire_flags_t {
 		RTLIL::Wire *new_wire;
-		bool is_int_driven, is_int_used, is_ext_driven, is_ext_used;
-		wire_flags_t() : new_wire(NULL), is_int_driven(false), is_int_used(false), is_ext_driven(false), is_ext_used(false) { }
+		RTLIL::Const is_int_driven;
+		bool is_int_used, is_ext_driven, is_ext_used;
+		wire_flags_t(RTLIL::Wire* wire) : new_wire(NULL), is_int_driven(State::S0, GetSize(wire)), is_int_used(false), is_ext_driven(false), is_ext_used(false) { }
 	};
 	std::map<RTLIL::Wire*, wire_flags_t> wire_flags;
 	bool flag_found_something;
 
-	void flag_wire(RTLIL::Wire *wire, bool create, bool set_int_driven, bool set_int_used, bool set_ext_driven, bool set_ext_used)
+	void flag_wire(RTLIL::Wire *wire, bool create, bool set_int_used, bool set_ext_driven, bool set_ext_used)
 	{
 		if (wire_flags.count(wire) == 0) {
 			if (!create)
 				return;
-			wire_flags[wire] = wire_flags_t();
+			wire_flags.emplace(wire, wire);
 		}
-		if (set_int_driven)
-			wire_flags[wire].is_int_driven = true;
 		if (set_int_used)
-			wire_flags[wire].is_int_used = true;
+			wire_flags.at(wire).is_int_used = true;
 		if (set_ext_driven)
-			wire_flags[wire].is_ext_driven = true;
+			wire_flags.at(wire).is_ext_driven = true;
 		if (set_ext_used)
-			wire_flags[wire].is_ext_used = true;
+			wire_flags.at(wire).is_ext_used = true;
 		flag_found_something = true;
 	}
 
 	void flag_signal(const RTLIL::SigSpec &sig, bool create, bool set_int_driven, bool set_int_used, bool set_ext_driven, bool set_ext_used)
 	{
 		for (auto &c : sig.chunks())
-			if (c.wire != NULL)
-				flag_wire(c.wire, create, set_int_driven, set_int_used, set_ext_driven, set_ext_used);
+			if (c.wire != NULL) {
+				flag_wire(c.wire, create, set_int_used, set_ext_driven, set_ext_used);
+				if (set_int_driven)
+					for (int i = c.offset; i < c.offset+c.width; i++) {
+						wire_flags.at(c.wire).is_int_driven[i] = State::S1;
+						flag_found_something = true;
+					}
+			}
 	}
 
 	void handle_submodule(SubModule &submod)
@@ -127,27 +135,39 @@
 				flags.is_ext_driven = true;
 			if (wire->port_output)
 				flags.is_ext_used = true;
+			else {
+				auto sig = sigmap(wire);
+				for (auto c : sig.chunks())
+					if (c.wire && c.wire->port_output) {
+						flags.is_ext_used = true;
+						break;
+					}
+			}
 
 			bool new_wire_port_input = false;
 			bool new_wire_port_output = false;
 
-			if (flags.is_int_driven && flags.is_ext_used)
+			if (!flags.is_int_driven.is_fully_zero() && flags.is_ext_used)
 				new_wire_port_output = true;
 			if (flags.is_ext_driven && flags.is_int_used)
 				new_wire_port_input = true;
 
-			if (flags.is_int_driven && flags.is_ext_driven)
+			if (!flags.is_int_driven.is_fully_zero() && flags.is_ext_driven)
 				new_wire_port_input = true, new_wire_port_output = true;
 
 			std::string new_wire_name = wire->name.str();
 			if (new_wire_port_input || new_wire_port_output) {
-				while (new_wire_name[0] == '$') {
-					std::string next_wire_name = stringf("\\n%d", auto_name_counter++);
-					if (all_wire_names.count(next_wire_name) == 0) {
-						all_wire_names.insert(next_wire_name);
-						new_wire_name = next_wire_name;
+				if (new_wire_name[0] == '$')
+					while (1) {
+						std::string next_wire_name = stringf("%s\\n%d", hidden_mode ? "$submod" : "", auto_name_counter++);
+						if (all_wire_names.count(next_wire_name) == 0) {
+							all_wire_names.insert(next_wire_name);
+							new_wire_name = next_wire_name;
+							break;
+						}
 					}
-				}
+				else if (hidden_mode)
+					new_wire_name = stringf("$submod%s", new_wire_name.c_str());
 			}
 
 			RTLIL::Wire *new_wire = new_mod->addWire(new_wire_name, wire->width);
@@ -155,6 +175,22 @@
 			new_wire->port_output = new_wire_port_output;
 			new_wire->start_offset = wire->start_offset;
 			new_wire->attributes = wire->attributes;
+			if (!flags.is_int_driven.is_fully_zero()) {
+				new_wire->attributes.erase(ID(init));
+				auto sig = sigmap(wire);
+				for (int i = 0; i < GetSize(sig); i++) {
+					if (flags.is_int_driven[i] == State::S0)
+						continue;
+					if (!sig[i].wire)
+						continue;
+					auto it = sig[i].wire->attributes.find(ID(init));
+					if (it != sig[i].wire->attributes.end()) {
+						auto jt = new_wire->attributes.insert(std::make_pair(ID(init), Const(State::Sx, GetSize(sig)))).first;
+						jt->second[i] = it->second[sig[i].offset];
+						it->second[sig[i].offset] = State::Sx;
+					}
+				}
+			}
 
 			if (new_wire->port_input && new_wire->port_output)
 				log("  signal %s: inout %s\n", wire->name.c_str(), new_wire->name.c_str());
@@ -177,7 +213,7 @@
 				for (auto &bit : conn.second)
 					if (bit.wire != NULL) {
 						log_assert(wire_flags.count(bit.wire) > 0);
-						bit.wire = wire_flags[bit.wire].new_wire;
+						bit.wire = wire_flags.at(bit.wire).new_wire;
 					}
 			log("  cell %s (%s)\n", new_cell->name.c_str(), new_cell->type.c_str());
 			if (!copy_mode)
@@ -189,16 +225,27 @@
 			RTLIL::Cell *new_cell = module->addCell(submod.full_name, submod.full_name);
 			for (auto &it : wire_flags)
 			{
-				RTLIL::Wire *old_wire = it.first;
+				RTLIL::SigSpec old_sig = sigmap(it.first);
 				RTLIL::Wire *new_wire = it.second.new_wire;
-				if (new_wire->port_id > 0)
-					new_cell->setPort(new_wire->name, RTLIL::SigSpec(old_wire));
+				if (new_wire->port_id > 0) {
+					if (new_wire->port_output)
+						for (int i = 0; i < GetSize(old_sig); i++) {
+							auto &b = old_sig[i];
+							// Prevents "ERROR: Mismatch in directionality ..." when flattening
+							if (!b.wire)
+								b = module->addWire(NEW_ID);
+							// Prevents "Warning: multiple conflicting drivers ..."
+							else if (!it.second.is_int_driven[i])
+								b = module->addWire(NEW_ID);
+						}
+					new_cell->setPort(new_wire->name, old_sig);
+				}
 			}
 		}
 	}
 
-	SubmodWorker(RTLIL::Design *design, RTLIL::Module *module, bool copy_mode = false, std::string opt_name = std::string()) :
-			design(design), module(module), copy_mode(copy_mode), opt_name(opt_name)
+	SubmodWorker(RTLIL::Design *design, RTLIL::Module *module, bool copy_mode = false, bool hidden_mode = false, std::string opt_name = std::string()) :
+			design(design), module(module), sigmap(module), copy_mode(copy_mode), hidden_mode(hidden_mode), opt_name(opt_name)
 	{
 		if (!design->selected_whole_module(module->name) && opt_name.empty())
 			return;
@@ -219,6 +266,12 @@
 		ct.setup_stdcells_mem();
 		ct.setup_design(design);
 
+		for (auto port : module->ports) {
+			auto wire = module->wire(port);
+			if (wire->port_output)
+				sigmap.add(wire);
+		}
+
 		if (opt_name.empty())
 		{
 			for (auto &it : module->wires_)
@@ -273,7 +326,7 @@
 	{
 		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
 		log("\n");
-		log("    submod [-copy] [selection]\n");
+		log("    submod [options] [selection]\n");
 		log("\n");
 		log("This pass identifies all cells with the 'submod' attribute and moves them to\n");
 		log("a newly created module. The value of the attribute is used as name for the\n");
@@ -285,16 +338,20 @@
 		log("This pass only operates on completely selected modules with no processes\n");
 		log("or memories.\n");
 		log("\n");
+		log("    -copy\n");
+		log("        by default the cells are 'moved' from the source module and the source\n");
+		log("        module will use an instance of the new module after this command is\n");
+		log("        finished. call with -copy to not modify the source module.\n");
 		log("\n");
-		log("    submod -name <name> [-copy] [selection]\n");
+		log("    -name <name>\n");
+		log("        don't use the 'submod' attribute but instead use the selection. only\n");
+		log("        objects from one module might be selected. the value of the -name option\n");
+		log("        is used as the value of the 'submod' attribute instead.\n");
 		log("\n");
-		log("As above, but don't use the 'submod' attribute but instead use the selection.\n");
-		log("Only objects from one module might be selected. The value of the -name option\n");
-		log("is used as the value of the 'submod' attribute above.\n");
-		log("\n");
-		log("By default the cells are 'moved' from the source module and the source module\n");
-		log("will use an instance of the new module after this command is finished. Call\n");
-		log("with -copy to not modify the source module.\n");
+		log("    -hidden\n");
+		log("        instead of creating submodule ports with public names, create ports with\n");
+		log("        private names so that a subsequent 'flatten; clean' call will restore the\n");
+		log("        original module with original public names.\n");
 		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
@@ -304,6 +361,7 @@
 
 		std::string opt_name;
 		bool copy_mode = false;
+		bool hidden_mode = false;
 
 		size_t argidx;
 		for (argidx = 1; argidx < args.size(); argidx++) {
@@ -315,6 +373,10 @@
 				copy_mode = true;
 				continue;
 			}
+			if (args[argidx] == "-hidden") {
+				hidden_mode = true;
+				continue;
+			}
 			break;
 		}
 		extra_args(args, argidx, design);
@@ -335,7 +397,7 @@
 						queued_modules.push_back(mod_it.first);
 				for (auto &modname : queued_modules)
 					if (design->modules_.count(modname) != 0) {
-						SubmodWorker worker(design, design->modules_[modname], copy_mode);
+						SubmodWorker worker(design, design->modules_[modname], copy_mode, hidden_mode);
 						handled_modules.insert(modname);
 						did_something = true;
 					}
@@ -358,7 +420,7 @@
 			else {
 				Pass::call_on_module(design, module, "opt_clean");
 				log_header(design, "Continuing SUBMOD pass.\n");
-				SubmodWorker worker(design, module, copy_mode, opt_name);
+				SubmodWorker worker(design, module, copy_mode, hidden_mode, opt_name);
 			}
 		}
 
diff --git a/passes/memory/memory_bram.cc b/passes/memory/memory_bram.cc
index aa8f941..cd8c9c5 100644
--- a/passes/memory/memory_bram.cc
+++ b/passes/memory/memory_bram.cc
@@ -134,6 +134,8 @@
 		dict<string, int> min_limits, max_limits;
 		bool or_next_if_better, make_transp, make_outreg;
 		char shuffle_enable;
+		IdString attr;
+		Const value;
 	};
 
 	dict<IdString, vector<bram_t>> brams;
@@ -327,6 +329,13 @@
 				continue;
 			}
 
+			if (GetSize(tokens) >= 2 && tokens[0] == "attribute") {
+				data.attr = RTLIL::escape_id(tokens[1]);
+				if (GetSize(tokens) > 2)
+					data.value = tokens[2];
+				continue;
+			}
+
 			syntax_error();
 		}
 	}
@@ -813,6 +822,23 @@
 			return false;
 		}
 
+		// Optional 'attribute <name> [<value>]' requirement of this match rule.
+		if (!match.attr.empty()) {
+			auto it = cell->attributes.find(match.attr);
+			bool attr_mismatch;
+			if (it == cell->attributes.end())
+				// Absent attribute: only a mismatch when the rule names a value. (The
+				// 'return false' used to sit outside the unbraced 'if' and always fired.)
+				attr_mismatch = !match.value.empty();
+			else
+				attr_mismatch = it->second != match.value;
+			if (attr_mismatch) {
+				log("    Rule for bram type %s rejected: requirement 'attribute %s=\"%s\"' not met.\n",
+						log_id(match.name), log_id(match.attr), match.value.decode_string().c_str());
+				return false;
+			}
+		}
+
 		if (mode == 1)
 			return true;
 	}
@@ -1100,6 +1126,24 @@
 				goto next_match_rule;
 			}
 
+			// Optional 'attribute <name> [<value>]' requirement of this match rule:
+			// a missing attribute is acceptable only when no value was requested,
+			// a present attribute must equal the requested (possibly empty) value.
+			if (!match.attr.empty()) {
+				auto attr_it = cell->attributes.find(match.attr);
+				const bool attr_present = attr_it != cell->attributes.end();
+				bool requirement_met;
+				if (attr_present)
+					requirement_met = !(attr_it->second != match.value);
+				else
+					requirement_met = match.value.empty();
+				if (!requirement_met) {
+					log("    Rule for bram type %s rejected: requirement 'attribute %s=\"%s\"' not met.\n",
+							log_id(match.name), log_id(match.attr), match.value.decode_string().c_str());
+					goto next_match_rule;
+				}
+			}
+
 			log("    Rule #%d for bram type %s (variant %d) accepted.\n", i+1, log_id(bram.name), bram.variant);
 
 			if (or_next_if_better || !best_rule_cache.empty())
@@ -1225,6 +1269,11 @@
 		log("    dcells  .......  number of cells in 'data-direction'\n");
 		log("    cells  ........  total number of cells (acells*dcells*dups)\n");
 		log("\n");
+		log("A match containing the condition 'attribute' followed by a name and optional\n");
+		log("value requires that the memory contains the given attribute name and value\n");
+		log("(if specified) or that the attribute is not present or the value is empty (if\n");
+		log("value is not specified).\n");
+		log("\n");
 		log("The interface for the created bram instances is derived from the bram\n");
 		log("description. Use 'techmap' to convert the created bram instances into\n");
 		log("instances of the actual bram cells of your target architecture.\n");
diff --git a/passes/opt/opt_share.cc b/passes/opt/opt_share.cc
index 2c45670..f59f978 100644
--- a/passes/opt/opt_share.cc
+++ b/passes/opt/opt_share.cc
@@ -83,7 +83,9 @@
 	bool operator==(const ExtSigSpec &other) const { return is_signed == other.is_signed && sign == other.sign && sig == other.sig && semantics == other.semantics; }
 };
 
-#define BITWISE_OPS ID($_AND_), ID($_NAND_), ID($_OR_), ID($_NOR_), ID($_XOR_), ID($_XNOR_), ID($_ANDNOT_), ID($_ORNOT_), ID($and), ID($or), ID($xor), ID($xnor)
+#define FINE_BITWISE_OPS ID($_AND_), ID($_NAND_), ID($_OR_), ID($_NOR_), ID($_XOR_), ID($_XNOR_), ID($_ANDNOT_), ID($_ORNOT_)
+
+#define BITWISE_OPS FINE_BITWISE_OPS, ID($and), ID($or), ID($xor), ID($xnor)
 
 #define REDUCTION_OPS ID($reduce_and), ID($reduce_or), ID($reduce_xor), ID($reduce_xnor), ID($reduce_bool), ID($reduce_nand)
 
@@ -250,14 +252,19 @@
 		shared_op->setPort(ID(CO), alu_co.extract(0, conn_width));
 	}
 
-	shared_op->setParam(ID(Y_WIDTH), conn_width);
+	bool is_fine = shared_op->type.in(FINE_BITWISE_OPS);
+
+	if (!is_fine)
+		shared_op->setParam(ID(Y_WIDTH), conn_width);
 
 	if (decode_port(shared_op, ID::A, &assign_map) == operand) {
 		shared_op->setPort(ID::B, mux_to_oper);
-		shared_op->setParam(ID(B_WIDTH), max_width);
+		if (!is_fine)
+			shared_op->setParam(ID(B_WIDTH), max_width);
 	} else {
 		shared_op->setPort(ID::A, mux_to_oper);
-		shared_op->setParam(ID(A_WIDTH), max_width);
+		if (!is_fine)
+			shared_op->setParam(ID(A_WIDTH), max_width);
 	}
 }
 
diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc
index 145d2eb..a4d9d42 100644
--- a/passes/pmgen/Makefile.inc
+++ b/passes/pmgen/Makefile.inc
@@ -45,3 +45,9 @@
 OBJS += passes/pmgen/xilinx_srl.o
 passes/pmgen/xilinx_srl.o: passes/pmgen/xilinx_srl_pm.h
 $(eval $(call add_extra_objs,passes/pmgen/xilinx_srl_pm.h))
+
+# --------------------------------------
+
+OBJS += passes/pmgen/xilinx_dff.o
+passes/pmgen/xilinx_dff.o: passes/pmgen/xilinx_dff_pm.h
+$(eval $(call add_extra_objs,passes/pmgen/xilinx_dff_pm.h))
diff --git a/passes/pmgen/xilinx_dff.cc b/passes/pmgen/xilinx_dff.cc
new file mode 100644
index 0000000..bf0c735
--- /dev/null
+++ b/passes/pmgen/xilinx_dff.cc
@@ -0,0 +1,62 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2012  Clifford Wolf <clifford@clifford.at>
+ *                2019  Eddie Hung    <eddie@fpgeh.com>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+#include "kernel/yosys.h"
+#include "kernel/sigtools.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+#include "passes/pmgen/xilinx_dff_pm.h"
+
+struct XilinxDffPass : public Pass {
+	XilinxDffPass() : Pass("xilinx_dff", "Xilinx: TODO") { }
+	// Print the (still placeholder) usage text of this pass.
+	void help() YS_OVERRIDE
+	{
+		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("\n");
+		log("    xilinx_dff [options] [selection]\n");
+		log("\n");
+		log("TODO\n");
+		log("\n");
+	}
+	// Run the xilinx_dff pattern matcher on the selected cells of every selected module.
+	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
+	{
+		log_header(design, "Executing XILINX_DFF pass (TODO).\n");
+		// No options are recognised yet, so everything after the command
+		// name is handed to extra_args() starting at index 1.
+		// A future option would be parsed in a loop over args, e.g.:
+		//   if (args[argidx] == "-singleton") {
+		//   	singleton_mode = true;
+		//   	continue;
+		//   }
+		const size_t first_extra_arg = 1;
+		extra_args(args, first_extra_arg, design);
+
+		for (auto mod : design->selected_modules()) {
+			xilinx_dff_pm matcher(mod, mod->selected_cells());
+			matcher.run_xilinx_dff();
+		}
+	}
+} XilinxDffPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/passes/pmgen/xilinx_dff.pmg b/passes/pmgen/xilinx_dff.pmg
new file mode 100644
index 0000000..4de25c3
--- /dev/null
+++ b/passes/pmgen/xilinx_dff.pmg
@@ -0,0 +1,26 @@
+pattern xilinx_dff
+// Matches an FDRE with constant-zero R and constant-one CE whose D port is driven by a LUT2.
+match fd
+	select fd->type.in(\FDRE)
+	select port(fd, \R).is_fully_zero()
+	select port(fd, \CE).is_fully_ones()
+endmatch
+// The LUT2 whose O port is identical to the signal on the flop's D port.
+match lut
+	select lut->type.in(\LUT2)
+	index <SigSpec> port(lut, \O) === port(fd, \D)
+endmatch
+// For INIT "0010" / "0100" move the LUT inputs onto D and R; any other INIT changes nothing.
+code
+	if (lut->type == \LUT2) {
+		if (param(lut, \INIT) == Const::from_string("0010")) {
+			fd->setPort(\D, port(lut, \I0));
+			fd->setPort(\R, port(lut, \I1));
+		}
+		else if (param(lut, \INIT) == Const::from_string("0100")) {
+			fd->setPort(\R, port(lut, \I0));
+			fd->setPort(\D, port(lut, \I1));
+		}
+	}
+	else log_abort();
+endcode
diff --git a/passes/pmgen/xilinx_dsp.pmg b/passes/pmgen/xilinx_dsp.pmg
index 0ba5290..5d3b9c2 100644
--- a/passes/pmgen/xilinx_dsp.pmg
+++ b/passes/pmgen/xilinx_dsp.pmg
@@ -347,9 +347,9 @@
 	index <SigBit> port(postAdd, AB)[0] === sigP[0]
 	filter GetSize(port(postAdd, AB)) >= GetSize(sigP)
 	filter port(postAdd, AB).extract(0, GetSize(sigP)) == sigP
-	// Check that remainder of AB is a sign-extension
-	define <bool> AB_SIGNED (param(postAdd, AB == \A ? \A_SIGNED : \B_SIGNED).as_bool())
-	filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(AB_SIGNED ? sigP[GetSize(sigP)-1] : State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
+	// Check that remainder of AB is a sign- or zero-extension
+	filter port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(sigP[GetSize(sigP)-1], GetSize(port(postAdd, AB))-GetSize(sigP)) || port(postAdd, AB).extract_end(GetSize(sigP)) == SigSpec(State::S0, GetSize(port(postAdd, AB))-GetSize(sigP))
+
 	set postAddAB AB
 	optional
 endmatch
diff --git a/passes/techmap/abc9.cc b/passes/techmap/abc9.cc
index 27106cc..1931037 100644
--- a/passes/techmap/abc9.cc
+++ b/passes/techmap/abc9.cc
@@ -30,7 +30,7 @@
 						"&st; &if -g -K 6; &synch2; &if {W} -v; &save; &load; "\
 						"&mfs; &ps -l"
 #else
-#define ABC_COMMAND_LUT "&st; &scorr; &sweep; &dc2; &st; &dch -f; &ps; &if {W} {D} -v; &mfs; &ps -l"
+#define ABC_COMMAND_LUT "&st; &scorr; &sweep; &dc2; &st; &dch -f; &ps; &if {W} {D} -v; &mfs; &ps -l; time"
 #endif
 
 
@@ -65,20 +65,15 @@
 
 bool markgroups;
 int map_autoidx;
-SigMap assign_map;
-RTLIL::Module *module;
-
-bool clk_polarity, en_polarity;
-RTLIL::SigSpec clk_sig, en_sig;
 
 inline std::string remap_name(RTLIL::IdString abc9_name)
 {
 	return stringf("$abc$%d$%s", map_autoidx, abc9_name.c_str()+1);
 }
 
-void handle_loops(RTLIL::Design *design)
+void handle_loops(RTLIL::Design *design, RTLIL::Module *module)
 {
-	Pass::call(design, "scc -set_attr abc9_scc_id {}");
+	Pass::call(design, "scc -set_attr abc9_scc_id {} % w:*");
 
 	// For every unique SCC found, (arbitrarily) find the first
 	// cell in the component, and select (and mark) all its output
@@ -243,49 +238,15 @@
 	}
 };
 
-void abc9_module(RTLIL::Design *design, RTLIL::Module *current_module, std::string script_file, std::string exe_file,
-		bool cleanup, vector<int> lut_costs, bool dff_mode, std::string clk_str,
+void abc9_module(RTLIL::Design *design, RTLIL::Module *module, std::string script_file, std::string exe_file,
+		bool cleanup, vector<int> lut_costs, bool /*dff_mode*/, std::string /*clk_str*/,
 		bool /*keepff*/, std::string delay_target, std::string /*lutin_shared*/, bool fast_mode,
 		bool show_tempdir, std::string box_file, std::string lut_file,
 		std::string wire_delay, const dict<int,IdString> &box_lookup, bool nomfs
 )
 {
-	module = current_module;
 	map_autoidx = autoidx++;
 
-	if (clk_str != "$")
-	{
-		clk_polarity = true;
-		clk_sig = RTLIL::SigSpec();
-
-		en_polarity = true;
-		en_sig = RTLIL::SigSpec();
-	}
-
-	if (!clk_str.empty() && clk_str != "$")
-	{
-		if (clk_str.find(',') != std::string::npos) {
-			int pos = clk_str.find(',');
-			std::string en_str = clk_str.substr(pos+1);
-			clk_str = clk_str.substr(0, pos);
-			if (en_str[0] == '!') {
-				en_polarity = false;
-				en_str = en_str.substr(1);
-			}
-			if (module->wires_.count(RTLIL::escape_id(en_str)) != 0)
-				en_sig = assign_map(RTLIL::SigSpec(module->wires_.at(RTLIL::escape_id(en_str)), 0));
-		}
-		if (clk_str[0] == '!') {
-			clk_polarity = false;
-			clk_str = clk_str.substr(1);
-		}
-		if (module->wires_.count(RTLIL::escape_id(clk_str)) != 0)
-			clk_sig = assign_map(RTLIL::SigSpec(module->wires_.at(RTLIL::escape_id(clk_str)), 0));
-	}
-
-	if (dff_mode && clk_sig.empty())
-		log_cmd_error("Clock domain %s not found.\n", clk_str.c_str());
-
 	std::string tempdir_name = "/tmp/yosys-abc-XXXXXX";
 	if (!cleanup)
 		tempdir_name[0] = tempdir_name[4] = '_';
@@ -361,39 +322,14 @@
 	fprintf(f, "%s\n", abc9_script.c_str());
 	fclose(f);
 
-	if (dff_mode || !clk_str.empty())
-	{
-		if (clk_sig.size() == 0)
-			log("No%s clock domain found. Not extracting any FF cells.\n", clk_str.empty() ? "" : " matching");
-		else {
-			log("Found%s %s clock domain: %s", clk_str.empty() ? "" : " matching", clk_polarity ? "posedge" : "negedge", log_signal(clk_sig));
-			if (en_sig.size() != 0)
-				log(", enabled by %s%s", en_polarity ? "" : "!", log_signal(en_sig));
-			log("\n");
-		}
-	}
-
-	bool count_output = false;
-	for (auto port_name : module->ports) {
-		RTLIL::Wire *port_wire = module->wire(port_name);
-		log_assert(port_wire);
-		if (port_wire->port_output) {
-			count_output = true;
-			break;
-		}
-	}
-
+	//bool count_output = false;
 	log_push();
 
-	if (count_output)
+	//if (count_output)
 	{
-		design->selection_stack.emplace_back(false);
-		RTLIL::Selection& sel = design->selection_stack.back();
-		sel.select(module);
+		handle_loops(design, module);
 
-		handle_loops(design);
-
-		Pass::call(design, "aigmap");
+		Pass::call(design, "aigmap -select");
 
 		//log("Extracted %d gates and %d wires to a netlist network with %d inputs and %d outputs.\n",
 		//		count_gates, GetSize(signal_list), count_input, count_output);
@@ -411,15 +347,13 @@
 		log_assert(!design->module(ID($__abc9__)));
 		{
 			AigerReader reader(design, ifs, ID($__abc9__), "" /* clk_name */, buffer.c_str() /* map_filename */, true /* wideports */);
-			reader.parse_xaiger();
+			reader.parse_xaiger(box_lookup);
 		}
 		ifs.close();
-		Pass::call(design, stringf("write_verilog -noexpr -norename"));
+		Pass::call_on_module(design, design->module(ID($__abc9__)), stringf("write_verilog -noexpr -norename -selected"));
 		design->remove(design->module(ID($__abc9__)));
 #endif
 
-		design->selection_stack.pop_back();
-
 		// Now 'unexpose' those wires by undoing
 		// the expose operation -- remove them from PO/PI
 		// and re-connecting them back together
@@ -487,7 +421,7 @@
 		ifs.close();
 
 #if 0
-		Pass::call(design, stringf("write_verilog -noexpr -norename"));
+		Pass::call_on_module(design, design->module(ID($__abc9__)), stringf("write_verilog -noexpr -norename -selected"));
 #endif
 
 		log_header(design, "Re-integrating ABC9 results.\n");
@@ -519,9 +453,8 @@
 
 		dict<IdString, bool> abc9_box;
 		vector<RTLIL::Cell*> boxes;
-		for (const auto &it : module->cells_) {
-			auto cell = it.second;
-			if (cell->type.in(ID($_AND_), ID($_NOT_))) {
+		for (auto cell : module->selected_cells()) {
+			if (cell->type.in(ID($_AND_), ID($_NOT_), ID($__ABC9_FF_))) {
 				module->remove(cell);
 				continue;
 			}
@@ -540,19 +473,19 @@
 		dict<SigBit, std::vector<RTLIL::Cell*>> bit2sinks;
 
 		std::map<IdString, int> cell_stats;
-		for (auto c : mapped_mod->cells())
+		for (auto mapped_cell : mapped_mod->cells())
 		{
-			toposort.node(c->name);
+			toposort.node(mapped_cell->name);
 
 			RTLIL::Cell *cell = nullptr;
-			if (c->type == ID($_NOT_)) {
-				RTLIL::SigBit a_bit = c->getPort(ID::A);
-				RTLIL::SigBit y_bit = c->getPort(ID::Y);
-				bit_users[a_bit].insert(c->name);
-				bit_drivers[y_bit].insert(c->name);
+			if (mapped_cell->type == ID($_NOT_)) {
+				RTLIL::SigBit a_bit = mapped_cell->getPort(ID::A);
+				RTLIL::SigBit y_bit = mapped_cell->getPort(ID::Y);
+				bit_users[a_bit].insert(mapped_cell->name);
+				bit_drivers[y_bit].insert(mapped_cell->name);
 
 				if (!a_bit.wire) {
-					c->setPort(ID::Y, module->addWire(NEW_ID));
+					mapped_cell->setPort(ID::Y, module->addWire(NEW_ID));
 					RTLIL::Wire *wire = module->wire(remap_name(y_bit.wire->name));
 					log_assert(wire);
 					module->connect(RTLIL::SigBit(wire, y_bit.offset), State::S1);
@@ -576,7 +509,7 @@
 					if (!driver_lut) {
 						// If a driver couldn't be found (could be from PI or box CI)
 						// then implement using a LUT
-						cell = module->addLut(remap_name(stringf("%s$lut", c->name.c_str())),
+						cell = module->addLut(remap_name(stringf("%s$lut", mapped_cell->name.c_str())),
 								RTLIL::SigBit(module->wires_.at(remap_name(a_bit.wire->name)), a_bit.offset),
 								RTLIL::SigBit(module->wires_.at(remap_name(y_bit.wire->name)), y_bit.offset),
 								RTLIL::Const::from_string("01"));
@@ -584,7 +517,7 @@
 						cell_stats[ID($lut)]++;
 					}
 					else
-						not2drivers[c] = driver_lut;
+						not2drivers[mapped_cell] = driver_lut;
 					continue;
 				}
 				else
@@ -592,24 +525,26 @@
 				if (cell && markgroups) cell->attributes[ID(abcgroup)] = map_autoidx;
 				continue;
 			}
-			cell_stats[c->type]++;
+			cell_stats[mapped_cell->type]++;
 
 			RTLIL::Cell *existing_cell = nullptr;
-			if (c->type == ID($lut)) {
-				if (GetSize(c->getPort(ID::A)) == 1 && c->getParam(ID(LUT)) == RTLIL::Const::from_string("01")) {
-					SigSpec my_a = module->wires_.at(remap_name(c->getPort(ID::A).as_wire()->name));
-					SigSpec my_y = module->wires_.at(remap_name(c->getPort(ID::Y).as_wire()->name));
+			if (mapped_cell->type.in(ID($lut), ID($__ABC9_FF_))) {
+				if (mapped_cell->type == ID($lut) &&
+						GetSize(mapped_cell->getPort(ID::A)) == 1 &&
+						mapped_cell->getParam(ID(LUT)) == RTLIL::Const::from_string("01")) {
+					SigSpec my_a = module->wires_.at(remap_name(mapped_cell->getPort(ID::A).as_wire()->name));
+					SigSpec my_y = module->wires_.at(remap_name(mapped_cell->getPort(ID::Y).as_wire()->name));
 					module->connect(my_y, my_a);
-					if (markgroups) c->attributes[ID(abcgroup)] = map_autoidx;
+					if (markgroups) mapped_cell->attributes[ID(abcgroup)] = map_autoidx;
 					log_abort();
 					continue;
 				}
-				cell = module->addCell(remap_name(c->name), c->type);
+				cell = module->addCell(remap_name(mapped_cell->name), mapped_cell->type);
 			}
 			else {
-				existing_cell = module->cell(c->name);
+				existing_cell = module->cell(mapped_cell->name);
 				log_assert(existing_cell);
-				cell = module->addCell(remap_name(c->name), c->type);
+				cell = module->addCell(remap_name(mapped_cell->name), mapped_cell->type);
 			}
 
 			if (markgroups) cell->attributes[ID(abcgroup)] = map_autoidx;
@@ -618,10 +553,13 @@
 				cell->attributes = existing_cell->attributes;
 			}
 			else {
-				cell->parameters = c->parameters;
-				cell->attributes = c->attributes;
+				cell->parameters = mapped_cell->parameters;
+				cell->attributes = mapped_cell->attributes;
 			}
-			for (auto &conn : c->connections()) {
+
+			RTLIL::Module* box_module = design->module(mapped_cell->type);
+			auto abc9_flop = box_module && box_module->attributes.count("\\abc9_flop");
+			for (auto &conn : mapped_cell->connections()) {
 				RTLIL::SigSpec newsig;
 				for (auto c : conn.second.chunks()) {
 					if (c.width == 0)
@@ -633,15 +571,17 @@
 				}
 				cell->setPort(conn.first, newsig);
 
-				if (cell->input(conn.first)) {
-					for (auto i : newsig)
-						bit2sinks[i].push_back(cell);
-					for (auto i : conn.second)
-						bit_users[i].insert(c->name);
+				if (!abc9_flop) {
+					if (cell->input(conn.first)) {
+						for (auto i : newsig)
+							bit2sinks[i].push_back(cell);
+						for (auto i : conn.second)
+							bit_users[i].insert(mapped_cell->name);
+					}
+					if (cell->output(conn.first))
+						for (auto i : conn.second)
+							bit_drivers[i].insert(mapped_cell->name);
 				}
-				if (cell->output(conn.first))
-					for (auto i : conn.second)
-						bit_drivers[i].insert(c->name);
 			}
 		}
 
@@ -787,10 +727,10 @@
 
 		design->remove(mapped_mod);
 	}
-	else
-	{
-		log("Don't call ABC as there is nothing to map.\n");
-	}
+	//else
+	//{
+	//	log("Don't call ABC as there is nothing to map.\n");
+	//}
 
 	if (cleanup)
 	{
@@ -812,6 +752,10 @@
 		log("This pass uses the ABC tool [1] for technology mapping of yosys's internal gate\n");
 		log("library to a target architecture.\n");
 		log("\n");
+		log("Selection must only contain fully selected modules. It is assumed that such\n");
+		log("modules contain only cells belonging to the same clock domain, as produced by\n");
+		log("the 'clkpart' command.\n");
+		log("\n");
 		log("    -exe <command>\n");
 #ifdef ABCEXTERNAL
 		log("        use the specified command instead of \"" ABCEXTERNAL "\" to execute ABC.\n");
@@ -903,8 +847,17 @@
 		log("internally. This is not going to \"run ABC on your design\". It will instead run\n");
 		log("ABC on logic snippets extracted from your design. You will not get any useful\n");
 		log("output when passing an ABC script that writes a file. Instead write your full\n");
-		log("design as BLIF file with write_blif and then load that into ABC externally if\n");
-		log("you want to use ABC to convert your design into another format.\n");
+		log("design as an XAIGER file with write_xaiger and then load that into ABC externally\n");
+		log("if you want to use ABC to convert your design into another format.\n");
+		log("\n");
+		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("Delay targets can also be specified on a per clock basis by attaching a\n");
+		log("'(* abc9_period = <int> *)' attribute onto clock wires (specifically, onto wires\n");
+		log("that appear inside any special '$abc9_clock' wires inserted by abc9_map.v). This\n");
+		log("can be achieved by modifying the source directly, or through a `setattr`\n");
+		log("invocation. Since such attributes cannot yet be propagated through a\n");
+		log("hierarchical design (whether or not it has been uniquified) it is recommended\n");
+		log("that the design be flattened when using this feature.\n");
 		log("\n");
 		log("[1] http://www.eecs.berkeley.edu/~alanmi/abc/\n");
 		log("\n");
@@ -914,8 +867,6 @@
 		log_header(design, "Executing ABC9 pass (technology mapping using ABC9).\n");
 		log_push();
 
-		assign_map.clear();
-
 #ifdef ABCEXTERNAL
 		std::string exe_file = ABCEXTERNAL;
 #else
@@ -923,7 +874,7 @@
 #endif
 		std::string script_file, clk_str, box_file, lut_file;
 		std::string delay_target, lutin_shared = "-S 1", wire_delay;
-		bool fast_mode = false, dff_mode = false, keepff = false, cleanup = true;
+		bool fast_mode = false, /*dff_mode = false,*/ keepff = false, cleanup = true;
 		bool show_tempdir = false;
 		bool nomfs = false;
 		vector<int> lut_costs;
@@ -1125,174 +1076,68 @@
 			}
 		}
 
-		for (auto mod : design->selected_modules())
+		for (auto module : design->selected_modules())
 		{
-			if (mod->attributes.count(ID(abc9_box_id)))
+			if (module->attributes.count(ID(abc9_box_id)))
 				continue;
 
-			if (mod->processes.size() > 0) {
-				log("Skipping module %s as it contains processes.\n", log_id(mod));
+			if (module->processes.size() > 0) {
+				log("Skipping module %s as it contains processes.\n", log_id(module));
 				continue;
 			}
 
-			assign_map.set(mod);
-
-			if (!dff_mode || !clk_str.empty()) {
-				abc9_module(design, mod, script_file, exe_file, cleanup, lut_costs, dff_mode, clk_str, keepff,
-						delay_target, lutin_shared, fast_mode, show_tempdir,
-						box_file, lut_file, wire_delay, box_lookup, nomfs);
+			if (!design->selected_whole_module(module)) {
+				log("Skipping module %s as it is partially selected.\n", log_id(module));
 				continue;
 			}
 
-			CellTypes ct(design);
+			SigMap sigmap(module);
 
-			std::vector<RTLIL::Cell*> all_cells = mod->selected_cells();
-			std::set<RTLIL::Cell*> unassigned_cells(all_cells.begin(), all_cells.end());
+			typedef std::pair<IdString, SigSpec> ctrldomain_t;
+			std::map<ctrldomain_t, int> mergeability_class;
+			pool<Wire*> clocks;
+			std::string target = delay_target;
 
-			std::set<RTLIL::Cell*> expand_queue, next_expand_queue;
-			std::set<RTLIL::Cell*> expand_queue_up, next_expand_queue_up;
-			std::set<RTLIL::Cell*> expand_queue_down, next_expand_queue_down;
+			for (auto cell : module->selected_cells()) {
+				auto inst_module = design->module(cell->type);
+				if (!inst_module || !inst_module->attributes.count("\\abc9_flop"))
+					continue;
 
-			typedef tuple<bool, RTLIL::SigSpec, bool, RTLIL::SigSpec> clkdomain_t;
-			std::map<clkdomain_t, std::vector<RTLIL::Cell*>> assigned_cells;
-			std::map<RTLIL::Cell*, clkdomain_t> assigned_cells_reverse;
-
-			std::map<RTLIL::Cell*, std::set<RTLIL::SigBit>> cell_to_bit, cell_to_bit_up, cell_to_bit_down;
-			std::map<RTLIL::SigBit, std::set<RTLIL::Cell*>> bit_to_cell, bit_to_cell_up, bit_to_cell_down;
-
-			for (auto cell : all_cells)
-			{
-				clkdomain_t key;
-
-				for (auto &conn : cell->connections())
-				for (auto bit : conn.second) {
-					bit = assign_map(bit);
-					if (bit.wire != nullptr) {
-						cell_to_bit[cell].insert(bit);
-						bit_to_cell[bit].insert(cell);
-						if (ct.cell_input(cell->type, conn.first)) {
-							cell_to_bit_up[cell].insert(bit);
-							bit_to_cell_down[bit].insert(cell);
-						}
-						if (ct.cell_output(cell->type, conn.first)) {
-							cell_to_bit_down[cell].insert(bit);
-							bit_to_cell_up[bit].insert(cell);
+				if (delay_target.empty()) { // no global -D given: derive target from this flop's clock wire
+					Wire *abc9_clock_wire = module->wire(stringf("%s.$abc9_clock", cell->name.c_str()));
+					if (abc9_clock_wire == NULL)
+						log_error("'%s.$abc9_clock' is not a wire present in module '%s'.\n", cell->name.c_str(), log_id(module));
+					SigBit abc9_clock = sigmap(abc9_clock_wire);
+					auto r = clocks.insert(abc9_clock.wire); // r.second is true only the first time a clock is seen
+					if (r.second && abc9_clock.wire != nullptr) { // sigmap may yield a constant bit, which has no wire/attributes
+						auto it = abc9_clock.wire->attributes.find("\\abc9_period");
+						if (it != abc9_clock.wire->attributes.end()) {
+							int period = it->second.as_int();
+							log("Identified target period = %d ps for clock %s\n", period, log_signal(abc9_clock));
+							target = stringf("-D %d", period);
 						}
 					}
 				}
 
-				if (cell->type.in(ID($_DFF_N_), ID($_DFF_P_)))
-				{
-					key = clkdomain_t(cell->type == ID($_DFF_P_), assign_map(cell->getPort(ID(C))), true, RTLIL::SigSpec());
-				}
-				else
-				if (cell->type.in(ID($_DFFE_NN_), ID($_DFFE_NP_), ID($_DFFE_PN_), ID($_DFFE_PP_)))
-				{
-					bool this_clk_pol = cell->type.in(ID($_DFFE_PN_), ID($_DFFE_PP_));
-					bool this_en_pol = cell->type.in(ID($_DFFE_NP_), ID($_DFFE_PP_));
-					key = clkdomain_t(this_clk_pol, assign_map(cell->getPort(ID(C))), this_en_pol, assign_map(cell->getPort(ID(E))));
-				}
-				else
-					continue;
 
-				unassigned_cells.erase(cell);
-				expand_queue.insert(cell);
-				expand_queue_up.insert(cell);
-				expand_queue_down.insert(cell);
+				Wire *abc9_control_wire = module->wire(stringf("%s.$abc9_control", cell->name.c_str())); // wire emitted by the abc9_map.v techmap rules
+				if (abc9_control_wire == NULL)
+					log_error("'%s.$abc9_control' is not a wire present in module '%s'.\n", cell->name.c_str(), log_id(module));
+				SigSpec abc9_control = sigmap(abc9_control_wire);
 
-				assigned_cells[key].push_back(cell);
-				assigned_cells_reverse[cell] = key;
+				ctrldomain_t key(cell->type, abc9_control);
+				auto r = mergeability_class.emplace(key, mergeability_class.size() + 1);
+				auto YS_ATTRIBUTE(unused) r2 = cell->attributes.insert(std::make_pair(ID(abc9_mergeability),  r.first->second));
+				log_assert(r2.second);
 			}
 
-			while (!expand_queue_up.empty() || !expand_queue_down.empty())
-			{
-				if (!expand_queue_up.empty())
-				{
-					RTLIL::Cell *cell = *expand_queue_up.begin();
-					clkdomain_t key = assigned_cells_reverse.at(cell);
-					expand_queue_up.erase(cell);
-
-					for (auto bit : cell_to_bit_up[cell])
-					for (auto c : bit_to_cell_up[bit])
-						if (unassigned_cells.count(c)) {
-							unassigned_cells.erase(c);
-							next_expand_queue_up.insert(c);
-							assigned_cells[key].push_back(c);
-							assigned_cells_reverse[c] = key;
-							expand_queue.insert(c);
-						}
-				}
-
-				if (!expand_queue_down.empty())
-				{
-					RTLIL::Cell *cell = *expand_queue_down.begin();
-					clkdomain_t key = assigned_cells_reverse.at(cell);
-					expand_queue_down.erase(cell);
-
-					for (auto bit : cell_to_bit_down[cell])
-					for (auto c : bit_to_cell_down[bit])
-						if (unassigned_cells.count(c)) {
-							unassigned_cells.erase(c);
-							next_expand_queue_up.insert(c);
-							assigned_cells[key].push_back(c);
-							assigned_cells_reverse[c] = key;
-							expand_queue.insert(c);
-						}
-				}
-
-				if (expand_queue_up.empty() && expand_queue_down.empty()) {
-					expand_queue_up.swap(next_expand_queue_up);
-					expand_queue_down.swap(next_expand_queue_down);
-				}
-			}
-
-			while (!expand_queue.empty())
-			{
-				RTLIL::Cell *cell = *expand_queue.begin();
-				clkdomain_t key = assigned_cells_reverse.at(cell);
-				expand_queue.erase(cell);
-
-				for (auto bit : cell_to_bit.at(cell)) {
-					for (auto c : bit_to_cell[bit])
-						if (unassigned_cells.count(c)) {
-							unassigned_cells.erase(c);
-							next_expand_queue.insert(c);
-							assigned_cells[key].push_back(c);
-							assigned_cells_reverse[c] = key;
-						}
-					bit_to_cell[bit].clear();
-				}
-
-				if (expand_queue.empty())
-					expand_queue.swap(next_expand_queue);
-			}
-
-			clkdomain_t key(true, RTLIL::SigSpec(), true, RTLIL::SigSpec());
-			for (auto cell : unassigned_cells) {
-				assigned_cells[key].push_back(cell);
-				assigned_cells_reverse[cell] = key;
-			}
-
-			log_header(design, "Summary of detected clock domains:\n");
-			for (auto &it : assigned_cells)
-				log("  %d cells in clk=%s%s, en=%s%s\n", GetSize(it.second),
-						std::get<0>(it.first) ? "" : "!", log_signal(std::get<1>(it.first)),
-						std::get<2>(it.first) ? "" : "!", log_signal(std::get<3>(it.first)));
-
-			for (auto &it : assigned_cells) {
-				clk_polarity = std::get<0>(it.first);
-				clk_sig = assign_map(std::get<1>(it.first));
-				en_polarity = std::get<2>(it.first);
-				en_sig = assign_map(std::get<3>(it.first));
-				abc9_module(design, mod, script_file, exe_file, cleanup, lut_costs, !clk_sig.empty(), "$",
-						keepff, delay_target, lutin_shared, fast_mode, show_tempdir,
-						box_file, lut_file, wire_delay, box_lookup, nomfs);
-				assign_map.set(mod);
-			}
+			design->selected_active_module = module->name.str();
+			abc9_module(design, module, script_file, exe_file, cleanup, lut_costs, false, "$",
+					keepff, target, lutin_shared, fast_mode, show_tempdir,
+					box_file, lut_file, wire_delay, box_lookup, nomfs);
+			design->selected_active_module.clear();
 		}
 
-		assign_map.clear();
-
 		log_pop();
 	}
 } Abc9Pass;
diff --git a/techlibs/xilinx/abc9_map.v b/techlibs/xilinx/abc9_map.v
index 0eac08f..29ddf71 100644
--- a/techlibs/xilinx/abc9_map.v
+++ b/techlibs/xilinx/abc9_map.v
@@ -18,7 +18,230 @@
  *
  */
 
-// ============================================================================
+// The following techmapping rules are intended to be run (with -max_iter 1)
+//   before invoking the `abc9` pass in order to transform the design into
+//   a format that it understands.
+//
+// For example, (complex) flip-flops are expected to be described as a
+//   combinatorial box (containing all control logic such as clock enable
+//   or synchronous resets) followed by a basic D-Q flop.
+// Yosys will automatically analyse the simulation model (described in
+//   cells_sim.v) and detach any $_DFF_P_ or $_DFF_N_ cells present in
+//   order to extract the combinatorial control logic left behind.
+//   Specifically, a simulation model similar to the one below:
+//
+//                ++===================================++
+//                ||                        Sim model  ||
+//                ||      /\/\/\/\                     ||
+//            D -->>-----<        >     +------+       ||
+//            R -->>-----<  Comb. >     |$_DFF_|       ||
+//           CE -->>-----<  logic >-----| [NP]_|---+---->>-- Q
+//                ||  +--<        >     +------+   |   ||
+//                ||  |   \/\/\/\/                 |   ||
+//                ||  |                            |   ||
+//                ||  +----------------------------+   ||
+//                ||                                   ||
+//                ++===================================++
+//
+//   is transformed into:
+//
+//                ++==================++
+//                ||         Comb box ||
+//                ||                  ||
+//                ||      /\/\/\/\    ||
+//           D  -->>-----<        >   ||            +------+
+//           R  -->>-----<  Comb. >   ||            |$__ABC|
+//          CE  -->>-----<  logic >--->>-- $nextQ --| _FF_ |--+-->> Q
+// $abc9_currQ +-->>-----<        >   ||            +------+  |
+//             |  ||      \/\/\/\/    ||                      |
+//             |  ||                  ||                      |
+//             |  ++==================++                      |
+//             |                                              |
+//             +----------------------------------------------+
+//
+// The purpose of the following FD* rules is to wrap the flop with:
+// (a) a special $__ABC9_FF_ in front of the FD*'s output, indicating to abc9
+//     the connectivity of its basic D-Q flop
+// (b) a special _TECHMAP_REPLACE_.$abc9_clock wire to indicate its clock
+//     signal, used to extract the delay target
+// (c) a special _TECHMAP_REPLACE_.$abc9_control that captures the control 
+//     domain (which, combined with this cell type, encodes to `abc9' which
+//     flops may be merged together)
+// (d) a special _TECHMAP_REPLACE_.$abc9_currQ wire that will be used for feedback
+//     into the (combinatorial) FD* cell to facilitate clock-enable behaviour
+module FDRE (output reg Q, input C, CE, D, R);
+  parameter [0:0] INIT = 1'b0;
+  parameter [0:0] IS_C_INVERTED = 1'b0;
+  parameter [0:0] IS_D_INVERTED = 1'b0;
+  parameter [0:0] IS_R_INVERTED = 1'b0;
+  wire $nextQ;
+  FDRE #(
+    .INIT(INIT),
+    .IS_C_INVERTED(IS_C_INVERTED),
+    .IS_D_INVERTED(IS_D_INVERTED),
+    .IS_R_INVERTED(IS_R_INVERTED)
+  ) _TECHMAP_REPLACE_ (
+    .D(D), .Q($nextQ), .C(C), .CE(CE), .R(R)
+  );
+  \$__ABC9_FF_ abc_dff (.D($nextQ), .Q(Q));
+
+  // Special signals
+  wire [0:0] _TECHMAP_REPLACE_.$abc9_clock = C;
+  wire [3:0] _TECHMAP_REPLACE_.$abc9_control = {CE, IS_D_INVERTED, R, IS_R_INVERTED};
+  wire _TECHMAP_REPLACE_.$abc9_currQ = Q;
+endmodule
+module FDRE_1 (output reg Q, input C, CE, D, R);  // abc9 wrapper: FDRE_1 box followed by a $__ABC9_FF_
+  parameter [0:0] INIT = 1'b0;
+  wire $nextQ;
+  FDRE_1 #(
+    .INIT(INIT)
+  ) _TECHMAP_REPLACE_ (
+    .D(D), .Q($nextQ), .C(C), .CE(CE), .R(R)
+  );
+  \$__ABC9_FF_ abc_dff (.D($nextQ), .Q(Q));
+
+  // Special signals
+  wire [0:0] _TECHMAP_REPLACE_.$abc9_clock = C;
+  wire [3:0] _TECHMAP_REPLACE_.$abc9_control = {CE, 1'b0 /* IS_D_INVERTED */, R, 1'b0 /* IS_R_INVERTED */};
+  wire _TECHMAP_REPLACE_.$abc9_currQ = Q;
+endmodule
+
+module FDCE (output reg Q, input C, CE, D, CLR);
+  parameter [0:0] INIT = 1'b0;
+  parameter [0:0] IS_C_INVERTED = 1'b0;
+  parameter [0:0] IS_D_INVERTED = 1'b0;
+  parameter [0:0] IS_CLR_INVERTED = 1'b0;
+  wire $nextQ, $abc9_currQ;
+  FDCE #(
+    .INIT(INIT),
+    .IS_C_INVERTED(IS_C_INVERTED),
+    .IS_D_INVERTED(IS_D_INVERTED),
+    .IS_CLR_INVERTED(IS_CLR_INVERTED)
+  ) _TECHMAP_REPLACE_ (
+    .D(D), .Q($nextQ),  .C(C), .CE(CE), .CLR(CLR)
+                                         // ^^^ Note that async
+                                         //     control is not directly
+                                         //     supported by abc9 but its
+                                         //     behaviour is captured by
+                                         //     $__ABC9_ASYNC below
+  );
+  \$__ABC9_FF_ abc_dff (.D($nextQ), .Q($abc9_currQ));
+  // Since this is an async flop, async behaviour is also dealt with
+  //   using the $__ABC9_ASYNC box by abc9_map.v
+  \$__ABC9_ASYNC abc_async (.A($abc9_currQ), .S(CLR ^ IS_CLR_INVERTED), .Y(Q));
+
+  // Special signals
+  wire [0:0] _TECHMAP_REPLACE_.$abc9_clock = C;
+  wire [3:0] _TECHMAP_REPLACE_.$abc9_control = {CE, IS_D_INVERTED, CLR, IS_CLR_INVERTED};
+  wire _TECHMAP_REPLACE_.$abc9_currQ = $abc9_currQ;
+endmodule
+module FDCE_1 (output reg Q, input C, CE, D, CLR);
+  parameter [0:0] INIT = 1'b0;
+  wire $nextQ, $abc9_currQ;
+  FDCE_1 #(
+    .INIT(INIT)
+  ) _TECHMAP_REPLACE_ (
+    .D(D), .Q($nextQ), .C(C), .CE(CE), .CLR(CLR)
+                                         // ^^^ Note that async
+                                         //     control is not directly
+                                         //     supported by abc9 but its
+                                         //     behaviour is captured by
+                                         //     $__ABC9_ASYNC below
+  );
+  \$__ABC9_FF_ abc_dff (.D($nextQ), .Q($abc9_currQ));
+  \$__ABC9_ASYNC abc_async (.A($abc9_currQ), .S(CLR), .Y(Q));
+
+  // Special signals
+  wire [0:0] _TECHMAP_REPLACE_.$abc9_clock = C;
+  wire [3:0] _TECHMAP_REPLACE_.$abc9_control = {CE, 1'b0 /* IS_D_INVERTED */, CLR, 1'b0 /* IS_CLR_INVERTED */};
+  wire _TECHMAP_REPLACE_.$abc9_currQ = $abc9_currQ;
+endmodule
+
+module FDPE (output reg Q, input C, CE, D, PRE);  // abc9 wrapper: FDPE box, $__ABC9_FF_, then $__ABC9_ASYNC for the preset
+  parameter [0:0] INIT = 1'b0;
+  parameter [0:0] IS_C_INVERTED = 1'b0;
+  parameter [0:0] IS_D_INVERTED = 1'b0;
+  parameter [0:0] IS_PRE_INVERTED = 1'b0;
+  wire $nextQ, $abc9_currQ;
+  FDPE #(
+    .INIT(INIT),
+    .IS_C_INVERTED(IS_C_INVERTED),
+    .IS_D_INVERTED(IS_D_INVERTED),
+    .IS_PRE_INVERTED(IS_PRE_INVERTED)
+  ) _TECHMAP_REPLACE_ (
+    .D(D), .Q($nextQ), .C(C), .CE(CE), .PRE(PRE)
+                                         // ^^^ Note that async
+                                         //     control is not directly
+                                         //     supported by abc9 but its
+                                         //     behaviour is captured by
+                                         //     $__ABC9_ASYNC below
+  );
+  \$__ABC9_FF_ abc_dff (.D($nextQ), .Q($abc9_currQ));
+  \$__ABC9_ASYNC abc_async (.A($abc9_currQ), .S(PRE ^ IS_PRE_INVERTED), .Y(Q));
+
+  // Special signals
+  wire [0:0] _TECHMAP_REPLACE_.$abc9_clock = C;
+  wire [3:0] _TECHMAP_REPLACE_.$abc9_control = {CE, IS_D_INVERTED, PRE, IS_PRE_INVERTED};
+  wire _TECHMAP_REPLACE_.$abc9_currQ = $abc9_currQ;
+endmodule
+module FDPE_1 (output reg Q, input C, CE, D, PRE);
+  parameter [0:0] INIT = 1'b0;
+  wire $nextQ, $abc9_currQ;
+  FDPE_1 #(
+    .INIT(INIT)
+  ) _TECHMAP_REPLACE_ (
+    .D(D), .Q($nextQ), .C(C), .CE(CE), .PRE(PRE)
+                                         // ^^^ Note that async
+                                         //     control is not directly
+                                         //     supported by abc9 but its
+                                         //     behaviour is captured by
+                                         //     $__ABC9_ASYNC below
+  );
+  \$__ABC9_FF_ abc_dff (.D($nextQ), .Q($abc9_currQ));
+  \$__ABC9_ASYNC abc_async (.A($abc9_currQ), .S(PRE), .Y(Q));
+
+  // Special signals
+  wire [0:0] _TECHMAP_REPLACE_.$abc9_clock = C;
+  wire [3:0] _TECHMAP_REPLACE_.$abc9_control = {CE, 1'b0 /* IS_D_INVERTED */, PRE, 1'b0 /* IS_PRE_INVERTED */};
+  wire _TECHMAP_REPLACE_.$abc9_currQ = $abc9_currQ;
+endmodule
+
+module FDSE (output reg Q, input C, CE, D, S);
+  parameter [0:0] INIT = 1'b1;
+  parameter [0:0] IS_C_INVERTED = 1'b0;
+  parameter [0:0] IS_D_INVERTED = 1'b0;
+  parameter [0:0] IS_S_INVERTED = 1'b0;
+  wire $nextQ;
+  FDSE #(
+    .INIT(INIT),
+    .IS_C_INVERTED(IS_C_INVERTED),
+    .IS_D_INVERTED(IS_D_INVERTED),
+    .IS_S_INVERTED(IS_S_INVERTED)
+  ) _TECHMAP_REPLACE_ (
+    .D(D), .Q($nextQ), .C(C), .CE(CE), .S(S)
+  );
+  \$__ABC9_FF_ abc_dff (.D($nextQ), .Q(Q));
+
+  // Special signals
+  wire [0:0] _TECHMAP_REPLACE_.$abc9_clock = C;
+  wire [3:0] _TECHMAP_REPLACE_.$abc9_control = {CE, IS_D_INVERTED, S, IS_S_INVERTED};
+  wire _TECHMAP_REPLACE_.$abc9_currQ = Q;
+endmodule
+module FDSE_1 (output reg Q, input C, CE, D, S);  // abc9 wrapper: FDSE_1 box followed by a $__ABC9_FF_
+  parameter [0:0] INIT = 1'b1;
+  wire $nextQ;
+  FDSE_1 #(
+    .INIT(INIT)
+  ) _TECHMAP_REPLACE_ (
+    .D(D), .Q($nextQ), .C(C), .CE(CE), .S(S)
+  );
+  \$__ABC9_FF_ abc_dff (.D($nextQ), .Q(Q));
+
+  // Special signals
+  wire [0:0] _TECHMAP_REPLACE_.$abc9_clock = C;
+  wire [3:0] _TECHMAP_REPLACE_.$abc9_control = {CE, 1'b0 /* IS_D_INVERTED */, S, 1'b0 /* IS_S_INVERTED */};
+  wire _TECHMAP_REPLACE_.$abc9_currQ = Q;
+endmodule
 
 module RAM32X1D (
   output DPO, SPO,
diff --git a/techlibs/xilinx/abc9_model.v b/techlibs/xilinx/abc9_model.v
index 8c8e155..cc0e5ec 100644
--- a/techlibs/xilinx/abc9_model.v
+++ b/techlibs/xilinx/abc9_model.v
@@ -30,6 +30,13 @@
                 : (S0 ? I1 : I0);
 endmodule
 
+module \$__ABC9_FF_ (input D, output Q);
+endmodule
+
+(* abc9_box_id = 1000 *)  // id 1000 matches the $__ABC9_ASYNC entry in abc9_xc7.box
+module \$__ABC9_ASYNC (input A, S, output Y);
+endmodule
+
 // Box to emulate comb/seq behaviour of RAMD{32,64} and SRL{16,32}
 //   Necessary since RAMD* and SRL* have both combinatorial (i.e.
 //   same-cycle read operation) and sequential (write operation
diff --git a/techlibs/xilinx/abc9_unmap.v b/techlibs/xilinx/abc9_unmap.v
index ad64697..21fe78d 100644
--- a/techlibs/xilinx/abc9_unmap.v
+++ b/techlibs/xilinx/abc9_unmap.v
@@ -20,6 +20,14 @@
 
 // ============================================================================
 
+module \$__ABC9_ASYNC (input A, S, output Y);
+  assign Y = A;
+endmodule
+
+module \$__ABC9_FF_ (input D, output Q);
+  assign Q = D;
+endmodule
+
 module \$__ABC9_LUT6 (input A, input [5:0] S, output Y);
   assign Y = A;
 endmodule
diff --git a/techlibs/xilinx/abc9_xc7.box b/techlibs/xilinx/abc9_xc7.box
index 774388d..24b1898 100644
--- a/techlibs/xilinx/abc9_xc7.box
+++ b/techlibs/xilinx/abc9_xc7.box
@@ -41,6 +41,57 @@
 592 540 520 356 -   512 548 292 -   228
 580 526 507 398 385 508 528 378 380 114
 
+# Box to emulate async behaviour of FD[CP]*
+# Inputs: A S
+# Outputs: Y
+$__ABC9_ASYNC 1000 0 2 1
+0 764
+
+# The following FD*.{CE,R,CLR,PRE} are offset by 46ps to
+# reflect the -46ps Tsu
+# https://github.com/SymbiFlow/prjxray-db/blob/23c8b0851f979f0799318eaca90174413a46b257/artix7/timings/slicel.sdf#L237-L251
+# https://github.com/SymbiFlow/prjxray-db/blob/23c8b0851f979f0799318eaca90174413a46b257/artix7/timings/slicel.sdf#L265-L277
+
+# Inputs: C CE D R \$currQ
+# Outputs: Q
+FDRE 1001 1 5 1
+0 151 0 446 0
+
+# Inputs: C CE D R \$currQ
+# Outputs: Q
+FDRE_1 1002 1 5 1
+0 151 0 446 0
+
+# Inputs: C CE CLR D \$currQ
+# Outputs: Q
+FDCE 1003 1 5 1
+0 151 806 0 0
+
+# Inputs: C CE CLR D \$currQ
+# Outputs: Q
+FDCE_1 1004 1 5 1
+0 151 806 0 0
+
+# Inputs: C CE D PRE \$currQ
+# Outputs: Q
+FDPE 1005 1 5 1
+0 151 0 806 0
+
+# Inputs: C CE D PRE \$currQ
+# Outputs: Q
+FDPE_1 1006 1 5 1
+0 151 0 806 0
+
+# Inputs: C CE D S \$currQ
+# Outputs: Q
+FDSE 1007 1 5 1
+0 151 0 446 0
+
+# Inputs: C CE D S \$currQ
+# Outputs: Q
+FDSE_1 1008 1 5 1
+0 151 0 446 0
+
 # SLICEM/A6LUT
 # Box to emulate comb/seq behaviour of RAMD{32,64} and SRL{16,32}
 #   Necessary since RAMD* and SRL* have both combinatorial (i.e.
diff --git a/techlibs/xilinx/cells_sim.v b/techlibs/xilinx/cells_sim.v
index fa33f45..d845b32 100644
--- a/techlibs/xilinx/cells_sim.v
+++ b/techlibs/xilinx/cells_sim.v
@@ -59,6 +59,34 @@
   assign O = I;
 endmodule
 
+module IOBUF (
+    (* iopad_external_pin *)
+    inout IO,
+    output O,
+    input I,
+    input T
+);
+    parameter integer DRIVE = 12;
+    parameter IBUF_LOW_PWR = "TRUE";
+    parameter IOSTANDARD = "DEFAULT";
+    parameter SLEW = "SLOW";
+    assign IO = T ? 1'bz : I;
+    assign O = IO;
+endmodule
+
+module OBUFT (
+    (* iopad_external_pin *)
+    output O,
+    input I,
+    input T
+);
+    parameter CAPACITANCE = "DONT_CARE";
+    parameter integer DRIVE = 12;
+    parameter IOSTANDARD = "DEFAULT";
+    parameter SLEW = "SLOW";
+    assign O = T ? 1'bz : I;
+endmodule
+
 module BUFG(
     (* clkbuf_driver *)
     output O,
@@ -255,6 +283,7 @@
 
 // Max delay from: https://github.com/SymbiFlow/prjxray-db/blob/34ea6eb08a63d21ec16264ad37a0a7b142ff6031/artix7/timings/CLBLL_L.sdf#L238-L250
 
+(* abc9_box_id=1001, lib_whitebox, abc9_flop *)
 module FDRE (
   (* abc9_arrival=303 *)
   output reg Q,
@@ -278,29 +307,20 @@
   endcase endgenerate
 endmodule
 
-module FDSE (
+(* abc9_box_id=1002, lib_whitebox, abc9_flop *)
+module FDRE_1 (
   (* abc9_arrival=303 *)
   output reg Q,
   (* clkbuf_sink *)
-  (* invertible_pin = "IS_C_INVERTED" *)
   input C,
-  input CE,
-  (* invertible_pin = "IS_D_INVERTED" *)
-  input D,
-  (* invertible_pin = "IS_S_INVERTED" *)
-  input S
+  input CE, D, R
 );
-  parameter [0:0] INIT = 1'b1;
-  parameter [0:0] IS_C_INVERTED = 1'b0;
-  parameter [0:0] IS_D_INVERTED = 1'b0;
-  parameter [0:0] IS_S_INVERTED = 1'b0;
+  parameter [0:0] INIT = 1'b0;
   initial Q <= INIT;
-  generate case (|IS_C_INVERTED)
-    1'b0: always @(posedge C) if (S == !IS_S_INVERTED) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
-    1'b1: always @(negedge C) if (S == !IS_S_INVERTED) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
-  endcase endgenerate
+  always @(negedge C) if (R) Q <= 1'b0; else if (CE) Q <= D;
 endmodule
 
+(* abc9_box_id=1003, lib_whitebox, abc9_flop *)
 module FDCE (
   (* abc9_arrival=303 *)
   output reg Q,
@@ -326,6 +346,20 @@
   endcase endgenerate
 endmodule
 
+(* abc9_box_id=1004, lib_whitebox, abc9_flop *)
+module FDCE_1 (
+  (* abc9_arrival=303 *)
+  output reg Q,
+  (* clkbuf_sink *)
+  input C,
+  input CE, D, CLR
+);
+  parameter [0:0] INIT = 1'b0;
+  initial Q <= INIT;
+  always @(negedge C, posedge CLR) if (CLR) Q <= 1'b0; else if (CE) Q <= D;
+endmodule
+
+(* abc9_box_id=1005, lib_whitebox, abc9_flop *)
 module FDPE (
   (* abc9_arrival=303 *)
   output reg Q,
@@ -344,49 +378,14 @@
   parameter [0:0] IS_PRE_INVERTED = 1'b0;
   initial Q <= INIT;
   generate case ({|IS_C_INVERTED, |IS_PRE_INVERTED})
-    2'b00: always @(posedge C, posedge PRE) if ( PRE) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
-    2'b01: always @(posedge C, negedge PRE) if (!PRE) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
-    2'b10: always @(negedge C, posedge PRE) if ( PRE) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
-    2'b11: always @(negedge C, negedge PRE) if (!PRE) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
+    2'b00: always @(posedge C, posedge PRE) if ( PRE) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
+    2'b01: always @(posedge C, negedge PRE) if (!PRE) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
+    2'b10: always @(negedge C, posedge PRE) if ( PRE) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
+    2'b11: always @(negedge C, negedge PRE) if (!PRE) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
   endcase endgenerate
 endmodule
 
-module FDRE_1 (
-  (* abc9_arrival=303 *)
-  output reg Q,
-  (* clkbuf_sink *)
-  input C,
-  input CE, D, R
-);
-  parameter [0:0] INIT = 1'b0;
-  initial Q <= INIT;
-  always @(negedge C) if (R) Q <= 1'b0; else if(CE) Q <= D;
-endmodule
-
-module FDSE_1 (
-  (* abc9_arrival=303 *)
-  output reg Q,
-  (* clkbuf_sink *)
-  input C,
-  input CE, D, S
-);
-  parameter [0:0] INIT = 1'b1;
-  initial Q <= INIT;
-  always @(negedge C) if (S) Q <= 1'b1; else if(CE) Q <= D;
-endmodule
-
-module FDCE_1 (
-  (* abc9_arrival=303 *)
-  output reg Q,
-  (* clkbuf_sink *)
-  input C,
-  input CE, D, CLR
-);
-  parameter [0:0] INIT = 1'b0;
-  initial Q <= INIT;
-  always @(negedge C, posedge CLR) if (CLR) Q <= 1'b0; else if (CE) Q <= D;
-endmodule
-
+(* abc9_box_id=1006, lib_whitebox, abc9_flop *)
 module FDPE_1 (
   (* abc9_arrival=303 *)
   output reg Q,
@@ -399,6 +398,43 @@
   always @(negedge C, posedge PRE) if (PRE) Q <= 1'b1; else if (CE) Q <= D;
 endmodule
 
+(* abc9_box_id=1007, lib_whitebox, abc9_flop *)
+module FDSE (
+  (* abc9_arrival=303 *)
+  output reg Q,
+  (* clkbuf_sink *)
+  (* invertible_pin = "IS_C_INVERTED" *)
+  input C,
+  input CE,
+  (* invertible_pin = "IS_D_INVERTED" *)
+  input D,
+  (* invertible_pin = "IS_S_INVERTED" *)
+  input S
+);
+  parameter [0:0] INIT = 1'b1;
+  parameter [0:0] IS_C_INVERTED = 1'b0;
+  parameter [0:0] IS_D_INVERTED = 1'b0;
+  parameter [0:0] IS_S_INVERTED = 1'b0;
+  initial Q <= INIT;
+  generate case (|IS_C_INVERTED)
+    1'b0: always @(posedge C) if (S == !IS_S_INVERTED) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
+    1'b1: always @(negedge C) if (S == !IS_S_INVERTED) Q <= 1'b1; else if (CE) Q <= D ^ IS_D_INVERTED;
+  endcase endgenerate
+endmodule
+
+(* abc9_box_id=1008, lib_whitebox, abc9_flop *)
+module FDSE_1 (
+  (* abc9_arrival=303 *)
+  output reg Q,
+  (* clkbuf_sink *)
+  input C,
+  input CE, D, S
+);
+  parameter [0:0] INIT = 1'b1;
+  initial Q <= INIT;
+  always @(negedge C) if (S) Q <= 1'b1; else if (CE) Q <= D;
+endmodule
+
 module LDCE (
   output reg Q,
   (* invertible_pin = "IS_CLR_INVERTED" *)
diff --git a/techlibs/xilinx/cells_xtra.py b/techlibs/xilinx/cells_xtra.py
index f401ebe..82e403f 100644
--- a/techlibs/xilinx/cells_xtra.py
+++ b/techlibs/xilinx/cells_xtra.py
@@ -326,7 +326,7 @@
     Cell('IBUFGDS', port_attrs={'I': ['iopad_external_pin'], 'IB': ['iopad_external_pin']}),
     Cell('IBUFGDS_DIFF_OUT', port_attrs={'I': ['iopad_external_pin'], 'IB': ['iopad_external_pin']}),
     # I/O.
-    Cell('IOBUF', port_attrs={'IO': ['iopad_external_pin']}),
+    # Cell('IOBUF', port_attrs={'IO': ['iopad_external_pin']}),
     Cell('IOBUF_DCIEN', port_attrs={'IO': ['iopad_external_pin']}),
     Cell('IOBUF_INTERMDISABLE', port_attrs={'IO': ['iopad_external_pin']}),
     Cell('IOBUFE3', port_attrs={'IO': ['iopad_external_pin']}),
@@ -342,7 +342,7 @@
     Cell('OBUFDS', port_attrs={'O': ['iopad_external_pin'], 'OB': ['iopad_external_pin']}),
     Cell('OBUFDS_DPHY', port_attrs={'O': ['iopad_external_pin'], 'OB': ['iopad_external_pin']}),
     # Output + tristate.
-    Cell('OBUFT', port_attrs={'O': ['iopad_external_pin']}),
+    # Cell('OBUFT', port_attrs={'O': ['iopad_external_pin']}),
     Cell('OBUFTDS', port_attrs={'O': ['iopad_external_pin'], 'OB': ['iopad_external_pin']}),
     # Pulls.
     Cell('KEEPER'),
diff --git a/techlibs/xilinx/cells_xtra.v b/techlibs/xilinx/cells_xtra.v
index ce0949f..671d16e 100644
--- a/techlibs/xilinx/cells_xtra.v
+++ b/techlibs/xilinx/cells_xtra.v
@@ -8160,18 +8160,6 @@
     input IB;
 endmodule
 
-module IOBUF (...);
-    parameter integer DRIVE = 12;
-    parameter IBUF_LOW_PWR = "TRUE";
-    parameter IOSTANDARD = "DEFAULT";
-    parameter SLEW = "SLOW";
-    output O;
-    (* iopad_external_pin *)
-    inout IO;
-    input I;
-    input T;
-endmodule
-
 module IOBUF_DCIEN (...);
     parameter integer DRIVE = 12;
     parameter IBUF_LOW_PWR = "TRUE";
@@ -8373,17 +8361,6 @@
     input LPTX_T;
 endmodule
 
-module OBUFT (...);
-    parameter CAPACITANCE = "DONT_CARE";
-    parameter integer DRIVE = 12;
-    parameter IOSTANDARD = "DEFAULT";
-    parameter SLEW = "SLOW";
-    (* iopad_external_pin *)
-    output O;
-    input I;
-    input T;
-endmodule
-
 module OBUFTDS (...);
     parameter CAPACITANCE = "DONT_CARE";
     parameter IOSTANDARD = "DEFAULT";
diff --git a/techlibs/xilinx/synth_xilinx.cc b/techlibs/xilinx/synth_xilinx.cc
index 2f68b8e..b0ecda8 100644
--- a/techlibs/xilinx/synth_xilinx.cc
+++ b/techlibs/xilinx/synth_xilinx.cc
@@ -291,10 +291,11 @@
 			ff_map_file = "+/xilinx/xc7_ff_map.v";
 
 		if (check_label("begin")) {
+			std::string read_args; // options accumulated for the read_verilog call on cells_sim.v
 			if (vpr)
-				run("read_verilog -lib -D_EXPLICIT_CARRY +/xilinx/cells_sim.v");
-			else
-				run("read_verilog -lib +/xilinx/cells_sim.v");
+				read_args += " -D_EXPLICIT_CARRY"; // only the vpr flow defines _EXPLICIT_CARRY
+			read_args += " -lib +/xilinx/cells_sim.v";
+			run("read_verilog" + read_args); // single call replaces the former if/else pair of run() calls
 
 			run("read_verilog -lib +/xilinx/cells_xtra.v");
 
@@ -519,6 +520,7 @@
 
 		if (check_label("map_ffs")) {
 			if (abc9 || help_mode) {
+				run("clkpart -set_attr clkpart 1", "('-abc9' only)");
 				run("techmap -map " + ff_map_file, "('-abc9' only)");
 			}
 		}
@@ -564,10 +566,13 @@
 			else
 				techmap_args += " -map " + ff_map_file;
 			run("techmap " + techmap_args);
-			run("clean");
+			run("xilinx_dff");
 		}
 
 		if (check_label("finalize")) {
+			if (help_mode || abc9)
+				run("clkpart -unpart clkpart", "(only if '-abc9')"); // counterpart of "clkpart -set_attr clkpart 1" in map_ffs; label names the option with its dash like the other labels here
+
 			bool do_iopad = iopad || (ise && !noiopad);
 			if (help_mode || !noclkbuf) {
 				if (help_mode || do_iopad)
@@ -579,6 +584,7 @@
 				run("iopadmap -bits -outpad OBUF I:O -inpad IBUF O:I A:top", "(only if '-iopad' or '-ise' and not '-noiopad')");
 			if (help_mode || ise)
 				run("extractinv -inv INV O:I", "(only if '-ise')");
+			run("clean");
 		}
 
 		if (check_label("check")) {
diff --git a/techlibs/xilinx/xc7_xcu_brams.txt b/techlibs/xilinx/xc7_xcu_brams.txt
index f116111..7d3d997 100644
--- a/techlibs/xilinx/xc7_xcu_brams.txt
+++ b/techlibs/xilinx/xc7_xcu_brams.txt
@@ -81,10 +81,18 @@
 endmatch
 
 match $__XILINX_RAMB18_SDP
-  min bits 4096
+  min bits 1024
   min efficiency 5
   shuffle_enable B
   make_transp
+  attribute ram_style
+  or_next_if_better
+endmatch
+
+match $__XILINX_RAMB18_SDP
+  shuffle_enable B
+  make_transp
+  attribute ram_style block
   or_next_if_better
 endmatch
 
@@ -97,7 +105,7 @@
 endmatch
 
 match $__XILINX_RAMB18_TDP
-  min bits 4096
+  min bits 1024
   min efficiency 5
   shuffle_enable B
   make_transp
diff --git a/tests/arch/xilinx/dsp_fastfir.ys b/tests/arch/xilinx/dsp_fastfir.ys
new file mode 100644
index 0000000..0067a82
--- /dev/null
+++ b/tests/arch/xilinx/dsp_fastfir.ys
@@ -0,0 +1,69 @@
+read_verilog <<EOT
+// Citation https://github.com/ZipCPU/dspfilters/blob/master/rtl/fastfir.v
+module fastfir_dynamictaps(i_clk, i_reset, i_tap_wr, i_tap, i_ce, i_sample, o_result); // netlist-style test design with two tap stages, FILTER[0] and FILTER[1]
+  wire [30:0] _00_;
+  wire [23:0] _01_;
+  wire [11:0] _02_;
+  wire [30:0] _03_;
+  wire [23:0] _04_;
+  wire [30:0] _05_;
+  wire [23:0] _06_;
+  wire [30:0] _07_;
+  wire [23:0] _08_;
+  wire [11:0] _09_;
+  wire [30:0] _10_;
+  wire [23:0] _11_;
+  wire [30:0] _12_;
+  wire [23:0] _13_;
+  wire [11:0] \fir.FILTER[0].tapk.delayed_sample ;
+  reg [30:0] \fir.FILTER[0].tapk.o_acc  = 31'h00000000;
+  wire [11:0] \fir.FILTER[0].tapk.o_sample ;
+  reg [23:0] \fir.FILTER[0].tapk.product ;
+  reg [11:0] \fir.FILTER[0].tapk.tap  = 12'h000;
+  wire [11:0] \fir.FILTER[1].tapk.delayed_sample ;
+  wire [30:0] \fir.FILTER[1].tapk.o_acc ;
+  wire [11:0] \fir.FILTER[1].tapk.o_sample ;
+  reg [23:0] \fir.FILTER[1].tapk.product ;
+  reg [11:0] \fir.FILTER[1].tapk.tap  = 12'h000;
+  input i_ce;
+  input i_clk;
+  input i_reset;
+  input [11:0] i_sample;
+  input [11:0] i_tap;
+  input i_tap_wr;
+  output [30:0] o_result;
+  reg [30:0] o_result;
+  assign _03_ = 31'h00000000 + { \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product [23], \fir.FILTER[0].tapk.product  }; // 24-bit product sign-extended to 31 bits (7 copies of bit 23), added to zero
+  assign _04_ = $signed(\fir.FILTER[0].tapk.tap ) * $signed(i_sample); // signed multiply of the 12-bit tap by the 12-bit sample
+  always @(posedge i_clk)
+      \fir.FILTER[0].tapk.tap  <= _02_;
+  always @(posedge i_clk)
+      \fir.FILTER[0].tapk.o_acc  <= _00_;
+  always @(posedge i_clk)
+      \fir.FILTER[0].tapk.product  <= _01_;
+  assign _02_ = i_tap_wr ? i_tap : \fir.FILTER[0].tapk.tap ; // stage-0 tap loads i_tap on i_tap_wr, otherwise holds
+  assign _05_ = i_ce ? _03_ : \fir.FILTER[0].tapk.o_acc ; // i_ce acts as the enable; the register holds when it is low
+  assign _00_ = i_reset ? 31'h00000000 : _05_; // i_reset is muxed into the D input, so the clear is synchronous
+  assign _06_ = i_ce ? _04_ : \fir.FILTER[0].tapk.product ;
+  assign _01_ = i_reset ? 24'h000000 : _06_;
+  assign _10_ = \fir.FILTER[0].tapk.o_acc  + { \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product [23], \fir.FILTER[1].tapk.product  }; // stage-1 sum: stage-0 accumulator plus the sign-extended stage-1 product
+  assign _11_ = $signed(\fir.FILTER[1].tapk.tap ) * $signed(i_sample);
+  always @(posedge i_clk)
+      \fir.FILTER[1].tapk.tap  <= _09_;
+  always @(posedge i_clk)
+      o_result <= _07_;
+  always @(posedge i_clk)
+      \fir.FILTER[1].tapk.product  <= _08_;
+  assign _09_ = i_tap_wr ? \fir.FILTER[0].tapk.tap  : \fir.FILTER[1].tapk.tap ; // on i_tap_wr the stage-1 tap takes the stage-0 tap, so taps shift down the chain
+  assign _12_ = i_ce ? _10_ : o_result;
+  assign _07_ = i_reset ? 31'h00000000 : _12_;
+  assign _13_ = i_ce ? _11_ : \fir.FILTER[1].tapk.product ;
+  assign _08_ = i_reset ? 24'h000000 : _13_;
+  assign \fir.FILTER[1].tapk.o_acc  = o_result; // the stage-1 accumulator is the output register itself
+endmodule
+EOT
+
+synth_xilinx
+cd fastfir_dynamictaps # run the select checks below inside this module
+select -assert-count 2 t:DSP48E1 # expect exactly two DSP48E1 cells
+select -assert-none t:* t:DSP48E1 %d t:BUFG %d # no cell types other than DSP48E1 and BUFG may remain
diff --git a/tests/opt/bug1525.ys b/tests/opt/bug1525.ys
new file mode 100644
index 0000000..972bc0a
--- /dev/null
+++ b/tests/opt/bug1525.ys
@@ -0,0 +1,13 @@
+read_verilog << EOF
+module top(...);
+input A1, A2, B, S;
+output O;
+
+assign O = S ? (A1 & B) : (A2 & B); // both mux branches are ANDs that share the operand B
+
+endmodule
+EOF
+
+simplemap
+opt_share
+dump # NOTE(review): no select/assert follows, so this test only shows that opt_share completes on this design; the dumped result is not checked
diff --git a/tests/simple_abc9/abc9.v b/tests/simple_abc9/abc9.v
index 64b625e..961e760 100644
--- a/tests/simple_abc9/abc9.v
+++ b/tests/simple_abc9/abc9.v
@@ -218,12 +218,6 @@
 endmodule
 
 // Citation: https://github.com/alexforencich/verilog-ethernet
-// TODO: yosys -p "synth_xilinx -abc9 -top abc9_test022" abc9.v -q
-// returns before b4321a31
-//   Warning: Wire abc9_test022.\m_eth_payload_axis_tkeep [7] is used but has no
-//   driver.
-//   Warning: Wire abc9_test022.\m_eth_payload_axis_tkeep [3] is used but has no
-//   driver.
 module abc9_test022
 (
     input  wire        clk,
@@ -237,9 +231,6 @@
 endmodule
 
 // Citation: https://github.com/riscv/riscv-bitmanip
-// TODO: yosys -p "synth_xilinx -abc9 -top abc9_test023" abc9.v -q
-// returns before 14233843
-//   Warning: Wire abc9_test023.\dout [1] is used but has no driver.
 module abc9_test023 #(
 	parameter integer N = 2,
 	parameter integer M = 2
@@ -267,3 +258,30 @@
 assign o = { 1'b1, 1'bx };
 assign p = { 1'b1, 1'bx, 1'b0 };
 endmodule
+
+module abc9_test029(input clk1, clk2, d, output reg q1, q2); // two flops driven by two different clocks
+always @(posedge clk1) q1 <= d; // q1 samples d on the rising edge of clk1
+always @(negedge clk2) q2 <= q1; // q2 samples q1 on the falling edge of clk2
+endmodule
+
+module abc9_test030(input clk, d, r, output reg q); // rising-edge flop with an asynchronous, active-high clear
+always @(posedge clk or posedge r)
+    if (r) q <= 1'b0; // r forces q to 0
+    else q <= d;
+endmodule
+
+module abc9_test031(input clk, d, r, output reg q); // falling-edge flop with an asynchronous, active-high preset
+always @(negedge clk or posedge r)
+    if (r) q <= 1'b1; // r forces q to 1
+    else q <= d;
+endmodule
+
+module abc9_test033(input clk, d, output reg q1, q2); // two-stage shift register, both flops on the same clock edge
+always @(posedge clk) q1 <= d;
+always @(posedge clk) q2 <= q1; // second stage is fed by the first
+endmodule
+
+module abc9_test034(input clk, d, output reg [1:0] q); // one clock, opposite edges for the two bits of q
+always @(posedge clk) q[0] <= d; // q[0] updates on the rising edge
+always @(negedge clk) q[1] <= q[0]; // q[1] updates on the falling edge, fed by q[0]
+endmodule
diff --git a/tests/various/abc9.v b/tests/various/abc9.v
index 30ebd4e..f0b3f68 100644
--- a/tests/various/abc9.v
+++ b/tests/various/abc9.v
@@ -9,3 +9,10 @@
 unknown u(~i, w);
 unknown2 u2(w, o);
 endmodule
+
+module abc9_test032(input clk, d, r, output reg q); // falling-edge flop with an asynchronous, active-low clear
+initial q = 1'b0; // q starts at 0
+always @(negedge clk or negedge r)
+    if (!r) q <= 1'b0; // r low forces q to 0
+    else q <= d;
+endmodule
diff --git a/tests/various/abc9.ys b/tests/various/abc9.ys
index 5c9a407..81d0afd 100644
--- a/tests/various/abc9.ys
+++ b/tests/various/abc9.ys
@@ -22,3 +22,19 @@
 select -assert-count 1 t:$lut r:LUT=2'b01 r:WIDTH=1 %i %i
 select -assert-count 1 t:unknown
 select -assert-none t:$lut t:unknown %% t: %D
+
+design -load read
+hierarchy -top abc9_test032
+proc
+clk2fflogic
+design -save gold # reference snapshot, taken before abc9 runs
+
+abc9 -lut 4
+check
+design -stash gate # snapshot of the design after abc9
+
+design -import gold -as gold
+design -import gate -as gate
+
+miter -equiv -flatten -make_assert -make_outputs gold gate miter
+sat -seq 10 -verify -prove-asserts -show-ports miter # prove the gold-vs-gate miter asserts, run with -seq 10
diff --git a/tests/various/submod.ys b/tests/various/submod.ys
new file mode 100644
index 0000000..9d7dabd
--- /dev/null
+++ b/tests/various/submod.ys
@@ -0,0 +1,102 @@
+read_verilog <<EOT
+module top(input a, output b);
+wire c;
+(* submod="bar" *) sub s1(a, c); // cell tagged with submod="bar"; its output reaches port b through the internal wire c
+assign b = c;
+endmodule
+
+module sub(input a, output c);
+assign c = a;
+endmodule
+EOT
+
+hierarchy -top top
+proc
+design -save gold # reference snapshot, taken before submod runs
+
+submod
+check -assert
+design -stash gate # snapshot of the design after submod
+
+design -import gold -as gold
+design -import gate -as gate
+
+miter -equiv -flatten -make_assert -make_outputs gold gate miter
+sat -verify -prove-asserts -show-ports miter # prove the gold-vs-gate miter asserts
+
+
+design -reset
+read_verilog <<EOT
+module top(input a, output [1:0] b);
+(* submod="bar" *) sub s1(a, b[1]); // tagged cell drives only bit 1 of the 2-bit output port
+assign b[0] = 1'b0; // bit 0 is tied to constant 0
+endmodule
+
+module sub(input a, output c);
+assign c = a;
+endmodule
+EOT
+
+hierarchy -top top
+proc
+design -save gold # reference snapshot, taken before submod runs
+
+submod
+check -assert top
+design -stash gate # snapshot of the design after submod
+
+design -import gold -as gold
+design -import gate -as gate
+
+miter -equiv -flatten -make_assert -make_outputs gold gate miter
+sat -verify -prove-asserts -show-ports miter # prove the gold-vs-gate miter asserts
+
+
+design -reset
+read_verilog <<EOT
+module top(input a, output [1:0] b, c);
+(* submod="bar" *) sub s1(a, b[0]); // two cells carry the same submod name "bar"
+(* submod="bar" *) sub s2(a, c[1]); // this one drives a bit of the other output port
+assign c = b; // the two output ports are additionally connected to each other
+endmodule
+
+module sub(input a, output c);
+assign c = a;
+endmodule
+EOT
+
+hierarchy -top top
+proc
+design -save gold # reference snapshot, taken before submod runs
+
+submod
+check -assert top
+design -stash gate # snapshot of the design after submod
+
+design -import gold -as gold
+design -import gate -as gate
+
+miter -equiv -flatten -make_assert -make_outputs gold gate miter
+sat -verify -prove-asserts -show-ports miter # prove the gold-vs-gate miter asserts
+
+
+
+design -reset
+read_verilog -icells <<EOT
+module top(input d, c, (* init = 3'b011 *) output reg [2:0] q); // the init attribute spans all three bits of q
+(* submod="bar" *) DFF s1(.D(d), .C(c), .Q(q[1])); // only the flop driving q[1] is tagged with submod="bar"
+DFF s2(.D(d), .C(c), .Q(q[0]));
+DFF s3(.D(d), .C(c), .Q(q[2]));
+endmodule
+
+module DFF(input D, C, output Q); // body holds only the INIT parameter; no behaviour is modelled
+parameter INIT = 1'b0;
+endmodule
+EOT
+
+hierarchy -top top
+proc
+
+submod
+dffinit -ff DFF Q INIT # NOTE(review): presumably moves init values from the Q wires into each DFF's INIT parameter - confirm against the dffinit help
+check -noinit -assert # no gold/gate miter in this case; the check pass is the only verdict