Merge pull request #181 from YosysHQ/gatecat/ecp5-lutperm

libtrellis: LUT permutation support for ECP5
diff --git a/libtrellis/include/Chip.hpp b/libtrellis/include/Chip.hpp
index c5295ce..3d0b749 100644
--- a/libtrellis/include/Chip.hpp
+++ b/libtrellis/include/Chip.hpp
@@ -177,7 +177,7 @@
     int get_max_col() const;
 
     // Build the routing graph for the chip
-    shared_ptr<RoutingGraph> get_routing_graph();
+    shared_ptr<RoutingGraph> get_routing_graph(bool include_lutperm_pips = false);
 
     vector<vector<vector<pair<string, string>>>> tiles_at_location;
 
@@ -191,7 +191,7 @@
 
 private:
     // Factory functions
-    shared_ptr<RoutingGraph> get_routing_graph_ecp5();
+    shared_ptr<RoutingGraph> get_routing_graph_ecp5(bool include_lutperm_pips = false);
     shared_ptr<RoutingGraph> get_routing_graph_machxo2();
 };
 
diff --git a/libtrellis/include/DedupChipdb.hpp b/libtrellis/include/DedupChipdb.hpp
index fc6468f..3c3d0df 100644
--- a/libtrellis/include/DedupChipdb.hpp
+++ b/libtrellis/include/DedupChipdb.hpp
@@ -81,12 +81,13 @@
     ArcClass cls;
     int32_t delay;
     ident_t tiletype;
+    uint16_t lutperm_flags; // unsigned bit-field; same width/signedness as RoutingArc::lutperm_flags and the hash<uint16_t> used when hashing arcs
 };
 
 inline bool operator==(const DdArcData &a, const DdArcData &b)
 {
     return a.srcWire == b.srcWire && a.sinkWire == b.sinkWire && a.cls == b.cls && a.delay == b.delay &&
-           a.tiletype == b.tiletype;
+           a.tiletype == b.tiletype && a.lutperm_flags == b.lutperm_flags; // arcs that differ only in lutperm_flags compare unequal
 }
 
 struct WireData
@@ -241,6 +242,7 @@
         boost::hash_combine(seed, hash<int8_t>()(arc.cls));
         boost::hash_combine(seed, hash<int32_t>()(arc.delay));
         boost::hash_combine(seed, hash<Trellis::ident_t>()(arc.tiletype));
+        boost::hash_combine(seed, hash<uint16_t>()(arc.lutperm_flags));
         return seed;
     }
 };
@@ -355,7 +357,7 @@
     LocationData get_cs_data(checksum_t id);
 };
 
-shared_ptr<DedupChipdb> make_dedup_chipdb(Chip &chip);
+shared_ptr<DedupChipdb> make_dedup_chipdb(Chip &chip, bool include_lutperm_pips = false); // include_lutperm_pips is passed through to Chip::get_routing_graph; defaults to false
 
 /*
 An optimized chip database is a database with the following properties, intended to be used in place-and-route flows.
diff --git a/libtrellis/include/RoutingGraph.hpp b/libtrellis/include/RoutingGraph.hpp
index bacc6a1..f0509e8 100644
--- a/libtrellis/include/RoutingGraph.hpp
+++ b/libtrellis/include/RoutingGraph.hpp
@@ -64,6 +64,7 @@
     RoutingId source;
     RoutingId sink;
     bool configurable = false;
+    uint16_t lutperm_flags = 0; // defaults to 0; set to a non-zero encoded value for LUT permutation pseudo-pips
     mutable int cdb_id = 0;
 };
 };
 
diff --git a/libtrellis/src/Chip.cpp b/libtrellis/src/Chip.cpp
index e804969..99bc26d 100644
--- a/libtrellis/src/Chip.cpp
+++ b/libtrellis/src/Chip.cpp
@@ -113,17 +113,17 @@
     return delta;
 }
 
-shared_ptr<RoutingGraph> Chip::get_routing_graph()
+shared_ptr<RoutingGraph> Chip::get_routing_graph(bool include_lutperm_pips) // dispatches on info.family; throws runtime_error for any other family
 {
     if(info.family == "ECP5") {
-        return get_routing_graph_ecp5();
+        return get_routing_graph_ecp5(include_lutperm_pips); // only the ECP5 builder receives the flag; the MachXO2 call below takes no argument
     } else if(info.family == "MachXO2") {
         return get_routing_graph_machxo2();
     } else
       throw runtime_error("Unknown chip family: " + info.family);
 }
 
-shared_ptr<RoutingGraph> Chip::get_routing_graph_ecp5()
+shared_ptr<RoutingGraph> Chip::get_routing_graph_ecp5(bool include_lutperm_pips)
 {
     shared_ptr<RoutingGraph> rg(new RoutingGraph(*this));
     //cout << "Building routing graph" << endl;
@@ -136,8 +136,32 @@
         tie(y, x) = tile->info.get_row_col();
         // SLICE Bels
         if (tile->info.type == "PLC2") {
-            for (int z = 0; z < 4; z++)
+            for (int z = 0; z < 4; z++) {
                 Ecp5Bels::add_lc(*rg, x, y, z);
+                if (include_lutperm_pips) {
+                    // Add permutation pseudo-pips: a full crossbar (each input letter to every other position) in front of each LUT's inputs
+                    Location loc(x, y);
+                    const string abcd = "ABCD";
+                    for (int k = (z*2); k < ((z+1)*2); k++) { // k takes the two values 2*z and 2*z+1
+                        for (int i = 0; i < 4; i++) {
+                            for (int j = 0; j < 4; j++) {
+                                if (i == j) // no arc from an input position to itself
+                                    continue;
+                                string input = fmt(abcd[j] << k); // source wire name, e.g. "B3"
+                                string output = fmt(abcd[i] << k << "_SLICE"); // sink wire name, e.g. "A3_SLICE"
+                                RoutingArc rarc;
+                                rarc.id = rg->ident(fmt(input << "->" << output));
+                                rarc.source = RoutingId{loc, rg->ident(input)};
+                                rarc.sink = RoutingId{loc, rg->ident(output)};
+                                rarc.tiletype = rg->ident(tile->info.type);
+                                rarc.configurable = false; // pseudo-pip: not marked as a configurable arc
+                                rarc.lutperm_flags = (0x4000 | (k << 4) | ((i & 0x3) << 2) |(j & 0x3)); // layout: 0x4000 marker | k from bit 4 upward | i (sink position) in bits 3:2 | j (source position) in bits 1:0
+                                rg->add_arc(loc, rarc);
+                            }
+                        }
+                    }
+                }
+            }
         }
         // PIO Bels
         if (tile->info.type.find("PICL0") != string::npos || tile->info.type.find("PICR0") != string::npos)
diff --git a/libtrellis/src/DedupChipdb.cpp b/libtrellis/src/DedupChipdb.cpp
index 828f63a..2b53de4 100644
--- a/libtrellis/src/DedupChipdb.cpp
+++ b/libtrellis/src/DedupChipdb.cpp
@@ -32,9 +32,9 @@
 DedupChipdb::DedupChipdb(const IdStore &base) : IdStore(base)
 {}
 
-shared_ptr<DedupChipdb> make_dedup_chipdb(Chip &chip)
+shared_ptr<DedupChipdb> make_dedup_chipdb(Chip &chip, bool include_lutperm_pips)
 {
-    shared_ptr<RoutingGraph> graph = chip.get_routing_graph();
+    shared_ptr<RoutingGraph> graph = chip.get_routing_graph(include_lutperm_pips);
     for (auto &loc : graph->tiles) {
         const auto &td = loc.second;
         // Index bels, wires and arcs
@@ -87,6 +87,7 @@
             ad.delay = 1;
             ad.sinkWire = RelId{Location(ra.sink.loc.x - x, ra.sink.loc.y - y), graph->tiles.at(ra.sink.loc).wires.at(ra.sink.id).cdb_id};
             ad.srcWire = RelId{Location(ra.source.loc.x - x, ra.source.loc.y - y), graph->tiles.at(ra.source.loc).wires.at(ra.source.id).cdb_id};
+            ad.lutperm_flags = ra.lutperm_flags;
             ld.arcs.push_back(ad);
         }
 
diff --git a/libtrellis/src/PyTrellis.cpp b/libtrellis/src/PyTrellis.cpp
index 674f3fa..5a05f4e 100644
--- a/libtrellis/src/PyTrellis.cpp
+++ b/libtrellis/src/PyTrellis.cpp
@@ -503,7 +503,8 @@
             .def_readwrite("sinkWire", &DdArcData::sinkWire)
             .def_readwrite("cls", &DdArcData::cls)
             .def_readwrite("delay", &DdArcData::delay)
-            .def_readwrite("tiletype", &DdArcData::tiletype);
+            .def_readwrite("tiletype", &DdArcData::tiletype)
+            .def_readwrite("lutperm_flags", &DdArcData::lutperm_flags);
 
     class_<WireData>(m, "WireData")
             .def_readwrite("name", &WireData::name)
@@ -546,7 +547,8 @@
             .def("ident", &DedupChipdb::ident)
             .def("to_str", &DedupChipdb::to_str);
 
-    m.def("make_dedup_chipdb", make_dedup_chipdb);
+    m.def("make_dedup_chipdb", make_dedup_chipdb,
+        py::arg("chip"), py::arg("include_lutperm_pips")=false);
 
     class_<OptimizedChipdb, shared_ptr<OptimizedChipdb>>(m, "OptimizedChipdb")
             .def_readwrite("tiles", &OptimizedChipdb::tiles)