libtrellis: Add experimental support for packing compressed bitstreams Signed-off-by: David Shah <dave@ds0.me>
diff --git a/libtrellis/src/Bitstream.cpp b/libtrellis/src/Bitstream.cpp index 991851f..9548759 100644 --- a/libtrellis/src/Bitstream.cpp +++ b/libtrellis/src/Bitstream.cpp
@@ -10,7 +10,7 @@ #include <iomanip> #include <fstream> #include <array> - +#include <queue> namespace Trellis { static const uint16_t CRC16_POLY = 0x8005; @@ -175,6 +175,122 @@ // if remaining bits > 0 they are just padding bits added to the end so we can ignore them } + // Decode a one-hot byte: returns the index (0-7) of its single set bit, or -1 if the byte is not one-hot + int decode_onehot(uint8_t in) { + switch(in) { + case 0b00000001: + return 0; + case 0b00000010: + return 1; + case 0b00000100: + return 2; + case 0b00001000: + return 3; + case 0b00010000: + return 4; + case 0b00100000: + return 5; + case 0b01000000: + return 6; + case 0b10000000: + return 7; + default: + return -1; + } + } + + void write_compressed_frames(const std::vector<std::vector<uint8_t>> &frames_in) { + // Build a histogram of byte values across all frames; the 8 most frequent eligible values become the dictionary + int histogram[256]; + for (int i = 0; i < 256; i++) + histogram[i] = 0; + for (auto &fr : frames_in) + for (auto b : fr) + ++histogram[b]; + std::priority_queue<std::pair<int, uint8_t>> most_frequent; + for (int i = 0; i < 256; i++) + if (i != 0 && (decode_onehot(i) == -1)) // zero and one-hot bytes have their own dedicated encodings below, so they never go in the dictionary + most_frequent.push(std::make_pair(histogram[i], i)); + uint8_t dict_entries[8]; + for (int i = 0; i < 8; i++) { + dict_entries[i] = most_frequent.top().second; + most_frequent.pop(); + } + // Write dictionary: command byte, 3 zero bytes, then the 8 entries emitted from entry 7 down to entry 0 + write_byte(uint8_t(BitstreamCommand::LSC_WRITE_COMP_DIC)); + insert_zeros(3); + for (int i = 7; i >= 0; i--) + write_byte(dict_entries[i]); + // Write compressed-data command, followed by the frame count as a big-endian 16-bit value + write_byte(uint8_t(BitstreamCommand::LSC_PROG_INCR_CMP)); + write_byte(0x91); //CRC check, 1 dummy byte + uint16_t frames = uint16_t(frames_in.size()); // NOTE(review): truncates if frames_in has more than 65535 entries + write_byte(uint8_t((frames >> 8) & 0xFF)); + write_byte(uint8_t(frames & 0xFF)); + + // Bit accumulator for writing a stream of bits, MSB first; each full byte is emitted via write_byte + uint8_t buffer = 0; + int bits_in_buffer = 0; + auto flush_bits = [&]() { + if (bits_in_buffer != 0) { + write_byte(buffer); + buffer = 0; + bits_in_buffer = 0; + } + }; + auto add_bit = [&](bool bit) { + if (bit) + buffer |= (1 << (7 
- bits_in_buffer)); + bits_in_buffer++; + if (bits_in_buffer == 8) + flush_bits(); + }; + auto add_bits = [&](uint32_t x, int len) { + for (int i = len-1; i >= 0; i--) + add_bit((x & (1 << i)) != 0); + }; + // Add zero bytes (each represented by a single zero bit in the bitstream) + // at the start of every frame to pad it to a multiple of 64 bits (8 bytes) + for (auto &fr : frames_in) { + int frame_bytes = int(fr.size()); + if ((frame_bytes % 8) != 0) + for (int i = 0; i < (8 - (frame_bytes % 8)); i++) + add_bit(0); + // Encode each byte of the frame with the shortest applicable code + for (auto b : fr) { + if (b == 0) { + add_bit(0); // 0 bit -> 0 byte + continue; + } + int oh = decode_onehot(b); + if (oh != -1) { + add_bits(0b100, 3); // 0b100xxx -> only bit xxx set in byte + add_bits(oh, 3); + continue; + } + // Search dictionary + for (int j = 0; j < 8; j++) + if (dict_entries[j] == b) { + add_bits(0b101, 3); // 0b101xxx -> dictionary entry xxx + add_bits(j, 3); + goto dict_found; + } + if (false) { // never entered by fall-through; the label inside is only reached via the goto above + dict_found: + continue; + } + // Incompressible byte (not zero, one-hot or in the dictionary); use literal + add_bits(0b11, 2); // 0b11xxxxxxxx -> literal byte + add_bits(b, 8); + } + // Flush any partial byte so the compressed frame ends 8-bit aligned (unused trailing bits are zero) + flush_bits(); + // Post-frame CRC and 0xFF byte + insert_crc16(); + write_byte(0xFF); + } + } + // Write multiple bytes from an InputIterator and update CRC template<typename T> void write_bytes(T in, size_t count) { @@ -620,27 +736,49 @@ // Init address wr.write_byte(uint8_t(BitstreamCommand::LSC_INIT_ADDRESS)); wr.insert_zeros(3); - // Bitstream data - wr.write_byte(uint8_t(BitstreamCommand::LSC_PROG_INCR_RTI)); - wr.write_byte(0x91); //CRC check, 1 dummy byte - uint16_t frames = uint16_t(chip.info.num_frames); - wr.write_byte(uint8_t((frames >> 8) & 0xFF)); - wr.write_byte(uint8_t(frames & 0xFF)); - size_t bytes_per_frame = (chip.info.bits_per_frame + chip.info.pad_bits_after_frame + - chip.info.pad_bits_before_frame) / 8U; - unique_ptr<uint8_t[]> frame_bytes = make_unique<uint8_t[]>(bytes_per_frame); - for (size_t i = 0; i < frames; i++) { - fill(frame_bytes.get(), 
frame_bytes.get() + bytes_per_frame, 0x00); - for (int j = 0; j < chip.info.bits_per_frame; j++) { - size_t ofs = j + chip.info.pad_bits_after_frame; - assert(((bytes_per_frame - 1) - (ofs / 8)) < bytes_per_frame); - frame_bytes[(bytes_per_frame - 1) - (ofs / 8)] |= - (chip.cram.bit((chip.info.num_frames - 1) - i, j) & 0x01) << (ofs % 8); + if (options.count("compress") && options.at("compress") == "yes") { + // First create an uncompressed array of frames + std::vector<std::vector<uint8_t>> frames_data; + uint16_t frames = uint16_t(chip.info.num_frames); + size_t bytes_per_frame = (chip.info.bits_per_frame + chip.info.pad_bits_after_frame + + chip.info.pad_bits_before_frame) / 8U; + for (size_t i = 0; i < frames; i++) { + frames_data.emplace_back(); + auto &frame_bytes = frames_data.back(); + frame_bytes.resize(bytes_per_frame); + for (int j = 0; j < chip.info.bits_per_frame; j++) { + size_t ofs = j + chip.info.pad_bits_after_frame; + assert(((bytes_per_frame - 1) - (ofs / 8)) < bytes_per_frame); + frame_bytes[(bytes_per_frame - 1) - (ofs / 8)] |= + (chip.cram.bit((chip.info.num_frames - 1) - i, j) & 0x01) << (ofs % 8); + } } - wr.write_bytes(frame_bytes.get(), bytes_per_frame); - wr.insert_crc16(); - wr.write_byte(0xFF); + // Then compress and write + wr.write_compressed_frames(frames_data); + } else { + // Bitstream data + wr.write_byte(uint8_t(BitstreamCommand::LSC_PROG_INCR_RTI)); + wr.write_byte(0x91); //CRC check, 1 dummy byte + uint16_t frames = uint16_t(chip.info.num_frames); + wr.write_byte(uint8_t((frames >> 8) & 0xFF)); + wr.write_byte(uint8_t(frames & 0xFF)); + size_t bytes_per_frame = (chip.info.bits_per_frame + chip.info.pad_bits_after_frame + + chip.info.pad_bits_before_frame) / 8U; + unique_ptr<uint8_t[]> frame_bytes = make_unique<uint8_t[]>(bytes_per_frame); + for (size_t i = 0; i < frames; i++) { + fill(frame_bytes.get(), frame_bytes.get() + bytes_per_frame, 0x00); + for (int j = 0; j < chip.info.bits_per_frame; j++) { + size_t ofs = j + 
chip.info.pad_bits_after_frame; + assert(((bytes_per_frame - 1) - (ofs / 8)) < bytes_per_frame); + frame_bytes[(bytes_per_frame - 1) - (ofs / 8)] |= + (chip.cram.bit((chip.info.num_frames - 1) - i, j) & 0x01) << (ofs % 8); + } + wr.write_bytes(frame_bytes.get(), bytes_per_frame); + wr.insert_crc16(); + wr.write_byte(0xFF); + } } + // Post-bitstream space for SECURITY and SED (not used here) wr.insert_dummy(12); // Program Usercode
diff --git a/libtrellis/tools/ecppack.cpp b/libtrellis/tools/ecppack.cpp index dca3bb1..f060468 100644 --- a/libtrellis/tools/ecppack.cpp +++ b/libtrellis/tools/ecppack.cpp
@@ -45,6 +45,7 @@ options.add_options()("freq", po::value<std::string>(), "config frequency in MHz"); options.add_options()("svf", po::value<std::string>(), "output SVF file"); options.add_options()("svf-rowsize", po::value<int>(), "SVF row size in bits (default 8000)"); + options.add_options()("compress", "compress bitstream to reduce size"); options.add_options()("spimode", po::value<std::string>(), "SPI Mode to use (fast-read, dual-spi, qspi)"); options.add_options()("background", "enable background reconfiguration in bitstream"); options.add_options()("delta", po::value<std::string>(), "create a delta partial bitstream given a reference config"); @@ -132,6 +133,9 @@ if (vm.count("spimode")) bitopts["spimode"] = vm["spimode"].as<string>(); + if (vm.count("compress")) + bitopts["compress"] = "yes"; + if (vm.count("background")) { auto tile_db = get_tile_bitdata(TileLocator{c.info.family, c.info.name, "EFB0_PICB0"}); auto esb = tile_db->get_data_for_enum("SYSCONFIG.BACKGROUND_RECONFIG"); @@ -209,15 +213,16 @@ } if (vm.count("svf")) { - // Create JTAG bitstream without SPI flash related settings, as these // seem to confuse the chip sometimes when configuring over JTAG - if (!bitopts.empty()) { + if (!bitopts.empty() && !(bitopts.size() == 1 && bitopts.count("compress"))) { bitopts.clear(); if (vm.count("background")) bitopts["background"] = "yes"; if (vm.count("bootaddr")) bitopts["multiboot"] = "yes"; + if (vm.count("compress")) + bitopts["compress"] = "yes"; b = Bitstream::serialise_chip(c, bitopts); }