| /* |
| * Copyright (C) 2017-2020 The Project X-Ray Authors. |
| * |
| * Use of this source code is governed by a ISC-style |
| * license that can be found in the LICENSE file or at |
| * https://opensource.org/licenses/ISC |
| * |
| * SPDX-License-Identifier: ISC |
| */ |
| #include <assert.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <unistd.h> |
| #include <algorithm> |
| #include <fstream> |
| #include <iostream> |
| #include <map> |
| #include <numeric> |
| #include <set> |
| #include <string> |
| #include <vector> |
| |
| #include <absl/strings/str_cat.h> |
| #include <gflags/gflags.h> |
| |
| DEFINE_int32( |
| c, |
| 4, |
| "threshold under which candidates are output. set to -1 to output all."); |
| DEFINE_bool(i, false, "add inverted tags"); |
| DEFINE_int32(m, 0, "min number of set/cleared samples each"); |
| DEFINE_int32(M, 0, "min number of set/cleared samples total"); |
| DEFINE_string(o, "", "set output file"); |
| DEFINE_string(k, "", "set output mask file"); |
| |
| using std::map; |
| using std::string; |
| using std::tuple; |
| using std::vector; |
| |
| int num_bits = 0, num_tags = 0; |
| map<string, int> bit_ids, tag_ids; |
| vector<string> bit_ids_r, tag_ids_r; |
| |
| #if 0 |
| struct bool_vec |
| { |
| vector<bool> data; |
| |
| bool_vec(int n = 0, bool initval = false) : data(n, initval) |
| { |
| } |
| |
| void set(int n) |
| { |
| if (int(data.size()) <= n) |
| data.resize(n+1); |
| data[n] = true; |
| } |
| |
| bool get(int n) |
| { |
| return data.at(n); |
| } |
| |
| void resize(int n) |
| { |
| data.resize(n); |
| } |
| |
| int count() |
| { |
| return std::accumulate(data.begin(), data.end(), 0); |
| } |
| |
| void apply_and(const bool_vec &other) |
| { |
| assert(data.size() == other.data.size()); |
| for (int i = 0; i < int(data.size()); i++) |
| data[i] = data[i] && other.data[i]; |
| } |
| |
| void apply_andc(const bool_vec &other) |
| { |
| assert(data.size() == other.data.size()); |
| for (int i = 0; i < int(data.size()); i++) |
| data[i] = data[i] && !other.data[i]; |
| } |
| }; |
| #else |
| struct bool_vec { |
| vector<uint64_t> data; |
| |
| bool_vec(int n = 0, bool initval = false) |
| : data((n + 63) / 64, initval ? ~uint64_t(0) : uint64_t(0)) { |
| for (int i = data.size() * 64 - 1; i >= n; i--) |
| data[n / 64] &= ~(uint64_t(1) << (n % 64)); |
| } |
| |
| void set(int n) { |
| if (int(data.size() * 64) <= n) |
| data.resize((n + 64) / 64); |
| data[n / 64] |= uint64_t(1) << (n % 64); |
| } |
| |
| bool get(int n) { return (data[n / 64] >> (n % 64)) & 1; } |
| |
| void resize(int n) { data.resize((n + 63) / 64); } |
| |
| int count() { |
| int sum = 0; |
| for (int i = 0; i < 64 * int(data.size()); i++) |
| if (get(i)) |
| sum++; |
| return sum; |
| } |
| |
| void apply_and(const bool_vec& other) { |
| assert(data.size() == other.data.size()); |
| for (int i = 0; i < int(data.size()); i++) |
| data[i] &= other.data[i]; |
| } |
| |
| void apply_andc(const bool_vec& other) { |
| assert(data.size() == other.data.size()); |
| for (int i = 0; i < int(data.size()); i++) |
| data[i] &= ~other.data[i]; |
| } |
| }; |
| #endif |
| |
| // segname -> bits, tags_on, tags_off |
| typedef tuple<bool_vec, bool_vec, bool_vec> segdata_t; |
| map<string, segdata_t> segdata; |
| |
| map<string, int> segnamecnt; |
| |
| static inline bool_vec& segdata_bits(segdata_t& sd) { |
| return std::get<0>(sd); |
| } |
| static inline bool_vec& segdata_tags1(segdata_t& sd) { |
| return std::get<1>(sd); |
| } |
| static inline bool_vec& segdata_tags0(segdata_t& sd) { |
| return std::get<2>(sd); |
| } |
| |
| void read_input(std::istream& f, std::string filename) { |
| string token; |
| segdata_t* segptr = nullptr; |
| |
| while (f >> token) { |
| if (token == "seg") { |
| f >> token; |
| token = filename + ":" + token; |
| while (segdata.count(token)) { |
| int idx = 1; |
| if (segnamecnt.count(token)) |
| idx = segnamecnt.at(token); |
| segnamecnt[token] = idx + 1; |
| char buffer[64]; |
| snprintf(buffer, 64, "-%d", idx); |
| token += buffer; |
| } |
| segptr = &segdata[token]; |
| continue; |
| } |
| |
| if (token == "bit") { |
| assert(segptr != nullptr); |
| |
| f >> token; |
| if (bit_ids.count(token) == 0) { |
| bit_ids[token] = num_bits++; |
| bit_ids_r.push_back(token); |
| } |
| |
| int bit_idx = bit_ids.at(token); |
| auto& bits = segdata_bits(*segptr); |
| |
| bits.set(bit_idx); |
| continue; |
| } |
| |
| if (token == "tag") { |
| assert(segptr != nullptr); |
| |
| f >> token; |
| if (tag_ids.count(token) == 0) { |
| tag_ids[token] = num_tags++; |
| tag_ids_r.push_back(token); |
| } |
| |
| int tag_idx = tag_ids.at(token); |
| |
| f >> token; |
| assert(token == "0" || token == "1"); |
| |
| auto& tags = token == "1" ? segdata_tags1(*segptr) |
| : segdata_tags0(*segptr); |
| |
| tags.set(tag_idx); |
| |
| if (FLAGS_i) { |
| auto& inv_tags = token == "1" |
| ? segdata_tags0(*segptr) |
| : segdata_tags1(*segptr); |
| |
| token = tag_ids_r.at(tag_idx) + "__INV"; |
| |
| if (tag_ids.count(token) == 0) { |
| tag_ids[token] = num_tags++; |
| tag_ids_r.push_back(token); |
| } |
| |
| int inv_tag_idx = tag_ids.at(token); |
| inv_tags.set(inv_tag_idx); |
| } |
| |
| continue; |
| } |
| |
| abort(); |
| } |
| |
| // printf("Number of segments: %d\n", int(segdata.size())); |
| // printf("Number of bits: %d\n", num_bits); |
| // printf("Number of tags: %d\n", num_tags); |
| |
| for (auto& segdat : segdata) { |
| segdata_bits(segdat.second).resize(num_bits); |
| segdata_tags1(segdat.second).resize(num_tags); |
| segdata_tags0(segdat.second).resize(num_tags); |
| } |
| } |
| |
| int main(int argc, char** argv) { |
| gflags::SetUsageMessage( |
| absl::StrCat("Usage: ", argv[0], " [options] file..")); |
| gflags::ParseCommandLineFlags(&argc, &argv, true); |
| |
| if (argc > 1) { |
| for (int optind = 1; optind < argc; optind++) { |
| printf("Reading %s.\n", argv[optind]); |
| std::ifstream f; |
| f.open(argv[optind]); |
| |
| // Check if input file exists. |
| if (!f.good()) { |
| printf("ERROR: Input file does not exist!\n"); |
| return -1; |
| } |
| |
| assert(!f.fail()); |
| read_input(f, argv[optind]); |
| } |
| } else { |
| printf("Reading from stdin...\n"); |
| read_input(std::cin, "stdin"); |
| } |
| |
| printf("#of segments: %d\n", int(segdata.size())); |
| printf("#of bits: %d\n", num_bits); |
| printf("#of tags: %d\n", num_tags); |
| |
| FILE* f = stdout; |
| |
| if (!FLAGS_o.empty()) { |
| f = fopen(FLAGS_o.c_str(), "w"); |
| assert(f != nullptr); |
| } |
| |
| int cnt_const0 = 0; |
| int cnt_const1 = 0; |
| int cnt_candidates = 0; |
| int min_candidates = num_bits; |
| int max_candidates = 0; |
| float avg_candidates = 0; |
| |
| std::vector<std::string> out_lines; |
| |
| for (int tag_idx = 0; tag_idx < num_tags; tag_idx++) { |
| bool_vec mask(num_bits, true); |
| int count1 = 0, count0 = 0; |
| |
| for (auto& segdat : segdata) { |
| auto& sd = segdat.second; |
| bool tag1 = segdata_tags1(sd).get(tag_idx); |
| bool tag0 = segdata_tags0(sd).get(tag_idx); |
| |
| assert(!tag1 || !tag0); |
| |
| if (tag1) { |
| count1++; |
| mask.apply_and(segdata_bits(sd)); |
| continue; |
| } |
| |
| if (tag0) { |
| count0++; |
| mask.apply_andc(segdata_bits(sd)); |
| continue; |
| } |
| } |
| |
| assert(count1 || count0); |
| |
| std::string out_line = tag_ids_r.at(tag_idx); |
| |
| if (count1 < FLAGS_m) { |
| char buffer[64]; |
| snprintf(buffer, 64, " <m1 %d>", count1); |
| out_line += buffer; |
| } |
| |
| if (count0 < FLAGS_m) { |
| char buffer[64]; |
| snprintf(buffer, 64, " <m0 %d>", count0); |
| out_line += buffer; |
| } |
| |
| if (count1 + count0 < FLAGS_M) { |
| char buffer[64]; |
| snprintf(buffer, 64, " <M %d %d>", count1, count0); |
| out_line += buffer; |
| } |
| |
| if (!count1) { |
| out_lines.push_back(out_line + " <const0>"); |
| cnt_const0 += 1; |
| continue; |
| } |
| |
| if (!count0) { |
| out_line += " <const1>"; |
| cnt_const1 += 1; |
| } |
| |
| int num_candidates = mask.count(); |
| |
| if (count0) { |
| min_candidates = |
| std::min(min_candidates, num_candidates); |
| max_candidates = |
| std::max(max_candidates, num_candidates); |
| avg_candidates += num_candidates; |
| cnt_candidates += 1; |
| } |
| |
| if (FLAGS_c < 0 || |
| (0 < num_candidates && num_candidates <= FLAGS_c)) { |
| std::vector<std::string> out_tags; |
| for (int bit_idx = 0; bit_idx < num_bits; bit_idx++) |
| if (mask.get(bit_idx)) |
| out_tags.push_back( |
| bit_ids_r.at(bit_idx)); |
| std::sort(out_tags.begin(), out_tags.end()); |
| for (auto& tag : out_tags) |
| out_line += " " + tag; |
| } else { |
| char buffer[64]; |
| snprintf(buffer, 64, " <%d candidates>", |
| num_candidates); |
| out_line += buffer; |
| } |
| |
| out_lines.push_back(out_line); |
| } |
| |
| std::sort(out_lines.begin(), out_lines.end()); |
| |
| for (auto& line : out_lines) |
| fprintf(f, "%s\n", line.c_str()); |
| |
| if (cnt_candidates) |
| avg_candidates /= cnt_candidates; |
| |
| printf("#of const0 tags: %d\n", cnt_const0); |
| printf("#of const1 tags: %d\n", cnt_const1); |
| |
| if (cnt_candidates) { |
| printf("min #of candidates: %d\n", min_candidates); |
| printf("max #of candidates: %d\n", max_candidates); |
| printf("avg #of candidates: %.3f\n", avg_candidates); |
| } |
| |
| if (!FLAGS_k.empty()) { |
| f = fopen(FLAGS_k.c_str(), "w"); |
| assert(f != nullptr); |
| std::sort(bit_ids_r.begin(), bit_ids_r.end()); |
| for (auto bit : bit_ids_r) |
| fprintf(f, "bit %s\n", bit.c_str()); |
| } |
| |
| return 0; |
| } |