blob: d62cc27b0dd43142df7aea82126c746b463397b1 [file] [log] [blame]
/*
* Copyright (C) 2017-2020 The Project X-Ray Authors.
*
* Use of this source code is governed by a ISC-style
* license that can be found in the LICENSE file or at
* https://opensource.org/licenses/ISC
*
* SPDX-License-Identifier: ISC
*/
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <numeric>
#include <set>
#include <string>
#include <vector>
#include <absl/strings/str_cat.h>
#include <gflags/gflags.h>
DEFINE_int32(
c,
4,
"threshold under which candidates are output. set to -1 to output all.");
DEFINE_bool(i, false, "add inverted tags");
DEFINE_int32(m, 0, "min number of set/cleared samples each");
DEFINE_int32(M, 0, "min number of set/cleared samples total");
DEFINE_string(o, "", "set output file");
DEFINE_string(k, "", "set output mask file");
using std::map;
using std::string;
using std::tuple;
using std::vector;
int num_bits = 0, num_tags = 0;
map<string, int> bit_ids, tag_ids;
vector<string> bit_ids_r, tag_ids_r;
#if 0
struct bool_vec
{
vector<bool> data;
bool_vec(int n = 0, bool initval = false) : data(n, initval)
{
}
void set(int n)
{
if (int(data.size()) <= n)
data.resize(n+1);
data[n] = true;
}
bool get(int n)
{
return data.at(n);
}
void resize(int n)
{
data.resize(n);
}
int count()
{
return std::accumulate(data.begin(), data.end(), 0);
}
void apply_and(const bool_vec &other)
{
assert(data.size() == other.data.size());
for (int i = 0; i < int(data.size()); i++)
data[i] = data[i] && other.data[i];
}
void apply_andc(const bool_vec &other)
{
assert(data.size() == other.data.size());
for (int i = 0; i < int(data.size()); i++)
data[i] = data[i] && !other.data[i];
}
};
#else
struct bool_vec {
vector<uint64_t> data;
bool_vec(int n = 0, bool initval = false)
: data((n + 63) / 64, initval ? ~uint64_t(0) : uint64_t(0)) {
for (int i = data.size() * 64 - 1; i >= n; i--)
data[n / 64] &= ~(uint64_t(1) << (n % 64));
}
void set(int n) {
if (int(data.size() * 64) <= n)
data.resize((n + 64) / 64);
data[n / 64] |= uint64_t(1) << (n % 64);
}
bool get(int n) { return (data[n / 64] >> (n % 64)) & 1; }
void resize(int n) { data.resize((n + 63) / 64); }
int count() {
int sum = 0;
for (int i = 0; i < 64 * int(data.size()); i++)
if (get(i))
sum++;
return sum;
}
void apply_and(const bool_vec& other) {
assert(data.size() == other.data.size());
for (int i = 0; i < int(data.size()); i++)
data[i] &= other.data[i];
}
void apply_andc(const bool_vec& other) {
assert(data.size() == other.data.size());
for (int i = 0; i < int(data.size()); i++)
data[i] &= ~other.data[i];
}
};
#endif
// segname -> bits, tags_on, tags_off
typedef tuple<bool_vec, bool_vec, bool_vec> segdata_t;
map<string, segdata_t> segdata;
map<string, int> segnamecnt;
static inline bool_vec& segdata_bits(segdata_t& sd) {
return std::get<0>(sd);
}
static inline bool_vec& segdata_tags1(segdata_t& sd) {
return std::get<1>(sd);
}
static inline bool_vec& segdata_tags0(segdata_t& sd) {
return std::get<2>(sd);
}
void read_input(std::istream& f, std::string filename) {
string token;
segdata_t* segptr = nullptr;
while (f >> token) {
if (token == "seg") {
f >> token;
token = filename + ":" + token;
while (segdata.count(token)) {
int idx = 1;
if (segnamecnt.count(token))
idx = segnamecnt.at(token);
segnamecnt[token] = idx + 1;
char buffer[64];
snprintf(buffer, 64, "-%d", idx);
token += buffer;
}
segptr = &segdata[token];
continue;
}
if (token == "bit") {
assert(segptr != nullptr);
f >> token;
if (bit_ids.count(token) == 0) {
bit_ids[token] = num_bits++;
bit_ids_r.push_back(token);
}
int bit_idx = bit_ids.at(token);
auto& bits = segdata_bits(*segptr);
bits.set(bit_idx);
continue;
}
if (token == "tag") {
assert(segptr != nullptr);
f >> token;
if (tag_ids.count(token) == 0) {
tag_ids[token] = num_tags++;
tag_ids_r.push_back(token);
}
int tag_idx = tag_ids.at(token);
f >> token;
assert(token == "0" || token == "1");
auto& tags = token == "1" ? segdata_tags1(*segptr)
: segdata_tags0(*segptr);
tags.set(tag_idx);
if (FLAGS_i) {
auto& inv_tags = token == "1"
? segdata_tags0(*segptr)
: segdata_tags1(*segptr);
token = tag_ids_r.at(tag_idx) + "__INV";
if (tag_ids.count(token) == 0) {
tag_ids[token] = num_tags++;
tag_ids_r.push_back(token);
}
int inv_tag_idx = tag_ids.at(token);
inv_tags.set(inv_tag_idx);
}
continue;
}
abort();
}
// printf("Number of segments: %d\n", int(segdata.size()));
// printf("Number of bits: %d\n", num_bits);
// printf("Number of tags: %d\n", num_tags);
for (auto& segdat : segdata) {
segdata_bits(segdat.second).resize(num_bits);
segdata_tags1(segdat.second).resize(num_tags);
segdata_tags0(segdat.second).resize(num_tags);
}
}
int main(int argc, char** argv) {
gflags::SetUsageMessage(
absl::StrCat("Usage: ", argv[0], " [options] file.."));
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (argc > 1) {
for (int optind = 1; optind < argc; optind++) {
printf("Reading %s.\n", argv[optind]);
std::ifstream f;
f.open(argv[optind]);
// Check if input file exists.
if (!f.good()) {
printf("ERROR: Input file does not exist!\n");
return -1;
}
assert(!f.fail());
read_input(f, argv[optind]);
}
} else {
printf("Reading from stdin...\n");
read_input(std::cin, "stdin");
}
printf("#of segments: %d\n", int(segdata.size()));
printf("#of bits: %d\n", num_bits);
printf("#of tags: %d\n", num_tags);
FILE* f = stdout;
if (!FLAGS_o.empty()) {
f = fopen(FLAGS_o.c_str(), "w");
assert(f != nullptr);
}
int cnt_const0 = 0;
int cnt_const1 = 0;
int cnt_candidates = 0;
int min_candidates = num_bits;
int max_candidates = 0;
float avg_candidates = 0;
std::vector<std::string> out_lines;
for (int tag_idx = 0; tag_idx < num_tags; tag_idx++) {
bool_vec mask(num_bits, true);
int count1 = 0, count0 = 0;
for (auto& segdat : segdata) {
auto& sd = segdat.second;
bool tag1 = segdata_tags1(sd).get(tag_idx);
bool tag0 = segdata_tags0(sd).get(tag_idx);
assert(!tag1 || !tag0);
if (tag1) {
count1++;
mask.apply_and(segdata_bits(sd));
continue;
}
if (tag0) {
count0++;
mask.apply_andc(segdata_bits(sd));
continue;
}
}
assert(count1 || count0);
std::string out_line = tag_ids_r.at(tag_idx);
if (count1 < FLAGS_m) {
char buffer[64];
snprintf(buffer, 64, " <m1 %d>", count1);
out_line += buffer;
}
if (count0 < FLAGS_m) {
char buffer[64];
snprintf(buffer, 64, " <m0 %d>", count0);
out_line += buffer;
}
if (count1 + count0 < FLAGS_M) {
char buffer[64];
snprintf(buffer, 64, " <M %d %d>", count1, count0);
out_line += buffer;
}
if (!count1) {
out_lines.push_back(out_line + " <const0>");
cnt_const0 += 1;
continue;
}
if (!count0) {
out_line += " <const1>";
cnt_const1 += 1;
}
int num_candidates = mask.count();
if (count0) {
min_candidates =
std::min(min_candidates, num_candidates);
max_candidates =
std::max(max_candidates, num_candidates);
avg_candidates += num_candidates;
cnt_candidates += 1;
}
if (FLAGS_c < 0 ||
(0 < num_candidates && num_candidates <= FLAGS_c)) {
std::vector<std::string> out_tags;
for (int bit_idx = 0; bit_idx < num_bits; bit_idx++)
if (mask.get(bit_idx))
out_tags.push_back(
bit_ids_r.at(bit_idx));
std::sort(out_tags.begin(), out_tags.end());
for (auto& tag : out_tags)
out_line += " " + tag;
} else {
char buffer[64];
snprintf(buffer, 64, " <%d candidates>",
num_candidates);
out_line += buffer;
}
out_lines.push_back(out_line);
}
std::sort(out_lines.begin(), out_lines.end());
for (auto& line : out_lines)
fprintf(f, "%s\n", line.c_str());
if (cnt_candidates)
avg_candidates /= cnt_candidates;
printf("#of const0 tags: %d\n", cnt_const0);
printf("#of const1 tags: %d\n", cnt_const1);
if (cnt_candidates) {
printf("min #of candidates: %d\n", min_candidates);
printf("max #of candidates: %d\n", max_candidates);
printf("avg #of candidates: %.3f\n", avg_candidates);
}
if (!FLAGS_k.empty()) {
f = fopen(FLAGS_k.c_str(), "w");
assert(f != nullptr);
std::sort(bit_ids_r.begin(), bit_ids_r.end());
for (auto bit : bit_ids_r)
fprintf(f, "bit %s\n", bit.c_str());
}
return 0;
}