blob: f859611dd08c797d4f0f1863d2db65617b2d1ca1 [file] [log] [blame]
#!/usr/bin/python
import sqlite3
import os.path
from os import makedirs
import sys
import re
from subprocess import call
import argparse
import textwrap
import csv
import StringIO
def list_tasks(dbname = "results.db"):
"""Return a list of all task names in a database"""
db = connect_db(dbname)
cursor = db.cursor()
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
return [row[0] for row in cursor.fetchall()]
def describe_tasks(tasks, dbname = "results.db"):
"""Return a list of all shared parameters for some tasks of a database"""
db = connect_db(dbname)
cursor = db.cursor()
if not isinstance(tasks,list):
tasks = [tasks]
# only the shared columns will remain
shared_params = []
for task in tasks:
query_command = "pragma table_info([{}])".format(task);
cursor.execute(query_command);
task_params = [" ".join((row[1], row[2])) for row in cursor.fetchall()]
if shared_params:
shared_params = intersection(shared_params, task_params)
else:
shared_params = task_params
return shared_params
def retrieve_data(x_param, y_param, filters, tasks, dbname = "results.db"):
"""
Return a list of selected parameters and a data structure (list of list of tuples),
- 1st index corresponds to the task,
- 2nd index corresponds to the row,
- 3rd index corresponds to the selected parameter.
The key parameters that define a benchmark are always selected.
"""
db = connect_db(dbname)
data = []
cols_to_select = [x_param.split()[0], y_param.split()[0]]
# always pass shared primary key information (they define a distinct benchmark)
primary_keys = []
for t in range(len(tasks)):
if t == 0:
primary_keys = retrieve_primary_keys(tasks[t], db)
else:
primary_keys = intersection(primary_keys, retrieve_primary_keys(tasks[t], db))
for key in primary_keys:
if key not in cols_to_select:
cols_to_select.append(key)
# also pass filter parameter value in
for f in filters:
if f.param not in cols_to_select:
cols_to_select.append(f.param)
sql_val_args = []
filter_command = ""
for t in range(len(tasks)):
select_command = "SELECT DISTINCT {} FROM {} ".format(','.join(cols_to_select), task_name(tasks[t]))
if filters:
# first time, still need to populate sql_val_args and make filter_command
if t == 0:
filter_command = "WHERE "
for f in range(len(filters)):
filter_command += str(filters[f])
sql_val_args.extend(filters[f].args)
if f < len(filters) - 1:
filter_command += " AND "
select_command += filter_command
select_command += ';'
print(select_command)
cursor = db.cursor()
cursor.execute(select_command, sql_val_args)
data.append(tuple(tuple(row) for row in cursor.fetchall()));
return cols_to_select, data
def export_data_csv(selected_cols, data):
"""
Exports retrieved data as in-memory csv files.
Each task will be a separate csv file, with the naming left to the caller.
The first row will be the header for the selected columns, the rest will be values.
"""
for task_data in data:
csvf = StringIO.StringIO()
writer = csv.writer(csvf)
# header information
writer.writerow(selected_cols)
for row in task_data:
writer.writerow(row)
yield csvf
def export_data_csv_todisk(selected_cols, data, tasks, dir = "benchtracker_data"):
"""
Exports retrieved data as csv files on export_data_csv_todisk
Each task is a separate file with their full task name, '/' being replaced by '.'
"""
if not os.path.exists(dir):
makedirs(dir)
t = 0
for csvf in export_data_csv(selected_cols, data):
with open("".join([dir, '/', tasks[t].replace('/','.'), '.csv']), 'w') as f:
csvf.seek(0)
buf = csvf.read(1048576) # 1 MB
while buf:
f.write(buf)
buf = csvf.read(1048576)
t += 1
def describe_param(param, mode, tasks, dbname = "results.db"):
"""
Give back metainformation about a parameter to allow for easier filtering.
Param would be an element of the list returned by describe_tasks - space separated name and type
Returns a 2-tuple describing the parameter type and values for some tasks of a database.
- 1st value is either 'range' or 'categorical'
- 2nd value is either a 2-tuple for range types, or a n-tuple for categorical
"""
db = connect_db(dbname)
cursor = db.cursor()
(param_name, param_type) = param.split()
if param_type == "TEXT":
mode = 'categorical'
elif mode not in {'categorical', 'range'}:
raise ValueError
subquery = ""
min_param = "min_p"
max_param = "max_p"
if not isinstance(tasks,list):
subquery = task_name(tasks)
min_param = max_param = param_name
else:
subquery += '('
for t in range(len(tasks)):
if mode == "categorical":
subquery += "SELECT DISTINCT {} FROM {}".format(param_name, task_name(tasks[t]))
else:
subquery += "SELECT MIN({0}) as min_p, MAX({0}) as max_p FROM {1}".format(param_name, task_name(tasks[t]))
if t < len(tasks) - 1:
subquery += " UNION ALL "
subquery += ')'
print(subquery)
# categorical data, return a list of all distinct values
if mode == 'categorical':
cursor.execute("SELECT DISTINCT {} FROM {};".format(param_name, subquery))
return (mode,tuple(row[0] for row in cursor.fetchall()))
# ranged data, return (min, max)
else:
cursor.execute("SELECT MIN({}), MAX({}) FROM {};".format(min_param, max_param, subquery))
return (mode,tuple(cursor.fetchone()))
def connect_db(dbname = "results.db"):
"""Attempt a database connection, exiting with 1 if dbname does not exist, else return with db connection"""
if not os.path.isfile(dbname):
print("{} does not exist".format(dbname))
raise IOError(dbname)
db = sqlite3.connect(dbname)
db.row_factory = sqlite3.Row
return db
# filter object
valid_filter_methods = {"IN", "BETWEEN", "LIKE", "=", "<>", "!=", ">", "<", ">=", "<="}
class Task_filter:
def __init__(self, param, method, args):
self.param = param
if method.upper() in valid_filter_methods:
self.method = method.upper()
else:
print(method, "is not a supported filter method")
raise ValueError
self.args = args
def __str__(self):
substitutions = sql_substitute(self.args)
if self.method == "BETWEEN":
substitutions = "? AND ?"
elif self.method == "IN":
substitutions = '('+substitutions+')'
return "({} {} {})".format(self.param, self.method, substitutions)
# internal utilities
def task_name(task):
return '['+task+']'
def intersection(first, other):
intersection_set = set.intersection(set(first), set(other))
# reimpose order
return [item for item in first if item in intersection_set]
def sql_substitute(args):
return ('?,'*len(args)).rstrip(',')
def retrieve_primary_keys(task, db):
cursor = db.cursor()
cursor.execute("PRAGMA table_info(%s)" % task_name(task))
column_info = cursor.fetchall()
primary_keys = []
for info in column_info:
print(info)
if info[5] != 0:
print("key param:", info[1])
primary_keys.append(info[1])
return primary_keys