vtr_flow/scripts/benchtracker/interface_db.py - third_party/vtr-verilog-to-routing - Git at Google

 #!/usr/bin/python

 import sqlite3
 import os.path
 from os import makedirs
 import sys
 import re
 from subprocess import call
 import argparse
 import textwrap
 import csv
 import StringIO

 def list_tasks(dbname = "results.db"):
     """Return a list of all task names in a database"""
     db = connect_db(dbname)
     cursor = db.cursor()
     cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
     return [row[0] for row in cursor.fetchall()]


 def describe_tasks(tasks, dbname = "results.db"):
     """Return a list of all shared parameters for some tasks of a database"""
     db = connect_db(dbname)
     cursor = db.cursor()
     if not isinstance(tasks,list):
         tasks = [tasks]

     # only the shared columns will remain
     shared_params = []
     for task in tasks:
         query_command = "pragma table_info([{}])".format(task);
         cursor.execute(query_command);
         task_params = [" ".join((row[1], row[2])) for row in cursor.fetchall()]
         if shared_params:
             shared_params = intersection(shared_params, task_params)
         else:
             shared_params = task_params

     return shared_params


 def retrieve_data(x_param, y_param, filters, tasks, dbname = "results.db"):
     """
     Return a list of selected parameters and a data structure (list of list of tuples),

     - 1st index corresponds to the task,
     - 2nd index corresponds to the row,
     - 3rd index corresponds to the selected parameter.
     The key parameters that define a benchmark are always selected.
     """
     db = connect_db(dbname)
     data = []

     cols_to_select = [x_param.split()[0], y_param.split()[0]]

     # always pass shared primary key information (they define a distinct benchmark)
     primary_keys = []
     for t in range(len(tasks)):
         if t == 0:
             primary_keys = retrieve_primary_keys(tasks[t], db)
         else:
             primary_keys = intersection(primary_keys, retrieve_primary_keys(tasks[t], db))
     for key in primary_keys:
         if key not in cols_to_select:
             cols_to_select.append(key)

     # also pass filter parameter value in
     for f in filters:
         if f.param not in cols_to_select:
             cols_to_select.append(f.param)

     sql_val_args = []
     filter_command = ""
     for t in range(len(tasks)):
         select_command = "SELECT DISTINCT {} FROM {} ".format(','.join(cols_to_select), task_name(tasks[t]))
         if filters:
             # first time, still need to populate sql_val_args and make filter_command
             if t == 0:
                 filter_command = "WHERE "
                 for f in range(len(filters)):

                     filter_command += str(filters[f])
                     sql_val_args.extend(filters[f].args)

                     if f < len(filters) - 1:
                         filter_command += " AND "

         select_command += filter_command
         select_command += ';'

         print(select_command)
         cursor = db.cursor()
         cursor.execute(select_command, sql_val_args)
         data.append(tuple(tuple(row) for row in cursor.fetchall()));

     return cols_to_select, data

 def export_data_csv(selected_cols, data):
     """
     Exports retrieved data as in-memory csv files.

     Each task will be a separate csv file, with the naming left to the caller.
     The first row will be the header for the selected columns, the rest will be values.
     """
     for task_data in data:
         csvf = StringIO.StringIO()
         writer = csv.writer(csvf)
         # header information
         writer.writerow(selected_cols)

         for row in task_data:
             writer.writerow(row)
         yield csvf


 def export_data_csv_todisk(selected_cols, data, tasks, dir = "benchtracker_data"):
     """
     Exports retrieved data as csv files on export_data_csv_todisk

     Each task is a separate file with their full task name, '/' being replaced by '.'
     """
     if not os.path.exists(dir):
         makedirs(dir)
     t = 0
     for csvf in export_data_csv(selected_cols, data):
         with open("".join([dir, '/', tasks[t].replace('/','.'), '.csv']), 'w') as f:
             csvf.seek(0)
             buf = csvf.read(1048576) # 1 MB
             while buf:
                 f.write(buf)
                 buf = csvf.read(1048576)
             t += 1


 def describe_param(param, mode, tasks, dbname = "results.db"):
     """
     Give back metainformation about a parameter to allow for easier filtering.

     Param would be an element of the list returned by describe_tasks - space separated name and type
     Returns a 2-tuple describing the parameter type and values for some tasks of a database.
     - 1st value is either 'range' or 'categorical'
     - 2nd value is either a 2-tuple for range types, or a n-tuple for categorical
     """
     db = connect_db(dbname)
     cursor = db.cursor()

     (param_name, param_type) = param.split()
     if param_type == "TEXT":
         mode = 'categorical'
     elif mode not in {'categorical', 'range'}:
         raise ValueError

     subquery = ""
     min_param = "min_p"
     max_param = "max_p"
     if not isinstance(tasks,list):
         subquery = task_name(tasks)
         min_param = max_param = param_name
     else:
         subquery += '('
         for t in range(len(tasks)):
             if mode == "categorical":
                 subquery += "SELECT DISTINCT {} FROM {}".format(param_name, task_name(tasks[t]))
             else:
                 subquery += "SELECT MIN({0}) as min_p, MAX({0}) as max_p FROM {1}".format(param_name, task_name(tasks[t]))

             if t < len(tasks) - 1:
                 subquery += " UNION ALL "
         subquery += ')'

     print(subquery)

     # categorical data, return a list of all distinct values
     if mode == 'categorical':
         cursor.execute("SELECT DISTINCT {} FROM {};".format(param_name, subquery))
         return (mode,tuple(row[0] for row in cursor.fetchall()))
     # ranged data, return (min, max)
     else:
         cursor.execute("SELECT MIN({}), MAX({}) FROM {};".format(min_param, max_param, subquery))
         return (mode,tuple(cursor.fetchone()))


 def connect_db(dbname = "results.db"):
     """Attempt a database connection, exiting with 1 if dbname does not exist, else return with db connection"""
     if not os.path.isfile(dbname):
         print("{} does not exist".format(dbname))
         raise IOError(dbname)
     db = sqlite3.connect(dbname)
     db.row_factory = sqlite3.Row
     return db


 # filter object
 valid_filter_methods = {"IN", "BETWEEN", "LIKE", "=", "<>", "!=", ">", "<", ">=", "<="}
 class Task_filter:
     def __init__(self, param, method, args):
         self.param = param
         if method.upper() in valid_filter_methods:
             self.method = method.upper()
         else:
             print(method, "is not a supported filter method")
             raise ValueError
         self.args = args
     def __str__(self):
         substitutions = sql_substitute(self.args)
         if self.method == "BETWEEN":
             substitutions = "? AND ?"
         elif self.method == "IN":
             substitutions = '('+substitutions+')'
         return "({} {} {})".format(self.param, self.method, substitutions)


 # internal utilities
 def task_name(task):
     return '['+task+']'
 def intersection(first, other):
     intersection_set = set.intersection(set(first), set(other))
     # reimpose order
     return [item for item in first if item in intersection_set]

 def sql_substitute(args):
     return ('?,'*len(args)).rstrip(',')

 def retrieve_primary_keys(task, db):
     cursor = db.cursor()
     cursor.execute("PRAGMA table_info(%s)" % task_name(task))
     column_info = cursor.fetchall()
     primary_keys = []
     for info in column_info:
         print(info)
         if info[5] != 0:
             print("key param:", info[1])
             primary_keys.append(info[1])

     return primary_keys
	#!/usr/bin/python

	import sqlite3
	import os.path
	from os import makedirs
	import sys
	import re
	from subprocess import call
	import argparse
	import textwrap
	import csv
	import StringIO

	def list_tasks(dbname = "results.db"):
	"""Return a list of all task names in a database"""
	db = connect_db(dbname)
	cursor = db.cursor()
	cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
	return [row[0] for row in cursor.fetchall()]


	def describe_tasks(tasks, dbname = "results.db"):
	"""Return a list of all shared parameters for some tasks of a database"""
	db = connect_db(dbname)
	cursor = db.cursor()
	if not isinstance(tasks,list):
	tasks = [tasks]

	# only the shared columns will remain
	shared_params = []
	for task in tasks:
	query_command = "pragma table_info([{}])".format(task);
	cursor.execute(query_command);
	task_params = [" ".join((row[1], row[2])) for row in cursor.fetchall()]
	if shared_params:
	shared_params = intersection(shared_params, task_params)
	else:
	shared_params = task_params

	return shared_params


	def retrieve_data(x_param, y_param, filters, tasks, dbname = "results.db"):
	"""
	Return a list of selected parameters and a data structure (list of list of tuples),

	- 1st index corresponds to the task,
	- 2nd index corresponds to the row,
	- 3rd index corresponds to the selected parameter.
	The key parameters that define a benchmark are always selected.
	"""
	db = connect_db(dbname)
	data = []

	cols_to_select = [x_param.split()[0], y_param.split()[0]]

	# always pass shared primary key information (they define a distinct benchmark)
	primary_keys = []
	for t in range(len(tasks)):
	if t == 0:
	primary_keys = retrieve_primary_keys(tasks[t], db)
	else:
	primary_keys = intersection(primary_keys, retrieve_primary_keys(tasks[t], db))
	for key in primary_keys:
	if key not in cols_to_select:
	cols_to_select.append(key)

	# also pass filter parameter value in
	for f in filters:
	if f.param not in cols_to_select:
	cols_to_select.append(f.param)

	sql_val_args = []
	filter_command = ""
	for t in range(len(tasks)):
	select_command = "SELECT DISTINCT {} FROM {} ".format(','.join(cols_to_select), task_name(tasks[t]))
	if filters:
	# first time, still need to populate sql_val_args and make filter_command
	if t == 0:
	filter_command = "WHERE "
	for f in range(len(filters)):

	filter_command += str(filters[f])
	sql_val_args.extend(filters[f].args)

	if f < len(filters) - 1:
	filter_command += " AND "

	select_command += filter_command
	select_command += ';'

	print(select_command)
	cursor = db.cursor()
	cursor.execute(select_command, sql_val_args)
	data.append(tuple(tuple(row) for row in cursor.fetchall()));

	return cols_to_select, data

	def export_data_csv(selected_cols, data):
	"""
	Exports retrieved data as in-memory csv files.

	Each task will be a separate csv file, with the naming left to the caller.
	The first row will be the header for the selected columns, the rest will be values.
	"""
	for task_data in data:
	csvf = StringIO.StringIO()
	writer = csv.writer(csvf)
	# header information
	writer.writerow(selected_cols)

	for row in task_data:
	writer.writerow(row)
	yield csvf


	def export_data_csv_todisk(selected_cols, data, tasks, dir = "benchtracker_data"):
	"""
	Exports retrieved data as csv files on export_data_csv_todisk

	Each task is a separate file with their full task name, '/' being replaced by '.'
	"""
	if not os.path.exists(dir):
	makedirs(dir)
	t = 0
	for csvf in export_data_csv(selected_cols, data):
	with open("".join([dir, '/', tasks[t].replace('/','.'), '.csv']), 'w') as f:
	csvf.seek(0)
	buf = csvf.read(1048576) # 1 MB
	while buf:
	f.write(buf)
	buf = csvf.read(1048576)
	t += 1



	def describe_param(param, mode, tasks, dbname = "results.db"):
	"""
	Give back metainformation about a parameter to allow for easier filtering.

	Param would be an element of the list returned by describe_tasks - space separated name and type
	Returns a 2-tuple describing the parameter type and values for some tasks of a database.
	- 1st value is either 'range' or 'categorical'
	- 2nd value is either a 2-tuple for range types, or a n-tuple for categorical
	"""
	db = connect_db(dbname)
	cursor = db.cursor()

	(param_name, param_type) = param.split()
	if param_type == "TEXT":
	mode = 'categorical'
	elif mode not in {'categorical', 'range'}:
	raise ValueError

	subquery = ""
	min_param = "min_p"
	max_param = "max_p"
	if not isinstance(tasks,list):
	subquery = task_name(tasks)
	min_param = max_param = param_name
	else:
	subquery += '('
	for t in range(len(tasks)):
	if mode == "categorical":
	subquery += "SELECT DISTINCT {} FROM {}".format(param_name, task_name(tasks[t]))
	else:
	subquery += "SELECT MIN({0}) as min_p, MAX({0}) as max_p FROM {1}".format(param_name, task_name(tasks[t]))

	if t < len(tasks) - 1:
	subquery += " UNION ALL "
	subquery += ')'

	print(subquery)

	# categorical data, return a list of all distinct values
	if mode == 'categorical':
	cursor.execute("SELECT DISTINCT {} FROM {};".format(param_name, subquery))
	return (mode,tuple(row[0] for row in cursor.fetchall()))
	# ranged data, return (min, max)
	else:
	cursor.execute("SELECT MIN({}), MAX({}) FROM {};".format(min_param, max_param, subquery))
	return (mode,tuple(cursor.fetchone()))



	def connect_db(dbname = "results.db"):
	"""Attempt a database connection, exiting with 1 if dbname does not exist, else return with db connection"""
	if not os.path.isfile(dbname):
	print("{} does not exist".format(dbname))
	raise IOError(dbname)
	db = sqlite3.connect(dbname)
	db.row_factory = sqlite3.Row
	return db


	# filter object
	valid_filter_methods = {"IN", "BETWEEN", "LIKE", "=", "<>", "!=", ">", "<", ">=", "<="}
	class Task_filter:
	def __init__(self, param, method, args):
	self.param = param
	if method.upper() in valid_filter_methods:
	self.method = method.upper()
	else:
	print(method, "is not a supported filter method")
	raise ValueError
	self.args = args
	def __str__(self):
	substitutions = sql_substitute(self.args)
	if self.method == "BETWEEN":
	substitutions = "? AND ?"
	elif self.method == "IN":
	substitutions = '('+substitutions+')'
	return "({} {} {})".format(self.param, self.method, substitutions)


	# internal utilities
	def task_name(task):
	return '['+task+']'
	def intersection(first, other):
	intersection_set = set.intersection(set(first), set(other))
	# reimpose order
	return [item for item in first if item in intersection_set]

	def sql_substitute(args):
	return ('?,'*len(args)).rstrip(',')

	def retrieve_primary_keys(task, db):
	cursor = db.cursor()
	cursor.execute("PRAGMA table_info(%s)" % task_name(task))
	column_info = cursor.fetchall()
	primary_keys = []
	for info in column_info:
	print(info)
	if info[5] != 0:
	print("key param:", info[1])
	primary_keys.append(info[1])

	return primary_keys