vtr_flow/scripts/benchtracker/populate_db.py - third_party/vtr-verilog-to-routing - Git at Google

 #!/usr/bin/python

 from __future__ import print_function, division
 import sys
 from subprocess import call
 import os
 import shlex
 import argparse
 import textwrap
 import sqlite3
 import socket   # for hostname
 import getpass  # for username
 from util import (walk_runs, get_trailing_num, get_result_file,
     get_task_table_name, immediate_subdir, natural_sort,
     is_int, is_float, convert_strictest)

 nullval = '-1'
 type_map = {int: "INT", float: "REAL", str: "TEXT"}

 # main program sequence
 def main():
     params = Params()
     parse_args(params)

     # operate on database
     db = sqlite3.connect(params.database)
     db.row_factory = sqlite3.Row

     while params.task_dir:
         if (params.clean):
             drop_table(params, db)
         else:
             verify_paths(params)
             initialize_tracked_columns(params, db)
             update_db(params, db)

         load_next_task(params)

     db.close()


 def verify_paths(params):
     """verify paths exist as either relational or absolute paths, modifying them if necessary"""
     if os.path.isdir(params.task_dir):
         walk_runs(params, check_result_exists)
     else:
         print(params.task_dir, " directory does not exist")

 def update_db(params, db):
     # check if table for task exists; if not then create it
     task_table_name = params.task_table_name
     create_table(params, db, task_table_name)
     # load up latest run and parsed date for task
     def check_last_runs_table(runs):
         natural_sort(runs)
         cursor = db.cursor()
         cursor.execute("SELECT MAX({}), MAX({}) FROM {}".format("parsed_date", "run", params.task_table_name))
         row = cursor.fetchone()
         if row[0]:
             # storing in database truncates decimal, so add 1 second
             last_parsed_date = row[0] + 1
             last_run = row[1]
             print("last parsed date", last_parsed_date)
             print("last run", last_run)
             params.last_parsed_date = last_parsed_date
             params.last_run = last_run
         else:
             print("first population")
             params.last_run = 0
         # else first run, nothing in table yet

     def add_run_to_db(params, run):
         run_number = get_trailing_num(run)
         resfilename = get_result_file(params, params.run_prefix, run_number)
         try:
             parsed_date = os.path.getmtime(resfilename)
             # throw away unless newer than latest or run number greater than maximum
             if parsed_date <= params.last_parsed_date and run_number <= params.last_run:
                 return
         except OSError:
             print("file {} not found; skipping".format(resfilename))
             return

         params.last_run += 1
         print("run {} added ({}) ({})".format(run_number, params.last_run, parsed_date))

         with open(resfilename, 'r') as res:
             # make sure table is compatible with run data by inserting any new columns
             # always called "run" in table (converted from whatever prefix users use)
             result_params = ["run", "parsed_date"]
             result_params.extend(res.readline().split('\t'))
             if result_params[-1] == '\n':
                 result_params.pop()
             result_params = ["".join(('\"',p.strip(),'\"')) for p in result_params]

             pre_sample_pos = res.tell()
             result_params_sample = res.readline().split('\t')
             # go back to presample location for normal line iteration
             res.seek(pre_sample_pos, os.SEEK_SET)

             # add new column to table
             for c in range(len(result_params)):
                 if result_params[c] not in params.tracked_columns:
                     print("ADDING {} as new column".format(result_params[c]))
                     add_column_to_table(params, db, result_params[c], result_params_sample[c-2]) # -2 accounts for run and parsed date

             # add value rows
             rows_to_add = []
             for line in res:
                 # run number and parsed_date are always recorded
                 result_params_val = [params.last_run, parsed_date]
                 result_params_val.extend(line.split('\t'))
                 if result_params_val[-1] == '\n':
                     result_params_val.pop()
                 # something must be wrong here
                 if len(result_params_val) > len(result_params):
                     print("There are {} values for only {} parameters in run {}; \
                         skipping run".format(len(result_params_val), len(result_params), run_number))
                     # skip this run
                     params.last_run -= 1
                     return

                 # for when the last column value is the empty string
                 while len(result_params_val) < len(result_params):
                     result_params_val.append('')

                 rows_to_add.append(tuple(convert_strictest(param) if param != nullval else None for param in result_params_val))

             param_placeholders = ("?,"*len(result_params)).rstrip(',')
             # specify columns to insert into since ordering of columns in file may not match table
             insert_rows_command = "INSERT OR IGNORE INTO {} ({}) VALUES ({})".format(
                 params.task_table_name,
                 ','.join(result_params),
                 param_placeholders)
             cursor = db.cursor()
             cursor.executemany(insert_rows_command, rows_to_add)


     walk_runs(params, add_run_to_db, check_last_runs_table)
     db.commit()


 def drop_table(params, db):
     cursor = db.cursor()
     cursor.execute("DROP TABLE IF EXISTS {}".format(params.task_table_name))

 def create_table(params, db, task_table_name):
     # creates table schema based on the result file of run 1

     with open(get_result_file(params, params.run_prefix, 1), 'r') as res:
         result_params = [param.strip() for param in res.readline().rstrip().split('\t')]
         result_params_sample = [value.strip() for value in res.readline().rstrip().split('\t')]
         while len(result_params_sample) < len(result_params):
             result_params_sample.append('')


         # check if table name already exists
         cursor = db.cursor()
         cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='{}'".format(params.task_table_name.strip("[]")))
         if cursor.fetchone():
             return


         # table identifiers cannot be parameterized, so have to use string concatenation
         create_table_command = '''CREATE TABLE IF NOT EXISTS {}('''.format(task_table_name)

         # include one item for each result_params
         create_table_command += "{} INTEGER, ".format(params.run_prefix)
         # time is in seconds since epoch
         create_table_command += "parsed_date REAL, "
         for p in range(len(result_params)):
             p_schema = "".join(('\"',result_params[p].strip(),'\" ', type_map.get(type(convert_strictest(result_params_sample[p])), "TEXT")))
             p_schema += ", "
             create_table_command += p_schema


         # treat with unique keys
         primary_keys = "PRIMARY KEY({},".format(params.run_prefix) # run always considered primary key
         for primary_key in params.key_params:
             if primary_key not in result_params:
                 print("{} does not exist in result file of run 1".format(primary_key))
                 sys.exit(3)
             primary_keys += primary_key
             primary_keys += ','
         primary_keys = primary_keys[:-1]  # remove trailing ,
         primary_keys += ')'
         create_table_command += primary_keys

         create_table_command += ')'

         print(create_table_command)
         cursor = db.cursor()
         cursor.execute(create_table_command)

         # reinitialize tracked columns for newly created table
         initialize_tracked_columns(params, db)


 def initialize_tracked_columns(params, db):
     cursor = db.cursor()
     cursor.execute("PRAGMA table_info(%s)" % params.task_table_name)
     column_info = cursor.fetchall()
     column_names = set()
     for info in column_info:
         column_names.add('\"' + info[1] + '\"')
     setattr(params, 'tracked_columns', column_names)
     # print('tracked params: ', end='')
     # print(params.tracked_columns)


 # generating program parameters
 class Params(object):
     def __iter__(self):
         for attr, value in self.__dict__.iteritems():
             yield attr, value

 def parse_args(ns=None):
     """parse arguments from command line and return as namespace object"""
     parser = argparse.ArgumentParser(
             formatter_class=argparse.RawDescriptionHelpFormatter,
             description=textwrap.dedent("""\
             populates a central database with benchmark information

         Generated database:
             Each task will become its own table, identified by its absolute path.
             Each table will have as many columns as the result file the first time
             the table is generated. The columns of the result file should not change
             after the table is generated."""),
             usage="%(prog)s <task_dir> [OPTIONS]")

     # arguments should either end with _dir or _file for use by other functions
     parser.add_argument("task_list",
             nargs='+',
             default=[],
             help="directories (relative or absolute) holding runs")
     parser.add_argument("--root_directory",
             default="",
             help="where the task paths are relative to (only set if task paths are relative)")
     parser.add_argument("-f", "--result_file",
             default="parse_results.txt",
             help="result file to look for inside each run; default: %(default)s")
     parser.add_argument("-p", "--run_prefix",
             default="run",
             help="prefix of run directories under the task directory; default: %(default)s")
     parser.add_argument("-s", "--parse_script",
             default="echo parsed result file not found {run_number} for {task_name}",
             help="bash command string to run if parsed file not found;\
                 use {run_number} and {task_name} to retrieve run information;\
                 default: %(default)s")
     parser.add_argument("-d", "--database",
             default="results.db",
             help="name of database to store results in; default: %(default)s")
     parser.add_argument("--clean",
             action='store_true',
             default=False,
             help="clean (remove) the table for this task; exits afterwards")
     parser.add_argument("-k","--key_params", required=True,
             nargs='+',
             default=[],
             help="the combination of key parameters that defines a unique benchmark;")
     params = parser.parse_args(namespace=ns)
     params.database = os.path.expanduser(params.database)
     # initialize to -1 as everything valid must be greater than -1
     setattr(params, 'last_parsed_date', -1)
     setattr(params, 'last_run', -1)
     # if a task list is given (where each line is the path to a task)
     if params.root_directory:
         if params.root_directory[-1] != '/':
             params.root_directory += '/'
         # make directory if it does not exist
         if not os.path.exists(params.root_directory):
             os.makedirs(directory)
         params.task_list = [params.root_directory + task for task in params.task_list]

     if os.path.isfile(params.task_list[0]):
         print("given task list")
         task_list = []
         with open(params.task_list[0], 'r') as tl:
             for line in tl:
                 task_list.append(params.root_directory + line.rstrip())
             params.task_list = task_list

     # load first task
     load_next_task(params)
     return params

 def load_next_task(params):
     if not params.task_list:
         setattr(params, 'task_dir', None)
         return

     active_task = params.task_list.pop()
     print("active task:", active_task)

     setattr(params, 'task_dir', active_task)
     # name of task
     setattr(params, 'task_name', params.task_dir.split('/')[-1])
     # path to task
     setattr(params, 'task_path', os.path.abspath(os.path.join(params.task_dir, os.pardir)))
     # table name in the database
     setattr(params, 'task_table_name', get_task_table_name(params))
     return params


 # walk operations; all take params and run as arguments
 def check_result_exists(params, run):

     run_number = get_trailing_num(run)
     if not os.path.isfile(get_result_file(params, params.run_prefix, run_number)):
         parsed_call = params.parse_script.format(
             task_dir = params.task_dir,
             task_name = params.task_name,
             run_number = get_trailing_num(run))
         print(run, " called ", parsed_call)
         parsed_call = shlex.split(parsed_call)
         parsed_call[0] = os.path.expanduser(parsed_call[0])
         call(parsed_call)
     else:
         print(run, " OK")

 def add_column_to_table(params, db, column_name, sample_val):
     cursor = db.cursor()
     col_type = type_map.get(type(convert_strictest(sample_val)), "TEXT")
     cursor.execute("ALTER TABLE {table} ADD COLUMN {col} {type}".format(
         table=params.task_table_name, col=column_name, type=col_type))
     params.tracked_columns.add(column_name)


 if __name__ == "__main__":
     main()
	#!/usr/bin/python

	from __future__ import print_function, division
	import sys
	from subprocess import call
	import os
	import shlex
	import argparse
	import textwrap
	import sqlite3
	import socket # for hostname
	import getpass # for username
	from util import (walk_runs, get_trailing_num, get_result_file,
	get_task_table_name, immediate_subdir, natural_sort,
	is_int, is_float, convert_strictest)

	nullval = '-1'
	type_map = {int: "INT", float: "REAL", str: "TEXT"}

	# main program sequence
	def main():
	params = Params()
	parse_args(params)

	# operate on database
	db = sqlite3.connect(params.database)
	db.row_factory = sqlite3.Row

	while params.task_dir:
	if (params.clean):
	drop_table(params, db)
	else:
	verify_paths(params)
	initialize_tracked_columns(params, db)
	update_db(params, db)

	load_next_task(params)

	db.close()


	def verify_paths(params):
	"""verify paths exist as either relational or absolute paths, modifying them if necessary"""
	if os.path.isdir(params.task_dir):
	walk_runs(params, check_result_exists)
	else:
	print(params.task_dir, " directory does not exist")

	def update_db(params, db):
	# check if table for task exists; if not then create it
	task_table_name = params.task_table_name
	create_table(params, db, task_table_name)
	# load up latest run and parsed date for task
	def check_last_runs_table(runs):
	natural_sort(runs)
	cursor = db.cursor()
	cursor.execute("SELECT MAX({}), MAX({}) FROM {}".format("parsed_date", "run", params.task_table_name))
	row = cursor.fetchone()
	if row[0]:
	# storing in database truncates decimal, so add 1 second
	last_parsed_date = row[0] + 1
	last_run = row[1]
	print("last parsed date", last_parsed_date)
	print("last run", last_run)
	params.last_parsed_date = last_parsed_date
	params.last_run = last_run
	else:
	print("first population")
	params.last_run = 0
	# else first run, nothing in table yet

	def add_run_to_db(params, run):
	run_number = get_trailing_num(run)
	resfilename = get_result_file(params, params.run_prefix, run_number)
	try:
	parsed_date = os.path.getmtime(resfilename)
	# throw away unless newer than latest or run number greater than maximum
	if parsed_date <= params.last_parsed_date and run_number <= params.last_run:
	return
	except OSError:
	print("file {} not found; skipping".format(resfilename))
	return

	params.last_run += 1
	print("run {} added ({}) ({})".format(run_number, params.last_run, parsed_date))

	with open(resfilename, 'r') as res:
	# make sure table is compatible with run data by inserting any new columns
	# always called "run" in table (converted from whatever prefix users use)
	result_params = ["run", "parsed_date"]
	result_params.extend(res.readline().split('\t'))
	if result_params[-1] == '\n':
	result_params.pop()
	result_params = ["".join(('\"',p.strip(),'\"')) for p in result_params]

	pre_sample_pos = res.tell()
	result_params_sample = res.readline().split('\t')
	# go back to presample location for normal line iteration
	res.seek(pre_sample_pos, os.SEEK_SET)

	# add new column to table
	for c in range(len(result_params)):
	if result_params[c] not in params.tracked_columns:
	print("ADDING {} as new column".format(result_params[c]))
	add_column_to_table(params, db, result_params[c], result_params_sample[c-2]) # -2 accounts for run and parsed date

	# add value rows
	rows_to_add = []
	for line in res:
	# run number and parsed_date are always recorded
	result_params_val = [params.last_run, parsed_date]
	result_params_val.extend(line.split('\t'))
	if result_params_val[-1] == '\n':
	result_params_val.pop()
	# something must be wrong here
	if len(result_params_val) > len(result_params):
	print("There are {} values for only {} parameters in run {}; \
	skipping run".format(len(result_params_val), len(result_params), run_number))
	# skip this run
	params.last_run -= 1
	return

	# for when the last column value is the empty string
	while len(result_params_val) < len(result_params):
	result_params_val.append('')

	rows_to_add.append(tuple(convert_strictest(param) if param != nullval else None for param in result_params_val))

	param_placeholders = ("?,"*len(result_params)).rstrip(',')
	# specify columns to insert into since ordering of columns in file may not match table
	insert_rows_command = "INSERT OR IGNORE INTO {} ({}) VALUES ({})".format(
	params.task_table_name,
	','.join(result_params),
	param_placeholders)
	cursor = db.cursor()
	cursor.executemany(insert_rows_command, rows_to_add)



	walk_runs(params, add_run_to_db, check_last_runs_table)
	db.commit()


	def drop_table(params, db):
	cursor = db.cursor()
	cursor.execute("DROP TABLE IF EXISTS {}".format(params.task_table_name))

	def create_table(params, db, task_table_name):
	# creates table schema based on the result file of run 1

	with open(get_result_file(params, params.run_prefix, 1), 'r') as res:
	result_params = [param.strip() for param in res.readline().rstrip().split('\t')]
	result_params_sample = [value.strip() for value in res.readline().rstrip().split('\t')]
	while len(result_params_sample) < len(result_params):
	result_params_sample.append('')


	# check if table name already exists
	cursor = db.cursor()
	cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='{}'".format(params.task_table_name.strip("[]")))
	if cursor.fetchone():
	return


	# table identifiers cannot be parameterized, so have to use string concatenation
	create_table_command = '''CREATE TABLE IF NOT EXISTS {}('''.format(task_table_name)

	# include one item for each result_params
	create_table_command += "{} INTEGER, ".format(params.run_prefix)
	# time is in seconds since epoch
	create_table_command += "parsed_date REAL, "
	for p in range(len(result_params)):
	p_schema = "".join(('\"',result_params[p].strip(),'\" ', type_map.get(type(convert_strictest(result_params_sample[p])), "TEXT")))
	p_schema += ", "
	create_table_command += p_schema


	# treat with unique keys
	primary_keys = "PRIMARY KEY({},".format(params.run_prefix) # run always considered primary key
	for primary_key in params.key_params:
	if primary_key not in result_params:
	print("{} does not exist in result file of run 1".format(primary_key))
	sys.exit(3)
	primary_keys += primary_key
	primary_keys += ','
	primary_keys = primary_keys[:-1] # remove trailing ,
	primary_keys += ')'
	create_table_command += primary_keys

	create_table_command += ')'

	print(create_table_command)
	cursor = db.cursor()
	cursor.execute(create_table_command)

	# reinitialize tracked columns for newly created table
	initialize_tracked_columns(params, db)


	def initialize_tracked_columns(params, db):
	cursor = db.cursor()
	cursor.execute("PRAGMA table_info(%s)" % params.task_table_name)
	column_info = cursor.fetchall()
	column_names = set()
	for info in column_info:
	column_names.add('\"' + info[1] + '\"')
	setattr(params, 'tracked_columns', column_names)
	# print('tracked params: ', end='')
	# print(params.tracked_columns)



	# generating program parameters
	class Params(object):
	def __iter__(self):
	for attr, value in self.__dict__.iteritems():
	yield attr, value

	def parse_args(ns=None):
	"""parse arguments from command line and return as namespace object"""
	parser = argparse.ArgumentParser(
	formatter_class=argparse.RawDescriptionHelpFormatter,
	description=textwrap.dedent("""\
	populates a central database with benchmark information

	Generated database:
	Each task will become its own table, identified by its absolute path.
	Each table will have as many columns as the result file the first time
	the table is generated. The columns of the result file should not change
	after the table is generated."""),
	usage="%(prog)s <task_dir> [OPTIONS]")

	# arguments should either end with _dir or _file for use by other functions
	parser.add_argument("task_list",
	nargs='+',
	default=[],
	help="directories (relative or absolute) holding runs")
	parser.add_argument("--root_directory",
	default="",
	help="where the task paths are relative to (only set if task paths are relative)")
	parser.add_argument("-f", "--result_file",
	default="parse_results.txt",
	help="result file to look for inside each run; default: %(default)s")
	parser.add_argument("-p", "--run_prefix",
	default="run",
	help="prefix of run directories under the task directory; default: %(default)s")
	parser.add_argument("-s", "--parse_script",
	default="echo parsed result file not found {run_number} for {task_name}",
	help="bash command string to run if parsed file not found;\
	use {run_number} and {task_name} to retrieve run information;\
	default: %(default)s")
	parser.add_argument("-d", "--database",
	default="results.db",
	help="name of database to store results in; default: %(default)s")
	parser.add_argument("--clean",
	action='store_true',
	default=False,
	help="clean (remove) the table for this task; exits afterwards")
	parser.add_argument("-k","--key_params", required=True,
	nargs='+',
	default=[],
	help="the combination of key parameters that defines a unique benchmark;")
	params = parser.parse_args(namespace=ns)
	params.database = os.path.expanduser(params.database)
	# initialize to -1 as everything valid must be greater than -1
	setattr(params, 'last_parsed_date', -1)
	setattr(params, 'last_run', -1)
	# if a task list is given (where each line is the path to a task)
	if params.root_directory:
	if params.root_directory[-1] != '/':
	params.root_directory += '/'
	# make directory if it does not exist
	if not os.path.exists(params.root_directory):
	os.makedirs(directory)
	params.task_list = [params.root_directory + task for task in params.task_list]

	if os.path.isfile(params.task_list[0]):
	print("given task list")
	task_list = []
	with open(params.task_list[0], 'r') as tl:
	for line in tl:
	task_list.append(params.root_directory + line.rstrip())
	params.task_list = task_list

	# load first task
	load_next_task(params)
	return params

	def load_next_task(params):
	if not params.task_list:
	setattr(params, 'task_dir', None)
	return

	active_task = params.task_list.pop()
	print("active task:", active_task)

	setattr(params, 'task_dir', active_task)
	# name of task
	setattr(params, 'task_name', params.task_dir.split('/')[-1])
	# path to task
	setattr(params, 'task_path', os.path.abspath(os.path.join(params.task_dir, os.pardir)))
	# table name in the database
	setattr(params, 'task_table_name', get_task_table_name(params))
	return params



	# walk operations; all take params and run as arguments
	def check_result_exists(params, run):

	run_number = get_trailing_num(run)
	if not os.path.isfile(get_result_file(params, params.run_prefix, run_number)):
	parsed_call = params.parse_script.format(
	task_dir = params.task_dir,
	task_name = params.task_name,
	run_number = get_trailing_num(run))
	print(run, " called ", parsed_call)
	parsed_call = shlex.split(parsed_call)
	parsed_call[0] = os.path.expanduser(parsed_call[0])
	call(parsed_call)
	else:
	print(run, " OK")

	def add_column_to_table(params, db, column_name, sample_val):
	cursor = db.cursor()
	col_type = type_map.get(type(convert_strictest(sample_val)), "TEXT")
	cursor.execute("ALTER TABLE {table} ADD COLUMN {col} {type}".format(
	table=params.task_table_name, col=column_name, type=col_type))
	params.tracked_columns.add(column_name)


	if __name__ == "__main__":
	main()