Initial commit with BSL

Andy Pavlo
2019-08-23 11:47:19 -04:00
commit 3e564ce922
286 changed files with 177642 additions and 0 deletions

client/driver/ConfParser.py

@@ -0,0 +1,69 @@
#
# OtterTune - ConfParser.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Mar 23, 2018

@author: Jacky, bohan, Dongsheng
'''
import sys
import json
from collections import OrderedDict


def change_postgres_conf(recommendation, postgresqlconf):
    lines = postgresqlconf.readlines()
    settings_idx = lines.index("# Add settings for extensions here\n")
    postgresqlconf.seek(0)
    postgresqlconf.truncate(0)

    # Keep everything up to (and including) the marker line, then append
    # the recommended knob settings below it.
    lines = lines[0:(settings_idx + 1)]
    for line in lines:
        postgresqlconf.write(line)

    for (knob_name, knob_value) in list(recommendation.items()):
        postgresqlconf.write(str(knob_name) + " = " + str(knob_value) + "\n")


def change_oracle_conf(recommendation, oracle_conf):
    lines = oracle_conf.readlines()
    signal = "# configurations recommended by ottertune:\n"

    # Append the marker line on first use so later runs can locate where
    # the tuned settings begin.
    if signal not in lines:
        oracle_conf.write('\n' + signal)
        oracle_conf.flush()
        oracle_conf.seek(0)
        lines = oracle_conf.readlines()

    settings_idx = lines.index(signal)
    oracle_conf.seek(0)
    oracle_conf.truncate(0)

    lines = lines[0:(settings_idx + 1)]
    for line in lines:
        oracle_conf.write(line)

    for (knob_name, knob_value) in list(recommendation.items()):
        # Oracle does not accept a trailing 'B' (bytes) suffix, so strip it.
        oracle_conf.write(str(knob_name) + " = " + str(knob_value).strip('B') + "\n")


def main():
    if len(sys.argv) != 4:
        raise Exception("Usage: python ConfParser.py [DB type] [Next Config] [Current Config]")
    database_type = sys.argv[1]
    next_config_name = sys.argv[2]
    cur_config_name = sys.argv[3]
    with open(next_config_name, 'r') as next_config, open(cur_config_name, 'r+') as cur_config:
        config = json.load(next_config, encoding="UTF-8", object_pairs_hook=OrderedDict)
        recommendation = config['recommendation']
        if database_type == 'postgres':
            change_postgres_conf(recommendation, cur_config)
        elif database_type == 'oracle':
            change_oracle_conf(recommendation, cur_config)
        else:
            raise Exception("Database type {} not implemented!".format(database_type))


if __name__ == "__main__":
    main()

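For reference, a minimal sketch of the next_config file this parser consumes. Only the 'recommendation' key is read; the knob names and values below are illustrative, not taken from the repository:

import json

# Hypothetical recommendation payload written by the tuning pipeline.
next_config = {
    "recommendation": {
        "shared_buffers": "4GB",
        "checkpoint_timeout": "5min"
    }
}
with open('next_config', 'w') as f:
    json.dump(next_config, f, indent=4)
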
client/driver/LatencyUDF.py

@@ -0,0 +1,43 @@
#
# OtterTune - LatencyUDF.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import sys
import json
from collections import OrderedDict


def main():
    if len(sys.argv) != 2:
        raise Exception("Usage: python LatencyUDF.py [Output Directory]")
    with open(sys.argv[1] + "/summary.json", "r") as f:
        conf = json.load(f,
                         encoding="UTF-8",
                         object_pairs_hook=OrderedDict)
    start_time = conf['start_time']
    end_time = conf['end_time']

    # The user-defined 'latency' objective is 0 before the run and the
    # elapsed time of the observation window after the run.
    with open(sys.argv[1] + "/metrics_before.json", "r") as f:
        conf_before = json.load(f,
                                encoding="UTF-8",
                                object_pairs_hook=OrderedDict)
    conf_before['global']['udf'] = OrderedDict([("latency", "0")])

    with open(sys.argv[1] + "/metrics_after.json", "r") as f:
        conf_after = json.load(f,
                               encoding="UTF-8",
                               object_pairs_hook=OrderedDict)
    conf_after['global']['udf'] = OrderedDict([("latency", str(end_time - start_time))])

    with open(sys.argv[1] + "/metrics_before.json", "w") as f:
        f.write(json.dumps(conf_before, indent=4))
    with open(sys.argv[1] + "/metrics_after.json", "w") as f:
        f.write(json.dumps(conf_after, indent=4))


if __name__ == "__main__":
    main()

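A worked example of the latency arithmetic above, assuming start_time and end_time are epoch timestamps in milliseconds written by the controller (an assumption; the units are not stated in this commit):

# Fabricated summary.json values, for illustration only.
summary = {"start_time": 1566575239000, "end_time": 1566575269000}
latency = str(summary["end_time"] - summary["start_time"])
print(latency)  # "30000" -> a 30-second observation window
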
client/driver/driver_config.json

@@ -0,0 +1,21 @@
{
"database_type" : "postgres",
"database_name" : "tpcc",
"database_disk": "/dev/xvda1",
"database_conf": "/etc/postgresql/9.6/main/postgresql.conf",
"database_save_path": "/home/ubuntu/ottertune",
"username" : "bohan",
"password" : "bohan",
"oltpbench_home": "/home/ubuntu/oltpbench",
"oltpbench_config": "/home/ubuntu/oltpbench/config/tpcc_config_postgres.xml",
"oltpbench_workload": "tpcc",
"oltpbench_log" : "/home/ubuntu/ottertune/client/driver/oltp.log",
"controller_config": "/home/ubuntu/ottertune/client/controller/config/sample_postgres_config.json",
"controller_log" : "/home/ubuntu/ottertune/client/driver/controller.log",
"save_path": "/home/ubuntu/results",
"upload_url" : "http://127.0.0.1:8000",
"upload_code" : "I5I10PXK3PK27FM86YYS",
"lhs_knob_path" : "/home/ubuntu/ottertune/client/driver/knobs/postgres-96.json",
"lhs_save_path" : "/home/ubuntu/ottertune/client/driver/configs",
"oracle_awr_enabled": false
}

client/driver/fabfile.py

@@ -0,0 +1,421 @@
#
# OtterTune - fabfile.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Mar 23, 2018

@author: bohan
'''
import sys
import json
import logging
import time
import os.path
import re
import glob
from multiprocessing import Process
from fabric.api import (env, local, task, lcd)
from fabric.state import output as fabric_output

LOG = logging.getLogger()
LOG.setLevel(logging.DEBUG)
Formatter = logging.Formatter(  # pylint: disable=invalid-name
    "%(asctime)s [%(levelname)s] %(message)s")

# print the log to stdout
ConsoleHandler = logging.StreamHandler(sys.stdout)  # pylint: disable=invalid-name
ConsoleHandler.setFormatter(Formatter)
LOG.addHandler(ConsoleHandler)

# Fabric environment settings
env.hosts = ['localhost']
fabric_output.update({
    'running': True,
    'stdout': True,
})

# interval (in loops) between database restores
RELOAD_INTERVAL = 10
# maximum disk usage (percent)
MAX_DISK_USAGE = 90

with open('driver_config.json', 'r') as f:
    CONF = json.load(f)


@task
def check_disk_usage():
    partition = CONF['database_disk']
    disk_use = 0
    cmd = "df -h {}".format(partition)
    out = local(cmd, capture=True).splitlines()[1]
    m = re.search(r'\d+(?=%)', out)
    if m:
        disk_use = int(m.group(0))
    LOG.info("Current Disk Usage: %s%s", disk_use, '%')
    return disk_use


@task
def check_memory_usage():
    cmd = 'free -m -h'
    local(cmd)


@task
def restart_database():
    if CONF['database_type'] == 'postgres':
        cmd = 'sudo service postgresql restart'
    elif CONF['database_type'] == 'oracle':
        cmd = 'sh oracleScripts/shutdownOracle.sh && sh oracleScripts/startupOracle.sh'
    else:
        raise Exception("Database type {} not implemented!".format(CONF['database_type']))
    local(cmd)


@task
def drop_database():
    if CONF['database_type'] == 'postgres':
        cmd = "PGPASSWORD={} dropdb -e --if-exists {} -U {}".\
            format(CONF['password'], CONF['database_name'], CONF['username'])
    else:
        raise Exception("Database type {} not implemented!".format(CONF['database_type']))
    local(cmd)


@task
def create_database():
    if CONF['database_type'] == 'postgres':
        cmd = "PGPASSWORD={} createdb -e {} -U {}".\
            format(CONF['password'], CONF['database_name'], CONF['username'])
    else:
        raise Exception("Database type {} not implemented!".format(CONF['database_type']))
    local(cmd)


@task
def change_conf():
    next_conf = 'next_config'
    cmd = "sudo python3 ConfParser.py {} {} {}".\
        format(CONF['database_type'], next_conf, CONF['database_conf'])
    local(cmd)


@task
def load_oltpbench():
    cmd = "./oltpbenchmark -b {} -c {} --create=true --load=true".\
        format(CONF['oltpbench_workload'], CONF['oltpbench_config'])
    with lcd(CONF['oltpbench_home']):  # pylint: disable=not-context-manager
        local(cmd)


@task
def run_oltpbench():
    cmd = "./oltpbenchmark -b {} -c {} --execute=true -s 5 -o outputfile".\
        format(CONF['oltpbench_workload'], CONF['oltpbench_config'])
    with lcd(CONF['oltpbench_home']):  # pylint: disable=not-context-manager
        local(cmd)


@task
def run_oltpbench_bg():
    cmd = "./oltpbenchmark -b {} -c {} --execute=true -s 5 -o outputfile > {} 2>&1 &".\
        format(CONF['oltpbench_workload'], CONF['oltpbench_config'], CONF['oltpbench_log'])
    with lcd(CONF['oltpbench_home']):  # pylint: disable=not-context-manager
        local(cmd)


@task
def run_controller():
    cmd = 'sudo gradle run -PappArgs="-c {} -d output/" --no-daemon > {}'.\
        format(CONF['controller_config'], CONF['controller_log'])
    with lcd("../controller"):  # pylint: disable=not-context-manager
        local(cmd)


@task
def signal_controller():
    pid = int(open('../controller/pid.txt').read())
    cmd = 'sudo kill -2 {}'.format(pid)
    with lcd("../controller"):  # pylint: disable=not-context-manager
        local(cmd)


@task
def save_dbms_result():
    t = int(time.time())
    files = ['knobs.json', 'metrics_after.json', 'metrics_before.json', 'summary.json']
    for f_ in files:
        f_prefix = f_.split('.')[0]
        cmd = 'cp ../controller/output/{} {}/{}__{}.json'.\
            format(f_, CONF['save_path'], t, f_prefix)
        local(cmd)


@task
def free_cache():
    cmd = 'sync; sudo bash -c "echo 1 > /proc/sys/vm/drop_caches"'
    local(cmd)


@task
def upload_result():
    cmd = 'python3 ../../server/website/script/upload/upload.py \
          ../controller/output/ {} {}/new_result/'.format(CONF['upload_code'],
                                                          CONF['upload_url'])
    local(cmd)


@task
def get_result():
    cmd = 'python3 ../../script/query_and_get.py {} {} 5'.\
        format(CONF['upload_url'], CONF['upload_code'])
    local(cmd)


@task
def add_udf():
    cmd = 'sudo python3 ./LatencyUDF.py ../controller/output/'
    local(cmd)


@task
def upload_batch():
    cmd = 'python3 ./upload_batch.py {} {} {}/new_result/'.format(CONF['save_path'],
                                                                  CONF['upload_code'],
                                                                  CONF['upload_url'])
    local(cmd)


@task
def dump_database():
    db_file_path = '{}/{}.dump'.format(CONF['database_save_path'], CONF['database_name'])
    if os.path.exists(db_file_path):
        LOG.info('%s already exists!', db_file_path)
        return False
    else:
        LOG.info('Dump database %s to %s', CONF['database_name'], db_file_path)
        # You must create a directory named dpdata through sqlplus in your Oracle database
        if CONF['database_type'] == 'oracle':
            cmd = 'expdp {}/{}@{} schemas={} dumpfile={}.dump DIRECTORY=dpdata'.format(
                'c##tpcc', 'oracle', 'orcldb', 'c##tpcc', 'orcldb')
        elif CONF['database_type'] == 'postgres':
            cmd = 'PGPASSWORD={} pg_dump -U {} -F c -d {} > {}'.format(CONF['password'],
                                                                       CONF['username'],
                                                                       CONF['database_name'],
                                                                       db_file_path)
        else:
            raise Exception("Database type {} not implemented!".format(CONF['database_type']))
        local(cmd)
        return True


@task
def restore_database():
    if CONF['database_type'] == 'oracle':
        # You must create a directory named dpdata through sqlplus in your Oracle database.
        # The following script assumes such a directory exists.
        # You may want to modify the username, password, and dump file name in the script.
        cmd = 'sh oracleScripts/restoreOracle.sh'
    elif CONF['database_type'] == 'postgres':
        db_file_path = '{}/{}.dump'.format(CONF['database_save_path'], CONF['database_name'])
        drop_database()
        create_database()
        cmd = 'PGPASSWORD={} pg_restore -U {} -n public -j 8 -F c -d {} {}'.\
            format(CONF['password'], CONF['username'], CONF['database_name'], db_file_path)
    else:
        raise Exception("Database type {} not implemented!".format(CONF['database_type']))
    LOG.info('Start restoring database')
    local(cmd)
    LOG.info('Finish restoring database')


# The three predicates below poll sentinel strings in the controller and
# OLTP-Bench logs to sequence the experiment.
def _ready_to_start_oltpbench():
    return (os.path.exists(CONF['controller_log']) and
            'Output the process pid to'
            in open(CONF['controller_log']).read())


def _ready_to_start_controller():
    return (os.path.exists(CONF['oltpbench_log']) and
            'Warmup complete, starting measurements'
            in open(CONF['oltpbench_log']).read())


def _ready_to_shut_down_controller():
    pid_file_path = '../controller/pid.txt'
    return (os.path.exists(pid_file_path) and os.path.exists(CONF['oltpbench_log']) and
            'Output throughput samples into file' in open(CONF['oltpbench_log']).read())


def clean_logs():
    # remove oltpbench log
    cmd = 'rm -f {}'.format(CONF['oltpbench_log'])
    local(cmd)

    # remove controller log
    cmd = 'rm -f {}'.format(CONF['controller_log'])
    local(cmd)


@task
def lhs_samples(count=10):
    cmd = 'python3 lhs.py {} {} {}'.format(count, CONF['lhs_knob_path'], CONF['lhs_save_path'])
    local(cmd)


@task
def loop():
    # free cache
    free_cache()

    # remove oltpbench log and controller log
    clean_logs()

    # restart database
    restart_database()

    # check disk usage
    if check_disk_usage() > MAX_DISK_USAGE:
        LOG.warning('Exceeds max disk usage %s', MAX_DISK_USAGE)

    # run controller from another process
    p = Process(target=run_controller, args=())
    p.start()
    LOG.info('Run the controller')

    # run oltpbench as a background job (busy-wait until the controller is up)
    while not _ready_to_start_oltpbench():
        pass
    run_oltpbench_bg()
    LOG.info('Run OLTP-Bench')

    # the controller starts the first collection
    while not _ready_to_start_controller():
        pass
    signal_controller()
    LOG.info('Start the first collection')

    # stop the experiment
    while not _ready_to_shut_down_controller():
        pass
    signal_controller()
    LOG.info('Start the second collection, shut down the controller')

    p.join()

    # add user defined target objective
    # add_udf()

    # save result
    save_dbms_result()

    # upload result
    upload_result()

    # get result
    get_result()

    # change config
    change_conf()


@task
def run_lhs():
    datadir = CONF['lhs_save_path']
    samples = glob.glob(os.path.join(datadir, 'config_*'))

    # dump the database if it hasn't been done before
    dump = dump_database()

    for i, sample in enumerate(samples):
        # reload the database periodically
        if RELOAD_INTERVAL > 0:
            if i % RELOAD_INTERVAL == 0:
                if i == 0 and dump is False:
                    restore_database()
                elif i > 0:
                    restore_database()
        # free cache
        free_cache()

        LOG.info('\n\n Start %s-th sample %s \n\n', i, sample)
        # check memory usage
        # check_memory_usage()

        # check disk usage
        if check_disk_usage() > MAX_DISK_USAGE:
            LOG.warning('Exceeds max disk usage %s', MAX_DISK_USAGE)

        # copy lhs-sampled config to the to-be-used config
        cmd = 'cp {} next_config'.format(sample)
        local(cmd)

        # remove oltpbench log and controller log
        clean_logs()

        # change config
        change_conf()

        # restart database
        restart_database()

        if CONF.get('oracle_awr_enabled', False):
            # create snapshot for oracle AWR report
            if CONF['database_type'] == 'oracle':
                local('sh snapshotOracle.sh')

        # run controller from another process
        p = Process(target=run_controller, args=())
        p.start()

        # run oltpbench as a background job
        while not _ready_to_start_oltpbench():
            pass
        run_oltpbench_bg()
        LOG.info('Run OLTP-Bench')

        while not _ready_to_start_controller():
            pass
        signal_controller()
        LOG.info('Start the first collection')

        while not _ready_to_shut_down_controller():
            pass
        # stop the experiment
        signal_controller()
        LOG.info('Start the second collection, shut down the controller')

        p.join()

        # save result
        save_dbms_result()

        # upload result
        upload_result()

        if CONF.get('oracle_awr_enabled', False):
            # create oracle AWR report for performance analysis
            if CONF['database_type'] == 'oracle':
                local('sh oracleScripts/snapshotOracle.sh && sh oracleScripts/awrOracle.sh')


@task
def run_loops(max_iter=1):
    # dump the database if it hasn't been done before
    dump = dump_database()
    for i in range(int(max_iter)):
        if RELOAD_INTERVAL > 0:
            if i % RELOAD_INTERVAL == 0:
                if i == 0 and dump is False:
                    restore_database()
                elif i > 0:
                    restore_database()
        LOG.info('The %s-th Loop Starts / Total Loops %s', i + 1, max_iter)
        loop()
        LOG.info('The %s-th Loop Ends / Total Loops %s', i + 1, max_iter)

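As a quick sanity check of the df output parsing in check_disk_usage, a minimal sketch (the df line is fabricated):

import re

# Second line of `df -h /dev/xvda1`, fabricated for illustration.
out = '/dev/xvda1  7.7G  6.2G  1.6G  80% /'
m = re.search(r'\d+(?=%)', out)
assert m is not None and int(m.group(0)) == 80  # the lookahead captures the usage percent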

@@ -0,0 +1,83 @@
[
{
"name": "SHARED_POOL_SIZE",
"tuning_range": {
"minval": "500MB",
"maxval": "2500MB"
},
"default": "1500MB",
"type": "bytes"
},
{
"name": "DB_CACHE_SIZE",
"tuning_range": {
"minval": "10GB",
"maxval": "24GB"
},
"default": "14GB",
"type": "bytes"
},
{
"name": "LOG_BUFFER",
"tuning_range": {
"minval": "10MB",
"maxval": "1000MB"
},
"default": "20MB",
"type": "bytes"
},
{
"name": "LARGE_POOL_SIZE",
"tuning_range": {
"minval": "10MB",
"maxval": "1000MB"
},
"default": "100MB",
"type": "bytes"
},
{
"name": "STREAMS_POOL_SIZE",
"tuning_range": {
"minval": "10MB",
"maxval": "1000MB"
},
"default": "100MB",
"type": "bytes"
},
{
"name": "bitmap_merge_area_size",
"tuning_range": {
"minval": "1000000",
"maxval": "20000000"
},
"default": "1MB",
"type": "integer"
},
{
"name": "create_bitmap_area_size",
"tuning_range": {
"minval": "1000000",
"maxval": "100000000"
},
"default": "8MB",
"type": "integer"
},
{
"name": "hash_area_size",
"tuning_range": {
"minval": "65536",
"maxval": "1000000"
},
"default": "65536",
"type": "integer"
},
{
"name": "sort_area_size",
"tuning_range": {
"minval": "128000",
"maxval": "2000000"
},
"default": "128000",
"type": "integer"
}
]

client/driver/knobs/postgres-96.json

@@ -0,0 +1,110 @@
[
{
"name": "effective_cache_size",
"tuning_range": {
"minval": "4GB",
"maxval": "16GB"
},
"default": "4GB",
"type": "bytes"
},
{
"name": "shared_buffers",
"tuning_range": {
"minval": "128MB",
"maxval": "12GB"
},
"default": "128MB",
"type": "bytes"
},
{
"name": "max_parallel_workers_per_gather",
"tuning_range": {
"minval": 0,
"maxval": 8
},
"default": 0,
"type": "integer"
},
{
"name": "default_statistics_target",
"tuning_range": {
"minval": 100,
"maxval": 2048
},
"default": 100,
"type": "integer"
},
{
"name": "bgwriter_lru_maxpages",
"tuning_range": {
"minval": 0,
"maxval": 1000
},
"default": 10,
"type": "integer"
},
{
"name": "checkpoint_timeout",
"tuning_range": {
"minval": "10ms",
"maxval": "1min"
},
"default": "200ms",
"type": "time"
},
{
"name": "random_page_cost",
"tuning_range": {
"minval": 1,
"maxval": 10
},
"default": 4.0,
"type": "float"
},
{
"name": "checkpoint_completion_target",
"tuning_range": {
"minval": 0.1,
"maxval": 0.9
},
"default": 0.5,
"type": "float"
},
{
"name": "checkpoint_timeout",
"tuning_range": {
"minval": "1min",
"maxval": "30min"
},
"default": "5min",
"type": "time"
},
{
"name": "max_wal_size",
"tuning_range": {
"minval": "256MB",
"maxval": "16GB"
},
"default": "1GB",
"type": "bytes"
},
{
"name": "temp_buffers",
"tuning_range": {
"minval": "8MB",
"maxval": "1GB"
},
"default": "8MB",
"type": "bytes"
},
{
"name": "work_mem",
"tuning_range": {
"minval": "4MB",
"maxval": "1GB"
},
"default": "4MB",
"type": "bytes"
}
]

client/driver/lhs.py

@@ -0,0 +1,126 @@
#
# OtterTune - lhs.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import sys
import json
import os
import numpy as np
from pyDOE import lhs
from scipy.stats import uniform
from hurry.filesize import size

BYTES_SYSTEM = [
    (1024 ** 5, 'PB'),
    (1024 ** 4, 'TB'),
    (1024 ** 3, 'GB'),
    (1024 ** 2, 'MB'),
    (1024 ** 1, 'kB'),
    (1024 ** 0, 'B'),
]

TIME_SYSTEM = [
    (1000 * 60 * 60 * 24, 'd'),
    (1000 * 60 * 60, 'h'),
    (1000 * 60, 'min'),
    (1000, 's'),
    (1, 'ms'),
]


def get_raw_size(value, system):
    # Convert a human-readable value (e.g. '500MB', '5min') into its base unit.
    for factor, suffix in system:
        if value.endswith(suffix):
            if len(value) == len(suffix):
                amount = 1
            else:
                try:
                    amount = int(value[:-len(suffix)])
                except ValueError:
                    continue
            return amount * factor
    return None


def get_knob_raw(value, knob_type):
    if knob_type == 'integer':
        return int(value)
    elif knob_type == 'float':
        return float(value)
    elif knob_type == 'bytes':
        return get_raw_size(value, BYTES_SYSTEM)
    elif knob_type == 'time':
        return get_raw_size(value, TIME_SYSTEM)
    else:
        raise Exception('Unsupported knob type: {}'.format(knob_type))


def get_knob_readable(value, knob_type):
    if knob_type == 'integer':
        return int(round(value))
    elif knob_type == 'float':
        return float(value)
    elif knob_type == 'bytes':
        value = int(round(value))
        return size(value, system=BYTES_SYSTEM)
    elif knob_type == 'time':
        value = int(round(value))
        return size(value, system=TIME_SYSTEM)
    else:
        raise Exception('Unsupported knob type: {}'.format(knob_type))


def get_knobs_readable(values, types):
    result = []
    for i, value in enumerate(values):
        result.append(get_knob_readable(value, types[i]))
    return result


def main(args):
    if len(args) != 4:
        raise Exception("Usage: python3 lhs.py [Samples Count] [Knob Path] [Save Path]")

    knob_path = args[2]
    save_path = args[3]
    with open(knob_path, "r") as f:
        tuning_knobs = json.load(f)

    names = []
    maxvals = []
    minvals = []
    types = []
    for knob in tuning_knobs:
        names.append(knob['name'])
        maxvals.append(get_knob_raw(knob['tuning_range']['maxval'], knob['type']))
        minvals.append(get_knob_raw(knob['tuning_range']['minval'], knob['type']))
        types.append(knob['type'])

    nsamples = int(args[1])
    nfeats = len(tuning_knobs)

    # Latin Hypercube Sampling in the unit cube, then scale each dimension
    # to its knob's tuning range.
    samples = lhs(nfeats, samples=nsamples, criterion='maximin')
    maxvals = np.array(maxvals)
    minvals = np.array(minvals)
    scales = maxvals - minvals
    for fidx in range(nfeats):
        samples[:, fidx] = uniform(loc=minvals[fidx], scale=scales[fidx]).ppf(samples[:, fidx])

    samples_readable = []
    for sample in samples:
        samples_readable.append(get_knobs_readable(sample, types))

    config = {'recommendation': {}}
    for sidx in range(nsamples):
        for fidx in range(nfeats):
            config["recommendation"][names[fidx]] = samples_readable[sidx][fidx]
        with open(os.path.join(save_path, 'config_' + str(sidx)), 'w+') as f:
            f.write(json.dumps(config))


if __name__ == '__main__':
    main(sys.argv)

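Worked examples for the unit helpers above. The expected values follow from the BYTES_SYSTEM and TIME_SYSTEM tables (the readable forms rely on hurry.filesize picking the largest factor that fits):

from lhs import BYTES_SYSTEM, TIME_SYSTEM, get_knob_readable, get_raw_size

assert get_raw_size('500MB', BYTES_SYSTEM) == 500 * 1024 ** 2
assert get_raw_size('5min', TIME_SYSTEM) == 5 * 60 * 1000
assert get_knob_readable(500 * 1024 ** 2, 'bytes') == '500MB'
assert get_knob_readable(300000, 'time') == '5min'
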
client/driver/lhs.sh

@@ -0,0 +1 @@
sudo -b nohup fab run_lhs > lhs.log 2>&1

client/driver/oracleScripts/awrOracle.sh

@@ -0,0 +1,9 @@
#!/bin/sh
su - oracle <<EON
oracle
sqlplus / as sysdba <<EOF
@/home/oracle/ottertune/client/driver/autoawr.sql;
quit
EOF
exit
EON

client/driver/oracleScripts/restoreOracle.sh

@@ -0,0 +1,23 @@
#!/bin/sh
su - oracle <<EON
oracle # system password
sqlplus / as sysdba <<EOF
-- drop the old user (username: c##tpcc)
drop user c##tpcc cascade;
-- recreate it (username: c##tpcc, password: oracle)
create user c##tpcc identified by oracle;
quit
EOF
# import the dump (username, database name, and dump directory are hard-coded)
impdp 'userid="/ as sysdba"' schemas=c##tpcc dumpfile=orcldb.dump DIRECTORY=dpdata
# restart the database
sqlplus / as sysdba <<EOF
shutdown immediate
startup
quit
EOF
exit
EON

client/driver/oracleScripts/shutdownOracle.sh

@@ -0,0 +1,11 @@
#!/bin/sh
su - oracle <<EON
oracle
sqlplus / as sysdba <<EOF
shutdown immediate
exit
EOF
exit
EON

client/driver/oracleScripts/snapshotOracle.sh

@@ -0,0 +1,11 @@
#!/bin/sh
su - oracle <<EON
oracle
sqlplus / as sysdba <<EOF
exec dbms_workload_repository.create_snapshot;
quit
EOF
exit
EON

client/driver/oracleScripts/startupOracle.sh

@@ -0,0 +1,11 @@
#!/bin/sh
su - oracle <<EON
oracle
sqlplus / as sysdba <<EOF
startup
quit
EOF
exit
EON

client/driver/upload_batch.py

@@ -0,0 +1,63 @@
#
# OtterTune - upload_batch.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import argparse
import glob
import logging
import os
import requests

# Logging
LOG = logging.getLogger(__name__)
LOG.addHandler(logging.StreamHandler())
LOG.setLevel(logging.INFO)


# Upload all files in the datadir to OtterTune's server side.
# You may want to upload your training data to a non-tuning session.
def upload_batch(datadir, upload_code, url):
    samples = glob.glob(os.path.join(datadir, '*__summary.json'))
    count = len(samples)
    samples_prefix = []
    LOG.info('Uploading %d samples in %s...', count, datadir)
    for sample in samples:
        prefix = sample.split('/')[-1].split('__')[0]
        samples_prefix.append(prefix)

    for i in range(count):
        prefix = samples_prefix[i]
        params = {
            'summary': open(os.path.join(datadir, '{}__summary.json'.format(prefix)), 'rb'),
            'knobs': open(os.path.join(datadir, '{}__knobs.json'.format(prefix)), 'rb'),
            'metrics_before': open(os.path.join(datadir,
                                                '{}__metrics_before.json'.format(prefix)), 'rb'),
            'metrics_after': open(os.path.join(datadir,
                                               '{}__metrics_after.json'.format(prefix)), 'rb'),
        }
        LOG.info('Upload %d-th sample %s__*.json', i + 1, prefix)
        response = requests.post(url,
                                 files=params,
                                 data={'upload_code': upload_code})
        LOG.info(response.content)


def main():
    parser = argparse.ArgumentParser(description="Upload generated data to the website")
    parser.add_argument('datadir', type=str, nargs=1,
                        help='Directory containing the generated data')
    parser.add_argument('upload_code', type=str, nargs=1,
                        help='The website\'s upload code')
    parser.add_argument('url', type=str, default='http://0.0.0.0:8000/new_result/',
                        nargs='?', help='The upload url: server_ip/new_result/')
    args = parser.parse_args()
    upload_batch(args.datadir[0], args.upload_code[0], args.url)


if __name__ == "__main__":
    main()
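
The prefix logic above pairs the four result files by the timestamp prefix that save_dbms_result in the fabfile prepends; a small sketch (the path is fabricated):

sample = '/home/ubuntu/results/1566575239__summary.json'
prefix = sample.split('/')[-1].split('__')[0]
print(prefix)  # '1566575239'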