diff --git a/client/driver/driver_config.json b/client/driver/driver_config.json
index f7ed573..efd6bb7 100644
--- a/client/driver/driver_config.json
+++ b/client/driver/driver_config.json
@@ -1,7 +1,7 @@
{
"database_type" : "postgres",
"database_name" : "tpcc",
- "database_disk": "/dev/xvda1",
+ "database_disk": "",
"database_conf": "/etc/postgresql/9.6/main/postgresql.conf",
"database_save_path": "~/ottertune/client/driver/dumpfiles",
"username" : "dbuser",
diff --git a/client/driver/fabfile.py b/client/driver/fabfile.py
index f095d63..e89126f 100644
--- a/client/driver/fabfile.py
+++ b/client/driver/fabfile.py
@@ -34,6 +34,8 @@ fabric_output.update({
RELOAD_INTERVAL = 10
# maximum disk usage
MAX_DISK_USAGE = 90
+# Postgres datadir
+PG_DATADIR = '/var/lib/postgresql/11/main'
# Load config
with open('driver_config.json', 'r') as _f:
@@ -119,7 +121,7 @@ def create_controller_config():
@task
def restart_database():
if CONF['database_type'] == 'postgres':
- cmd = 'sudo service postgresql restart'
+ cmd = 'sudo -u postgres pg_ctl -D {} -w restart'.format(PG_DATADIR)
elif CONF['database_type'] == 'oracle':
cmd = 'sh oracleScripts/shutdownOracle.sh && sh oracleScripts/startupOracle.sh'
else:
@@ -149,11 +151,11 @@ def create_database():
@task
def reset_conf():
- change_conf(next_conf='')
+ change_conf()
@task
-def change_conf(next_conf='next_config'):
+def change_conf(next_conf=None):
signal = "# configurations recommended by ottertune:\n"
next_conf = next_conf or {}
@@ -240,6 +242,16 @@ def save_dbms_result():
srcfile = os.path.join(CONF['controller_home'], 'output', f_)
dstfile = os.path.join(CONF['save_path'], '{}__{}'.format(t, f_))
local('cp {} {}'.format(srcfile, dstfile))
+ return t
+
+
+@task
+def save_next_config(next_config, t=None):
+ if not t:
+ t = int(time.time())
+ with open(os.path.join(CONF['save_path'], '{}__next_config.json'.format(t)), 'w') as f:
+ json.dump(next_config, f, indent=2)
+ return t
@task
@@ -282,52 +294,77 @@ def upload_result(result_dir=None, prefix=None):
@task
-def get_result(max_time_sec=180, interval_sec=1):
+def get_result(max_time_sec=180, interval_sec=5):
max_time_sec = int(max_time_sec)
interval_sec = int(interval_sec)
url = CONF['upload_url'] + '/query_and_get/' + CONF['upload_code']
- elapsed = 0.0
+ elapsed = 0
response_dict = None
response = ''
- start_time = time.time()
while elapsed <= max_time_sec:
rsp = requests.get(url)
response = rsp.content.decode()
- LOG.debug('Response:\n\n%s\n', response)
+ assert response != 'null'
+
+ LOG.debug('%s [status code: %d, content_type: %s, elapsed: %ds]', response,
+ rsp.status_code, rsp.headers.get('content-type', ''), elapsed)
if rsp.status_code == 200:
# Success
response_dict = json.loads(rsp.json(), object_pairs_hook=OrderedDict)
break
+
elif rsp.status_code == 202:
# Not ready
time.sleep(interval_sec)
+ elapsed += interval_sec
+
elif rsp.status_code == 400:
# Failure
raise Exception(
"Failed to download the next config.\nStatus code: {}\nMessage: {}\n".format(
rsp.status_code, response))
+
else:
raise NotImplementedError(
"Unhandled status code: '{}'.\nMessage: {}".format(rsp.status_code, response))
- elapsed = time.time() - start_time
-
if not response_dict:
assert elapsed > max_time_sec, \
- 'response={} but elapsed={:.1f}s <= max_time={:.1f}s'.format(
+ 'response={} but elapsed={}s <= max_time={}s'.format(
response, elapsed, max_time_sec)
raise Exception(
- 'Failed to download the next config in {}s: {} (elapsed: {:.1f}s)'.format(
+ 'Failed to download the next config in {}s: {} (elapsed: {}s)'.format(
max_time_sec, response, elapsed))
- LOG.info('Downloaded the next config in %.0fs: %s', elapsed,
- json.dumps(response_dict, indent=4))
+ LOG.info('Downloaded the next config in %ds: %s', elapsed, json.dumps(response_dict, indent=4))
return response_dict
+@task
+def download_debug_info(pprint=False):
+ pprint = _parse_bool(pprint)
+ url = '{}/dump/{}'.format(CONF['upload_url'], CONF['upload_code'])
+ params = {'pp': int(True)} if pprint else {}
+ rsp = requests.get(url, params=params)
+
+ if rsp.status_code != 200:
+ raise Exception('Error downloading debug info.')
+
+ filename = rsp.headers.get('Content-Disposition').split('=')[-1]
+ file_len, exp_len = len(rsp.content), int(rsp.headers.get('Content-Length'))
+ assert file_len == exp_len, 'File {}: content length != expected length: {} != {}'.format(
+ filename, file_len, exp_len)
+
+ with open(filename, 'wb') as f:
+ f.write(rsp.content)
+ LOG.info('Downloaded debug info to %s', filename)
+
+ return filename
+
+
@task
def add_udf():
cmd = 'sudo python3 ./LatencyUDF.py ../controller/output/'
@@ -469,16 +506,19 @@ def loop():
# add_udf()
# save result
- save_dbms_result()
+ result_timestamp = save_dbms_result()
# upload result
upload_result()
# get result
- get_result()
+ response = get_result()
+
+ # save next config
+ save_next_config(response, t=result_timestamp)
# change config
- change_conf()
+ change_conf(response['recommendation'])
@task
@@ -489,6 +529,7 @@ def run_lhs():
# dump database if it's not done before.
dump = dump_database()
+ result_timestamp = None
for i, sample in enumerate(samples):
# reload database periodically
if RELOAD_INTERVAL > 0:
@@ -508,15 +549,16 @@ def run_lhs():
if check_disk_usage() > MAX_DISK_USAGE:
LOG.warning('Exceeds max disk usage %s', MAX_DISK_USAGE)
- # copy lhs-sampled config to the to-be-used config
- cmd = 'cp {} next_config'.format(sample)
- local(cmd)
+ # load the next lhs-sampled config
+ with open(sample, 'r') as f:
+ next_config = json.load(f, object_pairs_hook=OrderedDict)
+ save_next_config(next_config, t=result_timestamp)
# remove oltpbench log and controller log
clean_logs()
# change config
- change_conf()
+ change_conf(next_config)
# restart database
restart_database()
@@ -550,7 +592,7 @@ def run_lhs():
p.join()
# save result
- save_dbms_result()
+ result_timestamp = save_dbms_result()
# upload result
upload_result()
diff --git a/server/website/website/management/commands/dumpdebuginfo.py b/server/website/website/management/commands/dumpdebuginfo.py
new file mode 100644
index 0000000..4f4dda2
--- /dev/null
+++ b/server/website/website/management/commands/dumpdebuginfo.py
@@ -0,0 +1,58 @@
+#
+# OtterTune - dumpdebuginfo.py
+#
+# Copyright (c) 2017-18, Carnegie Mellon University Database Group
+#
+import os
+
+from django.core.management.base import BaseCommand, CommandError
+
+from website.models import Session
+from website.utils import dump_debug_info
+
+
+class Command(BaseCommand):
+ help = 'Dump debug information for the session with the given upload code.'
+
+ def add_arguments(self, parser):
+ parser.add_argument(
+ 'uploadcode',
+ metavar='UPLOADCODE',
+            help="The session's upload code.")
+ parser.add_argument(
+ '-f', '--filename',
+ metavar='FILE',
+ help='Name of the file to write the debug information to. '
+ 'Default: debug_[timestamp].tar.gz')
+ parser.add_argument(
+ '-d', '--directory',
+ metavar='DIR',
+ help='Path of the directory to write the debug information to. '
+ 'Default: current directory')
+ parser.add_argument(
+ '--prettyprint',
+ action='store_true',
+ help='Pretty print the output.')
+
+ def handle(self, *args, **options):
+ directory = options['directory'] or ''
+ if directory and not os.path.exists(directory):
+ os.makedirs(directory)
+ try:
+ session = Session.objects.get(upload_code=options['uploadcode'])
+ except Session.DoesNotExist:
+ raise CommandError(
+                "ERROR: Session with upload code '{}' does not exist.".format(options['uploadcode']))
+
+ debug_info, root = dump_debug_info(session, pretty_print=options['prettyprint'])
+
+ filename = options['filename'] or root
+ if not filename.endswith('.tar.gz'):
+ filename += '.tar.gz'
+ path = os.path.join(directory, filename)
+
+ with open(path, 'wb') as f:
+ f.write(debug_info.getvalue())
+
+ self.stdout.write(self.style.SUCCESS(
+ "Successfully dumped debug information to '{}'.".format(path)))
diff --git a/server/website/website/templates/session.html b/server/website/website/templates/session.html
index b5566f5..32dc6fd 100644
--- a/server/website/website/templates/session.html
+++ b/server/website/website/templates/session.html
@@ -61,12 +61,11 @@ caption span {float: right;}
-
- {{ labels.title }}
- (edit)
- (edit knobs)
-
-
+
{{ labels.name }} |
diff --git a/server/website/website/urls.py b/server/website/website/urls.py
index d1f5f06..f638fdf 100644
--- a/server/website/website/urls.py
+++ b/server/website/website/urls.py
@@ -37,6 +37,7 @@ urlpatterns = [
url(r'^projects/(?P[0-9]+)/sessions/(?P[0-9]+)/edit/$', website_views.create_or_edit_session, name='edit_session'),
url(r'^projects/(?P[0-9]+)/sessions/(?P[0-9]+)/editKnobs/$', website_views.edit_knobs, name='edit_knobs'),
url(r'^projects/(?P[0-9]+)/sessions/delete/$', website_views.delete_session, name='delete_session'),
+ url(r'^projects/(?P[0-9]+)/sessions/(?P[0-9]+)/dump/$', website_views.download_debug_info, name='dump_debug_data'),
# URLs for result views
url(r'^new_result/', website_views.new_result, name='new_result'),
@@ -65,6 +66,7 @@ urlpatterns = [
# Back door
url(r'^query_and_get/(?P[0-9a-zA-Z]+)$', website_views.give_result, name="backdoor"),
+ url(r'^dump/(?P[0-9a-zA-Z]+)', website_views.get_debug_info, name="backdoor_debug"),
# train ddpg with results in the given session
url(r'^train_ddpg/sessions/(?P[0-9]+)$', website_views.train_ddpg_loops, name='train_ddpg_loops'),
diff --git a/server/website/website/utils.py b/server/website/website/utils.py
index 3b37637..259f418 100644
--- a/server/website/website/utils.py
+++ b/server/website/website/utils.py
@@ -3,24 +3,25 @@
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
-'''
-Created on Jul 8, 2017
-
-@author: dvanaken
-'''
-
+import datetime
import json
import logging
+import os
import string
+import tarfile
+import time
from collections import OrderedDict
+from io import BytesIO
from random import choice
import numpy as np
from django.utils.text import capfirst
+from django_db_logger.models import StatusLog
from djcelery.models import TaskMeta
+from .models import DBMSCatalog, KnobCatalog, Result, Session, SessionKnob
+from .settings import constants
from .types import LabelStyleType, VarType
-from .models import KnobCatalog, DBMSCatalog, SessionKnob
LOG = logging.getLogger(__name__)
@@ -34,17 +35,27 @@ class JSONUtil(object):
object_pairs_hook=OrderedDict)
@staticmethod
- def dumps(config, pprint=False, sort=False):
- indent = 4 if pprint is True else None
+ def dumps(config, pprint=False, sort=False, encoder='custom'):
+ json_args = dict(indent=4 if pprint is True else None,
+ ensure_ascii=False)
+
+ if encoder == 'custom':
+ json_args.update(default=JSONUtil.custom_converter)
+
if sort is True:
if isinstance(config, dict):
config = OrderedDict(sorted(config.items()))
else:
config = sorted(config)
- return json.dumps(config,
- ensure_ascii=False,
- indent=indent)
+ return json.dumps(config, **json_args)
+
+ @staticmethod
+ def custom_converter(o):
+ if isinstance(o, datetime.datetime):
+ return str(o)
+ elif isinstance(o, np.ndarray):
+ return o.tolist()
class MediaUtil(object):
@@ -279,3 +290,108 @@ class LabelUtil(object):
label = label.replace('Dbms', 'DBMS')
style_labels[name] = str(label)
return style_labels
+
+
+def dump_debug_info(session, pretty_print=False):
+ files = {}
+
+ # Session
+ session_values = Session.objects.filter(pk=session.pk).values()[0]
+ session_values['dbms'] = session.dbms.full_name
+ session_values['hardware'] = session.hardware.name
+
+ # Session knobs
+ knob_instances = SessionKnob.objects.filter(
+ session=session, tunable=True).select_related('knob')
+ knob_values = list(knob_instances.values())
+ for knob, knob_dict in zip(knob_instances, knob_values):
+ assert knob.pk == knob_dict['id']
+ knob_dict['knob'] = knob.name
+ session_values['knobs'] = knob_values
+
+ # Save binary field types to separate files
+ binary_fields = [
+ 'ddpg_actor_model',
+ 'ddpg_critic_model',
+ 'ddpg_reply_memory',
+ 'dnn_model',
+ ]
+ for bf in binary_fields:
+ if session_values[bf]:
+ filename = os.path.join('binaries', '{}.pickle'.format(bf))
+ content = session_values[bf]
+ session_values[bf] = filename
+ files[filename] = content
+
+ files['session.json'] = session_values
+
+ # Results from session
+ result_instances = Result.objects.filter(session=session).select_related(
+ 'knob_data', 'metric_data').order_by('creation_time')
+ results = []
+
+ for result, result_dict in zip(result_instances, result_instances.values()):
+ assert result.pk == result_dict['id']
+ result_dict = OrderedDict(result_dict)
+ next_config = result.next_configuration or '{}'
+ result_dict['next_configuration'] = JSONUtil.loads(next_config)
+
+ tasks = {}
+ task_ids = result.task_ids
+ task_ids = task_ids.split(',') if task_ids else []
+ for task_id in task_ids:
+ task = TaskMeta.objects.filter(task_id=task_id).values()
+ task = task[0] if task else None
+ tasks[task_id] = task
+ result_dict['tasks'] = tasks
+
+ knob_data = result.knob_data.data or '{}'
+ metric_data = result.metric_data.data or '{}'
+ result_dict['knob_data'] = JSONUtil.loads(knob_data)
+ result_dict['metric_data'] = JSONUtil.loads(metric_data)
+ results.append(result_dict)
+
+ files['results.json'] = results
+
+ # Log messages written to the database using django-db-logger
+ logs = StatusLog.objects.filter(create_datetime__gte=session.creation_time)
+ logger_names = logs.order_by().values_list('logger_name', flat=True).distinct()
+
+ # Write log files at app scope (e.g., django, website, celery)
+ logger_names = set([l.split('.', 1)[0] for l in logger_names])
+
+ for logger_name in logger_names:
+ log_values = list(logs.filter(logger_name__startswith=logger_name).order_by(
+ 'create_datetime').values())
+ for lv in log_values:
+ lv['level'] = logging.getLevelName(lv['level'])
+ files['logs/{}.log'.format(logger_name)] = log_values
+
+ # Save settings
+ constants_dict = OrderedDict()
+ for name, value in sorted(constants.__dict__.items()):
+ if not name.startswith('_') and name == name.upper():
+ constants_dict[name] = value
+ files['constants.json'] = constants_dict
+
+ timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
+ root = 'debug_{}'.format(timestamp)
+
+ mtime = time.time()
+ tarstream = BytesIO()
+ with tarfile.open(mode='w:gz', fileobj=tarstream) as tar:
+ for filename, content in files.items(): # pylint: disable=not-an-iterable
+ if isinstance(content, (dict, list)):
+ content = JSONUtil.dumps(content, pprint=pretty_print)
+ if isinstance(content, str):
+ content = content.encode('utf-8')
+ assert isinstance(content, bytes), (filename, type(content))
+ bio = BytesIO(content)
+ path = os.path.join(root, filename)
+ tarinfo = tarfile.TarInfo(name=path)
+ tarinfo.size = len(bio.getvalue())
+ tarinfo.mtime = mtime
+ tar.addfile(tarinfo, bio)
+
+ tarstream.seek(0)
+ return tarstream, root
diff --git a/server/website/website/views.py b/server/website/website/views.py
index 6c4c8ef..83b2f70 100644
--- a/server/website/website/views.py
+++ b/server/website/website/views.py
@@ -3,6 +3,7 @@
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
+# pylint: disable=too-many-lines
import logging
import datetime
import re
@@ -14,6 +15,7 @@ from django.contrib.auth import update_session_auth_hash
from django.contrib.auth.forms import AuthenticationForm, UserCreationForm
from django.contrib.auth.forms import PasswordChangeForm
from django.core.exceptions import ObjectDoesNotExist
+from django.core.files.base import ContentFile
from django.http import HttpResponse, QueryDict
from django.shortcuts import redirect, render, get_object_or_404
from django.template.context_processors import csrf
@@ -34,7 +36,7 @@ from .tasks import (aggregate_target_results, map_workload, train_ddpg,
configuration_recommendation, configuration_recommendation_ddpg)
from .types import (DBMSType, KnobUnitType, MetricType,
TaskType, VarType, WorkloadStatusType, AlgorithmType)
-from .utils import JSONUtil, LabelUtil, MediaUtil, TaskUtil
+from .utils import dump_debug_info, JSONUtil, LabelUtil, MediaUtil, TaskUtil
from .settings import TIME_ZONE
from .set_default_knobs import set_default_knobs
@@ -336,18 +338,24 @@ def edit_knobs(request, project_id, session_id):
{'project': project, 'session': session, 'form': form})
instance = form.instance
instance.session = session
- instance.knob = KnobCatalog.objects.filter(dbms=session.dbms,
- name=form.cleaned_data["name"])[0]
+ instance.knob = KnobCatalog.objects.get(dbms=session.dbms,
+ name=form.cleaned_data["name"])
SessionKnob.objects.filter(session=instance.session, knob=instance.knob).delete()
instance.save()
return HttpResponse(status=204)
else:
+ # knobs = KnobCatalog.objects.filter(dbms=session.dbms).order_by('-tunable')
knobs = SessionKnob.objects.filter(session=session).order_by('-tunable', 'knob__name')
forms = []
for knob in knobs:
knob_values = model_to_dict(knob)
knob_values['session'] = session
knob_values['name'] = KnobCatalog.objects.get(pk=knob.knob.pk).name
+ # if SessionKnob.objects.filter(session=session, knob=knob).exists():
+ # new_knob = SessionKnob.objects.filter(session=session, knob=knob)[0]
+ # knob_values["minval"] = new_knob.minval
+ # knob_values["maxval"] = new_knob.maxval
+ # knob_values["tunable"] = new_knob.tunable
forms.append(SessionKnobForm(initial=knob_values))
context = {
'project': project,
@@ -412,7 +420,6 @@ def new_result(request):
if not form.is_valid():
LOG.warning("New result form is not valid: %s", str(form.errors))
return HttpResponse("New result form is not valid: " + str(form.errors), status=400)
-
upload_code = form.cleaned_data['upload_code']
try:
session = Session.objects.get(upload_code=upload_code)
@@ -421,7 +428,6 @@ def new_result(request):
return HttpResponse("Invalid upload code: " + upload_code, status=400)
return handle_result_files(session, request.FILES)
-
LOG.warning("Request type was not POST")
return HttpResponse("Request type was not POST", status=400)
@@ -721,6 +727,17 @@ def download_next_config(request):
return response
+@login_required(login_url=reverse_lazy('login'))
+def download_debug_info(request, project_id, session_id): # pylint: disable=unused-argument
+ session = Session.objects.get(pk=session_id)
+ content, filename = dump_debug_info(session, pretty_print=False)
+ file = ContentFile(content.getvalue())
+ response = HttpResponse(file, content_type='application/x-gzip')
+ response['Content-Length'] = file.size
+ response['Content-Disposition'] = 'attachment; filename={}.tar.gz'.format(filename)
+ return response
+
+
@login_required(login_url=reverse_lazy('login'))
def tuner_status_view(request, project_id, session_id, result_id): # pylint: disable=unused-argument
res = Result.objects.get(pk=result_id)
@@ -949,29 +966,38 @@ def get_timeline_data(request):
# get the latest result
def give_result(request, upload_code): # pylint: disable=unused-argument
+
+ def _failed_response(_latest_result, _tasks, _num_completed, _status, _msg):
+ _msg = "{}\nSTATUS: {}\nRESULT ID: {}\n".format(_msg, _status, _latest_result)
+ if tasks:
+ _failed_task_idx = min(len(_tasks) - 1, _num_completed + 1)
+ _failed_task = _tasks[_failed_task_idx]
+ _msg += "TRACEBACK: {}".format(_failed_task.traceback)
+ return HttpResponse(_msg, status=400)
+
try:
session = Session.objects.get(upload_code=upload_code)
except Session.DoesNotExist:
LOG.warning("Invalid upload code: %s", upload_code)
return HttpResponse("Invalid upload code: " + upload_code, status=400)
- results = Result.objects.filter(session=session)
- lastest_result = results[len(results) - 1]
- tasks = TaskUtil.get_tasks(lastest_result.task_ids)
+ latest_result = Result.objects.filter(session=session).latest('creation_time')
+ tasks = TaskUtil.get_tasks(latest_result.task_ids)
overall_status, num_completed = TaskUtil.get_task_status(tasks)
if overall_status == 'SUCCESS':
- res = Result.objects.get(pk=lastest_result.pk)
- response = HttpResponse(JSONUtil.dumps(res.next_configuration),
- content_type='application/json')
+ next_config = latest_result.next_configuration
+ if not next_config:
+ overall_status = 'FAILURE'
+ response = _failed_response(latest_result, tasks, num_completed, overall_status,
+ 'Failed to get the next configuration.')
+ else:
+ response = HttpResponse(JSONUtil.dumps(next_config),
+ content_type='application/json')
elif overall_status in ('FAILURE', 'REVOKED', 'RETRY'):
- msg = "STATUS: {}\nRESULT ID: {}\n".format(overall_status, lastest_result)
- if tasks:
- failed_task_idx = min(len(tasks) - 1, num_completed + 1)
- failed_task = tasks[failed_task_idx]
- msg += "TRACEBACK: {}".format(failed_task.traceback)
- response = HttpResponse(msg, status=400)
+ response = _failed_response(latest_result, tasks, num_completed, overall_status,
+ 'Celery failed to get the next configuration.')
else: # overall_status in ('PENDING', 'RECEIVED', 'STARTED'):
response = HttpResponse("{}: Result not ready".format(overall_status), status=202)
@@ -979,6 +1005,23 @@ def give_result(request, upload_code): # pylint: disable=unused-argument
return response
+# download the debug info for the session
+def get_debug_info(request, upload_code): # pylint: disable=unused-argument
+ pprint = bool(int(request.GET.get('pp', False)))
+ try:
+ session = Session.objects.get(upload_code=upload_code)
+ except Session.DoesNotExist:
+ LOG.warning("Invalid upload code: %s", upload_code)
+ return HttpResponse("Invalid upload code: " + upload_code, status=400)
+
+ content, filename = dump_debug_info(session, pretty_print=pprint)
+ file = ContentFile(content.getvalue())
+ response = HttpResponse(file, content_type='application/x-gzip')
+ response['Content-Length'] = file.size
+ response['Content-Disposition'] = 'attachment; filename={}.tar.gz'.format(filename)
+ return response
+
+
def train_ddpg_loops(request, session_id): # pylint: disable=unused-argument
session = get_object_or_404(Session, pk=session_id, user=request.user) # pylint: disable=unused-variable
results = Result.objects.filter(session=session_id)