Added option to dump debug info in 3 places: (1) the session view, (2) as a command in manage.py, and (3) as a fab command in the driver.

This commit is contained in:
Dana Van Aken
2019-10-03 22:21:51 -04:00
parent 309d327a44
commit 162dc48c53
7 changed files with 317 additions and 57 deletions

View File

@@ -0,0 +1,58 @@
#
# OtterTune - setuploadcode.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import os
from django.core.management.base import BaseCommand, CommandError
from website.models import Session
from website.utils import dump_debug_info
class Command(BaseCommand):
    """Management command that writes a session's debug dump to a ``.tar.gz`` file.

    The session is looked up by its upload code and the archive contents are
    produced by ``dump_debug_info``.
    """

    help = 'Dump debug information for the session with the given upload code.'

    def add_arguments(self, parser):
        """Register the positional upload code and the optional output settings."""
        parser.add_argument(
            'uploadcode',
            metavar='UPLOADCODE',
            help="The upload code of the session to dump debug information for.")
        parser.add_argument(
            '-f', '--filename',
            metavar='FILE',
            help='Name of the file to write the debug information to. '
                 'Default: debug_[timestamp].tar.gz')
        parser.add_argument(
            '-d', '--directory',
            metavar='DIR',
            help='Path of the directory to write the debug information to. '
                 'Default: current directory')
        parser.add_argument(
            '--prettyprint',
            action='store_true',
            help='Pretty print the output.')

    def handle(self, *args, **options):
        """Dump the session's debug information and report where it was written.

        Raises:
            CommandError: if no session has the given upload code.
        """
        upload_code = options['uploadcode']
        # Validate the upload code first so that a failed run has no side
        # effects on the filesystem.
        try:
            session = Session.objects.get(upload_code=upload_code)
        except Session.DoesNotExist:
            raise CommandError(
                "ERROR: Session with upload code '{}' does not exist.".format(upload_code))

        debug_info, root = dump_debug_info(session, pretty_print=options['prettyprint'])

        # Fall back to the archive's root directory name and make sure the
        # file carries the extension matching its contents.
        filename = options['filename'] or root
        if not filename.endswith('.tar.gz'):
            filename += '.tar.gz'

        directory = options['directory'] or ''
        if directory:
            # exist_ok avoids the check-then-create race of os.path.exists().
            os.makedirs(directory, exist_ok=True)

        path = os.path.join(directory, filename)
        with open(path, 'wb') as f:
            f.write(debug_info.getvalue())
        self.stdout.write(self.style.SUCCESS(
            "Successfully dumped debug information to '{}'.".format(path)))

View File

@@ -61,12 +61,11 @@ caption span {float: right;}
<div id="session" class="container">
<table class="table table-striped table-bordered table-condensed table-hover">
<caption >
<h4> {{ labels.title }}</h4>
<span> (<a href="{% url 'edit_session' project.pk session.pk %}">edit</a>)
(<a href="{% url 'edit_knobs' project.pk session.pk %}">edit knobs</a>)
</span>
</caption>
<caption><h4>{{ labels.title }}
(<a href="{% url 'edit_session' project.pk session.pk %}">edit</a>)
(<a href="{% url 'edit_knobs' project.pk session.pk %}">edit knobs</a>)
(<a href="{% url 'dump_debug_data' project.pk session.pk %}">dump debug info</a>)
</h4></caption>
<tbody>
<tr>
<td style="width: 50%"><div class="text-right">{{ labels.name }}</div></td>

View File

@@ -37,6 +37,7 @@ urlpatterns = [
url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/edit/$', website_views.create_or_edit_session, name='edit_session'),
url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/editKnobs/$', website_views.edit_knobs, name='edit_knobs'),
url(r'^projects/(?P<project_id>[0-9]+)/sessions/delete/$', website_views.delete_session, name='delete_session'),
url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/dump/$', website_views.download_debug_info, name='dump_debug_data'),
# URLs for result views
url(r'^new_result/', website_views.new_result, name='new_result'),
@@ -65,6 +66,7 @@ urlpatterns = [
# Back door
url(r'^query_and_get/(?P<upload_code>[0-9a-zA-Z]+)$', website_views.give_result, name="backdoor"),
url(r'^dump/(?P<upload_code>[0-9a-zA-Z]+)', website_views.get_debug_info, name="backdoor_debug"),
# train ddpg with results in the given session
url(r'^train_ddpg/sessions/(?P<session_id>[0-9]+)$', website_views.train_ddpg_loops, name='train_ddpg_loops'),

View File

@@ -3,24 +3,25 @@
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Jul 8, 2017
@author: dvanaken
'''
import datetime
import json
import logging
import os
import string
import tarfile
import time
from collections import OrderedDict
from io import BytesIO
from random import choice
import numpy as np
from django.utils.text import capfirst
from django_db_logger.models import StatusLog
from djcelery.models import TaskMeta
from .models import DBMSCatalog, KnobCatalog, Result, Session, SessionKnob
from .settings import constants
from .types import LabelStyleType, VarType
from .models import KnobCatalog, DBMSCatalog, SessionKnob
LOG = logging.getLogger(__name__)
@@ -34,17 +35,27 @@ class JSONUtil(object):
object_pairs_hook=OrderedDict)
@staticmethod
def dumps(config, pprint=False, sort=False):
indent = 4 if pprint is True else None
def dumps(config, pprint=False, sort=False, encoder='custom'):
json_args = dict(indent=4 if pprint is True else None,
ensure_ascii=False)
if encoder == 'custom':
json_args.update(default=JSONUtil.custom_converter)
if sort is True:
if isinstance(config, dict):
config = OrderedDict(sorted(config.items()))
else:
config = sorted(config)
return json.dumps(config,
ensure_ascii=False,
indent=indent)
return json.dumps(config, **json_args)
@staticmethod
def custom_converter(o):
    """Convert a value the stock JSON encoder rejects into a serializable one.

    Intended for use as the ``default=`` hook of ``json.dumps``.

    Args:
        o: the object the encoder could not serialize on its own.

    Returns:
        * a nested list for a NumPy array,
        * the equivalent built-in Python scalar for a NumPy scalar
          (e.g. ``np.int64`` -> ``int``),
        * the ``str()`` form for ``datetime``, ``date`` and ``time`` values,
        * ``None`` for anything else, which the encoder writes as ``null``.
    """
    if isinstance(o, np.ndarray):
        return o.tolist()
    if isinstance(o, np.generic):
        # NumPy scalars such as np.int64 are not instances of the built-in
        # numeric types, so without this they would be emitted as null.
        return o.item()
    # datetime.datetime is a subclass of datetime.date, so it is covered here.
    if isinstance(o, (datetime.date, datetime.time)):
        return str(o)
    # Deliberately lenient: unknown types become null instead of raising.
    return None
class MediaUtil(object):
@@ -279,3 +290,108 @@ class LabelUtil(object):
label = label.replace('Dbms', 'DBMS')
style_labels[name] = str(label)
return style_labels
def dump_debug_info(session, pretty_print=False):
    """Bundle the debug information for *session* into an in-memory ``.tar.gz``.

    The archive has a single top-level directory, ``debug_<timestamp>``, with:

    * ``session.json`` - the session row plus its tunable knobs. Non-empty
      binary model fields are stored as separate members and replaced in the
      JSON by the member path (``binaries/<field>.pickle``).
    * ``results.json`` - the session's results ordered by creation time, each
      with its parsed knob data, metric data, next configuration and the rows
      of its tasks (``None`` for a task id with no row).
    * ``logs/<app>.log`` - database log records created at or after the
      session's creation time, grouped by the first dotted component of the
      logger name.
    * ``constants.json`` - the upper-case names of ``constants`` that do not
      start with an underscore.

    Args:
        session: the ``Session`` instance to dump.
        pretty_print: indent the JSON members when true.

    Returns:
        A ``(tarstream, root)`` tuple: a ``BytesIO`` rewound to the start of
        the gzip-compressed archive, and the name of its top-level directory.

    Raises:
        TypeError: if the content of a member cannot be converted to bytes.
    """
    files = {}

    # Session
    session_values = Session.objects.filter(pk=session.pk).values()[0]
    session_values['dbms'] = session.dbms.full_name
    session_values['hardware'] = session.hardware.name

    # Session knobs. The instances and their values() rows come from two
    # separate queries, so pair them by primary key instead of by position.
    knob_instances = SessionKnob.objects.filter(
        session=session, tunable=True).select_related('knob')
    knob_names = {knob.pk: knob.name for knob in knob_instances}
    knob_values = list(knob_instances.values())
    for knob_dict in knob_values:
        knob_dict['knob'] = knob_names[knob_dict['id']]
    session_values['knobs'] = knob_values

    # Save binary field types to separate files
    binary_fields = [
        'ddpg_actor_model',
        'ddpg_critic_model',
        'ddpg_reply_memory',
        'dnn_model',
    ]
    for bf in binary_fields:
        content = session_values[bf]
        if content:
            # Archive member names are always '/'-separated.
            filename = 'binaries/{}.pickle'.format(bf)
            session_values[bf] = filename
            files[filename] = content

    files['session.json'] = session_values

    # Results from session (rows matched to instances by primary key).
    result_instances = Result.objects.filter(session=session).select_related(
        'knob_data', 'metric_data').order_by('creation_time')
    result_rows = {row['id']: row for row in result_instances.values()}
    results = []

    for result in result_instances:
        result_dict = OrderedDict(result_rows[result.pk])
        result_dict['next_configuration'] = JSONUtil.loads(
            result.next_configuration or '{}')

        # Fetch all task rows of this result in one query; ids without a row
        # are still reported, mapped to None.
        task_ids = result.task_ids.split(',') if result.task_ids else []
        task_rows = {}
        if task_ids:
            task_rows = {row['task_id']: row for row in
                         TaskMeta.objects.filter(task_id__in=task_ids).values()}
        result_dict['tasks'] = {task_id: task_rows.get(task_id) for task_id in task_ids}

        result_dict['knob_data'] = JSONUtil.loads(result.knob_data.data or '{}')
        result_dict['metric_data'] = JSONUtil.loads(result.metric_data.data or '{}')
        results.append(result_dict)

    files['results.json'] = results

    # Log messages written to the database using django-db-logger, written at
    # app scope (e.g., django, website, celery). Grouping on the exact first
    # component keeps an app such as "web" from also collecting "website".
    logs = StatusLog.objects.filter(create_datetime__gte=session.creation_time)
    logs_by_app = OrderedDict()
    for log_dict in logs.order_by('create_datetime').values():
        log_dict['level'] = logging.getLevelName(log_dict['level'])
        app_name = log_dict['logger_name'].split('.', 1)[0]
        logs_by_app.setdefault(app_name, []).append(log_dict)
    for app_name, log_values in logs_by_app.items():
        files['logs/{}.log'.format(app_name)] = log_values

    # Save settings
    files['constants.json'] = OrderedDict(
        (name, value) for name, value in sorted(constants.__dict__.items())
        if not name.startswith('_') and name == name.upper())

    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    root = 'debug_{}'.format(timestamp)
    mtime = time.time()
    tarstream = BytesIO()
    with tarfile.open(mode='w:gz', fileobj=tarstream) as tar:
        for filename, content in files.items():  # pylint: disable=not-an-iterable
            if isinstance(content, (dict, list)):
                content = JSONUtil.dumps(content, pprint=pretty_print)
            if isinstance(content, str):
                content = content.encode('utf-8')
            elif isinstance(content, (bytearray, memoryview)):
                # Binary fields are not guaranteed to come back as bytes.
                content = bytes(content)
            if not isinstance(content, bytes):
                raise TypeError("Cannot archive '{}': unsupported content type {}".format(
                    filename, type(content).__name__))
            tarinfo = tarfile.TarInfo(name='{}/{}'.format(root, filename))
            tarinfo.size = len(content)
            tarinfo.mtime = mtime
            tar.addfile(tarinfo, BytesIO(content))
    # Rewind so callers can read the archive from the beginning.
    tarstream.seek(0)
    return tarstream, root

View File

@@ -3,6 +3,7 @@
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
# pylint: disable=too-many-lines
import logging
import datetime
import re
@@ -14,6 +15,7 @@ from django.contrib.auth import update_session_auth_hash
from django.contrib.auth.forms import AuthenticationForm, UserCreationForm
from django.contrib.auth.forms import PasswordChangeForm
from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
from django.http import HttpResponse, QueryDict
from django.shortcuts import redirect, render, get_object_or_404
from django.template.context_processors import csrf
@@ -34,7 +36,7 @@ from .tasks import (aggregate_target_results, map_workload, train_ddpg,
configuration_recommendation, configuration_recommendation_ddpg)
from .types import (DBMSType, KnobUnitType, MetricType,
TaskType, VarType, WorkloadStatusType, AlgorithmType)
from .utils import JSONUtil, LabelUtil, MediaUtil, TaskUtil
from .utils import dump_debug_info, JSONUtil, LabelUtil, MediaUtil, TaskUtil
from .settings import TIME_ZONE
from .set_default_knobs import set_default_knobs
@@ -336,18 +338,24 @@ def edit_knobs(request, project_id, session_id):
{'project': project, 'session': session, 'form': form})
instance = form.instance
instance.session = session
instance.knob = KnobCatalog.objects.filter(dbms=session.dbms,
name=form.cleaned_data["name"])[0]
instance.knob = KnobCatalog.objects.get(dbms=session.dbms,
name=form.cleaned_data["name"])
SessionKnob.objects.filter(session=instance.session, knob=instance.knob).delete()
instance.save()
return HttpResponse(status=204)
else:
# knobs = KnobCatalog.objects.filter(dbms=session.dbms).order_by('-tunable')
knobs = SessionKnob.objects.filter(session=session).order_by('-tunable', 'knob__name')
forms = []
for knob in knobs:
knob_values = model_to_dict(knob)
knob_values['session'] = session
knob_values['name'] = KnobCatalog.objects.get(pk=knob.knob.pk).name
# if SessionKnob.objects.filter(session=session, knob=knob).exists():
# new_knob = SessionKnob.objects.filter(session=session, knob=knob)[0]
# knob_values["minval"] = new_knob.minval
# knob_values["maxval"] = new_knob.maxval
# knob_values["tunable"] = new_knob.tunable
forms.append(SessionKnobForm(initial=knob_values))
context = {
'project': project,
@@ -412,7 +420,6 @@ def new_result(request):
if not form.is_valid():
LOG.warning("New result form is not valid: %s", str(form.errors))
return HttpResponse("New result form is not valid: " + str(form.errors), status=400)
upload_code = form.cleaned_data['upload_code']
try:
session = Session.objects.get(upload_code=upload_code)
@@ -421,7 +428,6 @@ def new_result(request):
return HttpResponse("Invalid upload code: " + upload_code, status=400)
return handle_result_files(session, request.FILES)
LOG.warning("Request type was not POST")
return HttpResponse("Request type was not POST", status=400)
@@ -721,6 +727,17 @@ def download_next_config(request):
return response
@login_required(login_url=reverse_lazy('login'))
def download_debug_info(request, project_id, session_id):  # pylint: disable=unused-argument
    """Send a session's debug dump to the browser as a ``.tar.gz`` attachment.

    Args:
        request: the HTTP request of the logged-in user.
        project_id: id of the project from the URL (not used for the lookup).
        session_id: primary key of the session to dump.

    Returns:
        An ``HttpResponse`` carrying the gzip-compressed archive. A session
        that does not exist, or that does not belong to the requesting user,
        results in a 404 instead of an unhandled ``DoesNotExist``.
    """
    # Restrict the lookup to the requester's own sessions: the dump contains
    # model binaries and log records.
    session = get_object_or_404(Session, pk=session_id, user=request.user)
    content, filename = dump_debug_info(session, pretty_print=False)
    payload = content.getvalue()
    response = HttpResponse(payload, content_type='application/x-gzip')
    response['Content-Length'] = len(payload)
    response['Content-Disposition'] = 'attachment; filename={}.tar.gz'.format(filename)
    return response
@login_required(login_url=reverse_lazy('login'))
def tuner_status_view(request, project_id, session_id, result_id): # pylint: disable=unused-argument
res = Result.objects.get(pk=result_id)
@@ -949,29 +966,38 @@ def get_timeline_data(request):
# get the latest result
def give_result(request, upload_code): # pylint: disable=unused-argument
def _failed_response(_latest_result, _tasks, _num_completed, _status, _msg):
_msg = "{}\nSTATUS: {}\nRESULT ID: {}\n".format(_msg, _status, _latest_result)
if tasks:
_failed_task_idx = min(len(_tasks) - 1, _num_completed + 1)
_failed_task = _tasks[_failed_task_idx]
_msg += "TRACEBACK: {}".format(_failed_task.traceback)
return HttpResponse(_msg, status=400)
try:
session = Session.objects.get(upload_code=upload_code)
except Session.DoesNotExist:
LOG.warning("Invalid upload code: %s", upload_code)
return HttpResponse("Invalid upload code: " + upload_code, status=400)
results = Result.objects.filter(session=session)
lastest_result = results[len(results) - 1]
tasks = TaskUtil.get_tasks(lastest_result.task_ids)
latest_result = Result.objects.filter(session=session).latest('creation_time')
tasks = TaskUtil.get_tasks(latest_result.task_ids)
overall_status, num_completed = TaskUtil.get_task_status(tasks)
if overall_status == 'SUCCESS':
res = Result.objects.get(pk=lastest_result.pk)
response = HttpResponse(JSONUtil.dumps(res.next_configuration),
content_type='application/json')
next_config = latest_result.next_configuration
if not next_config:
overall_status = 'FAILURE'
response = _failed_response(latest_result, tasks, num_completed, overall_status,
'Failed to get the next configuration.')
else:
response = HttpResponse(JSONUtil.dumps(next_config),
content_type='application/json')
elif overall_status in ('FAILURE', 'REVOKED', 'RETRY'):
msg = "STATUS: {}\nRESULT ID: {}\n".format(overall_status, lastest_result)
if tasks:
failed_task_idx = min(len(tasks) - 1, num_completed + 1)
failed_task = tasks[failed_task_idx]
msg += "TRACEBACK: {}".format(failed_task.traceback)
response = HttpResponse(msg, status=400)
response = _failed_response(latest_result, tasks, num_completed, overall_status,
'Celery failed to get the next configuration.')
else: # overall_status in ('PENDING', 'RECEIVED', 'STARTED'):
response = HttpResponse("{}: Result not ready".format(overall_status), status=202)
@@ -979,6 +1005,23 @@ def give_result(request, upload_code): # pylint: disable=unused-argument
return response
# dump the debug info of the session with the given upload code
def get_debug_info(request, upload_code):  # pylint: disable=unused-argument
    """Return the debug dump of the session identified by *upload_code*.

    This view has no login requirement; the session is looked up by its
    upload code alone.

    Query parameters:
        pp: integer flag; a non-zero value pretty-prints the JSON members of
            the archive. Defaults to 0.

    Returns:
        An ``HttpResponse`` carrying the gzip-compressed archive as an
        attachment, or a 400 response when ``pp`` is not an integer or the
        upload code matches no session.
    """
    raw_pp = request.GET.get('pp', 0)
    try:
        pretty_print = bool(int(raw_pp))
    except ValueError:
        # A malformed flag is a client error, not a server error.
        LOG.warning("Invalid value for 'pp': %s", raw_pp)
        return HttpResponse("Invalid value for 'pp': expected an integer", status=400)

    try:
        session = Session.objects.get(upload_code=upload_code)
    except Session.DoesNotExist:
        LOG.warning("Invalid upload code: %s", upload_code)
        return HttpResponse("Invalid upload code: " + upload_code, status=400)

    content, filename = dump_debug_info(session, pretty_print=pretty_print)
    payload = content.getvalue()
    response = HttpResponse(payload, content_type='application/x-gzip')
    response['Content-Length'] = len(payload)
    response['Content-Disposition'] = 'attachment; filename={}.tar.gz'.format(filename)
    return response
def train_ddpg_loops(request, session_id): # pylint: disable=unused-argument
session = get_object_or_404(Session, pk=session_id, user=request.user) # pylint: disable=unused-variable
results = Result.objects.filter(session=session_id)