Added option to dump debug info in 3 places: (1) the session view, (2) as a command in manage.py, and (3) as a fab command in the driver.
This commit is contained in:
58
server/website/website/management/commands/dumpdebuginfo.py
Normal file
58
server/website/website/management/commands/dumpdebuginfo.py
Normal file
@@ -0,0 +1,58 @@
|
||||
#
|
||||
# OtterTune - dumpdebuginfo.py
|
||||
#
|
||||
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
|
||||
#
|
||||
import os
|
||||
|
||||
from django.core.management.base import BaseCommand, CommandError
|
||||
|
||||
from website.models import Session
|
||||
from website.utils import dump_debug_info
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that writes a session's debug archive to disk.

    The session is looked up by its upload code; the archive produced by
    ``dump_debug_info`` is written as a ``.tar.gz`` file.
    """

    help = 'Dump debug information for the session with the given upload code.'

    def add_arguments(self, parser):
        """Register the positional upload code and the optional output flags."""
        parser.add_argument(
            'uploadcode',
            metavar='UPLOADCODE',
            help="The upload code of the session to dump.")
        parser.add_argument(
            '-f', '--filename',
            metavar='FILE',
            help='Name of the file to write the debug information to. '
                 'Default: debug_[timestamp].tar.gz')
        parser.add_argument(
            '-d', '--directory',
            metavar='DIR',
            help='Path of the directory to write the debug information to. '
                 'Default: current directory')
        parser.add_argument(
            '--prettyprint',
            action='store_true',
            help='Pretty print the output.')

    def handle(self, *args, **options):
        """Dump the debug archive for the requested session.

        Raises:
            CommandError: if no session has the given upload code.
        """
        upload_code = options['uploadcode']

        # Validate the upload code first so that a typo does not leave an
        # empty output directory behind.
        try:
            session = Session.objects.get(upload_code=upload_code)
        except Session.DoesNotExist:
            raise CommandError(
                "ERROR: Session with upload code '{}' does not exist.".format(upload_code))

        directory = options['directory'] or ''
        if directory:
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(directory, exist_ok=True)

        debug_info, root = dump_debug_info(session, pretty_print=options['prettyprint'])

        # Fall back to the archive's root name (debug_<timestamp>) when no
        # explicit file name was given.
        filename = options['filename'] or root
        if not filename.endswith('.tar.gz'):
            filename += '.tar.gz'
        path = os.path.join(directory, filename)

        with open(path, 'wb') as f:
            f.write(debug_info.getvalue())

        self.stdout.write(self.style.SUCCESS(
            "Successfully dumped debug information to '{}'.".format(path)))
|
||||
@@ -61,12 +61,11 @@ caption span {float: right;}
|
||||
|
||||
<div id="session" class="container">
|
||||
<table class="table table-striped table-bordered table-condensed table-hover">
|
||||
<caption >
|
||||
<h4> {{ labels.title }}</h4>
|
||||
<span> (<a href="{% url 'edit_session' project.pk session.pk %}">edit</a>)
|
||||
(<a href="{% url 'edit_knobs' project.pk session.pk %}">edit knobs</a>)
|
||||
</span>
|
||||
</caption>
|
||||
<caption><h4>{{ labels.title }}
|
||||
(<a href="{% url 'edit_session' project.pk session.pk %}">edit</a>)
|
||||
(<a href="{% url 'edit_knobs' project.pk session.pk %}">edit knobs</a>)
|
||||
(<a href="{% url 'dump_debug_data' project.pk session.pk %}">dump debug info</a>)
|
||||
</h4></caption>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td style="width: 50%"><div class="text-right">{{ labels.name }}</div></td>
|
||||
|
||||
@@ -37,6 +37,7 @@ urlpatterns = [
|
||||
url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/edit/$', website_views.create_or_edit_session, name='edit_session'),
|
||||
url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/editKnobs/$', website_views.edit_knobs, name='edit_knobs'),
|
||||
url(r'^projects/(?P<project_id>[0-9]+)/sessions/delete/$', website_views.delete_session, name='delete_session'),
|
||||
url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/dump/$', website_views.download_debug_info, name='dump_debug_data'),
|
||||
|
||||
# URLs for result views
|
||||
url(r'^new_result/', website_views.new_result, name='new_result'),
|
||||
@@ -65,6 +66,7 @@ urlpatterns = [
|
||||
|
||||
# Back door
|
||||
url(r'^query_and_get/(?P<upload_code>[0-9a-zA-Z]+)$', website_views.give_result, name="backdoor"),
|
||||
url(r'^dump/(?P<upload_code>[0-9a-zA-Z]+)', website_views.get_debug_info, name="backdoor_debug"),
|
||||
|
||||
# train ddpg with results in the given session
|
||||
url(r'^train_ddpg/sessions/(?P<session_id>[0-9]+)$', website_views.train_ddpg_loops, name='train_ddpg_loops'),
|
||||
|
||||
@@ -3,24 +3,25 @@
|
||||
#
|
||||
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
|
||||
#
|
||||
'''
|
||||
Created on Jul 8, 2017
|
||||
|
||||
@author: dvanaken
|
||||
'''
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import string
|
||||
import tarfile
|
||||
import time
|
||||
from collections import OrderedDict
|
||||
from io import BytesIO
|
||||
from random import choice
|
||||
|
||||
import numpy as np
|
||||
from django.utils.text import capfirst
|
||||
from django_db_logger.models import StatusLog
|
||||
from djcelery.models import TaskMeta
|
||||
|
||||
from .models import DBMSCatalog, KnobCatalog, Result, Session, SessionKnob
|
||||
from .settings import constants
|
||||
from .types import LabelStyleType, VarType
|
||||
from .models import KnobCatalog, DBMSCatalog, SessionKnob
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
||||
@@ -34,17 +35,27 @@ class JSONUtil(object):
|
||||
object_pairs_hook=OrderedDict)
|
||||
|
||||
@staticmethod
def dumps(config, pprint=False, sort=False, encoder='custom'):
    """Serialize ``config`` to a JSON string.

    Args:
        config: the object to serialize.
        pprint: when exactly ``True``, indent the output by 4 spaces.
        sort: when exactly ``True``, sort dict items by key (or sort the
            elements of a non-dict iterable) before serializing.
        encoder: ``'custom'`` (the default) installs
            ``JSONUtil.custom_converter`` as the ``default`` hook for values
            that ``json`` cannot serialize natively; any other value uses the
            stock encoder.

    Returns:
        The JSON text, with non-ASCII characters left unescaped.
    """
    json_args = dict(indent=4 if pprint is True else None,
                     ensure_ascii=False)

    if encoder == 'custom':
        json_args.update(default=JSONUtil.custom_converter)

    if sort is True:
        if isinstance(config, dict):
            config = OrderedDict(sorted(config.items()))
        else:
            config = sorted(config)

    return json.dumps(config, **json_args)
|
||||
|
||||
@staticmethod
def custom_converter(o):
    """Convert objects that ``json`` cannot serialize natively.

    Intended for use as the ``default`` hook of ``json.dumps``.

    Returns:
        * ``str(o)`` for ``datetime``, ``date`` and ``time`` objects,
        * a nested list for NumPy arrays,
        * the equivalent Python scalar for NumPy scalar types,
        * ``None`` for anything else, so unknown objects are emitted as
          JSON ``null`` rather than aborting the dump.
    """
    if isinstance(o, (datetime.datetime, datetime.date, datetime.time)):
        return str(o)
    if isinstance(o, np.ndarray):
        return o.tolist()
    if isinstance(o, np.generic):
        # NumPy scalars (e.g. np.int64) are not Python ints/floats.
        return o.item()
    return None
|
||||
|
||||
|
||||
class MediaUtil(object):
|
||||
@@ -279,3 +290,108 @@ class LabelUtil(object):
|
||||
label = label.replace('Dbms', 'DBMS')
|
||||
style_labels[name] = str(label)
|
||||
return style_labels
|
||||
|
||||
|
||||
def dump_debug_info(session, pretty_print=False):
    """Bundle debugging information about ``session`` into a gzipped tarball.

    The archive contains, under a ``debug_<timestamp>`` root directory:

    * ``session.json``   - the session row plus its tunable knobs,
    * ``binaries/*``     - non-empty binary model fields of the session,
    * ``results.json``   - the session's results with their knob/metric data
      and the Celery task rows referenced by each result,
    * ``logs/<app>.log`` - database log records created since the session's
      creation time, grouped by top-level logger name,
    * ``constants.json`` - the upper-case, public names in ``constants``.

    Args:
        session: the ``Session`` instance to dump.
        pretty_print: indent the JSON files when True.

    Returns:
        A ``(tarstream, root)`` tuple: a ``BytesIO`` positioned at offset 0
        holding the ``.tar.gz`` data, and the name of the archive's root
        directory.

    Raises:
        TypeError: if a file's content cannot be reduced to ``bytes``.
    """
    files = {}

    # Session
    session_values = Session.objects.filter(pk=session.pk).values()[0]
    session_values['dbms'] = session.dbms.full_name
    session_values['hardware'] = session.hardware.name

    # Session knobs. Names are looked up by primary key rather than by
    # zipping two separately executed queries, whose row order is not
    # guaranteed to match.
    knob_queryset = SessionKnob.objects.filter(
        session=session, tunable=True).select_related('knob')
    knob_names = {knob.pk: knob.name for knob in knob_queryset}
    knob_values = list(knob_queryset.values())
    for knob_dict in knob_values:
        if knob_dict['id'] in knob_names:
            knob_dict['knob'] = knob_names[knob_dict['id']]
    session_values['knobs'] = knob_values

    # Save binary field types to separate files
    binary_fields = [
        'ddpg_actor_model',
        'ddpg_critic_model',
        'ddpg_reply_memory',
        'dnn_model',
    ]
    for bf in binary_fields:
        content = session_values.get(bf)
        if content:
            # Some database backends hand binary columns back as
            # memoryview/bytearray; normalize so they can be archived.
            if isinstance(content, (memoryview, bytearray)):
                content = bytes(content)
            # Tar member names always use forward slashes.
            filename = 'binaries/{}.pickle'.format(bf)
            session_values[bf] = filename
            files[filename] = content

    files['session.json'] = session_values

    # Results from session
    result_queryset = Result.objects.filter(session=session).select_related(
        'knob_data', 'metric_data').order_by('creation_time')
    result_rows = {row['id']: row for row in result_queryset.values()}
    results = []

    for result in result_queryset:
        row = result_rows.get(result.pk)
        if row is None:
            # The row disappeared between the two queries; skip it.
            continue
        result_dict = OrderedDict(row)
        next_config = result.next_configuration or '{}'
        result_dict['next_configuration'] = JSONUtil.loads(next_config)

        tasks = {}
        task_ids = result.task_ids
        task_ids = task_ids.split(',') if task_ids else []
        for task_id in task_ids:
            # None records that the task id has no matching TaskMeta row.
            tasks[task_id] = TaskMeta.objects.filter(task_id=task_id).values().first()
        result_dict['tasks'] = tasks

        knob_data = result.knob_data.data or '{}'
        metric_data = result.metric_data.data or '{}'
        result_dict['knob_data'] = JSONUtil.loads(knob_data)
        result_dict['metric_data'] = JSONUtil.loads(metric_data)
        results.append(result_dict)

    files['results.json'] = results

    # Log messages written to the database using django-db-logger
    logs = StatusLog.objects.filter(create_datetime__gte=session.creation_time)
    logger_names = logs.order_by().values_list('logger_name', flat=True).distinct()

    # Write log files at app scope (e.g., django, website, celery)
    logger_names = {name.split('.', 1)[0] for name in logger_names}

    for logger_name in logger_names:
        log_values = list(logs.filter(logger_name__startswith=logger_name).order_by(
            'create_datetime').values())
        for lv in log_values:
            lv['level'] = logging.getLevelName(lv['level'])
        files['logs/{}.log'.format(logger_name)] = log_values

    # Save settings
    constants_dict = OrderedDict()
    for name, value in sorted(vars(constants).items()):
        if not name.startswith('_') and name == name.upper():
            constants_dict[name] = value
    files['constants.json'] = constants_dict

    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    root = 'debug_{}'.format(timestamp)

    # Give every archive member the same modification time.
    mtime = time.time()
    tarstream = BytesIO()
    with tarfile.open(mode='w:gz', fileobj=tarstream) as tar:
        for filename, content in files.items():  # pylint: disable=not-an-iterable
            if isinstance(content, (dict, list)):
                content = JSONUtil.dumps(content, pprint=pretty_print)
            if isinstance(content, str):
                content = content.encode('utf-8')
            if not isinstance(content, bytes):
                raise TypeError("Cannot archive '{}': unsupported content type {}".format(
                    filename, type(content)))
            tarinfo = tarfile.TarInfo(name='{}/{}'.format(root, filename))
            tarinfo.size = len(content)
            tarinfo.mtime = mtime
            tar.addfile(tarinfo, BytesIO(content))

    tarstream.seek(0)
    return tarstream, root
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#
|
||||
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
|
||||
#
|
||||
# pylint: disable=too-many-lines
|
||||
import logging
|
||||
import datetime
|
||||
import re
|
||||
@@ -14,6 +15,7 @@ from django.contrib.auth import update_session_auth_hash
|
||||
from django.contrib.auth.forms import AuthenticationForm, UserCreationForm
|
||||
from django.contrib.auth.forms import PasswordChangeForm
|
||||
from django.core.exceptions import ObjectDoesNotExist
|
||||
from django.core.files.base import ContentFile
|
||||
from django.http import HttpResponse, QueryDict
|
||||
from django.shortcuts import redirect, render, get_object_or_404
|
||||
from django.template.context_processors import csrf
|
||||
@@ -34,7 +36,7 @@ from .tasks import (aggregate_target_results, map_workload, train_ddpg,
|
||||
configuration_recommendation, configuration_recommendation_ddpg)
|
||||
from .types import (DBMSType, KnobUnitType, MetricType,
|
||||
TaskType, VarType, WorkloadStatusType, AlgorithmType)
|
||||
from .utils import JSONUtil, LabelUtil, MediaUtil, TaskUtil
|
||||
from .utils import dump_debug_info, JSONUtil, LabelUtil, MediaUtil, TaskUtil
|
||||
from .settings import TIME_ZONE
|
||||
|
||||
from .set_default_knobs import set_default_knobs
|
||||
@@ -336,18 +338,24 @@ def edit_knobs(request, project_id, session_id):
|
||||
{'project': project, 'session': session, 'form': form})
|
||||
instance = form.instance
|
||||
instance.session = session
|
||||
instance.knob = KnobCatalog.objects.filter(dbms=session.dbms,
|
||||
name=form.cleaned_data["name"])[0]
|
||||
instance.knob = KnobCatalog.objects.get(dbms=session.dbms,
|
||||
name=form.cleaned_data["name"])
|
||||
SessionKnob.objects.filter(session=instance.session, knob=instance.knob).delete()
|
||||
instance.save()
|
||||
return HttpResponse(status=204)
|
||||
else:
|
||||
# knobs = KnobCatalog.objects.filter(dbms=session.dbms).order_by('-tunable')
|
||||
knobs = SessionKnob.objects.filter(session=session).order_by('-tunable', 'knob__name')
|
||||
forms = []
|
||||
for knob in knobs:
|
||||
knob_values = model_to_dict(knob)
|
||||
knob_values['session'] = session
|
||||
knob_values['name'] = KnobCatalog.objects.get(pk=knob.knob.pk).name
|
||||
# if SessionKnob.objects.filter(session=session, knob=knob).exists():
|
||||
# new_knob = SessionKnob.objects.filter(session=session, knob=knob)[0]
|
||||
# knob_values["minval"] = new_knob.minval
|
||||
# knob_values["maxval"] = new_knob.maxval
|
||||
# knob_values["tunable"] = new_knob.tunable
|
||||
forms.append(SessionKnobForm(initial=knob_values))
|
||||
context = {
|
||||
'project': project,
|
||||
@@ -412,7 +420,6 @@ def new_result(request):
|
||||
if not form.is_valid():
|
||||
LOG.warning("New result form is not valid: %s", str(form.errors))
|
||||
return HttpResponse("New result form is not valid: " + str(form.errors), status=400)
|
||||
|
||||
upload_code = form.cleaned_data['upload_code']
|
||||
try:
|
||||
session = Session.objects.get(upload_code=upload_code)
|
||||
@@ -421,7 +428,6 @@ def new_result(request):
|
||||
return HttpResponse("Invalid upload code: " + upload_code, status=400)
|
||||
|
||||
return handle_result_files(session, request.FILES)
|
||||
|
||||
LOG.warning("Request type was not POST")
|
||||
return HttpResponse("Request type was not POST", status=400)
|
||||
|
||||
@@ -721,6 +727,17 @@ def download_next_config(request):
|
||||
return response
|
||||
|
||||
|
||||
@login_required(login_url=reverse_lazy('login'))
def download_debug_info(request, project_id, session_id):  # pylint: disable=unused-argument
    """Send the debug archive for a session as a file download.

    The session must belong to the logged-in user; an unknown session id or
    a session owned by someone else yields a 404 instead of a server error
    or leaking another user's data.
    """
    session = get_object_or_404(Session, pk=session_id, user=request.user)
    content, filename = dump_debug_info(session, pretty_print=False)
    file = ContentFile(content.getvalue())
    response = HttpResponse(file, content_type='application/x-gzip')
    response['Content-Length'] = file.size
    response['Content-Disposition'] = 'attachment; filename={}.tar.gz'.format(filename)
    return response
|
||||
|
||||
|
||||
@login_required(login_url=reverse_lazy('login'))
|
||||
def tuner_status_view(request, project_id, session_id, result_id): # pylint: disable=unused-argument
|
||||
res = Result.objects.get(pk=result_id)
|
||||
@@ -949,29 +966,38 @@ def get_timeline_data(request):
|
||||
|
||||
# Back door: get the latest result for the session with the given upload code
def give_result(request, upload_code):  # pylint: disable=unused-argument
    """Return the next configuration recommended for a session's latest result.

    Responses:
        200 - the next configuration, as JSON.
        202 - the tasks for the latest result have not finished yet.
        400 - the upload code is invalid, the session has no results, or the
              tasks failed / produced no configuration.
    """

    def _failed_response(_latest_result, _tasks, _num_completed, _status, _msg):
        # Build a 400 response describing the failure, including the
        # traceback of the task presumed to have failed.
        _msg = "{}\nSTATUS: {}\nRESULT ID: {}\n".format(_msg, _status, _latest_result)
        if _tasks:
            # NOTE(review): index is clamped to the last task; confirm that
            # `_num_completed + 1` (rather than `_num_completed`) is the
            # intended position of the failed task.
            _failed_task_idx = min(len(_tasks) - 1, _num_completed + 1)
            _failed_task = _tasks[_failed_task_idx]
            _msg += "TRACEBACK: {}".format(_failed_task.traceback)
        return HttpResponse(_msg, status=400)

    try:
        session = Session.objects.get(upload_code=upload_code)
    except Session.DoesNotExist:
        LOG.warning("Invalid upload code: %s", upload_code)
        return HttpResponse("Invalid upload code: " + upload_code, status=400)

    try:
        latest_result = Result.objects.filter(session=session).latest('creation_time')
    except Result.DoesNotExist:
        # latest() raises on an empty queryset; report it instead of crashing.
        LOG.warning("No results found for upload code: %s", upload_code)
        return HttpResponse("No results found for upload code: " + upload_code, status=400)

    tasks = TaskUtil.get_tasks(latest_result.task_ids)
    overall_status, num_completed = TaskUtil.get_task_status(tasks)

    if overall_status == 'SUCCESS':
        next_config = latest_result.next_configuration
        if not next_config:
            # The tasks finished but did not store a configuration.
            overall_status = 'FAILURE'
            response = _failed_response(latest_result, tasks, num_completed, overall_status,
                                        'Failed to get the next configuration.')
        else:
            response = HttpResponse(JSONUtil.dumps(next_config),
                                    content_type='application/json')

    elif overall_status in ('FAILURE', 'REVOKED', 'RETRY'):
        response = _failed_response(latest_result, tasks, num_completed, overall_status,
                                    'Celery failed to get the next configuration.')

    else:  # overall_status in ('PENDING', 'RECEIVED', 'STARTED'):
        response = HttpResponse("{}: Result not ready".format(overall_status), status=202)

    return response
|
||||
|
||||
|
||||
# Back door: download the debug info for the session with the given upload code
def get_debug_info(request, upload_code):  # pylint: disable=unused-argument
    """Send the debug archive for the session identified by ``upload_code``.

    Query parameters:
        pp: pretty-print the JSON files when set to a non-zero integer or to
            one of ``true`` / ``yes`` / ``on``. Any other value disables
            pretty printing instead of raising an error.

    Returns a 400 response when no session has the given upload code.
    """
    raw_pp = request.GET.get('pp', '0')
    try:
        pprint = bool(int(raw_pp))
    except ValueError:
        # A non-numeric value must not turn into a server error.
        pprint = raw_pp.strip().lower() in ('true', 'yes', 'on')

    try:
        session = Session.objects.get(upload_code=upload_code)
    except Session.DoesNotExist:
        LOG.warning("Invalid upload code: %s", upload_code)
        return HttpResponse("Invalid upload code: " + upload_code, status=400)

    content, filename = dump_debug_info(session, pretty_print=pprint)
    file = ContentFile(content.getvalue())
    response = HttpResponse(file, content_type='application/x-gzip')
    response['Content-Length'] = file.size
    response['Content-Disposition'] = 'attachment; filename={}.tar.gz'.format(filename)
    return response
|
||||
|
||||
|
||||
def train_ddpg_loops(request, session_id): # pylint: disable=unused-argument
|
||||
session = get_object_or_404(Session, pk=session_id, user=request.user) # pylint: disable=unused-variable
|
||||
results = Result.objects.filter(session=session_id)
|
||||
|
||||
Reference in New Issue
Block a user