Added option to dump debug info in 3 places: (1) the session view, (2) as a command in manage.py, and (3) as a fab command in the driver.

This commit is contained in:
Dana Van Aken 2019-10-03 22:21:51 -04:00
parent 309d327a44
commit 162dc48c53
7 changed files with 317 additions and 57 deletions

View File

@ -1,7 +1,7 @@
{ {
"database_type" : "postgres", "database_type" : "postgres",
"database_name" : "tpcc", "database_name" : "tpcc",
"database_disk": "/dev/xvda1", "database_disk": "",
"database_conf": "/etc/postgresql/9.6/main/postgresql.conf", "database_conf": "/etc/postgresql/9.6/main/postgresql.conf",
"database_save_path": "~/ottertune/client/driver/dumpfiles", "database_save_path": "~/ottertune/client/driver/dumpfiles",
"username" : "dbuser", "username" : "dbuser",

View File

@ -34,6 +34,8 @@ fabric_output.update({
RELOAD_INTERVAL = 10 RELOAD_INTERVAL = 10
# maximum disk usage # maximum disk usage
MAX_DISK_USAGE = 90 MAX_DISK_USAGE = 90
# Postgres datadir (passed to `pg_ctl -D` when restarting the database)
# NOTE(review): this path is hard-coded for a PostgreSQL 11 install, whereas the
# sample driver config points `database_conf` at a 9.6 install -- confirm that
# it matches the DBMS actually being tuned.
PG_DATADIR = '/var/lib/postgresql/11/main'
# Load config # Load config
with open('driver_config.json', 'r') as _f: with open('driver_config.json', 'r') as _f:
@ -119,7 +121,7 @@ def create_controller_config():
@task @task
def restart_database(): def restart_database():
if CONF['database_type'] == 'postgres': if CONF['database_type'] == 'postgres':
cmd = 'sudo service postgresql restart' cmd = 'sudo -u postgres pg_ctl -D {} -w restart'.format(PG_DATADIR)
elif CONF['database_type'] == 'oracle': elif CONF['database_type'] == 'oracle':
cmd = 'sh oracleScripts/shutdownOracle.sh && sh oracleScripts/startupOracle.sh' cmd = 'sh oracleScripts/shutdownOracle.sh && sh oracleScripts/startupOracle.sh'
else: else:
@ -149,11 +151,11 @@ def create_database():
@task @task
def reset_conf(): def reset_conf():
change_conf(next_conf='') change_conf()
@task @task
def change_conf(next_conf='next_config'): def change_conf(next_conf=None):
signal = "# configurations recommended by ottertune:\n" signal = "# configurations recommended by ottertune:\n"
next_conf = next_conf or {} next_conf = next_conf or {}
@ -240,6 +242,16 @@ def save_dbms_result():
srcfile = os.path.join(CONF['controller_home'], 'output', f_) srcfile = os.path.join(CONF['controller_home'], 'output', f_)
dstfile = os.path.join(CONF['save_path'], '{}__{}'.format(t, f_)) dstfile = os.path.join(CONF['save_path'], '{}__{}'.format(t, f_))
local('cp {} {}'.format(srcfile, dstfile)) local('cp {} {}'.format(srcfile, dstfile))
return t
@task
def save_next_config(next_config, t=None):
    """Write ``next_config`` as JSON into the configured save directory.

    The file is named ``<t>__next_config.json``. When ``t`` is falsy the
    current Unix time (whole seconds) is used instead.

    Returns the timestamp that was used in the file name.
    """
    timestamp = t or int(time.time())
    target = os.path.join(CONF['save_path'], '{}__next_config.json'.format(timestamp))
    with open(target, 'w') as out:
        json.dump(next_config, out, indent=2)
    return timestamp
@task @task
@ -282,52 +294,77 @@ def upload_result(result_dir=None, prefix=None):
@task @task
def get_result(max_time_sec=180, interval_sec=1): def get_result(max_time_sec=180, interval_sec=5):
max_time_sec = int(max_time_sec) max_time_sec = int(max_time_sec)
interval_sec = int(interval_sec) interval_sec = int(interval_sec)
url = CONF['upload_url'] + '/query_and_get/' + CONF['upload_code'] url = CONF['upload_url'] + '/query_and_get/' + CONF['upload_code']
elapsed = 0.0 elapsed = 0
response_dict = None response_dict = None
response = '' response = ''
start_time = time.time()
while elapsed <= max_time_sec: while elapsed <= max_time_sec:
rsp = requests.get(url) rsp = requests.get(url)
response = rsp.content.decode() response = rsp.content.decode()
LOG.debug('Response:\n\n%s\n', response) assert response != 'null'
LOG.debug('%s [status code: %d, content_type: %s, elapsed: %ds]', response,
rsp.status_code, rsp.headers.get('content-type', ''), elapsed)
if rsp.status_code == 200: if rsp.status_code == 200:
# Success # Success
response_dict = json.loads(rsp.json(), object_pairs_hook=OrderedDict) response_dict = json.loads(rsp.json(), object_pairs_hook=OrderedDict)
break break
elif rsp.status_code == 202: elif rsp.status_code == 202:
# Not ready # Not ready
time.sleep(interval_sec) time.sleep(interval_sec)
elapsed += interval_sec
elif rsp.status_code == 400: elif rsp.status_code == 400:
# Failure # Failure
raise Exception( raise Exception(
"Failed to download the next config.\nStatus code: {}\nMessage: {}\n".format( "Failed to download the next config.\nStatus code: {}\nMessage: {}\n".format(
rsp.status_code, response)) rsp.status_code, response))
else: else:
raise NotImplementedError( raise NotImplementedError(
"Unhandled status code: '{}'.\nMessage: {}".format(rsp.status_code, response)) "Unhandled status code: '{}'.\nMessage: {}".format(rsp.status_code, response))
elapsed = time.time() - start_time
if not response_dict: if not response_dict:
assert elapsed > max_time_sec, \ assert elapsed > max_time_sec, \
'response={} but elapsed={:.1f}s <= max_time={:.1f}s'.format( 'response={} but elapsed={}s <= max_time={}s'.format(
response, elapsed, max_time_sec) response, elapsed, max_time_sec)
raise Exception( raise Exception(
'Failed to download the next config in {}s: {} (elapsed: {:.1f}s)'.format( 'Failed to download the next config in {}s: {} (elapsed: {}s)'.format(
max_time_sec, response, elapsed)) max_time_sec, response, elapsed))
LOG.info('Downloaded the next config in %.0fs: %s', elapsed, LOG.info('Downloaded the next config in %ds: %s', elapsed, json.dumps(response_dict, indent=4))
json.dumps(response_dict, indent=4))
return response_dict return response_dict
@task
def download_debug_info(pprint=False):
    """Download the debug-info archive for the configured upload code.

    The archive is requested from ``<upload_url>/dump/<upload_code>`` and
    written to the current working directory.

    Args:
        pprint: truthy value (parsed with ``_parse_bool``) asking the server
            to pretty-print the JSON files inside the archive.

    Returns:
        The name of the file that was written.

    Raises:
        Exception: if the server does not answer with status 200.
        AssertionError: if the server announced a Content-Length that does
            not match the number of bytes received.
    """
    pprint = _parse_bool(pprint)
    url = '{}/dump/{}'.format(CONF['upload_url'], CONF['upload_code'])
    params = {'pp': int(True)} if pprint else {}
    rsp = requests.get(url, params=params)

    if rsp.status_code != 200:
        raise Exception(
            'Error downloading debug info.\nStatus code: {}\nMessage: {}\n'.format(
                rsp.status_code, rsp.content.decode(errors='replace')))

    # Take the file name from the Content-Disposition header when present.
    disposition = rsp.headers.get('Content-Disposition') or ''
    _, found, name = disposition.partition('filename=')
    filename = name.split(';')[0].strip().strip('"\'') if found else ''
    # The name is server-controlled: keep only the last path component so the
    # archive can never be written outside of the current directory.
    filename = os.path.basename(filename)
    if not filename:
        filename = 'debug_{}.tar.gz'.format(int(time.time()))

    # Only verify the length when the server actually announced one.
    exp_len = rsp.headers.get('Content-Length')
    if exp_len is not None:
        file_len, exp_len = len(rsp.content), int(exp_len)
        assert file_len == exp_len, \
            'File {}: content length != expected length: {} != {}'.format(
                filename, file_len, exp_len)

    with open(filename, 'wb') as f:
        f.write(rsp.content)
    LOG.info('Downloaded debug info to %s', filename)

    return filename
@task @task
def add_udf(): def add_udf():
cmd = 'sudo python3 ./LatencyUDF.py ../controller/output/' cmd = 'sudo python3 ./LatencyUDF.py ../controller/output/'
@ -469,16 +506,19 @@ def loop():
# add_udf() # add_udf()
# save result # save result
save_dbms_result() result_timestamp = save_dbms_result()
# upload result # upload result
upload_result() upload_result()
# get result # get result
get_result() response = get_result()
# save next config
save_next_config(response, t=result_timestamp)
# change config # change config
change_conf() change_conf(response['recommendation'])
@task @task
@ -489,6 +529,7 @@ def run_lhs():
# dump database if it's not done before. # dump database if it's not done before.
dump = dump_database() dump = dump_database()
result_timestamp = None
for i, sample in enumerate(samples): for i, sample in enumerate(samples):
# reload database periodically # reload database periodically
if RELOAD_INTERVAL > 0: if RELOAD_INTERVAL > 0:
@ -508,15 +549,16 @@ def run_lhs():
if check_disk_usage() > MAX_DISK_USAGE: if check_disk_usage() > MAX_DISK_USAGE:
LOG.warning('Exceeds max disk usage %s', MAX_DISK_USAGE) LOG.warning('Exceeds max disk usage %s', MAX_DISK_USAGE)
# copy lhs-sampled config to the to-be-used config # load the next lhs-sampled config
cmd = 'cp {} next_config'.format(sample) with open(sample, 'r') as f:
local(cmd) next_config = json.load(f, object_pairs_hook=OrderedDict)
save_next_config(next_config, t=result_timestamp)
# remove oltpbench log and controller log # remove oltpbench log and controller log
clean_logs() clean_logs()
# change config # change config
change_conf() change_conf(next_config)
# restart database # restart database
restart_database() restart_database()
@ -550,7 +592,7 @@ def run_lhs():
p.join() p.join()
# save result # save result
save_dbms_result() result_timestamp = save_dbms_result()
# upload result # upload result
upload_result() upload_result()

View File

@ -0,0 +1,58 @@
#
# OtterTune - setuploadcode.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import os
from django.core.management.base import BaseCommand, CommandError
from website.models import Session
from website.utils import dump_debug_info
class Command(BaseCommand):
    """Management command that writes a session's debug-info archive to disk."""
    help = 'Dump debug information for the session with the given upload code.'

    def add_arguments(self, parser):
        """Register the positional upload code and the optional output settings."""
        parser.add_argument(
            'uploadcode',
            metavar='UPLOADCODE',
            help="The session's upload code.")
        parser.add_argument(
            '-f', '--filename',
            metavar='FILE',
            help='Name of the file to write the debug information to. '
                 'Default: debug_[timestamp].tar.gz')
        parser.add_argument(
            '-d', '--directory',
            metavar='DIR',
            help='Path of the directory to write the debug information to. '
                 'Default: current directory')
        parser.add_argument(
            '--prettyprint',
            action='store_true',
            help='Pretty print the output.')

    def handle(self, *args, **options):
        """Look up the session, build the archive and write it to the target path.

        Raises:
            CommandError: if no session has the given upload code.
        """
        # Resolve the session before touching the filesystem so that a wrong
        # upload code does not leave an empty output directory behind.
        try:
            session = Session.objects.get(upload_code=options['uploadcode'])
        except Session.DoesNotExist:
            raise CommandError(
                "ERROR: Session with upload code '{}' does not exist.".format(
                    options['uploadcode']))

        directory = options['directory'] or ''
        if directory:
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(directory, exist_ok=True)

        debug_info, root = dump_debug_info(session, pretty_print=options['prettyprint'])

        filename = options['filename'] or root
        if not filename.endswith('.tar.gz'):
            filename += '.tar.gz'
        path = os.path.join(directory, filename)

        with open(path, 'wb') as f:
            f.write(debug_info.getvalue())

        self.stdout.write(self.style.SUCCESS(
            "Successfully dumped debug information to '{}'.".format(path)))

View File

@ -61,12 +61,11 @@ caption span {float: right;}
<div id="session" class="container"> <div id="session" class="container">
<table class="table table-striped table-bordered table-condensed table-hover"> <table class="table table-striped table-bordered table-condensed table-hover">
<caption > <caption><h4>{{ labels.title }}
<h4> {{ labels.title }}</h4> (<a href="{% url 'edit_session' project.pk session.pk %}">edit</a>)
<span> (<a href="{% url 'edit_session' project.pk session.pk %}">edit</a>)
(<a href="{% url 'edit_knobs' project.pk session.pk %}">edit knobs</a>) (<a href="{% url 'edit_knobs' project.pk session.pk %}">edit knobs</a>)
</span> (<a href="{% url 'dump_debug_data' project.pk session.pk %}">dump debug info</a>)
</caption> </h4></caption>
<tbody> <tbody>
<tr> <tr>
<td style="width: 50%"><div class="text-right">{{ labels.name }}</div></td> <td style="width: 50%"><div class="text-right">{{ labels.name }}</div></td>

View File

@ -37,6 +37,7 @@ urlpatterns = [
url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/edit/$', website_views.create_or_edit_session, name='edit_session'), url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/edit/$', website_views.create_or_edit_session, name='edit_session'),
url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/editKnobs/$', website_views.edit_knobs, name='edit_knobs'), url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/editKnobs/$', website_views.edit_knobs, name='edit_knobs'),
url(r'^projects/(?P<project_id>[0-9]+)/sessions/delete/$', website_views.delete_session, name='delete_session'), url(r'^projects/(?P<project_id>[0-9]+)/sessions/delete/$', website_views.delete_session, name='delete_session'),
url(r'^projects/(?P<project_id>[0-9]+)/sessions/(?P<session_id>[0-9]+)/dump/$', website_views.download_debug_info, name='dump_debug_data'),
# URLs for result views # URLs for result views
url(r'^new_result/', website_views.new_result, name='new_result'), url(r'^new_result/', website_views.new_result, name='new_result'),
@ -65,6 +66,7 @@ urlpatterns = [
# Back door # Back door
url(r'^query_and_get/(?P<upload_code>[0-9a-zA-Z]+)$', website_views.give_result, name="backdoor"), url(r'^query_and_get/(?P<upload_code>[0-9a-zA-Z]+)$', website_views.give_result, name="backdoor"),
url(r'^dump/(?P<upload_code>[0-9a-zA-Z]+)', website_views.get_debug_info, name="backdoor_debug"),
# train ddpg with results in the given session # train ddpg with results in the given session
url(r'^train_ddpg/sessions/(?P<session_id>[0-9]+)$', website_views.train_ddpg_loops, name='train_ddpg_loops'), url(r'^train_ddpg/sessions/(?P<session_id>[0-9]+)$', website_views.train_ddpg_loops, name='train_ddpg_loops'),

View File

@ -3,24 +3,25 @@
# #
# Copyright (c) 2017-18, Carnegie Mellon University Database Group # Copyright (c) 2017-18, Carnegie Mellon University Database Group
# #
''' import datetime
Created on Jul 8, 2017
@author: dvanaken
'''
import json import json
import logging import logging
import os
import string import string
import tarfile
import time
from collections import OrderedDict from collections import OrderedDict
from io import BytesIO
from random import choice from random import choice
import numpy as np import numpy as np
from django.utils.text import capfirst from django.utils.text import capfirst
from django_db_logger.models import StatusLog
from djcelery.models import TaskMeta from djcelery.models import TaskMeta
from .models import DBMSCatalog, KnobCatalog, Result, Session, SessionKnob
from .settings import constants
from .types import LabelStyleType, VarType from .types import LabelStyleType, VarType
from .models import KnobCatalog, DBMSCatalog, SessionKnob
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@ -34,17 +35,27 @@ class JSONUtil(object):
object_pairs_hook=OrderedDict) object_pairs_hook=OrderedDict)
@staticmethod @staticmethod
def dumps(config, pprint=False, sort=False): def dumps(config, pprint=False, sort=False, encoder='custom'):
indent = 4 if pprint is True else None json_args = dict(indent=4 if pprint is True else None,
ensure_ascii=False)
if encoder == 'custom':
json_args.update(default=JSONUtil.custom_converter)
if sort is True: if sort is True:
if isinstance(config, dict): if isinstance(config, dict):
config = OrderedDict(sorted(config.items())) config = OrderedDict(sorted(config.items()))
else: else:
config = sorted(config) config = sorted(config)
return json.dumps(config, return json.dumps(config, **json_args)
ensure_ascii=False,
indent=indent) @staticmethod
def custom_converter(o):
    """Fallback serializer handed to ``json.dumps`` as ``default=``.

    Converts the non-JSON types that show up in dumped data:
      - ``datetime``/``date``/``time`` objects -> their ``str()`` form
      - NumPy arrays -> nested lists
      - NumPy scalars (e.g. ``np.int64``, ``np.float32``) -> Python scalars

    Any other type yields ``None`` (serialized as ``null``). This fallback is
    kept on purpose so existing callers keep working.
    NOTE(review): consider raising ``TypeError`` here once all callers only
    pass serializable values, so that data is not dropped silently.
    """
    # datetime.datetime is a subclass of datetime.date
    if isinstance(o, (datetime.date, datetime.time)):
        return str(o)
    if isinstance(o, np.ndarray):
        return o.tolist()
    if isinstance(o, np.generic):
        return o.item()
    return None
class MediaUtil(object): class MediaUtil(object):
@ -279,3 +290,108 @@ class LabelUtil(object):
label = label.replace('Dbms', 'DBMS') label = label.replace('Dbms', 'DBMS')
style_labels[name] = str(label) style_labels[name] = str(label)
return style_labels return style_labels
def dump_debug_info(session, pretty_print=False):
    """Bundle the debug information of ``session`` into an in-memory tar.gz.

    All members live under a ``debug_<timestamp>/`` root directory:
      - ``session.json``: the session row plus its tunable session knobs
      - ``binaries/<field>.pickle``: non-empty binary model fields of the session
      - ``results.json``: every result of the session (ordered by creation
        time) with its knob data, metric data, next configuration and tasks
      - ``logs/<app>.log``: database log records created since the session was
        created, grouped by the first component of the logger name
      - ``constants.json``: the upper-case names defined in the settings
        constants module

    Args:
        session: the ``Session`` instance to dump.
        pretty_print: indent the JSON files when True.

    Returns:
        A ``(tarstream, root)`` tuple: a ``BytesIO`` positioned at offset 0
        holding the archive, and the name of the archive's root directory.
    """
    files = {}

    # Session
    session_values = Session.objects.filter(pk=session.pk).values()[0]
    session_values['dbms'] = session.dbms.full_name
    session_values['hardware'] = session.hardware.name

    # Session knobs. The model instances and the value dicts come from two
    # separate queries, so pair them by primary key instead of by position.
    knob_instances = SessionKnob.objects.filter(
        session=session, tunable=True).select_related('knob')
    knob_names = {knob.pk: knob.name for knob in knob_instances}
    knob_values = [kv for kv in knob_instances.values() if kv['id'] in knob_names]
    for knob_dict in knob_values:
        knob_dict['knob'] = knob_names[knob_dict['id']]
    session_values['knobs'] = knob_values

    # Save binary field types to separate files
    binary_fields = [
        'ddpg_actor_model',
        'ddpg_critic_model',
        'ddpg_reply_memory',
        'dnn_model',
    ]
    for bf in binary_fields:
        content = session_values[bf]
        if content:
            filename = 'binaries/{}.pickle'.format(bf)
            session_values[bf] = filename
            files[filename] = content

    files['session.json'] = session_values

    # Results from session (instances and value dicts paired by primary key)
    result_instances = Result.objects.filter(session=session).select_related(
        'knob_data', 'metric_data').order_by('creation_time')
    result_list = list(result_instances)
    result_rows = {row['id']: row for row in result_instances.values()}
    results = []

    for result in result_list:
        row = result_rows.get(result.pk)
        if row is None:
            # The result was deleted between the two queries.
            continue
        result_dict = OrderedDict(row)
        next_config = result.next_configuration or '{}'
        result_dict['next_configuration'] = JSONUtil.loads(next_config)

        tasks = {}
        task_ids = result.task_ids
        task_ids = task_ids.split(',') if task_ids else []
        for task_id in task_ids:
            task = TaskMeta.objects.filter(task_id=task_id).values()
            task = task[0] if task else None
            tasks[task_id] = task
        result_dict['tasks'] = tasks

        knob_data = result.knob_data.data or '{}'
        metric_data = result.metric_data.data or '{}'
        result_dict['knob_data'] = JSONUtil.loads(knob_data)
        result_dict['metric_data'] = JSONUtil.loads(metric_data)
        results.append(result_dict)

    files['results.json'] = results

    # Log messages written to the database using django-db-logger.
    # Write log files at app scope (e.g., django, website, celery). Grouping on
    # the exact first name component keeps an app such as 'web' from also
    # collecting the records of 'website', and needs a single query.
    logs = StatusLog.objects.filter(
        create_datetime__gte=session.creation_time).order_by('create_datetime')
    logs_by_app = OrderedDict()
    for lv in logs.values():
        lv['level'] = logging.getLevelName(lv['level'])
        app_name = lv['logger_name'].split('.', 1)[0]
        logs_by_app.setdefault(app_name, []).append(lv)
    for app_name, log_values in logs_by_app.items():
        files['logs/{}.log'.format(app_name)] = log_values

    # Save settings
    constants_dict = OrderedDict()
    for name, value in sorted(constants.__dict__.items()):
        if not name.startswith('_') and name == name.upper():
            constants_dict[name] = value
    files['constants.json'] = constants_dict

    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    root = 'debug_{}'.format(timestamp)
    mtime = time.time()
    tarstream = BytesIO()

    with tarfile.open(mode='w:gz', fileobj=tarstream) as tar:
        for filename, content in files.items():  # pylint: disable=not-an-iterable
            if isinstance(content, (dict, list)):
                content = JSONUtil.dumps(content, pprint=pretty_print)
            if isinstance(content, str):
                content = content.encode('utf-8')
            elif isinstance(content, (memoryview, bytearray)):
                # Binary model fields may come back as a buffer object rather
                # than bytes, presumably depending on the DB backend.
                content = bytes(content)
            assert isinstance(content, bytes), (filename, type(content))
            # Tar member names always use forward slashes.
            tarinfo = tarfile.TarInfo(name='{}/{}'.format(root, filename))
            tarinfo.size = len(content)
            tarinfo.mtime = mtime
            tar.addfile(tarinfo, BytesIO(content))

    tarstream.seek(0)
    return tarstream, root

View File

@ -3,6 +3,7 @@
# #
# Copyright (c) 2017-18, Carnegie Mellon University Database Group # Copyright (c) 2017-18, Carnegie Mellon University Database Group
# #
# pylint: disable=too-many-lines
import logging import logging
import datetime import datetime
import re import re
@ -14,6 +15,7 @@ from django.contrib.auth import update_session_auth_hash
from django.contrib.auth.forms import AuthenticationForm, UserCreationForm from django.contrib.auth.forms import AuthenticationForm, UserCreationForm
from django.contrib.auth.forms import PasswordChangeForm from django.contrib.auth.forms import PasswordChangeForm
from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ObjectDoesNotExist
from django.core.files.base import ContentFile
from django.http import HttpResponse, QueryDict from django.http import HttpResponse, QueryDict
from django.shortcuts import redirect, render, get_object_or_404 from django.shortcuts import redirect, render, get_object_or_404
from django.template.context_processors import csrf from django.template.context_processors import csrf
@ -34,7 +36,7 @@ from .tasks import (aggregate_target_results, map_workload, train_ddpg,
configuration_recommendation, configuration_recommendation_ddpg) configuration_recommendation, configuration_recommendation_ddpg)
from .types import (DBMSType, KnobUnitType, MetricType, from .types import (DBMSType, KnobUnitType, MetricType,
TaskType, VarType, WorkloadStatusType, AlgorithmType) TaskType, VarType, WorkloadStatusType, AlgorithmType)
from .utils import JSONUtil, LabelUtil, MediaUtil, TaskUtil from .utils import dump_debug_info, JSONUtil, LabelUtil, MediaUtil, TaskUtil
from .settings import TIME_ZONE from .settings import TIME_ZONE
from .set_default_knobs import set_default_knobs from .set_default_knobs import set_default_knobs
@ -336,18 +338,24 @@ def edit_knobs(request, project_id, session_id):
{'project': project, 'session': session, 'form': form}) {'project': project, 'session': session, 'form': form})
instance = form.instance instance = form.instance
instance.session = session instance.session = session
instance.knob = KnobCatalog.objects.filter(dbms=session.dbms, instance.knob = KnobCatalog.objects.get(dbms=session.dbms,
name=form.cleaned_data["name"])[0] name=form.cleaned_data["name"])
SessionKnob.objects.filter(session=instance.session, knob=instance.knob).delete() SessionKnob.objects.filter(session=instance.session, knob=instance.knob).delete()
instance.save() instance.save()
return HttpResponse(status=204) return HttpResponse(status=204)
else: else:
# knobs = KnobCatalog.objects.filter(dbms=session.dbms).order_by('-tunable')
knobs = SessionKnob.objects.filter(session=session).order_by('-tunable', 'knob__name') knobs = SessionKnob.objects.filter(session=session).order_by('-tunable', 'knob__name')
forms = [] forms = []
for knob in knobs: for knob in knobs:
knob_values = model_to_dict(knob) knob_values = model_to_dict(knob)
knob_values['session'] = session knob_values['session'] = session
knob_values['name'] = KnobCatalog.objects.get(pk=knob.knob.pk).name knob_values['name'] = KnobCatalog.objects.get(pk=knob.knob.pk).name
# if SessionKnob.objects.filter(session=session, knob=knob).exists():
# new_knob = SessionKnob.objects.filter(session=session, knob=knob)[0]
# knob_values["minval"] = new_knob.minval
# knob_values["maxval"] = new_knob.maxval
# knob_values["tunable"] = new_knob.tunable
forms.append(SessionKnobForm(initial=knob_values)) forms.append(SessionKnobForm(initial=knob_values))
context = { context = {
'project': project, 'project': project,
@ -412,7 +420,6 @@ def new_result(request):
if not form.is_valid(): if not form.is_valid():
LOG.warning("New result form is not valid: %s", str(form.errors)) LOG.warning("New result form is not valid: %s", str(form.errors))
return HttpResponse("New result form is not valid: " + str(form.errors), status=400) return HttpResponse("New result form is not valid: " + str(form.errors), status=400)
upload_code = form.cleaned_data['upload_code'] upload_code = form.cleaned_data['upload_code']
try: try:
session = Session.objects.get(upload_code=upload_code) session = Session.objects.get(upload_code=upload_code)
@ -421,7 +428,6 @@ def new_result(request):
return HttpResponse("Invalid upload code: " + upload_code, status=400) return HttpResponse("Invalid upload code: " + upload_code, status=400)
return handle_result_files(session, request.FILES) return handle_result_files(session, request.FILES)
LOG.warning("Request type was not POST") LOG.warning("Request type was not POST")
return HttpResponse("Request type was not POST", status=400) return HttpResponse("Request type was not POST", status=400)
@ -721,6 +727,17 @@ def download_next_config(request):
return response return response
@login_required(login_url=reverse_lazy('login'))
def download_debug_info(request, project_id, session_id):  # pylint: disable=unused-argument
    """Send the debug-info archive of a session as a file download.

    Only sessions owned by the logged-in user can be dumped; an unknown
    session id or one owned by another user yields a 404 instead of an
    unhandled ``DoesNotExist`` (HTTP 500).

    Returns:
        An ``application/x-gzip`` attachment named ``debug_<timestamp>.tar.gz``.
    """
    session = get_object_or_404(Session, pk=session_id, user=request.user)
    content, filename = dump_debug_info(session, pretty_print=False)
    payload = content.getvalue()
    response = HttpResponse(payload, content_type='application/x-gzip')
    response['Content-Length'] = len(payload)
    response['Content-Disposition'] = 'attachment; filename={}.tar.gz'.format(filename)
    return response
@login_required(login_url=reverse_lazy('login')) @login_required(login_url=reverse_lazy('login'))
def tuner_status_view(request, project_id, session_id, result_id): # pylint: disable=unused-argument def tuner_status_view(request, project_id, session_id, result_id): # pylint: disable=unused-argument
res = Result.objects.get(pk=result_id) res = Result.objects.get(pk=result_id)
@ -949,29 +966,38 @@ def get_timeline_data(request):
# get the lastest result # get the lastest result
def give_result(request, upload_code): # pylint: disable=unused-argument def give_result(request, upload_code): # pylint: disable=unused-argument
def _failed_response(_latest_result, _tasks, _num_completed, _status, _msg):
    # Build the HTTP 400 response returned when no next configuration can be
    # delivered: the message, the status, the result and (when task records
    # are available) the traceback of the task assumed to have failed.
    _msg = "{}\nSTATUS: {}\nRESULT ID: {}\n".format(_msg, _status, _latest_result)
    # Use the helper's own argument rather than the enclosing scope's `tasks`.
    if _tasks:
        # NOTE(review): the index is clamped to the last task; confirm whether
        # `_num_completed + 1` (rather than `_num_completed`) is the intended
        # position of the failed task.
        _failed_task_idx = min(len(_tasks) - 1, _num_completed + 1)
        _failed_task = _tasks[_failed_task_idx]
        _msg += "TRACEBACK: {}".format(_failed_task.traceback)
    return HttpResponse(_msg, status=400)
try: try:
session = Session.objects.get(upload_code=upload_code) session = Session.objects.get(upload_code=upload_code)
except Session.DoesNotExist: except Session.DoesNotExist:
LOG.warning("Invalid upload code: %s", upload_code) LOG.warning("Invalid upload code: %s", upload_code)
return HttpResponse("Invalid upload code: " + upload_code, status=400) return HttpResponse("Invalid upload code: " + upload_code, status=400)
results = Result.objects.filter(session=session)
lastest_result = results[len(results) - 1]
tasks = TaskUtil.get_tasks(lastest_result.task_ids) latest_result = Result.objects.filter(session=session).latest('creation_time')
tasks = TaskUtil.get_tasks(latest_result.task_ids)
overall_status, num_completed = TaskUtil.get_task_status(tasks) overall_status, num_completed = TaskUtil.get_task_status(tasks)
if overall_status == 'SUCCESS': if overall_status == 'SUCCESS':
res = Result.objects.get(pk=lastest_result.pk) next_config = latest_result.next_configuration
response = HttpResponse(JSONUtil.dumps(res.next_configuration), if not next_config:
overall_status = 'FAILURE'
response = _failed_response(latest_result, tasks, num_completed, overall_status,
'Failed to get the next configuration.')
else:
response = HttpResponse(JSONUtil.dumps(next_config),
content_type='application/json') content_type='application/json')
elif overall_status in ('FAILURE', 'REVOKED', 'RETRY'): elif overall_status in ('FAILURE', 'REVOKED', 'RETRY'):
msg = "STATUS: {}\nRESULT ID: {}\n".format(overall_status, lastest_result) response = _failed_response(latest_result, tasks, num_completed, overall_status,
if tasks: 'Celery failed to get the next configuration.')
failed_task_idx = min(len(tasks) - 1, num_completed + 1)
failed_task = tasks[failed_task_idx]
msg += "TRACEBACK: {}".format(failed_task.traceback)
response = HttpResponse(msg, status=400)
else: # overall_status in ('PENDING', 'RECEIVED', 'STARTED'): else: # overall_status in ('PENDING', 'RECEIVED', 'STARTED'):
response = HttpResponse("{}: Result not ready".format(overall_status), status=202) response = HttpResponse("{}: Result not ready".format(overall_status), status=202)
@ -979,6 +1005,23 @@ def give_result(request, upload_code): # pylint: disable=unused-argument
return response return response
# Back door: download the debug info of the session with the given upload code
def get_debug_info(request, upload_code):  # pylint: disable=unused-argument
    """Send the debug-info archive of the session identified by ``upload_code``.

    Query parameters:
        pp: pretty-print the JSON files in the archive. Any integer is
            accepted (non-zero means on), as well as ``true``/``yes``/``on``.

    Returns:
        An ``application/x-gzip`` attachment, or a 400 response when the
        upload code does not match any session.
    """
    raw_pp = request.GET.get('pp', 0)
    try:
        pprint = bool(int(raw_pp))
    except (TypeError, ValueError):
        # A non-numeric flag such as `pp=true` must not crash the request.
        pprint = str(raw_pp).strip().lower() in ('true', 'yes', 'on')

    try:
        session = Session.objects.get(upload_code=upload_code)
    except Session.DoesNotExist:
        LOG.warning("Invalid upload code: %s", upload_code)
        return HttpResponse("Invalid upload code: " + upload_code, status=400)

    content, filename = dump_debug_info(session, pretty_print=pprint)
    payload = content.getvalue()
    response = HttpResponse(payload, content_type='application/x-gzip')
    response['Content-Length'] = len(payload)
    response['Content-Disposition'] = 'attachment; filename={}.tar.gz'.format(filename)
    return response
def train_ddpg_loops(request, session_id): # pylint: disable=unused-argument def train_ddpg_loops(request, session_id): # pylint: disable=unused-argument
session = get_object_or_404(Session, pk=session_id, user=request.user) # pylint: disable=unused-variable session = get_object_or_404(Session, pk=session_id, user=request.user) # pylint: disable=unused-variable
results = Result.objects.filter(session=session_id) results = Result.objects.filter(session=session_id)