Fixed bug caused by stale value of latest result in views.give_result()

This commit is contained in:
dvanaken 2019-11-26 03:57:35 -05:00 committed by Dana Van Aken
parent e47508d20b
commit b211ad455d
3 changed files with 31 additions and 15 deletions

View File

@ -23,7 +23,7 @@ import requests
from fabric.api import env, lcd, local, settings, show, task from fabric.api import env, lcd, local, settings, show, task
from fabric.state import output as fabric_output from fabric.state import output as fabric_output
from utils import (file_exists, get, load_driver_conf, parse_bool, from utils import (file_exists, get, get_content, load_driver_conf, parse_bool,
put, run, run_sql_script, sudo, FabricException) put, run, run_sql_script, sudo, FabricException)
# Loads the driver config file (defaults to driver_config.py) # Loads the driver config file (defaults to driver_config.py)
@ -306,12 +306,12 @@ def upload_result(result_dir=None, prefix=None, upload_code=None):
data={'upload_code': upload_code}) data={'upload_code': upload_code})
if response.status_code != 200: if response.status_code != 200:
raise Exception('Error uploading result.\nStatus: {}\nMessage: {}\n'.format( raise Exception('Error uploading result.\nStatus: {}\nMessage: {}\n'.format(
response.status_code, response.content)) response.status_code, get_content(response)))
for f in files.values(): # pylint: disable=not-an-iterable for f in files.values(): # pylint: disable=not-an-iterable
f.close() f.close()
LOG.info(response.content) LOG.info(get_content(response))
return response return response
@ -328,7 +328,7 @@ def get_result(max_time_sec=180, interval_sec=5, upload_code=None):
while elapsed <= max_time_sec: while elapsed <= max_time_sec:
rsp = requests.get(url) rsp = requests.get(url)
response = rsp.content.decode() response = get_content(rsp)
assert response != 'null' assert response != 'null'
LOG.debug('%s [status code: %d, content_type: %s, elapsed: %ds]', response, LOG.debug('%s [status code: %d, content_type: %s, elapsed: %ds]', response,
@ -693,16 +693,20 @@ def wait_pipeline_data_ready(max_time_sec=800, interval_sec=10):
max_time_sec = int(max_time_sec) max_time_sec = int(max_time_sec)
interval_sec = int(interval_sec) interval_sec = int(interval_sec)
elapsed = 0 elapsed = 0
ready = False
while elapsed <= max_time_sec: while elapsed <= max_time_sec:
response = requests.get(dconf.WEBSITE_URL + '/test/pipeline/') response = requests.get(dconf.WEBSITE_URL + '/test/pipeline/')
response = response.content content = get_content(response)
LOG.info(response) LOG.info("%s (elapsed: %ss)", content, interval_sec)
if 'False' in str(response): if 'False' in content:
time.sleep(interval_sec) time.sleep(interval_sec)
elapsed += interval_sec elapsed += interval_sec
else: else:
return ready = True
break
return ready
@task @task
@ -710,14 +714,14 @@ def integration_tests():
# Create test website # Create test website
response = requests.get(dconf.WEBSITE_URL + '/test/create/') response = requests.get(dconf.WEBSITE_URL + '/test/create/')
LOG.info(response.content) LOG.info(get_content(response))
# Upload training data # Upload training data
LOG.info('Upload training data to no tuning session') LOG.info('Upload training data to no tuning session')
upload_batch(result_dir='../../integrationTests/data/', upload_code='ottertuneTestNoTuning') upload_batch(result_dir='../../integrationTests/data/', upload_code='ottertuneTestNoTuning')
# wait celery periodic task finishes # wait celery periodic task finishes
wait_pipeline_data_ready() assert wait_pipeline_data_ready(), "Pipeline data failed"
# Test DNN # Test DNN
LOG.info('Test DNN (deep neural network)') LOG.info('Test DNN (deep neural network)')

View File

@ -53,6 +53,13 @@ def parse_bool(value):
return value return value
def get_content(response):
    """Return the body of an HTTP response as text.

    Args:
        response: Object exposing a ``content`` attribute that holds either
            bytes-like data or already-decoded text.

    Returns:
        The content decoded as UTF-8 when it is ``bytes``/``bytearray``;
        otherwise the content unchanged.
    """
    content = response.content
    if isinstance(content, (bytes, bytearray)):
        # The decoded text ends up in log and exception messages, so a body
        # that is not valid UTF-8 must not raise UnicodeDecodeError and hide
        # the original failure; undecodable bytes become U+FFFD instead.
        content = content.decode('utf-8', errors='replace')
    return content
@task @task
def run(cmd, capture=True, **kwargs): def run(cmd, capture=True, **kwargs):
capture = parse_bool(capture) capture = parse_bool(capture)

View File

@ -761,7 +761,7 @@ def tuner_status_view(request, project_id, session_id, result_id): # pylint: di
tasks = TaskUtil.get_tasks(res.task_ids) tasks = TaskUtil.get_tasks(res.task_ids)
overall_status, num_completed = TaskUtil.get_task_status(tasks) overall_status, num_completed = TaskUtil.get_task_status(tasks)
if overall_status in ['PENDING', 'RECEIVED', 'STARTED']: if overall_status in ['PENDING', 'RECEIVED', 'STARTED', None]:
completion_time = 'N/A' completion_time = 'N/A'
total_runtime = 'N/A' total_runtime = 'N/A'
else: else:
@ -1001,13 +1001,18 @@ def give_result(request, upload_code): # pylint: disable=unused-argument
overall_status, num_completed = TaskUtil.get_task_status(tasks) overall_status, num_completed = TaskUtil.get_task_status(tasks)
if overall_status == 'SUCCESS': if overall_status == 'SUCCESS':
next_config = latest_result.next_configuration if not latest_result.next_configuration:
if not next_config: # If the task status was incomplete when we first queried latest_result
# but succeeded before the call to TaskUtil.get_task_status() finished
# then latest_result is stale and must be updated.
latest_result = Result.objects.get(id=latest_result.pk)
if not latest_result.next_configuration:
overall_status = 'FAILURE' overall_status = 'FAILURE'
response = _failed_response(latest_result, tasks, num_completed, overall_status, response = _failed_response(latest_result, tasks, num_completed, overall_status,
'Failed to get the next configuration.') 'Failed to get the next configuration.')
else: else:
response = HttpResponse(JSONUtil.dumps(next_config), response = HttpResponse(JSONUtil.dumps(latest_result.next_configuration),
content_type='application/json') content_type='application/json')
elif overall_status in ('FAILURE', 'REVOKED', 'RETRY'): elif overall_status in ('FAILURE', 'REVOKED', 'RETRY'):
@ -1252,7 +1257,7 @@ def alt_create_or_edit_session(request):
# integration test # integration test
@csrf_exempt @csrf_exempt
def pipeline_data_ready(request): # pylint: disable=unused-argument def pipeline_data_ready(request): # pylint: disable=unused-argument
LOG.info(PipelineRun.objects.get_latest()) LOG.debug("Latest pipeline run: %s", PipelineRun.objects.get_latest())
if PipelineRun.objects.get_latest() is None: if PipelineRun.objects.get_latest() is None:
response = "Pipeline data ready: False" response = "Pipeline data ready: False"
else: else: