Create/format/save the recommended config for the latest result before the on_success method.
This commit is contained in:
parent
8001b658c9
commit
8cec62160f
|
@ -109,19 +109,9 @@ class ConfigurationRecommendation(UpdateTask): # pylint: disable=abstract-metho
|
||||||
def on_success(self, retval, task_id, args, kwargs):
|
def on_success(self, retval, task_id, args, kwargs):
|
||||||
super(ConfigurationRecommendation, self).on_success(retval, task_id, args, kwargs)
|
super(ConfigurationRecommendation, self).on_success(retval, task_id, args, kwargs)
|
||||||
|
|
||||||
result_id = retval['result_id']
|
|
||||||
result = Result.objects.get(pk=result_id)
|
|
||||||
|
|
||||||
# Replace result with formatted result
|
|
||||||
formatted_params = db.parser.format_dbms_knobs(result.dbms.pk, retval['recommendation'])
|
|
||||||
# Create next configuration to try
|
|
||||||
config = db.parser.create_knob_configuration(result.dbms.pk, formatted_params)
|
|
||||||
task_meta = TaskMeta.objects.get(task_id=task_id)
|
task_meta = TaskMeta.objects.get(task_id=task_id)
|
||||||
retval['recommendation'] = config
|
|
||||||
task_meta.result = retval
|
task_meta.result = retval
|
||||||
task_meta.save()
|
task_meta.save()
|
||||||
result.next_configuration = JSONUtil.dumps(retval)
|
|
||||||
result.save()
|
|
||||||
|
|
||||||
|
|
||||||
def clean_knob_data(knob_matrix, knob_labels, session):
|
def clean_knob_data(knob_matrix, knob_labels, session):
|
||||||
|
@ -419,13 +409,31 @@ def train_ddpg(result_id):
|
||||||
return result_info
|
return result_info
|
||||||
|
|
||||||
|
|
||||||
|
def create_and_save_recommendation(recommended_knobs, result, status, **kwargs):
|
||||||
|
dbms_id = result.dbms.pk
|
||||||
|
formatted_knobs = db.parser.format_dbms_knobs(dbms_id, recommended_knobs)
|
||||||
|
config = db.parser.create_knob_configuration(dbms_id, formatted_knobs)
|
||||||
|
|
||||||
|
retval = dict(**kwargs)
|
||||||
|
retval.update(
|
||||||
|
status=status,
|
||||||
|
result_id=result.pk,
|
||||||
|
recommendation=config,
|
||||||
|
)
|
||||||
|
result.next_configuration = JSONUtil.dumps(retval)
|
||||||
|
result.save()
|
||||||
|
|
||||||
|
return retval
|
||||||
|
|
||||||
|
|
||||||
@task(base=ConfigurationRecommendation, name='configuration_recommendation_ddpg')
|
@task(base=ConfigurationRecommendation, name='configuration_recommendation_ddpg')
|
||||||
def configuration_recommendation_ddpg(result_info): # pylint: disable=invalid-name
|
def configuration_recommendation_ddpg(result_info): # pylint: disable=invalid-name
|
||||||
LOG.info('Use ddpg to recommend configuration')
|
LOG.info('Use ddpg to recommend configuration')
|
||||||
result_id = result_info['newest_result_id']
|
result_id = result_info['newest_result_id']
|
||||||
result = Result.objects.filter(pk=result_id)
|
result_list = Result.objects.filter(pk=result_id)
|
||||||
session = Result.objects.get(pk=result_id).session
|
result = result_list.first()
|
||||||
agg_data = DataUtil.aggregate_data(result)
|
session = result.session
|
||||||
|
agg_data = DataUtil.aggregate_data(result_list)
|
||||||
metric_data, _ = clean_metric_data(agg_data['y_matrix'], agg_data['y_columnlabels'], session)
|
metric_data, _ = clean_metric_data(agg_data['y_matrix'], agg_data['y_columnlabels'], session)
|
||||||
metric_data = metric_data.flatten()
|
metric_data = metric_data.flatten()
|
||||||
metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
|
metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
|
||||||
|
@ -447,11 +455,10 @@ def configuration_recommendation_ddpg(result_info): # pylint: disable=invalid-n
|
||||||
knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
|
knob_bounds = np.vstack(DataUtil.get_knob_bounds(knob_labels, session))
|
||||||
knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
|
knob_data = MinMaxScaler().fit(knob_bounds).inverse_transform(knob_data.reshape(1, -1))[0]
|
||||||
conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}
|
conf_map = {k: knob_data[i] for i, k in enumerate(knob_labels)}
|
||||||
conf_map_res = {}
|
|
||||||
conf_map_res['status'] = 'good'
|
conf_map_res = create_and_save_recommendation(recommended_knobs=conf_map, result=result,
|
||||||
conf_map_res['result_id'] = result_id
|
status='good', info='INFO: ddpg')
|
||||||
conf_map_res['recommendation'] = conf_map
|
|
||||||
conf_map_res['info'] = 'INFO: ddpg'
|
|
||||||
return conf_map_res
|
return conf_map_res
|
||||||
|
|
||||||
|
|
||||||
|
@ -638,20 +645,18 @@ def combine_workload(target_data):
|
||||||
def configuration_recommendation(recommendation_input):
|
def configuration_recommendation(recommendation_input):
|
||||||
target_data, algorithm = recommendation_input
|
target_data, algorithm = recommendation_input
|
||||||
LOG.info('configuration_recommendation called')
|
LOG.info('configuration_recommendation called')
|
||||||
|
newest_result = Result.objects.get(pk=target_data['newest_result_id'])
|
||||||
|
|
||||||
if target_data['bad'] is True:
|
if target_data['bad'] is True:
|
||||||
target_data_res = dict(
|
target_data_res = create_and_save_recommendation(
|
||||||
status='bad',
|
recommended_knobs=target_data['config_recommend'], result=newest_result,
|
||||||
result_id=target_data['newest_result_id'],
|
status='bad', info='WARNING: no training data, the config is generated randomly',
|
||||||
info='WARNING: no training data, the config is generated randomly',
|
|
||||||
recommendation=target_data['config_recommend'],
|
|
||||||
pipeline_run=target_data['pipeline_run'])
|
pipeline_run=target_data['pipeline_run'])
|
||||||
LOG.debug('%s: Skipping configuration recommendation.\n\ndata=%s\n',
|
LOG.debug('%s: Skipping configuration recommendation.\n\ndata=%s\n',
|
||||||
AlgorithmType.name(algorithm), JSONUtil.dumps(target_data, pprint=True))
|
AlgorithmType.name(algorithm), JSONUtil.dumps(target_data, pprint=True))
|
||||||
return target_data_res
|
return target_data_res
|
||||||
|
|
||||||
latest_pipeline_run = PipelineRun.objects.get(pk=target_data['pipeline_run'])
|
latest_pipeline_run = PipelineRun.objects.get(pk=target_data['pipeline_run'])
|
||||||
newest_result = Result.objects.get(pk=target_data['newest_result_id'])
|
|
||||||
|
|
||||||
X_columnlabels, X_scaler, X_scaled, y_scaled, X_max, X_min = combine_workload(target_data)
|
X_columnlabels, X_scaler, X_scaled, y_scaled, X_max, X_min = combine_workload(target_data)
|
||||||
|
|
||||||
|
@ -756,12 +761,10 @@ def configuration_recommendation(recommendation_input):
|
||||||
best_config = np.maximum(best_config, X_min_inv)
|
best_config = np.maximum(best_config, X_min_inv)
|
||||||
|
|
||||||
conf_map = {k: best_config[i] for i, k in enumerate(X_columnlabels)}
|
conf_map = {k: best_config[i] for i, k in enumerate(X_columnlabels)}
|
||||||
conf_map_res = dict(
|
conf_map_res = create_and_save_recommendation(
|
||||||
status='good',
|
recommended_knobs=conf_map, result=newest_result,
|
||||||
result_id=target_data['newest_result_id'],
|
status='good', info='INFO: training data size is {}'.format(X_scaled.shape[0]),
|
||||||
recommendation=conf_map,
|
pipeline_run=target_data['pipeline_run'])
|
||||||
info='INFO: training data size is {}'.format(X_scaled.shape[0]),
|
|
||||||
pipeline_run=latest_pipeline_run.pk)
|
|
||||||
LOG.debug('%s: Finished selecting the next config.\n\ndata=%s\n',
|
LOG.debug('%s: Finished selecting the next config.\n\ndata=%s\n',
|
||||||
AlgorithmType.name(algorithm), JSONUtil.dumps(conf_map_res, pprint=True))
|
AlgorithmType.name(algorithm), JSONUtil.dumps(conf_map_res, pprint=True))
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@ from random import choice
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from django.contrib.auth.models import User
|
from django.contrib.auth.models import User
|
||||||
|
from django.db.models import Case, When
|
||||||
from django.utils.text import capfirst
|
from django.utils.text import capfirst
|
||||||
from django_db_logger.models import StatusLog
|
from django_db_logger.models import StatusLog
|
||||||
from djcelery.models import TaskMeta
|
from djcelery.models import TaskMeta
|
||||||
|
@ -72,34 +73,34 @@ class MediaUtil(object):
|
||||||
class TaskUtil(object):
|
class TaskUtil(object):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_tasks(tasks):
|
def get_tasks(task_ids):
|
||||||
if not tasks:
|
task_ids = task_ids or []
|
||||||
return []
|
if isinstance(task_ids, str):
|
||||||
task_ids = tasks.split(',')
|
task_ids = task_ids.split(',')
|
||||||
res = []
|
preserved = Case(*[
|
||||||
for task_id in task_ids:
|
When(task_id=task_id, then=pos) for pos, task_id in enumerate(task_ids)])
|
||||||
task = TaskMeta.objects.filter(task_id=task_id)
|
return TaskMeta.objects.filter(task_id__in=task_ids).order_by(preserved)
|
||||||
if len(task) == 0:
|
|
||||||
continue # Task Not Finished
|
|
||||||
res.append(task[0])
|
|
||||||
return res
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_task_status(tasks):
|
def get_task_status(tasks):
|
||||||
if len(tasks) == 0:
|
if not tasks:
|
||||||
return None, 0
|
return None, 0
|
||||||
|
|
||||||
overall_status = 'SUCCESS'
|
overall_status = 'SUCCESS'
|
||||||
num_completed = 0
|
num_completed = 0
|
||||||
for task in tasks:
|
for task in tasks:
|
||||||
status = task.status
|
status = task.status
|
||||||
if status == "SUCCESS":
|
if status == "SUCCESS":
|
||||||
num_completed += 1
|
num_completed += 1
|
||||||
elif status in ['FAILURE', 'REVOKED', 'RETRY']:
|
elif status in ('FAILURE', 'REVOKED', 'RETRY'):
|
||||||
overall_status = status
|
overall_status = status
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
assert status in ['PENDING', 'RECEIVED', 'STARTED']
|
if status not in ('PENDING', 'RECEIVED', 'STARTED'):
|
||||||
|
LOG.warning("Task %s: invalid task status: '%s' (task_id=%s)",
|
||||||
|
task.id, status, task.task_id)
|
||||||
overall_status = status
|
overall_status = status
|
||||||
|
|
||||||
return overall_status, num_completed
|
return overall_status, num_completed
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1067,14 +1067,6 @@ def get_timeline_data(request):
|
||||||
# get the lastest result
|
# get the lastest result
|
||||||
def give_result(request, upload_code): # pylint: disable=unused-argument
|
def give_result(request, upload_code): # pylint: disable=unused-argument
|
||||||
|
|
||||||
def _failed_response(_latest_result, _tasks, _num_completed, _status, _msg):
|
|
||||||
_msg = "{}\nSTATUS: {}\nRESULT ID: {}\n".format(_msg, _status, _latest_result)
|
|
||||||
if tasks:
|
|
||||||
_failed_task_idx = min(len(_tasks) - 1, _num_completed + 1)
|
|
||||||
_failed_task = _tasks[_failed_task_idx]
|
|
||||||
_msg += "TRACEBACK: {}".format(_failed_task.traceback)
|
|
||||||
return HttpResponse(_msg, status=400)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
session = Session.objects.get(upload_code=upload_code)
|
session = Session.objects.get(upload_code=upload_code)
|
||||||
except Session.DoesNotExist:
|
except Session.DoesNotExist:
|
||||||
|
@ -1084,39 +1076,31 @@ def give_result(request, upload_code): # pylint: disable=unused-argument
|
||||||
latest_result = Result.objects.filter(session=session).latest('creation_time')
|
latest_result = Result.objects.filter(session=session).latest('creation_time')
|
||||||
tasks = TaskUtil.get_tasks(latest_result.task_ids)
|
tasks = TaskUtil.get_tasks(latest_result.task_ids)
|
||||||
overall_status, num_completed = TaskUtil.get_task_status(tasks)
|
overall_status, num_completed = TaskUtil.get_task_status(tasks)
|
||||||
|
response = {
|
||||||
|
'celery_status': overall_status,
|
||||||
|
'result_id': latest_result.pk,
|
||||||
|
'message': '',
|
||||||
|
'errors': [],
|
||||||
|
}
|
||||||
|
|
||||||
if overall_status == 'SUCCESS':
|
if overall_status == 'SUCCESS':
|
||||||
# The task status is set to SUCCESS before the next config is saved in
|
response.update(JSONUtil.loads(latest_result.next_configuration),
|
||||||
# the latest result so we must wait for it to be updated
|
message='Celery successfully recommended the next configuration')
|
||||||
max_wait_sec = 20
|
status_code = 200
|
||||||
elapsed_sec = 0
|
|
||||||
while not latest_result.next_configuration and elapsed_sec <= max_wait_sec:
|
|
||||||
time.sleep(5)
|
|
||||||
elapsed_sec += 5
|
|
||||||
latest_result = Result.objects.get(id=latest_result.pk)
|
|
||||||
LOG.debug("Waiting for the next config for result %s to be updated... "
|
|
||||||
"(elapsed: %ss): %s", latest_result.pk, elapsed_sec,
|
|
||||||
model_to_dict(latest_result))
|
|
||||||
|
|
||||||
if not latest_result.next_configuration:
|
|
||||||
LOG.warning(
|
|
||||||
"Failed to get the next configuration from the latest result after %ss: %s",
|
|
||||||
elapsed_sec, model_to_dict(latest_result))
|
|
||||||
overall_status = 'FAILURE'
|
|
||||||
response = _failed_response(latest_result, tasks, num_completed, overall_status,
|
|
||||||
'Failed to get the next configuration.')
|
|
||||||
else:
|
|
||||||
response = HttpResponse(JSONUtil.dumps(latest_result.next_configuration),
|
|
||||||
content_type='application/json')
|
|
||||||
|
|
||||||
elif overall_status in ('FAILURE', 'REVOKED', 'RETRY'):
|
elif overall_status in ('FAILURE', 'REVOKED', 'RETRY'):
|
||||||
response = _failed_response(latest_result, tasks, num_completed, overall_status,
|
task_errors = [t.traceback for t in tasks if t.traceback]
|
||||||
'Celery failed to get the next configuration.')
|
if task_errors:
|
||||||
|
LOG.warning('\n\n'.join(task_errors))
|
||||||
|
response.update(message='Celery failed to get the next configuration', errors=task_errors)
|
||||||
|
status_code = 400
|
||||||
|
|
||||||
else: # overall_status in ('PENDING', 'RECEIVED', 'STARTED'):
|
else: # overall_status in ('PENDING', 'RECEIVED', 'STARTED'):
|
||||||
response = HttpResponse("{}: Result not ready".format(overall_status), status=202)
|
response.update(message='Result not ready')
|
||||||
|
status_code = 202
|
||||||
|
|
||||||
return response
|
return HttpResponse(JSONUtil.dumps(response, pprint=True), status=status_code,
|
||||||
|
content_type='application/json')
|
||||||
|
|
||||||
|
|
||||||
# get the lastest result
|
# get the lastest result
|
||||||
|
|
Loading…
Reference in New Issue