Add pipeline data view in results

This commit is contained in:
bohanjason 2020-03-01 23:23:20 -05:00 committed by Dana Van Aken
parent 84407eb999
commit be9307822e
5 changed files with 90 additions and 41 deletions

View File

@ -13,7 +13,7 @@ profile=no
# Add files or directories to the blacklist. They should be base names, not # Add files or directories to the blacklist. They should be base names, not
# paths. # paths.
ignore=CVS,.git,manage.py,0001_initial.py,0002_enable_compression.py,0003_load_initial_data.py,0004_add_lhs.py,0005_add_workload_field.py,0006_session_hyperparameters.py,0007_executiontime.py,0008_change_result_taskids_field.py,0009_change_executiontime_function_field.py,credentials.py,create_knob_settings.py ignore=CVS,.git,manage.py,0001_initial.py,0002_enable_compression.py,0003_load_initial_data.py,0004_add_lhs.py,0005_add_workload_field.py,0006_session_hyperparameters.py,0007_executiontime.py,0008_change_result_taskids_field.py,0009_change_executiontime_function_field.py,0010_add_pipeline_data_field.py,credentials.py,create_knob_settings.py
# ignore-patterns=**/migrations/*.py # ignore-patterns=**/migrations/*.py

View File

@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.27 on 2020-03-02 03:44
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
    """Add nullable FKs from Result to the PipelineData rows used for it.

    ``pipeline_knobs`` / ``pipeline_metrics`` point at the ranked-knobs and
    pruned-metrics PipelineData entries consulted when recommending a
    configuration for this result.
    """

    dependencies = [
        ('website', '0009_change_executiontime_function_field'),
    ]

    operations = [
        migrations.AddField(
            model_name='result',
            name='pipeline_knobs',
            # null=True: pre-existing Result rows have no pipeline data.
            field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE,
                                    related_name='pipeline_knobs', to='website.PipelineData'),
        ),
        migrations.AddField(
            model_name='result',
            name='pipeline_metrics',
            field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE,
                                    related_name='pipeline_metrics', to='website.PipelineData'),
        ),
    ]

View File

@ -423,6 +423,39 @@ class Workload(BaseModel):
# hw=hw_id) # hw=hw_id)
class PipelineRunManager(models.Manager):
    """Manager for PipelineRun with a shortcut to the newest finished run."""

    def get_latest(self):
        """Return the most recent completed pipeline run, or None.

        A run is "completed" once end_time is set.  PipelineRun.Meta orders
        by "-id", so first() on the filtered queryset is the newest one.
        """
        finished = self.all().exclude(end_time=None)
        return finished.first()
class PipelineRun(models.Model):
    """One execution of the background pipeline.

    end_time stays NULL while the run is still in progress; see
    PipelineRunManager.get_latest for how completed runs are selected.
    """

    objects = PipelineRunManager()

    start_time = models.DateTimeField()
    end_time = models.DateTimeField(null=True)

    class Meta:  # pylint: disable=no-init
        # Newest run first; get_latest() relies on this ordering.
        ordering = ["-id"]

    def __unicode__(self):
        return str(self.pk)

    def __str__(self):
        return self.__unicode__()
class PipelineData(models.Model):
    """Serialized output of one pipeline task for one workload in one run."""

    pipeline_run = models.ForeignKey(PipelineRun, verbose_name='group')
    # Which pipeline stage produced this row (PipelineTaskType choice,
    # e.g. RANKED_KNOBS or PRUNED_METRICS).
    task_type = models.IntegerField(choices=PipelineTaskType.choices())
    workload = models.ForeignKey(Workload)
    # Task output; stored as text and parsed elsewhere with JSONUtil.loads.
    data = models.TextField()
    creation_time = models.DateTimeField()

    class Meta:  # pylint: disable=no-init
        # At most one row per (run, task, workload) combination.
        unique_together = ("pipeline_run", "task_type", "workload")
class ResultManager(models.Manager): class ResultManager(models.Manager):
def create_result(self, session, dbms, workload, def create_result(self, session, dbms, workload,
@ -461,44 +494,13 @@ class Result(BaseModel):
observation_time = models.FloatField() observation_time = models.FloatField()
task_ids = models.TextField(null=True) task_ids = models.TextField(null=True)
next_configuration = models.TextField(null=True) next_configuration = models.TextField(null=True)
pipeline_knobs = models.ForeignKey(PipelineData, null=True, related_name='pipeline_knobs')
pipeline_metrics = models.ForeignKey(PipelineData, null=True, related_name='pipeline_metrics')
def __unicode__(self): def __unicode__(self):
return str(self.pk) return str(self.pk)
class PipelineRunManager(models.Manager):
def get_latest(self):
return self.all().exclude(end_time=None).first()
class PipelineRun(models.Model):
objects = PipelineRunManager()
start_time = models.DateTimeField()
end_time = models.DateTimeField(null=True)
def __unicode__(self):
return str(self.pk)
def __str__(self):
return self.__unicode__()
class Meta: # pylint: disable=no-init
ordering = ["-id"]
class PipelineData(models.Model):
pipeline_run = models.ForeignKey(PipelineRun, verbose_name='group')
task_type = models.IntegerField(choices=PipelineTaskType.choices())
workload = models.ForeignKey(Workload)
data = models.TextField()
creation_time = models.DateTimeField()
class Meta: # pylint: disable=no-init
unique_together = ("pipeline_run", "task_type", "workload")
class BackupData(BaseModel): class BackupData(BaseModel):
result = models.ForeignKey(Result) result = models.ForeignKey(Result)
raw_knobs = models.TextField() raw_knobs = models.TextField()

View File

@ -485,6 +485,8 @@ def combine_workload(target_data):
latest_pipeline_run = PipelineRun.objects.get(pk=target_data['pipeline_run']) latest_pipeline_run = PipelineRun.objects.get(pk=target_data['pipeline_run'])
session = newest_result.session session = newest_result.session
params = JSONUtil.loads(session.hyperparameters) params = JSONUtil.loads(session.hyperparameters)
pipeline_data_knob = None
pipeline_data_metric = None
# Load mapped workload data # Load mapped workload data
if target_data['mapped_workload'] is not None: if target_data['mapped_workload'] is not None:
@ -537,6 +539,11 @@ def combine_workload(target_data):
pipeline_run=latest_pipeline_run, pipeline_run=latest_pipeline_run,
workload=mapped_workload, workload=mapped_workload,
task_type=PipelineTaskType.RANKED_KNOBS) task_type=PipelineTaskType.RANKED_KNOBS)
pipeline_data_knob = ranked_knobs
pipeline_data_metric = PipelineData.objects.get(
pipeline_run=latest_pipeline_run,
workload=mapped_workload,
task_type=PipelineTaskType.PRUNED_METRICS)
ranked_knobs = JSONUtil.loads(ranked_knobs.data)[:params['IMPORTANT_KNOB_NUMBER']] ranked_knobs = JSONUtil.loads(ranked_knobs.data)[:params['IMPORTANT_KNOB_NUMBER']]
ranked_knob_idxs = [i for i, cl in enumerate(X_columnlabels) if cl in ranked_knobs] ranked_knob_idxs = [i for i, cl in enumerate(X_columnlabels) if cl in ranked_knobs]
X_workload = X_workload[:, ranked_knob_idxs] X_workload = X_workload[:, ranked_knob_idxs]
@ -666,7 +673,7 @@ def combine_workload(target_data):
X_max[i] = col_max X_max[i] = col_max
return X_columnlabels, X_scaler, X_scaled, y_scaled, X_max, X_min,\ return X_columnlabels, X_scaler, X_scaled, y_scaled, X_max, X_min,\
dummy_encoder, constraint_helper dummy_encoder, constraint_helper, pipeline_data_knob, pipeline_data_metric
@shared_task(base=ConfigurationRecommendation, name='configuration_recommendation') @shared_task(base=ConfigurationRecommendation, name='configuration_recommendation')
@ -679,20 +686,17 @@ def configuration_recommendation(recommendation_input):
params = JSONUtil.loads(session.hyperparameters) params = JSONUtil.loads(session.hyperparameters)
if target_data['bad'] is True: if target_data['bad'] is True:
if session.tuning_session == 'randomly_generate':
info = 'Randomly generated'
else:
info = 'WARNING: no training data, the config is generated by LHS'
target_data_res = create_and_save_recommendation( target_data_res = create_and_save_recommendation(
recommended_knobs=target_data['config_recommend'], result=newest_result, recommended_knobs=target_data['config_recommend'], result=newest_result,
status='bad', info=info, status='bad', info='WARNING: no training data, the config is generated by LHS',
pipeline_run=target_data['pipeline_run']) pipeline_run=target_data['pipeline_run'])
LOG.debug('%s: Skipping configuration recommendation.\nData:\n%s\n\n', LOG.debug('%s: Skipping configuration recommendation.\nData:\n%s\n\n',
AlgorithmType.name(algorithm), target_data) AlgorithmType.name(algorithm), target_data)
return target_data_res return target_data_res
X_columnlabels, X_scaler, X_scaled, y_scaled, X_max, X_min,\ X_columnlabels, X_scaler, X_scaled, y_scaled, X_max, X_min,\
dummy_encoder, constraint_helper = combine_workload(target_data) dummy_encoder, constraint_helper, pipeline_knobs,\
pipeline_metrics = combine_workload(target_data)
# FIXME: we should generate more samples and use a smarter sampling technique # FIXME: we should generate more samples and use a smarter sampling technique
num_samples = params['NUM_SAMPLES'] num_samples = params['NUM_SAMPLES']
@ -796,6 +800,9 @@ def configuration_recommendation(recommendation_input):
best_config = np.maximum(best_config, X_min_inv) best_config = np.maximum(best_config, X_min_inv)
conf_map = {k: best_config[i] for i, k in enumerate(X_columnlabels)} conf_map = {k: best_config[i] for i, k in enumerate(X_columnlabels)}
newest_result.pipeline_knobs = pipeline_knobs
newest_result.pipeline_metrics = pipeline_metrics
conf_map_res = create_and_save_recommendation( conf_map_res = create_and_save_recommendation(
recommended_knobs=conf_map, result=newest_result, recommended_knobs=conf_map, result=newest_result,
status='good', info='INFO: training data size is {}'.format(X_scaled.shape[0]), status='good', info='INFO: training data size is {}'.format(X_scaled.shape[0]),

View File

@ -38,6 +38,18 @@
<td><div class="text-right">Total runtime:</div></td> <td><div class="text-right">Total runtime:</div></td>
<td>{{ total_runtime }}</td> <td>{{ total_runtime }}</td>
</tr> </tr>
{% if result.pipeline_knobs %}
<tr>
<td><div class="text-right">Ranked Knobs:</div></td>
<td><a href="{% url 'pipeline_data_view' result.pipeline_knobs.pk %}">Ranked Knobs</a></td>
</tr>
{% endif %}
{% if result.pipeline_metrics %}
<tr>
<td><div class="text-right">Pruned Metrics:</div></td>
<td><a href="{% url 'pipeline_data_view' result.pipeline_metrics.pk %}">Pruned Metrics</a></td>
</tr>
{% endif %}
</tbody> </tbody>
</table> </table>