diff --git a/server/website/website/migrations/0005_add_workload_field.py b/server/website/website/migrations/0005_add_workload_field.py
new file mode 100644
index 0000000..b92ec14
--- /dev/null
+++ b/server/website/website/migrations/0005_add_workload_field.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.23 on 2019-12-15 06:16
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('website', '0004_add_lhs'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='workload',
+            name='project',
+            field=models.ForeignKey(default=1, on_delete=django.db.models.deletion.CASCADE, to='website.Project'),
+            preserve_default=False,
+        ),
+        migrations.AlterUniqueTogether(
+            name='workload',
+            unique_together=set([('dbms', 'hardware', 'name', 'project')]),
+        ),
+    ]
diff --git a/server/website/website/models.py b/server/website/website/models.py
index 94368d6..44e087b 100644
--- a/server/website/website/models.py
+++ b/server/website/website/models.py
@@ -317,14 +317,15 @@ class MetricData(DataModel):
 
 class WorkloadManager(models.Manager):
 
-    def create_workload(self, dbms, hardware, name):
-        # (dbms,hardware,name) should be unique for each workload
+    def create_workload(self, dbms, hardware, name, project):
+        # (dbms, hardware, name, project) should be unique for each workload
         try:
-            return Workload.objects.get(dbms=dbms, hardware=hardware, name=name)
+            return Workload.objects.get(dbms=dbms, hardware=hardware, name=name, project=project)
         except Workload.DoesNotExist:
             return self.create(dbms=dbms,
                                hardware=hardware,
-                               name=name)
+                               name=name,
+                               project=project)
 
 
 class Workload(BaseModel):
@@ -336,6 +337,7 @@
     dbms = models.ForeignKey(DBMSCatalog)
     hardware = models.ForeignKey(Hardware)
     name = models.CharField(max_length=128, verbose_name='workload name')
+    project = models.ForeignKey(Project)
     status = models.IntegerField(choices=WorkloadStatusType.choices(),
                                  default=WorkloadStatusType.MODIFIED,
                                  editable=False)
@@ -353,7 +355,7 @@
         super(Workload, self).delete(using, keep_parents)
 
     class Meta:  # pylint: disable=no-init
-        unique_together = ("dbms", "hardware", "name")
+        unique_together = ("dbms", "hardware", "name", "project")
 
     # @property
     # def isdefault(self):
diff --git a/server/website/website/tasks/async_tasks.py b/server/website/website/tasks/async_tasks.py
index 45cb8a7..81ea93e 100644
--- a/server/website/website/tasks/async_tasks.py
+++ b/server/website/website/tasks/async_tasks.py
@@ -213,8 +213,8 @@ def aggregate_target_results(result_id, algorithm):
     agg_data['X_matrix'] = np.array(cleaned_agg_data[0])
     agg_data['X_columnlabels'] = np.array(cleaned_agg_data[1])
 
-    LOG.debug('%s: Finished aggregating target results.\n\ndata=%s\n',
-              AlgorithmType.name(algorithm), JSONUtil.dumps(agg_data, pprint=True))
+    LOG.debug('%s: Finished aggregating target results.\n\n',
+              AlgorithmType.name(algorithm))
 
     return agg_data, algorithm
 
@@ -418,22 +418,7 @@ def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-n
     return conf_map_res
 
 
-@task(base=ConfigurationRecommendation, name='configuration_recommendation')
-def configuration_recommendation(recommendation_input):
-    target_data, algorithm = recommendation_input
-    LOG.info('configuration_recommendation called')
-
-    if target_data['bad'] is True:
-        target_data_res = dict(
-            status='bad',
-            result_id=target_data['newest_result_id'],
-            info='WARNING: no training data, the config is generated randomly',
-            recommendation=target_data['config_recommend'],
-            pipeline_run=target_data['pipeline_run'])
-        LOG.debug('%s: Skipping configuration recommendation.\n\ndata=%s\n',
-                  AlgorithmType.name(algorithm), JSONUtil.dumps(target_data, pprint=True))
-        return target_data_res
-
+def combine_workload(target_data):
     # Load mapped workload data
     mapped_workload_id = target_data['mapped_workload'][0]
 
@@ -498,10 +483,6 @@ def configuration_recommendation(recommendation_input):
                          'metrics (target_obj={})').format(len(target_obj_idx),
                                                            target_objective))
 
-    metric_meta = db.target_objectives.get_metric_metadata(
-        newest_result.session.dbms.pk, newest_result.session.target_objective)
-    lessisbetter = metric_meta[target_objective].improvement == db.target_objectives.LESS_IS_BETTER
-
     y_workload = y_workload[:, target_obj_idx]
    y_target = y_target[:, target_obj_idx]
     y_columnlabels = y_columnlabels[target_obj_idx]
@@ -570,6 +551,14 @@ def configuration_recommendation(recommendation_input):
     y_workload_scaler = StandardScaler()
     y_scaled = y_workload_scaler.fit_transform(y_target)
 
+    metric_meta = db.target_objectives.get_metric_metadata(
+        newest_result.session.dbms.pk, newest_result.session.target_objective)
+    lessisbetter = metric_meta[target_objective].improvement == db.target_objectives.LESS_IS_BETTER
+    # Maximize the throughput, moreisbetter
+    # Use gradient descent to minimize -throughput
+    if not lessisbetter:
+        y_scaled = -y_scaled
+
     # Set up constraint helper
     constraint_helper = ParamConstraintHelper(scaler=X_scaler,
                                               encoder=dummy_encoder,
@@ -582,10 +571,6 @@ def configuration_recommendation(recommendation_input):
    #     ridge[:X_target.shape[0]] = 0.01
    #     ridge[X_target.shape[0]:] = 0.1
 
-    # FIXME: we should generate more samples and use a smarter sampling
-    # technique
-    num_samples = NUM_SAMPLES
-    X_samples = np.empty((num_samples, X_scaled.shape[1]))
     X_min = np.empty(X_scaled.shape[1])
     X_max = np.empty(X_scaled.shape[1])
     X_scaler_matrix = np.zeros([1, X_scaled.shape[1]])
@@ -608,12 +593,37 @@ def configuration_recommendation(recommendation_input):
         col_max = X_scaler.transform(X_scaler_matrix)[0][i]
         X_min[i] = col_min
         X_max[i] = col_max
-        X_samples[:, i] = np.random.rand(num_samples) * (col_max - col_min) + col_min
-
-    # Maximize the throughput, moreisbetter
-    # Use gradient descent to minimize -throughput
-    if not lessisbetter:
-        y_scaled = -y_scaled
+
+    return X_columnlabels, X_scaler, X_scaled, y_scaled, X_max, X_min
+
+
+@task(base=ConfigurationRecommendation, name='configuration_recommendation')
+def configuration_recommendation(recommendation_input):
+    target_data, algorithm = recommendation_input
+    LOG.info('configuration_recommendation called')
+
+    if target_data['bad'] is True:
+        target_data_res = dict(
+            status='bad',
+            result_id=target_data['newest_result_id'],
+            info='WARNING: no training data, the config is generated randomly',
+            recommendation=target_data['config_recommend'],
+            pipeline_run=target_data['pipeline_run'])
+        LOG.debug('%s: Skipping configuration recommendation.\n\ndata=%s\n',
+                  AlgorithmType.name(algorithm), JSONUtil.dumps(target_data, pprint=True))
+        return target_data_res
+
+    latest_pipeline_run = PipelineRun.objects.get(pk=target_data['pipeline_run'])
+    newest_result = Result.objects.get(pk=target_data['newest_result_id'])
+
+    X_columnlabels, X_scaler, X_scaled, y_scaled, X_max, X_min = combine_workload(target_data)
+
+    # FIXME: we should generate more samples and use a smarter sampling
+    # technique
+    num_samples = NUM_SAMPLES
+    X_samples = np.empty((num_samples, X_scaled.shape[1]))
+    for i in range(X_scaled.shape[1]):
+        X_samples[:, i] = np.random.rand(num_samples) * (X_max[i] - X_min[i]) + X_min[i]
 
     q = queue.PriorityQueue()
     for x in range(0, y_scaled.shape[0]):
@@ -755,7 +765,8 @@ def map_workload(map_workload_input):
         pipeline_data = PipelineData.objects.filter(
             pipeline_run=latest_pipeline_run,
             workload__dbms=target_workload.dbms,
-            workload__hardware=target_workload.hardware)
+            workload__hardware=target_workload.hardware,
+            workload__project=target_workload.project)
 
     # FIXME (dva): we should also compute the global (i.e., overall) ranked_knobs
     # and pruned metrics but we just use those from the first workload for now
diff --git a/server/website/website/views.py b/server/website/website/views.py
index 90c643b..7cd1903 100644
--- a/server/website/website/views.py
+++ b/server/website/website/views.py
@@ -566,6 +566,7 @@ def handle_result_files(session, files):
     knob_data = KnobData.objects.create_knob_data(
         session, JSONUtil.dumps(knob_dict, pprint=True, sort=True),
         JSONUtil.dumps(tunable_knob_dict, pprint=True, sort=True), dbms)
+    LOG.debug(knob_data.data)
 
     # Load, process, and store the runtime metrics exposed by the DBMS
     initial_metric_dict, initial_metric_diffs = parser.parse_dbms_metrics(
@@ -583,7 +584,7 @@ def handle_result_files(session, files):
 
     # Create a new workload if this one does not already exist
     workload = Workload.objects.create_workload(
-        dbms, session.hardware, workload_name)
+        dbms, session.hardware, workload_name, session.project)
 
     # Save this result
     result = Result.objects.create_result(
@@ -591,7 +592,7 @@ def handle_result_files(session, files):
         start_time, end_time, observation_time)
     result.save()
 
-    # Workload is now modified so backgroundTasks can make calculationw
+    # Workload is now modified so backgroundTasks can make calculation
     workload.status = WorkloadStatusType.MODIFIED
     workload.save()
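Reviewer note (not part of the patch): create_workload now resolves workloads by the full (dbms, hardware, name, project) key, so identically named workloads in different projects no longer collide. The manual get/except-create still leaves a small window in which two concurrent uploads of a brand-new workload can both miss the lookup and both insert; Django's built-in get_or_create expresses the same lookup-then-create logic and is documented as atomic when, as after this patch, the keyword arguments are covered by a uniqueness constraint. A minimal sketch of that alternative, assuming the same model fields:

    from django.db import models


    class WorkloadManager(models.Manager):

        def create_workload(self, dbms, hardware, name, project):
            # (dbms, hardware, name, project) is unique for each workload, so
            # get_or_create can safely collapse the lookup and the insert.
            workload, _ = self.get_or_create(dbms=dbms, hardware=hardware,
                                             name=name, project=project)
            return workload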
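Reviewer note on the sampling step that moves out of combine_workload and back into configuration_recommendation: combine_workload now returns the per-column bounds of the scaled knob space, and the task draws NUM_SAMPLES starting points uniformly at random within [X_min, X_max]. A self-contained sketch of just that step, with illustrative bounds standing in for the ones X_scaler actually produces:

    import numpy as np

    num_samples = 30                    # stands in for NUM_SAMPLES
    X_min = np.array([0.0, -1.0, 0.5])  # illustrative per-column lower bounds
    X_max = np.array([1.0, 1.0, 2.0])   # illustrative per-column upper bounds

    # Per-column loop, as in the patch:
    X_samples = np.empty((num_samples, X_min.shape[0]))
    for i in range(X_min.shape[0]):
        X_samples[:, i] = np.random.rand(num_samples) * (X_max[i] - X_min[i]) + X_min[i]

    # Equivalent vectorized form (broadcasts the bounds across columns):
    X_samples_vec = X_min + np.random.rand(num_samples, X_min.shape[0]) * (X_max - X_min)

The in-code FIXME about a "smarter sampling technique" presumably points at a stratified scheme such as Latin Hypercube Sampling (the 0004_add_lhs migration suggests LHS is already in the codebase); returning X_max/X_min from combine_workload keeps that choice local to this task.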