From efa02899b5a2b46f4260095bb75d2610d9874f74 Mon Sep 17 00:00:00 2001 From: dvanaken Date: Mon, 13 Apr 2020 23:54:44 -0400 Subject: [PATCH] When ranking knobs in the background tasks use the target objective(s) by default. Added an option that when enabled also uses the pruned metrics. --- server/website/website/admin.py | 2 +- server/website/website/settings/constants.py | 13 ++++++++ .../website/website/tasks/periodic_tasks.py | 31 ++++++++++++------- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/server/website/website/admin.py b/server/website/website/admin.py index d3b7b49..0862836 100644 --- a/server/website/website/admin.py +++ b/server/website/website/admin.py @@ -146,7 +146,7 @@ class TaskMetaAdmin(admin.ModelAdmin): fields = readonly_fields list_filter = ('status',) list_per_page = 10 - ordering = ('date_done',) + ordering = ('-date_done',) max_field_length = 1000 @staticmethod diff --git a/server/website/website/settings/constants.py b/server/website/website/settings/constants.py index 7fb1fec..f078417 100644 --- a/server/website/website/settings/constants.py +++ b/server/website/website/settings/constants.py @@ -11,3 +11,16 @@ CHECK_CELERY = True # address categorical knobs (enum, boolean) ENABLE_DUMMY_ENCODER = False + +# Whether to include the pruned metrics from the workload characterization subtask in +# the output (y) when ranking the knobs for a given workload in the knob identification +# subtask. + +# When computing the ranked knobs in the knob identification subtask, the output (y) is +# the set of target objectives used to tune the given workload. If this flag is enabled +# then the pruned metrics from the workload characterization subtask are also included +# in the output. 
(See website/tasks/periodic_tasks.py) +KNOB_IDENT_USE_PRUNED_METRICS = False + +# The background tasks only process workloads containing this minimum amount of results +MIN_WORKLOAD_RESULTS_COUNT = 5 diff --git a/server/website/website/tasks/periodic_tasks.py b/server/website/website/tasks/periodic_tasks.py index 466ad19..a744a61 100644 --- a/server/website/website/tasks/periodic_tasks.py +++ b/server/website/website/tasks/periodic_tasks.py @@ -21,14 +21,13 @@ from analysis.preprocessing import (Bin, get_shuffle_indices, DummyEncoder, consolidate_columnlabels) from website.models import PipelineData, PipelineRun, Result, Workload, ExecutionTime -from website.settings import ENABLE_DUMMY_ENCODER, TIME_ZONE +from website.settings import (ENABLE_DUMMY_ENCODER, KNOB_IDENT_USE_PRUNED_METRICS, + MIN_WORKLOAD_RESULTS_COUNT, TIME_ZONE) from website.types import PipelineTaskType, WorkloadStatusType from website.utils import DataUtil, JSONUtil # Log debug messages LOG = get_task_logger(__name__) -# Only process workload containing this minimum amount of results -MIN_WORKLOAD_RESULTS_COUNT = 5 def save_execution_time(start_ts, fn): @@ -134,21 +133,31 @@ def run_background_tasks(): creation_time=now()) pruned_metrics_entry.save() - # Use the pruned metrics to filter the metric_data - pruned_metric_idxs = [i for i, metric_name in enumerate(metric_data['columnlabels']) - if metric_name in pruned_metrics] - pruned_metric_data = { - 'data': metric_data['data'][:, pruned_metric_idxs], + # Workload target objective data + ranked_knob_metrics = sorted(wkld_results.distinct('session').values_list( + 'session__target_objective', flat=True).distinct()) + LOG.debug("Target objectives for workload %s: %s", workload_name, + ', '.join(ranked_knob_metrics)) + + if KNOB_IDENT_USE_PRUNED_METRICS: + ranked_knob_metrics = sorted(set(ranked_knob_metrics) | set(pruned_metrics)) + + # Use the set of metrics to filter the metric_data + metric_idxs = [i for i, metric_name in 
enumerate(metric_data['columnlabels']) + if metric_name in ranked_knob_metrics] + ranked_metric_data = { + 'data': metric_data['data'][:, metric_idxs], 'rowlabels': copy.deepcopy(metric_data['rowlabels']), - 'columnlabels': [metric_data['columnlabels'][i] for i in pruned_metric_idxs] + 'columnlabels': [metric_data['columnlabels'][i] for i in metric_idxs] } # Execute the Knob Identification task to compute an ordered list of knobs # ranked by their impact on the DBMS's performance. Save them in a new # PipelineData object. - LOG.info("Ranking knobs for workload %s...", workload_name) + LOG.info("Ranking knobs for workload %s (use pruned metric data: %s)...", + workload_name, KNOB_IDENT_USE_PRUNED_METRICS) ranked_knobs = run_knob_identification(knob_data=knob_data, - metric_data=pruned_metric_data, + metric_data=ranked_metric_data, dbms=workload.dbms) LOG.info("Done ranking knobs for workload %s (# ranked knobs: %s).\n\n" "Ranked knobs: %s\n", workload_name, len(ranked_knobs), ranked_knobs)