When ranking knobs in the background tasks, use the workload's target objective(s) as the output by default. Added an option (KNOB_IDENT_USE_PRUNED_METRICS) that, when enabled, also includes the pruned metrics.
This commit is contained in:
parent
aeaae7d9f4
commit
efa02899b5
|
@ -146,7 +146,7 @@ class TaskMetaAdmin(admin.ModelAdmin):
|
||||||
fields = readonly_fields
|
fields = readonly_fields
|
||||||
list_filter = ('status',)
|
list_filter = ('status',)
|
||||||
list_per_page = 10
|
list_per_page = 10
|
||||||
ordering = ('date_done',)
|
ordering = ('-date_done',)
|
||||||
max_field_length = 1000
|
max_field_length = 1000
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -11,3 +11,16 @@ CHECK_CELERY = True
|
||||||
|
|
||||||
# address categorical knobs (enum, boolean)
|
# address categorical knobs (enum, boolean)
|
||||||
ENABLE_DUMMY_ENCODER = False
|
ENABLE_DUMMY_ENCODER = False
|
||||||
|
|
||||||
|
# Whether to include the pruned metrics from the workload characterization subtask in
|
||||||
|
# the output (y) when ranking the knobs for a given workload in the knob identification
|
||||||
|
# subtask.
|
||||||
|
|
||||||
|
# When computing the ranked knobs in the knob identification subtask, the output (y) is
|
||||||
|
# the set of target objectives used to tune the given workload. If this flag is enabled
|
||||||
|
# then the pruned metrics from the workload characterization subtask are also included
|
||||||
|
# in the output. (See website/tasks/periodic_tasks.py)
|
||||||
|
KNOB_IDENT_USE_PRUNED_METRICS = False
|
||||||
|
|
||||||
|
# The background tasks only process workloads containing this minimum amount of results
|
||||||
|
MIN_WORKLOAD_RESULTS_COUNT = 5
|
||||||
|
|
|
@ -21,14 +21,13 @@ from analysis.preprocessing import (Bin, get_shuffle_indices,
|
||||||
DummyEncoder,
|
DummyEncoder,
|
||||||
consolidate_columnlabels)
|
consolidate_columnlabels)
|
||||||
from website.models import PipelineData, PipelineRun, Result, Workload, ExecutionTime
|
from website.models import PipelineData, PipelineRun, Result, Workload, ExecutionTime
|
||||||
from website.settings import ENABLE_DUMMY_ENCODER, TIME_ZONE
|
from website.settings import (ENABLE_DUMMY_ENCODER, KNOB_IDENT_USE_PRUNED_METRICS,
|
||||||
|
MIN_WORKLOAD_RESULTS_COUNT, TIME_ZONE)
|
||||||
from website.types import PipelineTaskType, WorkloadStatusType
|
from website.types import PipelineTaskType, WorkloadStatusType
|
||||||
from website.utils import DataUtil, JSONUtil
|
from website.utils import DataUtil, JSONUtil
|
||||||
|
|
||||||
# Log debug messages
|
# Log debug messages
|
||||||
LOG = get_task_logger(__name__)
|
LOG = get_task_logger(__name__)
|
||||||
# Only process workload containing this minimum amount of results
|
|
||||||
MIN_WORKLOAD_RESULTS_COUNT = 5
|
|
||||||
|
|
||||||
|
|
||||||
def save_execution_time(start_ts, fn):
|
def save_execution_time(start_ts, fn):
|
||||||
|
@ -134,21 +133,31 @@ def run_background_tasks():
|
||||||
creation_time=now())
|
creation_time=now())
|
||||||
pruned_metrics_entry.save()
|
pruned_metrics_entry.save()
|
||||||
|
|
||||||
# Use the pruned metrics to filter the metric_data
|
# Workload target objective data
|
||||||
pruned_metric_idxs = [i for i, metric_name in enumerate(metric_data['columnlabels'])
|
ranked_knob_metrics = sorted(wkld_results.distinct('session').values_list(
|
||||||
if metric_name in pruned_metrics]
|
'session__target_objective', flat=True).distinct())
|
||||||
pruned_metric_data = {
|
LOG.debug("Target objectives for workload %s: %s", workload_name,
|
||||||
'data': metric_data['data'][:, pruned_metric_idxs],
|
', '.join(ranked_knob_metrics))
|
||||||
|
|
||||||
|
if KNOB_IDENT_USE_PRUNED_METRICS:
|
||||||
|
# Merge the target objectives with the pruned metrics using set union (`|`); Python sets do not support `+`, which raises TypeError.
ranked_knob_metrics = sorted(set(ranked_knob_metrics) | set(pruned_metrics))
|
||||||
|
|
||||||
|
# Use the set of metrics to filter the metric_data
|
||||||
|
metric_idxs = [i for i, metric_name in enumerate(metric_data['columnlabels'])
|
||||||
|
if metric_name in ranked_knob_metrics]
|
||||||
|
ranked_metric_data = {
|
||||||
|
'data': metric_data['data'][:, metric_idxs],
|
||||||
'rowlabels': copy.deepcopy(metric_data['rowlabels']),
|
'rowlabels': copy.deepcopy(metric_data['rowlabels']),
|
||||||
'columnlabels': [metric_data['columnlabels'][i] for i in pruned_metric_idxs]
|
'columnlabels': [metric_data['columnlabels'][i] for i in metric_idxs]
|
||||||
}
|
}
|
||||||
|
|
||||||
# Execute the Knob Identification task to compute an ordered list of knobs
|
# Execute the Knob Identification task to compute an ordered list of knobs
|
||||||
# ranked by their impact on the DBMS's performance. Save them in a new
|
# ranked by their impact on the DBMS's performance. Save them in a new
|
||||||
# PipelineData object.
|
# PipelineData object.
|
||||||
LOG.info("Ranking knobs for workload %s...", workload_name)
|
LOG.info("Ranking knobs for workload %s (use pruned metric data: %s)...",
|
||||||
|
workload_name, KNOB_IDENT_USE_PRUNED_METRICS)
|
||||||
ranked_knobs = run_knob_identification(knob_data=knob_data,
|
ranked_knobs = run_knob_identification(knob_data=knob_data,
|
||||||
metric_data=pruned_metric_data,
|
metric_data=ranked_metric_data,
|
||||||
dbms=workload.dbms)
|
dbms=workload.dbms)
|
||||||
LOG.info("Done ranking knobs for workload %s (# ranked knobs: %s).\n\n"
|
LOG.info("Done ranking knobs for workload %s (# ranked knobs: %s).\n\n"
|
||||||
"Ranked knobs: %s\n", workload_name, len(ranked_knobs), ranked_knobs)
|
"Ranked knobs: %s\n", workload_name, len(ranked_knobs), ranked_knobs)
|
||||||
|
|
Loading…
Reference in New Issue