address dana's comment

commit 77eb5cdb42
parent e7d5b0338a

@@ -4,6 +4,8 @@
 # Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #
 
+from website.types import DBMSType
+
 # These parameters are not specified for any session, so they can only be set here
 
 # If this flag is set, we check if celery is running, and restart celery if it is not.
|
@@ -26,4 +28,7 @@ KNOB_IDENT_USE_PRUNED_METRICS = False
 MIN_WORKLOAD_RESULTS_COUNT = 5
 
 # The views used for metrics pruning
-VIEWS_FOR_PRUNING = ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event']
+VIEWS_FOR_PRUNING = {
+    DBMSType.ORACLE: ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event',
+                      'dba_workload_replays', 'dba_hist_sys_time_model'],
+}
|
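For readers skimming the constants change: VIEWS_FOR_PRUNING is now keyed by DBMS type, so a dict lookup with a None default decides whether view-based filtering applies at all. A minimal standalone sketch of that behavior, with DBMSType replaced by an illustrative enum (the real class lives in website.types, and the POSTGRES member here is purely hypothetical):

# Sketch only; not part of the commit.
from enum import Enum

class DBMSType(Enum):
    ORACLE = 1
    POSTGRES = 2  # hypothetical extra type, for illustration only

VIEWS_FOR_PRUNING = {
    DBMSType.ORACLE: ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event',
                      'dba_workload_replays', 'dba_hist_sys_time_model'],
}

# dict.get() falls back to None for DBMSs without a view list, which is
# what run_workload_characterization() checks before filtering.
assert VIEWS_FOR_PRUNING.get(DBMSType.ORACLE) is not None
assert VIEWS_FOR_PRUNING.get(DBMSType.POSTGRES) is None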
@@ -82,6 +82,8 @@ def run_background_tasks():
             # Check that there are enough results in the workload
             LOG.info("Not enough results in workload %s (# results: %s, # required: %s).",
                      workload_name, num_wkld_results, MIN_WORKLOAD_RESULTS_COUNT)
+            workload.status = WorkloadStatusType.PROCESSED
+            workload.save()
             continue
 
         LOG.info("Aggregating data for workload %s...", workload_name)
|
@@ -95,6 +97,16 @@ def run_background_tasks():
                  metric_data['data'].shape)
         LOG.info("Done aggregating data for workload %s.", workload_name)
 
+        num_valid_results = knob_data['data'].shape[0]  # pylint: disable=unsubscriptable-object
+        if num_valid_results < MIN_WORKLOAD_RESULTS_COUNT:
+            # Check that there are enough valid results in the workload
+            LOG.info("Not enough valid results in workload %s (# valid results: "
+                     "%s, # required: %s).", workload_name, num_valid_results,
+                     MIN_WORKLOAD_RESULTS_COUNT)
+            workload.status = WorkloadStatusType.PROCESSED
+            workload.save()
+            continue
+
         # Knob_data and metric_data are 2D numpy arrays. Convert them into a
         # JSON-friendly (nested) lists and then save them as new PipelineData
         # objects.
|
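A note on the new guard above: knob_data['data'] is a 2D numpy array with one row per valid result, so shape[0] is the post-aggregation result count. A minimal sketch of the same early-exit pattern, with the array contents invented for illustration:

# Sketch only; not part of the commit.
import numpy as np

MIN_WORKLOAD_RESULTS_COUNT = 5

# Hypothetical aggregated knob matrix: 3 valid results x 4 knobs.
knob_data = {'data': np.zeros((3, 4))}

num_valid_results = knob_data['data'].shape[0]
if num_valid_results < MIN_WORKLOAD_RESULTS_COUNT:
    # Too few valid rows survived aggregation: the task marks the
    # workload PROCESSED and continues with the next one.
    print("skip: %d valid results, %d required"
          % (num_valid_results, MIN_WORKLOAD_RESULTS_COUNT))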
@@ -122,7 +134,7 @@ def run_background_tasks():
         # pruned metrics for this workload and save them in a new PipelineData
         # object.
         LOG.info("Pruning metrics for workload %s...", workload_name)
-        pruned_metrics = run_workload_characterization(metric_data=metric_data)
+        pruned_metrics = run_workload_characterization(metric_data=metric_data, dbms=workload.dbms)
         LOG.info("Done pruning metrics for workload %s (# pruned metrics: %s).\n\n"
                  "Pruned metrics: %s\n", workload_name, len(pruned_metrics),
                  pruned_metrics)
|
@@ -240,7 +252,7 @@ def aggregate_data(wkld_results):
     return knob_data, metric_data
 
 
-def run_workload_characterization(metric_data):
+def run_workload_characterization(metric_data, dbms):
     # Performs workload characterization on the metric_data and returns
     # a set of pruned metrics.
     #
|
@@ -255,13 +267,17 @@ def run_workload_characterization(metric_data):
     matrix = metric_data['data']
     columnlabels = metric_data['columnlabels']
     LOG.debug("Workload characterization ~ initial data size: %s", matrix.shape)
 
+    views = VIEWS_FOR_PRUNING.get(dbms.type, None)
+    if views is not None:
         useful_labels = []
         for label in columnlabels:
-            for view in VIEWS_FOR_PRUNING:
+            for view in views:
                 if view in label:
                     useful_labels.append(label)
                     break
-        matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, None, useful_labels)
+        matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, None,
+                                                          useful_labels)
     LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape)
 
     # Bin each column (metric) in the matrix by its decile
|
|
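To make the filtering in run_workload_characterization concrete: a metric column is kept if any of the DBMS's pruning views appears as a substring of its label. A standalone sketch with hypothetical column labels (the labels and the shortened view list are illustrative, not from the commit):

# Sketch only; not part of the commit.
views = ['dba_hist_osstat', 'dba_hist_sysstat']

# Hypothetical metric column labels as they might appear in metric_data.
columnlabels = ['dba_hist_osstat.NUM_CPUS',
                'dba_hist_sysstat.user commits',
                'v$sysmetric.Average Active Sessions']

useful_labels = []
for label in columnlabels:
    for view in views:
        if view in label:           # keep any label that mentions a pruning view
            useful_labels.append(label)
            break                   # stop at the first matching view

# Only the first two labels survive; the v$sysmetric column is dropped.
assert useful_labels == columnlabels[:2]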