Address Dana's comment

2020-04-22 07:01:08 +00:00 · 2020-04-22 07:01:08 +00:00 · 77eb5cdb42
parent e7d5b0338a
commit 77eb5cdb42
2 changed files with 32 additions and 11 deletions
--- a/server/website/website/settings/constants.py
+++ b/server/website/website/settings/constants.py
@@ -4,6 +4,8 @@
 # Copyright (c) 2017-18, Carnegie Mellon University Database Group
 #

+from website.types import DBMSType
+
 # These parameters are not specified for any session, so they can only be set here

 # If this flag is set, we check if celery is running, and restart celery if it is not.
@@ -26,4 +28,7 @@ KNOB_IDENT_USE_PRUNED_METRICS = False
 MIN_WORKLOAD_RESULTS_COUNT = 5

 # The views used for metrics pruning
-VIEWS_FOR_PRUNING = ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event']
+VIEWS_FOR_PRUNING = {
+    DBMSType.ORACLE: ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event',
+                      'dba_workload_replays', 'dba_hist_sys_time_model'],
+}
--- a/server/website/website/tasks/periodic_tasks.py
+++ b/server/website/website/tasks/periodic_tasks.py
@@ -82,6 +82,8 @@ def run_background_tasks():
            # Check that there are enough results in the workload
            LOG.info("Not enough results in workload %s (# results: %s, # required: %s).",
                     workload_name, num_wkld_results, MIN_WORKLOAD_RESULTS_COUNT)
+            workload.status = WorkloadStatusType.PROCESSED
+            workload.save()
            continue

        LOG.info("Aggregating data for workload %s...", workload_name)
@@ -95,6 +97,16 @@ def run_background_tasks():
                  metric_data['data'].shape)
        LOG.info("Done aggregating data for workload %s.", workload_name)

+        num_valid_results = knob_data['data'].shape[0]  # pylint: disable=unsubscriptable-object
+        if num_valid_results < MIN_WORKLOAD_RESULTS_COUNT:
+            # Check that there are enough valid results in the workload
+            LOG.info("Not enough valid results in workload %s (# valid results: "
+                     "%s, # required: %s).", workload_name, num_valid_results,
+                     MIN_WORKLOAD_RESULTS_COUNT)
+            workload.status = WorkloadStatusType.PROCESSED
+            workload.save()
+            continue
+
        # Knob_data and metric_data are 2D numpy arrays. Convert them into a
        # JSON-friendly (nested) lists and then save them as new PipelineData
        # objects.
@@ -122,7 +134,7 @@ def run_background_tasks():
        # pruned metrics for this workload and save them in a new PipelineData
        # object.
        LOG.info("Pruning metrics for workload %s...", workload_name)
-        pruned_metrics = run_workload_characterization(metric_data=metric_data)
+        pruned_metrics = run_workload_characterization(metric_data=metric_data, dbms=workload.dbms)
        LOG.info("Done pruning metrics for workload %s (# pruned metrics: %s).\n\n"
                 "Pruned metrics: %s\n", workload_name, len(pruned_metrics),
                 pruned_metrics)
@@ -240,7 +252,7 @@ def aggregate_data(wkld_results):
    return knob_data, metric_data


-def run_workload_characterization(metric_data):
+def run_workload_characterization(metric_data, dbms):
    # Performs workload characterization on the metric_data and returns
    # a set of pruned metrics.
    #
@@ -255,13 +267,17 @@ def run_workload_characterization(metric_data):
    matrix = metric_data['data']
    columnlabels = metric_data['columnlabels']
    LOG.debug("Workload characterization ~ initial data size: %s", matrix.shape)
+
+    views = VIEWS_FOR_PRUNING.get(dbms.type, None)
+    if views is not None:
        useful_labels = []
        for label in columnlabels:
-        for view in VIEWS_FOR_PRUNING:
+            for view in views:
                if view in label:
                    useful_labels.append(label)
                    break
-    matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, None, useful_labels)
+        matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, None,
+                                                          useful_labels)
        LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape)

    # Bin each column (metric) in the matrix by its decile