clean metrics for DDPG
This commit is contained in:
parent
2916330e89
commit
285522a0f5
|
@ -32,3 +32,9 @@ VIEWS_FOR_PRUNING = {
|
|||
DBMSType.ORACLE: ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event',
|
||||
'dba_workload_replays', 'dba_hist_sys_time_model'],
|
||||
}
|
||||
|
||||
# The views used for DDPG
|
||||
# WARNING: modifying this parameter will cause all existing DDPG sessions broken
|
||||
VIEWS_FOR_DDPG = {
|
||||
DBMSType.ORACLE: ['dba_hist_sys_time_model'],
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ from website.models import (PipelineData, PipelineRun, Result, Workload, Session
|
|||
from website import db
|
||||
from website.types import PipelineTaskType, AlgorithmType, VarType
|
||||
from website.utils import DataUtil, JSONUtil
|
||||
from website.settings import ENABLE_DUMMY_ENCODER, TIME_ZONE
|
||||
from website.settings import ENABLE_DUMMY_ENCODER, TIME_ZONE, VIEWS_FOR_DDPG
|
||||
|
||||
|
||||
LOG = get_task_logger(__name__)
|
||||
|
@ -420,6 +420,7 @@ def train_ddpg(train_ddpg_input):
|
|||
result_id, algorithm, target_data = train_ddpg_input
|
||||
result = Result.objects.get(pk=result_id)
|
||||
session = result.session
|
||||
dbms = session.dbms
|
||||
task_name = _get_task_name(session, result_id)
|
||||
|
||||
# If the preprocessing method has already generated a config, bypass this method.
|
||||
|
@ -480,13 +481,14 @@ def train_ddpg(train_ddpg_input):
|
|||
prev_objective = prev_metric_data[target_obj_idx]
|
||||
|
||||
# Clean metric data
|
||||
views = VIEWS_FOR_DDPG.get(dbms.type, None)
|
||||
metric_data, _ = DataUtil.clean_metric_data(agg_data['y_matrix'],
|
||||
agg_data['y_columnlabels'], session)
|
||||
agg_data['y_columnlabels'], views)
|
||||
metric_data = metric_data.flatten()
|
||||
metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
|
||||
normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
|
||||
prev_metric_data, _ = DataUtil.clean_metric_data(prev_agg_data['y_matrix'],
|
||||
prev_agg_data['y_columnlabels'], session)
|
||||
prev_agg_data['y_columnlabels'], views)
|
||||
prev_metric_data = prev_metric_data.flatten()
|
||||
prev_metric_scalar = MinMaxScaler().fit(prev_metric_data.reshape(1, -1))
|
||||
prev_normalized_metric_data = prev_metric_scalar.transform(prev_metric_data.reshape(1, -1))[0]
|
||||
|
@ -597,6 +599,7 @@ def configuration_recommendation_ddpg(recommendation_ddpg_input): # pylint: dis
|
|||
result_list = Result.objects.filter(pk=result_id)
|
||||
result = result_list.first()
|
||||
session = result.session
|
||||
dbms = session.dbms
|
||||
task_name = _get_task_name(session, result_id)
|
||||
|
||||
early_return, target_data_res = check_early_return(target_data, algorithm)
|
||||
|
@ -609,8 +612,9 @@ def configuration_recommendation_ddpg(recommendation_ddpg_input): # pylint: dis
|
|||
|
||||
params = JSONUtil.loads(session.hyperparameters)
|
||||
agg_data = DataUtil.aggregate_data(result_list)
|
||||
views = VIEWS_FOR_DDPG.get(dbms.type, None)
|
||||
metric_data, _ = DataUtil.clean_metric_data(agg_data['y_matrix'],
|
||||
agg_data['y_columnlabels'], session)
|
||||
agg_data['y_columnlabels'], views)
|
||||
metric_data = metric_data.flatten()
|
||||
metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
|
||||
normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
|
||||
|
|
|
@ -269,16 +269,8 @@ def run_workload_characterization(metric_data, dbms=None):
|
|||
LOG.debug("Workload characterization ~ initial data size: %s", matrix.shape)
|
||||
|
||||
views = None if dbms is None else VIEWS_FOR_PRUNING.get(dbms.type, None)
|
||||
if views is not None:
|
||||
useful_labels = []
|
||||
for label in columnlabels:
|
||||
for view in views:
|
||||
if view in label:
|
||||
useful_labels.append(label)
|
||||
break
|
||||
matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, None,
|
||||
useful_labels)
|
||||
LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape)
|
||||
matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, views)
|
||||
LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape)
|
||||
|
||||
# Bin each column (metric) in the matrix by its decile
|
||||
binner = Bin(bin_start=1, axis=0)
|
||||
|
|
|
@ -212,33 +212,35 @@ class DataUtil(object):
|
|||
return X_unique, y_unique, rowlabels_unique
|
||||
|
||||
@staticmethod
|
||||
def clean_metric_data(metric_matrix, metric_labels, session, useful_labels=None):
|
||||
# Make metric_labels identical to useful_labels (if given)
|
||||
# If useful_labels is not given, let it to be all metrics in the catalog.
|
||||
if useful_labels is None:
|
||||
metric_objs = MetricCatalog.objects.filter(dbms=session.dbms)
|
||||
metric_cat = [session.target_objective]
|
||||
for metric_obj in metric_objs:
|
||||
metric_cat.append(metric_obj.name)
|
||||
else:
|
||||
metric_cat = useful_labels
|
||||
missing_columns = sorted(set(metric_cat) - set(metric_labels))
|
||||
unused_columns = set(metric_labels) - set(metric_cat)
|
||||
def clean_metric_data(metric_matrix, metric_labels, useful_views):
|
||||
# Make metric_labels identical to useful_labels (if not None)
|
||||
if useful_views is None:
|
||||
return metric_matrix, metric_labels
|
||||
|
||||
useful_labels = []
|
||||
for label in metric_labels:
|
||||
for view in useful_views:
|
||||
if view in label:
|
||||
useful_labels.append(label)
|
||||
break
|
||||
|
||||
missing_columns = sorted(set(useful_labels) - set(metric_labels))
|
||||
unused_columns = set(metric_labels) - set(useful_labels)
|
||||
LOG.debug("clean_metric_data: added %d metrics and removed %d metric.",
|
||||
len(missing_columns), len(unused_columns))
|
||||
default_val = 0
|
||||
metric_cat_size = len(metric_cat)
|
||||
matrix = np.ones((len(metric_matrix), metric_cat_size)) * default_val
|
||||
useful_labels_size = len(useful_labels)
|
||||
matrix = np.ones((len(metric_matrix), useful_labels_size)) * default_val
|
||||
metric_labels_dict = {n: i for i, n in enumerate(metric_labels)}
|
||||
# column labels in matrix has the same order as ones in metric catalog
|
||||
# column labels in matrix has the same order as ones in useful_labels
|
||||
# missing values are filled with default_val
|
||||
for i, metric_name in enumerate(metric_cat):
|
||||
for i, metric_name in enumerate(useful_labels):
|
||||
if metric_name in metric_labels_dict:
|
||||
index = metric_labels_dict[metric_name]
|
||||
matrix[:, i] = metric_matrix[:, index]
|
||||
LOG.debug("clean_metric_data: final ~ matrix: %s, labels: %s", matrix.shape,
|
||||
len(metric_cat))
|
||||
return matrix, metric_cat
|
||||
useful_labels_size)
|
||||
return matrix, useful_labels
|
||||
|
||||
@staticmethod
|
||||
def clean_knob_data(knob_matrix, knob_labels, sessions):
|
||||
|
|
Loading…
Reference in New Issue