clean metrics for DDPG

parent 2916330e89
commit 285522a0f5
@@ -32,3 +32,9 @@ VIEWS_FOR_PRUNING = {
     DBMSType.ORACLE: ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event',
                       'dba_workload_replays', 'dba_hist_sys_time_model'],
 }
+
+# The views used for DDPG
+# WARNING: modifying this parameter will break all existing DDPG sessions
+VIEWS_FOR_DDPG = {
+    DBMSType.ORACLE: ['dba_hist_sys_time_model'],
+}
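For context: clean_metric_data (reworked in the last hunk below) keeps a metric only if its label contains one of these view names as a substring, so this setting restricts DDPG's metric space to dba_hist_sys_time_model statistics on Oracle. A minimal sketch of the matching, with made-up labels:

    # Made-up labels; a label survives if any configured view name is a substring of it.
    views = ['dba_hist_sys_time_model']
    labels = ['dba_hist_sys_time_model.db_time',   # kept
              'dba_hist_osstat.busy_time']         # dropped
    kept = [label for label in labels if any(view in label for view in views)]
    assert kept == ['dba_hist_sys_time_model.db_time']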
@@ -34,7 +34,7 @@ from website.models import (PipelineData, PipelineRun, Result, Workload, Session
 from website import db
 from website.types import PipelineTaskType, AlgorithmType, VarType
 from website.utils import DataUtil, JSONUtil
-from website.settings import ENABLE_DUMMY_ENCODER, TIME_ZONE
+from website.settings import ENABLE_DUMMY_ENCODER, TIME_ZONE, VIEWS_FOR_DDPG
 
 
 LOG = get_task_logger(__name__)
@@ -420,6 +420,7 @@ def train_ddpg(train_ddpg_input):
     result_id, algorithm, target_data = train_ddpg_input
     result = Result.objects.get(pk=result_id)
     session = result.session
+    dbms = session.dbms
     task_name = _get_task_name(session, result_id)
 
     # If the preprocessing method has already generated a config, bypass this method.
@@ -480,13 +481,14 @@ def train_ddpg(train_ddpg_input):
     prev_objective = prev_metric_data[target_obj_idx]
 
     # Clean metric data
+    views = VIEWS_FOR_DDPG.get(dbms.type, None)
     metric_data, _ = DataUtil.clean_metric_data(agg_data['y_matrix'],
-                                                agg_data['y_columnlabels'], session)
+                                                agg_data['y_columnlabels'], views)
     metric_data = metric_data.flatten()
     metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
     normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
     prev_metric_data, _ = DataUtil.clean_metric_data(prev_agg_data['y_matrix'],
-                                                     prev_agg_data['y_columnlabels'], session)
+                                                     prev_agg_data['y_columnlabels'], views)
     prev_metric_data = prev_metric_data.flatten()
     prev_metric_scalar = MinMaxScaler().fit(prev_metric_data.reshape(1, -1))
     prev_normalized_metric_data = prev_metric_scalar.transform(prev_metric_data.reshape(1, -1))[0]
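The normalization above operates on a single flattened row: flatten() turns the aggregated matrix into a 1-D vector, reshape(1, -1) makes it a one-row 2-D array as scikit-learn expects, and indexing with [0] recovers the 1-D vector afterwards. Toy numbers only:

    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    metric_data = np.array([[1.0, 2.0], [3.0, 4.0]]).flatten()  # shape (4,)
    row = metric_data.reshape(1, -1)                            # shape (1, 4)
    scaler = MinMaxScaler().fit(row)        # per-column min/max over one row
    normalized = scaler.transform(row)[0]   # back to shape (4,)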
@@ -597,6 +599,7 @@ def configuration_recommendation_ddpg(recommendation_ddpg_input):  # pylint: dis
     result_list = Result.objects.filter(pk=result_id)
     result = result_list.first()
     session = result.session
+    dbms = session.dbms
     task_name = _get_task_name(session, result_id)
 
     early_return, target_data_res = check_early_return(target_data, algorithm)
@@ -609,8 +612,9 @@ def configuration_recommendation_ddpg(recommendation_ddpg_input):  # pylint: dis
 
     params = JSONUtil.loads(session.hyperparameters)
     agg_data = DataUtil.aggregate_data(result_list)
+    views = VIEWS_FOR_DDPG.get(dbms.type, None)
     metric_data, _ = DataUtil.clean_metric_data(agg_data['y_matrix'],
-                                                agg_data['y_columnlabels'], session)
+                                                agg_data['y_columnlabels'], views)
     metric_data = metric_data.flatten()
     metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
     normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
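Note the fallback: a DBMS type with no entry in VIEWS_FOR_DDPG yields views = None, and the reworked clean_metric_data then returns the matrix and labels untouched, so sessions on other DBMSs keep their full metric set. A stand-in illustration (string keys instead of the real DBMSType values):

    views_for_ddpg = {'oracle': ['dba_hist_sys_time_model']}
    views = views_for_ddpg.get('mysql', None)
    assert views is None   # clean_metric_data(..., None) passes data through unchanged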
@@ -269,16 +269,8 @@ def run_workload_characterization(metric_data, dbms=None):
     LOG.debug("Workload characterization ~ initial data size: %s", matrix.shape)
 
     views = None if dbms is None else VIEWS_FOR_PRUNING.get(dbms.type, None)
-    if views is not None:
-        useful_labels = []
-        for label in columnlabels:
-            for view in views:
-                if view in label:
-                    useful_labels.append(label)
-                    break
-        matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, None,
-                                                          useful_labels)
-        LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape)
+    matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, views)
+    LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape)
 
     # Bin each column (metric) in the matrix by its decile
     binner = Bin(bin_start=1, axis=0)
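The inline filtering loop removed above moves into DataUtil.clean_metric_data, which now always runs (and is a no-op when views is None), so behavior is unchanged for DBMS types without pruning views. The substring match is the same in both versions; a toy check:

    views = ['dba_hist_osstat']
    columnlabels = ['dba_hist_osstat.busy_time', 'other.metric']
    # Old inline filter and the new helper both keep only the first label.
    useful = [label for label in columnlabels if any(view in label for view in views)]
    assert useful == ['dba_hist_osstat.busy_time']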
@@ -212,33 +212,35 @@ class DataUtil(object):
         return X_unique, y_unique, rowlabels_unique
 
     @staticmethod
-    def clean_metric_data(metric_matrix, metric_labels, session, useful_labels=None):
-        # Make metric_labels identical to useful_labels (if given)
-        # If useful_labels is not given, let it to be all metrics in the catalog.
-        if useful_labels is None:
-            metric_objs = MetricCatalog.objects.filter(dbms=session.dbms)
-            metric_cat = [session.target_objective]
-            for metric_obj in metric_objs:
-                metric_cat.append(metric_obj.name)
-        else:
-            metric_cat = useful_labels
-        missing_columns = sorted(set(metric_cat) - set(metric_labels))
-        unused_columns = set(metric_labels) - set(metric_cat)
+    def clean_metric_data(metric_matrix, metric_labels, useful_views):
+        # Keep only the metric labels that contain one of the useful views; if None, return the data unchanged.
+        if useful_views is None:
+            return metric_matrix, metric_labels
+
+        useful_labels = []
+        for label in metric_labels:
+            for view in useful_views:
+                if view in label:
+                    useful_labels.append(label)
+                    break
+
+        missing_columns = sorted(set(useful_labels) - set(metric_labels))
+        unused_columns = set(metric_labels) - set(useful_labels)
         LOG.debug("clean_metric_data: added %d metrics and removed %d metric.",
                   len(missing_columns), len(unused_columns))
         default_val = 0
-        metric_cat_size = len(metric_cat)
-        matrix = np.ones((len(metric_matrix), metric_cat_size)) * default_val
+        useful_labels_size = len(useful_labels)
+        matrix = np.ones((len(metric_matrix), useful_labels_size)) * default_val
         metric_labels_dict = {n: i for i, n in enumerate(metric_labels)}
-        # column labels in matrix has the same order as ones in metric catalog
+        # column labels in matrix have the same order as the ones in useful_labels
         # missing values are filled with default_val
-        for i, metric_name in enumerate(metric_cat):
+        for i, metric_name in enumerate(useful_labels):
             if metric_name in metric_labels_dict:
                 index = metric_labels_dict[metric_name]
                 matrix[:, i] = metric_matrix[:, index]
         LOG.debug("clean_metric_data: final ~ matrix: %s, labels: %s", matrix.shape,
-                  len(metric_cat))
-        return matrix, metric_cat
+                  useful_labels_size)
+        return matrix, useful_labels
 
     @staticmethod
     def clean_knob_data(knob_matrix, knob_labels, sessions):
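End-to-end, the new signature behaves like this (made-up matrix and labels; selected columns keep their original relative order):

    import numpy as np
    from website.utils import DataUtil

    metric_matrix = np.array([[1., 2., 3.],
                              [4., 5., 6.]])
    metric_labels = ['dba_hist_osstat.busy_time',
                     'dba_hist_sys_time_model.db_time',
                     'dba_hist_sys_time_model.db_cpu']

    matrix, labels = DataUtil.clean_metric_data(
        metric_matrix, metric_labels, ['dba_hist_sys_time_model'])
    # matrix -> [[2., 3.], [5., 6.]]
    # labels -> ['dba_hist_sys_time_model.db_time', 'dba_hist_sys_time_model.db_cpu']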