add missing columns in metrics

yangdsh 2019-12-17 04:27:20 +00:00 committed by Dana Van Aken
parent 0aaa0d37f0
commit 3a722df5e2
1 changed file with 45 additions and 8 deletions

@@ -25,7 +25,8 @@ from analysis.gpr import ucb
 from analysis.gpr.optimize import tf_optimize
 from analysis.preprocessing import Bin, DummyEncoder
 from analysis.constraints import ParamConstraintHelper
-from website.models import PipelineData, PipelineRun, Result, Workload, KnobCatalog, SessionKnob
+from website.models import (PipelineData, PipelineRun, Result, Workload, KnobCatalog, SessionKnob,
+                            MetricCatalog)
 from website import db
 from website.types import PipelineTaskType, AlgorithmType
 from website.utils import DataUtil, JSONUtil
@@ -153,6 +154,35 @@ def clean_knob_data(knob_matrix, knob_labels, session):
     return matrix, knob_labels
 
 
+def clean_metric_data(metric_matrix, metric_labels, session):
+    # Makes sure that all metrics in the dbms are included in the metric_matrix and metric_labels
+    metric_objs = MetricCatalog.objects.filter(dbms=session.dbms)
+    metric_cat = [session.target_objective]
+    for metric_obj in metric_objs:
+        metric_cat.append(metric_obj.name)
+    matrix = np.array(metric_matrix)
+    missing_columns = sorted(set(metric_cat) - set(metric_labels))
+    unused_columns = set(metric_labels) - set(metric_cat)
+    LOG.debug("clean_metric_data added %d metrics and removed %d metrics.", len(missing_columns),
+              len(unused_columns))
+    # If columns are missing from the matrix
+    if missing_columns:
+        for metric in missing_columns:
+            index = metric_cat.index(metric)
+            default_val = 0
+            matrix = np.insert(matrix, index, default_val, axis=1)
+            metric_labels.insert(index, metric)
+        LOG.debug(matrix.shape)
+    # If there are unused columns in the matrix
+    if unused_columns:
+        indexes = [i for i, n in enumerate(metric_labels) if n in unused_columns]
+        # Delete unused columns
+        matrix = np.delete(matrix, indexes, 1)
+        for i in sorted(indexes, reverse=True):
+            del metric_labels[i]
+    return matrix, metric_labels
+
+
 @task(base=AggregateTargetResults, name='aggregate_target_results')
 def aggregate_target_results(result_id, algorithm):
     # Check that we've completed the background tasks at least once. We need
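
The column-alignment logic in clean_metric_data can be exercised in isolation with plain NumPy, without the Django ORM or a real MetricCatalog. The sketch below uses made-up metric names and values purely for illustration; only the np.insert / np.delete bookkeeping mirrors the helper added above.

import numpy as np

# Hypothetical catalog order and observed result columns (illustration only).
metric_cat = ['throughput_txn_per_sec', 'buffer_hit_ratio', 'deadlocks']
metric_labels = ['buffer_hit_ratio', 'temp_metric']   # one catalog metric missing, one extra column
matrix = np.array([[0.91, 42.0]])                     # a single result row

# Insert catalog metrics the result did not report, defaulting them to 0.
for metric in sorted(set(metric_cat) - set(metric_labels)):
    index = metric_cat.index(metric)
    matrix = np.insert(matrix, index, 0, axis=1)
    metric_labels.insert(index, metric)

# Drop columns that are not in the catalog.
unused = [i for i, n in enumerate(metric_labels) if n not in metric_cat]
matrix = np.delete(matrix, unused, 1)
for i in sorted(unused, reverse=True):
    del metric_labels[i]

print(metric_labels)   # ['throughput_txn_per_sec', 'buffer_hit_ratio', 'deadlocks']
print(matrix)          # [[0.   0.91 0.  ]]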
@@ -310,9 +340,17 @@ def train_ddpg(result_id):
     prev_result = Result.objects.filter(pk=prev_result_id)
     agg_data = DataUtil.aggregate_data(result)
-    metric_data = agg_data['y_matrix'].flatten()
     base_metric_data = (DataUtil.aggregate_data(base_result))['y_matrix'].flatten()
     prev_metric_data = (DataUtil.aggregate_data(prev_result))['y_matrix'].flatten()
+
+    result = Result.objects.get(pk=result_id)
+    target_objective = result.session.target_objective
+    prev_obj_idx = [i for i, n in enumerate(agg_data['y_columnlabels']) if n == target_objective]
+
+    # Clean metric data
+    metric_data, metric_labels = clean_metric_data(agg_data['y_matrix'],
+                                                   agg_data['y_columnlabels'], session)
+    metric_data = metric_data.flatten()
     metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
     normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
@@ -327,9 +365,7 @@ def train_ddpg(result_id):
     LOG.info('knob_num: %d, metric_num: %d', knob_num, metric_num)
 
     # Filter ys by current target objective metric
-    result = Result.objects.get(pk=result_id)
-    target_objective = result.session.target_objective
-    target_obj_idx = [i for i, n in enumerate(agg_data['y_columnlabels']) if n == target_objective]
+    target_obj_idx = [i for i, n in enumerate(metric_labels) if n == target_objective]
     if len(target_obj_idx) == 0:
         raise Exception(('Could not find target objective in metrics '
                          '(target_obj={})').format(target_objective))
@@ -338,8 +374,8 @@ def train_ddpg(result_id):
                          'metrics (target_obj={})').format(len(target_obj_idx),
                                                            target_objective))
     objective = metric_data[target_obj_idx]
-    base_objective = base_metric_data[target_obj_idx]
-    prev_objective = prev_metric_data[target_obj_idx]
+    base_objective = base_metric_data[prev_obj_idx]
+    prev_objective = prev_metric_data[prev_obj_idx]
 
     metric_meta = db.target_objectives.get_metric_metadata(
         result.session.dbms.pk, result.session.target_objective)
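
The indexing change above is easy to misread: the current result's metrics now go through clean_metric_data, so the target objective is looked up in the cleaned metric_labels (target_obj_idx), while base_metric_data and prev_metric_data are still flattened straight from DataUtil.aggregate_data and therefore keep the raw y_columnlabels order (prev_obj_idx). A minimal sketch of that distinction, with made-up label lists:

# Illustration only: both label lists are invented.
target_objective = 'throughput_txn_per_sec'

raw_labels = ['deadlocks', 'throughput_txn_per_sec']        # order from aggregate_data
cleaned_labels = ['throughput_txn_per_sec',
                  'buffer_hit_ratio', 'deadlocks']           # order after clean_metric_data

prev_obj_idx = [i for i, n in enumerate(raw_labels) if n == target_objective]        # [1]
target_obj_idx = [i for i, n in enumerate(cleaned_labels) if n == target_objective]  # [0]

# objective indexes the cleaned metric_data with target_obj_idx;
# base_objective / prev_objective index the raw vectors with prev_obj_idx.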
@@ -390,7 +426,8 @@ def configuration_recommendation_ddpg(result_info):  # pylint: disable=invalid-n
     result = Result.objects.filter(pk=result_id)
     session = Result.objects.get(pk=result_id).session
     agg_data = DataUtil.aggregate_data(result)
-    metric_data = agg_data['y_matrix'].flatten()
+    metric_data, _ = clean_metric_data(agg_data['y_matrix'], agg_data['y_columnlabels'], session)
+    metric_data = metric_data.flatten()
     metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
     normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
     cleaned_knob_data = clean_knob_data(agg_data['X_matrix'], agg_data['X_columnlabels'],
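
Both call sites flatten the cleaned matrix and then reshape it back to 2-D because MinMaxScaler operates on (n_samples, n_features) arrays. A quick shape check with a made-up metric matrix (not the actual task code):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Made-up cleaned metric matrix: one result x three metrics.
matrix = np.array([[120.0, 0.91, 3.0]])

metric_data = matrix.flatten()         # shape (3,)
row = metric_data.reshape(1, -1)       # shape (1, 3), as MinMaxScaler expects
scaler = MinMaxScaler().fit(row)
normalized = scaler.transform(row)[0]  # shape (3,); with a single fitted sample each
                                       # feature's min equals its max, so values map to 0
print(metric_data.shape, row.shape, normalized.shape)   # (3,) (1, 3) (3,)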