Make the dummy encoder in knob identification optional (gated by ENABLE_DUMMY_ENCODER)

This commit is contained in:
bohanjason 2019-12-14 23:41:58 -05:00 committed by Dana Van Aken
parent 1994a09a6e
commit d5f7ae65eb
2 changed files with 17 additions and 12 deletions

View File

@ -482,6 +482,8 @@ def configuration_recommendation(recommendation_input):
workload=mapped_workload, workload=mapped_workload,
task_type=PipelineTaskType.RANKED_KNOBS) task_type=PipelineTaskType.RANKED_KNOBS)
ranked_knobs = JSONUtil.loads(ranked_knobs.data)[:IMPORTANT_KNOB_NUMBER] ranked_knobs = JSONUtil.loads(ranked_knobs.data)[:IMPORTANT_KNOB_NUMBER]
# Sort the important knobs so that the columns of input X are in a fixed order
ranked_knobs = sorted(ranked_knobs)
ranked_knob_idxs = [i for i, cl in enumerate(X_columnlabels) if cl in ranked_knobs] ranked_knob_idxs = [i for i, cl in enumerate(X_columnlabels) if cl in ranked_knobs]
X_workload = X_workload[:, ranked_knob_idxs] X_workload = X_workload[:, ranked_knob_idxs]
X_target = X_target[:, ranked_knob_idxs] X_target = X_target[:, ranked_knob_idxs]

View File

@ -18,7 +18,7 @@ from analysis.preprocessing import (Bin, get_shuffle_indices,
DummyEncoder, DummyEncoder,
consolidate_columnlabels) consolidate_columnlabels)
from website.models import PipelineData, PipelineRun, Result, Workload from website.models import PipelineData, PipelineRun, Result, Workload
from website.settings import RUN_EVERY from website.settings import RUN_EVERY, ENABLE_DUMMY_ENCODER
from website.types import PipelineTaskType, WorkloadStatusType from website.types import PipelineTaskType, WorkloadStatusType
from website.utils import DataUtil, JSONUtil from website.utils import DataUtil, JSONUtil
@ -296,6 +296,7 @@ def run_knob_identification(knob_data, metric_data, dbms):
nonconst_metric_columnlabels.append(cl) nonconst_metric_columnlabels.append(cl)
nonconst_metric_matrix = np.hstack(nonconst_metric_matrix) nonconst_metric_matrix = np.hstack(nonconst_metric_matrix)
if ENABLE_DUMMY_ENCODER:
# determine which knobs need encoding (enums with >2 possible values) # determine which knobs need encoding (enums with >2 possible values)
categorical_info = DataUtil.dummy_encoder_helper(nonconst_knob_columnlabels, categorical_info = DataUtil.dummy_encoder_helper(nonconst_knob_columnlabels,
@ -308,6 +309,8 @@ def run_knob_identification(knob_data, metric_data, dbms):
encoded_knob_matrix = dummy_encoder.fit_transform( encoded_knob_matrix = dummy_encoder.fit_transform(
nonconst_knob_matrix) nonconst_knob_matrix)
encoded_knob_columnlabels = dummy_encoder.new_labels encoded_knob_columnlabels = dummy_encoder.new_labels
else:
encoded_knob_columnlabels = nonconst_knob_columnlabels
# standardize values in each column to N(0, 1) # standardize values in each column to N(0, 1)
standardizer = StandardScaler() standardizer = StandardScaler()