Disable dummy encoder in knob identification (now controlled by the ENABLE_DUMMY_ENCODER setting)
This commit is contained in:
parent
1994a09a6e
commit
d5f7ae65eb
|
@ -482,6 +482,8 @@ def configuration_recommendation(recommendation_input):
|
||||||
workload=mapped_workload,
|
workload=mapped_workload,
|
||||||
task_type=PipelineTaskType.RANKED_KNOBS)
|
task_type=PipelineTaskType.RANKED_KNOBS)
|
||||||
ranked_knobs = JSONUtil.loads(ranked_knobs.data)[:IMPORTANT_KNOB_NUMBER]
|
ranked_knobs = JSONUtil.loads(ranked_knobs.data)[:IMPORTANT_KNOB_NUMBER]
|
||||||
|
# Sort the important knobs to fix the columns of input X
|
||||||
|
ranked_knobs = sorted(ranked_knobs)
|
||||||
ranked_knob_idxs = [i for i, cl in enumerate(X_columnlabels) if cl in ranked_knobs]
|
ranked_knob_idxs = [i for i, cl in enumerate(X_columnlabels) if cl in ranked_knobs]
|
||||||
X_workload = X_workload[:, ranked_knob_idxs]
|
X_workload = X_workload[:, ranked_knob_idxs]
|
||||||
X_target = X_target[:, ranked_knob_idxs]
|
X_target = X_target[:, ranked_knob_idxs]
|
||||||
|
|
|
@ -18,7 +18,7 @@ from analysis.preprocessing import (Bin, get_shuffle_indices,
|
||||||
DummyEncoder,
|
DummyEncoder,
|
||||||
consolidate_columnlabels)
|
consolidate_columnlabels)
|
||||||
from website.models import PipelineData, PipelineRun, Result, Workload
|
from website.models import PipelineData, PipelineRun, Result, Workload
|
||||||
from website.settings import RUN_EVERY
|
from website.settings import RUN_EVERY, ENABLE_DUMMY_ENCODER
|
||||||
from website.types import PipelineTaskType, WorkloadStatusType
|
from website.types import PipelineTaskType, WorkloadStatusType
|
||||||
from website.utils import DataUtil, JSONUtil
|
from website.utils import DataUtil, JSONUtil
|
||||||
|
|
||||||
|
@ -296,18 +296,21 @@ def run_knob_identification(knob_data, metric_data, dbms):
|
||||||
nonconst_metric_columnlabels.append(cl)
|
nonconst_metric_columnlabels.append(cl)
|
||||||
nonconst_metric_matrix = np.hstack(nonconst_metric_matrix)
|
nonconst_metric_matrix = np.hstack(nonconst_metric_matrix)
|
||||||
|
|
||||||
# determine which knobs need encoding (enums with >2 possible values)
|
if ENABLE_DUMMY_ENCODER:
|
||||||
|
# determine which knobs need encoding (enums with >2 possible values)
|
||||||
|
|
||||||
categorical_info = DataUtil.dummy_encoder_helper(nonconst_knob_columnlabels,
|
categorical_info = DataUtil.dummy_encoder_helper(nonconst_knob_columnlabels,
|
||||||
dbms)
|
dbms)
|
||||||
# encode categorical variable first (at least, before standardize)
|
# encode categorical variable first (at least, before standardize)
|
||||||
dummy_encoder = DummyEncoder(categorical_info['n_values'],
|
dummy_encoder = DummyEncoder(categorical_info['n_values'],
|
||||||
categorical_info['categorical_features'],
|
categorical_info['categorical_features'],
|
||||||
categorical_info['cat_columnlabels'],
|
categorical_info['cat_columnlabels'],
|
||||||
categorical_info['noncat_columnlabels'])
|
categorical_info['noncat_columnlabels'])
|
||||||
encoded_knob_matrix = dummy_encoder.fit_transform(
|
encoded_knob_matrix = dummy_encoder.fit_transform(
|
||||||
nonconst_knob_matrix)
|
nonconst_knob_matrix)
|
||||||
encoded_knob_columnlabels = dummy_encoder.new_labels
|
encoded_knob_columnlabels = dummy_encoder.new_labels
|
||||||
|
else:
|
||||||
|
encoded_knob_columnlabels = nonconst_knob_columnlabels
|
||||||
|
|
||||||
# standardize values in each column to N(0, 1)
|
# standardize values in each column to N(0, 1)
|
||||||
standardizer = StandardScaler()
|
standardizer = StandardScaler()
|
||||||
|
|
Loading…
Reference in New Issue