From 6d76b22e3b00d29036e782a94eb4c509d77a962f Mon Sep 17 00:00:00 2001
From: bohanjason
Date: Tue, 10 Dec 2019 23:03:50 -0500
Subject: [PATCH] support enum/boolean in GPR/DNN

---
 client/driver/fabfile.py                      | 14 +++++---
 server/website/website/set_default_knobs.py   | 20 ++++++++---
 server/website/website/settings/constants.py  |  3 ++
 server/website/website/tasks/async_tasks.py   | 38 ++++++++++++--------
 4 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/client/driver/fabfile.py b/client/driver/fabfile.py
index 943648a..e63baa1 100644
--- a/client/driver/fabfile.py
+++ b/client/driver/fabfile.py
@@ -425,13 +425,17 @@ def upload_batch(result_dir=None, sort=True, upload_code=None):
         upload_result(result_dir=result_dir, prefix=prefix, upload_code=upload_code)
         LOG.info('Uploaded result %d/%d: %s__*.json', i + 1, count, prefix)
 
-
 @task
 def dump_database():
     dumpfile = os.path.join(dconf.DB_DUMP_DIR, dconf.DB_NAME + '.dump')
-    if not dconf.ORACLE_FLASH_BACK and file_exists(dumpfile):
-        LOG.info('%s already exists ! ', dumpfile)
-        return False
+    if dconf.DB_TYPE == 'oracle':
+        if not dconf.ORACLE_FLASH_BACK and file_exists(dumpfile):
+            LOG.info('%s already exists ! ', dumpfile)
+            return False
+    else:
+        if file_exists(dumpfile):
+            LOG.info('%s already exists ! ', dumpfile)
+            return False
 
     if dconf.ORACLE_FLASH_BACK:
         LOG.info('create restore point %s for database %s in %s', dconf.RESTORE_POINT,
@@ -582,7 +586,7 @@ def loop(i):
 
 
 @task
-def run_loops(max_iter=1):
+def run_loops(max_iter=10):
     # dump database if it's not done before.
     dump = dump_database()
 
diff --git a/server/website/website/set_default_knobs.py b/server/website/website/set_default_knobs.py
index 112eaae..3129e77 100644
--- a/server/website/website/set_default_knobs.py
+++ b/server/website/website/set_default_knobs.py
@@ -14,8 +14,8 @@ LOG = logging.getLogger(__name__)
 # tunable knobs in the KnobCatalog will be used instead.
 DEFAULT_TUNABLE_KNOBS = {
     DBMSType.POSTGRES: {
-        "global.checkpoint_completion_target",
-        "global.default_statistics_target",
+        "global.autovacuum",
+        "global.archive_mode",
         "global.effective_cache_size",
         "global.maintenance_work_mem",
         "global.max_wal_size",
@@ -59,7 +59,7 @@ STORAGE_PERCENT = 0.8
 SESSION_NUM = 50.0
 
 
-def set_default_knobs(session):
+def set_default_knobs(session, cascade=True):
     dbtype = session.dbms.type
 
     default_tunable_knobs = DEFAULT_TUNABLE_KNOBS.get(dbtype)
@@ -71,7 +71,19 @@ def set_default_knobs(session):
         tunable = knob.name in default_tunable_knobs
         minval = knob.minval
 
-        if knob.vartype in (VarType.INTEGER, VarType.REAL):
+        # set session knob tunable in knob catalog
+        if tunable and cascade:
+            knob.tunable = True
+            knob.save()
+
+        if knob.vartype is VarType.ENUM:
+            enumvals = knob.enumvals.split(',')
+            minval = 0
+            maxval = len(enumvals) - 1
+        elif knob.vartype is VarType.BOOL:
+            minval = 0
+            maxval = 1
+        elif knob.vartype in (VarType.INTEGER, VarType.REAL):
             vtype = int if knob.vartype == VarType.INTEGER else float
 
             minval = vtype(minval) if minval is not None else MINVAL
diff --git a/server/website/website/settings/constants.py b/server/website/website/settings/constants.py
index 3d5033c..7b2cff5 100644
--- a/server/website/website/settings/constants.py
+++ b/server/website/website/settings/constants.py
@@ -19,6 +19,9 @@ TOP_NUM_CONFIG = 10
 
 # ---CONSTRAINTS CONSTANTS---
 
+# address categorical knobs (enum, boolean)
+ENABLE_DUMMY_ENCODER = False
+
 # Initial probability to flip categorical feature in apply_constraints
 # server/analysis/constraints.py
 INIT_FLIP_PROB = 0.3
diff --git a/server/website/website/tasks/async_tasks.py b/server/website/website/tasks/async_tasks.py
index 38c2c86..9518444 100644
--- a/server/website/website/tasks/async_tasks.py
+++ b/server/website/website/tasks/async_tasks.py
@@ -43,7 +43,7 @@ from website.settings import (USE_GPFLOW, DEFAULT_LENGTH_SCALE, DEFAULT_MAGNITUD
                               DNN_TRAIN_ITER, DNN_EXPLORE, DNN_EXPLORE_ITER,
                               DNN_NOISE_SCALE_BEGIN, DNN_NOISE_SCALE_END, DNN_DEBUG, DNN_DEBUG_INTERVAL,
                               GPR_DEBUG, UCB_BETA,
-                              GPR_MODEL_NAME)
+                              GPR_MODEL_NAME, ENABLE_DUMMY_ENCODER)
 from website.settings import INIT_FLIP_PROB, FLIP_PROB_DECAY
 
 from website.types import VarType
@@ -526,17 +526,23 @@ def configuration_recommendation(recommendation_input):
     X_matrix = np.vstack([X_target, X_workload])
 
     # Dummy encode categorial variables
-    categorical_info = DataUtil.dummy_encoder_helper(X_columnlabels,
-                                                     mapped_workload.dbms)
-    dummy_encoder = DummyEncoder(categorical_info['n_values'],
-                                 categorical_info['categorical_features'],
-                                 categorical_info['cat_columnlabels'],
-                                 categorical_info['noncat_columnlabels'])
-    X_matrix = dummy_encoder.fit_transform(X_matrix)
-
-    # below two variables are needed for correctly determing max/min on dummies
-    binary_index_set = set(categorical_info['binary_vars'])
-    total_dummies = dummy_encoder.total_dummies()
+    if ENABLE_DUMMY_ENCODER:
+        categorical_info = DataUtil.dummy_encoder_helper(X_columnlabels,
+                                                         mapped_workload.dbms)
+        dummy_encoder = DummyEncoder(categorical_info['n_values'],
+                                     categorical_info['categorical_features'],
+                                     categorical_info['cat_columnlabels'],
+                                     categorical_info['noncat_columnlabels'])
+        X_matrix = dummy_encoder.fit_transform(X_matrix)
+        binary_encoder = categorical_info['binary_vars']
+        # below two variables are needed for correctly determing max/min on dummies
+        binary_index_set = set(categorical_info['binary_vars'])
+        total_dummies = dummy_encoder.total_dummies()
+    else:
+        dummy_encoder = None
+        binary_encoder = None
+        binary_index_set = []
+        total_dummies = 0
 
     # Scale to N(0, 1)
     X_scaler = StandardScaler()
@@ -566,7 +572,7 @@ def configuration_recommendation(recommendation_input):
 
     # Set up constraint helper
     constraint_helper = ParamConstraintHelper(scaler=X_scaler,
                                               encoder=dummy_encoder,
-                                              binary_vars=categorical_info['binary_vars'],
+                                              binary_vars=binary_encoder,
                                               init_flip_prob=INIT_FLIP_PROB,
                                               flip_prob_decay=FLIP_PROB_DECAY)
 
@@ -686,8 +692,10 @@ def configuration_recommendation(recommendation_input):
     best_config_idx = np.argmin(res.minl.ravel())
     best_config = res.minl_conf[best_config_idx, :]
     best_config = X_scaler.inverse_transform(best_config)
-    # Decode one-hot encoding into categorical knobs
-    best_config = dummy_encoder.inverse_transform(best_config)
+
+    if ENABLE_DUMMY_ENCODER:
+        # Decode one-hot encoding into categorical knobs
+        best_config = dummy_encoder.inverse_transform(best_config)
 
     # Although we have max/min limits in the GPRGD training session, it may
     # lose some precisions. e.g. 0.99..99 >= 1.0 may be True on the scaled data,
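
Note (not part of the patch above): a minimal sketch of the range mapping this change introduces in set_default_knobs.py, where ENUM knobs are tuned over index positions 0..len(enumvals)-1 and BOOL knobs over 0/1, so GPR/DNN can treat them as plain numeric dimensions once the one-hot dummy encoder is disabled (ENABLE_DUMMY_ENCODER = False). The VarType constants, the Knob tuple, and the numeric_range helper below are simplified, hypothetical stand-ins for the real website.types.VarType and the KnobCatalog model, not code from the commit.

    # Illustrative sketch only -- assumes simplified stand-ins for the real models.
    from collections import namedtuple

    class VarType:  # stand-in for website.types.VarType
        BOOL, ENUM, INTEGER, REAL = 1, 2, 3, 4

    # stand-in for a KnobCatalog row
    Knob = namedtuple('Knob', ['name', 'vartype', 'enumvals', 'minval', 'maxval'])

    def numeric_range(knob):
        """Return the (minval, maxval) pair a tuner would search over (hypothetical helper)."""
        if knob.vartype == VarType.ENUM:
            # an enum value is represented by its index into the comma-separated enumvals list
            return 0, len(knob.enumvals.split(',')) - 1
        if knob.vartype == VarType.BOOL:
            # booleans become a 0/1 integer dimension
            return 0, 1
        # numeric knobs keep their catalog min/max
        return knob.minval, knob.maxval

    print(numeric_range(Knob('global.wal_sync_method', VarType.ENUM,
                             'fsync,fdatasync,open_sync', None, None)))            # -> (0, 2)
    print(numeric_range(Knob('global.autovacuum', VarType.BOOL, None, None, None)))  # -> (0, 1)
    print(numeric_range(Knob('global.work_mem', VarType.INTEGER, None, 64, 2097151)))  # -> (64, 2097151)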