support enum/boolean in GPR/DNN

2019-12-10 23:03:50 -05:00 · 2019-12-10 23:03:50 -05:00 · 6d76b22e3b
parent 4b37dec988
commit 6d76b22e3b
4 changed files with 51 additions and 24 deletions
--- a/client/driver/fabfile.py
+++ b/client/driver/fabfile.py
@ -425,13 +425,17 @@ def upload_batch(result_dir=None, sort=True, upload_code=None):
        upload_result(result_dir=result_dir, prefix=prefix, upload_code=upload_code)
        LOG.info('Uploaded result %d/%d: %s__*.json', i + 1, count, prefix)
@task
 def dump_database():
    dumpfile = os.path.join(dconf.DB_DUMP_DIR, dconf.DB_NAME + '.dump')
-    if not dconf.ORACLE_FLASH_BACK and file_exists(dumpfile):
+    if dconf.DB_TYPE == 'oracle':
-        LOG.info('%s already exists ! ', dumpfile)
+        if not dconf.ORACLE_FLASH_BACK and file_exists(dumpfile):
-        return False
+            LOG.info('%s already exists ! ', dumpfile)
            return False
    else:
        if file_exists(dumpfile):
            LOG.info('%s already exists ! ', dumpfile)
            return False
    if dconf.ORACLE_FLASH_BACK:
        LOG.info('create restore point %s for database %s in %s', dconf.RESTORE_POINT,
@ -582,7 +586,7 @@ def loop(i):
@task
-def run_loops(max_iter=1):
+def run_loops(max_iter=10):
    # dump database if it's not done before.
    dump = dump_database()
--- a/server/website/website/set_default_knobs.py
+++ b/server/website/website/set_default_knobs.py
@ -14,8 +14,8 @@ LOG = logging.getLogger(__name__)
 # tunable knobs in the KnobCatalog will be used instead.
 DEFAULT_TUNABLE_KNOBS = {
    DBMSType.POSTGRES: {
-        "global.checkpoint_completion_target",
+        "global.autovacuum",
-        "global.default_statistics_target",
+        "global.archive_mode",
        "global.effective_cache_size",
        "global.maintenance_work_mem",
        "global.max_wal_size",
@ -59,7 +59,7 @@ STORAGE_PERCENT = 0.8
 SESSION_NUM = 50.0
-def set_default_knobs(session):
+def set_default_knobs(session, cascade=True):
    dbtype = session.dbms.type
    default_tunable_knobs = DEFAULT_TUNABLE_KNOBS.get(dbtype)
@ -71,7 +71,19 @@ def set_default_knobs(session):
        tunable = knob.name in default_tunable_knobs
        minval = knob.minval
-        if knob.vartype in (VarType.INTEGER, VarType.REAL):
+        # set session knob tunable in knob catalog
        if tunable and cascade:
            knob.tunable = True
            knob.save()
        if knob.vartype is VarType.ENUM:
            enumvals = knob.enumvals.split(',')
            minval = 0
            maxval = len(enumvals) - 1
        elif knob.vartype is VarType.BOOL:
            minval = 0
            maxval = 1
        elif knob.vartype in (VarType.INTEGER, VarType.REAL):
            vtype = int if knob.vartype == VarType.INTEGER else float
            minval = vtype(minval) if minval is not None else MINVAL
--- a/server/website/website/settings/constants.py
+++ b/server/website/website/settings/constants.py
@ -19,6 +19,9 @@ TOP_NUM_CONFIG = 10
 # ---CONSTRAINTS CONSTANTS---
 # address categorical knobs (enum, boolean)
 ENABLE_DUMMY_ENCODER = False
 #  Initial probability to flip categorical feature in apply_constraints
 #  server/analysis/constraints.py
 INIT_FLIP_PROB = 0.3
--- a/server/website/website/tasks/async_tasks.py
+++ b/server/website/website/tasks/async_tasks.py
@ -43,7 +43,7 @@ from website.settings import (USE_GPFLOW, DEFAULT_LENGTH_SCALE, DEFAULT_MAGNITUD
                              DNN_TRAIN_ITER, DNN_EXPLORE, DNN_EXPLORE_ITER,
                              DNN_NOISE_SCALE_BEGIN, DNN_NOISE_SCALE_END,
                              DNN_DEBUG, DNN_DEBUG_INTERVAL, GPR_DEBUG, UCB_BETA,
-                              GPR_MODEL_NAME)
+                              GPR_MODEL_NAME, ENABLE_DUMMY_ENCODER)
 from website.settings import INIT_FLIP_PROB, FLIP_PROB_DECAY
 from website.types import VarType
@ -526,17 +526,23 @@ def configuration_recommendation(recommendation_input):
    X_matrix = np.vstack([X_target, X_workload])
    # Dummy encode categorical variables
-    categorical_info = DataUtil.dummy_encoder_helper(X_columnlabels,
+    if ENABLE_DUMMY_ENCODER:
-                                                     mapped_workload.dbms)
+        categorical_info = DataUtil.dummy_encoder_helper(X_columnlabels,
-    dummy_encoder = DummyEncoder(categorical_info['n_values'],
+                                                         mapped_workload.dbms)
-                                 categorical_info['categorical_features'],
+        dummy_encoder = DummyEncoder(categorical_info['n_values'],
-                                 categorical_info['cat_columnlabels'],
+                                     categorical_info['categorical_features'],
-                                 categorical_info['noncat_columnlabels'])
+                                     categorical_info['cat_columnlabels'],
-    X_matrix = dummy_encoder.fit_transform(X_matrix)
+                                     categorical_info['noncat_columnlabels'])
-
+        X_matrix = dummy_encoder.fit_transform(X_matrix)
-    # below two variables are needed for correctly determing max/min on dummies
+        binary_encoder = categorical_info['binary_vars']
-    binary_index_set = set(categorical_info['binary_vars'])
+        # below two variables are needed for correctly determining max/min on dummies
-    total_dummies = dummy_encoder.total_dummies()
+        binary_index_set = set(categorical_info['binary_vars'])
        total_dummies = dummy_encoder.total_dummies()
    else:
        dummy_encoder = None
        binary_encoder = None
        binary_index_set = []
        total_dummies = 0
    # Scale to N(0, 1)
    X_scaler = StandardScaler()
@ -566,7 +572,7 @@ def configuration_recommendation(recommendation_input):
    # Set up constraint helper
    constraint_helper = ParamConstraintHelper(scaler=X_scaler,
                                              encoder=dummy_encoder,
-                                              binary_vars=categorical_info['binary_vars'],
+                                              binary_vars=binary_encoder,
                                              init_flip_prob=INIT_FLIP_PROB,
                                              flip_prob_decay=FLIP_PROB_DECAY)
@ -686,8 +692,10 @@ def configuration_recommendation(recommendation_input):
    best_config_idx = np.argmin(res.minl.ravel())
    best_config = res.minl_conf[best_config_idx, :]
    best_config = X_scaler.inverse_transform(best_config)
-    # Decode one-hot encoding into categorical knobs
+
-    best_config = dummy_encoder.inverse_transform(best_config)
+    if ENABLE_DUMMY_ENCODER:
        # Decode one-hot encoding into categorical knobs
        best_config = dummy_encoder.inverse_transform(best_config)
    # Although we have max/min limits in the GPRGD training session, it may
    # lose some precisions. e.g. 0.99..99 >= 1.0 may be True on the scaled data,