support enum/boolean in GPR/DNN
This commit is contained in:
		
							parent
							
								
									4b37dec988
								
							
						
					
					
						commit
						6d76b22e3b
					
				|  | @ -425,13 +425,17 @@ def upload_batch(result_dir=None, sort=True, upload_code=None): | ||||||
|         upload_result(result_dir=result_dir, prefix=prefix, upload_code=upload_code) |         upload_result(result_dir=result_dir, prefix=prefix, upload_code=upload_code) | ||||||
|         LOG.info('Uploaded result %d/%d: %s__*.json', i + 1, count, prefix) |         LOG.info('Uploaded result %d/%d: %s__*.json', i + 1, count, prefix) | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| @task | @task | ||||||
| def dump_database(): | def dump_database(): | ||||||
|     dumpfile = os.path.join(dconf.DB_DUMP_DIR, dconf.DB_NAME + '.dump') |     dumpfile = os.path.join(dconf.DB_DUMP_DIR, dconf.DB_NAME + '.dump') | ||||||
|     if not dconf.ORACLE_FLASH_BACK and file_exists(dumpfile): |     if dconf.DB_TYPE == 'oracle': | ||||||
|         LOG.info('%s already exists ! ', dumpfile) |         if not dconf.ORACLE_FLASH_BACK and file_exists(dumpfile): | ||||||
|         return False |             LOG.info('%s already exists ! ', dumpfile) | ||||||
|  |             return False | ||||||
|  |     else: | ||||||
|  |         if file_exists(dumpfile): | ||||||
|  |             LOG.info('%s already exists ! ', dumpfile) | ||||||
|  |             return False | ||||||
| 
 | 
 | ||||||
|     if dconf.ORACLE_FLASH_BACK: |     if dconf.ORACLE_FLASH_BACK: | ||||||
|         LOG.info('create restore point %s for database %s in %s', dconf.RESTORE_POINT, |         LOG.info('create restore point %s for database %s in %s', dconf.RESTORE_POINT, | ||||||
|  | @ -582,7 +586,7 @@ def loop(i): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @task | @task | ||||||
| def run_loops(max_iter=1): | def run_loops(max_iter=10): | ||||||
|     # dump database if it's not done before. |     # dump database if it's not done before. | ||||||
|     dump = dump_database() |     dump = dump_database() | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -14,8 +14,8 @@ LOG = logging.getLogger(__name__) | ||||||
| # tunable knobs in the KnobCatalog will be used instead. | # tunable knobs in the KnobCatalog will be used instead. | ||||||
| DEFAULT_TUNABLE_KNOBS = { | DEFAULT_TUNABLE_KNOBS = { | ||||||
|     DBMSType.POSTGRES: { |     DBMSType.POSTGRES: { | ||||||
|         "global.checkpoint_completion_target", |         "global.autovacuum", | ||||||
|         "global.default_statistics_target", |         "global.archive_mode", | ||||||
|         "global.effective_cache_size", |         "global.effective_cache_size", | ||||||
|         "global.maintenance_work_mem", |         "global.maintenance_work_mem", | ||||||
|         "global.max_wal_size", |         "global.max_wal_size", | ||||||
|  | @ -59,7 +59,7 @@ STORAGE_PERCENT = 0.8 | ||||||
| SESSION_NUM = 50.0 | SESSION_NUM = 50.0 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def set_default_knobs(session): | def set_default_knobs(session, cascade=True): | ||||||
|     dbtype = session.dbms.type |     dbtype = session.dbms.type | ||||||
|     default_tunable_knobs = DEFAULT_TUNABLE_KNOBS.get(dbtype) |     default_tunable_knobs = DEFAULT_TUNABLE_KNOBS.get(dbtype) | ||||||
| 
 | 
 | ||||||
|  | @ -71,7 +71,19 @@ def set_default_knobs(session): | ||||||
|         tunable = knob.name in default_tunable_knobs |         tunable = knob.name in default_tunable_knobs | ||||||
|         minval = knob.minval |         minval = knob.minval | ||||||
| 
 | 
 | ||||||
|         if knob.vartype in (VarType.INTEGER, VarType.REAL): |         # set session knob tunable in knob catalog | ||||||
|  |         if tunable and cascade: | ||||||
|  |             knob.tunable = True | ||||||
|  |             knob.save() | ||||||
|  | 
 | ||||||
|  |         if knob.vartype is VarType.ENUM: | ||||||
|  |             enumvals = knob.enumvals.split(',') | ||||||
|  |             minval = 0 | ||||||
|  |             maxval = len(enumvals) - 1 | ||||||
|  |         elif knob.vartype is VarType.BOOL: | ||||||
|  |             minval = 0 | ||||||
|  |             maxval = 1 | ||||||
|  |         elif knob.vartype in (VarType.INTEGER, VarType.REAL): | ||||||
|             vtype = int if knob.vartype == VarType.INTEGER else float |             vtype = int if knob.vartype == VarType.INTEGER else float | ||||||
| 
 | 
 | ||||||
|             minval = vtype(minval) if minval is not None else MINVAL |             minval = vtype(minval) if minval is not None else MINVAL | ||||||
|  |  | ||||||
|  | @ -19,6 +19,9 @@ TOP_NUM_CONFIG = 10 | ||||||
| 
 | 
 | ||||||
| # ---CONSTRAINTS CONSTANTS--- | # ---CONSTRAINTS CONSTANTS--- | ||||||
| 
 | 
 | ||||||
|  | # address categorical knobs (enum, boolean) | ||||||
|  | ENABLE_DUMMY_ENCODER = False | ||||||
|  | 
 | ||||||
| #  Initial probability to flip categorical feature in apply_constraints | #  Initial probability to flip categorical feature in apply_constraints | ||||||
| #  server/analysis/constraints.py | #  server/analysis/constraints.py | ||||||
| INIT_FLIP_PROB = 0.3 | INIT_FLIP_PROB = 0.3 | ||||||
|  |  | ||||||
|  | @ -43,7 +43,7 @@ from website.settings import (USE_GPFLOW, DEFAULT_LENGTH_SCALE, DEFAULT_MAGNITUD | ||||||
|                               DNN_TRAIN_ITER, DNN_EXPLORE, DNN_EXPLORE_ITER, |                               DNN_TRAIN_ITER, DNN_EXPLORE, DNN_EXPLORE_ITER, | ||||||
|                               DNN_NOISE_SCALE_BEGIN, DNN_NOISE_SCALE_END, |                               DNN_NOISE_SCALE_BEGIN, DNN_NOISE_SCALE_END, | ||||||
|                               DNN_DEBUG, DNN_DEBUG_INTERVAL, GPR_DEBUG, UCB_BETA, |                               DNN_DEBUG, DNN_DEBUG_INTERVAL, GPR_DEBUG, UCB_BETA, | ||||||
|                               GPR_MODEL_NAME) |                               GPR_MODEL_NAME, ENABLE_DUMMY_ENCODER) | ||||||
| 
 | 
 | ||||||
| from website.settings import INIT_FLIP_PROB, FLIP_PROB_DECAY | from website.settings import INIT_FLIP_PROB, FLIP_PROB_DECAY | ||||||
| from website.types import VarType | from website.types import VarType | ||||||
|  | @ -526,17 +526,23 @@ def configuration_recommendation(recommendation_input): | ||||||
|     X_matrix = np.vstack([X_target, X_workload]) |     X_matrix = np.vstack([X_target, X_workload]) | ||||||
| 
 | 
 | ||||||
|     # Dummy encode categorical variables |     # Dummy encode categorical variables | ||||||
|     categorical_info = DataUtil.dummy_encoder_helper(X_columnlabels, |     if ENABLE_DUMMY_ENCODER: | ||||||
|                                                      mapped_workload.dbms) |         categorical_info = DataUtil.dummy_encoder_helper(X_columnlabels, | ||||||
|     dummy_encoder = DummyEncoder(categorical_info['n_values'], |                                                          mapped_workload.dbms) | ||||||
|                                  categorical_info['categorical_features'], |         dummy_encoder = DummyEncoder(categorical_info['n_values'], | ||||||
|                                  categorical_info['cat_columnlabels'], |                                      categorical_info['categorical_features'], | ||||||
|                                  categorical_info['noncat_columnlabels']) |                                      categorical_info['cat_columnlabels'], | ||||||
|     X_matrix = dummy_encoder.fit_transform(X_matrix) |                                      categorical_info['noncat_columnlabels']) | ||||||
| 
 |         X_matrix = dummy_encoder.fit_transform(X_matrix) | ||||||
|     # below two variables are needed for correctly determining max/min on dummies |         binary_encoder = categorical_info['binary_vars'] | ||||||
|     binary_index_set = set(categorical_info['binary_vars']) |         # below two variables are needed for correctly determining max/min on dummies | ||||||
|     total_dummies = dummy_encoder.total_dummies() |         binary_index_set = set(categorical_info['binary_vars']) | ||||||
|  |         total_dummies = dummy_encoder.total_dummies() | ||||||
|  |     else: | ||||||
|  |         dummy_encoder = None | ||||||
|  |         binary_encoder = None | ||||||
|  |         binary_index_set = [] | ||||||
|  |         total_dummies = 0 | ||||||
| 
 | 
 | ||||||
|     # Scale to N(0, 1) |     # Scale to N(0, 1) | ||||||
|     X_scaler = StandardScaler() |     X_scaler = StandardScaler() | ||||||
|  | @ -566,7 +572,7 @@ def configuration_recommendation(recommendation_input): | ||||||
|     # Set up constraint helper |     # Set up constraint helper | ||||||
|     constraint_helper = ParamConstraintHelper(scaler=X_scaler, |     constraint_helper = ParamConstraintHelper(scaler=X_scaler, | ||||||
|                                               encoder=dummy_encoder, |                                               encoder=dummy_encoder, | ||||||
|                                               binary_vars=categorical_info['binary_vars'], |                                               binary_vars=binary_encoder, | ||||||
|                                               init_flip_prob=INIT_FLIP_PROB, |                                               init_flip_prob=INIT_FLIP_PROB, | ||||||
|                                               flip_prob_decay=FLIP_PROB_DECAY) |                                               flip_prob_decay=FLIP_PROB_DECAY) | ||||||
| 
 | 
 | ||||||
|  | @ -686,8 +692,10 @@ def configuration_recommendation(recommendation_input): | ||||||
|     best_config_idx = np.argmin(res.minl.ravel()) |     best_config_idx = np.argmin(res.minl.ravel()) | ||||||
|     best_config = res.minl_conf[best_config_idx, :] |     best_config = res.minl_conf[best_config_idx, :] | ||||||
|     best_config = X_scaler.inverse_transform(best_config) |     best_config = X_scaler.inverse_transform(best_config) | ||||||
|     # Decode one-hot encoding into categorical knobs | 
 | ||||||
|     best_config = dummy_encoder.inverse_transform(best_config) |     if ENABLE_DUMMY_ENCODER: | ||||||
|  |         # Decode one-hot encoding into categorical knobs | ||||||
|  |         best_config = dummy_encoder.inverse_transform(best_config) | ||||||
| 
 | 
 | ||||||
|     # Although we have max/min limits in the GPRGD training session, it may |     # Although we have max/min limits in the GPRGD training session, it may | ||||||
|     # lose some precision. e.g. 0.99..99 >= 1.0 may be True on the scaled data, |     # lose some precision. e.g. 0.99..99 >= 1.0 may be True on the scaled data, | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue