clean metrics for DDPG
This commit is contained in:
		
							parent
							
								
									2916330e89
								
							
						
					
					
						commit
						285522a0f5
					
				| 
						 | 
					@ -32,3 +32,9 @@ VIEWS_FOR_PRUNING = {
 | 
				
			||||||
    DBMSType.ORACLE: ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event',
 | 
					    DBMSType.ORACLE: ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event',
 | 
				
			||||||
                      'dba_workload_replays', 'dba_hist_sys_time_model'],
 | 
					                      'dba_workload_replays', 'dba_hist_sys_time_model'],
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# The views used for DDPG
 | 
				
			||||||
 | 
					# WARNING: modifying this parameter will break all existing DDPG sessions
 | 
				
			||||||
 | 
					VIEWS_FOR_DDPG = {
 | 
				
			||||||
 | 
					    DBMSType.ORACLE: ['dba_hist_sys_time_model'],
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -34,7 +34,7 @@ from website.models import (PipelineData, PipelineRun, Result, Workload, Session
 | 
				
			||||||
from website import db
 | 
					from website import db
 | 
				
			||||||
from website.types import PipelineTaskType, AlgorithmType, VarType
 | 
					from website.types import PipelineTaskType, AlgorithmType, VarType
 | 
				
			||||||
from website.utils import DataUtil, JSONUtil
 | 
					from website.utils import DataUtil, JSONUtil
 | 
				
			||||||
from website.settings import ENABLE_DUMMY_ENCODER, TIME_ZONE
 | 
					from website.settings import ENABLE_DUMMY_ENCODER, TIME_ZONE, VIEWS_FOR_DDPG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LOG = get_task_logger(__name__)
 | 
					LOG = get_task_logger(__name__)
 | 
				
			||||||
| 
						 | 
					@ -420,6 +420,7 @@ def train_ddpg(train_ddpg_input):
 | 
				
			||||||
    result_id, algorithm, target_data = train_ddpg_input
 | 
					    result_id, algorithm, target_data = train_ddpg_input
 | 
				
			||||||
    result = Result.objects.get(pk=result_id)
 | 
					    result = Result.objects.get(pk=result_id)
 | 
				
			||||||
    session = result.session
 | 
					    session = result.session
 | 
				
			||||||
 | 
					    dbms = session.dbms
 | 
				
			||||||
    task_name = _get_task_name(session, result_id)
 | 
					    task_name = _get_task_name(session, result_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # If the preprocessing method has already generated a config, bypass this method.
 | 
					    # If the preprocessing method has already generated a config, bypass this method.
 | 
				
			||||||
| 
						 | 
					@ -480,13 +481,14 @@ def train_ddpg(train_ddpg_input):
 | 
				
			||||||
    prev_objective = prev_metric_data[target_obj_idx]
 | 
					    prev_objective = prev_metric_data[target_obj_idx]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Clean metric data
 | 
					    # Clean metric data
 | 
				
			||||||
 | 
					    views = VIEWS_FOR_DDPG.get(dbms.type, None)
 | 
				
			||||||
    metric_data, _ = DataUtil.clean_metric_data(agg_data['y_matrix'],
 | 
					    metric_data, _ = DataUtil.clean_metric_data(agg_data['y_matrix'],
 | 
				
			||||||
                                                agg_data['y_columnlabels'], session)
 | 
					                                                agg_data['y_columnlabels'], views)
 | 
				
			||||||
    metric_data = metric_data.flatten()
 | 
					    metric_data = metric_data.flatten()
 | 
				
			||||||
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
 | 
					    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
 | 
				
			||||||
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
 | 
					    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
 | 
				
			||||||
    prev_metric_data, _ = DataUtil.clean_metric_data(prev_agg_data['y_matrix'],
 | 
					    prev_metric_data, _ = DataUtil.clean_metric_data(prev_agg_data['y_matrix'],
 | 
				
			||||||
                                                     prev_agg_data['y_columnlabels'], session)
 | 
					                                                     prev_agg_data['y_columnlabels'], views)
 | 
				
			||||||
    prev_metric_data = prev_metric_data.flatten()
 | 
					    prev_metric_data = prev_metric_data.flatten()
 | 
				
			||||||
    prev_metric_scalar = MinMaxScaler().fit(prev_metric_data.reshape(1, -1))
 | 
					    prev_metric_scalar = MinMaxScaler().fit(prev_metric_data.reshape(1, -1))
 | 
				
			||||||
    prev_normalized_metric_data = prev_metric_scalar.transform(prev_metric_data.reshape(1, -1))[0]
 | 
					    prev_normalized_metric_data = prev_metric_scalar.transform(prev_metric_data.reshape(1, -1))[0]
 | 
				
			||||||
| 
						 | 
					@ -597,6 +599,7 @@ def configuration_recommendation_ddpg(recommendation_ddpg_input):  # pylint: dis
 | 
				
			||||||
    result_list = Result.objects.filter(pk=result_id)
 | 
					    result_list = Result.objects.filter(pk=result_id)
 | 
				
			||||||
    result = result_list.first()
 | 
					    result = result_list.first()
 | 
				
			||||||
    session = result.session
 | 
					    session = result.session
 | 
				
			||||||
 | 
					    dbms = session.dbms
 | 
				
			||||||
    task_name = _get_task_name(session, result_id)
 | 
					    task_name = _get_task_name(session, result_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    early_return, target_data_res = check_early_return(target_data, algorithm)
 | 
					    early_return, target_data_res = check_early_return(target_data, algorithm)
 | 
				
			||||||
| 
						 | 
					@ -609,8 +612,9 @@ def configuration_recommendation_ddpg(recommendation_ddpg_input):  # pylint: dis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    params = JSONUtil.loads(session.hyperparameters)
 | 
					    params = JSONUtil.loads(session.hyperparameters)
 | 
				
			||||||
    agg_data = DataUtil.aggregate_data(result_list)
 | 
					    agg_data = DataUtil.aggregate_data(result_list)
 | 
				
			||||||
 | 
					    views = VIEWS_FOR_DDPG.get(dbms.type, None)
 | 
				
			||||||
    metric_data, _ = DataUtil.clean_metric_data(agg_data['y_matrix'],
 | 
					    metric_data, _ = DataUtil.clean_metric_data(agg_data['y_matrix'],
 | 
				
			||||||
                                                agg_data['y_columnlabels'], session)
 | 
					                                                agg_data['y_columnlabels'], views)
 | 
				
			||||||
    metric_data = metric_data.flatten()
 | 
					    metric_data = metric_data.flatten()
 | 
				
			||||||
    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
 | 
					    metric_scalar = MinMaxScaler().fit(metric_data.reshape(1, -1))
 | 
				
			||||||
    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
 | 
					    normalized_metric_data = metric_scalar.transform(metric_data.reshape(1, -1))[0]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -269,15 +269,7 @@ def run_workload_characterization(metric_data, dbms=None):
 | 
				
			||||||
    LOG.debug("Workload characterization ~ initial data size: %s", matrix.shape)
 | 
					    LOG.debug("Workload characterization ~ initial data size: %s", matrix.shape)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    views = None if dbms is None else VIEWS_FOR_PRUNING.get(dbms.type, None)
 | 
					    views = None if dbms is None else VIEWS_FOR_PRUNING.get(dbms.type, None)
 | 
				
			||||||
    if views is not None:
 | 
					    matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, views)
 | 
				
			||||||
        useful_labels = []
 | 
					 | 
				
			||||||
        for label in columnlabels:
 | 
					 | 
				
			||||||
            for view in views:
 | 
					 | 
				
			||||||
                if view in label:
 | 
					 | 
				
			||||||
                    useful_labels.append(label)
 | 
					 | 
				
			||||||
                    break
 | 
					 | 
				
			||||||
        matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, None,
 | 
					 | 
				
			||||||
                                                          useful_labels)
 | 
					 | 
				
			||||||
    LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape)
 | 
					    LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Bin each column (metric) in the matrix by its decile
 | 
					    # Bin each column (metric) in the matrix by its decile
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -212,33 +212,35 @@ class DataUtil(object):
 | 
				
			||||||
        return X_unique, y_unique, rowlabels_unique
 | 
					        return X_unique, y_unique, rowlabels_unique
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def clean_metric_data(metric_matrix, metric_labels, session, useful_labels=None):
 | 
					    def clean_metric_data(metric_matrix, metric_labels, useful_views):
 | 
				
			||||||
        # Make metric_labels identical to useful_labels (if given)
 | 
					        # Keep only the metrics whose labels contain one of useful_views (if not None)
 | 
				
			||||||
        # If useful_labels is not given, let it to be all metrics in the catalog.
 | 
					        if useful_views is None:
 | 
				
			||||||
        if useful_labels is None:
 | 
					            return metric_matrix, metric_labels
 | 
				
			||||||
            metric_objs = MetricCatalog.objects.filter(dbms=session.dbms)
 | 
					
 | 
				
			||||||
            metric_cat = [session.target_objective]
 | 
					        useful_labels = []
 | 
				
			||||||
            for metric_obj in metric_objs:
 | 
					        for label in metric_labels:
 | 
				
			||||||
                metric_cat.append(metric_obj.name)
 | 
					            for view in useful_views:
 | 
				
			||||||
        else:
 | 
					                if view in label:
 | 
				
			||||||
            metric_cat = useful_labels
 | 
					                    useful_labels.append(label)
 | 
				
			||||||
        missing_columns = sorted(set(metric_cat) - set(metric_labels))
 | 
					                    break
 | 
				
			||||||
        unused_columns = set(metric_labels) - set(metric_cat)
 | 
					
 | 
				
			||||||
 | 
					        missing_columns = sorted(set(useful_labels) - set(metric_labels))
 | 
				
			||||||
 | 
					        unused_columns = set(metric_labels) - set(useful_labels)
 | 
				
			||||||
        LOG.debug("clean_metric_data: added %d metrics and removed %d metric.",
 | 
					        LOG.debug("clean_metric_data: added %d metrics and removed %d metric.",
 | 
				
			||||||
                  len(missing_columns), len(unused_columns))
 | 
					                  len(missing_columns), len(unused_columns))
 | 
				
			||||||
        default_val = 0
 | 
					        default_val = 0
 | 
				
			||||||
        metric_cat_size = len(metric_cat)
 | 
					        useful_labels_size = len(useful_labels)
 | 
				
			||||||
        matrix = np.ones((len(metric_matrix), metric_cat_size)) * default_val
 | 
					        matrix = np.ones((len(metric_matrix), useful_labels_size)) * default_val
 | 
				
			||||||
        metric_labels_dict = {n: i for i, n in enumerate(metric_labels)}
 | 
					        metric_labels_dict = {n: i for i, n in enumerate(metric_labels)}
 | 
				
			||||||
        # column labels in matrix has the same order as ones in metric catalog
 | 
					        # column labels in matrix have the same order as the ones in useful_labels
 | 
				
			||||||
        # missing values are filled with default_val
 | 
					        # missing values are filled with default_val
 | 
				
			||||||
        for i, metric_name in enumerate(metric_cat):
 | 
					        for i, metric_name in enumerate(useful_labels):
 | 
				
			||||||
            if metric_name in metric_labels_dict:
 | 
					            if metric_name in metric_labels_dict:
 | 
				
			||||||
                index = metric_labels_dict[metric_name]
 | 
					                index = metric_labels_dict[metric_name]
 | 
				
			||||||
                matrix[:, i] = metric_matrix[:, index]
 | 
					                matrix[:, i] = metric_matrix[:, index]
 | 
				
			||||||
        LOG.debug("clean_metric_data: final ~ matrix: %s, labels: %s", matrix.shape,
 | 
					        LOG.debug("clean_metric_data: final ~ matrix: %s, labels: %s", matrix.shape,
 | 
				
			||||||
                  len(metric_cat))
 | 
					                  useful_labels_size)
 | 
				
			||||||
        return matrix, metric_cat
 | 
					        return matrix, useful_labels
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def clean_knob_data(knob_matrix, knob_labels, sessions):
 | 
					    def clean_knob_data(knob_matrix, knob_labels, sessions):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue