Address Dana's comment
This commit is contained in:
		
							parent
							
								
									e7d5b0338a
								
							
						
					
					
						commit
						77eb5cdb42
					
				|  | @ -4,6 +4,8 @@ | ||||||
| # Copyright (c) 2017-18, Carnegie Mellon University Database Group | # Copyright (c) 2017-18, Carnegie Mellon University Database Group | ||||||
| # | # | ||||||
| 
 | 
 | ||||||
|  | from website.types import DBMSType | ||||||
|  | 
 | ||||||
| # These parameters are not specified for any session, so they can only be set here | # These parameters are not specified for any session, so they can only be set here | ||||||
| 
 | 
 | ||||||
| # If this flag is set, we check if celery is running, and restart celery if it is not. | # If this flag is set, we check if celery is running, and restart celery if it is not. | ||||||
|  | @ -26,4 +28,7 @@ KNOB_IDENT_USE_PRUNED_METRICS = False | ||||||
| MIN_WORKLOAD_RESULTS_COUNT = 5 | MIN_WORKLOAD_RESULTS_COUNT = 5 | ||||||
| 
 | 
 | ||||||
| # The views used for metrics pruning | # The views used for metrics pruning | ||||||
| VIEWS_FOR_PRUNING = ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event'] | VIEWS_FOR_PRUNING = { | ||||||
|  |     DBMSType.ORACLE: ['dba_hist_osstat', 'dba_hist_sysstat', 'dba_hist_system_event', | ||||||
|  |                       'dba_workload_replays', 'dba_hist_sys_time_model'], | ||||||
|  | } | ||||||
|  |  | ||||||
|  | @ -82,6 +82,8 @@ def run_background_tasks(): | ||||||
|             # Check that there are enough results in the workload |             # Check that there are enough results in the workload | ||||||
|             LOG.info("Not enough results in workload %s (# results: %s, # required: %s).", |             LOG.info("Not enough results in workload %s (# results: %s, # required: %s).", | ||||||
|                      workload_name, num_wkld_results, MIN_WORKLOAD_RESULTS_COUNT) |                      workload_name, num_wkld_results, MIN_WORKLOAD_RESULTS_COUNT) | ||||||
|  |             workload.status = WorkloadStatusType.PROCESSED | ||||||
|  |             workload.save() | ||||||
|             continue |             continue | ||||||
| 
 | 
 | ||||||
|         LOG.info("Aggregating data for workload %s...", workload_name) |         LOG.info("Aggregating data for workload %s...", workload_name) | ||||||
|  | @ -95,6 +97,16 @@ def run_background_tasks(): | ||||||
|                   metric_data['data'].shape) |                   metric_data['data'].shape) | ||||||
|         LOG.info("Done aggregating data for workload %s.", workload_name) |         LOG.info("Done aggregating data for workload %s.", workload_name) | ||||||
| 
 | 
 | ||||||
|  |         num_valid_results = knob_data['data'].shape[0]  # pylint: disable=unsubscriptable-object | ||||||
|  |         if num_valid_results < MIN_WORKLOAD_RESULTS_COUNT: | ||||||
|  |             # Check that there are enough valid results in the workload | ||||||
|  |             LOG.info("Not enough valid results in workload %s (# valid results: " | ||||||
|  |                      "%s, # required: %s).", workload_name, num_valid_results, | ||||||
|  |                      MIN_WORKLOAD_RESULTS_COUNT) | ||||||
|  |             workload.status = WorkloadStatusType.PROCESSED | ||||||
|  |             workload.save() | ||||||
|  |             continue | ||||||
|  | 
 | ||||||
|         # Knob_data and metric_data are 2D numpy arrays. Convert them into a |         # Knob_data and metric_data are 2D numpy arrays. Convert them into a | ||||||
|         # JSON-friendly (nested) lists and then save them as new PipelineData |         # JSON-friendly (nested) lists and then save them as new PipelineData | ||||||
|         # objects. |         # objects. | ||||||
|  | @ -122,7 +134,7 @@ def run_background_tasks(): | ||||||
|         # pruned metrics for this workload and save them in a new PipelineData |         # pruned metrics for this workload and save them in a new PipelineData | ||||||
|         # object. |         # object. | ||||||
|         LOG.info("Pruning metrics for workload %s...", workload_name) |         LOG.info("Pruning metrics for workload %s...", workload_name) | ||||||
|         pruned_metrics = run_workload_characterization(metric_data=metric_data) |         pruned_metrics = run_workload_characterization(metric_data=metric_data, dbms=workload.dbms) | ||||||
|         LOG.info("Done pruning metrics for workload %s (# pruned metrics: %s).\n\n" |         LOG.info("Done pruning metrics for workload %s (# pruned metrics: %s).\n\n" | ||||||
|                  "Pruned metrics: %s\n", workload_name, len(pruned_metrics), |                  "Pruned metrics: %s\n", workload_name, len(pruned_metrics), | ||||||
|                  pruned_metrics) |                  pruned_metrics) | ||||||
|  | @ -240,7 +252,7 @@ def aggregate_data(wkld_results): | ||||||
|     return knob_data, metric_data |     return knob_data, metric_data | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def run_workload_characterization(metric_data): | def run_workload_characterization(metric_data, dbms): | ||||||
|     # Performs workload characterization on the metric_data and returns |     # Performs workload characterization on the metric_data and returns | ||||||
|     # a set of pruned metrics. |     # a set of pruned metrics. | ||||||
|     # |     # | ||||||
|  | @ -255,13 +267,17 @@ def run_workload_characterization(metric_data): | ||||||
|     matrix = metric_data['data'] |     matrix = metric_data['data'] | ||||||
|     columnlabels = metric_data['columnlabels'] |     columnlabels = metric_data['columnlabels'] | ||||||
|     LOG.debug("Workload characterization ~ initial data size: %s", matrix.shape) |     LOG.debug("Workload characterization ~ initial data size: %s", matrix.shape) | ||||||
|  | 
 | ||||||
|  |     views = VIEWS_FOR_PRUNING.get(dbms.type, None) | ||||||
|  |     if views is not None: | ||||||
|         useful_labels = [] |         useful_labels = [] | ||||||
|         for label in columnlabels: |         for label in columnlabels: | ||||||
|         for view in VIEWS_FOR_PRUNING: |             for view in views: | ||||||
|                 if view in label: |                 if view in label: | ||||||
|                     useful_labels.append(label) |                     useful_labels.append(label) | ||||||
|                     break |                     break | ||||||
|     matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, None, useful_labels) |         matrix, columnlabels = DataUtil.clean_metric_data(matrix, columnlabels, None, | ||||||
|  |                                                           useful_labels) | ||||||
|         LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape) |         LOG.debug("Workload characterization ~ cleaned data size: %s", matrix.shape) | ||||||
| 
 | 
 | ||||||
|     # Bin each column (metric) in the matrix by its decile |     # Bin each column (metric) in the matrix by its decile | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue