Support adding custom target objectives to the website/db/*/target_objective.py modules

This commit is contained in:
Dana Van Aken
2019-10-08 19:26:38 -04:00
parent f68c23e975
commit 01b4ec3f53
18 changed files with 304 additions and 203 deletions

View File

@@ -3,3 +3,5 @@
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
from .base.target_objective import target_objectives

View File

@@ -8,12 +8,14 @@ from collections import OrderedDict
from website.models import KnobCatalog, KnobUnitType, MetricCatalog
from website.types import BooleanType, MetricType, VarType
from website.utils import ConversionUtil
from .. import target_objectives
# pylint: disable=no-self-use
class BaseParser:
def __init__(self, dbms_obj):
self.dbms_id = int(dbms_obj.pk)
knobs = KnobCatalog.objects.filter(dbms=dbms_obj)
self.knob_catalog_ = {k.name: k for k in knobs}
self.tunable_knob_catalog_ = {
@@ -37,26 +39,6 @@ class BaseParser:
self.min_bytes_unit = 'kB'
self.min_time_unit = 'ms'
@property
def transactions_counter(self):
raise NotImplementedError()
@property
def latency_timer(self):
raise NotImplementedError()
def target_metric(self, target_objective=None):
if target_objective == 'throughput_txn_per_sec' or target_objective is None:
# throughput
res = self.transactions_counter
elif target_objective == '99th_lat_ms':
# 99 percentile latency
res = self.latency_timer
else:
raise Exception("Target Objective {} Not Supported".format(target_objective))
return res
def parse_version_string(self, version_string):
return version_string
@@ -178,38 +160,47 @@ class BaseParser:
return knob_data
def _check_knob_num_in_range(self, value, mdata):
return value >= float(mdata.minval) and value <= float(mdata.maxval)
return float(mdata.minval) <= value <= float(mdata.maxval)
def _check_knob_bool_val(self, value):
if isinstance(str, value):
value = value.lower()
return value in self.valid_true_val or value in self.valid_false_val
def convert_dbms_metrics(self, metrics, observation_time, target_objective=None):
# if len(metrics) != len(self.numeric_metric_catalog_):
# raise Exception('The number of metrics should be equal!')
def convert_dbms_metrics(self, metrics, observation_time, target_objective):
metric_data = {}
for name, metadata in list(self.numeric_metric_catalog_.items()):
# Same as metric_data except COUNTER metrics are not divided by the time
base_metric_data = {}
for name, metadata in self.numeric_metric_catalog_.items():
value = metrics[name]
if metadata.metric_type == MetricType.COUNTER:
converted = self.convert_integer(value, metadata)
metric_data[name] = float(converted) / observation_time
elif metadata.metric_type == MetricType.STATISTICS:
converted = self.convert_integer(value, metadata)
metric_data[name] = float(converted)
if metadata.vartype == VarType.INTEGER:
converted = float(self.convert_integer(value, metadata))
elif metadata.vartype == VarType.REAL:
converted = self.convert_real(value, metadata)
else:
raise Exception(
raise ValueError(
("Found non-numeric metric '{}' in the numeric "
"metric catalog: value={}, type={}").format(
name, value, VarType.name(metadata.vartype)))
if metadata.metric_type == MetricType.COUNTER:
assert isinstance(converted, float)
base_metric_data[name] = converted
metric_data[name] = converted / observation_time
elif metadata.metric_type == MetricType.STATISTICS:
assert isinstance(converted, float)
base_metric_data[name] = converted
metric_data[name] = converted
else:
raise ValueError(
'Unknown metric type for {}: {}'.format(name, metadata.metric_type))
if target_objective is not None and self.target_metric(target_objective) not in metric_data:
raise Exception("Cannot find objective function")
if target_objective is not None:
metric_data[target_objective] = metric_data[self.target_metric(target_objective)]
else:
# default
metric_data['throughput_txn_per_sec'] = \
metric_data[self.target_metric(target_objective)]
target_objective_instance = target_objectives.get_target_objective_instance(
self.dbms_id, target_objective)
metric_data[target_objective] = target_objective_instance.compute(
base_metric_data, observation_time)
return metric_data
@@ -355,9 +346,6 @@ class BaseParser:
enumvals = metadata.enumvals.split(',')
return enumvals[int(round(enum_value))]
# def format_integer(self, int_value, metadata):
# return int(round(int_value))
def format_integer(self, int_value, metadata):
int_value = int(round(int_value))
if metadata.unit != KnobUnitType.OTHER and int_value > 0:

View File

@@ -0,0 +1,129 @@
#
# OtterTune - target_objective.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import logging
from collections import OrderedDict
from website import models, types
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Direction of performance improvement. BaseMetric accepts either label or the
# empty string; BaseTargetObjective narrows the choice to these two labels.
LESS_IS_BETTER = '(less is better)'
MORE_IS_BETTER = '(more is better)'
# Name of the throughput target objective (see BaseThroughput). TargetObjectives
# also stores it as its default target objective.
THROUGHPUT = 'throughput_txn_per_sec'
class BaseMetric:
    """Display metadata for a single metric.

    Attributes set by the constructor: name, pprint, unit, short_unit,
    improvement and scale.
    """

    # Values accepted for `improvement`; subclasses may narrow this tuple.
    _improvement_choices = (LESS_IS_BETTER, MORE_IS_BETTER, '')

    def __init__(self, name, pprint=None, unit='events / second', short_unit='events/sec',
                 improvement='', scale=1):
        """Validate the arguments and store them on the instance.

        Args:
            name: metric name.
            pprint: display name; `name` is used when this is falsy.
            unit: long unit label.
            short_unit: abbreviated unit label.
            improvement: one of `_improvement_choices`.
            scale: only 1 is accepted.

        Raises:
            ValueError: if `improvement` is not in `_improvement_choices`.
            NotImplementedError: if `scale` is anything other than 1.
        """
        allowed = self._improvement_choices
        if improvement not in allowed:
            quoted = ["'{}'".format(choice) for choice in allowed]
            raise ValueError("Improvement must be one of: {}".format(', '.join(quoted)))
        if scale != 1:
            raise NotImplementedError()

        self.name = name
        self.pprint = pprint if pprint else name
        self.unit, self.short_unit = unit, short_unit
        self.improvement = improvement
        self.scale = scale
class BaseTargetObjective(BaseMetric):
    """A metric that can be selected as the target objective.

    Unlike BaseMetric, every display field must be given explicitly and the
    improvement direction may not be left empty. Subclasses implement
    compute().
    """

    # A target objective must state its direction: '' is not allowed here.
    _improvement_choices = (LESS_IS_BETTER, MORE_IS_BETTER)

    def __init__(self, name, pprint, unit, short_unit, improvement, scale=1):
        """Forward all fields unchanged to BaseMetric.__init__."""
        super().__init__(
            name=name,
            pprint=pprint,
            unit=unit,
            short_unit=short_unit,
            improvement=improvement,
            scale=scale,
        )

    def compute(self, metrics, observation_time):
        """Return the objective's value; must be overridden by subclasses.

        Args:
            metrics: mapping of metric name to value.
            observation_time: length of the observation period.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError()
class BaseThroughput(BaseTargetObjective):
    """Throughput objective derived from one transaction-counter metric."""

    def __init__(self, transactions_counter):
        """Create the objective.

        Args:
            transactions_counter: name of the metric whose value is divided
                by the observation time in compute().
        """
        super().__init__(
            name=THROUGHPUT,
            pprint='Throughput',
            unit='transactions / second',
            short_unit='txn/sec',
            improvement=MORE_IS_BETTER,
        )
        self.transactions_counter = transactions_counter

    def compute(self, metrics, observation_time):
        """Return the counter's value divided by `observation_time`.

        Args:
            metrics: mapping that contains `self.transactions_counter`.
            observation_time: divisor applied to the counter value.
        """
        counter_value = float(metrics[self.transactions_counter])
        return counter_value / observation_time
class TargetObjectives:
    """Registry of the target objectives available for each DBMS.

    Nothing is loaded at construction time. The lookup methods call
    register() on first use, which reads the DBMS and metric catalogs
    through the Django models.
    """

    LESS_IS_BETTER = LESS_IS_BETTER
    MORE_IS_BETTER = MORE_IS_BETTER
    THROUGHPUT = THROUGHPUT

    def __init__(self):
        # dbms_id -> {target objective name -> target objective instance}
        self._registry = {}
        # dbms_id -> [(metric name, BaseMetric), ...] sorted by metric name
        self._metric_metadatas = {}
        self._default_target_objective = THROUGHPUT

    def register(self):
        """Fill the registry from the per-DBMS `target_objective_list` tuples.

        Each list entry is a (dbms_type, target objective instance) pair; the
        instance is registered under its `name` for every DBMSCatalog row of
        that type. Does nothing if the registry already has an entry.
        """
        # Imported at call time: those modules import names from this one, so
        # importing them at module level would be circular.
        from ..myrocks.target_objective import target_objective_list as _myrocks_list
        from ..oracle.target_objective import target_objective_list as _oracle_list
        from ..postgres.target_objective import target_objective_list as _postgres_list

        if self.registered():
            return

        LOG.info('Registering target objectives...')
        full_list = _myrocks_list + _oracle_list + _postgres_list
        for dbms_type, target_objective_instance in full_list:
            dbmss = models.DBMSCatalog.objects.filter(type=dbms_type)
            name = target_objective_instance.name
            for dbms in dbmss:
                dbms_id = int(dbms.pk)
                self._registry.setdefault(dbms_id, {})[name] = target_objective_instance

                # The metric list depends only on the DBMS, so build it once
                # per dbms_id even when several objectives share that DBMS.
                if dbms_id not in self._metric_metadatas:
                    numeric_metrics = models.MetricCatalog.objects.filter(dbms=dbms).exclude(
                        metric_type=types.MetricType.INFO).values_list('name', flat=True)
                    self._metric_metadatas[dbms_id] = [
                        (mname, BaseMetric(mname)) for mname in sorted(numeric_metrics)]

    def registered(self):
        """Return True once at least one DBMS has a registered objective."""
        return len(self._registry) > 0

    def _lookup(self, dbms_id, target_objective):
        """Return (int(dbms_id), instance), registering first if necessary.

        Raises:
            KeyError: if nothing is registered for `dbms_id`, or
                `target_objective` is not registered for it. The message
                names the DBMS id, the requested objective and the
                objectives that are available.
        """
        if not self.registered():
            self.register()
        dbms_id = int(dbms_id)
        try:
            return dbms_id, self._registry[dbms_id][target_objective]
        except KeyError:
            available = ', '.join(
                sorted(str(name) for name in self._registry.get(dbms_id, {})))
            raise KeyError(
                "Target objective '{}' is not registered for DBMS id {} "
                "(available: {})".format(
                    target_objective, dbms_id, available or 'none')) from None

    def get_metric_metadata(self, dbms_id, target_objective):
        """Return an OrderedDict of metric name -> metadata for a DBMS.

        The target objective comes first, followed by the DBMS's non-INFO
        metrics in name order.

        Raises:
            KeyError: see _lookup().
        """
        dbms_id, target_objective_instance = self._lookup(dbms_id, target_objective)
        metadata = [(target_objective, target_objective_instance)]
        metadata.extend(self._metric_metadatas[dbms_id])
        return OrderedDict(metadata)

    def get_default_target_objective(self):
        """Return the name of the default target objective."""
        return self._default_target_objective

    def get_target_objective_instance(self, dbms_id, target_objective):
        """Return the objective registered as `target_objective` for `dbms_id`.

        Raises:
            KeyError: see _lookup().
        """
        _, instance = self._lookup(dbms_id, target_objective)
        return instance

    def __repr__(self):
        """List the registered objective names grouped by DBMS full name.

        Note: issues one DBMSCatalog query per registered DBMS.
        """
        parts = ['TargetObjectives = (\n']
        for dbms_id, entry in self._registry.items():  # pylint: disable=not-an-iterable
            parts.append(' {}:\n'.format(
                models.DBMSCatalog.objects.get(id=dbms_id).full_name))
            parts.extend(' {}\n'.format(name) for name in entry)
        parts.append(')\n')
        return ''.join(parts)
# Module-level registry instance; other modules import this object rather than
# constructing their own TargetObjectives.
target_objectives = TargetObjectives() # pylint: disable=invalid-name

View File

@@ -8,19 +8,12 @@ import re
from collections import OrderedDict
from ..base.parser import BaseParser
from .. import target_objectives
from website.types import MetricType, VarType
class MyRocksParser(BaseParser):
@property
def transactions_counter(self):
return 'session_status.questions'
@property
def latency_timer(self):
raise NotImplementedError()
def parse_version_string(self, version_string):
dbms_version = version_string.split(',')[0]
return re.search(r'\d+\.\d+(?=\.\d+)', dbms_version).group(0)
@@ -145,28 +138,42 @@ class MyRocksParser(BaseParser):
valid_metrics, self.metric_catalog_, default_value='0')
return valid_metrics, diffs
def convert_dbms_metrics(self, metrics, observation_time, target_objective=None):
def convert_dbms_metrics(self, metrics, observation_time, target_objective):
base_metric_data = {}
metric_data = {}
for name, value in list(metrics.items()):
prt_name = self.partial_name(name)
if prt_name in self.numeric_metric_catalog_:
metadata = self.numeric_metric_catalog_[prt_name]
if metadata.metric_type == MetricType.COUNTER:
converted = self.convert_integer(value, metadata)
metric_data[name] = float(converted) / observation_time
if metadata.vartype == VarType.INTEGER:
converted = float(self.convert_integer(value, metadata))
elif metadata.vartype == VarType.REAL:
converted = self.convert_real(value, metadata)
else:
raise Exception('Unknown metric type for {}: {}'.format(
name, metadata.metric_type))
raise ValueError(
("Found non-numeric metric '{}' in the numeric "
"metric catalog: value={}, type={}").format(
name, value, VarType.name(metadata.vartype)))
if target_objective is not None and self.target_metric(target_objective) not in metric_data:
raise Exception("Cannot find objective function")
if metadata.metric_type == MetricType.COUNTER:
assert isinstance(converted, float)
base_metric_data[name] = converted
metric_data[name] = converted / observation_time
elif metadata.metric_type == MetricType.STATISTICS:
assert isinstance(converted, float)
base_metric_data[name] = converted
metric_data[name] = converted
else:
raise ValueError(
'Unknown metric type for {}: {}'.format(name, metadata.metric_type))
target_objective_instance = target_objectives.get_target_objective_instance(
self.dbms_id, target_objective)
metric_data[target_objective] = target_objective_instance.compute(
base_metric_data, observation_time)
if target_objective is not None:
metric_data[target_objective] = metric_data[self.target_metric(target_objective)]
else:
# default
metric_data['throughput_txn_per_sec'] = \
metric_data[self.target_metric(target_objective)]
return metric_data
def convert_dbms_knobs(self, knobs):

View File

@@ -0,0 +1,12 @@
#
# OtterTune - target_objective.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
from ..base.target_objective import BaseThroughput
from website.types import DBMSType
# (DBMS type, target objective) pairs for MyRocks. To offer another objective,
# add its instance to the inner tuple.
target_objective_list = tuple(  # pylint: disable=invalid-name
    (DBMSType.MYROCKS, objective)
    for objective in (
        BaseThroughput(transactions_counter='session_status.questions'),
    )
)

View File

@@ -20,11 +20,3 @@ class OracleParser(BaseParser):
(1024 ** 1, 'k'),
)
self.min_bytes_unit = 'k'
@property
def transactions_counter(self):
return 'global.user commits'
@property
def latency_timer(self):
raise NotImplementedError()

View File

@@ -0,0 +1,27 @@
#
# OtterTune - target_objective.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
from ..base.target_objective import BaseTargetObjective, BaseThroughput, LESS_IS_BETTER
from website.types import DBMSType
class DBTime(BaseTargetObjective):
    """Oracle 'db_time' objective: lower values are better."""

    def __init__(self):
        """Register the objective's fixed name, labels and direction."""
        super().__init__(
            name='db_time',
            pprint='DB Time',
            unit='milliseconds',
            short_unit='ms',
            improvement=LESS_IS_BETTER,
        )

    def compute(self, metrics, observation_time):
        """Return the sum of three metrics divided by `observation_time`.

        Args:
            metrics: mapping that contains the three metric names summed
                below.
            observation_time: divisor applied to the summed value.
        """
        total = 0
        for mname in ('global.db cpu',
                      'global.cursor: pin s wait on x.time_waited',
                      'global.user i/o wait time'):
            total += metrics[mname]
        return float(total) / observation_time
# (DBMS type, target objective) pairs for Oracle. To offer another objective,
# add its instance to the inner tuple.
target_objective_list = tuple(  # pylint: disable=invalid-name
    (DBMSType.ORACLE, objective)
    for objective in (
        BaseThroughput(transactions_counter='global.user commits'),
        DBTime(),
    )
)

View File

@@ -52,7 +52,7 @@ def convert_dbms_knobs(dbms_id, knobs):
return _get(dbms_id).convert_dbms_knobs(knobs)
def convert_dbms_metrics(dbms_id, numeric_metrics, observation_time, target_objective=None):
def convert_dbms_metrics(dbms_id, numeric_metrics, observation_time, target_objective):
return _get(dbms_id).convert_dbms_metrics(
numeric_metrics, observation_time, target_objective)

View File

@@ -19,14 +19,6 @@ class PostgresParser(BaseParser):
self.bytes_system = [(f, s) for f, s in ConversionUtil.DEFAULT_BYTES_SYSTEM
if s in ('TB', 'GB', 'MB', 'kB')]
@property
def transactions_counter(self):
return 'pg_stat_database.xact_commit'
@property
def latency_timer(self):
raise NotImplementedError()
def parse_version_string(self, version_string):
dbms_version = version_string.split(',')[0]
return re.search(r'\d+\.\d+(?=\.\d+)', dbms_version).group(0)

View File

@@ -0,0 +1,12 @@
#
# OtterTune - target_objective.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
from ..base.target_objective import BaseThroughput
from website.types import DBMSType
# (DBMS type, target objective) pairs for Postgres. To offer another objective,
# add its instance to the inner tuple.
target_objective_list = tuple(  # pylint: disable=invalid-name
    (DBMSType.POSTGRES, objective)
    for objective in (
        BaseThroughput(transactions_counter='pg_stat_database.xact_commit'),
    )
)