539 lines
19 KiB
Python
539 lines
19 KiB
Python
#
|
|
# OtterTune - models.py
|
|
#
|
|
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
|
|
#
|
|
from collections import OrderedDict
|
|
from pytz import timezone
|
|
|
|
from django.contrib.auth.models import User
|
|
from django.db import models, DEFAULT_DB_ALIAS
|
|
from django.utils.datetime_safe import datetime
|
|
from django.utils.timezone import now
|
|
|
|
from .db import target_objectives
|
|
from .settings import TIME_ZONE
|
|
from .types import (DBMSType, LabelStyleType, MetricType, KnobUnitType,
|
|
PipelineTaskType, VarType, KnobResourceType,
|
|
WorkloadStatusType, AlgorithmType, StorageType)
|
|
|
|
|
|
class BaseModel(models.Model):
    """Abstract base class shared by most models in this module.

    Provides a default string representation and a helper that collects
    display labels from the verbose names of a model's fields.
    """

    def __str__(self):
        return self.__unicode__()

    def __unicode__(self):
        # Use the ``name`` attribute when the model has one, otherwise the
        # primary key rendered as text.
        fallback = str(self.pk)
        return getattr(self, 'name', fallback)

    @classmethod
    def get_labels(cls, style=LabelStyleType.DEFAULT_STYLE):
        """Return the field labels of this model, styled by ``LabelUtil``.

        Every field that exposes a ``verbose_name`` contributes one entry
        keyed by the field name; the ``id`` field is labelled
        "<model name> id". Fields without a ``verbose_name`` attribute are
        skipped. The collected mapping is passed through
        ``LabelUtil.style_labels`` together with ``style``.
        """
        # NOTE(review): imported locally, presumably to avoid a circular
        # import between this module and .utils -- confirm.
        from .utils import LabelUtil

        missing = object()
        collected = {}
        for model_field in cls._meta.get_fields():
            label = getattr(model_field, 'verbose_name', missing)
            if label is missing:
                # No verbose name on this field object; nothing to label.
                continue
            if model_field.name == 'id':
                label = cls._model_name() + ' id'
            collected[model_field.name] = label
        return LabelUtil.style_labels(collected, style)

    @classmethod
    def _model_name(cls):
        """Return the class name used when labelling the ``id`` field."""
        return cls.__name__

    class Meta:  # pylint: disable=no-init
        abstract = True
|
|
|
|
|
|
class DBMSCatalog(BaseModel):
    """A DBMS entry identified by a ``DBMSType`` code and a version string."""

    type = models.IntegerField(choices=DBMSType.choices())
    version = models.CharField(max_length=16)

    @property
    def name(self):
        """Name that ``DBMSType`` reports for this entry's type code."""
        return DBMSType.name(self.type)

    @property
    def key(self):
        """Identifier of the form ``<name>_<version>``."""
        return '{name}_{version}'.format(name=self.name, version=self.version)

    @property
    def full_name(self):
        """Display name of the form ``<name> v<version>``."""
        return '{name} v{version}'.format(name=self.name, version=self.version)

    def __unicode__(self):
        return self.full_name
|
|
|
|
|
|
class KnobCatalog(BaseModel):
    """Catalog row describing one knob of a DBMS."""

    # DBMS the knob belongs to.
    dbms = models.ForeignKey(DBMSCatalog)
    name = models.CharField(max_length=128)
    vartype = models.IntegerField(
        choices=VarType.choices(), verbose_name="variable type")
    unit = models.IntegerField(choices=KnobUnitType.choices())
    category = models.TextField(null=True)
    # Labelled 'description' although a separate ``description`` field
    # follows.
    summary = models.TextField(null=True, verbose_name='description')
    description = models.TextField(null=True)
    scope = models.CharField(max_length=16)
    # Bounds are stored as text and may be NULL.
    minval = models.CharField(
        max_length=32, null=True, verbose_name="minimum value")
    maxval = models.CharField(
        max_length=32, null=True, verbose_name="maximum value")
    default = models.TextField(verbose_name="default value")
    # SessionKnobManager.get_knobs_for_session splits this text on ','.
    enumvals = models.TextField(null=True, verbose_name="valid values")
    context = models.CharField(max_length=32)
    tunable = models.BooleanField(verbose_name="tunable")
    # NOTE(review): the default 4 is a raw KnobResourceType code -- confirm
    # which member it denotes.
    resource = models.IntegerField(
        choices=KnobResourceType.choices(), default=4)
|
|
|
|
|
|
class MetricCatalog(BaseModel):
    """Catalog row describing one metric of a DBMS."""

    # DBMS the metric belongs to.
    dbms = models.ForeignKey(DBMSCatalog)
    name = models.CharField(max_length=128)
    vartype = models.IntegerField(choices=VarType.choices())
    # Shown under the label 'description'.
    summary = models.TextField(null=True, verbose_name='description')
    scope = models.CharField(max_length=16)
    metric_type = models.IntegerField(choices=MetricType.choices())
|
|
|
|
|
|
class Project(BaseModel):
    """A named project owned by a user; project names are unique per user."""

    user = models.ForeignKey(User)
    name = models.CharField(max_length=64, verbose_name="project name")
    description = models.TextField(null=True, blank=True)
    creation_time = models.DateTimeField()
    last_update = models.DateTimeField()

    def delete(self, using=DEFAULT_DB_ALIAS, keep_parents=False):
        """Delete the project after deleting each of its sessions.

        Sessions are removed one at a time through ``Session.delete`` so
        that the session's own clean-up logic runs for every one of them.
        """
        for owned_session in Session.objects.filter(project=self):
            owned_session.delete()
        super().delete(using, keep_parents)

    class Meta:  # pylint: disable=no-init
        unique_together = ('user', 'name')
|
|
|
|
|
|
class Hardware(BaseModel):
    """A hardware profile: CPU count, memory, storage size and storage type.

    The combination of those four values is unique.
    """

    @property
    def name(self):
        """Human-readable summary of the profile."""
        storage_kind = StorageType.name(self.storage_type)
        return '{} CPUs, {}GB RAM, {}GB {}'.format(
            self.cpu, self.memory, self.storage, storage_kind)

    cpu = models.IntegerField(default=4, verbose_name='Number of CPUs')
    memory = models.IntegerField(default=16, verbose_name='Memory (GB)')
    storage = models.IntegerField(default=32, verbose_name='Storage (GB)')
    storage_type = models.IntegerField(
        choices=StorageType.choices(),
        default=StorageType.SSD,
        verbose_name='Storage Type')
    # Optional free-form text; not part of the uniqueness constraint.
    additional_specs = models.TextField(null=True, default=None)

    class Meta:  # pylint: disable=no-init
        unique_together = ('cpu', 'memory', 'storage', 'storage_type')
|
|
|
|
|
|
class Session(BaseModel):
    """A session belonging to a user and a project.

    A session is tied to one DBMS and one hardware profile, selects an
    algorithm and a target objective, and stores serialized model state and
    hyperparameters. Session names are unique per (user, project).
    """

    # Allowed values of ``tuning_session`` mapped to their display labels.
    TUNING_OPTIONS = OrderedDict([
        ("tuning_session", "Tuning Session"),
        ("no_tuning_session", "No Tuning"),
        ("randomly_generate", "Randomly Generate"),
        ("lhs", "Run LHS")
    ])

    user = models.ForeignKey(User)
    name = models.CharField(max_length=64, verbose_name="session name")
    description = models.TextField(null=True, blank=True)
    dbms = models.ForeignKey(DBMSCatalog)
    hardware = models.ForeignKey(Hardware)
    algorithm = models.IntegerField(choices=AlgorithmType.choices(),
                                    default=AlgorithmType.GPR)
    # Stored as text; the default is the literal "[]" (presumably a
    # JSON-encoded list -- confirm against the code that reads it).
    lhs_samples = models.TextField(default="[]")
    # Optional binary blobs holding serialized model state.
    ddpg_actor_model = models.BinaryField(null=True, blank=True)
    ddpg_critic_model = models.BinaryField(null=True, blank=True)
    ddpg_reply_memory = models.BinaryField(null=True, blank=True)
    dnn_model = models.BinaryField(null=True, blank=True)

    project = models.ForeignKey(Project)
    creation_time = models.DateTimeField()
    last_update = models.DateTimeField()

    upload_code = models.CharField(max_length=30, unique=True)
    tuning_session = models.CharField(choices=TUNING_OPTIONS.items(),
                                      max_length=64, default='tuning_session',
                                      verbose_name='session type')

    target_objective = models.CharField(
        max_length=64, default=target_objectives.default())
    # Default hyperparameters, stored as JSON-formatted text.
    hyperparameters = models.TextField(default='''{
    "DDPG_ACTOR_HIDDEN_SIZES": [128, 128, 64],
    "DDPG_ACTOR_LEARNING_RATE": 0.02,
    "DDPG_CRITIC_HIDDEN_SIZES": [64, 128, 64],
    "DDPG_CRITIC_LEARNING_RATE": 0.001,
    "DDPG_BATCH_SIZE": 32,
    "DDPG_GAMMA": 0.0,
    "DDPG_SIMPLE_REWARD": true,
    "DDPG_UPDATE_EPOCHS": 30,
    "DDPG_USE_DEFAULT": false,
    "DNN_DEBUG": true,
    "DNN_DEBUG_INTERVAL": 100,
    "DNN_EXPLORE": false,
    "DNN_EXPLORE_ITER": 500,
    "DNN_GD_ITER": 100,
    "DNN_NOISE_SCALE_BEGIN": 0.1,
    "DNN_NOISE_SCALE_END": 0.0,
    "DNN_TRAIN_ITER": 100,
    "FLIP_PROB_DECAY": 0.5,
    "GPR_BATCH_SIZE": 3000,
    "GPR_DEBUG": true,
    "GPR_EPS": 0.001,
    "GPR_EPSILON": 1e-06,
    "GPR_LEARNING_RATE": 0.01,
    "GPR_LENGTH_SCALE": 2.0,
    "GPR_MAGNITUDE": 1.0,
    "GPR_MAX_ITER": 500,
    "GPR_MAX_TRAIN_SIZE": 7000,
    "GPR_MU_MULTIPLIER": 1.0,
    "GPR_MODEL_NAME": "BasicGP",
    "GPR_HP_LEARNING_RATE": 0.001,
    "GPR_HP_MAX_ITER": 5000,
    "GPR_RIDGE": 1.0,
    "GPR_SIGMA_MULTIPLIER": 1.0,
    "GPR_UCB_SCALE": 0.2,
    "GPR_USE_GPFLOW": true,
    "GPR_UCB_BETA": "get_beta_td",
    "IMPORTANT_KNOB_NUMBER": 10000,
    "INIT_FLIP_PROB": 0.3,
    "NUM_SAMPLES": 30,
    "TF_NUM_THREADS": 4,
    "TOP_NUM_CONFIG": 10}''')

    def clean(self):
        """Fall back to the default target objective when none is set."""
        if self.target_objective is None:
            self.target_objective = target_objectives.default()

    def delete(self, using=DEFAULT_DB_ALIAS, keep_parents=False):
        """Delete the session together with its knobs, results and data.

        Removes every ``SessionKnob`` of the session, then for each
        ``Result`` of the session deletes its knob data, its metric data and
        the result itself, and finally deletes the session row.

        Args:
            using: database alias forwarded to ``Model.delete``.
            keep_parents: forwarded to ``Model.delete``.
        """
        # filter() instead of get(): a session can have any number of
        # SessionKnob rows, and get() raises DoesNotExist or
        # MultipleObjectsReturned unless exactly one row matches.
        SessionKnob.objects.filter(session=self).delete()
        for result in Result.objects.filter(session=self):
            result.knob_data.delete()
            result.metric_data.delete()
            result.delete()
        # Forward the caller's arguments instead of hard-coded defaults.
        super(Session, self).delete(using=using, keep_parents=keep_parents)

    class Meta:  # pylint: disable=no-init
        unique_together = ('user', 'project', 'name')
|
|
|
|
|
|
class SessionKnobManager(models.Manager):
    """Manager with helpers for reading and updating a session's knobs."""

    @staticmethod
    def get_knobs_for_session(session):
        """Return catalog information for the tunable knobs of ``session``.

        Returns:
            A list of dicts, one per tunable knob. Each dict holds the
            values of the knob's ``KnobCatalog`` row, with ``minval``,
            ``maxval`` and ``tunable`` replaced by the session-specific
            values and ``upperbound``/``lowerbound`` added. For ENUM knobs
            the range becomes ``0 .. len(enumvals) - 1``; for BOOL knobs it
            becomes ``0 .. 1``.
        """
        session_knobs = SessionKnob.objects.filter(
            session=session, tunable=True).prefetch_related('knob')
        session_knobs = {s.knob.pk: s for s in session_knobs}
        knob_dicts = list(KnobCatalog.objects.filter(id__in=session_knobs.keys()).values())
        for knob_info in knob_dicts:
            sess_knob = session_knobs[knob_info['id']]
            knob_info['minval'] = sess_knob.minval
            knob_info['maxval'] = sess_knob.maxval
            knob_info['upperbound'] = sess_knob.upperbound
            knob_info['lowerbound'] = sess_knob.lowerbound
            knob_info['tunable'] = sess_knob.tunable
            # Compare type codes by value; an identity test (`is`) on
            # integers only succeeds by interpreter accident.
            if knob_info['vartype'] == VarType.ENUM:
                enumvals = knob_info['enumvals'].split(',')
                knob_info["minval"] = 0
                knob_info["maxval"] = len(enumvals) - 1
            elif knob_info['vartype'] == VarType.BOOL:
                knob_info["minval"] = 0
                knob_info["maxval"] = 1

        return knob_dicts

    @staticmethod
    def get_knob_min_max_tunability(session, tunable_only=False):
        """Return the min/max/tunable settings of the session's knobs.

        Args:
            session: session whose knobs are looked up.
            tunable_only: when true, only knobs marked tunable are returned.

        Returns:
            A dict mapping each knob name to a dict with the keys
            ``tunable``, ``minval`` and ``maxval``.
        """
        filter_args = dict(session=session)
        if tunable_only:
            filter_args['tunable'] = True
        session_knobs = SessionKnob.objects.filter(**filter_args).values(
            'knob__name', 'tunable', 'minval', 'maxval')

        session_knob_dicts = {}
        for entry in session_knobs:
            new_entry = dict(entry)
            knob_name = new_entry.pop('knob__name')
            session_knob_dicts[knob_name] = new_entry
        return session_knob_dicts

    @staticmethod
    def set_knob_min_max_tunability(session, knob_dicts, cascade=True, disable_others=False):
        """Update the session's knobs from ``knob_dicts``; returns nothing.

        Args:
            session: session whose ``SessionKnob`` rows are updated.
            knob_dicts: mapping of knob name (matched case-insensitively) to
                a settings dict with the required keys ``minval``,
                ``maxval`` and ``tunable`` and the optional keys
                ``upperbound`` and ``lowerbound``.
            cascade: when true, also update the matching ``KnobCatalog``
                row: its ``tunable`` flag is overwritten and, for INTEGER
                and REAL knobs, its min/max are widened to include the
                session's range.
            disable_others: when true, session knobs that are not named in
                ``knob_dicts`` are marked as not tunable.
        """
        knob_dicts = {k.lower(): v for k, v in knob_dicts.items()}
        # select_related: ``SessionKnob.name`` reads ``knob.name``, which
        # would otherwise cost one extra query per knob.
        session_knobs = {
            k.name.lower(): k
            for k in SessionKnob.objects.filter(session=session).select_related('knob')
        }
        for lower_name, session_knob in session_knobs.items():
            if lower_name in knob_dicts:
                settings = knob_dicts[lower_name]
                session_knob.minval = settings["minval"]
                session_knob.maxval = settings["maxval"]
                session_knob.tunable = settings["tunable"]
                if "upperbound" in settings:
                    session_knob.upperbound = settings["upperbound"]
                if "lowerbound" in settings:
                    session_knob.lowerbound = settings["lowerbound"]
                session_knob.save()
                if cascade:
                    knob = KnobCatalog.objects.get(name=session_knob.name, dbms=session.dbms)
                    knob.tunable = session_knob.tunable
                    if knob.vartype in (VarType.INTEGER, VarType.REAL):
                        # Convert both sides to float: the bounds live in
                        # CharFields and the settings may be strings, and a
                        # str/float comparison raises TypeError.
                        if knob.minval is None or \
                                float(session_knob.minval) < float(knob.minval):
                            knob.minval = session_knob.minval
                        if knob.maxval is None or \
                                float(session_knob.maxval) > float(knob.maxval):
                            knob.maxval = session_knob.maxval
                    knob.save()
            elif disable_others:
                # Set all knobs not in knob_dicts to not tunable
                session_knob.tunable = False
                session_knob.save()
|
|
|
|
|
|
class SessionKnob(BaseModel):
    """Per-session settings (range, bounds, tunability) of one catalog knob."""

    @property
    def name(self):
        """Name of the underlying catalog knob."""
        catalog_knob = self.knob
        return catalog_knob.name

    objects = SessionKnobManager()

    session = models.ForeignKey(Session)
    knob = models.ForeignKey(KnobCatalog)
    # Range and bounds are stored as text and may be NULL.
    minval = models.CharField(
        max_length=32, null=True, verbose_name="minimum value")
    maxval = models.CharField(
        max_length=32, null=True, verbose_name="maximum value")
    upperbound = models.CharField(
        max_length=32, null=True, verbose_name="upperbound")
    lowerbound = models.CharField(
        max_length=32, null=True, verbose_name="lowerbound")
    tunable = models.BooleanField(verbose_name="tunable")
|
|
|
|
|
|
class DataModel(BaseModel):
    """Abstract base for named data records attached to a session and DBMS."""

    session = models.ForeignKey(Session)
    name = models.CharField(max_length=50)
    creation_time = models.DateTimeField()
    # Payload stored as text.
    data = models.TextField()
    dbms = models.ForeignKey(DBMSCatalog)

    class Meta:  # pylint: disable=no-init
        abstract = True
|
|
|
|
|
|
class DataManager(models.Manager):
    """Base manager providing the naming scheme for data records."""

    @staticmethod
    def create_name(data_obj, key):
        """Build a record name of the form ``<key>@<mm-dd-yy>#<pk>``.

        The date is taken from ``data_obj.creation_time`` and the trailing
        number is the object's primary key.
        """
        date_part = data_obj.creation_time.strftime("%m-%d-%y")
        pk_part = str(data_obj.pk)
        return ''.join((key, '@', date_part, '#', pk_part))
|
|
|
|
|
|
class KnobDataManager(DataManager):
    """Manager that creates ``KnobData`` rows, reusing identical ones."""

    def create_knob_data(self, session, knobs, data, dbms):
        """Return the knob data for ``session`` and ``knobs``, creating it if needed.

        An existing ``KnobData`` row with the same session and knobs is
        returned unchanged. Otherwise a new row is created with the current
        time, then named via ``create_name`` and saved again, because the
        name includes the primary key that only exists after creation.
        """
        try:
            existing = KnobData.objects.get(session=session, knobs=knobs)
        except KnobData.DoesNotExist:
            pass
        else:
            return existing

        record = self.create(
            session=session,
            knobs=knobs,
            data=data,
            dbms=dbms,
            creation_time=now())
        record.name = self.create_name(record, dbms.key)
        record.save()
        return record
|
|
|
|
|
|
class KnobData(DataModel):
    """Data record that additionally stores knob values as text."""

    objects = KnobDataManager()

    # Text compared for equality by KnobDataManager.create_knob_data.
    knobs = models.TextField()
|
|
|
|
|
|
class MetricDataManager(DataManager):
    """Manager that creates ``MetricData`` rows."""

    def create_metric_data(self, session, metrics, data, dbms):
        """Create, name and return a new ``MetricData`` row.

        The row is created with the current time, then named via
        ``create_name`` and saved again, because the name includes the
        primary key that only exists after creation.
        """
        record = self.create(
            session=session,
            metrics=metrics,
            data=data,
            dbms=dbms,
            creation_time=now())
        record.name = self.create_name(record, dbms.key)
        record.save()
        return record
|
|
|
|
|
|
class MetricData(DataModel):
    """Data record that additionally stores metric values as text."""

    objects = MetricDataManager()

    metrics = models.TextField()
|
|
|
|
|
|
class WorkloadManager(models.Manager):
    """Manager that looks up or creates ``Workload`` rows."""

    def create_workload(self, dbms, hardware, name, project):
        """Return the workload matching the arguments, creating it if absent.

        A workload is identified by the combination
        (dbms, hardware, name, project).
        """
        identity = dict(dbms=dbms, hardware=hardware, name=name, project=project)
        try:
            workload = Workload.objects.get(**identity)
        except Workload.DoesNotExist:
            workload = self.create(**identity)
        return workload
|
|
|
|
|
|
class Workload(BaseModel):
    """A named workload for one DBMS, hardware profile and project.

    The combination (dbms, hardware, name, project) is unique.
    """

    # __DEFAULT_FMT = '{db}_{hw}_UNASSIGNED'.format

    objects = WorkloadManager()

    dbms = models.ForeignKey(DBMSCatalog)
    hardware = models.ForeignKey(Hardware)
    name = models.CharField(max_length=128, verbose_name='workload name')
    project = models.ForeignKey(Project)
    # Not editable through forms; new workloads start as MODIFIED.
    status = models.IntegerField(
        choices=WorkloadStatusType.choices(),
        default=WorkloadStatusType.MODIFIED,
        editable=False)

    def delete(self, using=DEFAULT_DB_ALIAS, keep_parents=False):
        """Delete the workload after deleting its ``PipelineData`` rows."""
        # The results should not have corresponding workloads.
        # results = Result.objects.filter(workload=self)
        # if results.exists():
        #     raise Exception("Cannot delete {} workload since results exist. ".format(self.name))

        # Delete PipelineData with corresponding workloads
        for pipeline_data in PipelineData.objects.filter(workload=self):
            pipeline_data.delete()
        super().delete(using, keep_parents)

    class Meta:  # pylint: disable=no-init
        unique_together = ("dbms", "hardware", "name", "project")

    # NOTE(review): the disabled helpers below refer to ``cluster_name``,
    # which is not a field of this model.
    # @property
    # def isdefault(self):
    #     return self.cluster_name == self.default
    #
    # @property
    # def default(self):
    #     return self.__DEFAULT_FMT(db=self.dbms.pk,
    #                               hw=self.hardware.pk)
    #
    # @staticmethod
    # def get_default(dbms_id, hw_id):
    #     return Workload.__DEFAULT_FMT(db=dbms_id,
    #                                   hw=hw_id)
|
|
|
|
|
|
class PipelineRunManager(models.Manager):
    """Manager with a shortcut for the most recent finished pipeline run."""

    def get_latest(self):
        """Return the first run that has an end time, or ``None`` if none does.

        "First" follows the queryset's ordering; ``PipelineRun`` orders by
        descending id.
        """
        finished_runs = self.exclude(end_time=None)
        return finished_runs.first()
|
|
|
|
|
|
class PipelineRun(models.Model):
    """One run of the pipeline, ordered newest-first by id."""

    objects = PipelineRunManager()

    start_time = models.DateTimeField()
    # NULL until an end time has been recorded.
    end_time = models.DateTimeField(null=True)

    def __unicode__(self):
        return str(self.pk)

    def __str__(self):
        return self.__unicode__()

    class Meta:  # pylint: disable=no-init
        ordering = ["-id"]
|
|
|
|
|
|
class PipelineData(models.Model):
    """Output of one pipeline task for one workload within a pipeline run.

    There is at most one row per (pipeline_run, task_type, workload).
    """

    # Shown under the label 'group'.
    pipeline_run = models.ForeignKey(PipelineRun, verbose_name='group')
    task_type = models.IntegerField(choices=PipelineTaskType.choices())
    workload = models.ForeignKey(Workload)
    # Payload stored as text.
    data = models.TextField()
    creation_time = models.DateTimeField()

    class Meta:  # pylint: disable=no-init
        unique_together = ("pipeline_run", "task_type", "workload")
|
|
|
|
|
|
class ResultManager(models.Manager):
    """Manager that creates ``Result`` rows."""

    def create_result(self, session, dbms, workload,
                      knob_data, metric_data,
                      observation_start_time,
                      observation_end_time,
                      observation_time,
                      task_ids=None,
                      next_config=None):
        """Create and return a ``Result`` stamped with the current time.

        ``next_config`` is stored in the ``next_configuration`` field; all
        other arguments are stored under the field of the same name.
        """
        field_values = {
            'session': session,
            'dbms': dbms,
            'workload': workload,
            'knob_data': knob_data,
            'metric_data': metric_data,
            'observation_start_time': observation_start_time,
            'observation_end_time': observation_end_time,
            'observation_time': observation_time,
            'task_ids': task_ids,
            'next_configuration': next_config,
            'creation_time': now(),
        }
        return self.create(**field_values)
|
|
|
|
|
|
class Result(BaseModel):
    """One observation of a session: knob data, metric data and timing."""

    objects = ResultManager()

    # Shown under the label 'session name'.
    session = models.ForeignKey(Session, verbose_name='session name')
    dbms = models.ForeignKey(DBMSCatalog)
    workload = models.ForeignKey(Workload)
    knob_data = models.ForeignKey(KnobData)
    metric_data = models.ForeignKey(MetricData)

    creation_time = models.DateTimeField()
    observation_start_time = models.DateTimeField()
    observation_end_time = models.DateTimeField()
    observation_time = models.FloatField()
    task_ids = models.TextField(null=True)
    next_configuration = models.TextField(null=True)
    # Optional links to the pipeline data associated with this result.
    pipeline_knobs = models.ForeignKey(
        PipelineData, null=True, related_name='pipeline_knobs')
    pipeline_metrics = models.ForeignKey(
        PipelineData, null=True, related_name='pipeline_metrics')

    def __unicode__(self):
        # Results are represented by their primary key.
        return str(self.pk)
|
|
|
|
|
|
class BackupData(BaseModel):
    """Raw text data and logs kept alongside a result."""

    result = models.ForeignKey(Result)
    raw_knobs = models.TextField()
    raw_initial_metrics = models.TextField()
    raw_final_metrics = models.TextField()
    raw_summary = models.TextField()
    knob_log = models.TextField()
    metric_log = models.TextField()
|
|
|
|
|
|
class ExecutionTime(models.Model):
    """Timing record for one function call, optionally linked to a result."""

    module = models.CharField(max_length=32)
    function = models.CharField(max_length=64)
    tag = models.CharField(max_length=64, blank=True, default='')
    start_time = models.DateTimeField()
    execution_time = models.FloatField()  # in seconds
    result = models.ForeignKey(Result, null=True, blank=True, default=None)

    @property
    def event(self):
        """Dot-joined module, function and tag, skipping empty parts."""
        parts = (self.module, self.function, self.tag)
        return '.'.join(filter(None, parts))

    def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
        """Save the record, accepting a numeric timestamp as ``start_time``.

        An int or float ``start_time`` is truncated to whole seconds and
        converted to a datetime in the ``TIME_ZONE`` time zone before the
        row is written.
        """
        raw_start = self.start_time
        if isinstance(raw_start, (int, float)):
            whole_seconds = int(raw_start)
            self.start_time = datetime.fromtimestamp(
                whole_seconds, timezone(TIME_ZONE))
        super().save(
            force_insert=force_insert,
            force_update=force_update,
            using=using,
            update_fields=update_fields)
|