#
# OtterTune - models.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
from collections import OrderedDict

from pytz import timezone
from django.contrib.auth.models import User
from django.db import models, DEFAULT_DB_ALIAS
from django.utils.datetime_safe import datetime
from django.utils.timezone import now

from .db import target_objectives
from .settings import TIME_ZONE
from .types import (DBMSType, LabelStyleType, MetricType, KnobUnitType,
                    PipelineTaskType, VarType, KnobResourceType,
                    WorkloadStatusType, AlgorithmType, StorageType)


class BaseModel(models.Model):
    """Abstract base providing a string representation and field labels."""

    def __str__(self):
        return self.__unicode__()

    def __unicode__(self):
        # Fall back to the primary key for models without a ``name``.
        return getattr(self, 'name', str(self.pk))

    @classmethod
    def get_labels(cls, style=LabelStyleType.DEFAULT_STYLE):
        """Return the styled ``{field name: verbose name}`` labels of the model.

        Fields that expose no ``verbose_name`` are skipped. The ``id`` field
        is labelled ``'<model name> id'``. The resulting dict is passed
        through ``LabelUtil.style_labels`` together with ``style``.
        """
        # Imported locally, as in the original code (presumably to avoid a
        # circular import with .utils -- TODO confirm).
        from .utils import LabelUtil

        labels = {}
        for field in cls._meta.get_fields():
            try:
                verbose_name = field.verbose_name
            except AttributeError:
                # This field object has no verbose name; leave it unlabelled.
                continue
            if field.name == 'id':
                verbose_name = cls._model_name() + ' id'
            labels[field.name] = verbose_name
        return LabelUtil.style_labels(labels, style)

    @classmethod
    def _model_name(cls):
        return cls.__name__

    class Meta:  # pylint: disable=no-init
        abstract = True


class DBMSCatalog(BaseModel):
    """A DBMS identified by its type and version."""

    type = models.IntegerField(choices=DBMSType.choices())
    version = models.CharField(max_length=16)

    @property
    def name(self):
        return DBMSType.name(self.type)

    @property
    def key(self):
        """Return ``'<name>_<version>'``."""
        return '{}_{}'.format(self.name, self.version)

    @property
    def full_name(self):
        """Return ``'<name> v<version>'``."""
        return '{} v{}'.format(self.name, self.version)

    def __unicode__(self):
        return self.full_name


class KnobCatalog(BaseModel):
    """Catalog entry describing one knob of a DBMS."""

    dbms = models.ForeignKey(DBMSCatalog)
    name = models.CharField(max_length=128)
    vartype = models.IntegerField(choices=VarType.choices(), verbose_name="variable type")
    unit = models.IntegerField(choices=KnobUnitType.choices())
    category = models.TextField(null=True)
    summary = models.TextField(null=True, verbose_name='description')
    description = models.TextField(null=True)
    scope = models.CharField(max_length=16)
    minval = models.CharField(max_length=32, null=True, verbose_name="minimum value")
    maxval = models.CharField(max_length=32, null=True, verbose_name="maximum value")
    default = models.TextField(verbose_name="default value")
    enumvals = models.TextField(null=True, verbose_name="valid values")
    context = models.CharField(max_length=32)
    tunable = models.BooleanField(verbose_name="tunable")
    resource = models.IntegerField(choices=KnobResourceType.choices(), default=4)


class MetricCatalog(BaseModel):
    """Catalog entry describing one metric of a DBMS."""

    dbms = models.ForeignKey(DBMSCatalog)
    name = models.CharField(max_length=128)
    vartype = models.IntegerField(choices=VarType.choices())
    summary = models.TextField(null=True, verbose_name='description')
    scope = models.CharField(max_length=16)
    metric_type = models.IntegerField(choices=MetricType.choices())


class Project(BaseModel):
    """A user-owned project; sessions reference it via ``Session.project``."""

    user = models.ForeignKey(User)
    name = models.CharField(max_length=64, verbose_name="project name")
    description = models.TextField(null=True, blank=True)
    creation_time = models.DateTimeField()
    last_update = models.DateTimeField()

    def delete(self, using=DEFAULT_DB_ALIAS, keep_parents=False):
        """Delete the project after deleting each of its sessions.

        Sessions are deleted one at a time so that ``Session.delete`` runs
        for every one of them.
        """
        for session in Session.objects.filter(project=self):
            session.delete()
        super(Project, self).delete(using, keep_parents)

    class Meta:  # pylint: disable=no-init
        unique_together = ('user', 'name')


class Hardware(BaseModel):
    """A hardware configuration: CPU count, memory, storage and storage type."""

    @property
    def name(self):
        return '{} CPUs, {}GB RAM, {}GB {}'.format(
            self.cpu, self.memory, self.storage, StorageType.name(self.storage_type))

    cpu = models.IntegerField(default=4, verbose_name='Number of CPUs')
    memory = models.IntegerField(default=16, verbose_name='Memory (GB)')
    storage = models.IntegerField(default=32, verbose_name='Storage (GB)')
    storage_type = models.IntegerField(choices=StorageType.choices(),
                                       default=StorageType.SSD,
                                       verbose_name='Storage Type')
    additional_specs = models.TextField(null=True, default=None)

    class Meta:  # pylint: disable=no-init
        unique_together = ('cpu', 'memory', 'storage', 'storage_type')


class Session(BaseModel):
    """A session belonging to a project, bound to one DBMS and one hardware."""

    TUNING_OPTIONS = OrderedDict([
        ("tuning_session", "Tuning Session"),
        ("no_tuning_session", "No Tuning"),
        ("randomly_generate", "Randomly Generate"),
        ("lhs", "Run LHS")
    ])

    user = models.ForeignKey(User)
    name = models.CharField(max_length=64, verbose_name="session name")
    description = models.TextField(null=True, blank=True)
    dbms = models.ForeignKey(DBMSCatalog)
    hardware = models.ForeignKey(Hardware)
    algorithm = models.IntegerField(choices=AlgorithmType.choices(),
                                    default=AlgorithmType.GPR)
    lhs_samples = models.TextField(default="[]")
    ddpg_actor_model = models.BinaryField(null=True, blank=True)
    ddpg_critic_model = models.BinaryField(null=True, blank=True)
    ddpg_reply_memory = models.BinaryField(null=True, blank=True)
    dnn_model = models.BinaryField(null=True, blank=True)

    project = models.ForeignKey(Project)
    creation_time = models.DateTimeField()
    last_update = models.DateTimeField()

    upload_code = models.CharField(max_length=30, unique=True)
    tuning_session = models.CharField(choices=TUNING_OPTIONS.items(),
                                      max_length=64, default='tuning_session',
                                      verbose_name='session type')

    target_objective = models.CharField(
        max_length=64, default=target_objectives.default())
    # JSON object holding the default hyperparameters. The literal is split
    # across lines with implicit concatenation; the resulting text (including
    # its whitespace) is kept identical to the existing default value.
    hyperparameters = models.TextField(default=(
        '{ "DDPG_ACTOR_HIDDEN_SIZES": [128, 128, 64], '
        '"DDPG_ACTOR_LEARNING_RATE": 0.02, '
        '"DDPG_CRITIC_HIDDEN_SIZES": [64, 128, 64], '
        '"DDPG_CRITIC_LEARNING_RATE": 0.001, '
        '"DDPG_BATCH_SIZE": 32, '
        '"DDPG_GAMMA": 0.0, '
        '"DDPG_SIMPLE_REWARD": true, '
        '"DDPG_UPDATE_EPOCHS": 30, '
        '"DDPG_USE_DEFAULT": false, '
        '"DNN_DEBUG": true, '
        '"DNN_DEBUG_INTERVAL": 100, '
        '"DNN_EXPLORE": false, '
        '"DNN_EXPLORE_ITER": 500, '
        '"DNN_GD_ITER": 100, '
        '"DNN_NOISE_SCALE_BEGIN": 0.1, '
        '"DNN_NOISE_SCALE_END": 0.0, '
        '"DNN_TRAIN_ITER": 100, '
        '"FLIP_PROB_DECAY": 0.5, '
        '"GPR_BATCH_SIZE": 3000, '
        '"GPR_DEBUG": true, '
        '"GPR_EPS": 0.001, '
        '"GPR_EPSILON": 1e-06, '
        '"GPR_LEARNING_RATE": 0.01, '
        '"GPR_LENGTH_SCALE": 2.0, '
        '"GPR_MAGNITUDE": 1.0, '
        '"GPR_MAX_ITER": 500, '
        '"GPR_MAX_TRAIN_SIZE": 7000, '
        '"GPR_MU_MULTIPLIER": 1.0, \n'
        '"GPR_MODEL_NAME": "BasicGP", '
        '"GPR_HP_LEARNING_RATE": 0.001, '
        '"GPR_HP_MAX_ITER": 5000, '
        '"GPR_RIDGE": 1.0, '
        '"GPR_SIGMA_MULTIPLIER": 1.0, '
        '"GPR_UCB_SCALE": 0.2, '
        '"GPR_USE_GPFLOW": true, '
        '"GPR_UCB_BETA": "get_beta_td", '
        '"IMPORTANT_KNOB_NUMBER": 10000, '
        '"INIT_FLIP_PROB": 0.3, '
        '"NUM_SAMPLES": 30, '
        '"TF_NUM_THREADS": 4, '
        '"TOP_NUM_CONFIG": 10}'
    ))

    def clean(self):
        """Fall back to the default target objective when none is set."""
        if self.target_objective is None:
            self.target_objective = target_objectives.default()

    def delete(self, using=DEFAULT_DB_ALIAS, keep_parents=False):
        """Delete the session together with its session knobs and results.

        For every result of the session, the referenced knob data and metric
        data are deleted before the result itself.

        Args:
            using: database alias forwarded to the parent ``delete``.
            keep_parents: forwarded to the parent ``delete``.
        """
        # A session may own zero or many SessionKnob rows, so remove them with
        # a queryset delete; ``get()`` would raise DoesNotExist or
        # MultipleObjectsReturned in those cases.
        SessionKnob.objects.filter(session=self).delete()
        for result in Result.objects.filter(session=self):
            result.knob_data.delete()
            result.metric_data.delete()
            result.delete()
        # Forward the caller's arguments instead of hard-coded defaults.
        super(Session, self).delete(using=using, keep_parents=keep_parents)

    class Meta:  # pylint: disable=no-init
        unique_together = ('user', 'project', 'name')


class SessionKnobManager(models.Manager):
    """Manager with helpers to read and update the knobs of a session."""

    @staticmethod
    def get_knobs_for_session(session):
        """Return the tunable knobs of ``session`` as a list of dicts.

        Each dict holds the ``KnobCatalog`` column values of one knob, with
        ``minval``, ``maxval`` and ``tunable`` overridden by the session-level
        values and ``upperbound`` / ``lowerbound`` added. For ENUM knobs the
        range becomes ``0 .. number of enum values - 1``; for BOOL knobs it
        becomes ``0 .. 1``.
        """
        tunable_knobs = SessionKnob.objects.filter(
            session=session, tunable=True).prefetch_related('knob')
        session_knobs = {s.knob.pk: s for s in tunable_knobs}
        knob_dicts = list(KnobCatalog.objects.filter(id__in=session_knobs.keys()).values())
        for knob_info in knob_dicts:
            sess_knob = session_knobs[knob_info['id']]
            knob_info['minval'] = sess_knob.minval
            knob_info['maxval'] = sess_knob.maxval
            knob_info['upperbound'] = sess_knob.upperbound
            knob_info['lowerbound'] = sess_knob.lowerbound
            knob_info['tunable'] = sess_knob.tunable
            # Compare by value: identity checks on numbers are not reliable.
            if knob_info['vartype'] == VarType.ENUM:
                enumvals = knob_info['enumvals'].split(',')
                knob_info["minval"] = 0
                knob_info["maxval"] = len(enumvals) - 1
            if knob_info['vartype'] == VarType.BOOL:
                knob_info["minval"] = 0
                knob_info["maxval"] = 1
        return knob_dicts

    @staticmethod
    def get_knob_min_max_tunability(session, tunable_only=False):
        """Return ``{knob name: {'tunable', 'minval', 'maxval'}}`` for ``session``.

        Args:
            session: the session whose knobs are looked up.
            tunable_only: when true, only knobs marked tunable are included.
        """
        filter_args = dict(session=session)
        if tunable_only:
            filter_args['tunable'] = True
        session_knobs = SessionKnob.objects.filter(**filter_args).values(
            'knob__name', 'tunable', 'minval', 'maxval')
        session_knob_dicts = {}
        for entry in session_knobs:
            new_entry = dict(entry)
            knob_name = new_entry.pop('knob__name')
            session_knob_dicts[knob_name] = new_entry
        return session_knob_dicts

    @staticmethod
    def set_knob_min_max_tunability(session, knob_dicts, cascade=True, disable_others=False):
        """Update the knobs of ``session`` from ``knob_dicts``.

        Knob names are matched case-insensitively. Returns nothing.

        Args:
            session: the session whose knobs are updated.
            knob_dicts: maps a knob name to a dict with the required keys
                ``minval``, ``maxval`` and ``tunable`` and the optional keys
                ``upperbound`` and ``lowerbound``.
            cascade: when true, also update the matching ``KnobCatalog``
                entry: its ``tunable`` flag is overwritten and, for INTEGER
                and REAL knobs, its range is widened to include the session
                range.
            disable_others: when true, session knobs that are absent from
                ``knob_dicts`` are marked as not tunable.
        """
        knob_dicts = {k.lower(): v for k, v in knob_dicts.items()}
        session_knobs = {k.name.lower(): k for k in SessionKnob.objects.filter(session=session)}
        for lower_name, session_knob in session_knobs.items():
            if lower_name in knob_dicts:
                settings = knob_dicts[lower_name]
                session_knob.minval = settings["minval"]
                session_knob.maxval = settings["maxval"]
                session_knob.tunable = settings["tunable"]
                if "upperbound" in settings:
                    session_knob.upperbound = settings["upperbound"]
                if "lowerbound" in settings:
                    session_knob.lowerbound = settings["lowerbound"]
                session_knob.save()
                if cascade:
                    knob = KnobCatalog.objects.get(name=session_knob.name, dbms=session.dbms)
                    knob.tunable = session_knob.tunable
                    if knob.vartype in (VarType.INTEGER, VarType.REAL):
                        # NOTE(review): these comparisons assume the supplied
                        # minval/maxval are numbers, not strings -- confirm
                        # against the callers.
                        if knob.minval is None or session_knob.minval < float(knob.minval):
                            knob.minval = session_knob.minval
                        if knob.maxval is None or session_knob.maxval > float(knob.maxval):
                            knob.maxval = session_knob.maxval
                    knob.save()
            elif disable_others:
                # Set all knobs not in knob_dicts to not tunable
                session_knob.tunable = False
                session_knob.save()


class SessionKnob(BaseModel):
    """Per-session range, bounds and tunability of one catalog knob."""

    @property
    def name(self):
        return self.knob.name

    objects = SessionKnobManager()
    session = models.ForeignKey(Session)
    knob = models.ForeignKey(KnobCatalog)
    minval = models.CharField(max_length=32, null=True, verbose_name="minimum value")
    maxval = models.CharField(max_length=32, null=True, verbose_name="maximum value")
    upperbound = models.CharField(max_length=32, null=True, verbose_name="upperbound")
    lowerbound = models.CharField(max_length=32, null=True, verbose_name="lowerbound")
    tunable = models.BooleanField(verbose_name="tunable")


class DataModel(BaseModel):
    """Abstract base for named data blobs attached to a session and a DBMS."""

    session = models.ForeignKey(Session)
    name = models.CharField(max_length=50)
    creation_time = models.DateTimeField()
    data = models.TextField()
    dbms = models.ForeignKey(DBMSCatalog)

    class Meta:  # pylint: disable=no-init
        abstract = True


class DataManager(models.Manager):
    """Base manager for ``DataModel`` subclasses."""

    @staticmethod
    def create_name(data_obj, key):
        """Return ``'<key>@<mm-dd-yy of creation_time>#<pk>'`` for ``data_obj``."""
        ts = data_obj.creation_time.strftime("%m-%d-%y")
        return key + '@' + ts + '#' + str(data_obj.pk)


class KnobDataManager(DataManager):

    def create_knob_data(self, session, knobs, data, dbms):
        """Return the ``KnobData`` for ``(session, knobs)``, creating it if missing.

        A newly created row is named with ``create_name`` using ``dbms.key``.
        """
        try:
            return KnobData.objects.get(session=session, knobs=knobs)
        except KnobData.DoesNotExist:
            knob_data = self.create(session=session,
                                    knobs=knobs,
                                    data=data,
                                    dbms=dbms,
                                    creation_time=now())
            # The name embeds the primary key, so it is set after creation.
            knob_data.name = self.create_name(knob_data, dbms.key)
            knob_data.save()
            return knob_data


class KnobData(DataModel):
    objects = KnobDataManager()

    knobs = models.TextField()


class MetricDataManager(DataManager):

    def create_metric_data(self, session, metrics, data, dbms):
        """Create, name and return a new ``MetricData`` row."""
        metric_data = self.create(session=session,
                                  metrics=metrics,
                                  data=data,
                                  dbms=dbms,
                                  creation_time=now())
        # The name embeds the primary key, so it is set after creation.
        metric_data.name = self.create_name(metric_data, dbms.key)
        metric_data.save()
        return metric_data


class MetricData(DataModel):
    objects = MetricDataManager()

    metrics = models.TextField()


class WorkloadManager(models.Manager):

    def create_workload(self, dbms, hardware, name, project):
        """Return the workload matching the arguments, creating it if missing."""
        # (dbms,hardware,name) should be unique for each workload
        try:
            return Workload.objects.get(dbms=dbms, hardware=hardware,
                                        name=name, project=project)
        except Workload.DoesNotExist:
            return self.create(dbms=dbms,
                               hardware=hardware,
                               name=name,
                               project=project)


class Workload(BaseModel):
    """A named workload, unique per DBMS, hardware and project."""

    # __DEFAULT_FMT = '{db}_{hw}_UNASSIGNED'.format

    objects = WorkloadManager()

    dbms = models.ForeignKey(DBMSCatalog)
    hardware = models.ForeignKey(Hardware)
    name = models.CharField(max_length=128, verbose_name='workload name')
    project = models.ForeignKey(Project)
    status = models.IntegerField(choices=WorkloadStatusType.choices(),
                                 default=WorkloadStatusType.MODIFIED,
                                 editable=False)

    def delete(self, using=DEFAULT_DB_ALIAS, keep_parents=False):
        """Delete the workload after deleting its ``PipelineData`` rows."""
        # The results should not have corresponding workloads.
        # results = Result.objects.filter(workload=self)
        # if results.exists():
        #     raise Exception("Cannot delete {} workload since results exist. ".format(self.name))

        # Delete PipelineData with corresponding workloads
        for pipeline_data in PipelineData.objects.filter(workload=self):
            pipeline_data.delete()
        super(Workload, self).delete(using, keep_parents)

    class Meta:  # pylint: disable=no-init
        unique_together = ("dbms", "hardware", "name", "project")

    # @property
    # def isdefault(self):
    #     return self.cluster_name == self.default
    #
    # @property
    # def default(self):
    #     return self.__DEFAULT_FMT(db=self.dbms.pk,
    #                               hw=self.hardware.pk)
    #
    # @staticmethod
    # def get_default(dbms_id, hw_id):
    #     return Workload.__DEFAULT_FMT(db=dbms_id,
    #                                   hw=hw_id)


class PipelineRunManager(models.Manager):

    def get_latest(self):
        """Return the first run with an ``end_time``, or ``None`` if there is none.

        ``PipelineRun`` is ordered by descending id, so this is the run with
        the highest id among those that have an end time.
        """
        return self.all().exclude(end_time=None).first()


class PipelineRun(models.Model):
    """One pipeline run with its start time and (once set) end time."""

    objects = PipelineRunManager()

    start_time = models.DateTimeField()
    end_time = models.DateTimeField(null=True)

    def __unicode__(self):
        return str(self.pk)

    def __str__(self):
        return self.__unicode__()

    class Meta:  # pylint: disable=no-init
        ordering = ["-id"]


class PipelineData(models.Model):
    """Data stored for one task type and workload within a pipeline run."""

    pipeline_run = models.ForeignKey(PipelineRun, verbose_name='group')
    task_type = models.IntegerField(choices=PipelineTaskType.choices())
    workload = models.ForeignKey(Workload)
    data = models.TextField()
    creation_time = models.DateTimeField()

    class Meta:  # pylint: disable=no-init
        unique_together = ("pipeline_run", "task_type", "workload")


class ResultManager(models.Manager):

    def create_result(self, session, dbms, workload,
                      knob_data, metric_data,
                      observation_start_time,
                      observation_end_time,
                      observation_time,
                      task_ids=None,
                      next_config=None):
        """Create and return a ``Result`` stamped with the current time.

        ``next_config`` is stored in the ``next_configuration`` field.
        """
        return self.create(
            session=session,
            dbms=dbms,
            workload=workload,
            knob_data=knob_data,
            metric_data=metric_data,
            observation_start_time=observation_start_time,
            observation_end_time=observation_end_time,
            observation_time=observation_time,
            task_ids=task_ids,
            next_configuration=next_config,
            creation_time=now())


class Result(BaseModel):
    """One observation of a session: its knob data, metric data and timings."""

    objects = ResultManager()

    session = models.ForeignKey(Session, verbose_name='session name')
    dbms = models.ForeignKey(DBMSCatalog)
    workload = models.ForeignKey(Workload)
    knob_data = models.ForeignKey(KnobData)
    metric_data = models.ForeignKey(MetricData)

    creation_time = models.DateTimeField()
    observation_start_time = models.DateTimeField()
    observation_end_time = models.DateTimeField()
    observation_time = models.FloatField()
    task_ids = models.TextField(null=True)
    next_configuration = models.TextField(null=True)
    pipeline_knobs = models.ForeignKey(PipelineData, null=True, related_name='pipeline_knobs')
    pipeline_metrics = models.ForeignKey(PipelineData, null=True,
                                         related_name='pipeline_metrics')

    def __unicode__(self):
        return str(self.pk)


class BackupData(BaseModel):
    """Raw knob, metric, summary and log text kept for a result."""

    result = models.ForeignKey(Result)
    raw_knobs = models.TextField()
    raw_initial_metrics = models.TextField()
    raw_final_metrics = models.TextField()
    raw_summary = models.TextField()
    knob_log = models.TextField()
    metric_log = models.TextField()


class ExecutionTime(models.Model):
    """Execution time of a function, optionally linked to a result."""

    module = models.CharField(max_length=32)
    function = models.CharField(max_length=64)
    tag = models.CharField(max_length=64, blank=True, default='')
    start_time = models.DateTimeField()
    execution_time = models.FloatField()  # in seconds
    result = models.ForeignKey(Result, null=True, blank=True, default=None)

    @property
    def event(self):
        """Return the non-empty parts of module, function and tag joined by '.'."""
        return '.'.join((e for e in (self.module, self.function, self.tag) if e))

    def save(self, force_insert=False, force_update=False, using=None, update_fields=None):
        """Save the row, first converting a numeric ``start_time`` to a datetime.

        An ``int`` or ``float`` start time is truncated to whole seconds and
        interpreted as a timestamp in the ``TIME_ZONE`` time zone.
        """
        if isinstance(self.start_time, (int, float)):
            self.start_time = datetime.fromtimestamp(int(self.start_time), timezone(TIME_ZONE))
        super().save(force_insert=force_insert, force_update=force_update, using=using,
                     update_fields=update_fields)