333 lines
14 KiB
Python
333 lines
14 KiB
Python
#
|
|
# OtterTune - test_utils.py
|
|
#
|
|
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
|
|
#
|
|
|
|
import string
|
|
import numpy as np
|
|
from django.test import TestCase
|
|
from website.utils import JSONUtil, MediaUtil, DataUtil, ConversionUtil, LabelUtil, TaskUtil
|
|
from website.types import LabelStyleType, VarType
|
|
from website.models import Result, DBMSCatalog
|
|
|
|
|
|
class JSONUtilTest(TestCase):
|
|
def test_util(self):
|
|
json_str = \
|
|
"""{
|
|
"glossary": {
|
|
"title": "example glossary",
|
|
"GlossDiv": {
|
|
"title": "S",
|
|
"GlossList": {
|
|
"GlossEntry": {
|
|
"ID": "SGML",
|
|
"SortAs": "SGML",
|
|
"GlossTerm": "Standard Generalized Markup Language",
|
|
"Acronym": "SGML",
|
|
"Abbrev": "ISO 8879:1986",
|
|
"GlossDef": {
|
|
"para": "A meta-markup language",
|
|
"GlossSeeAlso": ["GML", "XML"]
|
|
},
|
|
"GlossSee": "markup"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}"""
|
|
|
|
compress_str = """{"glossary": {"title": "example glossary",
|
|
"GlossDiv": {"title": "S", "GlossList": {"GlossEntry": {"ID": "SGML",
|
|
"SortAs": "SGML", "GlossTerm": "Standard Generalized Markup
|
|
Language", "Acronym": "SGML", "Abbrev": "ISO 8879:1986", "GlossDef":
|
|
{"para": "A meta-markup language", "GlossSeeAlso": ["GML", "XML"]}, "GlossSee":
|
|
"markup"}}}}}"""
|
|
|
|
results = JSONUtil.loads(json_str)
|
|
self.assertEqual(list(results.keys())[0], "glossary")
|
|
self.assertTrue("title" in list(results["glossary"].keys()))
|
|
self.assertTrue("GlossDiv" in list(results["glossary"].keys()))
|
|
self.assertEqual(results["glossary"]["GlossDiv"]
|
|
["GlossList"]["GlossEntry"]["ID"], "SGML")
|
|
self.assertEqual(results["glossary"]["GlossDiv"]
|
|
["GlossList"]["GlossEntry"]["GlossSee"], "markup")
|
|
|
|
result_str = "".join(JSONUtil.dumps(results).split())
|
|
self.assertEqual(result_str, "".join(compress_str.split()))
|
|
|
|
|
|
class MediaUtilTest(TestCase):
|
|
def test_codegen(self):
|
|
code20 = MediaUtil.upload_code_generator(20)
|
|
self.assertEqual(len(code20), 20)
|
|
self.assertTrue(code20.isalnum())
|
|
code40 = MediaUtil.upload_code_generator(40)
|
|
self.assertEqual(len(code40), 40)
|
|
self.assertTrue(code40.isalnum())
|
|
digit_code = MediaUtil.upload_code_generator(40, string.digits)
|
|
self.assertEqual(len(digit_code), 40)
|
|
self.assertTrue(digit_code.isdigit())
|
|
letter_code = MediaUtil.upload_code_generator(60,
|
|
string.ascii_uppercase)
|
|
self.assertEqual(len(letter_code), 60)
|
|
self.assertTrue(letter_code.isalpha())
|
|
|
|
|
|
class TaskUtilTest(TestCase):
|
|
def test_get_task_status(self):
|
|
# FIXME: Actually setup celery tasks instead of a dummy class?
|
|
test_tasks = []
|
|
|
|
(status, num_complete) = TaskUtil.get_task_status(test_tasks)
|
|
self.assertTrue(status is None and num_complete == 0)
|
|
|
|
test_tasks2 = [VarType() for i in range(5)]
|
|
for task in test_tasks2:
|
|
task.status = "SUCCESS"
|
|
|
|
(status, num_complete) = TaskUtil.get_task_status(test_tasks2)
|
|
self.assertTrue(status == "SUCCESS" and num_complete == 5)
|
|
|
|
test_tasks3 = test_tasks2
|
|
test_tasks3[3].status = "FAILURE"
|
|
|
|
(status, num_complete) = TaskUtil.get_task_status(test_tasks3)
|
|
self.assertTrue(status == "FAILURE" and num_complete == 3)
|
|
|
|
test_tasks4 = test_tasks3
|
|
test_tasks4[2].status = "REVOKED"
|
|
|
|
(status, num_complete) = TaskUtil.get_task_status(test_tasks4)
|
|
self.assertTrue(status == "REVOKED" and num_complete == 2)
|
|
|
|
test_tasks5 = test_tasks4
|
|
test_tasks5[1].status = "RETRY"
|
|
|
|
(status, num_complete) = TaskUtil.get_task_status(test_tasks5)
|
|
self.assertTrue(status == "RETRY" and num_complete == 1)
|
|
|
|
test_tasks6 = [VarType() for i in range(10)]
|
|
for i, task in enumerate(test_tasks6):
|
|
task.status = "PENDING" if i % 2 == 0 else "SUCCESS"
|
|
|
|
(status, num_complete) = TaskUtil.get_task_status(test_tasks6)
|
|
self.assertTrue(status == "PENDING" and num_complete == 5)
|
|
|
|
test_tasks7 = test_tasks6
|
|
test_tasks7[9].status = "STARTED"
|
|
|
|
(status, num_complete) = TaskUtil.get_task_status(test_tasks7)
|
|
self.assertTrue(status == "STARTED" and num_complete == 4)
|
|
|
|
test_tasks8 = test_tasks7
|
|
test_tasks8[9].status = "RECEIVED"
|
|
|
|
(status, num_complete) = TaskUtil.get_task_status(test_tasks8)
|
|
self.assertTrue(status == "RECEIVED" and num_complete == 4)
|
|
|
|
with self.assertRaises(Exception):
|
|
test_tasks9 = [VarType() for i in range(1)]
|
|
test_tasks9[0].status = "attemped"
|
|
TaskUtil.get_task_status(test_tasks9)
|
|
|
|
|
|
class DataUtilTest(TestCase):
|
|
|
|
fixtures = ['test_website.json', 'postgres-96_knobs.json']
|
|
|
|
def test_aggregate(self):
|
|
|
|
workload2 = Result.objects.filter(workload=2)
|
|
num_results = Result.objects.filter(workload=2).count()
|
|
knobs = list(JSONUtil.loads(workload2[0].knob_data.data).keys())
|
|
metrics = list(JSONUtil.loads(workload2[0].metric_data.data).keys())
|
|
num_knobs = len(knobs)
|
|
num_metrics = len(metrics)
|
|
|
|
test_result = DataUtil.aggregate_data(workload2)
|
|
|
|
self.assertTrue('X_matrix' in list(test_result.keys()))
|
|
self.assertTrue('y_matrix' in list(test_result.keys()))
|
|
self.assertTrue('rowlabels' in list(test_result.keys()))
|
|
self.assertTrue('X_columnlabels' in list(test_result.keys()))
|
|
self.assertTrue('y_columnlabels' in list(test_result.keys()))
|
|
|
|
self.assertEqual(test_result['X_columnlabels'], knobs)
|
|
self.assertEqual(test_result['y_columnlabels'], metrics)
|
|
self.assertEqual(test_result['X_matrix'].shape[0], num_results)
|
|
self.assertEqual(test_result['y_matrix'].shape[0], num_results)
|
|
self.assertEqual(test_result['X_matrix'].shape[1], num_knobs)
|
|
self.assertEqual(test_result['y_matrix'].shape[1], num_metrics)
|
|
|
|
def test_combine(self):
|
|
test_dedup_row_labels = np.array(["Workload-0", "Workload-1"])
|
|
test_dedup_x = np.matrix([[0.22, 5, "string", "11:11", "fsync", True],
|
|
[0.21, 6, "string", "11:12", "fsync", True]])
|
|
test_dedup_y = np.matrix([[30, 30, 40],
|
|
[10, 10, 40]])
|
|
|
|
test_x, test_y, row_labels = DataUtil.combine_duplicate_rows(
|
|
test_dedup_x, test_dedup_y, test_dedup_row_labels)
|
|
|
|
self.assertEqual(len(test_x), len(test_y))
|
|
self.assertEqual(len(test_x), len(row_labels))
|
|
|
|
self.assertEqual(row_labels[0], tuple([test_dedup_row_labels[0]]))
|
|
self.assertEqual(row_labels[1], tuple([test_dedup_row_labels[1]]))
|
|
self.assertTrue((test_x[0] == test_dedup_x[0]).all())
|
|
self.assertTrue((test_x[1] == test_dedup_x[1]).all())
|
|
self.assertTrue((test_y[0] == test_dedup_y[0]).all())
|
|
self.assertTrue((test_y[1] == test_dedup_y[1]).all())
|
|
|
|
test_row_labels = np.array(["Workload-0",
|
|
"Workload-1",
|
|
"Workload-2",
|
|
"Workload-3"])
|
|
test_x_matrix = np.matrix([[0.22, 5, "string", "timestamp", "enum", True],
|
|
[0.3, 5, "rstring", "timestamp2", "enum", False],
|
|
[0.22, 5, "string", "timestamp", "enum", True],
|
|
[0.3, 5, "r", "timestamp2", "enum", False]])
|
|
test_y_matrix = np.matrix([[20, 30, 40],
|
|
[30, 30, 40],
|
|
[20, 30, 40],
|
|
[32, 30, 40]])
|
|
|
|
test_x, test_y, row_labels = DataUtil.combine_duplicate_rows(
|
|
test_x_matrix, test_y_matrix, test_row_labels)
|
|
|
|
self.assertTrue(len(test_x) <= len(test_x_matrix))
|
|
self.assertTrue(len(test_y) <= len(test_y_matrix))
|
|
self.assertEqual(len(test_x), len(test_y))
|
|
self.assertEqual(len(test_x), len(row_labels))
|
|
|
|
row_labels_set = set(row_labels)
|
|
self.assertTrue(tuple(["Workload-0", "Workload-2"]) in row_labels_set)
|
|
self.assertTrue(("Workload-1",) in row_labels_set)
|
|
self.assertTrue(("Workload-3",) in row_labels_set)
|
|
|
|
rows = set()
|
|
for i in test_x:
|
|
self.assertTrue(tuple(i) not in rows)
|
|
self.assertTrue(i in test_x_matrix)
|
|
rows.add(tuple(i))
|
|
|
|
rowys = set()
|
|
for i in test_y:
|
|
self.assertTrue(tuple(i) not in rowys)
|
|
self.assertTrue(i in test_y_matrix)
|
|
rowys.add(tuple(i))
|
|
|
|
def test_no_featured_categorical(self):
|
|
featured_knobs = ['global.backend_flush_after',
|
|
'global.bgwriter_delay',
|
|
'global.wal_writer_delay',
|
|
'global.work_mem']
|
|
postgresdb = DBMSCatalog.objects.get(pk=1)
|
|
categorical_info = DataUtil.dummy_encoder_helper(featured_knobs,
|
|
dbms=postgresdb)
|
|
self.assertEqual(len(categorical_info['n_values']), 0)
|
|
self.assertEqual(len(categorical_info['categorical_features']), 0)
|
|
self.assertEqual(categorical_info['cat_columnlabels'], [])
|
|
self.assertEqual(categorical_info['noncat_columnlabels'], featured_knobs)
|
|
|
|
def test_featured_categorical(self):
|
|
featured_knobs = ['global.backend_flush_after',
|
|
'global.bgwriter_delay',
|
|
'global.wal_writer_delay',
|
|
'global.work_mem',
|
|
'global.wal_sync_method'] # last knob categorical
|
|
postgresdb = DBMSCatalog.objects.get(pk=1)
|
|
categorical_info = DataUtil.dummy_encoder_helper(featured_knobs,
|
|
dbms=postgresdb)
|
|
self.assertEqual(len(categorical_info['n_values']), 1)
|
|
self.assertEqual(categorical_info['n_values'][0], 4)
|
|
self.assertEqual(len(categorical_info['categorical_features']), 1)
|
|
self.assertEqual(categorical_info['categorical_features'][0], 4)
|
|
self.assertEqual(categorical_info['cat_columnlabels'], ['global.wal_sync_method'])
|
|
self.assertEqual(categorical_info['noncat_columnlabels'], featured_knobs[:-1])
|
|
|
|
|
|
class ConversionUtilTest(TestCase):
|
|
def test_get_raw_size(self):
|
|
# Bytes - In Bytes
|
|
byte_test_convert = ['1PB', '2TB', '3GB', '4MB', '5kB', '6B']
|
|
byte_ans = [1024**5, 2 * 1024**4, 3 * 1024**3, 4 * 1024**2, 5 * 1024**1, 6]
|
|
for i, byte_test in enumerate(byte_test_convert):
|
|
byte_conversion = ConversionUtil.get_raw_size(
|
|
byte_test, system=ConversionUtil.DEFAULT_BYTES_SYSTEM)
|
|
self.assertEqual(byte_conversion, byte_ans[i])
|
|
|
|
# Time - In Milliseconds
|
|
day_test_convert = ['1000ms', '1s', '10min', '20h', '1d']
|
|
day_ans = [1000, 1000, 600000, 72000000, 86400000]
|
|
for i, day_test in enumerate(day_test_convert):
|
|
day_conversion = ConversionUtil.get_raw_size(
|
|
day_test, system=ConversionUtil.DEFAULT_TIME_SYSTEM)
|
|
self.assertEqual(day_conversion, day_ans[i])
|
|
|
|
def test_get_human_readable(self):
|
|
# Bytes
|
|
byte_test_convert = [1024**5, 2 * 1024**4, 3 * 1024**3,
|
|
4 * 1024**2, 5 * 1024**1, 6]
|
|
byte_ans = ['1PB', '2TB', '3GB', '4MB', '5kB', '6B']
|
|
for i, byte_test in enumerate(byte_test_convert):
|
|
byte_readable = ConversionUtil.get_human_readable(
|
|
byte_test, system=ConversionUtil.DEFAULT_BYTES_SYSTEM)
|
|
self.assertEqual(byte_readable, byte_ans[i])
|
|
|
|
# Time
|
|
day_test_convert = [500, 1000, 55000, 600000, 72000000, 86400000]
|
|
day_ans = ['500ms', '1s', '55s', '10min', '20h', '1d']
|
|
for i, day_test in enumerate(day_test_convert):
|
|
day_readable = ConversionUtil.get_human_readable(
|
|
day_test, system=ConversionUtil.DEFAULT_TIME_SYSTEM)
|
|
self.assertEqual(day_readable, day_ans[i])
|
|
|
|
|
|
class LabelUtilTest(TestCase):
|
|
def test_style_labels(self):
|
|
label_style = LabelStyleType()
|
|
|
|
test_label_map = {"Name": "Postgres",
|
|
"Test": "LabelUtils",
|
|
"DBMS": "dbms",
|
|
"??": "Dbms",
|
|
"???": "DBms",
|
|
"CapF": "random Word"}
|
|
|
|
res_title_label_map = LabelUtil.style_labels(test_label_map,
|
|
style=label_style.TITLE)
|
|
|
|
test_keys = ["Name", "Test", "DBMS", "??", "???", "CapF"]
|
|
title_ans = ["Postgres", "Labelutils", "DBMS", "DBMS", "DBMS",
|
|
"Random Word"]
|
|
|
|
for i, key in enumerate(test_keys):
|
|
self.assertEqual(res_title_label_map[key], title_ans[i])
|
|
|
|
res_capfirst_label_map = LabelUtil.style_labels(test_label_map,
|
|
style=label_style.CAPFIRST)
|
|
|
|
cap_ans = ["Postgres", "LabelUtils", "DBMS", "DBMS", "DBMS",
|
|
"Random Word"]
|
|
|
|
for i, key in enumerate(test_keys):
|
|
if (key == "???"): # DBms -> DBMS or DBms?
|
|
continue
|
|
self.assertEqual(res_capfirst_label_map[key], cap_ans[i])
|
|
|
|
res_lower_label_map = LabelUtil.style_labels(test_label_map,
|
|
style=label_style.LOWER)
|
|
|
|
lower_ans = ["postgres", "labelutils", "dbms", "dbms", "dbms",
|
|
"random word"]
|
|
|
|
for i, key in enumerate(test_keys):
|
|
self.assertEqual(res_lower_label_map[key], lower_ans[i])
|
|
|
|
with self.assertRaises(Exception):
|
|
LabelUtil.style_labels(test_label_map,
|
|
style=label_style.Invalid)
|