Initial commit with BSL

Andy Pavlo · 2019-08-23 11:47:19 -04:00 · commit 3e564ce922
286 changed files with 177642 additions and 0 deletions


@@ -0,0 +1,5 @@
#
# OtterTune - __init__.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#

19 server/analysis/base.py Normal file

@@ -0,0 +1,19 @@
#
# OtterTune - base.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Oct 25, 2017
@author: dva
'''
from abc import ABCMeta, abstractmethod
class ModelBase(object, metaclass=ABCMeta):
@abstractmethod
def _reset(self):
pass

793 server/analysis/cluster.py Normal file

@@ -0,0 +1,793 @@
#
# OtterTune - cluster.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Jul 4, 2016
@author: dva
'''
from abc import ABCMeta, abstractproperty
from collections import OrderedDict
import os
import json
import copy
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import cdist
from sklearn.metrics import silhouette_score
from sklearn.cluster import KMeans as SklearnKMeans
from celery.utils.log import get_task_logger
from .base import ModelBase
# Log debug messages
LOGGER = get_task_logger(__name__)
class KMeans(ModelBase):
"""
KMeans:
Fits an Sklearn KMeans model to X.
See also
--------
http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
Attributes
----------
n_clusters_ : int
The number of clusters, K
cluster_inertia_ : float
Sum of squared distances of samples to their closest cluster center
cluster_labels_ : array, [n_clusters_]
Labels indicating the membership of each point
cluster_centers_ : array, [n_clusters, n_features]
Coordinates of cluster centers
sample_labels_ : array, [n_samples]
Labels for each of the samples in X
sample_distances_ : array, [n_samples]
The distance between each sample point and its cluster's center
Constants
---------
SAMPLE_CUTOFF_ : int
If n_samples > SAMPLE_CUTOFF_ then sample distances
are NOT recorded
"""
SAMPLE_CUTOFF_ = 1000
def __init__(self):
self.model_ = None
self.n_clusters_ = None
self.sample_labels_ = None
self.sample_distances_ = None
@property
def cluster_inertia_(self):
# Sum of squared distances of samples to their closest cluster center
return None if self.model_ is None else \
self.model_.inertia_
@property
def cluster_labels_(self):
# Cluster membership labels for each point
return None if self.model_ is None else \
copy.deepcopy(self.model_.labels_)
@property
def cluster_centers_(self):
# Coordinates of the cluster centers
return None if self.model_ is None else \
copy.deepcopy(self.model_.cluster_centers_)
def _reset(self):
"""Resets all attributes (erases the model)"""
self.model_ = None
self.n_clusters_ = None
self.sample_labels_ = None
self.sample_distances_ = None
def fit(self, X, K, sample_labels=None, estimator_params=None):
"""Fits a Sklearn KMeans model to X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data.
K : int
The number of clusters.
sample_labels : array-like, shape (n_samples), optional
Labels for each of the samples in X.
estimator_params : dict, optional
The parameters to pass to the KMeans estimators.
Returns
-------
self
"""
self._reset()
# Note: previously set n_init=50
self.model_ = SklearnKMeans(K)
if estimator_params is not None:
assert isinstance(estimator_params, dict)
self.model_.set_params(**estimator_params)
# Compute Kmeans model
self.model_.fit(X)
if sample_labels is None:
sample_labels = ["sample_{}".format(i) for i in range(X.shape[0])]
assert len(sample_labels) == X.shape[0]
self.sample_labels_ = np.array(sample_labels)
self.n_clusters_ = K
# Record sample label/distance from its cluster center
self.sample_distances_ = OrderedDict()
for cluster_label in range(self.n_clusters_):
assert cluster_label not in self.sample_distances_
member_rows = X[self.cluster_labels_ == cluster_label, :]
member_labels = self.sample_labels_[self.cluster_labels_ == cluster_label]
centroid = np.expand_dims(self.cluster_centers_[cluster_label], axis=0)
# "All clusters must have at least 1 member!"
if member_rows.shape[0] == 0:
return None
# Calculate distance between each member row and the current cluster
dists = np.empty(member_rows.shape[0])
dist_labels = []
for j, (row, label) in enumerate(zip(member_rows, member_labels)):
dists[j] = cdist(np.expand_dims(row, axis=0), centroid, "euclidean").squeeze()
dist_labels.append(label)
# Sort the distances/labels in ascending order
sort_order = np.argsort(dists)
dists = dists[sort_order]
dist_labels = np.array(dist_labels)[sort_order]
self.sample_distances_[cluster_label] = {
"sample_labels": dist_labels,
"distances": dists,
}
return self
def get_closest_samples(self):
"""Returns a list of the labels of the samples that are located closest
to their cluster's center.
Returns
----------
closest_samples : list
A list of the sample labels that are located the closest to
their cluster's center.
"""
if self.sample_distances_ is None:
raise Exception("No model has been fit yet!")
return [samples['sample_labels'][0] for samples in list(self.sample_distances_.values())]
def get_memberships(self):
'''
Return the memberships in each cluster
'''
memberships = OrderedDict()
for cluster_label, samples in list(self.sample_distances_.items()):
memberships[cluster_label] = OrderedDict(
[(l, d) for l, d in zip(samples["sample_labels"], samples["distances"])])
return json.dumps(memberships, indent=4)
class KMeansClusters(ModelBase):
"""
KMeansClusters:
Fits a KMeans model to X for clusters in the range [min_cluster_, max_cluster_].
Attributes
----------
min_cluster_ : int
The minimum cluster size to fit a KMeans model to
max_cluster_ : int
The maximum cluster size to fit a KMeans model to
cluster_map_ : dict
A dictionary mapping the cluster size (K) to the KMeans
model fitted to X with K clusters
sample_labels_ : array, [n_samples]
Labels for each of the samples in X
"""
def __init__(self):
self.min_cluster_ = None
self.max_cluster_ = None
self.cluster_map_ = None
self.sample_labels_ = None
def _reset(self):
"""Resets all attributes (erases the model)"""
self.min_cluster_ = None
self.max_cluster_ = None
self.cluster_map_ = None
self.sample_labels_ = None
def fit(self, X, min_cluster, max_cluster, sample_labels=None, estimator_params=None):
"""Fits a KMeans model to X for each cluster in the range [min_cluster, max_cluster].
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data.
min_cluster : int
The minimum cluster size to fit a KMeans model to.
max_cluster : int
The maximum cluster size to fit a KMeans model to.
sample_labels : array-like, shape (n_samples), optional
Labels for each of the samples in X.
estimator_params : dict, optional
The parameters to pass to the KMeans estimators.
Returns
-------
self
"""
self._reset()
self.min_cluster_ = min_cluster
self.max_cluster_ = max_cluster
self.cluster_map_ = {}
if sample_labels is None:
sample_labels = ["sample_{}".format(i) for i in range(X.shape[1])]
self.sample_labels_ = sample_labels
for K in range(self.min_cluster_, self.max_cluster_ + 1):
tmp = KMeans().fit(X, K, self.sample_labels_, estimator_params)
if tmp is None: # Set maximum cluster
assert K > min_cluster, "min_cluster is too large for the model"
self.max_cluster_ = K - 1
break
else:
self.cluster_map_[K] = tmp
return self
def save(self, savedir):
"""Saves the KMeans model results
Parameters
----------
savedir : string
Path to the directory to save the results in.
"""
if self.cluster_map_ is None:
raise Exception("No models have been fitted yet!")
cluster_map = OrderedDict()
inertias = []
for K, model in sorted(self.cluster_map_.items()):
cluster_map[K] = {
"cluster_inertia": model.cluster_inertia_,
"cluster_labels": model.cluster_labels_,
"cluster_centers": model.cluster_centers_,
}
inertias.append(model.cluster_inertia_)
# Save sum of squares plot (elbow curve)
fig = plt.figure()
plt.plot(list(cluster_map.keys()), inertias, '--o')
plt.xlabel("Number of clusters (K)")
plt.ylabel("Within sum of squares W_k")
plt.title("Within Sum of Squares vs. Number of Clusters")
fig.canvas.set_window_title(os.path.basename(savedir))
savepath = os.path.join(savedir, "kmeans_sum_of_squares.pdf")
plt.savefig(savepath, bbox_inches="tight")
plt.close()
# save cluster memberships
for K in range(self.min_cluster_, self.max_cluster_ + 1):
savepath = os.path.join(savedir,
"memberships_{}-clusters.json".format(K))
members = self.cluster_map_[K].get_memberships()
with open(savepath, "w") as f:
f.write(members)
class KSelection(ModelBase, metaclass=ABCMeta):
"""KSelection:
Abstract class for techniques that approximate the optimal
number of clusters (K).
Attributes
----------
optimal_num_clusters_ : int
An estimation of the optimal number of clusters K for
a KMeans model fit to X
clusters_ : array, [n_clusters]
The sizes of the clusters
name_ : string
The name of this technique
"""
NAME_ = None
def __init__(self):
self.optimal_num_clusters_ = None
self.clusters_ = None
def _reset(self):
"""Resets all attributes (erases the model)"""
self.optimal_num_clusters_ = None
self.clusters_ = None
@abstractproperty
def name_(self):
pass
def save(self, savedir):
"""Saves the estimation of the optimal # of clusters.
Parameters
----------
savedir : string
Path to the directory to save the results in.
"""
if self.optimal_num_clusters_ is None:
raise Exception("Optimal number of clusters has not been computed!")
# Save the computed optimal number of clusters
savepath = os.path.join(savedir, self.name_ + "_optimal_num_clusters.txt")
with open(savepath, "w") as f:
f.write(str(self.optimal_num_clusters_))
class GapStatistic(KSelection):
"""GapStatistic:
Approximates the optimal number of clusters (K).
References
----------
https://web.stanford.edu/~hastie/Papers/gap.pdf
Attributes
----------
optimal_num_clusters_ : int
An estimation of the optimal number of clusters K for
a KMeans model fit to X
clusters_ : array, [n_clusters]
The sizes of the clusters
name_ : string
The name of this technique
log_wks_ : array, [n_clusters]
The within-dispersion measures of X (log)
log_wkbs_ : array, [n_clusters]
The within-dispersion measures of the generated
reference data sets
khats_ : array, [n_clusters]
The gap-statistic for each cluster
"""
NAME_ = "gap-statistic"
def __init__(self):
super(GapStatistic, self).__init__()
self.log_wks_ = None
self.log_wkbs_ = None
self.khats_ = None
@property
def name_(self):
return self.NAME_
def _reset(self):
"""Resets all attributes (erases the model)"""
super(GapStatistic, self)._reset()
self.log_wks_ = None
self.log_wkbs_ = None
self.khats_ = None
def fit(self, X, cluster_map, n_b=50):
"""Estimates the optimal number of clusters (K) for a
KMeans model trained on X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data.
cluster_map : dict
A dictionary mapping each cluster size (K) to the KMeans
model fitted to X with K clusters
n_b : int
The number of reference data sets to generate
Returns
-------
self
"""
self._reset()
mins, maxs = GapStatistic.bounding_box(X)
n_clusters = len(cluster_map)
# Dispersion for real distribution
log_wks = np.zeros(n_clusters)
log_wkbs = np.zeros(n_clusters)
sk = np.zeros(n_clusters)
for indk, (K, model) in enumerate(sorted(cluster_map.items())):
# Computes Wk: the within-dispersion of each cluster size (k)
log_wks[indk] = np.log(model.cluster_inertia_ / (2.0 * K))
# Create B reference datasets
log_bwkbs = np.zeros(n_b)
for i in range(n_b):
Xb = np.empty_like(X)
for j in range(X.shape[1]):
Xb[:, j] = np.random.uniform(mins[j], maxs[j], size=X.shape[0])
Xb_model = KMeans().fit(Xb, K)
log_bwkbs[i] = np.log(Xb_model.cluster_inertia_ / (2.0 * K))
log_wkbs[indk] = sum(log_bwkbs) / n_b
sk[indk] = np.sqrt(sum((log_bwkbs - log_wkbs[indk]) ** 2) / n_b)
sk = sk * np.sqrt(1 + 1.0 / n_b)
khats = np.zeros(n_clusters)
gaps = log_wkbs - log_wks
gsks = gaps - sk
khats[1:] = gaps[0:-1] - gsks[1:]
self.clusters_ = np.array(sorted(cluster_map.keys()))
for i in range(1, n_clusters):
if gaps[i - 1] >= gsks[i]:
self.optimal_num_clusters_ = self.clusters_[i - 1]
break
if self.optimal_num_clusters_ is None:
LOGGER.info("GapStatistic NOT found the optimal k, \
use the last(maximum) k instead ")
self.optimal_num_clusters_ = self.clusters_[-1]
self.log_wks_ = log_wks
self.log_wkbs_ = log_wkbs
self.khats_ = khats
return self
@staticmethod
def bounding_box(X):
"""Computes the box that tightly bounds X
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data.
Returns
-------
The mins and maxs that make up the bounding box
"""
mins = np.min(X, axis=0)
maxs = np.max(X, axis=0)
return mins, maxs
@staticmethod
def Wk(X, mu, cluster_labels):
"""Computes the within-dispersion of each cluster size (k)
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data.
mu : array-like, shape (n_clusters, n_features)
Coordinates of cluster centers
cluster_labels: array-like, shape (n_samples)
Labels for each of the samples in X.
Returns
-------
The within-dispersion of each cluster (K)
"""
K = len(mu)
return sum([np.linalg.norm(mu[i] - x) ** 2 / (2.0 * K)
for i in range(K)
for x in X[cluster_labels == i]])
def save(self, savedir):
"""Saves the estimation results of the optimal # of clusters.
Parameters
----------
savedir : string
Path to the directory to save the results in.
"""
super(GapStatistic, self).save(savedir)
# Plot the calculated gap
gaps = self.log_wkbs_ - self.log_wks_
fig = plt.figure()
plt.plot(self.clusters_, gaps, '--o')
plt.title("Gap vs. Number of Clusters")
plt.xlabel("Number of clusters (K)")
plt.ylabel("gap_K")
fig.canvas.set_window_title(os.path.basename(savedir))
plt.savefig(os.path.join(savedir, self.name_ + ".pdf"), bbox_inches="tight")
plt.close()
# Plot the gap statistic
fig = plt.figure()
plt.bar(self.clusters_, self.khats_)
plt.title("Gap Statistic vs. Number of Clusters")
plt.xlabel("Number of clusters (K)")
plt.ylabel("gap(K)-(gap(K+1)-s(K+1))")
fig.canvas.set_window_title(os.path.basename(savedir))
plt.savefig(os.path.join(savedir, self.name_ + "_final.pdf"),
bbox_inches="tight")
plt.close()
class DetK(KSelection):
"""DetK:
Approximates the optimal number of clusters (K).
References
----------
https://www.ee.columbia.edu/~dpwe/papers/PhamDN05-kmeans.pdf
Attributes
----------
optimal_num_clusters_ : int
An estimation of the optimal number of clusters K for
KMeans models fit to X
clusters_ : array, [n_clusters]
The sizes of the clusters
name_ : string
The name of this technique
fs_ : array, [n_clusters]
The computed evaluation functions F(K) for each cluster size K
"""
NAME_ = "det-k"
def __init__(self):
super(DetK, self).__init__()
self.fs_ = None
@property
def name_(self):
return DetK.NAME_
def _reset(self):
"""Resets all attributes (erases the model)"""
super(DetK, self)._reset()
self.fs_ = None
def fit(self, X, cluster_map):
"""Estimates the optimal number of clusters (K) for a
KMeans model trained on X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data.
cluster_map : dict
A dictionary mapping each cluster size (K) to the KMeans
model fitted to X with K clusters
Returns
-------
self
"""
self._reset()
n_clusters = len(cluster_map)
nd = X.shape[1]
fs = np.empty(n_clusters)
sks = np.empty(n_clusters)
alpha = {}
# Iterate over K from min_cluster_ to max_cluster_
for i, (K, model) \
in enumerate(sorted(cluster_map.items())):
# Compute alpha(K, nd) (i.e. alpha[K])
if K == 2:
alpha[K] = 1 - 3.0 / (4 * nd)
elif K > 2:
alpha[K] = alpha[K - 1] + (1 - alpha[K - 1]) / 6.0
sks[i] = model.cluster_inertia_
if K == 1:
fs[i] = 1
elif sks[i - 1] == 0:
fs[i] = 1
else:
fs[i] = sks[i] / (alpha[K] * sks[i - 1])
self.clusters_ = np.array(sorted(cluster_map.keys()))
self.optimal_num_clusters_ = self.clusters_[np.argmin(fs)]
self.fs_ = fs
return self
def save(self, savedir):
"""Saves the estimation results of the optimal # of clusters.
Parameters
----------
savedir : string
Path to the directory to save the results in.
"""
super(DetK, self).save(savedir)
# Plot the evaluation function
fig = plt.figure()
plt.plot(self.clusters_, self.fs_, '--o')
plt.xlabel("Number of clusters (K)")
plt.ylabel("Evaluation function (F_k)")
plt.title("Evaluation Function vs. Number of Clusters")
fig.canvas.set_window_title(os.path.basename(savedir))
savepath = os.path.join(savedir, self.name_ + "_eval_function.pdf")
plt.savefig(savepath, bbox_inches="tight")
plt.close()
class Silhouette(KSelection):
"""Det:
Approximates the optimal number of clusters (K).
References
----------
http://scikit-learn.org/stable/modules/generated/sklearn.metrics.silhouette_score.html
Attributes
----------
optimal_num_clusters_ : int
An estimation of the optimal number of clusters K for
KMeans models fit to X
clusters_ : array, [n_clusters]
The sizes of the clusters
name_ : string
The name of this technique
scores_ : array, [n_clusters]
The mean Silhouette Coefficient for each cluster size K
"""
# short for Silhouette score
NAME_ = "s-score"
def __init__(self):
super(Silhouette, self).__init__()
self.scores_ = None
@property
def name_(self):
return Silhouette.NAME_
def _reset(self):
"""Resets all attributes (erases the model)"""
super(Silhouette, self)._reset()
self.scores_ = None
def fit(self, X, cluster_map):
"""Estimates the optimal number of clusters (K) for a
KMeans model trained on X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data.
cluster_map : dict
A dictionary mapping each cluster size (K) to the KMeans
model fitted to X with K clusters
Returns
-------
self
"""
self._reset()
n_clusters = len(cluster_map)
# scores = np.empty(n_clusters)
scores = np.zeros(n_clusters)
for i, (K, model) \
in enumerate(sorted(cluster_map.items())):
if K <= 1: # K >= 2
continue
scores[i] = silhouette_score(X, model.cluster_labels_)
self.clusters_ = np.array(sorted(cluster_map.keys()))
self.optimal_num_clusters_ = self.clusters_[np.argmax(scores)]
self.scores_ = scores
return self
def save(self, savedir):
"""Saves the estimation results of the optimal # of clusters.
Parameters
----------
savedir : string
Path to the directory to save the results in.
"""
super(Silhouette, self).save(savedir)
# Plot the evaluation function
fig = plt.figure()
plt.plot(self.clusters_, self.scores_, '--o')
plt.xlabel("Number of clusters (K)")
plt.ylabel("Silhouette scores")
plt.title("Silhouette Scores vs. Number of Clusters")
fig.canvas.set_window_title(os.path.basename(savedir))
savepath = os.path.join(savedir, self.name_ + "_eval_function.pdf")
plt.savefig(savepath, bbox_inches="tight")
plt.close()
def create_kselection_model(model_name):
"""Constructs the KSelection model object with the given name
Parameters
----------
model_name : string
Name of the KSelection model.
One of ['gap-statistic', 'det-k', 's-score']
Returns
-------
The constructed model object
"""
kselection_map = {
DetK.NAME_: DetK,
GapStatistic.NAME_: GapStatistic,
Silhouette.NAME_: Silhouette
}
if model_name not in kselection_map:
raise Exception("KSelection model {} not supported!".format(model_name))
else:
return kselection_map[model_name]()
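# Minimal usage sketch (illustrative only): the synthetic data, sample labels,
# and cluster range below are assumptions for demonstration, not OtterTune data.
if __name__ == "__main__":
    X_demo = np.random.rand(100, 5)
    demo_labels = ["metric_{}".format(i) for i in range(X_demo.shape[0])]
    # Fit one KMeans model per K in [1, 6], then estimate the best K with DetK
    kmeans_models = KMeansClusters().fit(X_demo, min_cluster=1, max_cluster=6,
                                         sample_labels=demo_labels)
    detk = create_kselection_model("det-k").fit(X_demo, kmeans_models.cluster_map_)
    print("Estimated optimal number of clusters:", detk.optimal_num_clusters_)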


@@ -0,0 +1,115 @@
#
# OtterTune - constraints.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Sep 8, 2016
@author: dvanaken
'''
import numpy as np
class ParamConstraintHelper(object):
def __init__(self, scaler, encoder=None, binary_vars=None,
init_flip_prob=0.3, flip_prob_decay=0.5):
if 'inverse_transform' not in dir(scaler):
raise Exception("Scaler object must provide function inverse_transform(X)")
if 'transform' not in dir(scaler):
raise Exception("Scaler object must provide function transform(X)")
self.scaler_ = scaler
if encoder is not None and len(encoder.n_values) > 0:
self.is_dummy_encoded_ = True
self.encoder_ = encoder.encoder
else:
self.is_dummy_encoded_ = False
self.binary_vars_ = binary_vars
self.init_flip_prob_ = init_flip_prob
self.flip_prob_decay_ = flip_prob_decay
def apply_constraints(self, sample, scaled=True, rescale=True):
conv_sample = self._handle_scaling(sample, scaled)
if self.is_dummy_encoded_:
# apply categorical (i.e., enum vars with >= 3 values) constraints
n_values = self.encoder_.n_values_
cat_start_indices = self.encoder_.feature_indices_
for i, nvals in enumerate(n_values):
start_idx = cat_start_indices[i]
cvals = conv_sample[start_idx: start_idx + nvals]
cvals = np.array(np.arange(nvals) == np.argmax(cvals), dtype=float)
assert np.sum(cvals) == 1
conv_sample[start_idx: start_idx + nvals] = cvals
# apply binary (0-1) constraints
if self.binary_vars_ is not None:
for i in self.binary_vars_:
# round to closest
if conv_sample[i] >= 0.5:
conv_sample[i] = 1
else:
conv_sample[i] = 0
conv_sample = self._handle_rescaling(conv_sample, rescale)
return conv_sample
def _handle_scaling(self, sample, scaled):
if scaled:
if sample.ndim == 1:
sample = sample.reshape(1, -1)
sample = self.scaler_.inverse_transform(sample).ravel()
else:
sample = np.array(sample)
return sample
def _handle_rescaling(self, sample, rescale):
if rescale:
if sample.ndim == 1:
sample = sample.reshape(1, -1)
return self.scaler_.transform(sample).ravel()
return sample
def randomize_categorical_features(self, sample, scaled=True, rescale=True):
# If there are no categorical features, this function is a no-op.
if not self.is_dummy_encoded_:
return sample
n_values = self.encoder_.n_values_
cat_start_indices = self.encoder_.feature_indices_
n_cat_feats = len(n_values)
conv_sample = self._handle_scaling(sample, scaled)
flips = np.zeros((n_cat_feats,), dtype=bool)
# Always flip at least one categorical feature
flips[0] = True
# Flip the rest with decreasing probability
p = self.init_flip_prob_
for i in range(1, n_cat_feats):
if np.random.rand() <= p:
flips[i] = True
p *= self.flip_prob_decay_
flip_shuffle_indices = np.random.choice(np.arange(n_cat_feats),
n_cat_feats,
replace=False)
flips = flips[flip_shuffle_indices]
for i, nvals in enumerate(n_values):
if flips[i]:
start_idx = cat_start_indices[i]
current_val = conv_sample[start_idx: start_idx + nvals]
assert np.all(np.logical_or(current_val == 0, current_val == 1)), \
"categorical {0}: value not 0/1: {1}".format(i, current_val)
choices = np.arange(nvals)[current_val != 1]
assert choices.size == nvals - 1
r = np.zeros(nvals)
r[np.random.choice(choices)] = 1
assert np.sum(r) == 1
conv_sample[start_idx: start_idx + nvals] = r
conv_sample = self._handle_rescaling(conv_sample, rescale)
return conv_sample
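# Minimal usage sketch (illustrative only): the scaler, binary-variable index,
# and sample values below are assumptions chosen for demonstration.
if __name__ == "__main__":
    from sklearn.preprocessing import MinMaxScaler
    demo_data = np.array([[0.0, 1.0, 10.0], [1.0, 5.0, 20.0]])
    demo_scaler = MinMaxScaler().fit(demo_data)
    helper = ParamConstraintHelper(demo_scaler, binary_vars=[0])
    scaled_sample = demo_scaler.transform(np.array([[0.7, 3.0, 12.0]])).ravel()
    # Rounds the binary knob at index 0 to 0/1, then rescales the sample
    print(helper.apply_constraints(scaled_sample))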


@@ -0,0 +1,111 @@
#
# OtterTune - factor_analysis.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Jul 4, 2016
@author: dvanaken
'''
import numpy as np
from sklearn.decomposition import FactorAnalysis as SklearnFactorAnalysis
from .base import ModelBase
class FactorAnalysis(ModelBase):
"""FactorAnalysis (FA):
Fits an Sklearn FactorAnalysis model to X.
See also
--------
http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.FactorAnalysis.html
Attributes
----------
model_ : sklearn.decomposition.FactorAnalysis
The fitted FA model
components_ : array, [n_components, n_features]
Components (i.e., factors) with maximum variance
feature_labels_ : array, [n_features]
total_variance_ : float
The total amount of variance explained by the components
pvars_ : array, [n_components]
The percentage of the variance explained by each component
pvars_noise_ : array, [n_components]
The percentage of the variance explained by each component also
accounting for noise
"""
def __init__(self):
self.model_ = None
self.components_ = None
self.feature_labels_ = None
self.total_variance_ = None
self.pvars_ = None
self.pvars_noise_ = None
def _reset(self):
"""Resets all attributes (erases the model)"""
self.model_ = None
self.components_ = None
self.feature_labels_ = None
self.total_variance_ = None
self.pvars_ = None
self.pvars_noise_ = None
def fit(self, X, feature_labels=None, n_components=None, estimator_params=None):
"""Fits an Sklearn FA model to X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data.
feature_labels : array-like, shape (n_features), optional
Labels for each of the features in X.
estimator_params : dict, optional
The parameters to pass to Sklearn's FA estimators.
Returns
-------
self
"""
self._reset()
if feature_labels is None:
feature_labels = ["feature_{}".format(i) for i in range(X.shape[1])]
self.feature_labels_ = feature_labels
if n_components is not None:
model = SklearnFactorAnalysis(n_components=n_components)
else:
model = SklearnFactorAnalysis()
self.model_ = model
if estimator_params is not None:
# Update Sklearn estimator params
assert isinstance(estimator_params, dict)
self.model_.set_params(**estimator_params)
self.model_.fit(X)
# Remove zero-valued components (n_components x n_features)
components_mask = np.sum(self.model_.components_ != 0.0, axis=1) > 0.0
self.components_ = self.model_.components_[components_mask]
# Compute the % variance explained (with/without noise)
c2 = np.sum(self.components_ ** 2, axis=1)
self.total_variance_ = np.sum(c2)
self.pvars_ = 100 * c2 / self.total_variance_
self.pvars_noise_ = 100 * c2 / (self.total_variance_ +
np.sum(self.model_.noise_variance_))
return self
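# Minimal usage sketch (illustrative only): the synthetic matrix and number of
# components below are assumptions chosen for demonstration.
if __name__ == "__main__":
    X_demo = np.random.rand(50, 8)
    fa_model = FactorAnalysis().fit(X_demo, n_components=3)
    print("Non-zero components kept:", fa_model.components_.shape)
    print("Variance explained per component (%):", fa_model.pvars_)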

148 server/analysis/gp.py Normal file

@@ -0,0 +1,148 @@
#
# OtterTune - gp.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Feb 18, 2018
@author: Bohan Zhang
'''
import numpy as np
from scipy.spatial.distance import cdist as ed
from scipy import special
from analysis.gp_tf import GPRResult
# NumPy implementation of Gaussian Process Regression (does not use TensorFlow)
class GPRNP(object):
def __init__(self, length_scale=1.0, magnitude=1.0, max_train_size=7000,
batch_size=3000, check_numerics=True, debug=False):
assert np.isscalar(length_scale)
assert np.isscalar(magnitude)
assert length_scale > 0 and magnitude > 0
self.length_scale = length_scale
self.magnitude = magnitude
self.max_train_size_ = max_train_size
self.batch_size_ = batch_size
self.check_numerics = check_numerics
self.debug = debug
self.X_train = None
self.y_train = None
self.K = None
self.K_inv = None
self.y_best = None
def __repr__(self):
rep = ""
for k, v in sorted(self.__dict__.items()):
rep += "{} = {}\n".format(k, v)
return rep
def __str__(self):
return self.__repr__()
def _reset(self):
self.X_train = None
self.y_train = None
self.K = None
self.K_inv = None
self.y_best = None
def check_X_y(self, X, y):
from sklearn.utils.validation import check_X_y
if X.shape[0] > self.max_train_size_:
raise Exception("X_train size cannot exceed {} ({})"
.format(self.max_train_size_, X.shape[0]))
return check_X_y(X, y, multi_output=True,
allow_nd=True, y_numeric=True,
estimator="GPRNP")
def check_fitted(self):
if self.X_train is None or self.y_train is None \
or self.K is None:
raise Exception("The model must be trained before making predictions!")
@staticmethod
def check_array(X):
from sklearn.utils.validation import check_array
return check_array(X, allow_nd=True, estimator="GPRNP")
@staticmethod
def check_output(X):
finite_els = np.isfinite(X)
if not np.all(finite_els):
raise Exception("Input contains non-finite values: {}"
.format(X[~finite_els]))
def fit(self, X_train, y_train, ridge=0.01):
self._reset()
X_train, y_train = self.check_X_y(X_train, y_train)
if X_train.ndim != 2 or y_train.ndim != 2:
raise Exception("X_train or y_train should have 2 dimensions! X_dim:{}, y_dim:{}"
.format(X_train.ndim, y_train.ndim))
self.X_train = np.float32(X_train)
self.y_train = np.float32(y_train)
sample_size = self.X_train.shape[0]
if np.isscalar(ridge):
ridge = np.ones(sample_size) * ridge
assert isinstance(ridge, np.ndarray)
assert ridge.ndim == 1
K = self.magnitude * np.exp(-ed(self.X_train, self.X_train) / self.length_scale) \
+ np.diag(ridge)
K_inv = np.linalg.inv(K)
self.K = K
self.K_inv = K_inv
self.y_best = np.min(y_train)
return self
def predict(self, X_test):
self.check_fitted()
if X_test.ndim != 2:
raise Exception("X_test should have 2 dimensions! X_dim:{}"
.format(X_test.ndim))
X_test = np.float32(GPRNP.check_array(X_test))
test_size = X_test.shape[0]
arr_offset = 0
length_scale = self.length_scale
yhats = np.zeros([test_size, 1])
sigmas = np.zeros([test_size, 1])
eips = np.zeros([test_size, 1])
while arr_offset < test_size:
if arr_offset + self.batch_size_ > test_size:
end_offset = test_size
else:
end_offset = arr_offset + self.batch_size_
xt_ = X_test[arr_offset:end_offset]
K2 = self.magnitude * np.exp(-ed(self.X_train, xt_) / length_scale)
K3 = self.magnitude * np.exp(-ed(xt_, xt_) / length_scale)
K2_trans = np.transpose(K2)
yhat = np.matmul(K2_trans, np.matmul(self.K_inv, self.y_train))
sigma = np.sqrt(np.diag(K3 - np.matmul(K2_trans, np.matmul(self.K_inv, K2)))) \
.reshape(xt_.shape[0], 1)
u = (self.y_best - yhat) / sigma
phi1 = 0.5 * special.erf(u / np.sqrt(2.0)) + 0.5
phi2 = (1.0 / np.sqrt(2.0 * np.pi)) * np.exp(np.square(u) * (-0.5))
eip = sigma * (u * phi1 + phi2)
yhats[arr_offset:end_offset] = yhat
sigmas[arr_offset:end_offset] = sigma
eips[arr_offset:end_offset] = eip
arr_offset = end_offset
GPRNP.check_output(yhats)
GPRNP.check_output(sigmas)
return GPRResult(yhats, sigmas)
def get_params(self, deep=True):
return {"length_scale": self.length_scale,
"magnitude": self.magnitude,
"X_train": self.X_train,
"y_train": self.y_train,
"K": self.K,
"K_inv": self.K_inv}
def set_params(self, **parameters):
for param, val in list(parameters.items()):
setattr(self, param, val)
return self
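# Minimal usage sketch (illustrative only): trains GPRNP on random data and
# predicts on a small test set; all sizes and hyperparameters are assumptions.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    X_tr, y_tr = rng.rand(40, 3), rng.rand(40, 1)
    X_te = rng.rand(5, 3)
    gpr_np = GPRNP(length_scale=1.0, magnitude=1.0).fit(X_tr, y_tr, ridge=0.01)
    result = gpr_np.predict(X_te)
    print("predictions:", result.ypreds.ravel())
    print("uncertainties:", result.sigmas.ravel())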

710 server/analysis/gp_tf.py Normal file

@@ -0,0 +1,710 @@
#
# OtterTune - gp_tf.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Aug 18, 2016
@author: Bohan Zhang, Dana Van Aken
'''
import gc
import numpy as np
import tensorflow as tf
from .util import get_analysis_logger
LOG = get_analysis_logger(__name__)
class GPRResult(object):
def __init__(self, ypreds=None, sigmas=None):
self.ypreds = ypreds
self.sigmas = sigmas
class GPRGDResult(GPRResult):
def __init__(self, ypreds=None, sigmas=None,
minl=None, minl_conf=None):
super(GPRGDResult, self).__init__(ypreds, sigmas)
self.minl = minl
self.minl_conf = minl_conf
class GPR(object):
def __init__(self, length_scale=1.0, magnitude=1.0, max_train_size=7000,
batch_size=3000, num_threads=4, check_numerics=True, debug=False):
assert np.isscalar(length_scale)
assert np.isscalar(magnitude)
assert length_scale > 0 and magnitude > 0
self.length_scale = length_scale
self.magnitude = magnitude
self.max_train_size_ = max_train_size
self.batch_size_ = batch_size
self.num_threads_ = num_threads
self.check_numerics = check_numerics
self.debug = debug
self.X_train = None
self.y_train = None
self.xy_ = None
self.K = None
self.K_inv = None
self.graph = None
self.vars = None
self.ops = None
def build_graph(self):
self.vars = {}
self.ops = {}
self.graph = tf.Graph()
with self.graph.as_default():
mag_const = tf.constant(self.magnitude,
dtype=np.float32,
name='magnitude')
ls_const = tf.constant(self.length_scale,
dtype=np.float32,
name='length_scale')
# Nodes for distance computation
v1 = tf.placeholder(tf.float32, name="v1")
v2 = tf.placeholder(tf.float32, name="v2")
dist_op = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2), 1), name='dist_op')
if self.check_numerics:
dist_op = tf.check_numerics(dist_op, "dist_op: ")
self.vars['v1_h'] = v1
self.vars['v2_h'] = v2
self.ops['dist_op'] = dist_op
# Nodes for kernel computation
X_dists = tf.placeholder(tf.float32, name='X_dists')
ridge_ph = tf.placeholder(tf.float32, name='ridge')
K_op = mag_const * tf.exp(-X_dists / ls_const)
if self.check_numerics:
K_op = tf.check_numerics(K_op, "K_op: ")
K_ridge_op = K_op + tf.diag(ridge_ph)
if self.check_numerics:
K_ridge_op = tf.check_numerics(K_ridge_op, "K_ridge_op: ")
self.vars['X_dists_h'] = X_dists
self.vars['ridge_h'] = ridge_ph
self.ops['K_op'] = K_op
self.ops['K_ridge_op'] = K_ridge_op
# Nodes for xy computation
K = tf.placeholder(tf.float32, name='K')
K_inv = tf.placeholder(tf.float32, name='K_inv')
xy_ = tf.placeholder(tf.float32, name='xy_')
yt_ = tf.placeholder(tf.float32, name='yt_')
K_inv_op = tf.matrix_inverse(K)
if self.check_numerics:
K_inv_op = tf.check_numerics(K_inv_op, "K_inv: ")
xy_op = tf.matmul(K_inv, yt_)
if self.check_numerics:
xy_op = tf.check_numerics(xy_op, "xy_: ")
self.vars['K_h'] = K
self.vars['K_inv_h'] = K_inv
self.vars['xy_h'] = xy_
self.vars['yt_h'] = yt_
self.ops['K_inv_op'] = K_inv_op
self.ops['xy_op'] = xy_op
# Nodes for yhat/sigma computation
K2 = tf.placeholder(tf.float32, name="K2")
K3 = tf.placeholder(tf.float32, name="K3")
yhat_ = tf.cast(tf.matmul(tf.transpose(K2), xy_), tf.float32)
if self.check_numerics:
yhat_ = tf.check_numerics(yhat_, "yhat_: ")
sv1 = tf.matmul(tf.transpose(K2), tf.matmul(K_inv, K2))
if self.check_numerics:
sv1 = tf.check_numerics(sv1, "sv1: ")
sig_val = tf.cast((tf.sqrt(tf.diag_part(K3 - sv1))), tf.float32)
if self.check_numerics:
sig_val = tf.check_numerics(sig_val, "sig_val: ")
self.vars['K2_h'] = K2
self.vars['K3_h'] = K3
self.ops['yhat_op'] = yhat_
self.ops['sig_op'] = sig_val
# Compute y_best (min y)
y_best_op = tf.cast(tf.reduce_min(yt_, 0, True), tf.float32)
if self.check_numerics:
y_best_op = tf.check_numerics(y_best_op, "y_best_op: ")
self.ops['y_best_op'] = y_best_op
sigma = tf.placeholder(tf.float32, name='sigma')
yhat = tf.placeholder(tf.float32, name='yhat')
self.vars['sigma_h'] = sigma
self.vars['yhat_h'] = yhat
def __repr__(self):
rep = ""
for k, v in sorted(self.__dict__.items()):
rep += "{} = {}\n".format(k, v)
return rep
def __str__(self):
return self.__repr__()
def check_X_y(self, X, y):
from sklearn.utils.validation import check_X_y
if X.shape[0] > self.max_train_size_:
raise Exception("X_train size cannot exceed {} ({})"
.format(self.max_train_size_, X.shape[0]))
return check_X_y(X, y, multi_output=True,
allow_nd=True, y_numeric=True,
estimator="GPR")
def check_fitted(self):
if self.X_train is None or self.y_train is None \
or self.xy_ is None or self.K is None:
raise Exception("The model must be trained before making predictions!")
@staticmethod
def check_array(X):
from sklearn.utils.validation import check_array
return check_array(X, allow_nd=True, estimator="GPR")
@staticmethod
def check_output(X):
finite_els = np.isfinite(X)
if not np.all(finite_els):
raise Exception("Input contains non-finite values: {}"
.format(X[~finite_els]))
def fit(self, X_train, y_train, ridge=1.0):
self._reset()
X_train, y_train = self.check_X_y(X_train, y_train)
self.X_train = np.float32(X_train)
self.y_train = np.float32(y_train)
sample_size = self.X_train.shape[0]
if np.isscalar(ridge):
ridge = np.ones(sample_size) * ridge
assert isinstance(ridge, np.ndarray)
assert ridge.ndim == 1
X_dists = np.zeros((sample_size, sample_size), dtype=np.float32)
with tf.Session(graph=self.graph,
config=tf.ConfigProto(
intra_op_parallelism_threads=self.num_threads_)) as sess:
dist_op = self.ops['dist_op']
v1, v2 = self.vars['v1_h'], self.vars['v2_h']
for i in range(sample_size):
X_dists[i] = sess.run(dist_op, feed_dict={v1: self.X_train[i], v2: self.X_train})
K_ridge_op = self.ops['K_ridge_op']
X_dists_ph = self.vars['X_dists_h']
ridge_ph = self.vars['ridge_h']
self.K = sess.run(K_ridge_op, feed_dict={X_dists_ph: X_dists, ridge_ph: ridge})
K_ph = self.vars['K_h']
K_inv_op = self.ops['K_inv_op']
self.K_inv = sess.run(K_inv_op, feed_dict={K_ph: self.K})
xy_op = self.ops['xy_op']
K_inv_ph = self.vars['K_inv_h']
yt_ph = self.vars['yt_h']
self.xy_ = sess.run(xy_op, feed_dict={K_inv_ph: self.K_inv,
yt_ph: self.y_train})
return self
def predict(self, X_test):
self.check_fitted()
X_test = np.float32(GPR.check_array(X_test))
test_size = X_test.shape[0]
sample_size = self.X_train.shape[0]
arr_offset = 0
yhats = np.zeros([test_size, 1])
sigmas = np.zeros([test_size, 1])
with tf.Session(graph=self.graph,
config=tf.ConfigProto(
intra_op_parallelism_threads=self.num_threads_)) as sess:
# Nodes for distance operation
dist_op = self.ops['dist_op']
v1 = self.vars['v1_h']
v2 = self.vars['v2_h']
# Nodes for kernel computation
K_op = self.ops['K_op']
X_dists = self.vars['X_dists_h']
# Nodes to compute yhats/sigmas
yhat_ = self.ops['yhat_op']
K_inv_ph = self.vars['K_inv_h']
K2 = self.vars['K2_h']
K3 = self.vars['K3_h']
xy_ph = self.vars['xy_h']
while arr_offset < test_size:
if arr_offset + self.batch_size_ > test_size:
end_offset = test_size
else:
end_offset = arr_offset + self.batch_size_
X_test_batch = X_test[arr_offset:end_offset]
batch_len = end_offset - arr_offset
dists1 = np.zeros([sample_size, batch_len])
for i in range(sample_size):
dists1[i] = sess.run(dist_op, feed_dict={v1: self.X_train[i],
v2: X_test_batch})
sig_val = self.ops['sig_op']
K2_ = sess.run(K_op, feed_dict={X_dists: dists1})
yhat = sess.run(yhat_, feed_dict={K2: K2_, xy_ph: self.xy_})
dists2 = np.zeros([batch_len, batch_len])
for i in range(batch_len):
dists2[i] = sess.run(dist_op, feed_dict={v1: X_test_batch[i], v2: X_test_batch})
K3_ = sess.run(K_op, feed_dict={X_dists: dists2})
sigma = np.zeros([1, batch_len], np.float32)
sigma[0] = sess.run(sig_val, feed_dict={K_inv_ph: self.K_inv, K2: K2_, K3: K3_})
sigma = np.transpose(sigma)
yhats[arr_offset: end_offset] = yhat
sigmas[arr_offset: end_offset] = sigma
arr_offset = end_offset
GPR.check_output(yhats)
GPR.check_output(sigmas)
return GPRResult(yhats, sigmas)
def get_params(self, deep=True):
return {"length_scale": self.length_scale,
"magnitude": self.magnitude,
"X_train": self.X_train,
"y_train": self.y_train,
"xy_": self.xy_,
"K": self.K,
"K_inv": self.K_inv}
def set_params(self, **parameters):
for param, val in list(parameters.items()):
setattr(self, param, val)
return self
def _reset(self):
self.X_train = None
self.y_train = None
self.xy_ = None
self.K = None
self.K_inv = None
self.graph = None
self.build_graph()
gc.collect()
class GPRGD(GPR):
GP_BETA_UCB = "UCB"
GP_BETA_CONST = "CONST"
def __init__(self,
length_scale=1.0,
magnitude=1.0,
max_train_size=7000,
batch_size=3000,
num_threads=4,
learning_rate=0.01,
epsilon=1e-6,
max_iter=100,
sigma_multiplier=3.0,
mu_multiplier=1.0):
super(GPRGD, self).__init__(length_scale=length_scale,
magnitude=magnitude,
max_train_size=max_train_size,
batch_size=batch_size,
num_threads=num_threads)
self.learning_rate = learning_rate
self.epsilon = epsilon
self.max_iter = max_iter
self.sigma_multiplier = sigma_multiplier
self.mu_multiplier = mu_multiplier
self.X_min = None
self.X_max = None
def fit(self, X_train, y_train, X_min, X_max, ridge): # pylint: disable=arguments-differ
super(GPRGD, self).fit(X_train, y_train, ridge)
self.X_min = X_min
self.X_max = X_max
with tf.Session(graph=self.graph,
config=tf.ConfigProto(
intra_op_parallelism_threads=self.num_threads_)) as sess:
xt_ = tf.Variable(self.X_train[0], tf.float32)
xt_ph = tf.placeholder(tf.float32)
xt_assign_op = xt_.assign(xt_ph)
init = tf.global_variables_initializer()
sess.run(init)
K2_mat = tf.transpose(tf.expand_dims(tf.sqrt(tf.reduce_sum(tf.pow(
tf.subtract(xt_, self.X_train), 2), 1)), 0))
if self.check_numerics is True:
K2_mat = tf.check_numerics(K2_mat, "K2_mat: ")
K2__ = tf.cast(self.magnitude * tf.exp(-K2_mat / self.length_scale), tf.float32)
if self.check_numerics is True:
K2__ = tf.check_numerics(K2__, "K2__: ")
yhat_gd = tf.cast(tf.matmul(tf.transpose(K2__), self.xy_), tf.float32)
if self.check_numerics is True:
yhat_gd = tf.check_numerics(yhat_gd, message="yhat: ")
sig_val = tf.cast((tf.sqrt(self.magnitude - tf.matmul(
tf.transpose(K2__), tf.matmul(self.K_inv, K2__)))), tf.float32)
if self.check_numerics is True:
sig_val = tf.check_numerics(sig_val, message="sigma: ")
LOG.debug("\nyhat_gd : %s", str(sess.run(yhat_gd)))
LOG.debug("\nsig_val : %s", str(sess.run(sig_val)))
loss = tf.squeeze(tf.subtract(self.mu_multiplier * yhat_gd,
self.sigma_multiplier * sig_val))
if self.check_numerics is True:
loss = tf.check_numerics(loss, "loss: ")
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
epsilon=self.epsilon)
# optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
train = optimizer.minimize(loss)
self.vars['xt_'] = xt_
self.vars['xt_ph'] = xt_ph
self.ops['xt_assign_op'] = xt_assign_op
self.ops['yhat_gd'] = yhat_gd
self.ops['sig_val2'] = sig_val
self.ops['loss_op'] = loss
self.ops['train_op'] = train
return self
def predict(self, X_test, constraint_helper=None, # pylint: disable=arguments-differ
categorical_feature_method='hillclimbing',
categorical_feature_steps=3):
self.check_fitted()
X_test = np.float32(GPR.check_array(X_test))
test_size = X_test.shape[0]
nfeats = self.X_train.shape[1]
arr_offset = 0
yhats = np.zeros([test_size, 1])
sigmas = np.zeros([test_size, 1])
minls = np.zeros([test_size, 1])
minl_confs = np.zeros([test_size, nfeats])
with tf.Session(graph=self.graph,
config=tf.ConfigProto(
intra_op_parallelism_threads=self.num_threads_)) as sess:
while arr_offset < test_size:
if arr_offset + self.batch_size_ > test_size:
end_offset = test_size
else:
end_offset = arr_offset + self.batch_size_
X_test_batch = X_test[arr_offset:end_offset]
batch_len = end_offset - arr_offset
xt_ = self.vars['xt_']
init = tf.global_variables_initializer()
sess.run(init)
sig_val = self.ops['sig_val2']
yhat_gd = self.ops['yhat_gd']
loss = self.ops['loss_op']
train = self.ops['train_op']
xt_ph = self.vars['xt_ph']
assign_op = self.ops['xt_assign_op']
yhat = np.empty((batch_len, 1))
sigma = np.empty((batch_len, 1))
minl = np.empty((batch_len, 1))
minl_conf = np.empty((batch_len, nfeats))
for i in range(batch_len):
if self.debug is True:
LOG.info("-------------------------------------------")
yhats_it = np.empty((self.max_iter + 1,)) * np.nan
sigmas_it = np.empty((self.max_iter + 1,)) * np.nan
losses_it = np.empty((self.max_iter + 1,)) * np.nan
confs_it = np.empty((self.max_iter + 1, nfeats)) * np.nan
sess.run(assign_op, feed_dict={xt_ph: X_test_batch[i]})
step = 0
for step in range(self.max_iter):
if self.debug is True:
LOG.info("Batch %d, iter %d:", i, step)
yhats_it[step] = sess.run(yhat_gd)[0][0]
sigmas_it[step] = sess.run(sig_val)[0][0]
losses_it[step] = sess.run(loss)
confs_it[step] = sess.run(xt_)
if self.debug is True:
LOG.info(" yhat: %s", str(yhats_it[step]))
LOG.info(" sigma: %s", str(sigmas_it[step]))
LOG.info(" loss: %s", str(losses_it[step]))
LOG.info(" conf: %s", str(confs_it[step]))
sess.run(train)
# constraint Projected Gradient Descent
xt = sess.run(xt_)
xt_valid = np.minimum(xt, self.X_max)
xt_valid = np.maximum(xt_valid, self.X_min)
sess.run(assign_op, feed_dict={xt_ph: xt_valid})
if constraint_helper is not None:
xt_valid = constraint_helper.apply_constraints(sess.run(xt_))
sess.run(assign_op, feed_dict={xt_ph: xt_valid})
if categorical_feature_method == 'hillclimbing':
if step % categorical_feature_steps == 0:
current_xt = sess.run(xt_)
current_loss = sess.run(loss)
new_xt = \
constraint_helper.randomize_categorical_features(
current_xt)
sess.run(assign_op, feed_dict={xt_ph: new_xt})
new_loss = sess.run(loss)
if current_loss < new_loss:
sess.run(assign_op, feed_dict={xt_ph: new_xt})
else:
raise Exception("Unknown categorial feature method: {}".format(
categorical_feature_method))
if step == self.max_iter - 1:
# Record results from final iteration
yhats_it[-1] = sess.run(yhat_gd)[0][0]
sigmas_it[-1] = sess.run(sig_val)[0][0]
losses_it[-1] = sess.run(loss)
confs_it[-1] = sess.run(xt_)
assert np.all(np.isfinite(yhats_it))
assert np.all(np.isfinite(sigmas_it))
assert np.all(np.isfinite(losses_it))
assert np.all(np.isfinite(confs_it))
# Store info for conf with min loss from all iters
if np.all(~np.isfinite(losses_it)):
min_loss_idx = 0
else:
min_loss_idx = np.nanargmin(losses_it)
yhat[i] = yhats_it[min_loss_idx]
sigma[i] = sigmas_it[min_loss_idx]
minl[i] = losses_it[min_loss_idx]
minl_conf[i] = confs_it[min_loss_idx]
minls[arr_offset:end_offset] = minl
minl_confs[arr_offset:end_offset] = minl_conf
yhats[arr_offset:end_offset] = yhat
sigmas[arr_offset:end_offset] = sigma
arr_offset = end_offset
GPR.check_output(yhats)
GPR.check_output(sigmas)
GPR.check_output(minls)
GPR.check_output(minl_confs)
return GPRGDResult(yhats, sigmas, minls, minl_confs)
@staticmethod
def calculate_sigma_multiplier(t, ndim, bound=0.1):
assert t > 0
assert ndim > 0
assert bound > 0 and bound <= 1
beta = 2 * np.log(ndim * (t**2) * (np.pi**2) / 6 * bound)
if beta > 0:
beta = np.sqrt(beta)
else:
beta = 1
return beta
# def gp_tf(X_train, y_train, X_test, ridge, length_scale, magnitude, batch_size=3000):
# with tf.Graph().as_default():
# y_best = tf.cast(tf.reduce_min(y_train, 0, True), tf.float32)
# sample_size = X_train.shape[0]
# train_size = X_test.shape[0]
# arr_offset = 0
# yhats = np.zeros([train_size, 1])
# sigmas = np.zeros([train_size, 1])
# eips = np.zeros([train_size, 1])
# X_train = np.float32(X_train)
# y_train = np.float32(y_train)
# X_test = np.float32(X_test)
# ridge = np.float32(ridge)
#
# v1 = tf.placeholder(tf.float32,name="v1")
# v2 = tf.placeholder(tf.float32,name="v2")
# dist_op = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2), 1))
# try:
# sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
#
# dists = np.zeros([sample_size,sample_size])
# for i in range(sample_size):
# dists[i] = sess.run(dist_op,feed_dict={v1:X_train[i], v2:X_train})
#
#
# dists = tf.cast(dists, tf.float32)
# K = magnitude * tf.exp(-dists/length_scale) + tf.diag(ridge);
#
# K2 = tf.placeholder(tf.float32, name="K2")
# K3 = tf.placeholder(tf.float32, name="K3")
#
# x = tf.matmul(tf.matrix_inverse(K), y_train)
# yhat_ = tf.cast(tf.matmul(tf.transpose(K2), x), tf.float32);
# sig_val = tf.cast((tf.sqrt(tf.diag_part(K3 - tf.matmul(tf.transpose(K2),
# tf.matmul(tf.matrix_inverse(K),
# K2))))),
# tf.float32)
#
# u = tf.placeholder(tf.float32, name="u")
# phi1 = 0.5 * tf.erf(u / np.sqrt(2.0)) + 0.5
# phi2 = (1.0 / np.sqrt(2.0 * np.pi)) * tf.exp(tf.square(u) * (-0.5));
# eip = (tf.multiply(u, phi1) + phi2);
#
# while arr_offset < train_size:
# if arr_offset + batch_size > train_size:
# end_offset = train_size
# else:
# end_offset = arr_offset + batch_size;
#
# xt_ = X_test[arr_offset:end_offset];
# batch_len = end_offset - arr_offset
#
# dists = np.zeros([sample_size, batch_len])
# for i in range(sample_size):
# dists[i] = sess.run(dist_op, feed_dict={v1:X_train[i], v2:xt_})
#
# K2_ = magnitude * tf.exp(-dists / length_scale);
# K2_ = sess.run(K2_)
#
# dists = np.zeros([batch_len, batch_len])
# for i in range(batch_len):
# dists[i] = sess.run(dist_op, feed_dict={v1:xt_[i], v2:xt_})
# K3_ = magnitude * tf.exp(-dists / length_scale);
# K3_ = sess.run(K3_)
#
# yhat = sess.run(yhat_, feed_dict={K2:K2_})
#
# sigma = np.zeros([1, batch_len], np.float32)
# sigma[0] = (sess.run(sig_val, feed_dict={K2:K2_, K3:K3_}))
# sigma = np.transpose(sigma)
#
# u_ = tf.cast(tf.div(tf.subtract(y_best, yhat), sigma), tf.float32)
# u_ = sess.run(u_)
# eip_p = sess.run(eip, feed_dict={u:u_})
# eip_ = tf.multiply(sigma, eip_p)
# yhats[arr_offset:end_offset] = yhat
# sigmas[arr_offset:end_offset] = sigma;
# eips[arr_offset:end_offset] = sess.run(eip_);
# arr_offset = end_offset
#
# finally:
# sess.close()
#
# return yhats, sigmas, eips
def euclidean_mat(X, y, sess):
x_n = X.shape[0]
y_n = y.shape[0]
z = np.zeros([x_n, y_n])
for i in range(x_n):
v1 = X[i]
tmp = []
for j in range(y_n):
v2 = y[j]
tmp.append(tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2))))
z[i] = (sess.run(tmp))
return z
def gd_tf(xs, ys, xt, ridge, length_scale=1.0, magnitude=1.0, max_iter=50):
LOG.debug("xs shape: %s", str(xs.shape))
LOG.debug("ys shape: %s", str(ys.shape))
LOG.debug("xt shape: %s", str(xt.shape))
with tf.Graph().as_default():
# y_best = tf.cast(tf.reduce_min(ys,0,True),tf.float32); #array
# yhat_gd = tf.check_numerics(yhat_gd, message="yhat: ")
sample_size = xs.shape[0]
nfeats = xs.shape[1]
test_size = xt.shape[0]
# arr_offset = 0
ini_size = xt.shape[0]
yhats = np.zeros([test_size, 1])
sigmas = np.zeros([test_size, 1])
minl = np.zeros([test_size, 1])
new_conf = np.zeros([test_size, nfeats])
xs = np.float32(xs)
ys = np.float32(ys)
xt_ = tf.Variable(xt[0], tf.float32)
sess = tf.Session(config=tf.ConfigProto(intra_op_parallelism_threads=8))
init = tf.global_variables_initializer()
sess.run(init)
ridge = np.float32(ridge)
v1 = tf.placeholder(tf.float32, name="v1")
v2 = tf.placeholder(tf.float32, name="v2")
dist = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(v1, v2), 2), 1))
tmp = np.zeros([sample_size, sample_size])
for i in range(sample_size):
tmp[i] = sess.run(dist, feed_dict={v1: xs[i], v2: xs})
tmp = tf.cast(tmp, tf.float32)
K = magnitude * tf.exp(-tmp / length_scale) + tf.diag(ridge)
LOG.debug("K shape: %s", str(sess.run(K).shape))
K2_mat = tf.sqrt(tf.reduce_sum(tf.pow(tf.subtract(xt_, xs), 2), 1))
K2_mat = tf.transpose(tf.expand_dims(K2_mat, 0))
K2 = tf.cast(tf.exp(-K2_mat / length_scale), tf.float32)
x = tf.matmul(tf.matrix_inverse(K), ys)
x = sess.run(x)
yhat_ = tf.cast(tf.matmul(tf.transpose(K2), x), tf.float32)
sig_val = tf.cast((tf.sqrt(magnitude - tf.matmul(
tf.transpose(K2), tf.matmul(tf.matrix_inverse(K), K2)))), tf.float32)
LOG.debug('yhat shape: %s', str(sess.run(yhat_).shape))
LOG.debug('sig_val shape: %s', str(sess.run(sig_val).shape))
yhat_ = tf.check_numerics(yhat_, message='yhat: ')
sig_val = tf.check_numerics(sig_val, message='sig_val: ')
loss = tf.squeeze(tf.subtract(yhat_, sig_val))
loss = tf.check_numerics(loss, message='loss: ')
# optimizer = tf.train.GradientDescentOptimizer(0.1)
LOG.debug('loss: %s', str(sess.run(loss)))
optimizer = tf.train.AdamOptimizer(0.1)
train = optimizer.minimize(loss)
init = tf.global_variables_initializer()
sess.run(init)
for i in range(ini_size):
assign_op = xt_.assign(xt[i])
sess.run(assign_op)
for step in range(max_iter):
LOG.debug('sample #: %d, iter #: %d, loss: %s', i, step, str(sess.run(loss)))
sess.run(train)
yhats[i] = sess.run(yhat_)[0][0]
sigmas[i] = sess.run(sig_val)[0][0]
minl[i] = sess.run(loss)
new_conf[i] = sess.run(xt_)
return yhats, sigmas, minl, new_conf
def main():
pass
def create_random_matrices(n_samples=3000, n_feats=12, n_test=4444):
X_train = np.random.rand(n_samples, n_feats)
y_train = np.random.rand(n_samples, 1)
X_test = np.random.rand(n_test, n_feats)
length_scale = np.random.rand()
magnitude = np.random.rand()
ridge = np.ones(n_samples) * np.random.rand()
return X_train, y_train, X_test, length_scale, magnitude, ridge
if __name__ == "__main__":
main()
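# Minimal usage sketch (illustrative only): exercises GPR end to end on the
# random matrices from create_random_matrices(); the fixed length_scale,
# magnitude, and ridge values are assumptions (requires TensorFlow 1.x).
if __name__ == "__main__":
    X_tr, y_tr, X_te, _, _, _ = create_random_matrices(n_samples=50, n_feats=4,
                                                       n_test=10)
    gpr = GPR(length_scale=1.0, magnitude=1.0)
    gpr.fit(X_tr, y_tr, ridge=1.0)
    gpr_result = gpr.predict(X_te)
    print(gpr_result.ypreds.shape, gpr_result.sigmas.shape)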

109 server/analysis/lasso.py Normal file

@@ -0,0 +1,109 @@
#
# OtterTune - lasso.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Jul 8, 2016
@author: dvanaken
'''
import numpy as np
from sklearn.linear_model import lasso_path
from .base import ModelBase
class LassoPath(ModelBase):
"""Lasso:
Computes the Lasso path using Sklearn's lasso_path method.
See also
--------
http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.lasso_path.html
Attributes
----------
feature_labels_ : array, [n_features]
Labels for each of the features in X.
alphas_ : array, [n_alphas]
The alphas along the path where models are computed. (These are
the decreasing values of the penalty along the path).
coefs_ : array, [n_outputs, n_features, n_alphas]
Coefficients along the path.
rankings_ : array, [n_features]
The average ranking of each feature across all target values.
"""
def __init__(self):
self.feature_labels_ = None
self.alphas_ = None
self.coefs_ = None
self.rankings_ = None
def _reset(self):
"""Resets all attributes (erases the model)"""
self.feature_labels_ = None
self.alphas_ = None
self.coefs_ = None
self.rankings_ = None
def fit(self, X, y, feature_labels, estimator_params=None):
"""Computes the Lasso path using Sklearn's lasso_path method.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data (the independent variables).
y : array-like, shape (n_samples, n_outputs)
Training data (the output/target values).
feature_labels : array-like, shape (n_features)
Labels for each of the features in X.
estimator_params : dict, optional
The parameters to pass to Sklearn's Lasso estimator.
Returns
-------
self
"""
self._reset()
if estimator_params is None:
estimator_params = {}
self.feature_labels_ = feature_labels
alphas, coefs, _ = lasso_path(X, y, **estimator_params)
self.alphas_ = alphas.copy()
self.coefs_ = coefs.copy()
# Rank the features in X by order of importance. This ranking is based
# on how early a given feature enters the regression (the earlier a
# feature enters the regression, the more important it is).
feature_rankings = [[] for _ in range(X.shape[1])]
for target_coef_paths in self.coefs_:
for i, feature_path in enumerate(target_coef_paths):
entrance_step = 1
for val_at_step in feature_path:
if val_at_step == 0:
entrance_step += 1
else:
break
feature_rankings[i].append(entrance_step)
self.rankings_ = np.array([np.mean(ranks) for ranks in feature_rankings])
return self
def get_ranked_features(self):
if self.rankings_ is None:
raise Exception("No lasso path has been fit yet!")
rank_idxs = np.argsort(self.rankings_)
return [self.feature_labels_[i] for i in rank_idxs]
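# Minimal usage sketch (illustrative only): the synthetic knobs/metrics and
# feature labels below are assumptions chosen for demonstration.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    X_demo = rng.rand(60, 4)
    y_demo = np.column_stack([3.0 * X_demo[:, 0] + 0.1 * rng.rand(60),
                              -2.0 * X_demo[:, 2] + 0.1 * rng.rand(60)])
    lasso = LassoPath().fit(X_demo, y_demo,
                            feature_labels=["knob_a", "knob_b", "knob_c", "knob_d"])
    # Features that enter the path earliest are ranked as most important
    print(lasso.get_ranked_features())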


@@ -0,0 +1,489 @@
#
# OtterTune - preprocessing.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
from abc import ABCMeta, abstractmethod
from itertools import chain, combinations, combinations_with_replacement
import numpy as np
from sklearn.preprocessing import MinMaxScaler as SklearnMinMaxScaler
from .util import is_numeric_matrix, is_lexical_matrix
# ==========================================================
# Preprocessing Base Class
# ==========================================================
class Preprocess(object, metaclass=ABCMeta):
@abstractmethod
def fit(self, matrix):
pass
@abstractmethod
def transform(self, matrix, copy=True):
pass
def fit_transform(self, matrix, copy=True):
self.fit(matrix)
return self.transform(matrix, copy=True)
@abstractmethod
def inverse_transform(self, matrix, copy=True):
pass
# ==========================================================
# Bin by Deciles
# ==========================================================
class Bin(Preprocess):
def __init__(self, bin_start, axis=None):
if axis is not None and \
axis != 1 and axis != 0:
raise NotImplementedError("Axis={} is not yet implemented".format(axis))
self.deciles_ = None
self.bin_start_ = bin_start
self.axis_ = axis
def fit(self, matrix):
if self.axis_ is None:
self.deciles_ = get_deciles(matrix, self.axis_)
elif self.axis_ == 0: # Bin columns
self.deciles_ = []
for col in matrix.T:
self.deciles_.append(get_deciles(col, axis=None))
elif self.axis_ == 1: # Bin rows
self.deciles_ = []
for row in matrix:
self.deciles_.append(get_deciles(row, axis=None))
return self
def transform(self, matrix, copy=True):
assert self.deciles_ is not None
if self.axis_ is None:
res = bin_by_decile(matrix, self.deciles_,
self.bin_start_, self.axis_)
elif self.axis_ == 0: # Transform columns
columns = []
for col, decile in zip(matrix.T, self.deciles_):
columns.append(bin_by_decile(col, decile,
self.bin_start_, axis=None))
res = np.vstack(columns).T
elif self.axis_ == 1: # Transform rows
rows = []
for row, decile in zip(matrix, self.deciles_):
rows.append(bin_by_decile(row, decile,
self.bin_start_, axis=None))
res = np.vstack(rows)
assert res.shape == matrix.shape
return res
def inverse_transform(self, matrix, copy=True):
raise NotImplementedError("This method is not supported")
def get_deciles(matrix, axis=None):
if axis is not None:
raise NotImplementedError("Axis is not yet implemented")
assert matrix.ndim > 0
assert matrix.size > 0
decile_range = np.arange(10, 101, 10)
deciles = np.percentile(matrix, decile_range, axis=axis)
deciles[-1] = np.Inf
return deciles
def bin_by_decile(matrix, deciles, bin_start, axis=None):
if axis is not None:
raise NotImplementedError("Axis is not yet implemented")
assert matrix.ndim > 0
assert matrix.size > 0
assert deciles is not None
assert len(deciles) == 10
binned_matrix = np.zeros_like(matrix)
for i in range(10)[::-1]:
decile = deciles[i]
binned_matrix[matrix <= decile] = i + bin_start
return binned_matrix
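# Usage note (illustrative only; the matrix below is an assumption):
#     X = np.arange(30, dtype=float).reshape(10, 3)
#     Bin(bin_start=1, axis=0).fit_transform(X)
# Each column is independently mapped to its decile label (1 through 10).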
# ==========================================================
# Shuffle Indices
# ==========================================================
class Shuffler(Preprocess):
def __init__(self, shuffle_rows=True, shuffle_columns=False,
row_indices=None, column_indices=None, seed=0):
self.shuffle_rows_ = shuffle_rows
self.shuffle_columns_ = shuffle_columns
self.row_indices_ = row_indices
self.column_indices_ = column_indices
np.random.seed(seed)
self.fitted_ = False
def fit(self, matrix):
if self.shuffle_rows_ and self.row_indices_ is None:
self.row_indices_ = get_shuffle_indices(matrix.data.shape[0])
if self.shuffle_columns_ and self.column_indices_ is None:
self.column_indices_ = get_shuffle_indices(matrix.data.shape[1])
self.fitted_ = True
def transform(self, matrix, copy=True):
if not self.fitted_:
raise Exception("The fit() function must be called before transform()")
if copy:
matrix = matrix.copy()
if self.shuffle_rows_:
matrix.data = matrix.data[self.row_indices_]
matrix.rowlabels = matrix.rowlabels[self.row_indices_]
if self.shuffle_columns_:
matrix.data = matrix.data[:, self.column_indices_]
matrix.columnlabels = matrix.columnlabels[self.column_indices_]
return matrix
def inverse_transform(self, matrix, copy=True):
if copy:
matrix = matrix.copy()
if self.shuffle_rows_:
inverse_row_indices = np.argsort(self.row_indices_)
matrix.data = matrix.data[inverse_row_indices]
matrix.rowlabels = matrix.rowlabels[inverse_row_indices]
if self.shuffle_columns_:
inverse_column_indices = np.argsort(self.column_indices_)
matrix.data = matrix.data[:, inverse_column_indices]
matrix.columnlabels = matrix.columnlabels[inverse_column_indices]
return matrix
def get_shuffle_indices(size, seed=None):
if seed is not None:
assert isinstance(seed, int)
np.random.seed(seed)
if isinstance(size, int):
return np.random.choice(size, size, replace=False)
else:
indices = []
for d in size:
indices.append(np.random.choice(d, d, replace=False))
return indices
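def _shuffler_example():
    # Illustrative sketch (not part of the original module). Shuffler expects a
    # Matrix-like object exposing .data, .rowlabels, and .columnlabels; a
    # SimpleNamespace stands in for it here purely for demonstration.
    from types import SimpleNamespace
    matrix = SimpleNamespace(data=np.arange(12).reshape(4, 3),
                             rowlabels=np.array(['r0', 'r1', 'r2', 'r3']),
                             columnlabels=np.array(['c0', 'c1', 'c2']))
    shuffler = Shuffler(shuffle_rows=True, shuffle_columns=False, seed=0)
    shuffler.fit(matrix)
    shuffled = shuffler.transform(matrix, copy=False)   # permute rows in place
    restored = shuffler.inverse_transform(shuffled, copy=False)
    assert np.array_equal(restored.data, np.arange(12).reshape(4, 3))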
# ==========================================================
# Polynomial Features
# ==========================================================
class PolynomialFeatures(Preprocess):
"""Compute the polynomial features of the input array.
This code was copied and modified from sklearn's
implementation.
"""
def __init__(self, degree=2, interaction_only=False, include_bias=True):
self.degree_ = degree
self.interaction_only_ = interaction_only
self.include_bias_ = include_bias
self.n_input_features_ = None
self.n_output_features_ = None
# @property
# def powers_(self):
# combinations = self._combinations(self.n_input_features_, self.degree_,
# self.interaction_only_,
# self.include_bias_)
# return np.vstack(np.bincount(c, minlength=self.n_input_features_)
# for c in combinations)
@staticmethod
def _combinations(n_features, degree, interaction_only, include_bias):
comb = (combinations if interaction_only else combinations_with_replacement)
start = int(not include_bias)
return chain.from_iterable(comb(list(range(n_features)), i)
for i in range(start, degree + 1))
def fit(self, matrix):
assert matrix.ndim == 2
assert matrix.size > 0
_, n_features = matrix.shape
combos = self._combinations(n_features, self.degree_,
self.interaction_only_,
self.include_bias_)
self.n_input_features_ = matrix.shape[1]
self.n_output_features_ = sum(1 for _ in combos)
return self
def transform(self, matrix, copy=True):
"""Transform data to polynomial features
Parameters
----------
X : array-like, shape [n_samples, n_features]
The data to transform, row by row.
Returns
-------
XP : np.ndarray shape [n_samples, NP]
The matrix of features, where NP is the number of polynomial
features generated from the combination of inputs.
"""
assert matrix.ndim == 2
assert matrix.size > 0
n_samples, n_features = matrix.shape
if n_features != self.n_input_features_:
raise ValueError("X shape does not match training shape")
is_numeric_type = is_numeric_matrix(matrix)
is_lexical_type = is_lexical_matrix(matrix)
if is_lexical_type:
strs = matrix.reshape((matrix.size,))
maxlen = max([len(s) for s in strs])
dtype = "S{}".format(maxlen * 2 + 1)
else:
dtype = matrix.dtype
# allocate output data
poly_matrix = np.empty((n_samples, self.n_output_features_), dtype=dtype)
combos = self._combinations(n_features, self.degree_,
self.interaction_only_,
self.include_bias_)
for i, c in enumerate(combos):
if is_numeric_type:
poly_matrix[:, i] = matrix[:, c].prod(1)
elif is_lexical_type:
n_poly1_feats = n_features + int(self.include_bias_)
if i >= n_poly1_feats:
x = "*".join(np.squeeze(matrix[:, c]).tolist())
else:
x = "".join(np.squeeze(matrix[:, c]).tolist())
poly_matrix[:, i] = x
else:
raise TypeError("Unsupported matrix type {}".format(matrix.dtype))
return poly_matrix
def inverse_transform(self, matrix, copy=True):
raise NotImplementedError("This method is not supported")
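def _polynomial_features_example():
    # Illustrative sketch (not part of the original module): a degree-2
    # expansion of a two-feature numeric matrix. Without the bias column the
    # output columns are x0, x1, x0^2, x0*x1, x1^2.
    X = np.array([[1.0, 2.0],
                  [3.0, 4.0]])
    poly = PolynomialFeatures(degree=2, include_bias=False).fit(X)
    XP = poly.transform(X)
    assert XP.shape == (2, poly.n_output_features_)  # (2, 5)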
# ==========================================================
# Dummy Encoding
# ==========================================================
class DummyEncoder(Preprocess):
def __init__(self, n_values, categorical_features, cat_columnlabels, noncat_columnlabels):
from sklearn.preprocessing import OneHotEncoder
if not isinstance(n_values, np.ndarray):
n_values = np.array(n_values)
if not isinstance(categorical_features, np.ndarray):
categorical_features = np.array(categorical_features)
# assert categorical_features.size > 0
assert categorical_features.shape == n_values.shape
for nv in n_values:
if nv <= 2:
raise Exception("Categorical features must have 3+ labels")
self.n_values = n_values
self.cat_columnlabels = cat_columnlabels
self.noncat_columnlabels = noncat_columnlabels
self.encoder = OneHotEncoder(
n_values=n_values, categorical_features=categorical_features, sparse=False)
self.new_labels = None
self.cat_idxs_old = categorical_features
def fit(self, matrix):
self.encoder.fit(matrix)
# determine new columnlabels
# categorical variables are done in order specified by categorical_features
new_labels = []
for i, cat_label in enumerate(self.cat_columnlabels):
low = self.encoder.feature_indices_[i]
high = self.encoder.feature_indices_[i + 1]
for j in range(low, high):
# eg the categorical variable named cat_var with 5 possible values
# turns into 0/1 variables named cat_var____0, ..., cat_var____4
new_labels.append(cat_label + "____" + str(j - low))
# according to sklearn documentation,
# "non-categorical features are always stacked to the right of the matrix"
# by observation, it looks like the non-categorical features' relative order is preserved
# BUT: there is no guarantee made about that behavior!
# We either trust OneHotEncoder to be sensible, or look for some other way
new_labels += self.noncat_columnlabels
self.new_labels = new_labels
def transform(self, matrix, copy=True):
# actually transform the matrix
matrix_encoded = self.encoder.transform(matrix)
return matrix_encoded
def fit_transform(self, matrix, copy=True):
self.fit(matrix)
return self.transform(matrix)
def inverse_transform(self, matrix, copy=True):
n_values = self.n_values
# If there are no categorical variables, no transformation happened.
if len(n_values) == 0:
return matrix
# Otherwise, this is a dummy-encoded matrix. Transform it back to original form.
n_features = matrix.shape[-1] - self.encoder.feature_indices_[-1] + len(n_values)
noncat_start_idx = self.encoder.feature_indices_[-1]
inverted_matrix = np.empty((matrix.shape[0], n_features))
cat_idx = 0
noncat_idx = 0
for i in range(n_features):
if i in self.cat_idxs_old:
new_col = np.ones((matrix.shape[0],))
start_idx = self.encoder.feature_indices_[cat_idx]
for j in range(n_values[cat_idx]):
col = matrix[:, start_idx + j]
new_col[col == 1] = j
cat_idx += 1
else:
new_col = np.array(matrix[:, noncat_start_idx + noncat_idx])
noncat_idx += 1
inverted_matrix[:, i] = new_col
return inverted_matrix
def total_dummies(self):
return sum(self.n_values)
def consolidate_columnlabels(columnlabels):
import re
# use this to check if a label was created by dummy encoder
p = re.compile(r'(.*)____\d+')
consolidated_columnlabels = []
cat_seen = set() # avoid duplicate cat_labels
for lab in columnlabels:
m = p.match(lab)
# m.group(1) is the original column name
if m:
if m.group(1) not in cat_seen:
cat_seen.add(m.group(1))
consolidated_columnlabels.append(m.group(1))
else:
# non-categorical variable
consolidated_columnlabels.append(lab)
return consolidated_columnlabels
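def _dummy_encoder_example():
    # Illustrative sketch (not part of the original module), using the older
    # scikit-learn OneHotEncoder API that this class wraps. One categorical
    # column with 3 levels is followed by one numeric column 'a'.
    X = np.array([[0, 10], [1, 20], [2, 30]], dtype=float)
    enc = DummyEncoder(n_values=[3], categorical_features=[0],
                       cat_columnlabels=['knob'], noncat_columnlabels=['a'])
    X_enc = enc.fit_transform(X)
    # The categorical column becomes knob____0..knob____2; 'a' is stacked last.
    assert enc.new_labels == ['knob____0', 'knob____1', 'knob____2', 'a']
    assert consolidate_columnlabels(enc.new_labels) == ['knob', 'a']
    assert np.array_equal(enc.inverse_transform(X_enc), X)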
def fix_scaler(scaler, encoder, params):
p = 0.5
mean = scaler.mean_
var = scaler.var_
n_values = encoder.n_values
cat_start_idxs = encoder.xform_start_indices
current_idx = 0
cat_idx = 0
for param in params:
if param.iscategorical:
if param.isboolean:
nvals = 1
else:
assert cat_start_idxs[cat_idx] == current_idx
nvals = n_values[cat_idx]
cat_idx += 1
cat_mean = nvals * p
cat_var = cat_mean * (1 - p)
mean[current_idx: current_idx + nvals] = cat_mean
var[current_idx: current_idx + nvals] = cat_var
current_idx += nvals
else:
current_idx += 1
scaler.mean_ = mean
scaler.var_ = var
scaler.scale_ = np.sqrt(var)
def get_min_max(params, encoder=None):
if encoder is not None:
num_cat_feats = encoder.n_values.size
nfeats = len(params) - num_cat_feats + np.sum(encoder.n_values)
n_values = encoder.n_values
cat_start_idxs = encoder.xform_start_indices
else:
num_cat_feats = 0
nfeats = len(params)
n_values = np.array([])
cat_start_idxs = np.array([])
mins = np.empty((nfeats,))
maxs = np.empty((nfeats,))
current_idx = 0
cat_idx = 0
for param in params:
if param.iscategorical:
if param.isboolean:
nvals = 1
else:
assert cat_start_idxs[cat_idx] == current_idx
nvals = n_values[cat_idx]
cat_idx += 1
mins[current_idx: current_idx + nvals] = 0
maxs[current_idx: current_idx + nvals] = 1
current_idx += nvals
else:
mins[current_idx] = param.true_range[0] # valid_values[0]
maxs[current_idx] = param.true_range[1] # valid_values[-1]
current_idx += 1
return mins, maxs
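def _get_min_max_example():
    # Illustrative sketch (not part of the original module): without an encoder,
    # get_min_max simply collects each knob's true_range. The knob objects here
    # are hypothetical stand-ins exposing the attributes the function reads.
    from types import SimpleNamespace
    params = [SimpleNamespace(iscategorical=False, isboolean=False,
                              true_range=(0, 8)),
              SimpleNamespace(iscategorical=False, isboolean=False,
                              true_range=(64, 4096))]
    mins, maxs = get_min_max(params)
    assert np.array_equal(mins, [0, 64]) and np.array_equal(maxs, [8, 4096])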
# ==========================================================
# Min-max scaler
# ==========================================================
class MinMaxScaler(Preprocess):
def __init__(self, mins=None, maxs=None):
self.scaler_ = SklearnMinMaxScaler()
if mins is not None:
assert isinstance(mins, np.ndarray)
if mins.ndim == 1:
mins = mins.reshape(1, -1)
self.scaler_.partial_fit(mins)
self.mins_ = mins
else:
self.mins_ = None
if maxs is not None:
assert isinstance(maxs, np.ndarray)
if maxs.ndim == 1:
maxs = maxs.reshape(1, -1)
self.scaler_.partial_fit(maxs)
self.maxs_ = maxs
else:
self.maxs_ = None
self.fitted_ = self.mins_ is not None and self.maxs_ is not None
def fit(self, matrix):
if matrix.ndim == 1:
matrix = matrix.reshape(1, -1)
self.scaler_.partial_fit(matrix)
self.mins_ = self.scaler_.data_min_
self.maxs_ = self.scaler_.data_max_
self.fitted_ = True
return self
def transform(self, matrix, copy=True):
if not self.fitted_:
raise Exception("Model not fitted!")
if matrix.ndim == 1:
matrix = matrix.reshape(1, -1)
return self.scaler_.transform(matrix)
def inverse_transform(self, matrix, copy=True):
if matrix.ndim == 1:
matrix = matrix.reshape(1, -1)
return self.scaler_.inverse_transform(matrix)
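def _minmax_scaler_example():
    # Illustrative sketch (not part of the original module): seeding the scaler
    # with known bounds (e.g. from get_min_max) so it maps those bounds onto
    # [0, 1] without having to see any training data first.
    mins = np.array([0.0, 100.0])
    maxs = np.array([10.0, 200.0])
    scaler = MinMaxScaler(mins=mins, maxs=maxs)
    scaled = scaler.transform(np.array([[5.0, 150.0]]))
    assert np.allclose(scaled, [[0.5, 0.5]])
    assert np.allclose(scaler.inverse_transform(scaled), [[5.0, 150.0]])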

View File

@@ -0,0 +1,5 @@
#
# OtterTune - __init__.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#

View File

@@ -0,0 +1,91 @@
#
# OtterTune - test_cluster.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import unittest
import numpy as np
from sklearn import datasets
from analysis.cluster import KMeans, KMeansClusters, create_kselection_model
class TestKMeans(unittest.TestCase):
@classmethod
def setUpClass(cls):
super(TestKMeans, cls).setUpClass()
iris = datasets.load_iris()
cls.model = KMeans()
cls.model.fit(iris.data, 5, iris.target,
estimator_params={'n_init': 50, 'random_state': 42})
def test_kmeans_n_clusters(self):
self.assertEqual(self.model.n_clusters_, 5)
def test_kmeans_cluster_inertia(self):
self.assertAlmostEqual(self.model.cluster_inertia_, 46.535, 2)
def test_kmeans_cluster_labels(self):
expected_labels = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 2, 3, 3, 3,
2, 3, 2, 2, 3, 2, 3, 2, 3, 3, 2, 3, 2, 3, 2, 3, 3, 3, 3,
3, 3, 3, 2, 2, 2, 2, 3, 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2,
2, 2, 3, 2, 2, 4, 3, 0, 4, 4, 0, 2, 0, 4, 0, 4, 4, 4, 3,
4, 4, 4, 0, 0, 3, 4, 3, 0, 3, 4, 0, 3, 3, 4, 0, 0, 0, 4,
3, 3, 0, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 3]
for lab_actual, lab_expected in zip(self.model.cluster_labels_, expected_labels):
self.assertEqual(lab_actual, lab_expected)
def test_kmeans_sample_labels(self):
for lab_actual, lab_expected in zip(self.model.sample_labels_, datasets.load_iris().target):
self.assertEqual(lab_actual, lab_expected)
def test_kmeans_cluster_centers(self):
expected_centers = [[7.475, 3.125, 6.300, 2.050],
[5.006, 3.418, 1.464, 0.244],
[5.508, 2.600, 3.908, 1.204],
[6.207, 2.853, 4.746, 1.564],
[6.529, 3.058, 5.508, 2.162]]
for row_actual, row_expected in zip(self.model.cluster_centers_, expected_centers):
for val_actual, val_expected in zip(row_actual, row_expected):
self.assertAlmostEqual(val_actual, val_expected, 2)
class TestKSelection(unittest.TestCase):
def setUp(self):
np.random.seed(seed=42)
@classmethod
def setUpClass(cls):
super(TestKSelection, cls).setUpClass()
# Load Iris data
iris = datasets.load_iris()
cls.matrix = iris.data
cls.kmeans_models = KMeansClusters()
cls.kmeans_models.fit(cls.matrix,
min_cluster=1,
max_cluster=10,
sample_labels=iris.target,
estimator_params={'n_init': 50, 'random_state': 42})
def test_detk_optimal_num_clusters(self):
# Compute optimal # cluster using det-k
detk = create_kselection_model("det-k")
detk.fit(self.matrix, self.kmeans_models.cluster_map_)
self.assertEqual(detk.optimal_num_clusters_, 2)
def test_gap_statistic_optimal_num_clusters(self):
# Compute optimal # cluster using gap-statistics
gap = create_kselection_model("gap-statistic")
gap.fit(self.matrix, self.kmeans_models.cluster_map_)
self.assertEqual(gap.optimal_num_clusters_, 8)
def test_silhouette_optimal_num_clusters(self):
# Compute optimal # cluster using Silhouette Analysis
sil = create_kselection_model("s-score")
sil.fit(self.matrix, self.kmeans_models.cluster_map_)
self.assertEqual(sil.optimal_num_clusters_, 2)

View File

@@ -0,0 +1,116 @@
#
# OtterTune - test_constraints.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import unittest
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from analysis.constraints import ParamConstraintHelper
from analysis.preprocessing import DummyEncoder
class ConstraintHelperTestCase(unittest.TestCase):
def test_scale_rescale(self):
X = datasets.load_boston()['data']
X_scaler = StandardScaler()
# params hard-coded for test (messy to import constant from website module)
constraint_helper = ParamConstraintHelper(X_scaler, None,
init_flip_prob=0.3,
flip_prob_decay=0.5)
X_scaled = X_scaler.fit_transform(X)
# there may be some floating point imprecision between scaling and rescaling
row_unscaled = np.round(constraint_helper._handle_scaling(X_scaled[0], True), 10) # pylint: disable=protected-access
self.assertTrue(np.all(X[0] == row_unscaled))
row_rescaled = constraint_helper._handle_rescaling(row_unscaled, True) # pylint: disable=protected-access
self.assertTrue(np.all(X_scaled[0] == row_rescaled))
def test_apply_constraints_unscaled(self):
n_values = [3]
categorical_features = [0]
encoder = DummyEncoder(n_values, categorical_features, ['a'], [])
encoder.fit([[0, 17]])
X_scaler = StandardScaler()
constraint_helper = ParamConstraintHelper(X_scaler, encoder,
init_flip_prob=0.3,
flip_prob_decay=0.5)
X = [0.1, 0.2, 0.3, 17]
X_expected = [0, 0, 1, 17]
X_corrected = constraint_helper.apply_constraints(X, scaled=False, rescale=False)
self.assertTrue(np.all(X_corrected == X_expected))
def test_apply_constraints(self):
n_values = [3]
categorical_features = [0]
encoder = DummyEncoder(n_values, categorical_features, ['a'], [])
encoder.fit([[0, 17]])
X_scaler = StandardScaler()
X = np.array([[0, 0, 1, 17], [1, 0, 0, 17]], dtype=float)
X_scaled = X_scaler.fit_transform(X)
constraint_helper = ParamConstraintHelper(X_scaler, encoder,
init_flip_prob=0.3,
flip_prob_decay=0.5)
row = X_scaled[0]
new_row = np.copy(row)
new_row[0: 3] += 0.1 # should still represent [0, 0, 1] encoding
row_corrected = constraint_helper.apply_constraints(new_row)
self.assertTrue(np.all(row == row_corrected))
# tests that repeatedly applying randomize_categorical_features
# always results in valid configurations of categorical dummy encodings
# and will lead to all possible values of categorical variables being tried
def test_randomize_categorical_features(self):
# variable 0 is categorical, 3 values
# variable 1 is not categorical
# variable 2 is categorical, 4 values
cat_var_0_levels = 3
cat_var_2_levels = 4
cat_var_0_idx = 0
cat_var_2_idx = 2
n_values = [cat_var_0_levels, cat_var_2_levels]
categorical_features = [cat_var_0_idx, cat_var_2_idx]
encoder = DummyEncoder(n_values, categorical_features, ['a', 'b'], [])
encoder.fit([[0, 17, 0]])
X_scaler = StandardScaler()
constraint_helper = ParamConstraintHelper(X_scaler, encoder,
init_flip_prob=0.3,
flip_prob_decay=0.5)
# row is a sample encoded set of features,
# note that the non-categorical variable is on the right
row = np.array([0, 0, 1, 1, 0, 0, 0, 17], dtype=float)
trials = 20
cat_var_0_counts = np.zeros(cat_var_0_levels)
cat_var_2_counts = np.zeros(cat_var_2_levels)
for _ in range(trials):
# possibly flip the categorical features
row = constraint_helper.randomize_categorical_features(row, scaled=False, rescale=False)
# check that result is valid for cat_var_0
cat_var_0_dummies = row[0: cat_var_0_levels]
self.assertTrue(np.all(np.logical_or(cat_var_0_dummies == 0, cat_var_0_dummies == 1)))
self.assertEqual(np.sum(cat_var_0_dummies), 1)
cat_var_0_counts[np.argmax(cat_var_0_dummies)] += 1
# check that result is valid for cat_var_2
cat_var_2_dummies = row[cat_var_0_levels: cat_var_0_levels + cat_var_2_levels]
self.assertTrue(np.all(np.logical_or(cat_var_2_dummies == 0, cat_var_2_dummies == 1)))
self.assertEqual(np.sum(cat_var_2_dummies), 1)
cat_var_2_counts[np.argmax(cat_var_2_dummies)] += 1
self.assertEqual(row[-1], 17)
for ct in cat_var_0_counts:
self.assertTrue(ct > 0)
for ct in cat_var_2_counts:
self.assertTrue(ct > 0)
if __name__ == '__main__':
unittest.main()

View File

@@ -0,0 +1,61 @@
#
# OtterTune - test_gpr.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import unittest
from sklearn import datasets
from analysis.gp import GPRNP
from analysis.gp_tf import GPR
# test numpy version GPR
class TestGPRNP(unittest.TestCase):
@classmethod
def setUpClass(cls):
super(TestGPRNP, cls).setUpClass()
boston = datasets.load_boston()
data = boston['data']
X_train = data[0:500]
X_test = data[500:]
y_train = boston['target'][0:500].reshape(500, 1)
cls.model = GPRNP(length_scale=1.0, magnitude=1.0)
cls.model.fit(X_train, y_train, ridge=1.0)
cls.gpr_result = cls.model.predict(X_test)
def test_gprnp_ypreds(self):
ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014]
self.assertEqual(ypreds_round, expected_ypreds)
def test_gprnp_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
self.assertEqual(sigmas_round, expected_sigmas)
# test Tensorflow version GPR
class TestGPRTF(unittest.TestCase):
@classmethod
def setUpClass(cls):
super(TestGPRTF, cls).setUpClass()
boston = datasets.load_boston()
data = boston['data']
X_train = data[0:500]
X_test = data[500:]
y_train = boston['target'][0:500].reshape(500, 1)
cls.model = GPR(length_scale=1.0, magnitude=1.0)
cls.model.fit(X_train, y_train, ridge=1.0)
cls.gpr_result = cls.model.predict(X_test)
def test_gprnp_ypreds(self):
ypreds_round = [round(x[0], 4) for x in self.gpr_result.ypreds]
expected_ypreds = [0.0181, 0.0014, 0.0006, 0.0015, 0.0039, 0.0014]
self.assertEqual(ypreds_round, expected_ypreds)
def test_gprnp_sigmas(self):
sigmas_round = [round(x[0], 4) for x in self.gpr_result.sigmas]
expected_sigmas = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
self.assertEqual(sigmas_round, expected_sigmas)

View File

@@ -0,0 +1,83 @@
#
# OtterTune - test_preprocessing.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import unittest
import numpy as np
from analysis.preprocessing import DummyEncoder, consolidate_columnlabels
class TestDummyEncoder(unittest.TestCase):
def test_no_categoricals(self):
X = [[1, 2, 3], [4, 5, 6]]
n_values = []
categorical_features = []
cat_columnlabels = []
noncat_columnlabels = ['a', 'b', 'c']
enc = DummyEncoder(n_values, categorical_features,
cat_columnlabels, noncat_columnlabels)
X_encoded = enc.fit_transform(X)
new_labels = enc.new_labels
self.assertTrue(np.all(X == X_encoded))
self.assertEqual(noncat_columnlabels, new_labels)
def test_simple_categorical(self):
X = [[0, 1, 2], [1, 1, 2], [2, 1, 2]]
n_values = [3]
categorical_features = [0]
cat_columnlabels = ['label']
noncat_columnlabels = ['a', 'b']
X_expected = [[1, 0, 0, 1, 2], [0, 1, 0, 1, 2], [0, 0, 1, 1, 2]]
new_labels_expected = ['label____0', 'label____1', 'label____2', 'a', 'b']
enc = DummyEncoder(n_values, categorical_features,
cat_columnlabels, noncat_columnlabels)
X_encoded = enc.fit_transform(X)
new_labels = enc.new_labels
self.assertTrue(np.all(X_expected == X_encoded))
self.assertEqual(new_labels_expected, new_labels)
def test_mixed_categorical(self):
X = [[1, 0, 2], [1, 1, 2], [1, 2, 2]]
n_values = [3]
categorical_features = [1]
cat_columnlabels = ['label']
noncat_columnlabels = ['a', 'b']
X_expected = [[1, 0, 0, 1, 2], [0, 1, 0, 1, 2], [0, 0, 1, 1, 2]]
new_labels_expected = ['label____0', 'label____1', 'label____2', 'a', 'b']
enc = DummyEncoder(n_values, categorical_features,
cat_columnlabels, noncat_columnlabels)
X_encoded = enc.fit_transform(X)
new_labels = enc.new_labels
self.assertTrue(np.all(X_expected == X_encoded))
self.assertEqual(new_labels_expected, new_labels)
def test_consolidate(self):
labels = ['label1____0', 'label1____1', 'label2____0', 'label2____1', 'noncat']
consolidated = consolidate_columnlabels(labels)
expected = ['label1', 'label2', 'noncat']
self.assertEqual(expected, consolidated)
def test_inverse_transform(self):
X = [[1, 0, 2], [1, 1, 2], [1, 2, 2]]
n_values = [3]
categorical_features = [1]
cat_columnlabels = ['label']
noncat_columnlabels = ['a', 'b']
X_expected = [[1, 0, 0, 1, 2], [0, 1, 0, 1, 2], [0, 0, 1, 1, 2]]
enc = DummyEncoder(n_values, categorical_features,
cat_columnlabels, noncat_columnlabels)
X_encoded = enc.fit_transform(X)
self.assertTrue(np.all(X_encoded == X_expected))
X_decoded = enc.inverse_transform(X_encoded)
self.assertTrue(np.all(X == X_decoded))
if __name__ == '__main__':
unittest.main()

106
server/analysis/util.py Normal file
View File

@@ -0,0 +1,106 @@
#
# OtterTune - util.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Oct 24, 2017
@author: dva
'''
import logging
from numbers import Number
import contextlib
import datetime
import numpy as np
def get_analysis_logger(name, level=logging.INFO):
logger = logging.getLogger(name)
log_handler = logging.StreamHandler()
log_formatter = logging.Formatter(
fmt='%(asctime)s [%(funcName)s:%(lineno)03d] %(levelname)-5s: %(message)s',
datefmt='%m-%d-%Y %H:%M:%S'
)
log_handler.setFormatter(log_formatter)
logger.addHandler(log_handler)
logger.setLevel(level)
np.set_printoptions(formatter={'float': '{: 0.3f}'.format})
return logger
LOG = get_analysis_logger(__name__)
def stdev_zero(data, axis=None, nearzero=1e-8):
mstd = np.expand_dims(data.std(axis=axis), axis=axis)
return (np.abs(mstd) < nearzero).squeeze()
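def _stdev_zero_example():
    # Illustrative sketch (not part of the original module): flag columns whose
    # standard deviation is (near) zero.
    data = np.array([[1.0, 5.0], [1.0, 7.0], [1.0, 9.0]])
    assert np.array_equal(stdev_zero(data, axis=0), [True, False])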
def get_datetime():
return datetime.datetime.utcnow()
class TimerStruct(object):
def __init__(self):
self.__start_time = 0.0
self.__stop_time = 0.0
self.__elapsed = None
@property
def elapsed_seconds(self):
if self.__elapsed is None:
return (get_datetime() - self.__start_time).total_seconds()
return self.__elapsed.total_seconds()
def start(self):
self.__start_time = get_datetime()
def stop(self):
self.__stop_time = get_datetime()
self.__elapsed = (self.__stop_time - self.__start_time)
@contextlib.contextmanager
def stopwatch(message=None):
ts = TimerStruct()
ts.start()
try:
yield ts
finally:
ts.stop()
if message is not None:
LOG.info('Total elapsed_seconds time for %s: %.3fs', message, ts.elapsed_seconds)
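def _stopwatch_example():
    # Illustrative sketch (not part of the original module): time a block of
    # work; the elapsed time is logged on exit and also available afterwards.
    with stopwatch("toy matrix multiply") as watch:
        np.dot(np.random.rand(100, 100), np.random.rand(100, 100))
    LOG.info("measured %.3f seconds", watch.elapsed_seconds)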
def get_data_base(arr):
"""For a given Numpy array, finds the
base array that "owns" the actual data."""
base = arr
while isinstance(base.base, np.ndarray):
base = base.base
return base
def arrays_share_data(x, y):
return get_data_base(x) is get_data_base(y)
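def _arrays_share_data_example():
    # Illustrative sketch (not part of the original module): a slice shares its
    # parent's buffer, while a copy owns its own data.
    x = np.arange(10)
    assert arrays_share_data(x, x[2:5])
    assert not arrays_share_data(x, x.copy())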
def array_tostring(arr):
arr_shape = arr.shape
arr = arr.ravel()
arr = np.array([str(a) for a in arr])
return arr.reshape(arr_shape)
def is_numeric_matrix(matrix):
assert matrix.size > 0
return isinstance(matrix.ravel()[0], Number)
def is_lexical_matrix(matrix):
assert matrix.size > 0
return isinstance(matrix.ravel()[0], str)

13
server/website/.gitignore vendored Normal file
View File

@@ -0,0 +1,13 @@
# Logs and settings #
#####################
log/
*.log
local_settings.py
# celery beat schedule file #
#############################
celerybeat-schedule
# Raw data files #
##################
data/media/*

100
server/website/LICENSE Normal file
View File

@@ -0,0 +1,100 @@
Business Source License 1.1
Parameters
Licensor: Carnegie Mellon University
Licensed Work: OtterTune
The Licensed Work is (c) 2019 Carnegie Mellon University
Additional Use Grant: You may make use of the Licensed Work, provided that
you may not use the Licensed Work for a Database
Service.
A “Database Service” is a commercial offering that
allows third parties (other than your employees and
contractors) to access the functionality of the
Licensed Work by creating tables whose schemas are
controlled by such third parties.
Change Date: 2024-01-18
Change License: Apache License, Version 2.0
Notice
The Business Source License (this document, or the “License”) is not an Open
Source license. However, the Licensed Work will eventually be made available
under an Open Source License, as stated in this License.
License text copyright (c) 2017 MariaDB Corporation Ab, All Rights Reserved.
“Business Source License” is a trademark of MariaDB Corporation Ab.
-----------------------------------------------------------------------------
Business Source License 1.1
Terms
The Licensor hereby grants you the right to copy, modify, create derivative
works, redistribute, and make non-production use of the Licensed Work. The
Licensor may make an Additional Use Grant, above, permitting limited
production use.
Effective on the Change Date, or the fourth anniversary of the first publicly
available distribution of a specific version of the Licensed Work under this
License, whichever comes first, the Licensor hereby grants you rights under
the terms of the Change License, and the rights granted in the paragraph
above terminate.
If your use of the Licensed Work does not comply with the requirements
currently in effect as described in this License, you must purchase a
commercial license from the Licensor, its affiliated entities, or authorized
resellers, or you must refrain from using the Licensed Work.
All copies of the original and modified Licensed Work, and derivative works
of the Licensed Work, are subject to this License. This License applies
separately for each version of the Licensed Work and the Change Date may vary
for each version of the Licensed Work released by Licensor.
You must conspicuously display this License on each original or modified copy
of the Licensed Work. If you receive the Licensed Work in original or
modified form from a third party, the terms and conditions set forth in this
License apply to your use of that work.
Any use of the Licensed Work in violation of this License will automatically
terminate your rights under this License for the current and all other
versions of the Licensed Work.
This License does not grant you any right in any trademark or logo of
Licensor or its affiliates (provided that you may use a trademark or logo of
Licensor as expressly required by this License).
TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
AN “AS IS” BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
TITLE.
MariaDB hereby grants you permission to use this License’s text to license
your works, and to refer to it using the trademark “Business Source License”,
as long as you comply with the Covenants of Licensor below.
Covenants of Licensor
In consideration of the right to use this License’s text and the “Business
Source License” name and trademark, Licensor covenants to MariaDB, and to all
other recipients of the licensed work to be provided by Licensor:
1. To specify as the Change License the GPL Version 2.0 or any later version,
or a license that is compatible with GPL Version 2.0 or a later version,
where “compatible” means that software provided under the Change License can
be included in a program with software provided under GPL Version 2.0 or a
later version. Licensor may specify additional Change Licenses without
limitation.
2. To either: (a) specify an additional grant of rights to use that does not
impose any additional restriction on the right granted in this License, as
the Additional Use Grant; or (b) insert the text “None”.
3. To specify a Change Date.
4. Not to modify this License in any other way.

72
server/website/README.md Normal file
View File

@@ -0,0 +1,72 @@
Website
=======
The OLTP-Bench website acts as an intermediary between the client's database and OtterTune (the DBMS auto-tuning system).
## Requirements
##### Ubuntu Packages
```
sudo apt-get install python-pip python-dev python-mysqldb rabbitmq-server
```
##### Python Packages
```
sudo pip install -r requirements.txt
```
## Installation Instructions
##### 1. Update the Django settings
Navigate to the settings directory:
```
cd website/settings
```
Copy the credentials template:
```
cp credentials_TEMPLATE.py credentials.py
```
Edit `credentials.py` and update the secret key and database information.
##### 2. Serve the static files
If you are not running the website in production, simply set `DEBUG = True` in `credentials.py` and Django will serve the static files automatically.
This is not efficient for production, so in a production environment you need to configure a web server such as Apache to serve the static files. ([Details](https://docs.djangoproject.com/en/1.11/howto/static-files/deployment/))
##### 3. Create the MySQL database if it does not already exist
```
mysqladmin create -u <username> -p ottertune
```
##### 4. Migrate the Django models into the database
```
python manage.py makemigrations website
python manage.py migrate
```
##### 5. Create the super user
```
python manage.py createsuperuser
```
##### 6. Start the message broker, celery worker, website server, and periodic task
```
sudo rabbitmq-server -detached
python manage.py celery worker --loglevel=info --pool=threads
python manage.py runserver 0.0.0.0:8000
python manage.py celerybeat --verbosity=2 --loglevel=info
```

1
server/website/beat.sh Executable file
View File

@@ -0,0 +1 @@
python3 manage.py celerybeat --verbosity=2 --loglevel=info > beat.log 2>&1 &

1
server/website/celery.sh Executable file
View File

@@ -0,0 +1 @@
python3 manage.py celery worker --loglevel=info --pool=threads --concurrency=1 > celery.log 2>&1 &

2
server/website/config/.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
*.bak
prod_supervisord.conf

View File

@@ -0,0 +1,33 @@
# -----------------------------
# PostgreSQL configuration file
# -----------------------------
#
# This file consists of lines of the form:
#
# name = value
#
# (The "=" is optional.) Whitespace may be used. Comments are introduced with
# "#" anywhere on a line. The complete list of parameter names and allowed
# values can be found in the PostgreSQL documentation.
#
# The commented-out settings shown in this file represent the default values.
# Re-commenting a setting is NOT sufficient to revert it to the default value;
# you need to reload the server.
#
# This file is read on server startup and when the server receives a SIGHUP
# signal. If you edit the file on a running system, you have to SIGHUP the
# server for the changes to take effect, or use "pg_ctl reload". Some
# parameters, which are marked below, require a server shutdown and restart to
# take effect.
#
# Any parameter can also be given as a command-line option to the server, e.g.,
# "postgres -c log_connections=on". Some parameters can be changed at run time
# with the "SET" SQL command.
#
# Memory units:  kB = kilobytes        Time units:  ms  = milliseconds
#                MB = megabytes                     s   = seconds
#                GB = gigabytes                     min = minutes
#                                                   h   = hours
#                                                   d   = days

1
server/website/django.sh Executable file
View File

@@ -0,0 +1 @@
python3 manage.py runserver 0.0.0.0:8000 > django.log 2>&1 &

171
server/website/fabfile.py vendored Normal file
View File

@@ -0,0 +1,171 @@
#
# OtterTune - fabfile.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Admin tasks
@author: dvanaken
'''
import logging
from collections import namedtuple
from fabric.api import env, local, quiet, settings, task
from fabric.state import output as fabric_output
from website.settings import DATABASES, PROJECT_ROOT
LOG = logging.getLogger(__name__)
# Fabric environment settings
env.hosts = ['localhost']
fabric_output.update({
'running': False,
'stdout': True,
})
Status = namedtuple('Status', ['RUNNING', 'STOPPED'])
STATUS = Status(0, 1)
# Setup and base commands
RABBITMQ_CMD = 'sudo rabbitmqctl {action}'.format
@task
def start_rabbitmq(detached=True):
detached = parse_bool(detached)
cmd = 'sudo rabbitmq-server' + (' -detached' if detached else '')
local(cmd)
@task
def stop_rabbitmq():
with settings(warn_only=True):
local(RABBITMQ_CMD(action='stop'))
@task
def status_rabbitmq():
with settings(warn_only=True), quiet():
res = local(RABBITMQ_CMD(action='status'), capture=True)
if res.return_code == 2 or res.return_code == 69:
status = STATUS.STOPPED
elif res.return_code == 0:
status = STATUS.RUNNING
else:
raise Exception("Rabbitmq: unknown status " + str(res.return_code))
log_status(status, 'rabbitmq')
return status
@task
def start_celery():
if status_rabbitmq() == STATUS.STOPPED:
start_rabbitmq()
local('python manage.py celery worker --detach --loglevel=info --pool=threads')
@task
def stop_celery():
with settings(warn_only=True), quiet():
local('kill -9 `ps auxww | grep \'celery worker\' | awk \'{print $2}\'`')
@task
def start_debug_server(host="0.0.0.0", port=8000):
stop_celery()
start_celery()
local('python manage.py runserver {}:{}'.format(host, port))
@task
def stop_all():
stop_celery()
stop_rabbitmq()
def parse_bool(value):
if isinstance(value, bool):
return value
elif isinstance(value, str):
return value.lower() == 'true'
else:
raise Exception('Cannot convert {} to bool'.format(type(value)))
def log_status(status, task_name):
LOG.info("%s status: %s", task_name, STATUS._fields[STATUS.index(status)])
@task
def reset_website():
# WARNING: destroys the existing website and recreates it with all
# of the required initial data loaded (e.g., the KnobCatalog)
# Recreate the ottertune database
user = DATABASES['default']['USER']
passwd = DATABASES['default']['PASSWORD']
name = DATABASES['default']['NAME']
local("mysql -u {} -p{} -N -B -e \"DROP DATABASE IF EXISTS {}\"".format(
user, passwd, name))
local("mysql -u {} -p{} -N -B -e \"CREATE DATABASE {}\"".format(
user, passwd, name))
# Reinitialize the website
local('python manage.py migrate website')
local('python manage.py migrate')
@task
def create_test_website():
# WARNING: destroys the existing website and creates a new one. Creates
# a test user and two test sessions: a basic session and a tuning session.
# The tuning session has knob/metric data preloaded (5 workloads, 20
# samples each).
reset_website()
local("python manage.py loaddata test_website.json")
@task
def setup_test_user():
# Adds a test user to an existing website with two empty sessions
local(("echo \"from django.contrib.auth.models import User; "
"User.objects.filter(email='user@email.com').delete(); "
"User.objects.create_superuser('user', 'user@email.com', 'abcd123')\" "
"| python manage.py shell"))
local("python manage.py loaddata test_user_sessions.json")
@task
def generate_and_load_data(n_workload, n_samples_per_workload, upload_code,
random_seed=''):
local('python script/controller_simulator/data_generator.py {} {} {}'.format(
n_workload, n_samples_per_workload, random_seed))
local(('python script/controller_simulator/upload_data.py '
'script/controller_simulator/generated_data {}').format(upload_code))
@task
def dumpdata(dumppath):
# Helper function for calling Django's dumpdata function that excludes
# the static fixture data from being dumped
excluded_models = ['DBMSCatalog', 'KnobCatalog', 'MetricCatalog', 'Hardware']
cmd = 'python manage.py dumpdata --natural-foreign --natural-primary'
for model in excluded_models:
cmd += ' --exclude website.' + model
cmd += ' > ' + dumppath
local(cmd)
@task
def run_background_tasks():
# Runs the background tasks just once.
cmd = ("from website.tasks import run_background_tasks; "
"run_background_tasks()")
local(('export PYTHONPATH={}\:$PYTHONPATH; ' # pylint: disable=anomalous-backslash-in-string
'django-admin shell --settings=website.settings '
'-c\"{}\"').format(PROJECT_ROOT, cmd))

10
server/website/manage.py Executable file
View File

@@ -0,0 +1,10 @@
#!/usr/bin/env python
import os
import sys
if __name__ == "__main__":
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "website.settings")
from django.core.management import execute_from_command_line
execute_from_command_line(sys.argv)

View File

@@ -0,0 +1,21 @@
autopep8==1.3.4
celery==3.1.23
Django==1.10.1
django-celery==3.2.1
django-debug-toolbar==1.5
django-request-logging==0.4.6
mock==2.0.0
Fabric3==1.13.1.post1
hurry.filesize==0.9
matplotlib==2.0.0
numpy==1.13.1
requests==2.18.4
pycodestyle==2.3.1
astroid==1.5.1
pylint==1.5.2
pyDOE==0.3.8
mysqlclient==1.3.12
scikit-learn==0.19.1
scipy==1.0.0
tensorflow==1.6.0
threadpool==1.3.2

View File

@@ -0,0 +1 @@
generated_data

View File

@@ -0,0 +1,132 @@
#
# OtterTune - data_generator.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Nov 30, 2017
@author: dvanaken
'''
import copy
import datetime
import logging
import os
import shutil
import sys
import json
import numpy as np
LOG = logging.getLogger(__name__)
# Data generator configuration
OBSERVATION_TIME_SEC = 300 # 5 minutes
START_TIME = datetime.datetime.now() - datetime.timedelta(weeks=1)
START_FREQUENCY = datetime.timedelta(minutes=10)
END_FREQUENCY = datetime.timedelta(seconds=OBSERVATION_TIME_SEC)
EPOCH = datetime.datetime.utcfromtimestamp(0)
# Paths
ROOT_DIR = os.path.abspath(os.path.dirname(__file__))
SAMPLE_DIR = os.path.join(ROOT_DIR, 'samples')
OUTPUT_DIR = os.path.join(ROOT_DIR, 'generated_data')
def unix_time_millis(dt):
return int((dt - EPOCH).total_seconds() * 1000.0)
def generate_data(n_workloads, n_samples_per_workload):
with open(os.path.join(SAMPLE_DIR, 'knobs.json'), 'r') as f:
knob_sample = json.load(f)
with open(os.path.join(SAMPLE_DIR, 'metrics_before.json'), 'r') as f:
metrics_start_sample = json.load(f)
with open(os.path.join(SAMPLE_DIR, 'metrics_after.json'), 'r') as f:
metrics_end_sample = json.load(f)
with open(os.path.join(SAMPLE_DIR, 'summary.json'), 'r') as f:
summary_sample = json.load(f)
start_time = START_TIME
end_time = START_TIME + END_FREQUENCY
for i in range(n_workloads):
workload_name = 'workload-{}'.format(i)
wkld_dir = os.path.join(OUTPUT_DIR, workload_name)
os.mkdir(wkld_dir)
for j in range(n_samples_per_workload):
knob_data = copy.deepcopy(knob_sample)
metrics_start_data = copy.deepcopy(metrics_start_sample)
metrics_end_data = copy.deepcopy(metrics_end_sample)
summary_data = copy.deepcopy(summary_sample)
summary_data['workload_name'] = workload_name
summary_data['observation_time'] = OBSERVATION_TIME_SEC
summary_data['start_time'] = unix_time_millis(start_time)
summary_data['end_time'] = unix_time_millis(end_time)
start_time = start_time + START_FREQUENCY
end_time = start_time + END_FREQUENCY
knob_vals = np.random.randint(1, 11, 4)
global_knobs = knob_data['global']['global']
global_knobs['shared_buffers'] = str(knob_vals[0]) + 'GB'
global_knobs['work_mem'] = str(knob_vals[1]) + 'GB'
global_knobs['checkpoint_timeout'] = str(knob_vals[2]) + 'min'
global_knobs['effective_io_concurrency'] = str(knob_vals[3])
metrics_start_data['global']['pg_stat_bgwriter']['buffers_alloc'] = np.random.randint(
3000, 7000)
metrics_end_data['global']['pg_stat_bgwriter']['buffers_alloc'] = np.random.randint(
7000, 10000)
locations = [
('xact_commit', metrics_start_data['local']['database']['pg_stat_database']),
('xact_commit', metrics_end_data['local']['database']['pg_stat_database']),
('n_tup_ins', metrics_start_data['local']['table']['pg_stat_user_tables']),
('n_tup_ins', metrics_end_data['local']['table']['pg_stat_user_tables']),
('idx_blks_hit', metrics_start_data['local']['indexes']['pg_statio_user_indexes']),
('idx_blks_hit', metrics_end_data['local']['indexes']['pg_statio_user_indexes']),
]
for k, (name, loc) in enumerate(locations):
for kvs in list(loc.values()):
if k % 2 == 0: # start time must be smaller value
met_val = np.random.randint(30000, 70000)
else:
met_val = np.random.randint(70000, 100000)
kvs[name] = met_val
basepath = os.path.join(wkld_dir, 'sample-{}'.format(j))
with open(basepath + "__knobs.json", 'w') as f:
json.dump(knob_data, f, indent=4)
with open(basepath + '__metrics_start.json', 'w') as f:
json.dump(metrics_start_data, f, indent=4)
with open(basepath + '__metrics_end.json', 'w') as f:
json.dump(metrics_end_data, f, indent=4)
with open(basepath + '__summary.json', 'w') as f:
json.dump(summary_data, f, indent=4)
def main():
if len(sys.argv) < 3:
LOG.error('Usage: python data_generator.py [n_workloads] [n_samples_per_workload] '
'[optional: random_seed]')
sys.exit(1)
if len(sys.argv) == 4:
random_seed = int(sys.argv[3])
LOG.info("Seeding the generator with value: %d", random_seed)
np.random.seed(seed=random_seed)
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
os.mkdir(OUTPUT_DIR)
generate_data(int(sys.argv[1]), int(sys.argv[2]))
LOG.info("Finished. Generated data written to %s.", OUTPUT_DIR)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,274 @@
{
"global": {"global": {
"DateStyle": "ISO, MDY",
"IntervalStyle": "postgres",
"TimeZone": "America/New_York",
"allow_system_table_mods": "off",
"application_name": "",
"archive_command": "(disabled)",
"archive_mode": "off",
"archive_timeout": "0",
"array_nulls": "on",
"authentication_timeout": "1min",
"autovacuum": "on",
"autovacuum_analyze_scale_factor": "0.1",
"autovacuum_analyze_threshold": "50",
"autovacuum_freeze_max_age": "200000000",
"autovacuum_max_workers": "3",
"autovacuum_multixact_freeze_max_age": "400000000",
"autovacuum_naptime": "1min",
"autovacuum_vacuum_cost_delay": "20ms",
"autovacuum_vacuum_cost_limit": "-1",
"autovacuum_vacuum_scale_factor": "0.2",
"autovacuum_vacuum_threshold": "50",
"autovacuum_work_mem": "-1",
"backend_flush_after": "0",
"backslash_quote": "safe_encoding",
"bgwriter_delay": "200ms",
"bgwriter_flush_after": "0",
"bgwriter_lru_maxpages": "100",
"bgwriter_lru_multiplier": "2",
"block_size": "8192",
"bonjour": "off",
"bonjour_name": "",
"bytea_output": "hex",
"check_function_bodies": "on",
"checkpoint_completion_target": "0.5",
"checkpoint_flush_after": "0",
"checkpoint_timeout": "5min",
"checkpoint_warning": "30s",
"client_encoding": "UTF8",
"client_min_messages": "notice",
"cluster_name": "",
"commit_delay": "0",
"commit_siblings": "5",
"config_file": "/Users/MacadamiaKitten/Desktop/psql_db/postgresql.conf",
"constraint_exclusion": "partition",
"cpu_index_tuple_cost": "0.005",
"cpu_operator_cost": "0.0025",
"cpu_tuple_cost": "0.01",
"cursor_tuple_fraction": "0.1",
"data_checksums": "off",
"data_directory": "/Users/MacadamiaKitten/Desktop/psql_db",
"db_user_namespace": "off",
"deadlock_timeout": "1s",
"debug_assertions": "off",
"debug_pretty_print": "on",
"debug_print_parse": "off",
"debug_print_plan": "off",
"debug_print_rewritten": "off",
"default_statistics_target": "100",
"default_tablespace": "",
"default_text_search_config": "pg_catalog.english",
"default_transaction_deferrable": "off",
"default_transaction_isolation": "read committed",
"default_transaction_read_only": "off",
"default_with_oids": "off",
"dynamic_library_path": "$libdir",
"dynamic_shared_memory_type": "posix",
"effective_cache_size": "4GB",
"effective_io_concurrency": "0",
"enable_bitmapscan": "on",
"enable_gathermerge": "on",
"enable_hashagg": "on",
"enable_hashjoin": "on",
"enable_indexonlyscan": "on",
"enable_indexscan": "on",
"enable_material": "on",
"enable_mergejoin": "on",
"enable_nestloop": "on",
"enable_seqscan": "on",
"enable_sort": "on",
"enable_tidscan": "on",
"escape_string_warning": "on",
"event_source": "PostgreSQL",
"exit_on_error": "off",
"external_pid_file": "",
"extra_float_digits": "3",
"force_parallel_mode": "off",
"from_collapse_limit": "8",
"fsync": "on",
"full_page_writes": "on",
"geqo": "on",
"geqo_effort": "5",
"geqo_generations": "0",
"geqo_pool_size": "0",
"geqo_seed": "0",
"geqo_selection_bias": "2",
"geqo_threshold": "12",
"gin_fuzzy_search_limit": "0",
"gin_pending_list_limit": "4MB",
"hba_file": "/Users/MacadamiaKitten/Desktop/psql_db/pg_hba.conf",
"hot_standby": "on",
"hot_standby_feedback": "off",
"huge_pages": "try",
"ident_file": "/Users/MacadamiaKitten/Desktop/psql_db/pg_ident.conf",
"idle_in_transaction_session_timeout": "0",
"ignore_checksum_failure": "off",
"ignore_system_indexes": "off",
"integer_datetimes": "on",
"join_collapse_limit": "8",
"krb_caseins_users": "off",
"krb_server_keyfile": "FILE:/usr/local/etc/postgresql/krb5.keytab",
"lc_collate": "en_US.UTF-8",
"lc_ctype": "en_US.UTF-8",
"lc_messages": "en_US.UTF-8",
"lc_monetary": "en_US.UTF-8",
"lc_numeric": "en_US.UTF-8",
"lc_time": "en_US.UTF-8",
"listen_addresses": "localhost",
"lo_compat_privileges": "off",
"local_preload_libraries": "",
"lock_timeout": "0",
"log_autovacuum_min_duration": "-1",
"log_checkpoints": "off",
"log_connections": "off",
"log_destination": "stderr",
"log_directory": "log",
"log_disconnections": "off",
"log_duration": "off",
"log_error_verbosity": "default",
"log_executor_stats": "off",
"log_file_mode": "0600",
"log_filename": "postgresql-%Y-%m-%d_%H%M%S.log",
"log_hostname": "off",
"log_line_prefix": "%m [%p] ",
"log_lock_waits": "off",
"log_min_duration_statement": "-1",
"log_min_error_statement": "error",
"log_min_messages": "warning",
"log_parser_stats": "off",
"log_planner_stats": "off",
"log_replication_commands": "off",
"log_rotation_age": "1d",
"log_rotation_size": "10MB",
"log_statement": "none",
"log_statement_stats": "off",
"log_temp_files": "-1",
"log_timezone": "US/Eastern",
"log_truncate_on_rotation": "off",
"logging_collector": "off",
"maintenance_work_mem": "64MB",
"max_connections": "100",
"max_files_per_process": "1000",
"max_function_args": "100",
"max_identifier_length": "63",
"max_index_keys": "32",
"max_locks_per_transaction": "64",
"max_logical_replication_workers": "4",
"max_parallel_workers": "8",
"max_parallel_workers_per_gather": "2",
"max_pred_locks_per_page": "2",
"max_pred_locks_per_relation": "-2",
"max_pred_locks_per_transaction": "64",
"max_prepared_transactions": "0",
"max_replication_slots": "10",
"max_stack_depth": "2MB",
"max_standby_archive_delay": "30s",
"max_standby_streaming_delay": "30s",
"max_sync_workers_per_subscription": "2",
"max_wal_senders": "10",
"max_wal_size": "1GB",
"max_worker_processes": "8",
"min_parallel_index_scan_size": "512kB",
"min_parallel_table_scan_size": "8MB",
"min_wal_size": "80MB",
"old_snapshot_threshold": "-1",
"operator_precedence_warning": "off",
"parallel_setup_cost": "1000",
"parallel_tuple_cost": "0.1",
"password_encryption": "md5",
"port": "5432",
"post_auth_delay": "0",
"pre_auth_delay": "0",
"quote_all_identifiers": "off",
"random_page_cost": "4",
"replacement_sort_tuples": "150000",
"restart_after_crash": "on",
"row_security": "on",
"search_path": "\"$user\", public",
"segment_size": "1GB",
"seq_page_cost": "1",
"server_encoding": "UTF8",
"server_version": "10.1",
"server_version_num": "100001",
"session_preload_libraries": "",
"session_replication_role": "origin",
"shared_buffers": "128MB",
"shared_preload_libraries": "",
"ssl": "off",
"ssl_ca_file": "",
"ssl_cert_file": "server.crt",
"ssl_ciphers": "HIGH:MEDIUM:+3DES:!aNULL",
"ssl_crl_file": "",
"ssl_dh_params_file": "",
"ssl_ecdh_curve": "prime256v1",
"ssl_key_file": "server.key",
"ssl_prefer_server_ciphers": "on",
"standard_conforming_strings": "on",
"statement_timeout": "0",
"stats_temp_directory": "pg_stat_tmp",
"superuser_reserved_connections": "3",
"synchronize_seqscans": "on",
"synchronous_commit": "on",
"synchronous_standby_names": "",
"syslog_facility": "local0",
"syslog_ident": "postgres",
"syslog_sequence_numbers": "on",
"syslog_split_messages": "on",
"tcp_keepalives_count": "8",
"tcp_keepalives_idle": "7200",
"tcp_keepalives_interval": "75",
"temp_buffers": "8MB",
"temp_file_limit": "-1",
"temp_tablespaces": "",
"timezone_abbreviations": "Default",
"trace_notify": "off",
"trace_recovery_messages": "log",
"trace_sort": "off",
"track_activities": "on",
"track_activity_query_size": "1024",
"track_commit_timestamp": "off",
"track_counts": "on",
"track_functions": "none",
"track_io_timing": "off",
"transaction_deferrable": "off",
"transaction_isolation": "read committed",
"transaction_read_only": "off",
"transform_null_equals": "off",
"unix_socket_directories": "/tmp",
"unix_socket_group": "",
"unix_socket_permissions": "0777",
"update_process_title": "on",
"vacuum_cost_delay": "0",
"vacuum_cost_limit": "200",
"vacuum_cost_page_dirty": "20",
"vacuum_cost_page_hit": "1",
"vacuum_cost_page_miss": "10",
"vacuum_defer_cleanup_age": "0",
"vacuum_freeze_min_age": "50000000",
"vacuum_freeze_table_age": "150000000",
"vacuum_multixact_freeze_min_age": "5000000",
"vacuum_multixact_freeze_table_age": "150000000",
"wal_block_size": "8192",
"wal_buffers": "4MB",
"wal_compression": "off",
"wal_consistency_checking": "",
"wal_keep_segments": "0",
"wal_level": "replica",
"wal_log_hints": "off",
"wal_receiver_status_interval": "10s",
"wal_receiver_timeout": "1min",
"wal_retrieve_retry_interval": "5s",
"wal_segment_size": "16MB",
"wal_sender_timeout": "1min",
"wal_sync_method": "open_datasync",
"wal_writer_delay": "200ms",
"wal_writer_flush_after": "1MB",
"work_mem": "4MB",
"xmlbinary": "base64",
"xmloption": "content",
"zero_damaged_pages": "off"
}},
"local": null
}

View File

@@ -0,0 +1,582 @@
{
"global": {
"pg_stat_archiver": {
"archived_count": "0",
"failed_count": "0",
"stats_reset": "2017-11-10 10:59:47.397075-05"
},
"pg_stat_bgwriter": {
"buffers_alloc": "87670",
"buffers_backend": "81032",
"buffers_backend_fsync": "0",
"buffers_checkpoint": "33250",
"buffers_clean": "49590",
"checkpoint_sync_time": "19",
"checkpoint_write_time": "597851",
"checkpoints_req": "2",
"checkpoints_timed": "1277",
"maxwritten_clean": "325",
"stats_reset": "2017-11-10 10:59:47.397075-05"
}
},
"local": {
"table": {
"pg_stat_user_tables": {
"history": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"last_autoanalyze": "2017-11-20 15:59:02.567618-05",
"n_dead_tup": "0",
"n_live_tup": "60854",
"n_mod_since_analyze": "854",
"n_tup_del": "0",
"n_tup_hot_upd": "0",
"n_tup_ins": "60854",
"n_tup_upd": "0",
"relid": "16536",
"relname": "history",
"schemaname": "public",
"seq_scan": "2",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"warehouse": {
"analyze_count": "0",
"autoanalyze_count": "2",
"autovacuum_count": "2",
"idx_scan": "202634",
"idx_tup_fetch": "202634",
"last_autoanalyze": "2017-11-20 19:23:34.236294-05",
"last_autovacuum": "2017-11-20 19:23:34.235793-05",
"n_dead_tup": "0",
"n_live_tup": "2",
"n_mod_since_analyze": "0",
"n_tup_del": "0",
"n_tup_hot_upd": "854",
"n_tup_ins": "2",
"n_tup_upd": "854",
"relid": "16559",
"relname": "warehouse",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"stock": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "644561",
"idx_tup_fetch": "644561",
"last_autoanalyze": "2017-11-20 15:59:01.368483-05",
"n_dead_tup": "4364",
"n_live_tup": "200000",
"n_mod_since_analyze": "8901",
"n_tup_del": "0",
"n_tup_hot_upd": "5305",
"n_tup_ins": "200000",
"n_tup_upd": "8901",
"relid": "16523",
"relname": "stock",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"customer": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "125261",
"idx_tup_fetch": "85299628",
"last_autoanalyze": "2017-11-20 15:59:18.824212-05",
"n_dead_tup": "1510",
"n_live_tup": "60000",
"n_mod_since_analyze": "1594",
"n_tup_del": "0",
"n_tup_hot_upd": "262",
"n_tup_ins": "60000",
"n_tup_upd": "1594",
"relid": "16540",
"relname": "customer",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"order_line": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "1655",
"idx_tup_fetch": "33762",
"last_autoanalyze": "2017-11-20 16:00:11.017507-05",
"n_dead_tup": "2550",
"n_live_tup": "608373",
"n_mod_since_analyze": "16230",
"n_tup_del": "0",
"n_tup_hot_upd": "5393",
"n_tup_ins": "608373",
"n_tup_upd": "7329",
"relid": "16513",
"relname": "order_line",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"oorder": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "627652",
"idx_tup_fetch": "627652",
"last_autoanalyze": "2017-11-20 15:59:54.690984-05",
"n_dead_tup": "117",
"n_live_tup": "60889",
"n_mod_since_analyze": "1629",
"n_tup_del": "0",
"n_tup_hot_upd": "662",
"n_tup_ins": "60900",
"n_tup_upd": "740",
"relid": "16528",
"relname": "oorder",
"schemaname": "public",
"seq_scan": "4",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"new_order": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "1481",
"idx_tup_fetch": "1480",
"last_autoanalyze": "2017-11-20 16:00:11.217111-05",
"n_dead_tup": "751",
"n_live_tup": "16964",
"n_mod_since_analyze": "1629",
"n_tup_del": "740",
"n_tup_hot_upd": "0",
"n_tup_ins": "17715",
"n_tup_upd": "0",
"relid": "16518",
"relname": "new_order",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"district": {
"analyze_count": "0",
"autoanalyze_count": "2",
"autovacuum_count": "0",
"idx_scan": "122234",
"idx_tup_fetch": "122234",
"last_autoanalyze": "2017-11-20 19:23:34.201509-05",
"n_dead_tup": "33",
"n_live_tup": "20",
"n_mod_since_analyze": "0",
"n_tup_del": "0",
"n_tup_hot_upd": "1754",
"n_tup_ins": "20",
"n_tup_upd": "1754",
"relid": "16549",
"relname": "district",
"schemaname": "public",
"seq_scan": "2221",
"seq_tup_read": "41522",
"vacuum_count": "0"
},
"item": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "209020",
"idx_tup_fetch": "209009",
"last_autoanalyze": "2017-11-20 15:59:26.613728-05",
"n_dead_tup": "0",
"n_live_tup": "102000",
"n_mod_since_analyze": "2000",
"n_tup_del": "0",
"n_tup_hot_upd": "0",
"n_tup_ins": "100000",
"n_tup_upd": "0",
"relid": "16554",
"relname": "item",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"vacuum_count": "0"
}
},
"pg_statio_user_tables": {
"history": {
"heap_blks_hit": "184380",
"heap_blks_read": "746",
"relid": "16536",
"relname": "history",
"schemaname": "public"
},
"order_line": {
"heap_blks_hit": "1869417",
"heap_blks_read": "12419",
"idx_blks_hit": "1788651",
"idx_blks_read": "3708",
"relid": "16513",
"relname": "order_line",
"schemaname": "public"
},
"warehouse": {
"heap_blks_hit": "404486",
"heap_blks_read": "80",
"idx_blks_hit": "202643",
"idx_blks_read": "6",
"relid": "16559",
"relname": "warehouse",
"schemaname": "public"
},
"new_order": {
"heap_blks_hit": "37856",
"heap_blks_read": "192",
"idx_blks_hit": "38225",
"idx_blks_read": "134",
"relid": "16518",
"relname": "new_order",
"schemaname": "public"
},
"stock": {
"heap_blks_hit": "1920817",
"heap_blks_read": "11757",
"idx_blks_hit": "2447522",
"idx_blks_read": "1530",
"relid": "16523",
"relname": "stock",
"schemaname": "public"
},
"oorder": {
"heap_blks_hit": "1378399",
"heap_blks_read": "928",
"idx_blks_hit": "3979052",
"idx_blks_read": "1881",
"relid": "16528",
"relname": "oorder",
"schemaname": "public"
},
"district": {
"heap_blks_hit": "249754",
"heap_blks_read": "3",
"idx_blks_hit": "122259",
"idx_blks_read": "5",
"relid": "16549",
"relname": "district",
"schemaname": "public"
},
"item": {
"heap_blks_hit": "509702",
"heap_blks_read": "4542",
"idx_blks_hit": "617914",
"idx_blks_read": "877",
"relid": "16554",
"relname": "item",
"schemaname": "public"
},
"customer": {
"heap_blks_hit": "70136669",
"heap_blks_read": "13826",
"idx_blks_hit": "1411491",
"idx_blks_read": "2716",
"relid": "16540",
"relname": "customer",
"schemaname": "public",
"tidx_blks_hit": "0",
"tidx_blks_read": "0",
"toast_blks_hit": "0",
"toast_blks_read": "0"
}
}
},
"database": {
"pg_stat_database": {
"postgres": {
"blk_read_time": "0",
"blk_write_time": "0",
"blks_hit": "115229324",
"blks_read": "104188",
"conflicts": "0",
"datid": "12558",
"datname": "postgres",
"deadlocks": "0",
"numbackends": "1",
"stats_reset": "2017-11-10 11:14:57.116228-05",
"temp_bytes": "0",
"temp_files": "0",
"tup_deleted": "1818",
"tup_fetched": "103355344",
"tup_inserted": "2210752",
"tup_returned": "110741743",
"tup_updated": "32675",
"xact_commit": "19082",
"xact_rollback": "17"
},
"tpcc": {
"blk_read_time": "0",
"blk_write_time": "0",
"blks_hit": "0",
"blks_read": "0",
"conflicts": "0",
"datid": "16384",
"datname": "tpcc",
"deadlocks": "0",
"numbackends": "0",
"temp_bytes": "0",
"temp_files": "0",
"tup_deleted": "0",
"tup_fetched": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"xact_commit": "0",
"xact_rollback": "0"
},
"template1": {
"blk_read_time": "0",
"blk_write_time": "0",
"blks_hit": "0",
"blks_read": "0",
"conflicts": "0",
"datid": "1",
"datname": "template1",
"deadlocks": "0",
"numbackends": "0",
"temp_bytes": "0",
"temp_files": "0",
"tup_deleted": "0",
"tup_fetched": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"xact_commit": "0",
"xact_rollback": "0"
},
"template0": {
"blk_read_time": "0",
"blk_write_time": "0",
"blks_hit": "0",
"blks_read": "0",
"conflicts": "0",
"datid": "12557",
"datname": "template0",
"deadlocks": "0",
"numbackends": "0",
"temp_bytes": "0",
"temp_files": "0",
"tup_deleted": "0",
"tup_fetched": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"xact_commit": "0",
"xact_rollback": "0"
}
},
"pg_stat_database_conflicts": {
"postgres": {
"confl_bufferpin": "0",
"confl_deadlock": "0",
"confl_lock": "0",
"confl_snapshot": "0",
"confl_tablespace": "0",
"datid": "12558",
"datname": "postgres"
},
"tpcc": {
"confl_bufferpin": "0",
"confl_deadlock": "0",
"confl_lock": "0",
"confl_snapshot": "0",
"confl_tablespace": "0",
"datid": "16384",
"datname": "tpcc"
},
"template1": {
"confl_bufferpin": "0",
"confl_deadlock": "0",
"confl_lock": "0",
"confl_snapshot": "0",
"confl_tablespace": "0",
"datid": "1",
"datname": "template1"
},
"template0": {
"confl_bufferpin": "0",
"confl_deadlock": "0",
"confl_lock": "0",
"confl_snapshot": "0",
"confl_tablespace": "0",
"datid": "12557",
"datname": "template0"
}
}
},
"indexes": {
"pg_stat_user_indexes": {
"order_line": {
"idx_scan": "1655",
"idx_tup_fetch": "33762",
"idx_tup_read": "35698",
"indexrelid": "16516",
"indexrelname": "order_line_pkey",
"relid": "16513",
"relname": "order_line",
"schemaname": "public"
},
"new_order": {
"idx_scan": "1481",
"idx_tup_fetch": "1480",
"idx_tup_read": "2200",
"indexrelid": "16521",
"indexrelname": "new_order_pkey",
"relid": "16518",
"relname": "new_order",
"schemaname": "public"
},
"stock": {
"idx_scan": "644561",
"idx_tup_fetch": "644561",
"idx_tup_read": "647319",
"indexrelid": "16526",
"indexrelname": "stock_pkey",
"relid": "16523",
"relname": "stock",
"schemaname": "public"
},
"oorder": {
"idx_scan": "616371",
"idx_tup_fetch": "616371",
"idx_tup_read": "616371",
"indexrelid": "16565",
"indexrelname": "idx_order",
"relid": "16528",
"relname": "oorder",
"schemaname": "public"
},
"customer": {
"idx_scan": "82442",
"idx_tup_fetch": "85256809",
"idx_tup_read": "85256841",
"indexrelid": "16564",
"indexrelname": "idx_customer_name",
"relid": "16540",
"relname": "customer",
"schemaname": "public"
},
"district": {
"idx_scan": "122234",
"idx_tup_fetch": "122234",
"idx_tup_read": "122234",
"indexrelid": "16552",
"indexrelname": "district_pkey",
"relid": "16549",
"relname": "district",
"schemaname": "public"
},
"item": {
"idx_scan": "209020",
"idx_tup_fetch": "209009",
"idx_tup_read": "209009",
"indexrelid": "16557",
"indexrelname": "item_pkey",
"relid": "16554",
"relname": "item",
"schemaname": "public"
},
"warehouse": {
"idx_scan": "202634",
"idx_tup_fetch": "201331",
"idx_tup_read": "202634",
"indexrelid": "16562",
"indexrelname": "warehouse_pkey",
"relid": "16559",
"relname": "warehouse",
"schemaname": "public"
}
},
"pg_statio_user_indexes": {
"order_line": {
"idx_blks_hit": "1788651",
"idx_blks_read": "3708",
"indexrelid": "16516",
"indexrelname": "order_line_pkey",
"relid": "16513",
"relname": "order_line",
"schemaname": "public"
},
"new_order": {
"idx_blks_hit": "38225",
"idx_blks_read": "134",
"indexrelid": "16521",
"indexrelname": "new_order_pkey",
"relid": "16518",
"relname": "new_order",
"schemaname": "public"
},
"stock": {
"idx_blks_hit": "2447522",
"idx_blks_read": "1530",
"indexrelid": "16526",
"indexrelname": "stock_pkey",
"relid": "16523",
"relname": "stock",
"schemaname": "public"
},
"oorder": {
"idx_blks_hit": "3689479",
"idx_blks_read": "733",
"indexrelid": "16565",
"indexrelname": "idx_order",
"relid": "16528",
"relname": "oorder",
"schemaname": "public"
},
"customer": {
"idx_blks_hit": "1151523",
"idx_blks_read": "1589",
"indexrelid": "16564",
"indexrelname": "idx_customer_name",
"relid": "16540",
"relname": "customer",
"schemaname": "public"
},
"district": {
"idx_blks_hit": "122259",
"idx_blks_read": "5",
"indexrelid": "16552",
"indexrelname": "district_pkey",
"relid": "16549",
"relname": "district",
"schemaname": "public"
},
"item": {
"idx_blks_hit": "617914",
"idx_blks_read": "877",
"indexrelid": "16557",
"indexrelname": "item_pkey",
"relid": "16554",
"relname": "item",
"schemaname": "public"
},
"warehouse": {
"idx_blks_hit": "202643",
"idx_blks_read": "6",
"indexrelid": "16562",
"indexrelname": "warehouse_pkey",
"relid": "16559",
"relname": "warehouse",
"schemaname": "public"
}
}
}
}
}

View File

@@ -0,0 +1,582 @@
{
"global": {
"pg_stat_archiver": {
"archived_count": "0",
"failed_count": "0",
"stats_reset": "2017-11-10 10:59:47.397075-05"
},
"pg_stat_bgwriter": {
"buffers_alloc": "87670",
"buffers_backend": "81032",
"buffers_backend_fsync": "0",
"buffers_checkpoint": "33250",
"buffers_clean": "49590",
"checkpoint_sync_time": "19",
"checkpoint_write_time": "597851",
"checkpoints_req": "2",
"checkpoints_timed": "1277",
"maxwritten_clean": "325",
"stats_reset": "2017-11-10 10:59:47.397075-05"
}
},
"local": {
"table": {
"pg_stat_user_tables": {
"history": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"last_autoanalyze": "2017-11-20 15:59:02.567618-05",
"n_dead_tup": "0",
"n_live_tup": "60854",
"n_mod_since_analyze": "854",
"n_tup_del": "0",
"n_tup_hot_upd": "0",
"n_tup_ins": "60854",
"n_tup_upd": "0",
"relid": "16536",
"relname": "history",
"schemaname": "public",
"seq_scan": "2",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"warehouse": {
"analyze_count": "0",
"autoanalyze_count": "2",
"autovacuum_count": "2",
"idx_scan": "202634",
"idx_tup_fetch": "202634",
"last_autoanalyze": "2017-11-20 19:23:34.236294-05",
"last_autovacuum": "2017-11-20 19:23:34.235793-05",
"n_dead_tup": "0",
"n_live_tup": "2",
"n_mod_since_analyze": "0",
"n_tup_del": "0",
"n_tup_hot_upd": "854",
"n_tup_ins": "2",
"n_tup_upd": "854",
"relid": "16559",
"relname": "warehouse",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"stock": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "644561",
"idx_tup_fetch": "644561",
"last_autoanalyze": "2017-11-20 15:59:01.368483-05",
"n_dead_tup": "4364",
"n_live_tup": "200000",
"n_mod_since_analyze": "8901",
"n_tup_del": "0",
"n_tup_hot_upd": "5305",
"n_tup_ins": "200000",
"n_tup_upd": "8901",
"relid": "16523",
"relname": "stock",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"customer": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "125261",
"idx_tup_fetch": "85299628",
"last_autoanalyze": "2017-11-20 15:59:18.824212-05",
"n_dead_tup": "1510",
"n_live_tup": "60000",
"n_mod_since_analyze": "1594",
"n_tup_del": "0",
"n_tup_hot_upd": "262",
"n_tup_ins": "60000",
"n_tup_upd": "1594",
"relid": "16540",
"relname": "customer",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"order_line": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "1655",
"idx_tup_fetch": "33762",
"last_autoanalyze": "2017-11-20 16:00:11.017507-05",
"n_dead_tup": "2550",
"n_live_tup": "608373",
"n_mod_since_analyze": "16230",
"n_tup_del": "0",
"n_tup_hot_upd": "5393",
"n_tup_ins": "608373",
"n_tup_upd": "7329",
"relid": "16513",
"relname": "order_line",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"oorder": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "627652",
"idx_tup_fetch": "627652",
"last_autoanalyze": "2017-11-20 15:59:54.690984-05",
"n_dead_tup": "117",
"n_live_tup": "60889",
"n_mod_since_analyze": "1629",
"n_tup_del": "0",
"n_tup_hot_upd": "662",
"n_tup_ins": "60900",
"n_tup_upd": "740",
"relid": "16528",
"relname": "oorder",
"schemaname": "public",
"seq_scan": "4",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"new_order": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "1481",
"idx_tup_fetch": "1480",
"last_autoanalyze": "2017-11-20 16:00:11.217111-05",
"n_dead_tup": "751",
"n_live_tup": "16964",
"n_mod_since_analyze": "1629",
"n_tup_del": "740",
"n_tup_hot_upd": "0",
"n_tup_ins": "17715",
"n_tup_upd": "0",
"relid": "16518",
"relname": "new_order",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"vacuum_count": "0"
},
"district": {
"analyze_count": "0",
"autoanalyze_count": "2",
"autovacuum_count": "0",
"idx_scan": "122234",
"idx_tup_fetch": "122234",
"last_autoanalyze": "2017-11-20 19:23:34.201509-05",
"n_dead_tup": "33",
"n_live_tup": "20",
"n_mod_since_analyze": "0",
"n_tup_del": "0",
"n_tup_hot_upd": "1754",
"n_tup_ins": "20",
"n_tup_upd": "1754",
"relid": "16549",
"relname": "district",
"schemaname": "public",
"seq_scan": "2221",
"seq_tup_read": "41522",
"vacuum_count": "0"
},
"item": {
"analyze_count": "0",
"autoanalyze_count": "1",
"autovacuum_count": "0",
"idx_scan": "209020",
"idx_tup_fetch": "209009",
"last_autoanalyze": "2017-11-20 15:59:26.613728-05",
"n_dead_tup": "0",
"n_live_tup": "102000",
"n_mod_since_analyze": "2000",
"n_tup_del": "0",
"n_tup_hot_upd": "0",
"n_tup_ins": "100000",
"n_tup_upd": "0",
"relid": "16554",
"relname": "item",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"vacuum_count": "0"
}
},
"pg_statio_user_tables": {
"history": {
"heap_blks_hit": "184380",
"heap_blks_read": "746",
"relid": "16536",
"relname": "history",
"schemaname": "public"
},
"order_line": {
"heap_blks_hit": "1869417",
"heap_blks_read": "12419",
"idx_blks_hit": "1788651",
"idx_blks_read": "3708",
"relid": "16513",
"relname": "order_line",
"schemaname": "public"
},
"warehouse": {
"heap_blks_hit": "404486",
"heap_blks_read": "80",
"idx_blks_hit": "202643",
"idx_blks_read": "6",
"relid": "16559",
"relname": "warehouse",
"schemaname": "public"
},
"new_order": {
"heap_blks_hit": "37856",
"heap_blks_read": "192",
"idx_blks_hit": "38225",
"idx_blks_read": "134",
"relid": "16518",
"relname": "new_order",
"schemaname": "public"
},
"stock": {
"heap_blks_hit": "1920817",
"heap_blks_read": "11757",
"idx_blks_hit": "2447522",
"idx_blks_read": "1530",
"relid": "16523",
"relname": "stock",
"schemaname": "public"
},
"oorder": {
"heap_blks_hit": "1378399",
"heap_blks_read": "928",
"idx_blks_hit": "3979052",
"idx_blks_read": "1881",
"relid": "16528",
"relname": "oorder",
"schemaname": "public"
},
"district": {
"heap_blks_hit": "249754",
"heap_blks_read": "3",
"idx_blks_hit": "122259",
"idx_blks_read": "5",
"relid": "16549",
"relname": "district",
"schemaname": "public"
},
"item": {
"heap_blks_hit": "509702",
"heap_blks_read": "4542",
"idx_blks_hit": "617914",
"idx_blks_read": "877",
"relid": "16554",
"relname": "item",
"schemaname": "public"
},
"customer": {
"heap_blks_hit": "70136669",
"heap_blks_read": "13826",
"idx_blks_hit": "1411491",
"idx_blks_read": "2716",
"relid": "16540",
"relname": "customer",
"schemaname": "public",
"tidx_blks_hit": "0",
"tidx_blks_read": "0",
"toast_blks_hit": "0",
"toast_blks_read": "0"
}
}
},
"database": {
"pg_stat_database": {
"postgres": {
"blk_read_time": "0",
"blk_write_time": "0",
"blks_hit": "115229324",
"blks_read": "104188",
"conflicts": "0",
"datid": "12558",
"datname": "postgres",
"deadlocks": "0",
"numbackends": "1",
"stats_reset": "2017-11-10 11:14:57.116228-05",
"temp_bytes": "0",
"temp_files": "0",
"tup_deleted": "1818",
"tup_fetched": "103355344",
"tup_inserted": "2210752",
"tup_returned": "110741743",
"tup_updated": "32675",
"xact_commit": "19082",
"xact_rollback": "17"
},
"tpcc": {
"blk_read_time": "0",
"blk_write_time": "0",
"blks_hit": "0",
"blks_read": "0",
"conflicts": "0",
"datid": "16384",
"datname": "tpcc",
"deadlocks": "0",
"numbackends": "0",
"temp_bytes": "0",
"temp_files": "0",
"tup_deleted": "0",
"tup_fetched": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"xact_commit": "0",
"xact_rollback": "0"
},
"template1": {
"blk_read_time": "0",
"blk_write_time": "0",
"blks_hit": "0",
"blks_read": "0",
"conflicts": "0",
"datid": "1",
"datname": "template1",
"deadlocks": "0",
"numbackends": "0",
"temp_bytes": "0",
"temp_files": "0",
"tup_deleted": "0",
"tup_fetched": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"xact_commit": "0",
"xact_rollback": "0"
},
"template0": {
"blk_read_time": "0",
"blk_write_time": "0",
"blks_hit": "0",
"blks_read": "0",
"conflicts": "0",
"datid": "12557",
"datname": "template0",
"deadlocks": "0",
"numbackends": "0",
"temp_bytes": "0",
"temp_files": "0",
"tup_deleted": "0",
"tup_fetched": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"xact_commit": "0",
"xact_rollback": "0"
}
},
"pg_stat_database_conflicts": {
"postgres": {
"confl_bufferpin": "0",
"confl_deadlock": "0",
"confl_lock": "0",
"confl_snapshot": "0",
"confl_tablespace": "0",
"datid": "12558",
"datname": "postgres"
},
"tpcc": {
"confl_bufferpin": "0",
"confl_deadlock": "0",
"confl_lock": "0",
"confl_snapshot": "0",
"confl_tablespace": "0",
"datid": "16384",
"datname": "tpcc"
},
"template1": {
"confl_bufferpin": "0",
"confl_deadlock": "0",
"confl_lock": "0",
"confl_snapshot": "0",
"confl_tablespace": "0",
"datid": "1",
"datname": "template1"
},
"template0": {
"confl_bufferpin": "0",
"confl_deadlock": "0",
"confl_lock": "0",
"confl_snapshot": "0",
"confl_tablespace": "0",
"datid": "12557",
"datname": "template0"
}
}
},
"indexes": {
"pg_stat_user_indexes": {
"order_line": {
"idx_scan": "1655",
"idx_tup_fetch": "33762",
"idx_tup_read": "35698",
"indexrelid": "16516",
"indexrelname": "order_line_pkey",
"relid": "16513",
"relname": "order_line",
"schemaname": "public"
},
"new_order": {
"idx_scan": "1481",
"idx_tup_fetch": "1480",
"idx_tup_read": "2200",
"indexrelid": "16521",
"indexrelname": "new_order_pkey",
"relid": "16518",
"relname": "new_order",
"schemaname": "public"
},
"stock": {
"idx_scan": "644561",
"idx_tup_fetch": "644561",
"idx_tup_read": "647319",
"indexrelid": "16526",
"indexrelname": "stock_pkey",
"relid": "16523",
"relname": "stock",
"schemaname": "public"
},
"oorder": {
"idx_scan": "616371",
"idx_tup_fetch": "616371",
"idx_tup_read": "616371",
"indexrelid": "16565",
"indexrelname": "idx_order",
"relid": "16528",
"relname": "oorder",
"schemaname": "public"
},
"customer": {
"idx_scan": "82442",
"idx_tup_fetch": "85256809",
"idx_tup_read": "85256841",
"indexrelid": "16564",
"indexrelname": "idx_customer_name",
"relid": "16540",
"relname": "customer",
"schemaname": "public"
},
"district": {
"idx_scan": "122234",
"idx_tup_fetch": "122234",
"idx_tup_read": "122234",
"indexrelid": "16552",
"indexrelname": "district_pkey",
"relid": "16549",
"relname": "district",
"schemaname": "public"
},
"item": {
"idx_scan": "209020",
"idx_tup_fetch": "209009",
"idx_tup_read": "209009",
"indexrelid": "16557",
"indexrelname": "item_pkey",
"relid": "16554",
"relname": "item",
"schemaname": "public"
},
"warehouse": {
"idx_scan": "202634",
"idx_tup_fetch": "201331",
"idx_tup_read": "202634",
"indexrelid": "16562",
"indexrelname": "warehouse_pkey",
"relid": "16559",
"relname": "warehouse",
"schemaname": "public"
}
},
"pg_statio_user_indexes": {
"order_line": {
"idx_blks_hit": "1788651",
"idx_blks_read": "3708",
"indexrelid": "16516",
"indexrelname": "order_line_pkey",
"relid": "16513",
"relname": "order_line",
"schemaname": "public"
},
"new_order": {
"idx_blks_hit": "38225",
"idx_blks_read": "134",
"indexrelid": "16521",
"indexrelname": "new_order_pkey",
"relid": "16518",
"relname": "new_order",
"schemaname": "public"
},
"stock": {
"idx_blks_hit": "2447522",
"idx_blks_read": "1530",
"indexrelid": "16526",
"indexrelname": "stock_pkey",
"relid": "16523",
"relname": "stock",
"schemaname": "public"
},
"oorder": {
"idx_blks_hit": "3689479",
"idx_blks_read": "733",
"indexrelid": "16565",
"indexrelname": "idx_order",
"relid": "16528",
"relname": "oorder",
"schemaname": "public"
},
"customer": {
"idx_blks_hit": "1151523",
"idx_blks_read": "1589",
"indexrelid": "16564",
"indexrelname": "idx_customer_name",
"relid": "16540",
"relname": "customer",
"schemaname": "public"
},
"district": {
"idx_blks_hit": "122259",
"idx_blks_read": "5",
"indexrelid": "16552",
"indexrelname": "district_pkey",
"relid": "16549",
"relname": "district",
"schemaname": "public"
},
"item": {
"idx_blks_hit": "617914",
"idx_blks_read": "877",
"indexrelid": "16557",
"indexrelname": "item_pkey",
"relid": "16554",
"relname": "item",
"schemaname": "public"
},
"warehouse": {
"idx_blks_hit": "202643",
"idx_blks_read": "6",
"indexrelid": "16562",
"indexrelname": "warehouse_pkey",
"relid": "16559",
"relname": "warehouse",
"schemaname": "public"
}
}
}
}
}

View File

@@ -0,0 +1,8 @@
{
"workload_name": "workload-0",
"database_type": "postgres",
"start_time": 1512076859887,
"observation_time": 300,
"end_time": 1512076864891,
"database_version": "9.6"
}

View File

@@ -0,0 +1,62 @@
#
# OtterTune - upload_data.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Nov 30, 2017
@author: dvanaken
'''
import argparse
import glob
import logging
import os
import requests
# Logging
LOG = logging.getLogger(__name__)
LOG.addHandler(logging.StreamHandler())
LOG.setLevel(logging.INFO)
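# Example invocation (hypothetical directory and upload code):
#   python upload_data.py ./generated_workloads 0123456789ABCDEF http://127.0.0.1:8000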
def upload(basedir, upload_code, upload_url):
for wkld_dir in sorted(glob.glob(os.path.join(basedir, '*'))):
LOG.info('Uploading sample for workload %s...', wkld_dir)
sample_idx = 0
while True:
samples = glob.glob(os.path.join(wkld_dir, 'sample-{}__*').format(sample_idx))
if len(samples) == 0:
break
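            # Each sample consists of exactly four files sharing a common prefix:
            # *__summary.json, *__knobs.json, *__metrics_start.json, and *__metrics_end.json.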
assert len(samples) == 4
basename = samples[0].split('__')[0]
params = {
'summary': open(basename + '__summary.json', 'r'),
'knobs': open(basename + '__knobs.json', 'r'),
'metrics_before': open(basename + '__metrics_start.json', 'r'),
'metrics_after': open(basename + '__metrics_end.json', 'r'),
}
response = requests.post(upload_url + "/new_result/",
files=params,
data={'upload_code': upload_code})
LOG.info("Response: %s\n", response.content.decode())
sample_idx += 1
def main():
parser = argparse.ArgumentParser(description="Upload generated data to the website")
parser.add_argument('basedir', type=str, nargs=1,
help='Directory containing the generated data')
parser.add_argument('upload_code', type=str, nargs=1,
help='The website\'s upload code')
parser.add_argument('upload_url', type=str, default='http://0.0.0.0:8000',
nargs='?', help='The website\'s URL')
args = parser.parse_args()
upload(args.basedir[0], args.upload_code[0], args.upload_url)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,11 @@
#
# OtterTune - fix_permissions.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
from fabric.api import local
PATH = "/var/www/ottertune"
USER = "www-data"
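# Give the web server user ownership of the deployed site and make the tree
# group-writable and world-readable (runs the commands locally via Fabric).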
local("sudo chown -R {0}:{0} {1}".format(USER, PATH))
local("sudo chmod -R ugo+rX,ug+w {}".format(PATH))

View File

@@ -0,0 +1 @@
*.txt

View File

@@ -0,0 +1,80 @@
#
# OtterTune - create_ranked_knobs.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import logging
import os
import shutil
import json
import itertools
LOG = logging.getLogger(__name__)
DATADIR = '/dataset/oltpbench/first_paper_experiments/analysis/knob_identification'
DBMSS = {'postgres-9.6': 1}
HARDWARES = {'m3.xlarge': 16}
TIMESTAMP = '2016-12-04 11:00'
TASK_TYPE = 2
PREFIX = 'global'
MODEL = 'website.PipelineResult'
VALIDATE = True
EXTRA_EXCEPTIONS = {
PREFIX + '.' + 'checkpoint_segments',
}
def validate_postgres(knobs, dbms):
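    # Reconcile the ranked-knob list with the knob catalog fixture for this DBMS:
    # append tunable catalog knobs that are missing from the list, and drop names
    # not in the catalog (raising an error unless whitelisted in EXTRA_EXCEPTIONS).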
with open('../knob_settings/{}/{}_knobs.json'.format(dbms.replace('-', '_'),
dbms.replace('.', '')), 'r') as f:
knob_info = json.load(f)
knob_info = {k['fields']['name']: k['fields'] for k in knob_info}
for kname, kinfo in list(knob_info.items()):
if kname not in knobs and kinfo['tunable'] is True:
knobs.append(kname)
LOG.warning("Adding missing knob to end of list (%s)", kname)
knob_names = list(knob_info.keys())
    for kname in list(knobs):  # iterate over a copy; items may be removed below
if kname not in knob_names:
if kname not in EXTRA_EXCEPTIONS:
raise Exception('Extra knob: {}'.format(kname))
knobs.remove(kname)
LOG.warning("Removing extra knob (%s)", kname)
def main():
for dbms, hw in itertools.product(list(DBMSS.keys()), HARDWARES):
datapath = os.path.join(DATADIR, '{}_{}'.format(dbms, hw))
if not os.path.exists(datapath):
raise IOError('Path does not exist: {}'.format(datapath))
with open(os.path.join(datapath, 'featured_knobs.txt'), 'r') as f:
            knobs = [k.strip() for k in f.read().split('\n') if k.strip()]
knobs = [PREFIX + '.' + k for k in knobs]
if VALIDATE and dbms.startswith('postgres'):
validate_postgres(knobs, dbms)
basename = '{}_{}_ranked_knobs'.format(dbms, hw).replace('.', '')
with open(basename + '.txt', 'w') as f:
f.write('\n'.join(knobs))
django_entry = [{
'model': MODEL,
'fields': {
'dbms': DBMSS[dbms],
'hardware': HARDWARES[hw],
'creation_timestamp': TIMESTAMP,
'task_type': TASK_TYPE,
'value': json.dumps(knobs, indent=4)
}
}]
savepath = basename + '.json'
with open(savepath, 'w') as f:
json.dump(django_entry, f, indent=4)
shutil.copy(savepath, '../../../preload/{}'.format(savepath))
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,12 @@
[
{
"fields": {
"hardware": 16,
"dbms": 1,
"task_type": 2,
"creation_timestamp": "2016-12-04 11:00",
"value": "[\n \"global.shared_buffers\", \n \"global.effective_cache_size\", \n \"global.bgwriter_lru_maxpages\", \n \"global.bgwriter_delay\", \n \"global.checkpoint_completion_target\", \n \"global.deadlock_timeout\", \n \"global.default_statistics_target\", \n \"global.effective_io_concurrency\", \n \"global.checkpoint_timeout\", \n \"global.commit_delay\", \n \"global.commit_siblings\", \n \"global.wal_buffers\", \n \"global.temp_buffers\", \n \"global.from_collapse_limit\", \n \"global.join_collapse_limit\", \n \"global.bgwriter_lru_multiplier\", \n \"global.random_page_cost\", \n \"global.work_mem\", \n \"global.maintenance_work_mem\", \n \"global.min_wal_size\", \n \"global.max_parallel_workers_per_gather\", \n \"global.seq_page_cost\", \n \"global.max_worker_processes\", \n \"global.wal_sync_method\", \n \"global.checkpoint_flush_after\", \n \"global.wal_writer_delay\", \n \"global.backend_flush_after\", \n \"global.bgwriter_flush_after\", \n \"global.min_parallel_relation_size\", \n \"global.wal_writer_flush_after\", \n \"global.max_wal_size\"\n]"
},
"model": "website.PipelineResult"
}
]

View File

@@ -0,0 +1,210 @@
#
# OtterTune - create_knob_settings.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import json
import shutil
# Oracle Type:
# 1 - Boolean
# 2 - String
# 3 - Integer
# 4 - Parameter file
# 5 - Reserved
# 6 - Big integer
# Ottertune Type:
# STRING = 1
# INTEGER = 2
# REAL = 3
# BOOL = 4
# ENUM = 5
# TIMESTAMP = 6
# Missing (not handled below):
# OPTIMIZER_MODE
# cursor_sharing
def set_field(fields):
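    # Hard-coded tunability flags, value ranges, and defaults for selected Oracle
    # parameters; for the memory-related knobs the values are raw byte counts
    # (approximate sizes are noted in the inline comments).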
if fields['name'].upper() == 'MEMORY_TARGET':
fields['tunable'] = False
fields['minval'] = 0
fields['maxval'] = 33000000000 # 33G
fields['default'] = 0
if fields['name'].upper() == 'MEMORY_MAX_TARGET':
fields['tunable'] = False
fields['minval'] = 0
fields['maxval'] = 33000000000 # 33G
fields['default'] = 0
if fields['name'].upper() == 'SGA_TARGET':
fields['tunable'] = False
fields['minval'] = 0
fields['maxval'] = 33000000000 # 33G
fields['default'] = 0
if fields['name'].upper() == 'SGA_MAX_SIZE':
fields['tunable'] = False
fields['minval'] = 0
fields['maxval'] = 33000000000 # 33G
fields['default'] = 0
if fields['name'].upper() == 'DB_CACHE_SIZE':
fields['tunable'] = True
fields['minval'] = 0
        fields['maxval'] = 25000000000  # 25G
fields['default'] = 4000000000 # 4G
if fields['name'].upper() == 'SHARED_POOL_SIZE':
fields['tunable'] = True
fields['minval'] = 0
fields['maxval'] = 4000000000 # 4G
fields['default'] = 1000000000 # 1G
if fields['name'].upper() == 'SHARED_IO_POOL_SIZE':
fields['tunable'] = False
fields['minval'] = 0
fields['maxval'] = 4000000000 # 4G
fields['default'] = 1000000000 # 1G
if fields['name'].upper() == 'STREAMS_POOL_SIZE':
fields['tunable'] = True
fields['minval'] = 0
fields['maxval'] = 4000000000 # 4G
fields['default'] = 20000000 # 20M
if fields['name'].upper() == 'LOG_BUFFER':
fields['tunable'] = True
fields['minval'] = 0
fields['maxval'] = 2000000000 # 2GB
fields['default'] = 50000000 # 50M
if fields['name'].upper() == 'DB_KEEP_CACHE_SIZE':
fields['tunable'] = False
fields['minval'] = 0
fields['maxval'] = 2000000000 # 2GB
fields['default'] = 500000000 # 500M
if fields['name'].upper() == 'DB_RECYCLE_CACHE_SIZE':
fields['tunable'] = False
fields['minval'] = 0
fields['maxval'] = 2000000000 # 2GB
fields['default'] = 500000000 # 500M
if fields['name'].upper() == 'LARGE_POOL_SIZE':
fields['tunable'] = True
fields['minval'] = 0
fields['maxval'] = 2000000000 # 2GB
fields['default'] = 500000000 # 500M
if fields['name'].upper() == 'PGA_AGGREGATE_TARGET':
fields['tunable'] = False
fields['minval'] = 0
fields['maxval'] = 33000000000 # 33G
fields['default'] = 0
if fields['name'].lower() == 'bitmap_merge_area_size':
fields['tunable'] = True
fields['minval'] = 0
        fields['maxval'] = 5000000000  # 5G
fields['default'] = 0
if fields['name'].lower() == 'create_bitmap_area_size':
fields['tunable'] = True
fields['minval'] = 0
        fields['maxval'] = 5000000000  # 5G
fields['default'] = 0
if fields['name'].lower() == 'hash_area_size':
fields['tunable'] = True
fields['minval'] = 0
fields['maxval'] = 3000000000 # 3G
fields['default'] = 0
if fields['name'].lower() == 'sort_area_size':
fields['tunable'] = True
fields['minval'] = 0
fields['maxval'] = 3000000000 # 3G
fields['default'] = 0
if fields['name'].upper() == 'OPEN_CURSORS':
fields['tunable'] = False
fields['minval'] = 200
fields['maxval'] = 400
fields['default'] = 300
if fields['name'].upper() == 'DB_FILE_MULTIBLOCK_READ_COUNT':
fields['tunable'] = False
fields['minval'] = 64
fields['maxval'] = 256
fields['default'] = 128
if fields['name'].upper() == 'optimizer_index_cost_adj'.upper():
fields['tunable'] = False
fields['minval'] = 1
fields['maxval'] = 10000
fields['default'] = 100
if fields['name'].upper() == 'OPTIMIZER_USE_PENDING_STATISTICS':
fields['tunable'] = False
fields['minval'] = None
fields['maxval'] = None
fields['default'] = False
if fields['name'].upper() == 'OPTIMIZER_USE_INVISIBLE_INDEXES':
fields['tunable'] = False
fields['minval'] = None
fields['maxval'] = None
fields['default'] = False
if fields['name'].upper() == 'OPTIMIZER_USE_SQL_PLAN_BASELINES':
fields['tunable'] = False
fields['minval'] = None
fields['maxval'] = None
fields['default'] = True
if fields['name'].upper() == 'OPTIMIZER_CAPTURE_SQL_PLAN_BASELINES':
fields['tunable'] = False
fields['minval'] = None
fields['maxval'] = None
fields['default'] = False
if fields['name'].upper() == 'DISK_ASYNCH_IO':
fields['tunable'] = True
fields['vartype'] = 5
fields['enumvals'] = 'TRUE,FALSE'
fields['default'] = 'TRUE'
def main():
final_metrics = []
with open('oracle.txt', 'r') as f:
num = 0
lines = f.readlines()
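        # oracle.txt is expected to list each parameter on three consecutive
        # non-empty lines -- NAME, TYPE (Oracle type code), DESCRIPTION -- with
        # header and separator lines skipped; `num` tracks the position in each triple.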
for line in lines:
line = line.strip().replace("\n", "")
if not line:
continue
if line in ['DESCRIPTION', 'NAME', 'TYPE'] or line.startswith('-'):
continue
if num == 0:
entry = {}
entry['model'] = 'website.KnobCatalog'
fields = {}
fields['name'] = line
elif num == 1:
if line in ['3', '6']:
fields['vartype'] = 2
fields['default'] = 0
elif line == '1':
fields['vartype'] = 4
fields['default'] = False
else:
fields['vartype'] = 1
fields['default'] = ''
elif num == 2:
fields['summary'] = line
fields['scope'] = 'global'
fields['dbms'] = 18 # oracle
fields['category'] = ''
fields['enumvals'] = None
fields['context'] = ''
fields['unit'] = 3 # other
fields['tunable'] = False
fields['scope'] = 'global'
fields['description'] = ''
fields['minval'] = None
fields['maxval'] = None
set_field(fields)
fields['name'] = 'global.' + fields['name']
entry['fields'] = fields
final_metrics.append(entry)
num = (num + 1) % 3
with open('oracle_knobs.json', 'w') as f:
json.dump(final_metrics, f, indent=4)
shutil.copy("oracle_knobs.json", "../../../../website/fixtures/oracle_knobs.json")
if __name__ == '__main__':
main()

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,3 @@
tunable_params.txt
settings.json
postgres-96_tunable_knob_names.json

View File

@@ -0,0 +1,581 @@
#
# OtterTune - create_knob_settings.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import csv
import json
import shutil
from collections import OrderedDict
PG_SYSTEM = [
(1024 ** 5, 'PB'),
(1024 ** 4, 'TB'),
(1024 ** 3, 'GB'),
(1024 ** 2, 'MB'),
(1024 ** 1, 'kB'),
(1024 ** 0, 'B'),
]
PG_TIME = [
(1000 * 1 * 60, 'min'),
(1000 ** 0, 'ms'),
(1000 ** 1, 's'),
]
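# Multipliers map PostgreSQL unit suffixes to the base units used below: bytes for
# PG_SYSTEM and milliseconds for PG_TIME. 'ms' is listed before 's' so the suffix
# matching in convert() does not mistake 'ms' for 's'.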
# def create_tuning_config(t_minval=None, t_maxval=None, t_minval_type=None, t_maxval_type=None,
# t_resource_type=None, t_weight_samples=False,
# t_step=None, t_enumvals=None,
# t_powers_of_2=False, t_additional_values=[], t_dependent=False,
# t_notes=''):
# cfg = {}
# cfg['t_minval'] = t_minval
# cfg['t_minval_type'] = t_minval_type
# cfg['t_maxval'] = t_maxval
# cfg['t_maxval_type'] = t_maxval_type
# cfg['t_resource_type'] = t_resource_type
# cfg['t_step'] = t_step
# cfg['t_enumvals'] = t_enumvals
# cfg['t_powers_of_2'] = t_powers_of_2
# cfg['t_additional_values'] = t_additional_values
# cfg['t_dependent'] = t_dependent
# cfg['t_weight_samples'] = t_weight_samples
#
# return cfg
STRING = 1
INTEGER = 2
REAL = 3
BOOL = 4
ENUM = 5
TIMESTAMP = 6
TYPE_NAMES = {
'string': STRING,
'integer': INTEGER,
'real': REAL,
'bool': BOOL,
'enum': ENUM,
'timestamp': TIMESTAMP
}
UNIT_BYTES = 1
UNIT_MS = 2
UNIT_OTHER = 3
def convert(size, system=None):
if system is None:
system = PG_SYSTEM
for multiplier, suffix in system:
if size.endswith(suffix):
if len(size) == len(suffix):
amount = 1
else:
amount = int(size[:-len(suffix)])
return amount * multiplier
return None
PARAMS = OrderedDict()
PARAM_PREFIX = 'global'
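# settings.csv is expected to be a CSV export of PostgreSQL 9.6's pg_settings view;
# its header row lists the columns referenced by name below.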
with open("settings.csv", "r") as f:
    READER = csv.reader(f, delimiter=',')
HEADER = None
for i, row in enumerate(READER):
if i == 0:
HEADER = list(row)
else:
param = {}
param['name'] = row[HEADER.index('name')]
param['vartype'] = TYPE_NAMES[row[HEADER.index('vartype')]]
param['category'] = row[HEADER.index('category')]
param['enumvals'] = row[HEADER.index('enumvals')]
param['context'] = row[HEADER.index('context')]
param['unit'] = None
param['tunable'] = None
param['scope'] = 'global'
param['summary'] = row[HEADER.index('short_desc')]
param['description'] = row[HEADER.index('extra_desc')]
default = row[HEADER.index('boot_val')]
minval = row[HEADER.index('min_val')]
maxval = row[HEADER.index('max_val')]
if param['vartype'] == INTEGER:
default = int(default)
minval = int(minval)
maxval = int(maxval)
elif param['vartype'] == REAL:
default = float(default) # pylint: disable=redefined-variable-type
minval = float(minval) # pylint: disable=redefined-variable-type
maxval = float(maxval) # pylint: disable=redefined-variable-type
else:
assert minval == ''
assert maxval == ''
minval = None
maxval = None
param['minval'] = minval
param['maxval'] = maxval
param['default'] = default
if param['enumvals'] != '':
enumvals = param['enumvals'][1:-1].split(',')
for i, enumval in enumerate(enumvals):
if enumval.startswith('\"') and enumval.endswith('\"'):
enumvals[i] = enumval[1:-1]
param['enumvals'] = ','.join(enumvals)
else:
param['enumvals'] = None
pg_unit = row[HEADER.index('unit')]
if pg_unit != '':
factor = convert(pg_unit)
if factor is None:
factor = convert(pg_unit, system=PG_TIME)
assert factor is not None
param['unit'] = UNIT_MS
else:
param['unit'] = UNIT_BYTES
if param['default'] > 0:
param['default'] = param['default'] * factor
if param['minval'] > 0:
param['minval'] = param['minval'] * factor
if param['maxval'] > 0:
param['maxval'] = param['maxval'] * factor
else:
param['unit'] = UNIT_OTHER
# Internal params are read-only
if param['context'] == 'internal':
param['tunable'] = 'no'
            # String params are not tunable in 9.6
if param['vartype'] == STRING:
param['tunable'] = 'no'
# We do not tune autovacuum (yet)
if param['name'].startswith('autovacuum'):
param['tunable'] = 'no'
# No need to tune debug params
if param['name'].startswith('debug'):
param['tunable'] = 'no'
# Don't want to disable query tuning options
if param['name'].startswith('enable'):
param['tunable'] = 'no'
# These options control a special-case query optimizer
if param['name'].startswith('geqo'):
param['tunable'] = 'no'
# Do not tune logging settings
if param['name'].startswith('log'):
param['tunable'] = 'no'
# Do not tune SSL settings
if param['name'].startswith('ssl'):
param['tunable'] = 'no'
# Do not tune syslog settings
if param['name'].startswith('syslog'):
param['tunable'] = 'no'
            # Do not tune TCP settings
if param['name'].startswith('tcp'):
param['tunable'] = 'no'
if param['name'].startswith('trace'):
param['tunable'] = 'no'
if param['name'].startswith('track'):
param['tunable'] = 'no'
            # We do not tune vacuum settings (yet)
if param['name'].startswith('vacuum'):
param['tunable'] = 'no'
# Do not tune replication settings
if param['category'].startswith('Replication'):
param['tunable'] = 'no'
PARAMS[param['name']] = param
# We only want to tune some settings
PARAMS['allow_system_table_mods']['tunable'] = 'no'
PARAMS['archive_mode']['tunable'] = 'no'
PARAMS['archive_timeout']['tunable'] = 'no'
PARAMS['array_nulls']['tunable'] = 'no'
PARAMS['authentication_timeout']['tunable'] = 'no'
PARAMS['backend_flush_after']['tunable'] = 'yes'
PARAMS['backslash_quote']['tunable'] = 'no'
PARAMS['bgwriter_delay']['tunable'] = 'yes'
PARAMS['bgwriter_flush_after']['tunable'] = 'yes'
PARAMS['bgwriter_lru_maxpages']['tunable'] = 'yes'
PARAMS['bgwriter_lru_multiplier']['tunable'] = 'yes'
PARAMS['bonjour']['tunable'] = 'no'
PARAMS['bonjour_name']['tunable'] = 'no'
PARAMS['bytea_output']['tunable'] = 'no'
PARAMS['check_function_bodies']['tunable'] = 'no'
PARAMS['checkpoint_completion_target']['tunable'] = 'yes'
PARAMS['checkpoint_flush_after']['tunable'] = 'yes'
PARAMS['checkpoint_timeout']['tunable'] = 'yes'
PARAMS['checkpoint_warning']['tunable'] = 'no'
PARAMS['client_min_messages']['tunable'] = 'no'
PARAMS['commit_delay']['tunable'] = 'yes'
PARAMS['commit_siblings']['tunable'] = 'yes'
PARAMS['constraint_exclusion']['tunable'] = 'no'
PARAMS['cpu_index_tuple_cost']['tunable'] = 'maybe'
PARAMS['cpu_operator_cost']['tunable'] = 'maybe'
PARAMS['cpu_tuple_cost']['tunable'] = 'maybe'
PARAMS['cursor_tuple_fraction']['tunable'] = 'maybe'
PARAMS['db_user_namespace']['tunable'] = 'no'
PARAMS['deadlock_timeout']['tunable'] = 'yes'
PARAMS['default_statistics_target']['tunable'] = 'yes'
PARAMS['default_transaction_deferrable']['tunable'] = 'no'
PARAMS['default_transaction_isolation']['tunable'] = 'no'
PARAMS['default_transaction_read_only']['tunable'] = 'no'
PARAMS['default_with_oids']['tunable'] = 'no'
PARAMS['dynamic_shared_memory_type']['tunable'] = 'no'
PARAMS['effective_cache_size']['tunable'] = 'yes'
PARAMS['effective_io_concurrency']['tunable'] = 'yes'
PARAMS['escape_string_warning']['tunable'] = 'no'
PARAMS['exit_on_error']['tunable'] = 'no'
PARAMS['extra_float_digits']['tunable'] = 'no'
PARAMS['force_parallel_mode']['tunable'] = 'no'
PARAMS['from_collapse_limit']['tunable'] = 'yes'
PARAMS['fsync']['tunable'] = 'no' # dangerous
PARAMS['full_page_writes']['tunable'] = 'no' # dangerous
PARAMS['gin_fuzzy_search_limit']['tunable'] = 'no'
PARAMS['gin_pending_list_limit']['tunable'] = 'no'
PARAMS['huge_pages']['tunable'] = 'no'
PARAMS['idle_in_transaction_session_timeout']['tunable'] = 'no'
PARAMS['ignore_checksum_failure']['tunable'] = 'no'
PARAMS['ignore_system_indexes']['tunable'] = 'no'
PARAMS['IntervalStyle']['tunable'] = 'no'
PARAMS['join_collapse_limit']['tunable'] = 'yes'
PARAMS['krb_caseins_users']['tunable'] = 'no'
PARAMS['lo_compat_privileges']['tunable'] = 'no'
PARAMS['lock_timeout']['tunable'] = 'no'  # Tuning is not recommended by the Postgres 9.6 manual
PARAMS['maintenance_work_mem']['tunable'] = 'yes'
PARAMS['max_connections']['tunable'] = 'no' # This is set based on # of client connections
PARAMS['max_files_per_process']['tunable'] = 'no' # Should only be increased if OS complains
PARAMS['max_locks_per_transaction']['tunable'] = 'no'
PARAMS['max_parallel_workers_per_gather']['tunable'] = 'yes' # Must be < max_worker_processes
PARAMS['max_pred_locks_per_transaction']['tunable'] = 'no'
PARAMS['max_prepared_transactions']['tunable'] = 'no'
PARAMS['max_replication_slots']['tunable'] = 'no'
PARAMS['max_stack_depth']['tunable'] = 'no'
PARAMS['max_wal_senders']['tunable'] = 'no'
PARAMS['max_wal_size']['tunable'] = 'yes'
PARAMS['max_worker_processes']['tunable'] = 'yes'
PARAMS['min_parallel_relation_size']['tunable'] = 'yes'
PARAMS['min_wal_size']['tunable'] = 'yes'
PARAMS['old_snapshot_threshold']['tunable'] = 'no'
PARAMS['operator_precedence_warning']['tunable'] = 'no'
PARAMS['parallel_setup_cost']['tunable'] = 'maybe'
PARAMS['parallel_tuple_cost']['tunable'] = 'maybe'
PARAMS['password_encryption']['tunable'] = 'no'
PARAMS['port']['tunable'] = 'no'
PARAMS['post_auth_delay']['tunable'] = 'no'
PARAMS['pre_auth_delay']['tunable'] = 'no'
PARAMS['quote_all_identifiers']['tunable'] = 'no'
PARAMS['random_page_cost']['tunable'] = 'yes'
PARAMS['replacement_sort_tuples']['tunable'] = 'no'
PARAMS['restart_after_crash']['tunable'] = 'no'
PARAMS['row_security']['tunable'] = 'no'
PARAMS['seq_page_cost']['tunable'] = 'yes'
PARAMS['session_replication_role']['tunable'] = 'no'
PARAMS['shared_buffers']['tunable'] = 'yes'
PARAMS['sql_inheritance']['tunable'] = 'no'
PARAMS['standard_conforming_strings']['tunable'] = 'no'
PARAMS['statement_timeout']['tunable'] = 'no'
PARAMS['superuser_reserved_connections']['tunable'] = 'no'
PARAMS['synchronize_seqscans']['tunable'] = 'no'
PARAMS['synchronous_commit']['tunable'] = 'no' # dangerous
PARAMS['temp_buffers']['tunable'] = 'yes'
PARAMS['temp_file_limit']['tunable'] = 'no'
PARAMS['transaction_deferrable']['tunable'] = 'no'
PARAMS['transaction_isolation']['tunable'] = 'no'
PARAMS['transaction_read_only']['tunable'] = 'no'
PARAMS['transform_null_equals']['tunable'] = 'no'
PARAMS['unix_socket_permissions']['tunable'] = 'no'
PARAMS['update_process_title']['tunable'] = 'no'
PARAMS['wal_buffers']['tunable'] = 'yes'
PARAMS['wal_compression']['tunable'] = 'no'
PARAMS['wal_keep_segments']['tunable'] = 'no'
PARAMS['wal_level']['tunable'] = 'no'
PARAMS['wal_log_hints']['tunable'] = 'no'
PARAMS['wal_sync_method']['tunable'] = 'yes'
PARAMS['wal_writer_delay']['tunable'] = 'yes'
PARAMS['wal_writer_flush_after']['tunable'] = 'yes'
PARAMS['work_mem']['tunable'] = 'yes'
PARAMS['xmlbinary']['tunable'] = 'no'
PARAMS['xmloption']['tunable'] = 'no'
PARAMS['zero_damaged_pages']['tunable'] = 'no'
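# Write a plain-text report of the knobs grouped by tunable status for manual review.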
with open('tunable_params.txt', 'w') as f:
for opt in ['yes', 'maybe', 'no', '']:
f.write(opt.upper() + '\n')
f.write('---------------------------------------------------\n')
for p, pdict in list(PARAMS.items()):
if pdict['tunable'] == opt:
f.write('{}\t{}\t{}\n'.format(p, pdict['vartype'], pdict['unit']))
f.write('\n')
# MAX_MEM = 36 # 64GB or 2^36
#
# # backend_flush_after - range between 0 & 2MB
# # max = 2^21, eff_min = 2^13 (8kB), step either 0.5 or 1
# # other_values = [0]
# # powers_of_2 = true
# PARAMS['backend_flush_after']['tuning_config'] = create_tuning_config(
# t_minval=13, t_maxval=21, t_step=0.5, t_additional_values=[0],
# t_powers_of_2=True, t_weight_samples=True)
#
# # bgwriter_delay
# # true minval = 10, maxval = 500, step = 10
# PARAMS['bgwriter_delay']['tuning_config'] = create_tuning_config(
# t_minval=10, t_maxval=500, t_step=10)
#
# # bgwriter_flush_after
# # same as backend_flush_after
# PARAMS['bgwriter_flush_after']['tuning_config'] = create_tuning_config(
# t_minval=13, t_maxval=21, t_step=0.5, t_additional_values=[0],
# t_powers_of_2=True, t_weight_samples=True)
#
# # bgwriter_lru_maxpages
# # minval = 0, maxval = 1000, step = 50
# PARAMS['bgwriter_lru_maxpages']['tuning_config'] = create_tuning_config(
# t_minval=0, t_maxval=1000, t_step=50)
#
# # bgwriter_lru_multiplier
# # minval = 0.0, maxval = 10.0, step = 0.5
# PARAMS['bgwriter_lru_multiplier']['tuning_config'] = create_tuning_config(
# t_minval=0.0, t_maxval=10.0, t_step=0.5)
#
# # checkpoint_completion_target
# # minval = 0.0, maxval = 1.0, step = 0.1
# PARAMS['checkpoint_completion_target']['tuning_config'] = create_tuning_config(
# t_minval=0.0, t_maxval=1.0, t_step=0.1)
#
# # checkpoint_flush_after
# # same as backend_flush_after
# PARAMS['checkpoint_flush_after']['tuning_config'] = create_tuning_config(
# t_minval=13, t_maxval=21, t_step=0.5, t_additional_values=[0], t_powers_of_2=True)
#
# # checkpoint_timeout
# # minval = 5min, maxval = 3 hours, step = 5min
# # other_values = 1min (maybe)
# PARAMS['checkpoint_timeout']['tuning_config'] = create_tuning_config(
# t_minval=300000, t_maxval=10800000, t_step=300000, t_additional_values=[60000])
#
# # commit_delay
# # minval = 0, maxval = 10000, step = 500
# PARAMS['commit_delay']['tuning_config'] = create_tuning_config(
# t_minval=0, t_maxval=10000, t_step=500)
#
# # commit_siblings
# # minval = 0, maxval = 20, step = 1
# PARAMS['commit_siblings']['tuning_config'] = create_tuning_config(
# t_minval=0, t_maxval=20, t_step=1)
#
# # deadlock_timeout
# # minval = 500, maxval = 20000, step = 500
# PARAMS['deadlock_timeout']['tuning_config'] = create_tuning_config(
# t_minval=500, t_maxval=20000, t_step=500)
#
# # default_statistics_target
# # minval = 50, maxval = 2000, step = 50
# PARAMS['default_statistics_target']['tuning_config'] = create_tuning_config(
# t_minval=50, t_maxval=2000, t_step=50)
#
# # effective_cache_size
# # eff_min = 256MB = 2^19, eff_max = over max memory (by 25%)
# # other_values = []
# # powers_of_2 = true
# PARAMS['effective_cache_size']['tuning_config'] = create_tuning_config(
# t_minval=19, t_maxval=1.25, t_maxval_type='percentage', t_resource_type='memory',
# t_step=0.5, t_powers_of_2=True, t_weight_samples=True,
# t_notes='t_maxval = 25% amt greater than max memory')
#
# # effective_io_concurrency
# # minval = 0, maxval = 10, step = 1
# PARAMS['effective_io_concurrency']['tuning_config'] = create_tuning_config(
# t_minval=0, t_maxval=10, t_step=1)
#
# # from_collapse_limit
# # minval = 4, maxval = 40, step = 4
# # other_values = 1
# PARAMS['from_collapse_limit']['tuning_config'] = create_tuning_config(
# t_minval=4, t_maxval=40, t_step=4, t_additional_values=[1])
#
# # join_collapse_limit
# # minval = 4, maxval = 40, step = 4
# # other_values = 1
# PARAMS['join_collapse_limit']['tuning_config'] = create_tuning_config(
# t_minval=4, t_maxval=40, t_step=4, t_additional_values=[1])
#
# # random_page_cost
# # minval = current value of seq_page_cost, maxval = seq_page_cost + 5, step = 0.5
# PARAMS['random_page_cost']['tuning_config'] = create_tuning_config(
# t_minval=None, t_maxval=None, t_step=0.5, t_dependent=True,
# t_notes='t_minval = current value of seq_page_cost, t_maxval = seq_page_cost + 5')
#
# # seq_page_cost
# # minval = 0.0, maxval = 2.0, step = 0.1
# PARAMS['seq_page_cost']['tuning_config'] = create_tuning_config(
# t_minval=0.0, t_maxval=2.0, t_step=0.1)
#
# # maintenance_work_mem
# # eff_min 8MB, eff_max = 1/2 - 3/4
# PARAMS['maintenance_work_mem']['tuning_config'] = create_tuning_config(
# t_minval=23, t_maxval=0.4, t_maxval_type='percentage', t_resource_type='memory',
# t_step=0.5, t_powers_of_2=True, #t_weight_samples=True,
# t_notes='t_maxval = 40% of total memory')
#
# # max_parallel_workers_per_gather
# # minval = 0, maxval = current value of max_worker_processes
# PARAMS['max_parallel_workers_per_gather']['tuning_config'] = create_tuning_config(
# t_minval=0, t_maxval=None, t_step=1, t_dependent=True,
# t_notes='t_maxval = max_worker_processes')
#
# # max_wal_size
# # eff_min = 2^25, eff_max = 10GB? some percentage of total disk space?
# PARAMS['max_wal_size']['tuning_config'] = create_tuning_config(
# t_minval=25, t_maxval=33.5, t_step=0.5, t_powers_of_2=True,
# t_weight_samples=True, t_notes='t_maxval = some % of total disk space')
#
# # max_worker_processes
# # min = 4, max = 16, step = 2
# PARAMS['max_worker_processes']['tuning_config'] = create_tuning_config(
# t_minval=4, t_maxval=16, t_step=2)
#
# # min_parallel_relation_size
# # min = 1MB = 2^20, max = 2^30
# PARAMS['min_parallel_relation_size']['tuning_config'] = create_tuning_config(
# t_minval=20, t_maxval=2^30, t_step=0.5, t_powers_of_2=True)
#
# # min_wal_size
# # default = 80MB, some min, then max is up to current max_wal_size
# PARAMS['min_wal_size']['tuning_config'] = create_tuning_config(
# t_minval=25, t_maxval=None, t_step=0.5, t_powers_of_2=True,
# t_dependent=True, t_notes='t_maxval = max_wal_size')
#
# # shared buffers
# # min = 8388608 = 2^23, max = 70% of total memory
# PARAMS['shared_buffers']['tuning_config'] = create_tuning_config(
# t_minval=23, t_maxval=0.7, t_maxval_type='percentage', t_resource_type='memory',
# t_step=0.5, t_powers_of_2=True, t_weight_samples=True,
# t_notes='t_maxval = 70% of total memory')
#
# # temp buffers
# # min ~ 2^20, max = some percent of total memory
# PARAMS['temp_buffers']['tuning_config'] = create_tuning_config(
# t_minval=20, t_maxval=0.25, t_maxval_type='percentage', t_resource_type='memory',
# t_step=0.5, t_powers_of_2=True, t_weight_samples=True,
# t_notes='t_maxval = some % of total memory')
#
# # wal_buffers
# # min = 32kB = 2^15, max = 2GB
# # other_values = [-1]
# PARAMS['wal_buffers']['tuning_config'] = create_tuning_config(
# t_minval=15, t_maxval=30.5, t_step=0.5, t_powers_of_2=True,
# t_additional_values=[-1], t_weight_samples=True)
#
# # wal_sync_method
# # enum: [open_datasync, fdatasync, fsync, open_sync]
# PARAMS['wal_sync_method']['tuning_config'] = create_tuning_config(
# t_enumvals=['open_datasync', 'fdatasync', 'fsync', 'open_sync'])
#
# # wal_writer_delay
# # min = 50ms, max = 1000ms, step = 50ms
# # other_values = 10ms
# PARAMS['wal_writer_delay']['tuning_config'] = create_tuning_config(
# t_minval=50, t_maxval=1000, t_step=50, t_additional_values=[10])
#
# # wal_writer_flush_after
# # same as backend_flush_after
# PARAMS['wal_writer_flush_after']['tuning_config'] = create_tuning_config(
# t_minval=13, t_maxval=21, t_step=0.5, t_additional_values=[0], t_powers_of_2=True)
#
# # work_mem
# # min = 64kB = 2^16, max = some percent of total memory
# PARAMS['work_mem']['tuning_config'] = create_tuning_config(
# t_minval=16, t_maxval=0.3, t_maxval_type='percentage', t_resource_type='memory',
# t_step=0.5, t_powers_of_2=True, t_weight_samples=True, t_dependent=True,
# t_notes='t_maxval = 30% of total memory')
# max_name_len = 0
# contexts = set()
# for pname, pinfo in PARAMS.iteritems():
# if pinfo['tunable'] == 'yes':
# assert pinfo['tuning_config'] is not None
# if pinfo['unit'] == 'bytes':
# assert pinfo['tuning_config']['t_powers_of_2'] == True
# if len(pname) > max_name_len:
# max_name_len = len(pname)
# contexts.add(pinfo['context'])
# print "Max name length: {}".format(max_name_len)
# print "Contexts: {}".format(contexts)
TMP_PARAMS = OrderedDict()
for k, v in list(PARAMS.items()):
newname = PARAM_PREFIX + '.' + k
v['name'] = newname
TMP_PARAMS[newname] = v
PARAMS = TMP_PARAMS
with open("settings.json", "w") as f:
json.dump(PARAMS, f, indent=4)
# maxlen = 0
# for pname, pinfo in PARAMS.iteritems():
# length = len(str(pinfo['default']))
# if length > maxlen:
# maxlen = length
# print pname, length
# print "maxlen: {}".format(maxlen)
JSON_SETTINGS = []
SORTED_KNOB_NAMES = []
for pname, pinfo in sorted(PARAMS.items()):
entry = {}
entry['model'] = 'website.KnobCatalog'
fields = dict(pinfo)
fields['tunable'] = fields['tunable'] == 'yes'
for k, v in list(fields.items()):
if v is not None and not isinstance(v, str) and not isinstance(v, bool):
fields[k] = str(v)
fields['dbms'] = 1
entry['fields'] = fields
JSON_SETTINGS.append(entry)
SORTED_KNOB_NAMES.append(pname)
with open("postgres-96_knobs.json", "w") as f:
json.dump(JSON_SETTINGS, f, indent=4)
shutil.copy("postgres-96_knobs.json", "../../../../website/fixtures/postgres-96_knobs.json")
# sorted_knobs = [{
# 'model': 'website.PipelineResult',
# 'fields': {
# "dbms": 1,
# "task_type": 1,
# "component": 4,
# "hardware": 17,
# "version_id": 0,
# "value": json.dumps(SORTED_KNOB_NAMES),
# }
# }]
# fname = 'postgres-96_sorted_knob_labels.json'
# with open(fname, "w") as f:
# json.dump(sorted_knobs, f, indent=4)
# shutil.copy(fname, "../../../preload/")

View File

@@ -0,0 +1,262 @@
name,setting,unit,category,short_desc,extra_desc,context,vartype,source,min_val,max_val,enumvals,boot_val,reset_val,sourcefile,sourceline,pending_restart
allow_system_table_mods,off,,Developer Options,Allows modifications of the structure of system tables.,,postmaster,bool,default,,,,off,off,,,f
application_name,psql,,Reporting and Logging / What to Log,Sets the application name to be reported in statistics and logs.,,user,string,client,,,,"",psql,,,f
archive_command,(disabled),,Write-Ahead Log / Archiving,Sets the shell command that will be called to archive a WAL file.,,sighup,string,default,,,,"","",,,f
archive_mode,off,,Write-Ahead Log / Archiving,Allows archiving of WAL files using archive_command.,,postmaster,enum,default,,,"{always,on,off}",off,off,,,f
archive_timeout,0,s,Write-Ahead Log / Archiving,Forces a switch to the next xlog file if a new file has not been started within N seconds.,,sighup,integer,default,0,1073741823,,0,0,,,f
array_nulls,on,,Version and Platform Compatibility / Previous PostgreSQL Versions,Enable input of NULL elements in arrays.,"When turned on, unquoted NULL in an array input value means a null value; otherwise it is taken literally.",user,bool,default,,,,on,on,,,f
authentication_timeout,60,s,Connections and Authentication / Security and Authentication,Sets the maximum allowed time to complete client authentication.,,sighup,integer,default,1,600,,60,60,,,f
autovacuum,on,,Autovacuum,Starts the autovacuum subprocess.,,sighup,bool,default,,,,on,on,,,f
autovacuum_analyze_scale_factor,0.1,,Autovacuum,"Number of tuple inserts, updates, or deletes prior to analyze as a fraction of reltuples.",,sighup,real,default,0,100,,0.1,0.1,,,f
autovacuum_analyze_threshold,50,,Autovacuum,"Minimum number of tuple inserts, updates, or deletes prior to analyze.",,sighup,integer,default,0,2147483647,,50,50,,,f
autovacuum_freeze_max_age,200000000,,Autovacuum,Age at which to autovacuum a table to prevent transaction ID wraparound.,,postmaster,integer,default,100000,2000000000,,200000000,200000000,,,f
autovacuum_max_workers,3,,Autovacuum,Sets the maximum number of simultaneously running autovacuum worker processes.,,postmaster,integer,default,1,262143,,3,3,,,f
autovacuum_multixact_freeze_max_age,400000000,,Autovacuum,Multixact age at which to autovacuum a table to prevent multixact wraparound.,,postmaster,integer,default,10000,2000000000,,400000000,400000000,,,f
autovacuum_naptime,60,s,Autovacuum,Time to sleep between autovacuum runs.,,sighup,integer,default,1,2147483,,60,60,,,f
autovacuum_vacuum_cost_delay,20,ms,Autovacuum,"Vacuum cost delay in milliseconds, for autovacuum.",,sighup,integer,default,-1,100,,20,20,,,f
autovacuum_vacuum_cost_limit,-1,,Autovacuum,"Vacuum cost amount available before napping, for autovacuum.",,sighup,integer,default,-1,10000,,-1,-1,,,f
autovacuum_vacuum_scale_factor,0.2,,Autovacuum,Number of tuple updates or deletes prior to vacuum as a fraction of reltuples.,,sighup,real,default,0,100,,0.2,0.2,,,f
autovacuum_vacuum_threshold,50,,Autovacuum,Minimum number of tuple updates or deletes prior to vacuum.,,sighup,integer,default,0,2147483647,,50,50,,,f
autovacuum_work_mem,-1,kB,Resource Usage / Memory,Sets the maximum memory to be used by each autovacuum worker process.,,sighup,integer,default,-1,2147483647,,-1,-1,,,f
backend_flush_after,0,8kB,Resource Usage / Asynchronous Behavior,Number of pages after which previously performed writes are flushed to disk.,,user,integer,default,0,256,,0,0,,,f
backslash_quote,safe_encoding,,Version and Platform Compatibility / Previous PostgreSQL Versions,"Sets whether ""\'"" is allowed in string literals.",,user,enum,default,,,"{safe_encoding,on,off}",safe_encoding,safe_encoding,,,f
bgwriter_delay,200,ms,Resource Usage / Background Writer,Background writer sleep time between rounds.,,sighup,integer,default,10,10000,,200,200,,,f
bgwriter_flush_after,64,8kB,Resource Usage / Background Writer,Number of pages after which previously performed writes are flushed to disk.,,sighup,integer,default,0,256,,64,64,,,f
bgwriter_lru_maxpages,100,,Resource Usage / Background Writer,Background writer maximum number of LRU pages to flush per round.,,sighup,integer,default,0,1000,,100,100,,,f
bgwriter_lru_multiplier,2,,Resource Usage / Background Writer,Multiple of the average buffer usage to free per round.,,sighup,real,default,0,10,,2,2,,,f
block_size,8192,,Preset Options,Shows the size of a disk block.,,internal,integer,default,8192,8192,,8192,8192,,,f
bonjour,off,,Connections and Authentication / Connection Settings,Enables advertising the server via Bonjour.,,postmaster,bool,default,,,,off,off,,,f
bonjour_name,"",,Connections and Authentication / Connection Settings,Sets the Bonjour service name.,,postmaster,string,default,,,,"","",,,f
bytea_output,hex,,Client Connection Defaults / Statement Behavior,Sets the output format for bytea.,,user,enum,default,,,"{escape,hex}",hex,hex,,,f
check_function_bodies,on,,Client Connection Defaults / Statement Behavior,Check function bodies during CREATE FUNCTION.,,user,bool,default,,,,on,on,,,f
checkpoint_completion_target,0.5,,Write-Ahead Log / Checkpoints,"Time spent flushing dirty buffers during checkpoint, as fraction of checkpoint interval.",,sighup,real,default,0,1,,0.5,0.5,,,f
checkpoint_flush_after,32,8kB,Write-Ahead Log / Checkpoints,Number of pages after which previously performed writes are flushed to disk.,,sighup,integer,default,0,256,,32,32,,,f
checkpoint_timeout,300,s,Write-Ahead Log / Checkpoints,Sets the maximum time between automatic WAL checkpoints.,,sighup,integer,default,30,86400,,300,300,,,f
checkpoint_warning,30,s,Write-Ahead Log / Checkpoints,Enables warnings if checkpoint segments are filled more frequently than this.,Write a message to the server log if checkpoints caused by the filling of checkpoint segment files happens more frequently than this number of seconds. Zero turns off the warning.,sighup,integer,default,0,2147483647,,30,30,,,f
client_encoding,UTF8,,Client Connection Defaults / Locale and Formatting,Sets the client's character set encoding.,,user,string,client,,,,SQL_ASCII,UTF8,,,f
client_min_messages,notice,,Reporting and Logging / When to Log,Sets the message levels that are sent to the client.,"Each level includes all the levels that follow it. The later the level, the fewer messages are sent.",user,enum,default,,,"{debug5,debug4,debug3,debug2,debug1,log,notice,warning,error}",notice,notice,,,f
cluster_name,9.6/main,,Process Title,"Sets the name of the cluster, which is included in the process title.",,postmaster,string,configuration file,,,,"",9.6/main,/etc/postgresql/9.6/main/postgresql.conf,463,f
commit_delay,0,,Write-Ahead Log / Settings,Sets the delay in microseconds between transaction commit and flushing WAL to disk.,,superuser,integer,default,0,100000,,0,0,,,f
commit_siblings,5,,Write-Ahead Log / Settings,Sets the minimum concurrent open transactions before performing commit_delay.,,user,integer,default,0,1000,,5,5,,,f
config_file,/etc/postgresql/9.6/main/postgresql.conf,,File Locations,Sets the server's main configuration file.,,postmaster,string,override,,,,,/etc/postgresql/9.6/main/postgresql.conf,,,f
constraint_exclusion,partition,,Query Tuning / Other Planner Options,Enables the planner to use constraints to optimize queries.,Table scans will be skipped if their constraints guarantee that no rows match the query.,user,enum,default,,,"{partition,on,off}",partition,partition,,,f
cpu_index_tuple_cost,0.005,,Query Tuning / Planner Cost Constants,Sets the planner's estimate of the cost of processing each index entry during an index scan.,,user,real,default,0,1.79769e+308,,0.005,0.005,,,f
cpu_operator_cost,0.0025,,Query Tuning / Planner Cost Constants,Sets the planner's estimate of the cost of processing each operator or function call.,,user,real,default,0,1.79769e+308,,0.0025,0.0025,,,f
cpu_tuple_cost,0.01,,Query Tuning / Planner Cost Constants,Sets the planner's estimate of the cost of processing each tuple (row).,,user,real,default,0,1.79769e+308,,0.01,0.01,,,f
cursor_tuple_fraction,0.1,,Query Tuning / Other Planner Options,Sets the planner's estimate of the fraction of a cursor's rows that will be retrieved.,,user,real,default,0,1,,0.1,0.1,,,f
data_checksums,off,,Preset Options,Shows whether data checksums are turned on for this cluster.,,internal,bool,override,,,,off,off,,,f
data_directory,/var/lib/postgresql/9.6/main,,File Locations,Sets the server's data directory.,,postmaster,string,override,,,,,/var/lib/postgresql/9.6/main,,,f
DateStyle,"ISO, MDY",,Client Connection Defaults / Locale and Formatting,Sets the display format for date and time values.,Also controls interpretation of ambiguous date inputs.,user,string,configuration file,,,,"ISO, MDY","ISO, MDY",/etc/postgresql/9.6/main/postgresql.conf,552,f
db_user_namespace,off,,Connections and Authentication / Security and Authentication,Enables per-database user names.,,sighup,bool,default,,,,off,off,,,f
deadlock_timeout,1000,ms,Lock Management,Sets the time to wait on a lock before checking for deadlock.,,superuser,integer,default,1,2147483647,,1000,1000,,,f
debug_assertions,off,,Preset Options,Shows whether the running server has assertion checks enabled.,,internal,bool,default,,,,off,off,,,f
debug_pretty_print,on,,Reporting and Logging / What to Log,Indents parse and plan tree displays.,,user,bool,default,,,,on,on,,,f
debug_print_parse,off,,Reporting and Logging / What to Log,Logs each query's parse tree.,,user,bool,default,,,,off,off,,,f
debug_print_plan,off,,Reporting and Logging / What to Log,Logs each query's execution plan.,,user,bool,default,,,,off,off,,,f
debug_print_rewritten,off,,Reporting and Logging / What to Log,Logs each query's rewritten parse tree.,,user,bool,default,,,,off,off,,,f
default_statistics_target,100,,Query Tuning / Other Planner Options,Sets the default statistics target.,This applies to table columns that have not had a column-specific target set via ALTER TABLE SET STATISTICS.,user,integer,default,1,10000,,100,100,,,f
default_tablespace,"",,Client Connection Defaults / Statement Behavior,Sets the default tablespace to create tables and indexes in.,An empty string selects the database's default tablespace.,user,string,default,,,,"","",,,f
default_text_search_config,pg_catalog.english,,Client Connection Defaults / Locale and Formatting,Sets default text search configuration.,,user,string,configuration file,,,,pg_catalog.simple,pg_catalog.english,/etc/postgresql/9.6/main/postgresql.conf,574,f
default_transaction_deferrable,off,,Client Connection Defaults / Statement Behavior,Sets the default deferrable status of new transactions.,,user,bool,default,,,,off,off,,,f
default_transaction_isolation,read committed,,Client Connection Defaults / Statement Behavior,Sets the transaction isolation level of each new transaction.,,user,enum,default,,,"{serializable,""repeatable read"",""read committed"",""read uncommitted""}",read committed,read committed,,,f
default_transaction_read_only,off,,Client Connection Defaults / Statement Behavior,Sets the default read-only status of new transactions.,,user,bool,default,,,,off,off,,,f
default_with_oids,off,,Version and Platform Compatibility / Previous PostgreSQL Versions,Create new tables with OIDs by default.,,user,bool,default,,,,off,off,,,f
dynamic_library_path,$libdir,,Client Connection Defaults / Other Defaults,Sets the path for dynamically loadable modules.,"If a dynamically loadable module needs to be opened and the specified name does not have a directory component (i.e., the name does not contain a slash), the system will search this path for the specified file.",superuser,string,default,,,,$libdir,$libdir,,,f
dynamic_shared_memory_type,posix,,Resource Usage / Memory,Selects the dynamic shared memory implementation used.,,postmaster,enum,configuration file,,,"{posix,sysv,mmap,none}",posix,posix,/etc/postgresql/9.6/main/postgresql.conf,127,f
effective_cache_size,524288,8kB,Query Tuning / Planner Cost Constants,Sets the planner's assumption about the size of the disk cache.,"That is, the portion of the kernel's disk cache that will be used for PostgreSQL data files. This is measured in disk pages, which are normally 8 kB each.",user,integer,default,1,2147483647,,524288,524288,,,f
effective_io_concurrency,1,,Resource Usage / Asynchronous Behavior,Number of simultaneous requests that can be handled efficiently by the disk subsystem.,"For RAID arrays, this should be approximately the number of drive spindles in the array.",user,integer,default,0,1000,,1,1,,,f
enable_bitmapscan,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of bitmap-scan plans.,,user,bool,default,,,,on,on,,,f
enable_hashagg,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of hashed aggregation plans.,,user,bool,default,,,,on,on,,,f
enable_hashjoin,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of hash join plans.,,user,bool,default,,,,on,on,,,f
enable_indexonlyscan,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of index-only-scan plans.,,user,bool,default,,,,on,on,,,f
enable_indexscan,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of index-scan plans.,,user,bool,default,,,,on,on,,,f
enable_material,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of materialization.,,user,bool,default,,,,on,on,,,f
enable_mergejoin,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of merge join plans.,,user,bool,default,,,,on,on,,,f
enable_nestloop,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of nested-loop join plans.,,user,bool,default,,,,on,on,,,f
enable_seqscan,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of sequential-scan plans.,,user,bool,default,,,,on,on,,,f
enable_sort,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of explicit sort steps.,,user,bool,default,,,,on,on,,,f
enable_tidscan,on,,Query Tuning / Planner Method Configuration,Enables the planner's use of TID scan plans.,,user,bool,default,,,,on,on,,,f
escape_string_warning,on,,Version and Platform Compatibility / Previous PostgreSQL Versions,Warn about backslash escapes in ordinary string literals.,,user,bool,default,,,,on,on,,,f
event_source,PostgreSQL,,Reporting and Logging / Where to Log,Sets the application name used to identify PostgreSQL messages in the event log.,,postmaster,string,default,,,,PostgreSQL,PostgreSQL,,,f
exit_on_error,off,,Error Handling,Terminate session on any error.,,user,bool,default,,,,off,off,,,f
external_pid_file,/var/run/postgresql/9.6-main.pid,,File Locations,Writes the postmaster PID to the specified file.,,postmaster,string,configuration file,,,,,/var/run/postgresql/9.6-main.pid,/etc/postgresql/9.6/main/postgresql.conf,49,f
extra_float_digits,0,,Client Connection Defaults / Locale and Formatting,Sets the number of digits displayed for floating-point values.,"This affects real, double precision, and geometric data types. The parameter value is added to the standard number of digits (FLT_DIG or DBL_DIG as appropriate).",user,integer,default,-15,3,,0,0,,,f
force_parallel_mode,off,,Query Tuning / Other Planner Options,Forces use of parallel query facilities.,"If possible, run query using a parallel worker and with parallel restrictions.",user,enum,default,,,"{off,on,regress}",off,off,,,f
from_collapse_limit,8,,Query Tuning / Other Planner Options,Sets the FROM-list size beyond which subqueries are not collapsed.,The planner will merge subqueries into upper queries if the resulting FROM list would have no more than this many items.,user,integer,default,1,2147483647,,8,8,,,f
fsync,on,,Write-Ahead Log / Settings,Forces synchronization of updates to disk.,The server will use the fsync() system call in several places to make sure that updates are physically written to disk. This insures that a database cluster will recover to a consistent state after an operating system or hardware crash.,sighup,bool,default,,,,on,on,,,f
full_page_writes,on,,Write-Ahead Log / Settings,Writes full pages to WAL when first modified after a checkpoint.,"A page write in process during an operating system crash might be only partially written to disk. During recovery, the row changes stored in WAL are not enough to recover. This option writes pages when first modified after a checkpoint to WAL so full recovery is possible.",sighup,bool,default,,,,on,on,,,f
geqo,on,,Query Tuning / Genetic Query Optimizer,Enables genetic query optimization.,This algorithm attempts to do planning without exhaustive searching.,user,bool,default,,,,on,on,,,f
geqo_effort,5,,Query Tuning / Genetic Query Optimizer,GEQO: effort is used to set the default for other GEQO parameters.,,user,integer,default,1,10,,5,5,,,f
geqo_generations,0,,Query Tuning / Genetic Query Optimizer,GEQO: number of iterations of the algorithm.,Zero selects a suitable default value.,user,integer,default,0,2147483647,,0,0,,,f
geqo_pool_size,0,,Query Tuning / Genetic Query Optimizer,GEQO: number of individuals in the population.,Zero selects a suitable default value.,user,integer,default,0,2147483647,,0,0,,,f
geqo_seed,0,,Query Tuning / Genetic Query Optimizer,GEQO: seed for random path selection.,,user,real,default,0,1,,0,0,,,f
geqo_selection_bias,2,,Query Tuning / Genetic Query Optimizer,GEQO: selective pressure within the population.,,user,real,default,1.5,2,,2,2,,,f
geqo_threshold,12,,Query Tuning / Genetic Query Optimizer,Sets the threshold of FROM items beyond which GEQO is used.,,user,integer,default,2,2147483647,,12,12,,,f
gin_fuzzy_search_limit,0,,Client Connection Defaults / Other Defaults,Sets the maximum allowed result for exact search by GIN.,,user,integer,default,0,2147483647,,0,0,,,f
gin_pending_list_limit,4096,kB,Client Connection Defaults / Statement Behavior,Sets the maximum size of the pending list for GIN index.,,user,integer,default,64,2147483647,,4096,4096,,,f
hba_file,/etc/postgresql/9.6/main/pg_hba.conf,,File Locations,"Sets the server's ""hba"" configuration file.",,postmaster,string,override,,,,,/etc/postgresql/9.6/main/pg_hba.conf,,,f
hot_standby,off,,Replication / Standby Servers,Allows connections and queries during recovery.,,postmaster,bool,default,,,,off,off,,,f
hot_standby_feedback,off,,Replication / Standby Servers,Allows feedback from a hot standby to the primary that will avoid query conflicts.,,sighup,bool,default,,,,off,off,,,f
huge_pages,try,,Resource Usage / Memory,Use of huge pages on Linux.,,postmaster,enum,default,,,"{off,on,try}",try,try,,,f
ident_file,/etc/postgresql/9.6/main/pg_ident.conf,,File Locations,"Sets the server's ""ident"" configuration file.",,postmaster,string,override,,,,,/etc/postgresql/9.6/main/pg_ident.conf,,,f
idle_in_transaction_session_timeout,0,ms,Client Connection Defaults / Statement Behavior,Sets the maximum allowed duration of any idling transaction.,A value of 0 turns off the timeout.,user,integer,default,0,2147483647,,0,0,,,f
ignore_checksum_failure,off,,Developer Options,Continues processing after a checksum failure.,"Detection of a checksum failure normally causes PostgreSQL to report an error, aborting the current transaction. Setting ignore_checksum_failure to true causes the system to ignore the failure (but still report a warning), and continue processing. This behavior could cause crashes or other serious problems. Only has an effect if checksums are enabled.",superuser,bool,default,,,,off,off,,,f
ignore_system_indexes,off,,Developer Options,Disables reading from system indexes.,"It does not prevent updating the indexes, so it is safe to use. The worst consequence is slowness.",backend,bool,default,,,,off,off,,,f
integer_datetimes,on,,Preset Options,Datetimes are integer based.,,internal,bool,default,,,,on,on,,,f
IntervalStyle,postgres,,Client Connection Defaults / Locale and Formatting,Sets the display format for interval values.,,user,enum,default,,,"{postgres,postgres_verbose,sql_standard,iso_8601}",postgres,postgres,,,f
join_collapse_limit,8,,Query Tuning / Other Planner Options,Sets the FROM-list size beyond which JOIN constructs are not flattened.,The planner will flatten explicit JOIN constructs into lists of FROM items whenever a list of no more than this many items would result.,user,integer,default,1,2147483647,,8,8,,,f
krb_caseins_users,off,,Connections and Authentication / Security and Authentication,Sets whether Kerberos and GSSAPI user names should be treated as case-insensitive.,,sighup,bool,default,,,,off,off,,,f
krb_server_keyfile,FILE:/etc/postgresql-common/krb5.keytab,,Connections and Authentication / Security and Authentication,Sets the location of the Kerberos server key file.,,sighup,string,default,,,,FILE:/etc/postgresql-common/krb5.keytab,FILE:/etc/postgresql-common/krb5.keytab,,,f
lc_collate,en_US.UTF-8,,Client Connection Defaults / Locale and Formatting,Shows the collation order locale.,,internal,string,override,,,,C,en_US.UTF-8,,,f
lc_ctype,en_US.UTF-8,,Client Connection Defaults / Locale and Formatting,Shows the character classification and case conversion locale.,,internal,string,override,,,,C,en_US.UTF-8,,,f
lc_messages,en_US.UTF-8,,Client Connection Defaults / Locale and Formatting,Sets the language in which messages are displayed.,,superuser,string,configuration file,,,,"",en_US.UTF-8,/etc/postgresql/9.6/main/postgresql.conf,567,f
lc_monetary,en_US.UTF-8,,Client Connection Defaults / Locale and Formatting,Sets the locale for formatting monetary amounts.,,user,string,configuration file,,,,C,en_US.UTF-8,/etc/postgresql/9.6/main/postgresql.conf,569,f
lc_numeric,en_US.UTF-8,,Client Connection Defaults / Locale and Formatting,Sets the locale for formatting numbers.,,user,string,configuration file,,,,C,en_US.UTF-8,/etc/postgresql/9.6/main/postgresql.conf,570,f
lc_time,en_US.UTF-8,,Client Connection Defaults / Locale and Formatting,Sets the locale for formatting date and time values.,,user,string,configuration file,,,,C,en_US.UTF-8,/etc/postgresql/9.6/main/postgresql.conf,571,f
listen_addresses,localhost,,Connections and Authentication / Connection Settings,Sets the host name or IP address(es) to listen to.,,postmaster,string,default,,,,localhost,localhost,,,f
lo_compat_privileges,off,,Version and Platform Compatibility / Previous PostgreSQL Versions,Enables backward compatibility mode for privilege checks on large objects.,"Skips privilege checks when reading or modifying large objects, for compatibility with PostgreSQL releases prior to 9.0.",superuser,bool,default,,,,off,off,,,f
local_preload_libraries,"",,Client Connection Defaults / Shared Library Preloading,Lists unprivileged shared libraries to preload into each backend.,,user,string,default,,,,"","",,,f
lock_timeout,0,ms,Client Connection Defaults / Statement Behavior,Sets the maximum allowed duration of any wait for a lock.,A value of 0 turns off the timeout.,user,integer,default,0,2147483647,,0,0,,,f
log_autovacuum_min_duration,-1,ms,Reporting and Logging / What to Log,Sets the minimum execution time above which autovacuum actions will be logged.,Zero prints all actions. -1 turns autovacuum logging off.,sighup,integer,default,-1,2147483647,,-1,-1,,,f
log_checkpoints,off,,Reporting and Logging / What to Log,Logs each checkpoint.,,sighup,bool,default,,,,off,off,,,f
log_connections,off,,Reporting and Logging / What to Log,Logs each successful connection.,,superuser-backend,bool,default,,,,off,off,,,f
log_destination,stderr,,Reporting and Logging / Where to Log,Sets the destination for server log output.,"Valid values are combinations of ""stderr"", ""syslog"", ""csvlog"", and ""eventlog"", depending on the platform.",sighup,string,default,,,,stderr,stderr,,,f
log_directory,pg_log,,Reporting and Logging / Where to Log,Sets the destination directory for log files.,Can be specified as relative to the data directory or as absolute path.,sighup,string,default,,,,pg_log,pg_log,,,f
log_disconnections,off,,Reporting and Logging / What to Log,"Logs end of a session, including duration.",,superuser-backend,bool,default,,,,off,off,,,f
log_duration,off,,Reporting and Logging / What to Log,Logs the duration of each completed SQL statement.,,superuser,bool,default,,,,off,off,,,f
log_error_verbosity,default,,Reporting and Logging / What to Log,Sets the verbosity of logged messages.,,superuser,enum,default,,,"{terse,default,verbose}",default,default,,,f
log_executor_stats,off,,Statistics / Monitoring,Writes executor performance statistics to the server log.,,superuser,bool,default,,,,off,off,,,f
log_file_mode,0600,,Reporting and Logging / Where to Log,Sets the file permissions for log files.,The parameter value is expected to be a numeric mode specification in the form accepted by the chmod and umask system calls. (To use the customary octal format the number must start with a 0 (zero).),sighup,integer,default,0,511,,384,384,,,f
log_filename,postgresql-%Y-%m-%d_%H%M%S.log,,Reporting and Logging / Where to Log,Sets the file name pattern for log files.,,sighup,string,default,,,,postgresql-%Y-%m-%d_%H%M%S.log,postgresql-%Y-%m-%d_%H%M%S.log,,,f
log_hostname,off,,Reporting and Logging / What to Log,Logs the host name in the connection logs.,"By default, connection logs only show the IP address of the connecting host. If you want them to show the host name you can turn this on, but depending on your host name resolution setup it might impose a non-negligible performance penalty.",sighup,bool,default,,,,off,off,,,f
log_line_prefix,%m [%p] %q%u@%d ,,Reporting and Logging / What to Log,Controls information prefixed to each log line.,"If blank, no prefix is used.",sighup,string,configuration file,,,,"",%m [%p] %q%u@%d ,/etc/postgresql/9.6/main/postgresql.conf,431,f
log_lock_waits,off,,Reporting and Logging / What to Log,Logs long lock waits.,,superuser,bool,default,,,,off,off,,,f
log_min_duration_statement,-1,ms,Reporting and Logging / When to Log,Sets the minimum execution time above which statements will be logged.,Zero prints all queries. -1 turns this feature off.,superuser,integer,default,-1,2147483647,,-1,-1,,,f
log_min_error_statement,error,,Reporting and Logging / When to Log,Causes all statements generating error at or above this level to be logged.,"Each level includes all the levels that follow it. The later the level, the fewer messages are sent.",superuser,enum,default,,,"{debug5,debug4,debug3,debug2,debug1,info,notice,warning,error,log,fatal,panic}",error,error,,,f
log_min_messages,warning,,Reporting and Logging / When to Log,Sets the message levels that are logged.,"Each level includes all the levels that follow it. The later the level, the fewer messages are sent.",superuser,enum,default,,,"{debug5,debug4,debug3,debug2,debug1,info,notice,warning,error,log,fatal,panic}",warning,warning,,,f
log_parser_stats,off,,Statistics / Monitoring,Writes parser performance statistics to the server log.,,superuser,bool,default,,,,off,off,,,f
log_planner_stats,off,,Statistics / Monitoring,Writes planner performance statistics to the server log.,,superuser,bool,default,,,,off,off,,,f
log_replication_commands,off,,Reporting and Logging / What to Log,Logs each replication command.,,superuser,bool,default,,,,off,off,,,f
log_rotation_age,1440,min,Reporting and Logging / Where to Log,Automatic log file rotation will occur after N minutes.,,sighup,integer,default,0,35791394,,1440,1440,,,f
log_rotation_size,10240,kB,Reporting and Logging / Where to Log,Automatic log file rotation will occur after N kilobytes.,,sighup,integer,default,0,2097151,,10240,10240,,,f
log_statement,none,,Reporting and Logging / What to Log,Sets the type of statements logged.,,superuser,enum,default,,,"{none,ddl,mod,all}",none,none,,,f
log_statement_stats,off,,Statistics / Monitoring,Writes cumulative performance statistics to the server log.,,superuser,bool,default,,,,off,off,,,f
log_temp_files,-1,kB,Reporting and Logging / What to Log,Log the use of temporary files larger than this number of kilobytes.,Zero logs all files. The default is -1 (turning this feature off).,superuser,integer,default,-1,2147483647,,-1,-1,,,f
log_timezone,localtime,,Reporting and Logging / What to Log,Sets the time zone to use in log messages.,,sighup,string,configuration file,,,,GMT,localtime,/etc/postgresql/9.6/main/postgresql.conf,458,f
log_truncate_on_rotation,off,,Reporting and Logging / Where to Log,Truncate existing log files of same name during log rotation.,,sighup,bool,default,,,,off,off,,,f
logging_collector,off,,Reporting and Logging / Where to Log,Start a subprocess to capture stderr output and/or csvlogs into log files.,,postmaster,bool,default,,,,off,off,,,f
maintenance_work_mem,65536,kB,Resource Usage / Memory,Sets the maximum memory to be used for maintenance operations.,This includes operations such as VACUUM and CREATE INDEX.,user,integer,default,1024,2147483647,,65536,65536,,,f
max_connections,100,,Connections and Authentication / Connection Settings,Sets the maximum number of concurrent connections.,,postmaster,integer,configuration file,1,262143,,100,100,/etc/postgresql/9.6/main/postgresql.conf,64,f
max_files_per_process,1000,,Resource Usage / Kernel Resources,Sets the maximum number of simultaneously open files for each server process.,,postmaster,integer,default,25,2147483647,,1000,1000,,,f
max_function_args,100,,Preset Options,Shows the maximum number of function arguments.,,internal,integer,default,100,100,,100,100,,,f
max_identifier_length,63,,Preset Options,Shows the maximum identifier length.,,internal,integer,default,63,63,,63,63,,,f
max_index_keys,32,,Preset Options,Shows the maximum number of index keys.,,internal,integer,default,32,32,,32,32,,,f
max_locks_per_transaction,64,,Lock Management,Sets the maximum number of locks per transaction.,The shared lock table is sized on the assumption that at most max_locks_per_transaction * max_connections distinct objects will need to be locked at any one time.,postmaster,integer,default,10,2147483647,,64,64,,,f
max_parallel_workers_per_gather,0,,Resource Usage / Asynchronous Behavior,Sets the maximum number of parallel processes per executor node.,,user,integer,default,0,1024,,0,0,,,f
max_pred_locks_per_transaction,64,,Lock Management,Sets the maximum number of predicate locks per transaction.,The shared predicate lock table is sized on the assumption that at most max_pred_locks_per_transaction * max_connections distinct objects will need to be locked at any one time.,postmaster,integer,default,10,2147483647,,64,64,,,f
max_prepared_transactions,0,,Resource Usage / Memory,Sets the maximum number of simultaneously prepared transactions.,,postmaster,integer,default,0,262143,,0,0,,,f
max_replication_slots,0,,Replication / Sending Servers,Sets the maximum number of simultaneously defined replication slots.,,postmaster,integer,default,0,262143,,0,0,,,f
max_stack_depth,2048,kB,Resource Usage / Memory,"Sets the maximum stack depth, in kilobytes.",,superuser,integer,environment variable,100,2147483647,,100,2048,,,f
max_standby_archive_delay,30000,ms,Replication / Standby Servers,Sets the maximum delay before canceling queries when a hot standby server is processing archived WAL data.,,sighup,integer,default,-1,2147483647,,30000,30000,,,f
max_standby_streaming_delay,30000,ms,Replication / Standby Servers,Sets the maximum delay before canceling queries when a hot standby server is processing streamed WAL data.,,sighup,integer,default,-1,2147483647,,30000,30000,,,f
max_wal_senders,0,,Replication / Sending Servers,Sets the maximum number of simultaneously running WAL sender processes.,,postmaster,integer,default,0,262143,,0,0,,,f
max_wal_size,64,16MB,Write-Ahead Log / Checkpoints,Sets the WAL size that triggers a checkpoint.,,sighup,integer,default,2,2147483647,,64,64,,,f
max_worker_processes,8,,Resource Usage / Asynchronous Behavior,Maximum number of concurrent worker processes.,,postmaster,integer,default,0,262143,,8,8,,,f
min_parallel_relation_size,1024,8kB,Query Tuning / Planner Cost Constants,Sets the minimum size of relations to be considered for parallel scan.,,user,integer,default,0,715827882,,1024,1024,,,f
min_wal_size,5,16MB,Write-Ahead Log / Checkpoints,Sets the minimum size to shrink the WAL to.,,sighup,integer,default,2,2147483647,,5,5,,,f
old_snapshot_threshold,-1,min,Resource Usage / Asynchronous Behavior,Time before a snapshot is too old to read pages changed after the snapshot was taken.,A value of -1 disables this feature.,postmaster,integer,default,-1,86400,,-1,-1,,,f
operator_precedence_warning,off,,Version and Platform Compatibility / Previous PostgreSQL Versions,Emit a warning for constructs that changed meaning since PostgreSQL 9.4.,,user,bool,default,,,,off,off,,,f
parallel_setup_cost,1000,,Query Tuning / Planner Cost Constants,Sets the planner's estimate of the cost of starting up worker processes for parallel query.,,user,real,default,0,1.79769e+308,,1000,1000,,,f
parallel_tuple_cost,0.1,,Query Tuning / Planner Cost Constants,Sets the planner's estimate of the cost of passing each tuple (row) from worker to master backend.,,user,real,default,0,1.79769e+308,,0.1,0.1,,,f
password_encryption,on,,Connections and Authentication / Security and Authentication,Encrypt passwords.,"When a password is specified in CREATE USER or ALTER USER without writing either ENCRYPTED or UNENCRYPTED, this parameter determines whether the password is to be encrypted.",user,bool,default,,,,on,on,,,f
port,5432,,Connections and Authentication / Connection Settings,Sets the TCP port the server listens on.,,postmaster,integer,configuration file,1,65535,,5432,5432,/etc/postgresql/9.6/main/postgresql.conf,63,f
post_auth_delay,0,s,Developer Options,Waits N seconds on connection startup after authentication.,This allows attaching a debugger to the process.,backend,integer,default,0,2147,,0,0,,,f
pre_auth_delay,0,s,Developer Options,Waits N seconds on connection startup before authentication.,This allows attaching a debugger to the process.,sighup,integer,default,0,60,,0,0,,,f
quote_all_identifiers,off,,Version and Platform Compatibility / Previous PostgreSQL Versions,"When generating SQL fragments, quote all identifiers.",,user,bool,default,,,,off,off,,,f
random_page_cost,4,,Query Tuning / Planner Cost Constants,Sets the planner's estimate of the cost of a nonsequentially fetched disk page.,,user,real,default,0,1.79769e+308,,4,4,,,f
replacement_sort_tuples,150000,,Resource Usage / Memory,Sets the maximum number of tuples to be sorted using replacement selection.,"When more tuples than this are present, quicksort will be used.",user,integer,default,0,2147483647,,150000,150000,,,f
restart_after_crash,on,,Error Handling,Reinitialize server after backend crash.,,sighup,bool,default,,,,on,on,,,f
row_security,on,,Connections and Authentication / Security and Authentication,Enable row security.,"When enabled, row security will be applied to all users.",user,bool,default,,,,on,on,,,f
search_path,"""$user"", public",,Client Connection Defaults / Statement Behavior,Sets the schema search order for names that are not schema-qualified.,,user,string,default,,,,"""$user"", public","""$user"", public",,,f
segment_size,131072,8kB,Preset Options,Shows the number of pages per disk file.,,internal,integer,default,131072,131072,,131072,131072,,,f
seq_page_cost,1,,Query Tuning / Planner Cost Constants,Sets the planner's estimate of the cost of a sequentially fetched disk page.,,user,real,default,0,1.79769e+308,,1,1,,,f
server_encoding,UTF8,,Client Connection Defaults / Locale and Formatting,Sets the server (database) character set encoding.,,internal,string,override,,,,SQL_ASCII,UTF8,,,f
server_version,9.6.3,,Preset Options,Shows the server version.,,internal,string,default,,,,9.6.3,9.6.3,,,f
server_version_num,90603,,Preset Options,Shows the server version as an integer.,,internal,integer,default,90603,90603,,90603,90603,,,f
session_preload_libraries,"",,Client Connection Defaults / Shared Library Preloading,Lists shared libraries to preload into each backend.,,superuser,string,default,,,,"","",,,f
session_replication_role,origin,,Client Connection Defaults / Statement Behavior,Sets the session's behavior for triggers and rewrite rules.,,superuser,enum,default,,,"{origin,replica,local}",origin,origin,,,f
shared_buffers,16384,8kB,Resource Usage / Memory,Sets the number of shared memory buffers used by the server.,,postmaster,integer,configuration file,16,1073741823,,1024,16384,/etc/postgresql/9.6/main/postgresql.conf,113,f
shared_preload_libraries,"",,Client Connection Defaults / Shared Library Preloading,Lists shared libraries to preload into server.,,postmaster,string,default,,,,"","",,,f
sql_inheritance,on,,Version and Platform Compatibility / Previous PostgreSQL Versions,Causes subtables to be included by default in various commands.,,user,bool,default,,,,on,on,,,f
ssl,on,,Connections and Authentication / Security and Authentication,Enables SSL connections.,,postmaster,bool,configuration file,,,,off,on,/etc/postgresql/9.6/main/postgresql.conf,79,f
ssl_ca_file,"",,Connections and Authentication / Security and Authentication,Location of the SSL certificate authority file.,,postmaster,string,default,,,,"","",,,f
ssl_cert_file,/etc/ssl/certs/ssl-cert-snakeoil.pem,,Connections and Authentication / Security and Authentication,Location of the SSL server certificate file.,,postmaster,string,configuration file,,,,server.crt,/etc/ssl/certs/ssl-cert-snakeoil.pem,/etc/postgresql/9.6/main/postgresql.conf,84,f
ssl_ciphers,HIGH:MEDIUM:+3DES:!aNULL,,Connections and Authentication / Security and Authentication,Sets the list of allowed SSL ciphers.,,postmaster,string,default,,,,HIGH:MEDIUM:+3DES:!aNULL,HIGH:MEDIUM:+3DES:!aNULL,,,f
ssl_crl_file,"",,Connections and Authentication / Security and Authentication,Location of the SSL certificate revocation list file.,,postmaster,string,default,,,,"","",,,f
ssl_ecdh_curve,prime256v1,,Connections and Authentication / Security and Authentication,Sets the curve to use for ECDH.,,postmaster,string,default,,,,prime256v1,prime256v1,,,f
ssl_key_file,/etc/ssl/private/ssl-cert-snakeoil.key,,Connections and Authentication / Security and Authentication,Location of the SSL server private key file.,,postmaster,string,configuration file,,,,server.key,/etc/ssl/private/ssl-cert-snakeoil.key,/etc/postgresql/9.6/main/postgresql.conf,85,f
ssl_prefer_server_ciphers,on,,Connections and Authentication / Security and Authentication,Give priority to server ciphersuite order.,,postmaster,bool,default,,,,on,on,,,f
standard_conforming_strings,on,,Version and Platform Compatibility / Previous PostgreSQL Versions,Causes '...' strings to treat backslashes literally.,,user,bool,default,,,,on,on,,,f
statement_timeout,0,ms,Client Connection Defaults / Statement Behavior,Sets the maximum allowed duration of any statement.,A value of 0 turns off the timeout.,user,integer,default,0,2147483647,,0,0,,,f
stats_temp_directory,/var/run/postgresql/9.6-main.pg_stat_tmp,,Statistics / Query and Index Statistics Collector,Writes temporary statistics files to the specified directory.,,sighup,string,configuration file,,,,pg_stat_tmp,/var/run/postgresql/9.6-main.pg_stat_tmp,/etc/postgresql/9.6/main/postgresql.conf,479,f
superuser_reserved_connections,3,,Connections and Authentication / Connection Settings,Sets the number of connection slots reserved for superusers.,,postmaster,integer,default,0,262143,,3,3,,,f
synchronize_seqscans,on,,Version and Platform Compatibility / Previous PostgreSQL Versions,Enable synchronized sequential scans.,,user,bool,default,,,,on,on,,,f
synchronous_commit,on,,Write-Ahead Log / Settings,Sets the current transaction's synchronization level.,,user,enum,default,,,"{local,remote_write,remote_apply,on,off}",on,on,,,f
synchronous_standby_names,"",,Replication / Master Server,Number of synchronous standbys and list of names of potential synchronous ones.,,sighup,string,default,,,,"","",,,f
syslog_facility,local0,,Reporting and Logging / Where to Log,"Sets the syslog ""facility"" to be used when syslog enabled.",,sighup,enum,default,,,"{local0,local1,local2,local3,local4,local5,local6,local7}",local0,local0,,,f
syslog_ident,postgres,,Reporting and Logging / Where to Log,Sets the program name used to identify PostgreSQL messages in syslog.,,sighup,string,default,,,,postgres,postgres,,,f
syslog_sequence_numbers,on,,Reporting and Logging / Where to Log,Add sequence number to syslog messages to avoid duplicate suppression.,,sighup,bool,default,,,,on,on,,,f
syslog_split_messages,on,,Reporting and Logging / Where to Log,Split messages sent to syslog by lines and to fit into 1024 bytes.,,sighup,bool,default,,,,on,on,,,f
tcp_keepalives_count,0,,Client Connection Defaults / Other Defaults,Maximum number of TCP keepalive retransmits.,This controls the number of consecutive keepalive retransmits that can be lost before a connection is considered dead. A value of 0 uses the system default.,user,integer,default,0,2147483647,,0,0,,,f
tcp_keepalives_idle,0,s,Client Connection Defaults / Other Defaults,Time between issuing TCP keepalives.,A value of 0 uses the system default.,user,integer,default,0,2147483647,,0,0,,,f
tcp_keepalives_interval,0,s,Client Connection Defaults / Other Defaults,Time between TCP keepalive retransmits.,A value of 0 uses the system default.,user,integer,default,0,2147483647,,0,0,,,f
temp_buffers,1024,8kB,Resource Usage / Memory,Sets the maximum number of temporary buffers used by each session.,,user,integer,default,100,1073741823,,1024,1024,,,f
temp_file_limit,-1,kB,Resource Usage / Disk,Limits the total size of all temporary files used by each process.,-1 means no limit.,superuser,integer,default,-1,2147483647,,-1,-1,,,f
temp_tablespaces,"",,Client Connection Defaults / Statement Behavior,Sets the tablespace(s) to use for temporary tables and sort files.,,user,string,default,,,,"","",,,f
TimeZone,localtime,,Client Connection Defaults / Locale and Formatting,Sets the time zone for displaying and interpreting time stamps.,,user,string,configuration file,,,,GMT,localtime,/etc/postgresql/9.6/main/postgresql.conf,554,f
timezone_abbreviations,Default,,Client Connection Defaults / Locale and Formatting,Selects a file of time zone abbreviations.,,user,string,default,,,,,Default,,,f
trace_notify,off,,Developer Options,Generates debugging output for LISTEN and NOTIFY.,,user,bool,default,,,,off,off,,,f
trace_recovery_messages,log,,Developer Options,Enables logging of recovery-related debugging information.,"Each level includes all the levels that follow it. The later the level, the fewer messages are sent.",sighup,enum,default,,,"{debug5,debug4,debug3,debug2,debug1,log,notice,warning,error}",log,log,,,f
trace_sort,off,,Developer Options,Emit information about resource usage in sorting.,,user,bool,default,,,,off,off,,,f
track_activities,on,,Statistics / Query and Index Statistics Collector,Collects information about executing commands.,"Enables the collection of information on the currently executing command of each session, along with the time at which that command began execution.",superuser,bool,default,,,,on,on,,,f
track_activity_query_size,1024,,Resource Usage / Memory,"Sets the size reserved for pg_stat_activity.query, in bytes.",,postmaster,integer,default,100,102400,,1024,1024,,,f
track_commit_timestamp,off,,Replication,Collects transaction commit time.,,postmaster,bool,default,,,,off,off,,,f
track_counts,on,,Statistics / Query and Index Statistics Collector,Collects statistics on database activity.,,superuser,bool,default,,,,on,on,,,f
track_functions,none,,Statistics / Query and Index Statistics Collector,Collects function-level statistics on database activity.,,superuser,enum,default,,,"{none,pl,all}",none,none,,,f
track_io_timing,off,,Statistics / Query and Index Statistics Collector,Collects timing statistics for database I/O activity.,,superuser,bool,default,,,,off,off,,,f
transaction_deferrable,off,,Client Connection Defaults / Statement Behavior,Whether to defer a read-only serializable transaction until it can be executed with no possible serialization failures.,,user,bool,override,,,,off,off,,,f
transaction_isolation,read committed,,Client Connection Defaults / Statement Behavior,Sets the current transaction's isolation level.,,user,string,override,,,,default,default,,,f
transaction_read_only,off,,Client Connection Defaults / Statement Behavior,Sets the current transaction's read-only status.,,user,bool,override,,,,off,off,,,f
transform_null_equals,off,,Version and Platform Compatibility / Other Platforms and Clients,"Treats ""expr=NULL"" as ""expr IS NULL"".","When turned on, expressions of the form expr = NULL (or NULL = expr) are treated as expr IS NULL, that is, they return true if expr evaluates to the null value, and false otherwise. The correct behavior of expr = NULL is to always return null (unknown).",user,bool,default,,,,off,off,,,f
unix_socket_directories,/var/run/postgresql,,Connections and Authentication / Connection Settings,Sets the directories where Unix-domain sockets will be created.,,postmaster,string,configuration file,,,,/var/run/postgresql,/var/run/postgresql,/etc/postgresql/9.6/main/postgresql.conf,66,f
unix_socket_group,"",,Connections and Authentication / Connection Settings,Sets the owning group of the Unix-domain socket.,The owning user of the socket is always the user that starts the server.,postmaster,string,default,,,,"","",,,f
unix_socket_permissions,0777,,Connections and Authentication / Connection Settings,Sets the access permissions of the Unix-domain socket.,Unix-domain sockets use the usual Unix file system permission set. The parameter value is expected to be a numeric mode specification in the form accepted by the chmod and umask system calls. (To use the customary octal format the number must start with a 0 (zero).),postmaster,integer,default,0,511,,511,511,,,f
update_process_title,on,,Process Title,Updates the process title to show the active SQL command.,Enables updating of the process title every time a new SQL command is received by the server.,superuser,bool,default,,,,on,on,,,f
vacuum_cost_delay,0,ms,Resource Usage / Cost-Based Vacuum Delay,Vacuum cost delay in milliseconds.,,user,integer,default,0,100,,0,0,,,f
vacuum_cost_limit,200,,Resource Usage / Cost-Based Vacuum Delay,Vacuum cost amount available before napping.,,user,integer,default,1,10000,,200,200,,,f
vacuum_cost_page_dirty,20,,Resource Usage / Cost-Based Vacuum Delay,Vacuum cost for a page dirtied by vacuum.,,user,integer,default,0,10000,,20,20,,,f
vacuum_cost_page_hit,1,,Resource Usage / Cost-Based Vacuum Delay,Vacuum cost for a page found in the buffer cache.,,user,integer,default,0,10000,,1,1,,,f
vacuum_cost_page_miss,10,,Resource Usage / Cost-Based Vacuum Delay,Vacuum cost for a page not found in the buffer cache.,,user,integer,default,0,10000,,10,10,,,f
vacuum_defer_cleanup_age,0,,Replication / Master Server,"Number of transactions by which VACUUM and HOT cleanup should be deferred, if any.",,sighup,integer,default,0,1000000,,0,0,,,f
vacuum_freeze_min_age,50000000,,Client Connection Defaults / Statement Behavior,Minimum age at which VACUUM should freeze a table row.,,user,integer,default,0,1000000000,,50000000,50000000,,,f
vacuum_freeze_table_age,150000000,,Client Connection Defaults / Statement Behavior,Age at which VACUUM should scan whole table to freeze tuples.,,user,integer,default,0,2000000000,,150000000,150000000,,,f
vacuum_multixact_freeze_min_age,5000000,,Client Connection Defaults / Statement Behavior,Minimum age at which VACUUM should freeze a MultiXactId in a table row.,,user,integer,default,0,1000000000,,5000000,5000000,,,f
vacuum_multixact_freeze_table_age,150000000,,Client Connection Defaults / Statement Behavior,Multixact age at which VACUUM should scan whole table to freeze tuples.,,user,integer,default,0,2000000000,,150000000,150000000,,,f
wal_block_size,8192,,Preset Options,Shows the block size in the write ahead log.,,internal,integer,default,8192,8192,,8192,8192,,,f
wal_buffers,512,8kB,Write-Ahead Log / Settings,Sets the number of disk-page buffers in shared memory for WAL.,,postmaster,integer,override,-1,262143,,-1,512,,,f
wal_compression,off,,Write-Ahead Log / Settings,Compresses full-page writes written in WAL file.,,superuser,bool,default,,,,off,off,,,f
wal_keep_segments,0,,Replication / Sending Servers,Sets the number of WAL files held for standby servers.,,sighup,integer,default,0,2147483647,,0,0,,,f
wal_level,minimal,,Write-Ahead Log / Settings,Set the level of information written to the WAL.,,postmaster,enum,default,,,"{minimal,replica,logical}",minimal,minimal,,,f
wal_log_hints,off,,Write-Ahead Log / Settings,"Writes full pages to WAL when first modified after a checkpoint, even for a non-critical modifications.",,postmaster,bool,default,,,,off,off,,,f
wal_receiver_status_interval,10,s,Replication / Standby Servers,Sets the maximum interval between WAL receiver status reports to the primary.,,sighup,integer,default,0,2147483,,10,10,,,f
wal_receiver_timeout,60000,ms,Replication / Standby Servers,Sets the maximum wait time to receive data from the primary.,,sighup,integer,default,0,2147483647,,60000,60000,,,f
wal_retrieve_retry_interval,5000,ms,Replication / Standby Servers,Sets the time to wait before retrying to retrieve WAL after a failed attempt.,,sighup,integer,default,1,2147483647,,5000,5000,,,f
wal_segment_size,2048,8kB,Preset Options,Shows the number of pages per write ahead log segment.,,internal,integer,default,2048,2048,,2048,2048,,,f
wal_sender_timeout,60000,ms,Replication / Sending Servers,Sets the maximum time to wait for WAL replication.,,sighup,integer,default,0,2147483647,,60000,60000,,,f
wal_sync_method,fdatasync,,Write-Ahead Log / Settings,Selects the method used for forcing WAL updates to disk.,,sighup,enum,default,,,"{fsync,fdatasync,open_sync,open_datasync}",fdatasync,fdatasync,,,f
wal_writer_delay,200,ms,Write-Ahead Log / Settings,Time between WAL flushes performed in the WAL writer.,,sighup,integer,default,1,10000,,200,200,,,f
wal_writer_flush_after,128,8kB,Write-Ahead Log / Settings,Amount of WAL written out by WAL writer that triggers a flush.,,sighup,integer,default,0,2147483647,,128,128,,,f
work_mem,4096,kB,Resource Usage / Memory,Sets the maximum memory to be used for query workspaces.,This much memory can be used by each internal sort operation and hash table before switching to temporary disk files.,user,integer,default,64,2147483647,,4096,4096,,,f
xmlbinary,base64,,Client Connection Defaults / Statement Behavior,Sets how binary values are to be encoded in XML.,,user,enum,default,,,"{base64,hex}",base64,base64,,,f
xmloption,content,,Client Connection Defaults / Statement Behavior,Sets whether XML data in implicit parsing and serialization operations is to be considered as documents or content fragments.,,user,enum,default,,,"{content,document}",content,content,,,f
zero_damaged_pages,off,,Developer Options,Continues processing past damaged page headers.,"Detection of a damaged page header normally causes PostgreSQL to report an error, aborting the current transaction. Setting zero_damaged_pages to true causes the system to instead report a warning, zero out the damaged page, and continue processing. This behavior will destroy data, namely all the rows on the damaged page.",superuser,bool,default,,,,off,off,,,f
94 geqo_threshold 12 Query Tuning / Genetic Query Optimizer Sets the threshold of FROM items beyond which GEQO is used. user integer default 2 2147483647 12 12 f
95 gin_fuzzy_search_limit 0 Client Connection Defaults / Other Defaults Sets the maximum allowed result for exact search by GIN. user integer default 0 2147483647 0 0 f
96 gin_pending_list_limit 4096 kB Client Connection Defaults / Statement Behavior Sets the maximum size of the pending list for GIN index. user integer default 64 2147483647 4096 4096 f
97 hba_file /etc/postgresql/9.6/main/pg_hba.conf File Locations Sets the server's "hba" configuration file. postmaster string override /etc/postgresql/9.6/main/pg_hba.conf f
98 hot_standby off Replication / Standby Servers Allows connections and queries during recovery. postmaster bool default off off f
99 hot_standby_feedback off Replication / Standby Servers Allows feedback from a hot standby to the primary that will avoid query conflicts. sighup bool default off off f
100 huge_pages try Resource Usage / Memory Use of huge pages on Linux. postmaster enum default {off,on,try} try try f
101 ident_file /etc/postgresql/9.6/main/pg_ident.conf File Locations Sets the server's "ident" configuration file. postmaster string override /etc/postgresql/9.6/main/pg_ident.conf f
102 idle_in_transaction_session_timeout 0 ms Client Connection Defaults / Statement Behavior Sets the maximum allowed duration of any idling transaction. A value of 0 turns off the timeout. user integer default 0 2147483647 0 0 f
103 ignore_checksum_failure off Developer Options Continues processing after a checksum failure. Detection of a checksum failure normally causes PostgreSQL to report an error, aborting the current transaction. Setting ignore_checksum_failure to true causes the system to ignore the failure (but still report a warning), and continue processing. This behavior could cause crashes or other serious problems. Only has an effect if checksums are enabled. superuser bool default off off f
104 ignore_system_indexes off Developer Options Disables reading from system indexes. It does not prevent updating the indexes, so it is safe to use. The worst consequence is slowness. backend bool default off off f
105 integer_datetimes on Preset Options Datetimes are integer based. internal bool default on on f
106 IntervalStyle postgres Client Connection Defaults / Locale and Formatting Sets the display format for interval values. user enum default {postgres,postgres_verbose,sql_standard,iso_8601} postgres postgres f
107 join_collapse_limit 8 Query Tuning / Other Planner Options Sets the FROM-list size beyond which JOIN constructs are not flattened. The planner will flatten explicit JOIN constructs into lists of FROM items whenever a list of no more than this many items would result. user integer default 1 2147483647 8 8 f
108 krb_caseins_users off Connections and Authentication / Security and Authentication Sets whether Kerberos and GSSAPI user names should be treated as case-insensitive. sighup bool default off off f
109 krb_server_keyfile FILE:/etc/postgresql-common/krb5.keytab Connections and Authentication / Security and Authentication Sets the location of the Kerberos server key file. sighup string default FILE:/etc/postgresql-common/krb5.keytab FILE:/etc/postgresql-common/krb5.keytab f
110 lc_collate en_US.UTF-8 Client Connection Defaults / Locale and Formatting Shows the collation order locale. internal string override C en_US.UTF-8 f
111 lc_ctype en_US.UTF-8 Client Connection Defaults / Locale and Formatting Shows the character classification and case conversion locale. internal string override C en_US.UTF-8 f
112 lc_messages en_US.UTF-8 Client Connection Defaults / Locale and Formatting Sets the language in which messages are displayed. superuser string configuration file en_US.UTF-8 /etc/postgresql/9.6/main/postgresql.conf 567 f
113 lc_monetary en_US.UTF-8 Client Connection Defaults / Locale and Formatting Sets the locale for formatting monetary amounts. user string configuration file C en_US.UTF-8 /etc/postgresql/9.6/main/postgresql.conf 569 f
114 lc_numeric en_US.UTF-8 Client Connection Defaults / Locale and Formatting Sets the locale for formatting numbers. user string configuration file C en_US.UTF-8 /etc/postgresql/9.6/main/postgresql.conf 570 f
115 lc_time en_US.UTF-8 Client Connection Defaults / Locale and Formatting Sets the locale for formatting date and time values. user string configuration file C en_US.UTF-8 /etc/postgresql/9.6/main/postgresql.conf 571 f
116 listen_addresses localhost Connections and Authentication / Connection Settings Sets the host name or IP address(es) to listen to. postmaster string default localhost localhost f
117 lo_compat_privileges off Version and Platform Compatibility / Previous PostgreSQL Versions Enables backward compatibility mode for privilege checks on large objects. Skips privilege checks when reading or modifying large objects, for compatibility with PostgreSQL releases prior to 9.0. superuser bool default off off f
118 local_preload_libraries Client Connection Defaults / Shared Library Preloading Lists unprivileged shared libraries to preload into each backend. user string default f
119 lock_timeout 0 ms Client Connection Defaults / Statement Behavior Sets the maximum allowed duration of any wait for a lock. A value of 0 turns off the timeout. user integer default 0 2147483647 0 0 f
120 log_autovacuum_min_duration -1 ms Reporting and Logging / What to Log Sets the minimum execution time above which autovacuum actions will be logged. Zero prints all actions. -1 turns autovacuum logging off. sighup integer default -1 2147483647 -1 -1 f
121 log_checkpoints off Reporting and Logging / What to Log Logs each checkpoint. sighup bool default off off f
122 log_connections off Reporting and Logging / What to Log Logs each successful connection. superuser-backend bool default off off f
123 log_destination stderr Reporting and Logging / Where to Log Sets the destination for server log output. Valid values are combinations of "stderr", "syslog", "csvlog", and "eventlog", depending on the platform. sighup string default stderr stderr f
124 log_directory pg_log Reporting and Logging / Where to Log Sets the destination directory for log files. Can be specified as relative to the data directory or as absolute path. sighup string default pg_log pg_log f
125 log_disconnections off Reporting and Logging / What to Log Logs end of a session, including duration. superuser-backend bool default off off f
126 log_duration off Reporting and Logging / What to Log Logs the duration of each completed SQL statement. superuser bool default off off f
127 log_error_verbosity default Reporting and Logging / What to Log Sets the verbosity of logged messages. superuser enum default {terse,default,verbose} default default f
128 log_executor_stats off Statistics / Monitoring Writes executor performance statistics to the server log. superuser bool default off off f
129 log_file_mode 0600 Reporting and Logging / Where to Log Sets the file permissions for log files. The parameter value is expected to be a numeric mode specification in the form accepted by the chmod and umask system calls. (To use the customary octal format the number must start with a 0 (zero).) sighup integer default 0 511 384 384 f
130 log_filename postgresql-%Y-%m-%d_%H%M%S.log Reporting and Logging / Where to Log Sets the file name pattern for log files. sighup string default postgresql-%Y-%m-%d_%H%M%S.log postgresql-%Y-%m-%d_%H%M%S.log f
131 log_hostname off Reporting and Logging / What to Log Logs the host name in the connection logs. By default, connection logs only show the IP address of the connecting host. If you want them to show the host name you can turn this on, but depending on your host name resolution setup it might impose a non-negligible performance penalty. sighup bool default off off f
132 log_line_prefix %m [%p] %q%u@%d Reporting and Logging / What to Log Controls information prefixed to each log line. If blank, no prefix is used. sighup string configuration file %m [%p] %q%u@%d /etc/postgresql/9.6/main/postgresql.conf 431 f
133 log_lock_waits off Reporting and Logging / What to Log Logs long lock waits. superuser bool default off off f
134 log_min_duration_statement -1 ms Reporting and Logging / When to Log Sets the minimum execution time above which statements will be logged. Zero prints all queries. -1 turns this feature off. superuser integer default -1 2147483647 -1 -1 f
135 log_min_error_statement error Reporting and Logging / When to Log Causes all statements generating error at or above this level to be logged. Each level includes all the levels that follow it. The later the level, the fewer messages are sent. superuser enum default {debug5,debug4,debug3,debug2,debug1,info,notice,warning,error,log,fatal,panic} error error f
136 log_min_messages warning Reporting and Logging / When to Log Sets the message levels that are logged. Each level includes all the levels that follow it. The later the level, the fewer messages are sent. superuser enum default {debug5,debug4,debug3,debug2,debug1,info,notice,warning,error,log,fatal,panic} warning warning f
137 log_parser_stats off Statistics / Monitoring Writes parser performance statistics to the server log. superuser bool default off off f
138 log_planner_stats off Statistics / Monitoring Writes planner performance statistics to the server log. superuser bool default off off f
139 log_replication_commands off Reporting and Logging / What to Log Logs each replication command. superuser bool default off off f
140 log_rotation_age 1440 min Reporting and Logging / Where to Log Automatic log file rotation will occur after N minutes. sighup integer default 0 35791394 1440 1440 f
141 log_rotation_size 10240 kB Reporting and Logging / Where to Log Automatic log file rotation will occur after N kilobytes. sighup integer default 0 2097151 10240 10240 f
142 log_statement none Reporting and Logging / What to Log Sets the type of statements logged. superuser enum default {none,ddl,mod,all} none none f
143 log_statement_stats off Statistics / Monitoring Writes cumulative performance statistics to the server log. superuser bool default off off f
144 log_temp_files -1 kB Reporting and Logging / What to Log Log the use of temporary files larger than this number of kilobytes. Zero logs all files. The default is -1 (turning this feature off). superuser integer default -1 2147483647 -1 -1 f
145 log_timezone localtime Reporting and Logging / What to Log Sets the time zone to use in log messages. sighup string configuration file GMT localtime /etc/postgresql/9.6/main/postgresql.conf 458 f
146 log_truncate_on_rotation off Reporting and Logging / Where to Log Truncate existing log files of same name during log rotation. sighup bool default off off f
147 logging_collector off Reporting and Logging / Where to Log Start a subprocess to capture stderr output and/or csvlogs into log files. postmaster bool default off off f
148 maintenance_work_mem 65536 kB Resource Usage / Memory Sets the maximum memory to be used for maintenance operations. This includes operations such as VACUUM and CREATE INDEX. user integer default 1024 2147483647 65536 65536 f
149 max_connections 100 Connections and Authentication / Connection Settings Sets the maximum number of concurrent connections. postmaster integer configuration file 1 262143 100 100 /etc/postgresql/9.6/main/postgresql.conf 64 f
150 max_files_per_process 1000 Resource Usage / Kernel Resources Sets the maximum number of simultaneously open files for each server process. postmaster integer default 25 2147483647 1000 1000 f
151 max_function_args 100 Preset Options Shows the maximum number of function arguments. internal integer default 100 100 100 100 f
152 max_identifier_length 63 Preset Options Shows the maximum identifier length. internal integer default 63 63 63 63 f
153 max_index_keys 32 Preset Options Shows the maximum number of index keys. internal integer default 32 32 32 32 f
154 max_locks_per_transaction 64 Lock Management Sets the maximum number of locks per transaction. The shared lock table is sized on the assumption that at most max_locks_per_transaction * max_connections distinct objects will need to be locked at any one time. postmaster integer default 10 2147483647 64 64 f
155 max_parallel_workers_per_gather 0 Resource Usage / Asynchronous Behavior Sets the maximum number of parallel processes per executor node. user integer default 0 1024 0 0 f
156 max_pred_locks_per_transaction 64 Lock Management Sets the maximum number of predicate locks per transaction. The shared predicate lock table is sized on the assumption that at most max_pred_locks_per_transaction * max_connections distinct objects will need to be locked at any one time. postmaster integer default 10 2147483647 64 64 f
157 max_prepared_transactions 0 Resource Usage / Memory Sets the maximum number of simultaneously prepared transactions. postmaster integer default 0 262143 0 0 f
158 max_replication_slots 0 Replication / Sending Servers Sets the maximum number of simultaneously defined replication slots. postmaster integer default 0 262143 0 0 f
159 max_stack_depth 2048 kB Resource Usage / Memory Sets the maximum stack depth, in kilobytes. superuser integer environment variable 100 2147483647 100 2048 f
160 max_standby_archive_delay 30000 ms Replication / Standby Servers Sets the maximum delay before canceling queries when a hot standby server is processing archived WAL data. sighup integer default -1 2147483647 30000 30000 f
161 max_standby_streaming_delay 30000 ms Replication / Standby Servers Sets the maximum delay before canceling queries when a hot standby server is processing streamed WAL data. sighup integer default -1 2147483647 30000 30000 f
162 max_wal_senders 0 Replication / Sending Servers Sets the maximum number of simultaneously running WAL sender processes. postmaster integer default 0 262143 0 0 f
163 max_wal_size 64 16MB Write-Ahead Log / Checkpoints Sets the WAL size that triggers a checkpoint. sighup integer default 2 2147483647 64 64 f
164 max_worker_processes 8 Resource Usage / Asynchronous Behavior Maximum number of concurrent worker processes. postmaster integer default 0 262143 8 8 f
165 min_parallel_relation_size 1024 8kB Query Tuning / Planner Cost Constants Sets the minimum size of relations to be considered for parallel scan. user integer default 0 715827882 1024 1024 f
166 min_wal_size 5 16MB Write-Ahead Log / Checkpoints Sets the minimum size to shrink the WAL to. sighup integer default 2 2147483647 5 5 f
167 old_snapshot_threshold -1 min Resource Usage / Asynchronous Behavior Time before a snapshot is too old to read pages changed after the snapshot was taken. A value of -1 disables this feature. postmaster integer default -1 86400 -1 -1 f
168 operator_precedence_warning off Version and Platform Compatibility / Previous PostgreSQL Versions Emit a warning for constructs that changed meaning since PostgreSQL 9.4. user bool default off off f
169 parallel_setup_cost 1000 Query Tuning / Planner Cost Constants Sets the planner's estimate of the cost of starting up worker processes for parallel query. user real default 0 1.79769e+308 1000 1000 f
170 parallel_tuple_cost 0.1 Query Tuning / Planner Cost Constants Sets the planner's estimate of the cost of passing each tuple (row) from worker to master backend. user real default 0 1.79769e+308 0.1 0.1 f
171 password_encryption on Connections and Authentication / Security and Authentication Encrypt passwords. When a password is specified in CREATE USER or ALTER USER without writing either ENCRYPTED or UNENCRYPTED, this parameter determines whether the password is to be encrypted. user bool default on on f
172 port 5432 Connections and Authentication / Connection Settings Sets the TCP port the server listens on. postmaster integer configuration file 1 65535 5432 5432 /etc/postgresql/9.6/main/postgresql.conf 63 f
173 post_auth_delay 0 s Developer Options Waits N seconds on connection startup after authentication. This allows attaching a debugger to the process. backend integer default 0 2147 0 0 f
174 pre_auth_delay 0 s Developer Options Waits N seconds on connection startup before authentication. This allows attaching a debugger to the process. sighup integer default 0 60 0 0 f
175 quote_all_identifiers off Version and Platform Compatibility / Previous PostgreSQL Versions When generating SQL fragments, quote all identifiers. user bool default off off f
176 random_page_cost 4 Query Tuning / Planner Cost Constants Sets the planner's estimate of the cost of a nonsequentially fetched disk page. user real default 0 1.79769e+308 4 4 f
177 replacement_sort_tuples 150000 Resource Usage / Memory Sets the maximum number of tuples to be sorted using replacement selection. When more tuples than this are present, quicksort will be used. user integer default 0 2147483647 150000 150000 f
178 restart_after_crash on Error Handling Reinitialize server after backend crash. sighup bool default on on f
179 row_security on Connections and Authentication / Security and Authentication Enable row security. When enabled, row security will be applied to all users. user bool default on on f
180 search_path "$user", public Client Connection Defaults / Statement Behavior Sets the schema search order for names that are not schema-qualified. user string default "$user", public "$user", public f
181 segment_size 131072 8kB Preset Options Shows the number of pages per disk file. internal integer default 131072 131072 131072 131072 f
182 seq_page_cost 1 Query Tuning / Planner Cost Constants Sets the planner's estimate of the cost of a sequentially fetched disk page. user real default 0 1.79769e+308 1 1 f
183 server_encoding UTF8 Client Connection Defaults / Locale and Formatting Sets the server (database) character set encoding. internal string override SQL_ASCII UTF8 f
184 server_version 9.6.3 Preset Options Shows the server version. internal string default 9.6.3 9.6.3 f
185 server_version_num 90603 Preset Options Shows the server version as an integer. internal integer default 90603 90603 90603 90603 f
186 session_preload_libraries Client Connection Defaults / Shared Library Preloading Lists shared libraries to preload into each backend. superuser string default f
187 session_replication_role origin Client Connection Defaults / Statement Behavior Sets the session's behavior for triggers and rewrite rules. superuser enum default {origin,replica,local} origin origin f
188 shared_buffers 16384 8kB Resource Usage / Memory Sets the number of shared memory buffers used by the server. postmaster integer configuration file 16 1073741823 1024 16384 /etc/postgresql/9.6/main/postgresql.conf 113 f
189 shared_preload_libraries Client Connection Defaults / Shared Library Preloading Lists shared libraries to preload into server. postmaster string default f
190 sql_inheritance on Version and Platform Compatibility / Previous PostgreSQL Versions Causes subtables to be included by default in various commands. user bool default on on f
191 ssl on Connections and Authentication / Security and Authentication Enables SSL connections. postmaster bool configuration file off on /etc/postgresql/9.6/main/postgresql.conf 79 f
192 ssl_ca_file Connections and Authentication / Security and Authentication Location of the SSL certificate authority file. postmaster string default f
193 ssl_cert_file /etc/ssl/certs/ssl-cert-snakeoil.pem Connections and Authentication / Security and Authentication Location of the SSL server certificate file. postmaster string configuration file server.crt /etc/ssl/certs/ssl-cert-snakeoil.pem /etc/postgresql/9.6/main/postgresql.conf 84 f
194 ssl_ciphers HIGH:MEDIUM:+3DES:!aNULL Connections and Authentication / Security and Authentication Sets the list of allowed SSL ciphers. postmaster string default HIGH:MEDIUM:+3DES:!aNULL HIGH:MEDIUM:+3DES:!aNULL f
195 ssl_crl_file Connections and Authentication / Security and Authentication Location of the SSL certificate revocation list file. postmaster string default f
196 ssl_ecdh_curve prime256v1 Connections and Authentication / Security and Authentication Sets the curve to use for ECDH. postmaster string default prime256v1 prime256v1 f
197 ssl_key_file /etc/ssl/private/ssl-cert-snakeoil.key Connections and Authentication / Security and Authentication Location of the SSL server private key file. postmaster string configuration file server.key /etc/ssl/private/ssl-cert-snakeoil.key /etc/postgresql/9.6/main/postgresql.conf 85 f
198 ssl_prefer_server_ciphers on Connections and Authentication / Security and Authentication Give priority to server ciphersuite order. postmaster bool default on on f
199 standard_conforming_strings on Version and Platform Compatibility / Previous PostgreSQL Versions Causes '...' strings to treat backslashes literally. user bool default on on f
200 statement_timeout 0 ms Client Connection Defaults / Statement Behavior Sets the maximum allowed duration of any statement. A value of 0 turns off the timeout. user integer default 0 2147483647 0 0 f
201 stats_temp_directory /var/run/postgresql/9.6-main.pg_stat_tmp Statistics / Query and Index Statistics Collector Writes temporary statistics files to the specified directory. sighup string configuration file pg_stat_tmp /var/run/postgresql/9.6-main.pg_stat_tmp /etc/postgresql/9.6/main/postgresql.conf 479 f
202 superuser_reserved_connections 3 Connections and Authentication / Connection Settings Sets the number of connection slots reserved for superusers. postmaster integer default 0 262143 3 3 f
203 synchronize_seqscans on Version and Platform Compatibility / Previous PostgreSQL Versions Enable synchronized sequential scans. user bool default on on f
204 synchronous_commit on Write-Ahead Log / Settings Sets the current transaction's synchronization level. user enum default {local,remote_write,remote_apply,on,off} on on f
205 synchronous_standby_names Replication / Master Server Number of synchronous standbys and list of names of potential synchronous ones. sighup string default f
206 syslog_facility local0 Reporting and Logging / Where to Log Sets the syslog "facility" to be used when syslog enabled. sighup enum default {local0,local1,local2,local3,local4,local5,local6,local7} local0 local0 f
207 syslog_ident postgres Reporting and Logging / Where to Log Sets the program name used to identify PostgreSQL messages in syslog. sighup string default postgres postgres f
208 syslog_sequence_numbers on Reporting and Logging / Where to Log Add sequence number to syslog messages to avoid duplicate suppression. sighup bool default on on f
209 syslog_split_messages on Reporting and Logging / Where to Log Split messages sent to syslog by lines and to fit into 1024 bytes. sighup bool default on on f
210 tcp_keepalives_count 0 Client Connection Defaults / Other Defaults Maximum number of TCP keepalive retransmits. This controls the number of consecutive keepalive retransmits that can be lost before a connection is considered dead. A value of 0 uses the system default. user integer default 0 2147483647 0 0 f
211 tcp_keepalives_idle 0 s Client Connection Defaults / Other Defaults Time between issuing TCP keepalives. A value of 0 uses the system default. user integer default 0 2147483647 0 0 f
212 tcp_keepalives_interval 0 s Client Connection Defaults / Other Defaults Time between TCP keepalive retransmits. A value of 0 uses the system default. user integer default 0 2147483647 0 0 f
213 temp_buffers 1024 8kB Resource Usage / Memory Sets the maximum number of temporary buffers used by each session. user integer default 100 1073741823 1024 1024 f
214 temp_file_limit -1 kB Resource Usage / Disk Limits the total size of all temporary files used by each process. -1 means no limit. superuser integer default -1 2147483647 -1 -1 f
215 temp_tablespaces Client Connection Defaults / Statement Behavior Sets the tablespace(s) to use for temporary tables and sort files. user string default f
216 TimeZone localtime Client Connection Defaults / Locale and Formatting Sets the time zone for displaying and interpreting time stamps. user string configuration file GMT localtime /etc/postgresql/9.6/main/postgresql.conf 554 f
217 timezone_abbreviations Default Client Connection Defaults / Locale and Formatting Selects a file of time zone abbreviations. user string default Default f
218 trace_notify off Developer Options Generates debugging output for LISTEN and NOTIFY. user bool default off off f
219 trace_recovery_messages log Developer Options Enables logging of recovery-related debugging information. Each level includes all the levels that follow it. The later the level, the fewer messages are sent. sighup enum default {debug5,debug4,debug3,debug2,debug1,log,notice,warning,error} log log f
220 trace_sort off Developer Options Emit information about resource usage in sorting. user bool default off off f
221 track_activities on Statistics / Query and Index Statistics Collector Collects information about executing commands. Enables the collection of information on the currently executing command of each session, along with the time at which that command began execution. superuser bool default on on f
222 track_activity_query_size 1024 Resource Usage / Memory Sets the size reserved for pg_stat_activity.query, in bytes. postmaster integer default 100 102400 1024 1024 f
223 track_commit_timestamp off Replication Collects transaction commit time. postmaster bool default off off f
224 track_counts on Statistics / Query and Index Statistics Collector Collects statistics on database activity. superuser bool default on on f
225 track_functions none Statistics / Query and Index Statistics Collector Collects function-level statistics on database activity. superuser enum default {none,pl,all} none none f
226 track_io_timing off Statistics / Query and Index Statistics Collector Collects timing statistics for database I/O activity. superuser bool default off off f
227 transaction_deferrable off Client Connection Defaults / Statement Behavior Whether to defer a read-only serializable transaction until it can be executed with no possible serialization failures. user bool override off off f
228 transaction_isolation read committed Client Connection Defaults / Statement Behavior Sets the current transaction's isolation level. user string override default default f
229 transaction_read_only off Client Connection Defaults / Statement Behavior Sets the current transaction's read-only status. user bool override off off f
230 transform_null_equals off Version and Platform Compatibility / Other Platforms and Clients Treats "expr=NULL" as "expr IS NULL". When turned on, expressions of the form expr = NULL (or NULL = expr) are treated as expr IS NULL, that is, they return true if expr evaluates to the null value, and false otherwise. The correct behavior of expr = NULL is to always return null (unknown). user bool default off off f
231 unix_socket_directories /var/run/postgresql Connections and Authentication / Connection Settings Sets the directories where Unix-domain sockets will be created. postmaster string configuration file /var/run/postgresql /var/run/postgresql /etc/postgresql/9.6/main/postgresql.conf 66 f
232 unix_socket_group Connections and Authentication / Connection Settings Sets the owning group of the Unix-domain socket. The owning user of the socket is always the user that starts the server. postmaster string default f
233 unix_socket_permissions 0777 Connections and Authentication / Connection Settings Sets the access permissions of the Unix-domain socket. Unix-domain sockets use the usual Unix file system permission set. The parameter value is expected to be a numeric mode specification in the form accepted by the chmod and umask system calls. (To use the customary octal format the number must start with a 0 (zero).) postmaster integer default 0 511 511 511 f
234 update_process_title on Process Title Updates the process title to show the active SQL command. Enables updating of the process title every time a new SQL command is received by the server. superuser bool default on on f
235 vacuum_cost_delay 0 ms Resource Usage / Cost-Based Vacuum Delay Vacuum cost delay in milliseconds. user integer default 0 100 0 0 f
236 vacuum_cost_limit 200 Resource Usage / Cost-Based Vacuum Delay Vacuum cost amount available before napping. user integer default 1 10000 200 200 f
237 vacuum_cost_page_dirty 20 Resource Usage / Cost-Based Vacuum Delay Vacuum cost for a page dirtied by vacuum. user integer default 0 10000 20 20 f
238 vacuum_cost_page_hit 1 Resource Usage / Cost-Based Vacuum Delay Vacuum cost for a page found in the buffer cache. user integer default 0 10000 1 1 f
239 vacuum_cost_page_miss 10 Resource Usage / Cost-Based Vacuum Delay Vacuum cost for a page not found in the buffer cache. user integer default 0 10000 10 10 f
240 vacuum_defer_cleanup_age 0 Replication / Master Server Number of transactions by which VACUUM and HOT cleanup should be deferred, if any. sighup integer default 0 1000000 0 0 f
241 vacuum_freeze_min_age 50000000 Client Connection Defaults / Statement Behavior Minimum age at which VACUUM should freeze a table row. user integer default 0 1000000000 50000000 50000000 f
242 vacuum_freeze_table_age 150000000 Client Connection Defaults / Statement Behavior Age at which VACUUM should scan whole table to freeze tuples. user integer default 0 2000000000 150000000 150000000 f
243 vacuum_multixact_freeze_min_age 5000000 Client Connection Defaults / Statement Behavior Minimum age at which VACUUM should freeze a MultiXactId in a table row. user integer default 0 1000000000 5000000 5000000 f
244 vacuum_multixact_freeze_table_age 150000000 Client Connection Defaults / Statement Behavior Multixact age at which VACUUM should scan whole table to freeze tuples. user integer default 0 2000000000 150000000 150000000 f
245 wal_block_size 8192 Preset Options Shows the block size in the write ahead log. internal integer default 8192 8192 8192 8192 f
246 wal_buffers 512 8kB Write-Ahead Log / Settings Sets the number of disk-page buffers in shared memory for WAL. postmaster integer override -1 262143 -1 512 f
247 wal_compression off Write-Ahead Log / Settings Compresses full-page writes written in WAL file. superuser bool default off off f
248 wal_keep_segments 0 Replication / Sending Servers Sets the number of WAL files held for standby servers. sighup integer default 0 2147483647 0 0 f
249 wal_level minimal Write-Ahead Log / Settings Set the level of information written to the WAL. postmaster enum default {minimal,replica,logical} minimal minimal f
250 wal_log_hints off Write-Ahead Log / Settings Writes full pages to WAL when first modified after a checkpoint, even for a non-critical modifications. postmaster bool default off off f
251 wal_receiver_status_interval 10 s Replication / Standby Servers Sets the maximum interval between WAL receiver status reports to the primary. sighup integer default 0 2147483 10 10 f
252 wal_receiver_timeout 60000 ms Replication / Standby Servers Sets the maximum wait time to receive data from the primary. sighup integer default 0 2147483647 60000 60000 f
253 wal_retrieve_retry_interval 5000 ms Replication / Standby Servers Sets the time to wait before retrying to retrieve WAL after a failed attempt. sighup integer default 1 2147483647 5000 5000 f
254 wal_segment_size 2048 8kB Preset Options Shows the number of pages per write ahead log segment. internal integer default 2048 2048 2048 2048 f
255 wal_sender_timeout 60000 ms Replication / Sending Servers Sets the maximum time to wait for WAL replication. sighup integer default 0 2147483647 60000 60000 f
256 wal_sync_method fdatasync Write-Ahead Log / Settings Selects the method used for forcing WAL updates to disk. sighup enum default {fsync,fdatasync,open_sync,open_datasync} fdatasync fdatasync f
257 wal_writer_delay 200 ms Write-Ahead Log / Settings Time between WAL flushes performed in the WAL writer. sighup integer default 1 10000 200 200 f
258 wal_writer_flush_after 128 8kB Write-Ahead Log / Settings Amount of WAL written out by WAL writer that triggers a flush. sighup integer default 0 2147483647 128 128 f
259 work_mem 4096 kB Resource Usage / Memory Sets the maximum memory to be used for query workspaces. This much memory can be used by each internal sort operation and hash table before switching to temporary disk files. user integer default 64 2147483647 4096 4096 f
260 xmlbinary base64 Client Connection Defaults / Statement Behavior Sets how binary values are to be encoded in XML. user enum default {base64,hex} base64 base64 f
261 xmloption content Client Connection Defaults / Statement Behavior Sets whether XML data in implicit parsing and serialization operations is to be considered as documents or content fragments. user enum default {content,document} content content f
262 zero_damaged_pages off Developer Options Continues processing past damaged page headers. Detection of a damaged page header normally causes PostgreSQL to report an error, aborting the current transaction. Setting zero_damaged_pages to true causes the system to instead report a warning, zero out the damaged page, and continue processing. This behavior will destroy data, namely all the rows on the damaged page. superuser bool default off off f
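The rows above appear to be a dump of PostgreSQL 9.6's pg_settings catalog for this installation (knob name, category, description, restart context, value type, source, allowed range, boot and reset values, and the source file and line where applicable). A minimal sketch of pulling the same fields for a single knob from a running server, assuming a reachable local 9.6 instance and the psycopg2 driver (both are assumptions, not part of this commit):

import psycopg2

# Hypothetical connection parameters; adjust for the target server.
conn = psycopg2.connect(dbname='postgres', user='postgres', host='localhost')
cur = conn.cursor()
cur.execute("""
    SELECT name, setting, unit, category, short_desc, context, vartype,
           source, min_val, max_val, boot_val, reset_val
    FROM pg_settings
    WHERE name = %s
""", ('shared_buffers',))
print(cur.fetchone())
cur.close()
conn.close()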

View File

@@ -0,0 +1,43 @@
#
# OtterTune - create_metric_settings.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import json
import shutil


def main():
    final_metrics = []
    with open('oracle.txt', 'r') as f:
        odd = 0
        entry = {}
        fields = {}
        lines = f.readlines()
        # Every non-empty line that is not the 'NAME' header or a dashed
        # separator is treated as a metric name from the Oracle statistics
        # dump. Note that 'odd' is never updated, so the branch below runs
        # for every such line.
        for line in lines:
            line = line.strip().replace("\n", "")
            if not line:
                continue
            if line == 'NAME' or line.startswith('-'):
                continue
            if odd == 0:
                entry = {}
                entry['model'] = 'website.MetricCatalog'
                fields = {}
                fields['name'] = "global." + line
                fields['summary'] = line
                fields['vartype'] = 2  # int
                fields['scope'] = 'global'
                fields['metric_type'] = 3  # stat
                if fields['name'] == "global.user commits":
                    fields['metric_type'] = 1  # counter
                fields['dbms'] = 18  # oracle
                entry['fields'] = fields
                final_metrics.append(entry)
    with open('oracle_metrics.json', 'w') as f:
        json.dump(final_metrics, f, indent=4)
    shutil.copy('oracle_metrics.json', '../../../../website/fixtures/oracle_metrics.json')


if __name__ == '__main__':
    main()
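For reference, each element appended to final_metrics above is a Django fixture entry for website.MetricCatalog. A sketch of the shape one entry takes, using the 'user commits' statistic that the script special-cases (the full list of metric names comes from oracle.txt, which is not included in this diff):

# Illustrative output shape only; the real entries are written to
# oracle_metrics.json by main() above.
example_entry = {
    "model": "website.MetricCatalog",
    "fields": {
        "name": "global.user commits",
        "summary": "user commits",
        "vartype": 2,       # int
        "scope": "global",
        "metric_type": 1,   # counter; every other metric gets 3 (stat)
        "dbms": 18,         # oracle
    },
}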

File diff suppressed because it is too large

View File

@@ -0,0 +1 @@
postgres-96_numeric_metric_names.json

View File

@@ -0,0 +1,120 @@
#
# OtterTune - create_metric_settings.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import json
import shutil
COUNTER = 1      # metric_type value for 'counter' columns
INFO = 2         # metric_type value for 'info' columns
INTEGER = 2      # vartype for numeric columns
STRING = 1       # vartype for name/text columns
TIMESTAMP = 6    # vartype for timestamp columns
NUMERIC_TYPES = ['oid', 'bigint', 'double precision', 'integer']


def load_data(filename):
    with open(filename, 'r') as f:
        csv_stats = f.readlines()
    header = csv_stats[0].strip().split(',')
    stats_dict = {}
    for line in csv_stats[1:]:
        parts = line.strip().split(',', 3)
        assert len(parts) == 4, "parts: {}".format(parts)
        stat = {}
        stat['name'] = parts[header.index('column_name')]
        stat['summary'] = parts[header.index('description')]
        stat['metric_type'] = parts[header.index('metric_type')]
        vartype = parts[header.index('data_type')]
        if vartype in NUMERIC_TYPES:
            vartype = INTEGER
        elif vartype == 'name' or vartype == 'text':
            vartype = STRING
        elif vartype.startswith('timestamp'):
            vartype = TIMESTAMP
        else:
            raise Exception(vartype)
        stat['vartype'] = vartype
        stats_dict[stat['name']] = stat
    return stats_dict


def main():
    dbstats = load_data('pg96_database_stats.csv')
    gstats = load_data('pg96_global_stats.csv')
    istats = load_data('pg96_index_stats.csv')
    tstats = load_data('pg96_table_stats.csv')

    with open('metrics_sample.json', 'r') as f:
        metrics = json.load(f)

    final_metrics = []
    numeric_metric_names = []
    vartypes = set()
    for view_name, mets in sorted(metrics.items()):
        if 'database' in view_name:
            scope = 'database'
            stats = dbstats
        elif 'indexes' in view_name:
            scope = 'index'
            stats = istats
        elif 'tables' in view_name:
            scope = 'table'
            stats = tstats
        else:
            scope = 'global'
            stats = gstats
        for metric_name in mets:
            entry = {}
            entry['model'] = 'website.MetricCatalog'
            mstats = stats[metric_name]
            fields = {}
            fields['name'] = '{}.{}'.format(view_name, metric_name)
            fields['vartype'] = mstats['vartype']
            vartypes.add(fields['vartype'])
            fields['summary'] = mstats['summary']
            fields['scope'] = scope
            metric_type = mstats['metric_type']
            if metric_type == 'counter':
                numeric_metric_names.append(fields['name'])
                mt = COUNTER
            elif metric_type == 'info':
                mt = INFO
            else:
                raise Exception('Invalid metric type: {}'.format(metric_type))
            fields['metric_type'] = mt
            fields['dbms'] = 1
            entry['fields'] = fields
            final_metrics.append(entry)
            # sorted_metric_names.append(fields['name'])

    with open('postgres-96_metrics.json', 'w') as f:
        json.dump(final_metrics, f, indent=4)
    shutil.copy('postgres-96_metrics.json', '../../../../website/fixtures/postgres-96_metrics.json')

    with open('postgres-96_numeric_metric_names.json', 'w') as f:
        json.dump(numeric_metric_names, f, indent=4)

    # sorted_metrics = [{
    #     'model': 'website.PipelineResult',
    #     'fields': {
    #         "dbms": 1,
    #         "task_type": 2,
    #         "component": 4,
    #         "hardware": 17,
    #         "version_id": 0,
    #         "value": json.dumps(sorted_metric_names),
    #     }
    # }]
    # fname = 'postgres-96_sorted_metric_labels.json'
    # with open(fname, 'w') as f:
    #     json.dump(sorted_metrics, f, indent=4)
    # shutil.copy(fname, '../../../preload/')


if __name__ == '__main__':
    main()
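The PostgreSQL generator above follows the same fixture pattern but derives vartype, summary, and metric_type from the pg96_*_stats.csv files that follow, prefixes each metric with its source view, and additionally records counter metrics in postgres-96_numeric_metric_names.json. For example, xact_commit from pg_stat_database (a bigint counter per the CSV below) would produce an entry roughly like this:

# Illustrative only; main() builds the real entry from metrics_sample.json
# and pg96_database_stats.csv.
example_entry = {
    "model": "website.MetricCatalog",
    "fields": {
        "name": "pg_stat_database.xact_commit",
        "vartype": 2,       # INTEGER (bigint is listed in NUMERIC_TYPES)
        "summary": "Number of transactions in this database that have been committed",
        "scope": "database",
        "metric_type": 1,   # COUNTER, so the name also lands in the numeric-names file
        "dbms": 1,
    },
}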

View File

@@ -0,0 +1 @@
{"pg_stat_database_conflicts": ["datname", "confl_deadlock", "confl_bufferpin", "datid", "confl_tablespace", "confl_lock", "confl_snapshot"], "pg_stat_user_indexes": ["indexrelid", "relid", "indexrelname", "relname", "idx_tup_fetch", "idx_tup_read", "idx_scan", "schemaname"], "pg_stat_archiver": ["failed_count", "archived_count", "stats_reset", "last_archived_time", "last_failed_time", "last_failed_wal", "last_archived_wal"], "pg_stat_database": ["numbackends", "datname", "blks_read", "deadlocks", "tup_fetched", "tup_updated", "stats_reset", "tup_inserted", "datid", "xact_commit", "tup_deleted", "blk_read_time", "xact_rollback", "conflicts", "blks_hit", "tup_returned", "temp_files", "blk_write_time", "temp_bytes"], "pg_stat_user_tables": ["last_vacuum", "n_tup_ins", "n_dead_tup", "last_analyze", "idx_tup_fetch", "n_tup_upd", "schemaname", "seq_tup_read", "vacuum_count", "n_mod_since_analyze", "n_tup_del", "last_autovacuum", "seq_scan", "relid", "n_tup_hot_upd", "autoanalyze_count", "n_live_tup", "relname", "last_autoanalyze", "idx_scan", "autovacuum_count", "analyze_count"], "pg_stat_bgwriter": ["buffers_backend", "checkpoints_timed", "buffers_alloc", "buffers_clean", "buffers_backend_fsync", "checkpoint_sync_time", "checkpoints_req", "checkpoint_write_time", "maxwritten_clean", "buffers_checkpoint", "stats_reset"], "pg_statio_user_indexes": ["indexrelid", "relid", "indexrelname", "idx_blks_hit", "relname", "idx_blks_read", "schemaname"], "pg_statio_user_tables": ["relid", "heap_blks_hit", "tidx_blks_read", "tidx_blks_hit", "toast_blks_hit", "idx_blks_hit", "relname", "toast_blks_read", "idx_blks_read", "schemaname", "heap_blks_read"]}

View File

@@ -0,0 +1,25 @@
column_name,data_type,metric_type,description
blk_read_time,double precision,counter,"Time spent reading data file blocks by backends in this database, in milliseconds"
blks_hit,bigint,counter,"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the PostgreSQL buffer cache, not the operating system's file system cache)"
blks_read,bigint,counter,Number of disk blocks read in this database
blk_write_time,double precision,counter,"Time spent writing data file blocks by backends in this database, in milliseconds"
conflicts,bigint,counter,"Number of queries canceled due to conflicts with recovery in this database. (Conflicts occur only on standby servers; see pg_stat_database_conflicts for details.)"
datid,oid,info,OID of a database
datname,name,info,Name of this database
deadlocks,bigint,counter,Number of deadlocks detected in this database
numbackends,integer,info,Number of backends currently connected to this database. This is the only column in this view that returns a value reflecting current state; all other columns return the accumulated values since the last reset.
stats_reset,timestamp with time zone,info,Time at which these statistics were last reset
temp_bytes,bigint,counter,"Total amount of data written to temporary files by queries in this database. All temporary files are counted, regardless of why the temporary file was created, and regardless of the log_temp_files setting."
temp_files,bigint,counter,"Number of temporary files created by queries in this database. All temporary files are counted, regardless of why the temporary file was created (e.g., sorting or hashing), and regardless of the log_temp_files setting."
tup_deleted,bigint,counter,Number of rows deleted by queries in this database
tup_fetched,bigint,counter,Number of rows fetched by queries in this database
tup_inserted,bigint,counter,Number of rows inserted by queries in this database
tup_returned,bigint,counter,Number of rows returned by queries in this database
tup_updated,bigint,counter,Number of rows updated by queries in this database
xact_commit,bigint,counter,Number of transactions in this database that have been committed
xact_rollback,bigint,counter,Number of transactions in this database that have been rolled back
confl_tablespace,bigint,counter,Number of queries in this database that have been canceled due to dropped tablespaces
confl_lock,bigint,counter,Number of queries in this database that have been canceled due to lock timeouts
confl_snapshot,bigint,counter,Number of queries in this database that have been canceled due to old snapshots
confl_bufferpin,bigint,counter,Number of queries in this database that have been canceled due to pinned buffers
confl_deadlock,bigint,counter,Number of queries in this database that have been canceled due to deadlocks
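The four pg96_*_stats.csv files share this header (column_name, data_type, metric_type, description), with the description quoted whenever it contains commas. load_data() above parses each line with str.split(',', 3), which holds up only because the description is the last field and the other three never contain commas; a more defensive sketch using the standard csv module would be:

import csv

# Alternative to load_data()'s manual split; csv.DictReader also strips the
# surrounding quotes from descriptions that contain commas.
def load_stats_csv(filename):
    with open(filename, 'r') as f:
        return {row['column_name']: row for row in csv.DictReader(f)}

db_stats = load_stats_csv('pg96_database_stats.csv')
print(db_stats['xact_commit']['description'])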

View File

@@ -0,0 +1,19 @@
column_name,data_type,metric_type,description
buffers_alloc,bigint,counter,Number of buffers allocated
buffers_backend,bigint,counter,Number of buffers written directly by a backend
buffers_backend_fsync,bigint,counter,Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)
buffers_checkpoint,bigint,counter,Number of buffers written during checkpoints
buffers_clean,bigint,counter,Number of buffers written by the background writer
checkpoints_req,bigint,counter,Number of requested checkpoints that have been performed
checkpoints_timed,bigint,counter,Number of scheduled checkpoints that have been performed
checkpoint_sync_time,double precision,counter,"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds"
checkpoint_write_time,double precision,counter,"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds"
maxwritten_clean,bigint,counter,Number of times the background writer stopped a cleaning scan because it had written too many buffers
stats_reset,timestamp with time zone,info,Time at which these statistics were last reset
archived_count,bigint,counter,Number of WAL files that have been successfully archived
last_archived_wal,text,info,Name of the last WAL file successfully archived
last_archived_time,timestamp with time zone,info,Time of the last successful archive operation
failed_count,bigint,counter,Number of failed attempts for archiving WAL files
last_failed_wal,text,info,Name of the WAL file of the last failed archival operation
last_failed_time,timestamp with time zone,info,Time of the last failed archival operation
stats_reset,timestamp with time zone,info,Time at which these statistics were last reset

View File

@@ -0,0 +1,11 @@
column_name,data_type,metric_type,description
idx_blks_hit,bigint,counter,Number of buffer hits in this index
idx_blks_read,bigint,counter,Number of disk blocks read from this index
idx_scan,bigint,counter,Number of index scans initiated on this index
idx_tup_fetch,bigint,counter,Number of live table rows fetched by simple index scans using this index
idx_tup_read,bigint,counter,Number of index entries returned by scans on this index
indexrelid,oid,info,OID of this index
indexrelname,name,info,Name of this index
relid,oid,info,OID of the table for this index
relname,name,info,Name of the table for this index
schemaname,name,info,Name of the schema this index is in

View File

@@ -0,0 +1,31 @@
column_name,data_type,metric_type,description
analyze_count,bigint,counter,Number of times this table has been manually analyzed
autoanalyze_count,bigint,counter,Number of times this table has been analyzed by the autovacuum daemon
autovacuum_count,bigint,counter,Number of times this table has been vacuumed by the autovacuum daemon
heap_blks_hit,bigint,counter,Number of buffer hits in this table
heap_blks_read,bigint,counter,Number of disk blocks read from this table
idx_blks_hit,bigint,counter,Number of buffer hits in all indexes on this table
idx_blks_read,bigint,counter,Number of disk blocks read from all indexes on this table
idx_scan,bigint,counter,Number of index scans initiated on this table
idx_tup_fetch,bigint,counter,Number of live rows fetched by index scans
last_analyze,timestamp with time zone,info,Last time at which this table was manually analyzed
last_autoanalyze,timestamp with time zone,info,Last time at which this table was analyzed by the autovacuum daemon
last_autovacuum,timestamp with time zone,info,Last time at which this table was vacuumed by the autovacuum daemon
last_vacuum,timestamp with time zone,info,Last time at which this table was manually vacuumed (not counting VACUUM FULL)
n_dead_tup,bigint,counter,Estimated number of dead rows
n_live_tup,bigint,counter,Estimated number of live rows
n_tup_del,bigint,counter,Number of rows deleted
n_tup_hot_upd,bigint,counter,"Number of rows HOT updated (i.e., with no separate index update required)"
n_tup_ins,bigint,counter,Number of rows inserted
n_tup_upd,bigint,counter,Number of rows updated
n_mod_since_analyze,bigint,counter,Estimated number of rows modified since this table was last analyzed
relid,oid,info,OID of a table
relname,name,info,Name of this table
schemaname,name,info,Name of the schema that this table is in
seq_scan,bigint,counter,Number of sequential scans initiated on this table
seq_tup_read,bigint,counter,Number of live rows fetched by sequential scans
tidx_blks_hit,bigint,counter,Number of buffer hits in this table's TOAST table index (if any)
tidx_blks_read,bigint,counter,Number of disk blocks read from this table's TOAST table index (if any)
toast_blks_hit,bigint,counter,Number of buffer hits in this table's TOAST table (if any)
toast_blks_read,bigint,counter,Number of disk blocks read from this table's TOAST table (if any)
vacuum_count,bigint,counter,Number of times this table has been manually vacuumed (not counting VACUUM FULL)

View File

@@ -0,0 +1 @@
*.txt

View File

@@ -0,0 +1,97 @@
#
# OtterTune - create_pruned_metrics.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import os
import shutil
import json
import itertools
DATADIR = '/dataset/oltpbench/first_paper_experiments/analysis/workload_characterization'
CLUSTERS_FNAME = 'DetK_optimal_num_clusters.txt'
DBMSS = {'postgres-9.6': 1}
HARDWARES = {'m3.xlarge': 16}
TIMESTAMP = '2016-12-04 11:00'
CONVERT = True
TASK_TYPE = 1
MODEL = 'website.PipelineResult'
SUMMARY_MAP = {
'throughput_req_per_sec': 'Throughput (requests/second)',
'99th_lat_ms': '99th Percentile Latency (microseconds)',
'max_lat_ms': 'Maximum Latency (microseconds)',
}
def load_postgres_metrics():
with open('/dataset/oltpbench/first_paper_experiments/samples/sample.metrics', 'r') as f:
sample = json.load(f)
metric_map = {}
for query_name, entries in list(sample.items()):
assert len(entries) > 0
columns = list(entries[0].keys())
for column in columns:
if column not in metric_map:
metric_map[column] = []
metric_map[column].append(query_name)
return metric_map
def main():
for dbms, hw in itertools.product(list(DBMSS.keys()), HARDWARES):
datapath = os.path.join(DATADIR, '{}_{}'.format(dbms, hw))
if not os.path.exists(datapath):
raise IOError('Path does not exist: {}'.format(datapath))
with open(os.path.join(datapath, CLUSTERS_FNAME), 'r') as f:
num_clusters = int(f.read().strip())
with open(os.path.join(datapath, 'featured_metrics_{}.txt'.format(num_clusters)), 'r') as f:
mets = [p.strip() for p in f.read().split('\n')]
if CONVERT:
if dbms.startswith('postgres'):
metric_map = load_postgres_metrics()
pruned_metrics = []
for met in mets:
if met in SUMMARY_MAP:
pruned_metrics.append(SUMMARY_MAP[met])
else:
if met not in metric_map:
raise Exception('Unknown metric: {}'.format(met))
qnames = metric_map[met]
assert len(qnames) > 0
if len(qnames) > 1:
raise Exception(
'2+ queries have the same column name: {} ({})'.format(
met, qnames))
pruned_metrics.append('{}.{}'.format(qnames[0], met))
else:
raise NotImplementedError("Implement me!")
else:
pruned_metrics = mets
pruned_metrics = sorted(pruned_metrics)
basename = '{}_{}_pruned_metrics'.format(dbms, hw).replace('.', '')
with open(basename + '.txt', 'w') as f:
f.write('\n'.join(pruned_metrics))
django_entry = [{
'model': MODEL,
'fields': {
'dbms': DBMSS[dbms],
'hardware': HARDWARES[hw],
'creation_timestamp': TIMESTAMP,
'task_type': TASK_TYPE,
'value': json.dumps(pruned_metrics, indent=4)
}
}]
savepath = basename + '.json'
with open(savepath, 'w') as f:
json.dump(django_entry, f, indent=4)
shutil.copy(savepath, '../../preload/{}'.format(savepath))
if __name__ == '__main__':
main()
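
For reference, a minimal sketch of the view-to-column mapping that load_postgres_metrics() builds and of how a pruned metric name is expanded into its namespaced form. The two-view sample below is a hypothetical stand-in for the real sample.metrics file.

# Hypothetical sample mirroring the structure of sample.metrics
# ({view_name: [row_dict, ...]}).
sample = {
    'pg_stat_bgwriter': [{'buffers_alloc': 0, 'buffers_checkpoint': 0}],
    'pg_stat_database': [{'blks_hit': 0, 'tup_returned': 0}],
}
metric_map = {}
for view_name, rows in sample.items():
    for column in rows[0].keys():
        metric_map.setdefault(column, []).append(view_name)
# A pruned metric such as 'blks_hit' is then written out as
# 'pg_stat_database.blks_hit', matching the entries in the PipelineResult JSON below.
print('{}.{}'.format(metric_map['blks_hit'][0], 'blks_hit'))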

View File

@@ -0,0 +1,12 @@
[
{
"fields": {
"hardware": 16,
"dbms": 1,
"task_type": 1,
"creation_timestamp": "2016-12-04 11:00",
"value": "[\n \"99th Percentile Latency (microseconds)\", \n \"Maximum Latency (microseconds)\", \n \"Throughput (requests/second)\", \n \"pg_stat_bgwriter.buffers_alloc\", \n \"pg_stat_bgwriter.buffers_checkpoint\", \n \"pg_stat_bgwriter.checkpoints_req\", \n \"pg_stat_bgwriter.maxwritten_clean\", \n \"pg_stat_database.blks_hit\", \n \"pg_stat_database.tup_deleted\", \n \"pg_stat_database.tup_inserted\", \n \"pg_stat_database.tup_returned\", \n \"pg_stat_database.tup_updated\", \n \"pg_stat_user_tables.autoanalyze_count\"\n]"
},
"model": "website.PipelineResult"
}
]

View File

@@ -0,0 +1,2 @@
.vagrant
ottertune

View File

@@ -0,0 +1,36 @@
# -*- mode: ruby -*-
# vi: set ft=ruby :
VM_NAME = "OtterTuneVM"
VM_MEMORY = "2048" # MB
Vagrant.configure(2) do |config|
# The online documentation for the configuration options is located at
# https://docs.vagrantup.com
# Our box
config.vm.box = "ubuntu/xenial64"
# Customize the amount of memory on the VM:
config.vm.provider "virtualbox" do |vb|
vb.name = VM_NAME
vb.memory = VM_MEMORY
end
# SSH
config.ssh.forward_agent = true
config.ssh.forward_x11 = true
config.ssh.keep_alive = true
# Forwarded port mapping which allows access to a specific port
# within the machine from a port on the host machine. Accessing
# "localhost:8000" will access port 8000 on the guest machine.
config.vm.network :forwarded_port, guest: 8000, host: 8000
# Link current repo into VM
config.vm.synced_folder "../../../..", "/ottertune"
# Custom provisioning and setup script
config.vm.provision :shell, path: "bootstrap.sh"
end

View File

@@ -0,0 +1,48 @@
#!/usr/bin/env bash
# Variables
DBHOST=localhost
DBNAME=ottertune
DBUSER=dbuser
DBPASSWD=test123
LOG=/vagrant/vm_build.log
REPOPATH=/ottertune
SETTINGSPATH=$REPOPATH/server/website/website/settings
# Clear old log contents
> $LOG
# Install Ubuntu packages
echo -e "\n--- Installing Ubuntu packages ---\n"
apt-get -qq update
apt-get -y install python3-pip python-dev python-mysqldb rabbitmq-server gradle default-jdk libmysqlclient-dev python3-tk >> $LOG 2>&1
echo -e "\n--- Installing Python packages ---\n"
pip3 install --upgrade pip >> $LOG 2>&1
pip install -r ${REPOPATH}/server/website/requirements.txt >> $LOG 2>&1
# Install MySQL
echo -e "\n--- Install MySQL specific packages and settings ---\n"
debconf-set-selections <<< "mysql-server mysql-server/root_password password $DBPASSWD"
debconf-set-selections <<< "mysql-server mysql-server/root_password_again password $DBPASSWD"
apt-get -y install mysql-server >> $LOG 2>&1
# Setup MySQL
echo -e "\n--- Setting up the MySQL user and database ---\n"
mysql -uroot -p$DBPASSWD -e "CREATE DATABASE IF NOT EXISTS $DBNAME" >> /vagrant/vm_build.log 2>&1
mysql -uroot -p$DBPASSWD -e "GRANT ALL PRIVILEGES ON $DBNAME.* TO '$DBUSER'@'localhost' IDENTIFIED BY '$DBPASSWD'" >> $LOG 2>&1
mysql -uroot -p$DBPASSWD -e "GRANT ALL PRIVILEGES ON test_$DBNAME.* TO '$DBUSER'@'localhost' IDENTIFIED BY '$DBPASSWD'" >> $LOG 2>&1
# Update Django settings
echo -e "\n--- Updating Django settings ---\n"
if [ ! -f "$SETTINGSPATH/credentials.py" ]; then
cp $SETTINGSPATH/credentials_TEMPLATE.py $SETTINGSPATH/credentials.py >> $LOG 2>&1
sed -i -e "s/^DEBUG.*/DEBUG = True/" \
-e "s/^ALLOWED_HOSTS.*/ALLOWED_HOSTS = ['0\.0\.0\.0']/" \
-e "s/'USER': 'ADD ME\!\!'/'USER': '$DBUSER'/" \
-e "s/'PASSWORD': 'ADD ME\!\!'/'PASSWORD': '$DBPASSWD'/" \
$SETTINGSPATH/credentials.py >> $LOG 2>&1
fi
rm /usr/bin/python
ln -s /usr/bin/python3.5 /usr/bin/python

View File

@@ -0,0 +1,45 @@
#
# OtterTune - upload.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import argparse
import logging
import os
import requests
# Logging
LOG = logging.getLogger(__name__)
LOG.addHandler(logging.StreamHandler())
LOG.setLevel(logging.INFO)
def upload(datadir, upload_code, url):
params = {
'summary': open(os.path.join(datadir, 'summary.json'), 'rb'),
'knobs': open(os.path.join(datadir, 'knobs.json'), 'rb'),
'metrics_before': open(os.path.join(datadir, 'metrics_before.json'), 'rb'),
'metrics_after': open(os.path.join(datadir, 'metrics_after.json'), 'rb'),
}
response = requests.post(url,
files=params,
data={'upload_code': upload_code})
LOG.info(response.content)
def main():
parser = argparse.ArgumentParser(description="Upload generated data to the website")
parser.add_argument('datadir', type=str, nargs=1,
help='Directory containing the generated data')
parser.add_argument('upload_code', type=str, nargs=1,
help='The website\'s upload code')
parser.add_argument('url', type=str, default='http://0.0.0.0:8000/new_result/',
nargs='?', help='The upload url: server_ip/new_result/')
args = parser.parse_args()
upload(args.datadir[0], args.upload_code[0], args.url)
if __name__ == "__main__":
main()
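
A minimal usage sketch for the script above; the directory and upload code are placeholder values, and the URL is the script's default endpoint.

# Hypothetical example: ./sample_output is assumed to contain summary.json,
# knobs.json, metrics_before.json, and metrics_after.json.
from upload import upload

upload('./sample_output', 'MY_UPLOAD_CODE', 'http://0.0.0.0:8000/new_result/')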

View File

@@ -0,0 +1,98 @@
#
# OtterTune - upload_batch.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import argparse
import logging
import os
import glob
import numpy as np
import requests
# Logging
LOG = logging.getLogger(__name__)
LOG.addHandler(logging.StreamHandler())
LOG.setLevel(logging.INFO)
class ResultUploader(object):
SUMMARY_EXT = '.summary'
PARAMS_EXT = '.params'
METRICS_EXT = '.metrics'
SAMPLES_EXT = '.samples'
EXPCFG_EXT = '.expconfig'
RAW_EXT = '.csv'
REQ_EXTS = [SUMMARY_EXT, PARAMS_EXT, METRICS_EXT, SAMPLES_EXT, EXPCFG_EXT]
def __init__(self, upload_code, upload_url):
self._upload_code = upload_code
self._upload_url = upload_url
def upload_batch(self, directories, max_files=5):
for d in directories:
cluster_name = os.path.basename(d)
fnames = glob.glob(os.path.join(d, '*.summary'))
if max_files < len(fnames):
fnames = list(np.random.choice(fnames, max_files))
bases = [fn.split('.summary')[0] for fn in fnames]
# Verify required extensions exist
for base in bases:
complete = True
for ext in self.REQ_EXTS:
next_file = base + ext
if not os.path.exists(next_file):
LOG.warning("WARNING: missing file %s, skipping...", next_file)
complete = False
break
if not complete:
continue
self.upload(base, cluster_name)
def upload(self, basepath, cluster_name):
exts = list(self.REQ_EXTS)
if os.path.exists(basepath + self.RAW_EXT):
exts.append(self.RAW_EXT)
fhandlers = {ext: open(basepath + ext, 'rb') for ext in exts}
params = {
'summary_data': fhandlers[self.SUMMARY_EXT],
'db_metrics_data': fhandlers[self.METRICS_EXT],
'db_parameters_data': fhandlers[self.PARAMS_EXT],
'sample_data': fhandlers[self.SAMPLES_EXT],
'benchmark_conf_data': fhandlers[self.EXPCFG_EXT],
}
if self.RAW_EXT in fhandlers:
params['raw_data'] = fhandlers[self.RAW_EXT]
response = requests.post(self._upload_url,
files=params,
data={'upload_code': self._upload_code,
'cluster_name': cluster_name})
LOG.info(response.content)
for fh in list(fhandlers.values()):
fh.close()
def main():
parser = argparse.ArgumentParser(description="Upload generated data to the website")
parser.add_argument('upload_code', type=str, nargs=1,
help='The website\'s upload code')
parser.add_argument('server', type=str, default='http://0.0.0.0:8000',
nargs='?', help='The server\'s address (ip:port)')
args = parser.parse_args()
url = args.server + '/new_result/'
upload_code = args.upload_code[0]
uploader = ResultUploader(upload_code, url)
dirnames = glob.glob(os.path.join(os.path.expanduser(
'~'), 'Dropbox/Apps/ottertune/data/sample_data/exps_*'))[:2]
uploader.upload_batch(dirnames, max_files=3)
if __name__ == '__main__':
main()
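
A usage sketch for ResultUploader; the directories and upload code below are placeholders, and the server address mirrors the script's default.

# Hypothetical example: push at most two result sets from each experiment
# directory; sets missing any required extension are skipped by upload_batch.
from upload_batch import ResultUploader

uploader = ResultUploader('MY_UPLOAD_CODE', 'http://0.0.0.0:8000/new_result/')
uploader.upload_batch(['/data/exps_tpcc_run1', '/data/exps_tpcc_run2'], max_files=2)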

View File

@@ -0,0 +1,5 @@
#
# OtterTune - __init__.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#

View File

@@ -0,0 +1,22 @@
#
# OtterTune - runner.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Jan 29, 2018
@author: dvanaken
'''
import logging
from django.test.runner import DiscoverRunner
class BaseRunner(DiscoverRunner):
def run_tests(self, test_labels, extra_tests=None, **kwargs):
# Disable logging while running tests
logging.disable(logging.CRITICAL)
return super(BaseRunner, self).run_tests(test_labels, extra_tests, **kwargs)
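
Django selects a custom runner through the TEST_RUNNER setting; a sketch of the corresponding settings entry, where the dotted module path is an assumption about where runner.py lives in the project.

# settings.py (sketch): route test runs through BaseRunner so logging stays
# disabled while the suite executes. The dotted path below is assumed.
TEST_RUNNER = 'tests.runner.BaseRunner'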

View File

@@ -0,0 +1,277 @@
{
"global": {
"global": {
"client_min_messages": "notice",
"autovacuum_vacuum_scale_factor": "0.2",
"bonjour_name": "",
"vacuum_freeze_table_age": "150000000",
"max_worker_processes": "8",
"syslog_facility": "local0",
"transaction_read_only": "off",
"wal_compression": "off",
"log_temp_files": "-1",
"authentication_timeout": "1min",
"ssl_dh_params_file": "",
"log_lock_waits": "off",
"integer_datetimes": "on",
"archive_mode": "off",
"superuser_reserved_connections": "3",
"track_functions": "none",
"synchronous_standby_names": "",
"max_replication_slots": "10",
"enable_hashjoin": "on",
"huge_pages": "try",
"bgwriter_flush_after": "0",
"lc_monetary": "en_US.UTF-8",
"geqo_seed": "0",
"DateStyle": "ISO, MDY",
"autovacuum_analyze_threshold": "50",
"dynamic_shared_memory_type": "posix",
"autovacuum_naptime": "1min",
"cluster_name": "",
"checkpoint_completion_target": "0.5",
"log_connections": "off",
"local_preload_libraries": "",
"effective_io_concurrency": "7",
"quote_all_identifiers": "off",
"log_checkpoints": "off",
"log_statement_stats": "off",
"wal_block_size": "8192",
"max_wal_size": "1GB",
"archive_timeout": "0",
"log_filename": "postgresql-%Y-%m-%d_%H%M%S.log",
"deadlock_timeout": "1s",
"shared_preload_libraries": "",
"statement_timeout": "0",
"dynamic_library_path": "$libdir",
"force_parallel_mode": "off",
"log_rotation_age": "1d",
"ssl": "off",
"max_function_args": "100",
"checkpoint_warning": "30s",
"log_hostname": "off",
"log_truncate_on_rotation": "off",
"cursor_tuple_fraction": "0.1",
"geqo_pool_size": "0",
"parallel_tuple_cost": "0.1",
"log_parser_stats": "off",
"autovacuum_max_workers": "3",
"fsync": "on",
"min_parallel_index_scan_size": "512kB",
"post_auth_delay": "0",
"server_version_num": "100001",
"cpu_index_tuple_cost": "0.005",
"lc_ctype": "en_US.UTF-8",
"ssl_ciphers": "HIGH:MEDIUM:+3DES:!aNULL",
"cpu_operator_cost": "0.0025",
"default_with_oids": "off",
"config_file": "/Users/MacadamiaKitten/Desktop/psql_db/postgresql.conf",
"escape_string_warning": "on",
"enable_bitmapscan": "on",
"from_collapse_limit": "8",
"max_logical_replication_workers": "4",
"vacuum_cost_page_hit": "1",
"backend_flush_after": "0",
"checkpoint_timeout": "5min",
"replacement_sort_tuples": "150000",
"lc_collate": "en_US.UTF-8",
"max_stack_depth": "2MB",
"standard_conforming_strings": "on",
"syslog_sequence_numbers": "on",
"pre_auth_delay": "0",
"gin_pending_list_limit": "4MB",
"debug_print_parse": "off",
"max_pred_locks_per_page": "2",
"enable_material": "on",
"port": "5432",
"maintenance_work_mem": "64MB",
"checkpoint_flush_after": "0",
"wal_keep_segments": "0",
"operator_precedence_warning": "off",
"stats_temp_directory": "pg_stat_tmp",
"wal_receiver_status_interval": "10s",
"wal_log_hints": "off",
"max_wal_senders": "10",
"extra_float_digits": "3",
"enable_hashagg": "on",
"ssl_ecdh_curve": "prime256v1",
"log_error_verbosity": "default",
"data_checksums": "off",
"shared_buffers": "7GB",
"min_wal_size": "80MB",
"zero_damaged_pages": "off",
"logging_collector": "off",
"enable_mergejoin": "on",
"lc_numeric": "en_US.UTF-8",
"commit_siblings": "5",
"log_min_error_statement": "error",
"krb_server_keyfile": "FILE:/usr/local/etc/postgresql/krb5.keytab",
"wal_level": "replica",
"vacuum_multixact_freeze_table_age": "150000000",
"vacuum_multixact_freeze_min_age": "5000000",
"wal_retrieve_retry_interval": "5s",
"hba_file": "/Users/MacadamiaKitten/Desktop/psql_db/pg_hba.conf",
"event_source": "PostgreSQL",
"lc_messages": "en_US.UTF-8",
"autovacuum": "on",
"enable_nestloop": "on",
"log_statement": "none",
"log_replication_commands": "off",
"trace_sort": "off",
"unix_socket_group": "",
"geqo_threshold": "12",
"max_pred_locks_per_relation": "-2",
"tcp_keepalives_count": "8",
"idle_in_transaction_session_timeout": "0",
"max_files_per_process": "1000",
"log_planner_stats": "off",
"allow_system_table_mods": "off",
"debug_print_plan": "off",
"log_min_messages": "warning",
"max_parallel_workers": "8",
"log_disconnections": "off",
"db_user_namespace": "off",
"trace_recovery_messages": "log",
"row_security": "on",
"enable_gathermerge": "on",
"log_duration": "off",
"autovacuum_vacuum_threshold": "50",
"xmloption": "content",
"syslog_split_messages": "on",
"wal_sender_timeout": "1min",
"password_encryption": "md5",
"ssl_cert_file": "server.crt",
"block_size": "8192",
"vacuum_cost_delay": "0",
"log_file_mode": "0600",
"max_connections": "100",
"hot_standby": "on",
"max_sync_workers_per_subscription": "2",
"timezone_abbreviations": "Default",
"log_line_prefix": "%m [%p] ",
"transaction_deferrable": "off",
"bgwriter_lru_maxpages": "100",
"archive_command": "(disabled)",
"default_text_search_config": "pg_catalog.english",
"min_parallel_table_scan_size": "8MB",
"data_directory": "/Users/MacadamiaKitten/Desktop/psql_db",
"autovacuum_analyze_scale_factor": "0.1",
"ident_file": "/Users/MacadamiaKitten/Desktop/psql_db/pg_ident.conf",
"default_transaction_deferrable": "off",
"lo_compat_privileges": "off",
"tcp_keepalives_idle": "7200",
"session_replication_role": "origin",
"log_timezone": "US/Eastern",
"log_directory": "log",
"listen_addresses": "localhost",
"server_encoding": "UTF8",
"xmlbinary": "base64",
"unix_socket_directories": "/tmp",
"search_path": "\"$user\", public",
"temp_buffers": "8MB",
"constraint_exclusion": "partition",
"wal_consistency_checking": "",
"autovacuum_vacuum_cost_limit": "-1",
"track_activity_query_size": "1024",
"geqo_selection_bias": "2",
"work_mem": "10GB",
"geqo_generations": "0",
"bonjour": "off",
"vacuum_freeze_min_age": "50000000",
"default_tablespace": "",
"vacuum_defer_cleanup_age": "0",
"default_statistics_target": "100",
"track_activities": "on",
"geqo": "on",
"external_pid_file": "",
"synchronous_commit": "on",
"restart_after_crash": "on",
"ssl_prefer_server_ciphers": "on",
"segment_size": "1GB",
"old_snapshot_threshold": "-1",
"effective_cache_size": "4GB",
"ssl_ca_file": "",
"application_name": "",
"debug_print_rewritten": "off",
"enable_tidscan": "on",
"lock_timeout": "0",
"tcp_keepalives_interval": "75",
"bytea_output": "hex",
"log_min_duration_statement": "-1",
"max_prepared_transactions": "0",
"wal_receiver_timeout": "1min",
"parallel_setup_cost": "1000",
"default_transaction_read_only": "off",
"autovacuum_multixact_freeze_max_age": "400000000",
"log_rotation_size": "10MB",
"krb_caseins_users": "off",
"IntervalStyle": "postgres",
"track_commit_timestamp": "off",
"TimeZone": "America/New_York",
"vacuum_cost_page_dirty": "20",
"log_executor_stats": "off",
"track_io_timing": "off",
"vacuum_cost_page_miss": "10",
"enable_seqscan": "on",
"full_page_writes": "on",
"temp_tablespaces": "",
"array_nulls": "on",
"log_destination": "stderr",
"wal_writer_delay": "200ms",
"enable_indexonlyscan": "on",
"exit_on_error": "off",
"debug_assertions": "off",
"max_parallel_workers_per_gather": "2",
"check_function_bodies": "on",
"cpu_tuple_cost": "0.01",
"random_page_cost": "4",
"wal_writer_flush_after": "1MB",
"autovacuum_work_mem": "-1",
"max_standby_archive_delay": "30s",
"bgwriter_lru_multiplier": "2",
"track_counts": "on",
"trace_notify": "off",
"wal_buffers": "4MB",
"max_standby_streaming_delay": "30s",
"commit_delay": "0",
"gin_fuzzy_search_limit": "0",
"ignore_checksum_failure": "off",
"max_index_keys": "32",
"wal_sync_method": "open_datasync",
"session_preload_libraries": "",
"vacuum_cost_limit": "200",
"default_transaction_isolation": "read committed",
"hot_standby_feedback": "off",
"unix_socket_permissions": "0777",
"max_pred_locks_per_transaction": "64",
"synchronize_seqscans": "on",
"checkpoint_timing": "3min",
"backslash_quote": "safe_encoding",
"wal_segment_size": "16MB",
"max_locks_per_transaction": "64",
"ssl_key_file": "server.key",
"transform_null_equals": "off",
"ssl_crl_file": "",
"lc_time": "en_US.UTF-8",
"server_version": "10.1",
"temp_file_limit": "-1",
"debug_pretty_print": "on",
"max_identifier_length": "63",
"client_encoding": "UTF8",
"seq_page_cost": "1",
"transaction_isolation": "read committed",
"autovacuum_freeze_max_age": "200000000",
"update_process_title": "on",
"ignore_system_indexes": "off",
"log_autovacuum_min_duration": "-1",
"bgwriter_delay": "200ms",
"join_collapse_limit": "8",
"autovacuum_vacuum_cost_delay": "20ms",
"geqo_effort": "5",
"enable_sort": "on",
"syslog_ident": "postgres",
"enable_indexscan": "on"
}
},
"local": null
}
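
As a reading aid for the knob dump above, a small sketch of how a script could load it and pull out individual values; the filename is a placeholder.

import json

# Hypothetical filename for the knob dump shown above.
with open('knobs.json', 'r') as f:
    knobs = json.load(f)

# Global knobs sit one level down under the repeated 'global' key.
global_knobs = knobs['global']['global']
print(global_knobs['shared_buffers'], global_knobs['work_mem'])  # 7GB 10GB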

View File

@@ -0,0 +1,582 @@
{
"global": {
"pg_stat_archiver": {
"archived_count": "0",
"stats_reset": "2017-11-10 10:59:47.397075-05",
"failed_count": "0"
},
"pg_stat_bgwriter": {
"buffers_backend": "81032",
"checkpoints_timed": "1277",
"buffers_alloc": 7477,
"buffers_clean": "49590",
"buffers_backend_fsync": "0",
"checkpoint_sync_time": "19",
"checkpoints_req": "2",
"checkpoint_write_time": "597851",
"maxwritten_clean": "325",
"buffers_checkpoint": "33250",
"stats_reset": "2017-11-10 10:59:47.397075-05"
}
},
"local": {
"table": {
"pg_stat_user_tables": {
"customer": {
"last_autoanalyze": "2017-11-20 15:59:18.824212-05",
"n_live_tup": "60000",
"vacuum_count": "0",
"n_tup_ins": 93806,
"n_tup_hot_upd": "262",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "1510",
"n_mod_since_analyze": "1594",
"relname": "customer",
"analyze_count": "0",
"idx_scan": "125261",
"idx_tup_fetch": "85299628",
"autovacuum_count": "0",
"n_tup_upd": "1594",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"relid": "16540"
},
"district": {
"last_autoanalyze": "2017-11-20 19:23:34.201509-05",
"n_live_tup": "20",
"vacuum_count": "0",
"n_tup_ins": 94452,
"n_tup_hot_upd": "1754",
"autoanalyze_count": "2",
"n_tup_del": "0",
"n_dead_tup": "33",
"n_mod_since_analyze": "0",
"relname": "district",
"analyze_count": "0",
"idx_scan": "122234",
"idx_tup_fetch": "122234",
"autovacuum_count": "0",
"n_tup_upd": "1754",
"schemaname": "public",
"seq_scan": "2221",
"seq_tup_read": "41522",
"relid": "16549"
},
"order_line": {
"last_autoanalyze": "2017-11-20 16:00:11.017507-05",
"n_live_tup": "608373",
"vacuum_count": "0",
"n_tup_ins": 95862,
"n_tup_hot_upd": "5393",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "2550",
"n_mod_since_analyze": "16230",
"relname": "order_line",
"analyze_count": "0",
"idx_scan": "1655",
"idx_tup_fetch": "33762",
"autovacuum_count": "0",
"n_tup_upd": "7329",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"relid": "16513"
},
"new_order": {
"last_autoanalyze": "2017-11-20 16:00:11.217111-05",
"n_live_tup": "16964",
"vacuum_count": "0",
"n_tup_ins": 94900,
"n_tup_hot_upd": "0",
"autoanalyze_count": "1",
"n_tup_del": "740",
"n_dead_tup": "751",
"n_mod_since_analyze": "1629",
"relname": "new_order",
"analyze_count": "0",
"idx_scan": "1481",
"idx_tup_fetch": "1480",
"autovacuum_count": "0",
"n_tup_upd": "0",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"relid": "16518"
},
"item": {
"last_autoanalyze": "2017-11-20 15:59:26.613728-05",
"n_live_tup": "102000",
"vacuum_count": "0",
"n_tup_ins": 99887,
"n_tup_hot_upd": "0",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "0",
"n_mod_since_analyze": "2000",
"relname": "item",
"analyze_count": "0",
"idx_scan": "209020",
"idx_tup_fetch": "209009",
"autovacuum_count": "0",
"n_tup_upd": "0",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"relid": "16554"
},
"oorder": {
"last_autoanalyze": "2017-11-20 15:59:54.690984-05",
"n_live_tup": "60889",
"vacuum_count": "0",
"n_tup_ins": 93463,
"n_tup_hot_upd": "662",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "117",
"n_mod_since_analyze": "1629",
"relname": "oorder",
"analyze_count": "0",
"idx_scan": "627652",
"idx_tup_fetch": "627652",
"autovacuum_count": "0",
"n_tup_upd": "740",
"schemaname": "public",
"seq_scan": "4",
"seq_tup_read": "0",
"relid": "16528"
},
"warehouse": {
"last_autoanalyze": "2017-11-20 19:23:34.236294-05",
"n_live_tup": "2",
"vacuum_count": "0",
"n_tup_ins": 81744,
"n_tup_hot_upd": "854",
"autoanalyze_count": "2",
"n_tup_del": "0",
"n_dead_tup": "0",
"n_mod_since_analyze": "0",
"relname": "warehouse",
"last_autovacuum": "2017-11-20 19:23:34.235793-05",
"analyze_count": "0",
"idx_scan": "202634",
"idx_tup_fetch": "202634",
"autovacuum_count": "2",
"n_tup_upd": "854",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"relid": "16559"
},
"stock": {
"last_autoanalyze": "2017-11-20 15:59:01.368483-05",
"n_live_tup": "200000",
"vacuum_count": "0",
"n_tup_ins": 82611,
"n_tup_hot_upd": "5305",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "4364",
"n_mod_since_analyze": "8901",
"relname": "stock",
"analyze_count": "0",
"idx_scan": "644561",
"idx_tup_fetch": "644561",
"autovacuum_count": "0",
"n_tup_upd": "8901",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"relid": "16523"
},
"history": {
"last_autoanalyze": "2017-11-20 15:59:02.567618-05",
"n_live_tup": "60854",
"vacuum_count": "0",
"n_tup_ins": 83824,
"n_tup_hot_upd": "0",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "0",
"relname": "history",
"analyze_count": "0",
"n_mod_since_analyze": "854",
"autovacuum_count": "0",
"n_tup_upd": "0",
"schemaname": "public",
"seq_scan": "2",
"seq_tup_read": "0",
"relid": "16536"
}
},
"pg_statio_user_tables": {
"customer": {
"relid": "16540",
"idx_blks_read": "2716",
"relname": "customer",
"tidx_blks_read": "0",
"toast_blks_hit": "0",
"idx_blks_hit": "1411491",
"tidx_blks_hit": "0",
"toast_blks_read": "0",
"heap_blks_hit": "70136669",
"schemaname": "public",
"heap_blks_read": "13826"
},
"district": {
"relid": "16549",
"heap_blks_hit": "249754",
"idx_blks_hit": "122259",
"relname": "district",
"idx_blks_read": "5",
"schemaname": "public",
"heap_blks_read": "3"
},
"order_line": {
"relid": "16513",
"heap_blks_hit": "1869417",
"idx_blks_hit": "1788651",
"relname": "order_line",
"idx_blks_read": "3708",
"schemaname": "public",
"heap_blks_read": "12419"
},
"new_order": {
"relid": "16518",
"heap_blks_hit": "37856",
"idx_blks_hit": "38225",
"relname": "new_order",
"idx_blks_read": "134",
"schemaname": "public",
"heap_blks_read": "192"
},
"item": {
"relid": "16554",
"heap_blks_hit": "509702",
"idx_blks_hit": "617914",
"relname": "item",
"idx_blks_read": "877",
"schemaname": "public",
"heap_blks_read": "4542"
},
"oorder": {
"relid": "16528",
"heap_blks_hit": "1378399",
"idx_blks_hit": "3979052",
"relname": "oorder",
"idx_blks_read": "1881",
"schemaname": "public",
"heap_blks_read": "928"
},
"warehouse": {
"relid": "16559",
"heap_blks_hit": "404486",
"idx_blks_hit": "202643",
"relname": "warehouse",
"idx_blks_read": "6",
"schemaname": "public",
"heap_blks_read": "80"
},
"stock": {
"relid": "16523",
"heap_blks_hit": "1920817",
"idx_blks_hit": "2447522",
"relname": "stock",
"idx_blks_read": "1530",
"schemaname": "public",
"heap_blks_read": "11757"
},
"history": {
"schemaname": "public",
"relname": "history",
"heap_blks_hit": "184380",
"heap_blks_read": "746",
"relid": "16536"
}
}
},
"database": {
"pg_stat_database_conflicts": {
"tpcc": {
"datname": "tpcc",
"confl_deadlock": "0",
"confl_bufferpin": "0",
"datid": "16384",
"confl_tablespace": "0",
"confl_lock": "0",
"confl_snapshot": "0"
},
"template0": {
"datname": "template0",
"confl_deadlock": "0",
"confl_bufferpin": "0",
"datid": "12557",
"confl_tablespace": "0",
"confl_lock": "0",
"confl_snapshot": "0"
},
"postgres": {
"datname": "postgres",
"confl_deadlock": "0",
"confl_bufferpin": "0",
"datid": "12558",
"confl_tablespace": "0",
"confl_lock": "0",
"confl_snapshot": "0"
},
"template1": {
"datname": "template1",
"confl_deadlock": "0",
"confl_bufferpin": "0",
"datid": "1",
"confl_tablespace": "0",
"confl_lock": "0",
"confl_snapshot": "0"
}
},
"pg_stat_database": {
"tpcc": {
"numbackends": "0",
"datname": "tpcc",
"blks_read": "0",
"deadlocks": "0",
"temp_files": "0",
"blks_hit": "0",
"temp_bytes": "0",
"blk_write_time": "0",
"datid": "16384",
"xact_commit": 72957,
"blk_read_time": "0",
"xact_rollback": "0",
"conflicts": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"tup_deleted": "0",
"tup_fetched": "0"
},
"template0": {
"numbackends": "0",
"datname": "template0",
"blks_read": "0",
"deadlocks": "0",
"temp_files": "0",
"blks_hit": "0",
"temp_bytes": "0",
"blk_write_time": "0",
"datid": "12557",
"xact_commit": 95353,
"blk_read_time": "0",
"xact_rollback": "0",
"conflicts": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"tup_deleted": "0",
"tup_fetched": "0"
},
"postgres": {
"numbackends": "1",
"datname": "postgres",
"blks_read": "104188",
"deadlocks": "0",
"temp_files": "0",
"stats_reset": "2017-11-10 11:14:57.116228-05",
"blks_hit": "115229324",
"temp_bytes": "0",
"blk_write_time": "0",
"datid": "12558",
"xact_commit": 80454,
"blk_read_time": "0",
"xact_rollback": "17",
"conflicts": "0",
"tup_inserted": "2210752",
"tup_returned": "110741743",
"tup_updated": "32675",
"tup_deleted": "1818",
"tup_fetched": "103355344"
},
"template1": {
"numbackends": "0",
"datname": "template1",
"blks_read": "0",
"deadlocks": "0",
"temp_files": "0",
"blks_hit": "0",
"temp_bytes": "0",
"blk_write_time": "0",
"datid": "1",
"xact_commit": 85045,
"blk_read_time": "0",
"xact_rollback": "0",
"conflicts": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"tup_deleted": "0",
"tup_fetched": "0"
}
}
},
"indexes": {
"pg_stat_user_indexes": {
"customer": {
"indexrelid": "16564",
"relid": "16540",
"indexrelname": "idx_customer_name",
"relname": "customer",
"idx_tup_fetch": "85256809",
"idx_tup_read": "85256841",
"idx_scan": "82442",
"schemaname": "public"
},
"district": {
"indexrelid": "16552",
"relid": "16549",
"indexrelname": "district_pkey",
"relname": "district",
"idx_tup_fetch": "122234",
"idx_tup_read": "122234",
"idx_scan": "122234",
"schemaname": "public"
},
"order_line": {
"indexrelid": "16516",
"relid": "16513",
"indexrelname": "order_line_pkey",
"relname": "order_line",
"idx_tup_fetch": "33762",
"idx_tup_read": "35698",
"idx_scan": "1655",
"schemaname": "public"
},
"new_order": {
"indexrelid": "16521",
"relid": "16518",
"indexrelname": "new_order_pkey",
"relname": "new_order",
"idx_tup_fetch": "1480",
"idx_tup_read": "2200",
"idx_scan": "1481",
"schemaname": "public"
},
"item": {
"indexrelid": "16557",
"relid": "16554",
"indexrelname": "item_pkey",
"relname": "item",
"idx_tup_fetch": "209009",
"idx_tup_read": "209009",
"idx_scan": "209020",
"schemaname": "public"
},
"oorder": {
"indexrelid": "16565",
"relid": "16528",
"indexrelname": "idx_order",
"relname": "oorder",
"idx_tup_fetch": "616371",
"idx_tup_read": "616371",
"idx_scan": "616371",
"schemaname": "public"
},
"warehouse": {
"indexrelid": "16562",
"relid": "16559",
"indexrelname": "warehouse_pkey",
"relname": "warehouse",
"idx_tup_fetch": "201331",
"idx_tup_read": "202634",
"idx_scan": "202634",
"schemaname": "public"
},
"stock": {
"indexrelid": "16526",
"relid": "16523",
"indexrelname": "stock_pkey",
"relname": "stock",
"idx_tup_fetch": "644561",
"idx_tup_read": "647319",
"idx_scan": "644561",
"schemaname": "public"
}
},
"pg_statio_user_indexes": {
"customer": {
"indexrelid": "16564",
"relid": "16540",
"indexrelname": "idx_customer_name",
"idx_blks_hit": 81021,
"relname": "customer",
"idx_blks_read": "1589",
"schemaname": "public"
},
"district": {
"indexrelid": "16552",
"relid": "16549",
"indexrelname": "district_pkey",
"idx_blks_hit": 76868,
"relname": "district",
"idx_blks_read": "5",
"schemaname": "public"
},
"order_line": {
"indexrelid": "16516",
"relid": "16513",
"indexrelname": "order_line_pkey",
"idx_blks_hit": 73690,
"relname": "order_line",
"idx_blks_read": "3708",
"schemaname": "public"
},
"new_order": {
"indexrelid": "16521",
"relid": "16518",
"indexrelname": "new_order_pkey",
"idx_blks_hit": 98309,
"relname": "new_order",
"idx_blks_read": "134",
"schemaname": "public"
},
"item": {
"indexrelid": "16557",
"relid": "16554",
"indexrelname": "item_pkey",
"idx_blks_hit": 90212,
"relname": "item",
"idx_blks_read": "877",
"schemaname": "public"
},
"oorder": {
"indexrelid": "16565",
"relid": "16528",
"indexrelname": "idx_order",
"idx_blks_hit": 78961,
"relname": "oorder",
"idx_blks_read": "733",
"schemaname": "public"
},
"warehouse": {
"indexrelid": "16562",
"relid": "16559",
"indexrelname": "warehouse_pkey",
"idx_blks_hit": 96942,
"relname": "warehouse",
"idx_blks_read": "6",
"schemaname": "public"
},
"stock": {
"indexrelid": "16526",
"relid": "16523",
"indexrelname": "stock_pkey",
"idx_blks_hit": 85457,
"relname": "stock",
"idx_blks_read": "1530",
"schemaname": "public"
}
}
}
}
}

View File

@@ -0,0 +1,582 @@
{
"global": {
"pg_stat_archiver": {
"archived_count": "0",
"stats_reset": "2017-11-10 10:59:47.397075-05",
"failed_count": "0"
},
"pg_stat_bgwriter": {
"buffers_backend": "81032",
"checkpoints_timed": "1277",
"buffers_alloc": 4914,
"buffers_clean": "49590",
"buffers_backend_fsync": "0",
"checkpoint_sync_time": "19",
"checkpoints_req": "2",
"checkpoint_write_time": "597851",
"maxwritten_clean": "325",
"buffers_checkpoint": "33250",
"stats_reset": "2017-11-10 10:59:47.397075-05"
}
},
"local": {
"table": {
"pg_stat_user_tables": {
"customer": {
"last_autoanalyze": "2017-11-20 15:59:18.824212-05",
"n_live_tup": "60000",
"vacuum_count": "0",
"n_tup_ins": 46752,
"n_tup_hot_upd": "262",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "1510",
"n_mod_since_analyze": "1594",
"relname": "customer",
"analyze_count": "0",
"idx_scan": "125261",
"idx_tup_fetch": "85299628",
"autovacuum_count": "0",
"n_tup_upd": "1594",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"relid": "16540"
},
"district": {
"last_autoanalyze": "2017-11-20 19:23:34.201509-05",
"n_live_tup": "20",
"vacuum_count": "0",
"n_tup_ins": 31239,
"n_tup_hot_upd": "1754",
"autoanalyze_count": "2",
"n_tup_del": "0",
"n_dead_tup": "33",
"n_mod_since_analyze": "0",
"relname": "district",
"analyze_count": "0",
"idx_scan": "122234",
"idx_tup_fetch": "122234",
"autovacuum_count": "0",
"n_tup_upd": "1754",
"schemaname": "public",
"seq_scan": "2221",
"seq_tup_read": "41522",
"relid": "16549"
},
"order_line": {
"last_autoanalyze": "2017-11-20 16:00:11.017507-05",
"n_live_tup": "608373",
"vacuum_count": "0",
"n_tup_ins": 38861,
"n_tup_hot_upd": "5393",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "2550",
"n_mod_since_analyze": "16230",
"relname": "order_line",
"analyze_count": "0",
"idx_scan": "1655",
"idx_tup_fetch": "33762",
"autovacuum_count": "0",
"n_tup_upd": "7329",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"relid": "16513"
},
"new_order": {
"last_autoanalyze": "2017-11-20 16:00:11.217111-05",
"n_live_tup": "16964",
"vacuum_count": "0",
"n_tup_ins": 38698,
"n_tup_hot_upd": "0",
"autoanalyze_count": "1",
"n_tup_del": "740",
"n_dead_tup": "751",
"n_mod_since_analyze": "1629",
"relname": "new_order",
"analyze_count": "0",
"idx_scan": "1481",
"idx_tup_fetch": "1480",
"autovacuum_count": "0",
"n_tup_upd": "0",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"relid": "16518"
},
"item": {
"last_autoanalyze": "2017-11-20 15:59:26.613728-05",
"n_live_tup": "102000",
"vacuum_count": "0",
"n_tup_ins": 50065,
"n_tup_hot_upd": "0",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "0",
"n_mod_since_analyze": "2000",
"relname": "item",
"analyze_count": "0",
"idx_scan": "209020",
"idx_tup_fetch": "209009",
"autovacuum_count": "0",
"n_tup_upd": "0",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"relid": "16554"
},
"oorder": {
"last_autoanalyze": "2017-11-20 15:59:54.690984-05",
"n_live_tup": "60889",
"vacuum_count": "0",
"n_tup_ins": 68200,
"n_tup_hot_upd": "662",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "117",
"n_mod_since_analyze": "1629",
"relname": "oorder",
"analyze_count": "0",
"idx_scan": "627652",
"idx_tup_fetch": "627652",
"autovacuum_count": "0",
"n_tup_upd": "740",
"schemaname": "public",
"seq_scan": "4",
"seq_tup_read": "0",
"relid": "16528"
},
"warehouse": {
"last_autoanalyze": "2017-11-20 19:23:34.236294-05",
"n_live_tup": "2",
"vacuum_count": "0",
"n_tup_ins": 66112,
"n_tup_hot_upd": "854",
"autoanalyze_count": "2",
"n_tup_del": "0",
"n_dead_tup": "0",
"n_mod_since_analyze": "0",
"relname": "warehouse",
"last_autovacuum": "2017-11-20 19:23:34.235793-05",
"analyze_count": "0",
"idx_scan": "202634",
"idx_tup_fetch": "202634",
"autovacuum_count": "2",
"n_tup_upd": "854",
"schemaname": "public",
"seq_scan": "1",
"seq_tup_read": "0",
"relid": "16559"
},
"stock": {
"last_autoanalyze": "2017-11-20 15:59:01.368483-05",
"n_live_tup": "200000",
"vacuum_count": "0",
"n_tup_ins": 66271,
"n_tup_hot_upd": "5305",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "4364",
"n_mod_since_analyze": "8901",
"relname": "stock",
"analyze_count": "0",
"idx_scan": "644561",
"idx_tup_fetch": "644561",
"autovacuum_count": "0",
"n_tup_upd": "8901",
"schemaname": "public",
"seq_scan": "3",
"seq_tup_read": "0",
"relid": "16523"
},
"history": {
"last_autoanalyze": "2017-11-20 15:59:02.567618-05",
"n_live_tup": "60854",
"vacuum_count": "0",
"n_tup_ins": 67987,
"n_tup_hot_upd": "0",
"autoanalyze_count": "1",
"n_tup_del": "0",
"n_dead_tup": "0",
"relname": "history",
"analyze_count": "0",
"n_mod_since_analyze": "854",
"autovacuum_count": "0",
"n_tup_upd": "0",
"schemaname": "public",
"seq_scan": "2",
"seq_tup_read": "0",
"relid": "16536"
}
},
"pg_statio_user_tables": {
"customer": {
"relid": "16540",
"idx_blks_read": "2716",
"relname": "customer",
"tidx_blks_read": "0",
"toast_blks_hit": "0",
"idx_blks_hit": "1411491",
"tidx_blks_hit": "0",
"toast_blks_read": "0",
"heap_blks_hit": "70136669",
"schemaname": "public",
"heap_blks_read": "13826"
},
"district": {
"relid": "16549",
"heap_blks_hit": "249754",
"idx_blks_hit": "122259",
"relname": "district",
"idx_blks_read": "5",
"schemaname": "public",
"heap_blks_read": "3"
},
"order_line": {
"relid": "16513",
"heap_blks_hit": "1869417",
"idx_blks_hit": "1788651",
"relname": "order_line",
"idx_blks_read": "3708",
"schemaname": "public",
"heap_blks_read": "12419"
},
"new_order": {
"relid": "16518",
"heap_blks_hit": "37856",
"idx_blks_hit": "38225",
"relname": "new_order",
"idx_blks_read": "134",
"schemaname": "public",
"heap_blks_read": "192"
},
"item": {
"relid": "16554",
"heap_blks_hit": "509702",
"idx_blks_hit": "617914",
"relname": "item",
"idx_blks_read": "877",
"schemaname": "public",
"heap_blks_read": "4542"
},
"oorder": {
"relid": "16528",
"heap_blks_hit": "1378399",
"idx_blks_hit": "3979052",
"relname": "oorder",
"idx_blks_read": "1881",
"schemaname": "public",
"heap_blks_read": "928"
},
"warehouse": {
"relid": "16559",
"heap_blks_hit": "404486",
"idx_blks_hit": "202643",
"relname": "warehouse",
"idx_blks_read": "6",
"schemaname": "public",
"heap_blks_read": "80"
},
"stock": {
"relid": "16523",
"heap_blks_hit": "1920817",
"idx_blks_hit": "2447522",
"relname": "stock",
"idx_blks_read": "1530",
"schemaname": "public",
"heap_blks_read": "11757"
},
"history": {
"schemaname": "public",
"relname": "history",
"heap_blks_hit": "184380",
"heap_blks_read": "746",
"relid": "16536"
}
}
},
"database": {
"pg_stat_database_conflicts": {
"tpcc": {
"datname": "tpcc",
"confl_deadlock": "0",
"confl_bufferpin": "0",
"datid": "16384",
"confl_tablespace": "0",
"confl_lock": "0",
"confl_snapshot": "0"
},
"template0": {
"datname": "template0",
"confl_deadlock": "0",
"confl_bufferpin": "0",
"datid": "12557",
"confl_tablespace": "0",
"confl_lock": "0",
"confl_snapshot": "0"
},
"postgres": {
"datname": "postgres",
"confl_deadlock": "0",
"confl_bufferpin": "0",
"datid": "12558",
"confl_tablespace": "0",
"confl_lock": "0",
"confl_snapshot": "0"
},
"template1": {
"datname": "template1",
"confl_deadlock": "0",
"confl_bufferpin": "0",
"datid": "1",
"confl_tablespace": "0",
"confl_lock": "0",
"confl_snapshot": "0"
}
},
"pg_stat_database": {
"tpcc": {
"numbackends": "0",
"datname": "tpcc",
"blks_read": "0",
"deadlocks": "0",
"temp_files": "0",
"blks_hit": "0",
"temp_bytes": "0",
"blk_write_time": "0",
"datid": "16384",
"xact_commit": 37612,
"blk_read_time": "0",
"xact_rollback": "0",
"conflicts": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"tup_deleted": "0",
"tup_fetched": "0"
},
"template0": {
"numbackends": "0",
"datname": "template0",
"blks_read": "0",
"deadlocks": "0",
"temp_files": "0",
"blks_hit": "0",
"temp_bytes": "0",
"blk_write_time": "0",
"datid": "12557",
"xact_commit": 49929,
"blk_read_time": "0",
"xact_rollback": "0",
"conflicts": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"tup_deleted": "0",
"tup_fetched": "0"
},
"postgres": {
"numbackends": "1",
"datname": "postgres",
"blks_read": "104188",
"deadlocks": "0",
"temp_files": "0",
"stats_reset": "2017-11-10 11:14:57.116228-05",
"blks_hit": "115229324",
"temp_bytes": "0",
"blk_write_time": "0",
"datid": "12558",
"xact_commit": 52595,
"blk_read_time": "0",
"xact_rollback": "17",
"conflicts": "0",
"tup_inserted": "2210752",
"tup_returned": "110741743",
"tup_updated": "32675",
"tup_deleted": "1818",
"tup_fetched": "103355344"
},
"template1": {
"numbackends": "0",
"datname": "template1",
"blks_read": "0",
"deadlocks": "0",
"temp_files": "0",
"blks_hit": "0",
"temp_bytes": "0",
"blk_write_time": "0",
"datid": "1",
"xact_commit": 39030,
"blk_read_time": "0",
"xact_rollback": "0",
"conflicts": "0",
"tup_inserted": "0",
"tup_returned": "0",
"tup_updated": "0",
"tup_deleted": "0",
"tup_fetched": "0"
}
}
},
"indexes": {
"pg_stat_user_indexes": {
"customer": {
"indexrelid": "16564",
"relid": "16540",
"indexrelname": "idx_customer_name",
"relname": "customer",
"idx_tup_fetch": "85256809",
"idx_tup_read": "85256841",
"idx_scan": "82442",
"schemaname": "public"
},
"district": {
"indexrelid": "16552",
"relid": "16549",
"indexrelname": "district_pkey",
"relname": "district",
"idx_tup_fetch": "122234",
"idx_tup_read": "122234",
"idx_scan": "122234",
"schemaname": "public"
},
"order_line": {
"indexrelid": "16516",
"relid": "16513",
"indexrelname": "order_line_pkey",
"relname": "order_line",
"idx_tup_fetch": "33762",
"idx_tup_read": "35698",
"idx_scan": "1655",
"schemaname": "public"
},
"new_order": {
"indexrelid": "16521",
"relid": "16518",
"indexrelname": "new_order_pkey",
"relname": "new_order",
"idx_tup_fetch": "1480",
"idx_tup_read": "2200",
"idx_scan": "1481",
"schemaname": "public"
},
"item": {
"indexrelid": "16557",
"relid": "16554",
"indexrelname": "item_pkey",
"relname": "item",
"idx_tup_fetch": "209009",
"idx_tup_read": "209009",
"idx_scan": "209020",
"schemaname": "public"
},
"oorder": {
"indexrelid": "16565",
"relid": "16528",
"indexrelname": "idx_order",
"relname": "oorder",
"idx_tup_fetch": "616371",
"idx_tup_read": "616371",
"idx_scan": "616371",
"schemaname": "public"
},
"warehouse": {
"indexrelid": "16562",
"relid": "16559",
"indexrelname": "warehouse_pkey",
"relname": "warehouse",
"idx_tup_fetch": "201331",
"idx_tup_read": "202634",
"idx_scan": "202634",
"schemaname": "public"
},
"stock": {
"indexrelid": "16526",
"relid": "16523",
"indexrelname": "stock_pkey",
"relname": "stock",
"idx_tup_fetch": "644561",
"idx_tup_read": "647319",
"idx_scan": "644561",
"schemaname": "public"
}
},
"pg_statio_user_indexes": {
"customer": {
"indexrelid": "16564",
"relid": "16540",
"indexrelname": "idx_customer_name",
"idx_blks_hit": 43641,
"relname": "customer",
"idx_blks_read": "1589",
"schemaname": "public"
},
"district": {
"indexrelid": "16552",
"relid": "16549",
"indexrelname": "district_pkey",
"idx_blks_hit": 69722,
"relname": "district",
"idx_blks_read": "5",
"schemaname": "public"
},
"order_line": {
"indexrelid": "16516",
"relid": "16513",
"indexrelname": "order_line_pkey",
"idx_blks_hit": 34427,
"relname": "order_line",
"idx_blks_read": "3708",
"schemaname": "public"
},
"new_order": {
"indexrelid": "16521",
"relid": "16518",
"indexrelname": "new_order_pkey",
"idx_blks_hit": 41934,
"relname": "new_order",
"idx_blks_read": "134",
"schemaname": "public"
},
"item": {
"indexrelid": "16557",
"relid": "16554",
"indexrelname": "item_pkey",
"idx_blks_hit": 68779,
"relname": "item",
"idx_blks_read": "877",
"schemaname": "public"
},
"oorder": {
"indexrelid": "16565",
"relid": "16528",
"indexrelname": "idx_order",
"idx_blks_hit": 48553,
"relname": "oorder",
"idx_blks_read": "733",
"schemaname": "public"
},
"warehouse": {
"indexrelid": "16562",
"relid": "16559",
"indexrelname": "warehouse_pkey",
"idx_blks_hit": 69018,
"relname": "warehouse",
"idx_blks_read": "6",
"schemaname": "public"
},
"stock": {
"indexrelid": "16526",
"relid": "16523",
"indexrelname": "stock_pkey",
"idx_blks_hit": 48945,
"relname": "stock",
"idx_blks_read": "1530",
"schemaname": "public"
}
}
}
}
}
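
The two metric dumps above differ mainly in their counter values. As a purely illustrative sketch (not the server's parser logic), here is the kind of numeric delta that can be taken between them, treating the dump with the smaller counters as the earlier snapshot; that ordering is an assumption, since the file names are not shown here.

# Delta of a single counter across the two snapshots above.
before = {'pg_stat_bgwriter.buffers_alloc': 4914}
after = {'pg_stat_bgwriter.buffers_alloc': 7477}
delta = {key: after[key] - before[key] for key in after if key in before}
print(delta)  # {'pg_stat_bgwriter.buffers_alloc': 2563}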

View File

@@ -0,0 +1,8 @@
{
"workload_name": "workload-0",
"observation_time": 300,
"database_type": "postgres",
"end_time": 1513113439011,
"start_time": 1513113139011,
"database_version": "9.6"
}
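
The start_time and end_time fields appear to be epoch milliseconds, and their difference should equal observation_time (in seconds); a quick, purely illustrative consistency check on the values above.

# Illustrative check on the summary fields shown above.
summary = {
    'observation_time': 300,
    'start_time': 1513113139011,
    'end_time': 1513113439011,
}
elapsed_sec = (summary['end_time'] - summary['start_time']) / 1000.0
assert abs(elapsed_sec - summary['observation_time']) < 1.0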

View File

@@ -0,0 +1,636 @@
#
# OtterTune - test_parser.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
from abc import ABCMeta, abstractmethod
import mock
from django.test import TestCase
from website.parser.postgres import PostgresParser, Postgres96Parser
from website.types import BooleanType, VarType, KnobUnitType, MetricType
from website.models import KnobCatalog
class BaseParserTests(object, metaclass=ABCMeta):
def setUp(self):
self.test_dbms = None
def test_convert_bool(self):
mock_bool_knob = mock.Mock(spec=KnobCatalog)
for bool_val in self.test_dbms.valid_true_val:
self.assertEqual(BooleanType.TRUE,
self.test_dbms.convert_bool(bool_val, mock_bool_knob))
for bool_val in self.test_dbms.valid_false_val:
self.assertEqual(BooleanType.FALSE,
self.test_dbms.convert_bool(bool_val, mock_bool_knob))
with self.assertRaises(Exception):
self.test_dbms.convert_bool('ThisShouldNeverBeABool', mock_bool_knob)
def test_convert_enum(self):
mock_enum_knob = mock.Mock(spec=KnobCatalog)
mock_enum_knob.vartype = VarType.ENUM
mock_enum_knob.enumvals = 'apples,oranges,cake'
mock_enum_knob.name = 'Test'
self.assertEqual(self.test_dbms.convert_enum('apples', mock_enum_knob), 0)
self.assertEqual(self.test_dbms.convert_enum('oranges', mock_enum_knob), 1)
self.assertEqual(self.test_dbms.convert_enum('cake', mock_enum_knob), 2)
with self.assertRaises(Exception):
self.test_dbms.convert_enum('jackyl', mock_enum_knob)
def test_convert_integer(self):
mock_int_knob = mock.Mock(spec=KnobCatalog)
mock_int_knob.vartype = VarType.INTEGER
mock_int_knob.name = 'Test'
test_int = ['42', '-1', '0', '1', '42.0', '42.5', '42.7']
test_int_ans = [42, -1, 0, 1, 42, 42, 42]
for test_int, test_int_ans in zip(test_int, test_int_ans):
test_int_actual = self.test_dbms.convert_integer(test_int, mock_int_knob)
self.assertEqual(test_int_actual, test_int_ans)
with self.assertRaises(Exception):
self.test_dbms.convert_integer('notInt', mock_int_knob)
def test_convert_real(self):
mock_real_knob = mock.Mock(spec=KnobCatalog)
mock_real_knob.vartype = VarType.REAL
mock_real_knob.name = 'Test'
test_real = ['42.0', '42.2', '42.5', '42.7', '-1', '0', '1']
test_real_ans = [42.0, 42.2, 42.5, 42.7, -1.0, 0.0, 1.0]
for test_real, test_real_ans in zip(test_real, test_real_ans):
test_real_actual = self.test_dbms.convert_real(test_real, mock_real_knob)
self.assertEqual(test_real_actual, test_real_ans)
with self.assertRaises(Exception):
self.test_dbms.convert_real('notReal', mock_real_knob)
def test_convert_string(self):
# NOTE: Hasn't been used in any currently supported database
pass
def test_convert_timestamp(self):
# NOTE: Hasn't been used in any currently supported database
pass
@abstractmethod
def test_convert_dbms_knobs(self):
pass
@abstractmethod
def test_convert_dbms_metrics(self):
pass
@abstractmethod
def test_extract_valid_variables(self):
pass
def test_parse_helper(self):
test_view_vars = {'local': {'FAKE_KNOB': 'FAKE'}}
test_scope = 'global'
valid_vars = {}
test_parse = self.test_dbms.parse_helper(test_scope, valid_vars, test_view_vars)
self.assertEqual(len(list(test_parse.keys())), 1)
self.assertEqual(test_parse.get('local.FAKE_KNOB'), ['FAKE'])
def test_parse_dbms_variables(self):
test_dbms_vars = {'global': {'GlobalView1':
{'cpu_tuple_cost': 0.01,
'random_page_cost': 0.22},
'GlobalView2':
{'cpu_tuple_cost': 0.05,
'random_page_cost': 0.25}},
'local': {'CustomerTable':
{'LocalView1':
{'LocalObj1':
{'cpu_tuple_cost': 0.5,
'random_page_cost': 0.3}}}},
'fakeScope': None}
# NOTE: For local objects, method will not distinguish
# local objects or tables, might overwrite the variables
test_parse = self.test_dbms.parse_dbms_variables(test_dbms_vars)
self.assertEqual(len(list(test_parse.keys())), 6)
self.assertEqual(test_parse.get('GlobalView1.cpu_tuple_cost'), [0.01])
self.assertEqual(test_parse.get('GlobalView1.random_page_cost'), [0.22])
self.assertEqual(test_parse.get('GlobalView2.cpu_tuple_cost'), [0.05])
self.assertEqual(test_parse.get('GlobalView2.random_page_cost'), [0.25])
self.assertEqual(test_parse.get('LocalView1.cpu_tuple_cost'), [0.5])
self.assertEqual(test_parse.get('LocalView1.random_page_cost'), [0.3])
test_scope = {'unknownScope': {'GlobalView1':
{'cpu_tuple_cost': 0.01,
'random_page_cost': 0.22},
'GlobalView2':
{'cpu_tuple_cost': 0.05,
'random_page_cost': 0.25}}}
with self.assertRaises(Exception):
self.test_dbms.parse_dbms_variables(test_scope)
@abstractmethod
def test_parse_dbms_knobs(self):
pass
@abstractmethod
def test_parse_dbms_metrics(self):
pass
def test_calculate_change_in_metrics(self):
self.assertEqual(self.test_dbms.calculate_change_in_metrics({}, {}), {})
@abstractmethod
def test_create_knob_configuration(self):
pass
def test_get_nondefault_knob_settings(self):
self.assertEqual(self.test_dbms.get_nondefault_knob_settings({}), {})
def test_format_bool(self):
mock_other_knob = mock.Mock(spec=KnobCatalog)
mock_other_knob.unit = KnobUnitType.OTHER
self.assertEqual(self.test_dbms.format_bool(BooleanType.TRUE, mock_other_knob), 'on')
self.assertEqual(self.test_dbms.format_bool(BooleanType.FALSE, mock_other_knob), 'off')
def test_format_enum(self):
mock_enum_knob = mock.Mock(spec=KnobCatalog)
mock_enum_knob.enumvals = 'apple,oranges,cake'
self.assertEqual(self.test_dbms.format_enum(0, mock_enum_knob), "apple")
self.assertEqual(self.test_dbms.format_enum(1, mock_enum_knob), "oranges")
self.assertEqual(self.test_dbms.format_enum(2, mock_enum_knob), "cake")
def test_format_integer(self):
mock_other_knob = mock.Mock(spec=KnobCatalog)
mock_other_knob.unit = KnobUnitType.OTHER
test_int = [42, -1, 0, 0.5, 1, 42.0, 42.5, 42.7]
test_int_ans = [42, -1, 0, 1, 1, 42, 43, 43]
for test_int, actual_test_int in zip(test_int, test_int_ans):
self.assertEqual(
self.test_dbms.format_integer(test_int, mock_other_knob), actual_test_int)
def test_format_real(self):
mock_other_knob = mock.Mock(spec=KnobCatalog)
mock_other_knob.unit = KnobUnitType.OTHER
test_real = [42, -1, 0, 0.5, 1, 42.0, 42.5, 42.7]
test_real_ans = [42.0, -1.0, 0.0, 0.5, 1.0, 42.0, 42.5, 42.7]
for test_real, actual_test_real in zip(test_real, test_real_ans):
self.assertEqual(
self.test_dbms.format_real(test_real, mock_other_knob), actual_test_real)
def test_format_string(self):
pass
def test_format_timestamp(self):
pass
def test_format_dbms_knobs(self):
self.assertEqual(self.test_dbms.format_dbms_knobs({}), {})
test_exceptions = {'global.FAKE_KNOB': "20"}
with self.assertRaises(Exception):
self.test_dbms.format_dbms_knobs(test_exceptions)
@abstractmethod
def test_filter_numeric_metrics(self):
pass
@abstractmethod
def test_filter_tunable_knobs(self):
pass
class Postgres96ParserTests(BaseParserTests, TestCase):
def setUp(self):
self.test_dbms = Postgres96Parser(9.6)
def test_convert_dbms_knobs(self):
super(Postgres96ParserTests, self).test_convert_dbms_knobs()
test_knobs = {'global.wal_sync_method': 'open_sync', # Enum
'global.random_page_cost': 0.22, # Real
'global.archive_command': 'archive', # String
'global.cpu_tuple_cost': 0.55, # Real
'global.force_parallel_mode': 'regress', # Enum
'global.enable_hashjoin': 'on', # Bool
'global.geqo_effort': 5, # Int
'global.wal_buffers': 1024, # Int
'global.FAKE_KNOB': 20}
test_convert_knobs = self.test_dbms.convert_dbms_knobs(test_knobs)
self.assertEqual(len(list(test_convert_knobs.keys())), 3)
self.assertEqual(test_convert_knobs['global.random_page_cost'], 0.22)
self.assertEqual(test_convert_knobs['global.wal_sync_method'], 2)
self.assertEqual(test_convert_knobs['global.wal_buffers'], 1024)
test_except_knobs = {'global.wal_sync_method': '3'}
with self.assertRaises(Exception):
self.test_dbms.convert_dbms_knobs(test_except_knobs)
test_nontune_knobs = {'global.enable_hashjoin': 'on'}
self.assertEqual(self.test_dbms.convert_dbms_knobs(test_nontune_knobs), {})
def test_convert_dbms_metrics(self):
super(Postgres96ParserTests, self).test_convert_dbms_metrics()
test_metrics = {}
for key in list(self.test_dbms.numeric_metric_catalog_.keys()):
test_metrics[key] = 2
test_metrics['pg_stat_database.xact_commit'] = 10
test_metrics['pg_FAKE_METRIC'] = 0
self.assertEqual(test_metrics.get('throughput_txn_per_sec'), None)
test_convert_metrics = self.test_dbms.convert_dbms_metrics(test_metrics, 0.1)
for key, metadata in list(self.test_dbms.numeric_metric_catalog_.items()):
if (key == self.test_dbms.transactions_counter):
self.assertEqual(test_convert_metrics[key], 10 / 0.1)
continue
if metadata.metric_type == MetricType.COUNTER:
self.assertEqual(test_convert_metrics[key], 2 / 0.1)
else: # MetricType.STATISTICS
self.assertEqual(test_convert_metrics[key], 2)
self.assertEqual(test_convert_metrics['throughput_txn_per_sec'], 100)
self.assertEqual(test_convert_metrics.get('pg_FAKE_METRIC'), None)
def test_properties(self):
base_config = self.test_dbms.base_configuration_settings
base_config_set = set(base_config)
self.assertTrue('global.data_directory' in base_config_set)
self.assertTrue('global.hba_file' in base_config_set)
self.assertTrue('global.ident_file' in base_config_set)
self.assertTrue('global.external_pid_file' in base_config_set)
self.assertTrue('global.listen_addresses' in base_config_set)
self.assertTrue('global.port' in base_config_set)
self.assertTrue('global.max_connections' in base_config_set)
self.assertTrue('global.unix_socket_directories' in base_config_set)
self.assertTrue('global.log_line_prefix' in base_config_set)
self.assertTrue('global.track_counts' in base_config_set)
self.assertTrue('global.track_io_timing' in base_config_set)
self.assertTrue('global.autovacuum' in base_config_set)
self.assertTrue('global.default_text_search_config' in base_config_set)
self.assertEqual(self.test_dbms
.knob_configuration_filename, 'postgresql.conf')
self.assertEqual(self.test_dbms
.transactions_counter, 'pg_stat_database.xact_commit')
def test_parse_version_string(self):
self.assertTrue(self.test_dbms.parse_version_string("9.6.1"), "9.6")
self.assertTrue(self.test_dbms.parse_version_string("9.6.3"), "9.6")
self.assertTrue(self.test_dbms.parse_version_string("10.2.1"), "10.2")
self.assertTrue(self.test_dbms.parse_version_string("0.0.0"), "0.0")
with self.assertRaises(Exception):
self.test_dbms.parse_version_string("postgres")
with self.assertRaises(Exception):
self.test_dbms.parse_version_string("1.0")
def test_extract_valid_variables(self):
num_tunable_knobs = len(list(self.test_dbms.tunable_knob_catalog_.keys()))
test_empty, test_empty_diff = self.test_dbms.extract_valid_variables(
{}, self.test_dbms.tunable_knob_catalog_)
self.assertEqual(len(list(test_empty.keys())), num_tunable_knobs)
self.assertEqual(len(test_empty_diff), num_tunable_knobs)
test_vars = {'global.wal_sync_method': 'fsync',
'global.random_page_cost': 0.22,
'global.Wal_buffers': 1024,
'global.archive_command': 'archive',
'global.GEQO_EFFORT': 5,
'global.enable_hashjoin': 'on',
'global.cpu_tuple_cost': 0.55,
'global.force_parallel_mode': 'regress',
'global.FAKE_KNOB': 'fake'}
tune_extract, tune_diff = self.test_dbms.extract_valid_variables(
test_vars, self.test_dbms.tunable_knob_catalog_)
self.assertTrue(('miscapitalized', 'global.wal_buffers',
'global.Wal_buffers', 1024) in tune_diff)
self.assertTrue(('extra', None, 'global.GEQO_EFFORT', 5) in tune_diff)
self.assertTrue(('extra', None, 'global.enable_hashjoin', 'on') in tune_diff)
self.assertTrue(('missing', 'global.deadlock_timeout', None, None) in tune_diff)
self.assertTrue(('missing', 'global.temp_buffers', None, None) in tune_diff)
self.assertTrue(tune_extract.get('global.temp_buffers') is not None)
self.assertTrue(tune_extract.get('global.deadlock_timeout') is not None)
self.assertEqual(tune_extract.get('global.wal_buffers'), 1024)
self.assertEqual(tune_extract.get('global.Wal_buffers'), None)
self.assertEqual(len(tune_extract), len(self.test_dbms.tunable_knob_catalog_))
nontune_extract, nontune_diff = self.test_dbms.extract_valid_variables(
test_vars, self.test_dbms.knob_catalog_)
self.assertTrue(('miscapitalized', 'global.wal_buffers',
'global.Wal_buffers', 1024) in nontune_diff)
self.assertTrue(('miscapitalized', 'global.geqo_effort',
'global.GEQO_EFFORT', 5) in nontune_diff)
self.assertTrue(('extra', None, 'global.FAKE_KNOB', 'fake') in nontune_diff)
self.assertTrue(('missing', 'global.lc_ctype', None, None) in nontune_diff)
self.assertTrue(('missing', 'global.full_page_writes', None, None) in nontune_diff)
self.assertEqual(nontune_extract.get('global.wal_buffers'), 1024)
self.assertEqual(nontune_extract.get('global.geqo_effort'), 5)
self.assertEqual(nontune_extract.get('global.Wal_buffers'), None)
self.assertEqual(nontune_extract.get('global.GEQO_EFFORT'), None)
def test_convert_integer(self):
super(Postgres96ParserTests, self).test_convert_integer()
# Convert Integer
knob_unit_bytes = KnobUnitType()
knob_unit_bytes.unit = 1
knob_unit_time = KnobUnitType()
knob_unit_time.unit = 2
knob_unit_other = KnobUnitType()
knob_unit_other.unit = 3
self.assertEqual(self.test_dbms.convert_integer('5', knob_unit_other), 5)
self.assertEqual(self.test_dbms.convert_integer('0', knob_unit_other), 0)
self.assertEqual(self.test_dbms.convert_integer('0.0', knob_unit_other), 0)
self.assertEqual(self.test_dbms.convert_integer('0.5', knob_unit_other), 0)
self.assertEqual(self.test_dbms
.convert_integer('5kB', knob_unit_bytes), 5 * 1024)
self.assertEqual(self.test_dbms
.convert_integer('4MB', knob_unit_bytes), 4 * 1024 ** 2)
self.assertEqual(self.test_dbms.convert_integer('1d', knob_unit_time), 86400000)
self.assertEqual(self.test_dbms
.convert_integer('20h', knob_unit_time), 72000000)
self.assertEqual(self.test_dbms
.convert_integer('10min', knob_unit_time), 600000)
self.assertEqual(self.test_dbms.convert_integer('1s', knob_unit_time), 1000)
test_exceptions = [('A', knob_unit_other),
('', knob_unit_other),
('', knob_unit_bytes),
('', knob_unit_time),
('1S', knob_unit_time),
('1mb', knob_unit_bytes)]
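        # Each of these should raise: unrecognized values, empty strings, and
        # suffixes whose capitalization does not match the unit system.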
for failure_case, knob_unit in test_exceptions:
with self.assertRaises(Exception):
self.test_dbms.convert_integer(failure_case, knob_unit)
def test_calculate_change_in_metrics(self):
super(Postgres96ParserTests, self).test_calculate_change_in_metrics()
test_metric_start = {'pg_stat_bgwriter.buffers_alloc': 256,
'pg_stat_archiver.last_failed_wal': "today",
'pg_stat_archiver.last_failed_time': "2018-01-10 11:24:30",
'pg_stat_user_tables.n_tup_upd': 123,
'pg_stat_user_tables.relname': "Customers",
'pg_stat_user_tables.relid': 2,
'pg_stat_user_tables.last_vacuum': "2018-01-09 12:00:00",
'pg_stat_database.tup_fetched': 156,
'pg_stat_database.datname': "testOttertune",
'pg_stat_database.datid': 1,
'pg_stat_database.stats_reset': "2018-01-09 13:00:00",
'pg_stat_user_indexes.idx_scan': 23,
'pg_stat_user_indexes.relname': "Managers",
'pg_stat_user_indexes.relid': 20}
test_metric_end = {'pg_stat_bgwriter.buffers_alloc': 300,
'pg_stat_archiver.last_failed_wal': "today",
'pg_stat_archiver.last_failed_time': "2018-01-11 11:24:30",
'pg_stat_user_tables.n_tup_upd': 150,
'pg_stat_user_tables.relname': "Customers",
'pg_stat_user_tables.relid': 2,
'pg_stat_user_tables.last_vacuum': "2018-01-10 12:00:00",
'pg_stat_database.tup_fetched': 260,
'pg_stat_database.datname': "testOttertune",
'pg_stat_database.datid': 1,
'pg_stat_database.stats_reset': "2018-01-10 13:00:00",
'pg_stat_user_indexes.idx_scan': 23,
'pg_stat_user_indexes.relname': "Managers",
'pg_stat_user_indexes.relid': 20}
test_adj_metrics = self.test_dbms.calculate_change_in_metrics(
test_metric_start, test_metric_end)
self.assertEqual(test_adj_metrics['pg_stat_bgwriter.buffers_alloc'], 44)
self.assertEqual(test_adj_metrics['pg_stat_archiver.last_failed_wal'], "today")
self.assertEqual(
test_adj_metrics['pg_stat_archiver.last_failed_time'], "2018-01-11 11:24:30")
self.assertEqual(test_adj_metrics['pg_stat_user_tables.n_tup_upd'], 27)
self.assertEqual(test_adj_metrics['pg_stat_user_tables.relname'], "Customers")
self.assertEqual(test_adj_metrics['pg_stat_user_tables.relid'], 2) # MetricType.INFO
self.assertEqual(test_adj_metrics['pg_stat_user_tables.last_vacuum'], "2018-01-10 12:00:00")
self.assertEqual(test_adj_metrics['pg_stat_database.tup_fetched'], 104)
self.assertEqual(test_adj_metrics['pg_stat_database.datname'], "testOttertune")
self.assertEqual(test_adj_metrics['pg_stat_database.datid'], 1) # MetricType.INFO
self.assertEqual(test_adj_metrics['pg_stat_database.stats_reset'], "2018-01-10 13:00:00")
self.assertEqual(test_adj_metrics['pg_stat_user_indexes.idx_scan'], 0)
self.assertEqual(test_adj_metrics['pg_stat_user_indexes.relid'], 20) # MetricType.INFO
def test_create_knob_configuration(self):
empty_config = self.test_dbms.create_knob_configuration({})
self.assertEqual(empty_config, {})
tuning_knobs = {"global.autovacuum": "on",
"global.log_planner_stats": "on",
"global.cpu_tuple_cost": 0.5,
"global.FAKE_KNOB": 20,
"pg_stat_archiver.last_failed_wal": "today"}
test_config = self.test_dbms.create_knob_configuration(tuning_knobs)
actual_keys = [("autovacuum", "on"),
("log_planner_stats", "on"),
("cpu_tuple_cost", 0.5),
("FAKE_KNOB", 20)]
        self.assertEqual(len(test_config), 4)
for k, v in actual_keys:
self.assertEqual(test_config.get(k), v)
def test_format_integer(self):
test_dbms = PostgresParser(2)
knob_unit_bytes = KnobUnitType()
knob_unit_bytes.unit = 1
knob_unit_time = KnobUnitType()
knob_unit_time.unit = 2
knob_unit_other = KnobUnitType()
knob_unit_other.unit = 3
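        # format_integer() is the inverse of convert_integer(): raw byte/millisecond
        # values are rendered with the largest unit that fits, rounding down to the
        # nearest representable value (e.g. 4194500 bytes -> '4MB').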
self.assertEqual(test_dbms.format_integer(5, knob_unit_other), 5)
self.assertEqual(test_dbms.format_integer(0, knob_unit_other), 0)
self.assertEqual(test_dbms.format_integer(-1, knob_unit_other), -1)
self.assertEqual(test_dbms.format_integer(5120, knob_unit_bytes), '5kB')
self.assertEqual(test_dbms.format_integer(4194304, knob_unit_bytes), '4MB')
self.assertEqual(test_dbms.format_integer(4194500, knob_unit_bytes), '4MB')
self.assertEqual(test_dbms.format_integer(86400000, knob_unit_time), '1d')
self.assertEqual(test_dbms.format_integer(72000000, knob_unit_time), '20h')
self.assertEqual(test_dbms.format_integer(600000, knob_unit_time), '10min')
self.assertEqual(test_dbms.format_integer(1000, knob_unit_time), '1s')
self.assertEqual(test_dbms.format_integer(500, knob_unit_time), '500ms')
def test_format_dbms_knobs(self):
super(Postgres96ParserTests, self).test_format_dbms_knobs()
test_knobs = {'global.wal_sync_method': 2, # Enum
'global.random_page_cost': 0.22, # Real
'global.archive_command': "archive", # String
'global.cpu_tuple_cost': 0.55, # Real
'global.force_parallel_mode': 2, # Enum
'global.enable_hashjoin': BooleanType.TRUE, # Bool
'global.geqo_effort': 5, # Int
'global.wal_buffers': 1024} # Int
test_formatted_knobs = self.test_dbms.format_dbms_knobs(test_knobs)
self.assertEqual(test_formatted_knobs.get('global.wal_sync_method'), 'open_sync')
self.assertEqual(test_formatted_knobs.get('global.random_page_cost'), 0.22)
self.assertEqual(test_formatted_knobs.get('global.archive_command'), "archive")
self.assertEqual(test_formatted_knobs.get('global.cpu_tuple_cost'), 0.55)
self.assertEqual(test_formatted_knobs.get('global.force_parallel_mode'), 'regress')
self.assertEqual(test_formatted_knobs.get('global.enable_hashjoin'), 'on')
self.assertEqual(test_formatted_knobs.get('global.geqo_effort'), 5)
self.assertEqual(test_formatted_knobs.get('global.wal_buffers'), '1kB')
def test_filter_numeric_metrics(self):
super(Postgres96ParserTests, self).test_filter_numeric_metrics()
test_metrics = {'pg_stat_bgwriter.checkpoints_req': (2, 'global'),
'pg_stat_archiver.last_failed_wal': (1, 'global'),
'pg_stat_database.stats_reset': (6, 'database'),
'pg_statio_user_indexes.indexrelname': (1, 'index'),
'pg_stat_bgwriter.maxwritten_clean': (2, 'global'),
'pg_stat_database.tup_fetched': (2, 'database'),
'pg_statio_user_tables.heap_blks_read': (2, 'table'),
'pg_FAKE_METRIC': (2, 'database')}
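        # Only the numeric metrics should survive filtering; string/timestamp
        # metrics and metrics not in the catalog are dropped.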
filtered_metrics = self.test_dbms.filter_numeric_metrics(test_metrics)
self.assertEqual(len(list(filtered_metrics.keys())), 4)
self.assertEqual(filtered_metrics.get('pg_stat_bgwriter.checkpoints_req'),
(2, 'global'))
self.assertEqual(filtered_metrics.get('pg_stat_archiver.last_failed_wal'), None)
self.assertEqual(filtered_metrics.get('pg_stat_database.stats_reset'), None)
self.assertEqual(filtered_metrics.get('pg_statio_user_indexes.indexrelname'),
None)
self.assertEqual(filtered_metrics.get('pg_stat_bgwriter.maxwritten_clean'),
(2, 'global'))
self.assertEqual(filtered_metrics.get('pg_stat_database.tup_fetched'),
(2, 'database'))
self.assertEqual(filtered_metrics.get('pg_statio_user_tables.heap_blks_read'),
(2, 'table'))
        self.assertEqual(filtered_metrics.get('pg_FAKE_METRIC'), None)
def test_filter_tunable_knobs(self):
super(Postgres96ParserTests, self).test_filter_tunable_knobs()
test_knobs = {'global.wal_sync_method': 5,
'global.random_page_cost': 3,
'global.archive_command': 1,
'global.cpu_tuple_cost': 3,
'global.force_parallel_mode': 5,
'global.enable_hashjoin': 3,
'global.geqo_effort': 2,
'global.wal_buffers': 2,
'global.FAKE_KNOB': 2}
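        # Only knobs marked tunable in the catalog should remain (3 of the 9 above).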
filtered_knobs = self.test_dbms.filter_tunable_knobs(test_knobs)
self.assertEqual(len(list(filtered_knobs.keys())), 3)
self.assertEqual(filtered_knobs.get('global.wal_sync_method'), 5)
self.assertEqual(filtered_knobs.get('global.wal_buffers'), 2)
self.assertEqual(filtered_knobs.get('global.random_page_cost'), 3)
self.assertEqual(filtered_knobs.get('global.cpu_tuple_cost'), None)
self.assertEqual(filtered_knobs.get('global.FAKE_KNOB'), None)
def test_parse_helper(self):
super(Postgres96ParserTests, self).test_parse_helper()
test_view_vars = {'global': {'wal_sync_method': 'open_sync',
'random_page_cost': 0.22},
'local': {'FAKE_KNOB': 'FAKE'}}
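        # parse_helper() flattens the per-scope view output into '<view>.<name>'
        # keys, collecting the values into lists.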
valid_vars = {}
test_scope = 'global'
test_parse = self.test_dbms.parse_helper(test_scope, valid_vars, test_view_vars)
self.assertEqual(len(list(test_parse.keys())), 3)
self.assertEqual(test_parse.get('global.wal_sync_method'), ['open_sync'])
self.assertEqual(test_parse.get('global.random_page_cost'), [0.22])
self.assertEqual(test_parse.get('local.FAKE_KNOB'), ['FAKE'])
def test_parse_dbms_knobs(self):
test_knobs = {'global': {'global':
{'wal_sync_method': 'fsync',
'random_page_cost': 0.22,
'wal_buffers': 1024,
'archive_command': 'archive',
'geqo_effort': 5,
'enable_hashjoin': 'on',
'cpu_tuple_cost': 0.55,
'force_parallel_mode': 'regress',
'FAKE_KNOB': 'fake'}}}
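        # parse_dbms_knobs() returns a dict covering the full knob catalog plus a
        # log of the differences (extra/missing/miscapitalized knobs) it found.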
(test_parse_dict, test_parse_log) = self.test_dbms.parse_dbms_knobs(test_knobs)
self.assertEqual(len(test_parse_log), len(list(self.test_dbms.knob_catalog_.keys())) - 7)
self.assertTrue(('extra', None, 'global.FAKE_KNOB', 'fake') in test_parse_log)
self.assertEqual(len(list(test_parse_dict.keys())),
len(list(self.test_dbms.knob_catalog_.keys())))
self.assertEqual(test_parse_dict['global.wal_sync_method'], 'fsync')
self.assertEqual(test_parse_dict['global.random_page_cost'], 0.22)
def test_parse_dbms_metrics(self):
test_metrics = {'global':
{'pg_stat_archiver.last_failed_wal': "today",
'pg_stat_bgwriter.buffers_alloc': 256,
'pg_stat_archiver.last_failed_time': "2018-01-10 11:24:30"},
'database':
{'pg_stat_database.tup_fetched': 156,
'pg_stat_database.datid': 1,
'pg_stat_database.datname': "testOttertune",
'pg_stat_database.stats_reset': "2018-01-09 13:00:00"},
'table':
{'pg_stat_user_tables.last_vacuum': "2018-01-09 12:00:00",
'pg_stat_user_tables.relid': 20,
'pg_stat_user_tables.relname': "Managers",
'pg_stat_user_tables.n_tup_upd': 123},
'index':
{'pg_stat_user_indexes.idx_scan': 23,
'pg_stat_user_indexes.relname': "Customers",
'pg_stat_user_indexes.relid': 2}}
# Doesn't support table or index scope
with self.assertRaises(Exception):
test_parse_dict, test_parse_log = self.test_dbms.parse_dbms_metrics(test_metrics)
self.assertEqual(len(list(test_parse_dict.keys())),
len(list(self.test_dbms.metric_catalog_.keys())))
self.assertEqual(len(test_parse_log),
len(list(self.test_dbms.metric_catalog_.keys())) - 14)

View File

@@ -0,0 +1,189 @@
#
# OtterTune - test_tasks.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import copy
import numpy as np
from django.test import TestCase, override_settings
from django.db import transaction
from website.models import (Workload, PipelineRun, PipelineData,
Result, Session, DBMSCatalog, Hardware)
from website.tasks.periodic_tasks import (run_background_tasks,
aggregate_data,
run_workload_characterization,
run_knob_identification)
from website.types import PipelineTaskType, WorkloadStatusType
CELERY_TEST_RUNNER = 'djcelery.contrib.test_runner.CeleryTestSuiteRunner'
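# CELERY_ALWAYS_EAGER makes .delay() execute tasks synchronously in-process,
# so each background task finishes before the assertions below run.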
@override_settings(CELERY_ALWAYS_EAGER=True, TEST_RUNNER=CELERY_TEST_RUNNER)
class BackgroundTestCase(TestCase):
fixtures = ['test_website.json']
serialized_rollback = True
def testNoError(self):
result = run_background_tasks.delay()
self.assertTrue(result.successful())
def testProcessedWorkloadStatus(self):
before_workloads = Workload.objects.filter(status=WorkloadStatusType.MODIFIED)
run_background_tasks.delay()
for w in before_workloads:
self.assertEqual(w.status, WorkloadStatusType.PROCESSED)
def testNoModifiedWorkload(self):
# First Execution of Modified Workloads
run_background_tasks.delay()
first_pipeline_run = PipelineRun.objects.get_latest()
# Second Execution with no modified workloads
run_background_tasks.delay()
second_pipeline_run = PipelineRun.objects.get_latest()
# Check that the BG task has not run
self.assertEqual(first_pipeline_run.start_time, second_pipeline_run.start_time)
# Test that an empty workload is ignored by the BG task
def testEmptyWorkload(self):
with transaction.atomic():
# Create empty workload
empty_workload = Workload.objects.create_workload(dbms=DBMSCatalog.objects.get(pk=1),
hardware=Hardware.objects.get(pk=1),
name="empty_workload")
result = run_background_tasks.delay()
# Check that BG task successfully finished
self.assertTrue(result.successful())
        # Check that the empty workload is still in MODIFIED status
        self.assertEqual(empty_workload.status, WorkloadStatusType.MODIFIED)
pipeline_data = PipelineData.objects.filter(pipeline_run=PipelineRun.objects.get_latest())
        # Check that the empty workload is not in the pipeline data
self.assertNotIn(empty_workload.pk, pipeline_data.values_list('workload_id', flat=True))
    # Test that a workload that contains only one knob configuration is ignored by the BG task
def testUniqueKnobConfigurationWorkload(self):
# Get workload to copy data from
origin_workload = Workload.objects.get(pk=1)
origin_session = Session.objects.get(pk=1)
        # Create a new workload that will only contain results with a single knob configuration
fix_workload = Workload.objects.create_workload(dbms=origin_workload.dbms,
hardware=origin_workload.hardware,
name="fixed_knob_workload")
fix_knob_data = Result.objects.filter(workload=origin_workload,
session=origin_session)[0].knob_data
        # Add results that all share the same knob configuration
for res in Result.objects.filter(workload=origin_workload, session=origin_session)[:4]:
Result.objects.create_result(res.session, res.dbms, fix_workload,
fix_knob_data, res.metric_data,
res.observation_start_time,
res.observation_end_time,
res.observation_time)
result = run_background_tasks.delay()
# Check that BG task successfully finished
self.assertTrue(result.successful())
        # Check that the fixed-knob workload is still in MODIFIED status
        self.assertEqual(fix_workload.status, WorkloadStatusType.MODIFIED)
pipeline_data = PipelineData.objects.filter(pipeline_run=PipelineRun.objects.get_latest())
        # Check that the fixed-knob workload is not in the pipeline data
self.assertNotIn(fix_workload.pk, pipeline_data.values_list('workload_id', flat=True))
def testNoWorkloads(self):
# delete any existing workloads
workloads = Workload.objects.all()
workloads.delete()
# background task should not fail
result = run_background_tasks.delay()
self.assertTrue(result.successful())
def testNewPipelineRun(self):
        # This test relies on the fixture data; without any existing
        # workloads it would not exercise anything
workloads = Workload.objects.all()
if len(workloads) > 0:
runs_before = len(PipelineRun.objects.all())
run_background_tasks.delay()
runs_after = len(PipelineRun.objects.all())
self.assertEqual(runs_before + 1, runs_after)
def checkNewTask(self, task_type):
workloads = Workload.objects.all()
pruned_before = [len(PipelineData.objects.filter(
workload=workload, task_type=task_type)) for workload in workloads]
run_background_tasks.delay()
pruned_after = [len(PipelineData.objects.filter(
workload=workload, task_type=task_type)) for workload in workloads]
for before, after in zip(pruned_before, pruned_after):
self.assertEqual(before + 1, after)
def testNewPrunedMetrics(self):
self.checkNewTask(PipelineTaskType.PRUNED_METRICS)
def testNewRankedKnobs(self):
self.checkNewTask(PipelineTaskType.RANKED_KNOBS)
class AggregateTestCase(TestCase):
fixtures = ['test_website.json']
def testValidWorkload(self):
workloads = Workload.objects.all()
valid_workload = workloads[0]
wkld_results = Result.objects.filter(workload=valid_workload)
dicts = aggregate_data(wkld_results)
keys = ['data', 'rowlabels', 'columnlabels']
for d in dicts:
for k in keys:
self.assertIn(k, d)
class PrunedMetricTestCase(TestCase):
fixtures = ['test_website.json']
def testValidPrunedMetrics(self):
workloads = Workload.objects.all()
wkld_results = Result.objects.filter(workload=workloads[0])
metric_data = aggregate_data(wkld_results)[1]
pruned_metrics = run_workload_characterization(metric_data)
for m in pruned_metrics:
self.assertIn(m, metric_data['columnlabels'])
class RankedKnobTestCase(TestCase):
fixtures = ['test_website.json']
def testValidImportantKnobs(self):
workloads = Workload.objects.all()
wkld_results = Result.objects.filter(workload=workloads[0])
knob_data, metric_data = aggregate_data(wkld_results)
# instead of doing actual metric pruning by factor analysis / clustering,
# just randomly select 5 nonconstant metrics
nonconst_metric_columnlabels = []
for col, cl in zip(metric_data['data'].T, metric_data['columnlabels']):
if np.any(col != col[0]):
nonconst_metric_columnlabels.append(cl)
num_metrics = min(5, len(nonconst_metric_columnlabels))
selected_columnlabels = np.random.choice(nonconst_metric_columnlabels,
num_metrics, replace=False)
pruned_metric_idxs = [i for i, metric_name in
enumerate(metric_data['columnlabels'])
if metric_name in selected_columnlabels]
pruned_metric_data = {
'data': metric_data['data'][:, pruned_metric_idxs],
'rowlabels': copy.deepcopy(metric_data['rowlabels']),
'columnlabels': [metric_data['columnlabels'][i] for i in pruned_metric_idxs]
}
# run knob_identification using knob_data and fake pruned metrics
ranked_knobs = run_knob_identification(knob_data, pruned_metric_data,
workloads[0].dbms)
for k in ranked_knobs:
self.assertIn(k, knob_data['columnlabels'])

View File

@@ -0,0 +1,106 @@
#
# OtterTune - test_upload.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import os
from django.core.urlresolvers import reverse
from django.test import TestCase
from website.models import Result, Workload
from website.settings import PROJECT_ROOT
from .utils import (TEST_BASIC_SESSION_ID, TEST_BASIC_SESSION_UPLOAD_CODE,
TEST_PASSWORD, TEST_TUNING_SESSION_ID, TEST_TUNING_SESSION_UPLOAD_CODE,
TEST_USERNAME, TEST_WORKLOAD_ID)
class UploadResultsTests(TestCase):
fixtures = ['test_website.json']
def setUp(self):
self.client.login(username=TEST_USERNAME, password=TEST_PASSWORD)
test_files_dir = os.path.join(PROJECT_ROOT, 'tests', 'test_files')
self.upload_files = {
'metrics_before': os.path.join(test_files_dir, 'sample_metrics_start.json'),
'metrics_after': os.path.join(test_files_dir, 'sample_metrics_end.json'),
'knobs': os.path.join(test_files_dir, 'sample_knobs.json'),
'summary': os.path.join(test_files_dir, 'sample_summary.json')
}
@staticmethod
def open_files(file_info):
files = {}
for name, path in list(file_info.items()):
files[name] = open(path)
return files
@staticmethod
def close_files(files):
for name, fp in list(files.items()):
if name != 'upload_code':
fp.close()
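    # Helper: POST the sample result files to the new_result endpoint and verify
    # that exactly one new Result is created for the session.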
def upload_to_session_ok(self, session_id, upload_code):
num_initial_results = Result.objects.filter(session__id=session_id).count()
form_addr = reverse('new_result')
post_data = self.open_files(self.upload_files)
post_data['upload_code'] = upload_code
response = self.client.post(form_addr, post_data)
self.close_files(post_data)
self.assertEqual(response.status_code, 200)
num_final_results = Result.objects.filter(session__id=session_id).count()
self.assertEqual(num_final_results - num_initial_results, 1)
def upload_to_session_fail_invalidation(self, session_id, upload_code):
form_addr = reverse('new_result')
post_data = {'upload_code': upload_code}
response = self.client.post(form_addr, post_data)
self.assertEqual(response.status_code, 200)
self.assertContains(response, "New result form is not valid:")
self.assertContains(response, "This field is required", 4)
def upload_to_session_invalid_upload_code(self, session_id):
form_addr = reverse('new_result')
post_data = self.open_files(self.upload_files)
post_data['upload_code'] = "invalid_upload_code"
response = self.client.post(form_addr, post_data)
self.close_files(post_data)
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Invalid upload code")
def test_upload_form_not_post(self):
form_addr = reverse('new_result')
response = self.client.get(form_addr)
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Request type was not POST")
def test_set_modified_workload_on_upload(self):
workload0 = Workload.objects.get(pk=TEST_WORKLOAD_ID)
workload0.status = 3
workload0.save()
self.upload_to_session_ok(TEST_BASIC_SESSION_ID, TEST_BASIC_SESSION_UPLOAD_CODE)
status = Workload.objects.get(pk=TEST_WORKLOAD_ID).status
self.assertEqual(status, 1)
def test_upload_to_basic_session_ok(self):
self.upload_to_session_ok(TEST_BASIC_SESSION_ID, TEST_BASIC_SESSION_UPLOAD_CODE)
def test_upload_to_tuning_session_ok(self):
self.upload_to_session_ok(TEST_TUNING_SESSION_ID, TEST_TUNING_SESSION_UPLOAD_CODE)
def test_upload_to_basic_session_fail_invalidation(self):
self.upload_to_session_fail_invalidation(TEST_BASIC_SESSION_ID,
TEST_BASIC_SESSION_UPLOAD_CODE)
def test_upload_to_tuning_session_fail_invalidation(self):
self.upload_to_session_fail_invalidation(TEST_TUNING_SESSION_ID,
TEST_TUNING_SESSION_UPLOAD_CODE)
def test_upload_to_basic_session_invalid_upload_code(self):
self.upload_to_session_invalid_upload_code(TEST_BASIC_SESSION_ID)
def test_upload_to_tuning_session_invalid_upload_code(self):
self.upload_to_session_invalid_upload_code(TEST_TUNING_SESSION_ID)

View File

@@ -0,0 +1,333 @@
#
# OtterTune - test_utils.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
import string
import numpy as np
from django.test import TestCase
from website.utils import JSONUtil, MediaUtil, DataUtil, ConversionUtil, LabelUtil, TaskUtil
from website.parser.postgres import PostgresParser
from website.types import LabelStyleType, VarType
from website.models import Result, DBMSCatalog
class JSONUtilTest(TestCase):
def test_util(self):
json_str = \
"""{
"glossary": {
"title": "example glossary",
"GlossDiv": {
"title": "S",
"GlossList": {
"GlossEntry": {
"ID": "SGML",
"SortAs": "SGML",
"GlossTerm": "Standard Generalized Markup Language",
"Acronym": "SGML",
"Abbrev": "ISO 8879:1986",
"GlossDef": {
"para": "A meta-markup language",
"GlossSeeAlso": ["GML", "XML"]
},
"GlossSee": "markup"
}
}
}
}
}"""
compress_str = """{"glossary": {"title": "example glossary",
"GlossDiv": {"title": "S", "GlossList": {"GlossEntry": {"ID": "SGML",
"SortAs": "SGML", "GlossTerm": "Standard Generalized Markup
Language", "Acronym": "SGML", "Abbrev": "ISO 8879:1986", "GlossDef":
{"para": "A meta-markup language", "GlossSeeAlso": ["GML", "XML"]}, "GlossSee":
"markup"}}}}}"""
results = JSONUtil.loads(json_str)
self.assertEqual(list(results.keys())[0], "glossary")
self.assertTrue("title" in list(results["glossary"].keys()))
self.assertTrue("GlossDiv" in list(results["glossary"].keys()))
self.assertEqual(results["glossary"]["GlossDiv"]
["GlossList"]["GlossEntry"]["ID"], "SGML")
self.assertEqual(results["glossary"]["GlossDiv"]
["GlossList"]["GlossEntry"]["GlossSee"], "markup")
result_str = "".join(JSONUtil.dumps(results).split())
self.assertEqual(result_str, "".join(compress_str.split()))
class MediaUtilTest(TestCase):
def test_codegen(self):
code20 = MediaUtil.upload_code_generator(20)
self.assertEqual(len(code20), 20)
self.assertTrue(code20.isalnum())
code40 = MediaUtil.upload_code_generator(40)
self.assertEqual(len(code40), 40)
self.assertTrue(code40.isalnum())
digit_code = MediaUtil.upload_code_generator(40, string.digits)
self.assertEqual(len(digit_code), 40)
self.assertTrue(digit_code.isdigit())
letter_code = MediaUtil.upload_code_generator(60,
string.ascii_uppercase)
self.assertEqual(len(letter_code), 60)
self.assertTrue(letter_code.isalpha())
class TaskUtilTest(TestCase):
def test_get_task_status(self):
        # FIXME: Actually set up celery tasks instead of a dummy class?
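        # get_task_status() returns the most significant status across the tasks
        # (e.g. FAILURE/REVOKED/RETRY take precedence over PENDING and SUCCESS)
        # together with the number of successfully completed tasks.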
test_tasks = []
(status, num_complete) = TaskUtil.get_task_status(test_tasks)
self.assertTrue(status is None and num_complete == 0)
test_tasks2 = [VarType() for i in range(5)]
for task in test_tasks2:
task.status = "SUCCESS"
(status, num_complete) = TaskUtil.get_task_status(test_tasks2)
self.assertTrue(status == "SUCCESS" and num_complete == 5)
test_tasks3 = test_tasks2
test_tasks3[3].status = "FAILURE"
(status, num_complete) = TaskUtil.get_task_status(test_tasks3)
self.assertTrue(status == "FAILURE" and num_complete == 3)
test_tasks4 = test_tasks3
test_tasks4[2].status = "REVOKED"
(status, num_complete) = TaskUtil.get_task_status(test_tasks4)
self.assertTrue(status == "REVOKED" and num_complete == 2)
test_tasks5 = test_tasks4
test_tasks5[1].status = "RETRY"
(status, num_complete) = TaskUtil.get_task_status(test_tasks5)
self.assertTrue(status == "RETRY" and num_complete == 1)
test_tasks6 = [VarType() for i in range(10)]
for i, task in enumerate(test_tasks6):
task.status = "PENDING" if i % 2 == 0 else "SUCCESS"
(status, num_complete) = TaskUtil.get_task_status(test_tasks6)
self.assertTrue(status == "PENDING" and num_complete == 5)
test_tasks7 = test_tasks6
test_tasks7[9].status = "STARTED"
(status, num_complete) = TaskUtil.get_task_status(test_tasks7)
self.assertTrue(status == "STARTED" and num_complete == 4)
test_tasks8 = test_tasks7
test_tasks8[9].status = "RECEIVED"
(status, num_complete) = TaskUtil.get_task_status(test_tasks8)
self.assertTrue(status == "RECEIVED" and num_complete == 4)
with self.assertRaises(Exception):
test_tasks9 = [VarType() for i in range(1)]
test_tasks9[0].status = "attemped"
TaskUtil.get_task_status(test_tasks9)
class DataUtilTest(TestCase):
fixtures = ['test_website.json', 'postgres-96_knobs.json']
def test_aggregate(self):
workload2 = Result.objects.filter(workload=2)
num_results = Result.objects.filter(workload=2).count()
knobs = list(JSONUtil.loads(workload2[0].knob_data.data).keys())
metrics = list(JSONUtil.loads(workload2[0].metric_data.data).keys())
num_knobs = len(knobs)
num_metrics = len(metrics)
test_result = DataUtil.aggregate_data(workload2)
self.assertTrue('X_matrix' in list(test_result.keys()))
self.assertTrue('y_matrix' in list(test_result.keys()))
self.assertTrue('rowlabels' in list(test_result.keys()))
self.assertTrue('X_columnlabels' in list(test_result.keys()))
self.assertTrue('y_columnlabels' in list(test_result.keys()))
self.assertEqual(test_result['X_columnlabels'], knobs)
self.assertEqual(test_result['y_columnlabels'], metrics)
self.assertEqual(test_result['X_matrix'].shape[0], num_results)
self.assertEqual(test_result['y_matrix'].shape[0], num_results)
self.assertEqual(test_result['X_matrix'].shape[1], num_knobs)
self.assertEqual(test_result['y_matrix'].shape[1], num_metrics)
def test_combine(self):
test_dedup_row_labels = np.array(["Workload-0", "Workload-1"])
test_dedup_x = np.matrix([[0.22, 5, "string", "11:11", "fsync", True],
[0.21, 6, "string", "11:12", "fsync", True]])
test_dedup_y = np.matrix([[30, 30, 40],
[10, 10, 40]])
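        # With no duplicate knob rows, combine_duplicate_rows() should keep both
        # rows and label each with a singleton tuple.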
test_x, test_y, row_labels = DataUtil.combine_duplicate_rows(
test_dedup_x, test_dedup_y, test_dedup_row_labels)
self.assertEqual(len(test_x), len(test_y))
self.assertEqual(len(test_x), len(row_labels))
self.assertEqual(row_labels[0], tuple([test_dedup_row_labels[0]]))
self.assertEqual(row_labels[1], tuple([test_dedup_row_labels[1]]))
self.assertTrue((test_x[0] == test_dedup_x[0]).all())
self.assertTrue((test_x[1] == test_dedup_x[1]).all())
self.assertTrue((test_y[0] == test_dedup_y[0]).all())
self.assertTrue((test_y[1] == test_dedup_y[1]).all())
test_row_labels = np.array(["Workload-0",
"Workload-1",
"Workload-2",
"Workload-3"])
test_x_matrix = np.matrix([[0.22, 5, "string", "timestamp", "enum", True],
[0.3, 5, "rstring", "timestamp2", "enum", False],
[0.22, 5, "string", "timestamp", "enum", True],
[0.3, 5, "r", "timestamp2", "enum", False]])
test_y_matrix = np.matrix([[20, 30, 40],
[30, 30, 40],
[20, 30, 40],
[32, 30, 40]])
test_x, test_y, row_labels = DataUtil.combine_duplicate_rows(
test_x_matrix, test_y_matrix, test_row_labels)
self.assertTrue(len(test_x) <= len(test_x_matrix))
self.assertTrue(len(test_y) <= len(test_y_matrix))
self.assertEqual(len(test_x), len(test_y))
self.assertEqual(len(test_x), len(row_labels))
row_labels_set = set(row_labels)
self.assertTrue(tuple(["Workload-0", "Workload-2"]) in row_labels_set)
self.assertTrue(("Workload-1",) in row_labels_set)
self.assertTrue(("Workload-3",) in row_labels_set)
rows = set()
for i in test_x:
self.assertTrue(tuple(i) not in rows)
self.assertTrue(i in test_x_matrix)
rows.add(tuple(i))
rowys = set()
for i in test_y:
self.assertTrue(tuple(i) not in rowys)
self.assertTrue(i in test_y_matrix)
rowys.add(tuple(i))
def test_no_featured_categorical(self):
featured_knobs = ['global.backend_flush_after',
'global.bgwriter_delay',
'global.wal_writer_delay',
'global.work_mem']
postgres96 = DBMSCatalog.objects.get(pk=1)
categorical_info = DataUtil.dummy_encoder_helper(featured_knobs,
dbms=postgres96)
self.assertEqual(len(categorical_info['n_values']), 0)
self.assertEqual(len(categorical_info['categorical_features']), 0)
self.assertEqual(categorical_info['cat_columnlabels'], [])
self.assertEqual(categorical_info['noncat_columnlabels'], featured_knobs)
def test_featured_categorical(self):
featured_knobs = ['global.backend_flush_after',
'global.bgwriter_delay',
'global.wal_writer_delay',
'global.work_mem',
'global.wal_sync_method'] # last knob categorical
postgres96 = DBMSCatalog.objects.get(pk=1)
categorical_info = DataUtil.dummy_encoder_helper(featured_knobs,
dbms=postgres96)
self.assertEqual(len(categorical_info['n_values']), 1)
self.assertEqual(categorical_info['n_values'][0], 4)
self.assertEqual(len(categorical_info['categorical_features']), 1)
self.assertEqual(categorical_info['categorical_features'][0], 4)
self.assertEqual(categorical_info['cat_columnlabels'], ['global.wal_sync_method'])
self.assertEqual(categorical_info['noncat_columnlabels'], featured_knobs[:-1])
class ConversionUtilTest(TestCase):
def test_get_raw_size(self):
# Bytes - In Bytes
byte_test_convert = ['1PB', '2TB', '3GB', '4MB', '5kB', '6B']
byte_ans = [1024**5, 2 * 1024**4, 3 * 1024**3, 4 * 1024**2, 5 * 1024**1, 6]
for i, byte_test in enumerate(byte_test_convert):
byte_conversion = ConversionUtil.get_raw_size(
byte_test, system=PostgresParser.POSTGRES_BYTES_SYSTEM)
self.assertEqual(byte_conversion, byte_ans[i])
# Time - In Milliseconds
day_test_convert = ['1000ms', '1s', '10min', '20h', '1d']
day_ans = [1000, 1000, 600000, 72000000, 86400000]
for i, day_test in enumerate(day_test_convert):
day_conversion = ConversionUtil.get_raw_size(
day_test, system=PostgresParser.POSTGRES_TIME_SYSTEM)
self.assertEqual(day_conversion, day_ans[i])
def test_get_human_readable(self):
# Bytes
byte_test_convert = [1024**5, 2 * 1024**4, 3 * 1024**3,
4 * 1024**2, 5 * 1024**1, 6]
byte_ans = ['1PB', '2TB', '3GB', '4MB', '5kB', '6B']
for i, byte_test in enumerate(byte_test_convert):
byte_readable = ConversionUtil.get_human_readable(
byte_test, system=PostgresParser.POSTGRES_BYTES_SYSTEM)
self.assertEqual(byte_readable, byte_ans[i])
# Time
day_test_convert = [500, 1000, 55000, 600000, 72000000, 86400000]
day_ans = ['500ms', '1s', '55s', '10min', '20h', '1d']
for i, day_test in enumerate(day_test_convert):
day_readable = ConversionUtil.get_human_readable(
day_test, system=PostgresParser.POSTGRES_TIME_SYSTEM)
self.assertEqual(day_readable, day_ans[i])
class LabelUtilTest(TestCase):
def test_style_labels(self):
label_style = LabelStyleType()
test_label_map = {"Name": "Postgres",
"Test": "LabelUtils",
"DBMS": "dbms",
"??": "Dbms",
"???": "DBms",
"CapF": "random Word"}
res_title_label_map = LabelUtil.style_labels(test_label_map,
style=label_style.TITLE)
test_keys = ["Name", "Test", "DBMS", "??", "???", "CapF"]
title_ans = ["Postgres", "Labelutils", "DBMS", "DBMS", "DBMS",
"Random Word"]
for i, key in enumerate(test_keys):
self.assertEqual(res_title_label_map[key], title_ans[i])
res_capfirst_label_map = LabelUtil.style_labels(test_label_map,
style=label_style.CAPFIRST)
cap_ans = ["Postgres", "LabelUtils", "DBMS", "DBMS", "DBMS",
"Random Word"]
for i, key in enumerate(test_keys):
if (key == "???"): # DBms -> DBMS or DBms?
continue
self.assertEqual(res_capfirst_label_map[key], cap_ans[i])
res_lower_label_map = LabelUtil.style_labels(test_label_map,
style=label_style.LOWER)
lower_ans = ["postgres", "labelutils", "dbms", "dbms", "dbms",
"random word"]
for i, key in enumerate(test_keys):
self.assertEqual(res_lower_label_map[key], lower_ans[i])
with self.assertRaises(Exception):
LabelUtil.style_labels(test_label_map,
style=label_style.Invalid)

View File

@@ -0,0 +1,274 @@
#
# OtterTune - test_views.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Dec 13, 2017
@author: dvanaken
'''
from django.contrib.auth import get_user
from django.core.urlresolvers import reverse
from django.test import TestCase
from .utils import (TEST_BASIC_SESSION_ID, TEST_PASSWORD, TEST_PROJECT_ID, TEST_USERNAME)
class UserAuthViewTests(TestCase):
fixtures = ['test_user.json', 'test_user_sessions.json']
def setUp(self):
pass
def test_valid_login(self):
data = {
'username': TEST_USERNAME,
'password': TEST_PASSWORD
}
response = self.client.post(reverse('login'), data=data)
self.assertRedirects(response, reverse('home_projects'))
user = get_user(self.client)
self.assertTrue(user.is_authenticated())
def test_invalid_login(self):
data = {
'username': 'invalid_user',
'password': 'invalid_password'
}
response = self.client.post(reverse('login'), data=data)
self.assertEqual(response.status_code, 200)
user = get_user(self.client)
self.assertFalse(user.is_authenticated())
def test_login_view(self):
response = self.client.get(reverse('login'))
self.assertEqual(response.status_code, 200)
def test_new_signup(self):
response = self.client.get(reverse('signup'))
self.assertEqual(response.status_code, 200)
self.assertContains(response, "Create Your Account")
def test_logout_view(self):
self.client.logout()
user = get_user(self.client)
self.assertFalse(user.is_authenticated())
class ProjectViewsTests(TestCase):
fixtures = ['test_website.json']
def setUp(self):
self.client.login(username=TEST_USERNAME, password=TEST_PASSWORD)
def test_new_project_form(self):
response = self.client.get(reverse('new_project'))
self.assertEqual(response.status_code, 200)
def test_create_project_fail_invalidation(self):
form_addr = reverse('new_project')
post_data = {}
response = self.client.post(form_addr, post_data)
self.assertEqual(response.status_code, 200)
self.assertFormError(response, 'form', 'name', "This field is required.")
def test_create_project_ok(self):
form_addr = reverse('new_project')
post_data = {
'name': 'test_create_project',
'description': 'testing create project...'
}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
project_id = response.context['project'].pk
self.assertRedirects(response, reverse('project_sessions',
kwargs={'project_id': project_id}))
def test_edit_project_fail_invalidation(self):
form_addr = reverse('edit_project', kwargs={'project_id': TEST_PROJECT_ID})
post_data = {}
response = self.client.post(form_addr, post_data)
self.assertFormError(response, 'form', 'name', "This field is required.")
def test_edit_project_ok(self):
form_addr = reverse('edit_project', kwargs={'project_id': TEST_PROJECT_ID})
post_data = {'name': 'new_project_name'}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
self.assertRedirects(response, reverse('project_sessions',
kwargs={'project_id': TEST_PROJECT_ID}))
def test_delete_zero_project(self):
form_addr = reverse('delete_project')
post_data = {'projects': []}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
self.assertRedirects(response, reverse('home_projects'))
def test_delete_one_project(self):
form_addr = reverse('delete_project')
post_data = {'projects': [TEST_PROJECT_ID]}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
self.assertRedirects(response, reverse('home_projects'))
def test_delete_multiple_projects(self):
create_form_addr = reverse('new_project')
project_ids = []
for i in range(5):
post_data = {
'name': 'project_{}'.format(i),
'description': ""
}
response = self.client.post(create_form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
project_ids.append(response.context['project'].pk)
delete_form_addr = reverse('delete_project')
post_data = {'projects': project_ids}
response = self.client.post(delete_form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
self.assertRedirects(response, reverse('home_projects'))
class SessionViewsTests(TestCase):
fixtures = ['test_website.json']
def setUp(self):
self.client.login(username=TEST_USERNAME, password=TEST_PASSWORD)
def test_new_session_form(self):
response = self.client.get(reverse('new_session', kwargs={'project_id': TEST_PROJECT_ID}))
self.assertEqual(response.status_code, 200)
def test_create_session_fail_invalidation(self):
form_addr = reverse('new_session', kwargs={'project_id': TEST_PROJECT_ID})
post_data = {}
response = self.client.post(form_addr, post_data)
self.assertEqual(response.status_code, 200)
self.assertFormError(response, 'form', 'name', "This field is required.")
def test_create_basic_session_ok(self):
form_addr = reverse('new_session', kwargs={'project_id': TEST_PROJECT_ID})
post_data = {
'name': 'test_create_basic_session',
'description': 'testing create basic session...',
'tuning_session': 'no_tuning_session',
'cpu': '2',
'memory': '16.0',
'storage': '32',
'dbms': 1
}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
session_id = response.context['session'].pk
self.assertRedirects(response, reverse('session',
kwargs={'project_id': TEST_PROJECT_ID,
'session_id': session_id}))
def test_create_tuning_session_ok(self):
form_addr = reverse('new_session', kwargs={'project_id': TEST_PROJECT_ID})
post_data = {
'name': 'test_create_basic_session',
'description': 'testing create basic session...',
'tuning_session': 'tuning_session',
'cpu': '2',
'memory': '16.0',
'storage': '32',
'dbms': 1,
'target_objective': 'throughput_txn_per_sec'
}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
session_id = response.context['session'].pk
self.assertRedirects(response, reverse('session',
kwargs={'project_id': TEST_PROJECT_ID,
'session_id': session_id}))
def test_edit_session_fail_invalidation(self):
form_addr = reverse('edit_session', kwargs={'project_id': TEST_PROJECT_ID,
'session_id': TEST_BASIC_SESSION_ID})
post_data = {}
response = self.client.post(form_addr, post_data)
self.assertFormError(response, 'form', 'name', "This field is required.")
def test_edit_basic_session_ok(self):
form_addr = reverse('edit_session', kwargs={'project_id': TEST_PROJECT_ID,
'session_id': TEST_BASIC_SESSION_ID})
post_data = {
'name': 'new_session_name',
'description': 'testing edit basic session...',
'tuning_session': 'tuning_session',
'cpu': '2',
'memory': '16.0',
'storage': '32',
'dbms': 1,
'target_objective': 'throughput_txn_per_sec'
}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
self.assertRedirects(response, reverse('session',
kwargs={'project_id': TEST_PROJECT_ID,
'session_id': TEST_BASIC_SESSION_ID}))
def test_edit_all_knobs_ok(self):
response = self.client.get(reverse('edit_knobs',
kwargs={'project_id': TEST_PROJECT_ID,
'session_id': TEST_BASIC_SESSION_ID}))
self.assertEqual(response.status_code, 200)
def test_edit_knob_ok(self):
form_addr = reverse('edit_knobs', kwargs={'project_id': TEST_PROJECT_ID,
'session_id': TEST_BASIC_SESSION_ID})
post_data = {
'name': 'global.wal_writer_delay',
'minval': '1',
'maxval': '1000',
'tunable': 'on'
}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 204)
def test_delete_zero_sessions(self):
form_addr = reverse('delete_session', kwargs={'project_id': TEST_PROJECT_ID})
post_data = {'sessions': []}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
self.assertRedirects(response, reverse('project_sessions',
kwargs={'project_id': TEST_PROJECT_ID}))
def test_delete_one_session(self):
form_addr = reverse('delete_session', kwargs={'project_id': TEST_PROJECT_ID})
post_data = {'sessions': [TEST_BASIC_SESSION_ID]}
response = self.client.post(form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
self.assertRedirects(response, reverse('project_sessions',
kwargs={'project_id': TEST_PROJECT_ID}))
def test_delete_multiple_sessions(self):
create_form_addr = reverse('new_session', kwargs={'project_id': TEST_PROJECT_ID})
session_ids = []
for i in range(5):
post_data = {
'name': 'session_{}'.format(i),
'description': "",
'tuning_session': 'no_tuning_session',
'cpu': '2',
'memory': '16.0',
'storage': '32',
'dbms': 1,
'target_objective': 'throughput_txn_per_sec'
}
response = self.client.post(create_form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
session_ids.append(response.context['session'].pk)
delete_form_addr = reverse('delete_session', kwargs={'project_id': TEST_PROJECT_ID})
post_data = {'sessions': session_ids}
response = self.client.post(delete_form_addr, post_data, follow=True)
self.assertEqual(response.status_code, 200)
self.assertRedirects(response, reverse('project_sessions',
kwargs={'project_id': TEST_PROJECT_ID}))

View File

@@ -0,0 +1,20 @@
#
# OtterTune - utils.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
TEST_USERNAME = 'user'
TEST_PASSWORD = 'abcd123'
TEST_PROJECT_ID = 1
TEST_BASIC_SESSION_ID = 1
TEST_TUNING_SESSION_ID = 2
TEST_WORKLOAD_ID = 1
TEST_BASIC_SESSION_UPLOAD_CODE = '1234567890'
TEST_TUNING_SESSION_UPLOAD_CODE = '0987654321'

View File

@@ -0,0 +1,5 @@
#
# OtterTune - __init__.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#

View File

@@ -0,0 +1,144 @@
#
# OtterTune - admin.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
from django.contrib import admin
from djcelery.models import TaskMeta
from .models import (BackupData, DBMSCatalog, KnobCatalog,
KnobData, MetricCatalog, MetricData,
PipelineData, PipelineRun, Project,
Result, Session, Workload, Hardware,
SessionKnob)
class BaseAdmin(admin.ModelAdmin):
@staticmethod
def dbms_info(obj):
try:
return obj.dbms.full_name
except AttributeError:
return obj.full_name
class DBMSCatalogAdmin(BaseAdmin):
list_display = ['dbms_info']
class KnobCatalogAdmin(BaseAdmin):
list_display = ['name', 'dbms_info', 'tunable']
ordering = ['name', 'dbms__type', 'dbms__version']
list_filter = ['tunable']
class MetricCatalogAdmin(BaseAdmin):
list_display = ['name', 'dbms_info', 'metric_type']
ordering = ['name', 'dbms__type', 'dbms__version']
list_filter = ['metric_type']
class ProjectAdmin(admin.ModelAdmin):
list_display = ('name', 'user', 'last_update', 'creation_time')
fields = ['name', 'user', 'last_update', 'creation_time']
class SessionAdmin(admin.ModelAdmin):
list_display = ('name', 'user', 'last_update', 'creation_time')
list_display_links = ('name',)
class SessionKnobAdmin(admin.ModelAdmin):
list_display = ('knob', 'session', 'minval', 'maxval', 'tunable')
class HardwareAdmin(admin.ModelAdmin):
list_display = ('cpu', 'memory', 'storage')
class KnobDataAdmin(BaseAdmin):
list_display = ['name', 'dbms_info', 'creation_time']
fields = ['session', 'name', 'creation_time',
'knobs', 'data', 'dbms']
class MetricDataAdmin(BaseAdmin):
list_display = ['name', 'dbms_info', 'creation_time']
fields = ['session', 'name', 'creation_time',
'metrics', 'data', 'dbms']
class TaskMetaAdmin(admin.ModelAdmin):
list_display = ['id', 'status', 'date_done']
class ResultAdmin(BaseAdmin):
list_display = ['result_id', 'dbms_info', 'workload', 'creation_time',
'observation_time']
list_filter = ['dbms__type', 'dbms__version']
ordering = ['id']
@staticmethod
def result_id(obj):
return obj.id
@staticmethod
def workload(obj):
return obj.workload.name
class BackupDataAdmin(admin.ModelAdmin):
list_display = ['id', 'result_id']
@staticmethod
def result_id(obj):
return obj.id
class PipelineDataAdmin(admin.ModelAdmin):
list_display = ['id', 'version', 'task_type', 'workload',
'creation_time']
ordering = ['-creation_time']
@staticmethod
def version(obj):
return obj.pipeline_run.id
class PipelineRunAdmin(admin.ModelAdmin):
list_display = ['id', 'start_time', 'end_time']
class PipelineResultAdmin(BaseAdmin):
list_display = ['task_type', 'dbms_info',
'hardware_info', 'creation_timestamp']
@staticmethod
def hardware_info(obj):
return obj.hardware.name
class WorkloadAdmin(admin.ModelAdmin):
list_display = ['workload_id', 'name']
@staticmethod
def workload_id(obj):
return obj.pk
admin.site.register(DBMSCatalog, DBMSCatalogAdmin)
admin.site.register(KnobCatalog, KnobCatalogAdmin)
admin.site.register(MetricCatalog, MetricCatalogAdmin)
admin.site.register(Session, SessionAdmin)
admin.site.register(Project, ProjectAdmin)
admin.site.register(KnobData, KnobDataAdmin)
admin.site.register(MetricData, MetricDataAdmin)
admin.site.register(TaskMeta, TaskMetaAdmin)
admin.site.register(Result, ResultAdmin)
admin.site.register(BackupData, BackupDataAdmin)
admin.site.register(PipelineData, PipelineDataAdmin)
admin.site.register(PipelineRun, PipelineRunAdmin)
admin.site.register(Workload, WorkloadAdmin)
admin.site.register(SessionKnob, SessionKnobAdmin)
admin.site.register(Hardware, HardwareAdmin)

View File

@@ -0,0 +1,59 @@
[
{
"model":"website.DBMSCatalog",
"pk":2,
"fields":{
"type":2,
"version":"9.2"
}
},
{
"model":"website.DBMSCatalog",
"pk":3,
"fields":{
"type":2,
"version":"9.3"
}
},
{
"model":"website.DBMSCatalog",
"pk":4,
"fields":{
"type":2,
"version":"9.4"
}
},
{
"model":"website.DBMSCatalog",
"pk":5,
"fields":{
"type":2,
"version":"9.5"
}
},
{
"model":"website.DBMSCatalog",
"pk":1,
"fields":{
"type":2,
"version":"9.6"
}
},
{
"model":"website.DBMSCatalog",
"pk":9,
"fields":{
"type":9,
"version":"5.6"
}
},
{
"model":"website.DBMSCatalog",
"pk":18,
"fields":{
"type":4,
"version":"19.0.0.0.0"
}
}
]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,937 @@
[
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.buffers_backend",
"summary": "Number of buffers written directly by a backend",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.checkpoints_timed",
"summary": "Number of scheduled checkpoints that have been performed",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.buffers_alloc",
"summary": "Number of buffers allocated",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.buffers_clean",
"summary": "Number of buffers written by the background writer",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.buffers_backend_fsync",
"summary": "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.checkpoint_sync_time",
"summary": "\"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds\"",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.checkpoints_req",
"summary": "Number of requested checkpoints that have been performed",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.checkpoint_write_time",
"summary": "\"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds\"",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.maxwritten_clean",
"summary": "Number of times the background writer stopped a cleaning scan because it had written too many buffers",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.buffers_checkpoint",
"summary": "Number of buffers written during checkpoints",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_bgwriter.stats_reset",
"summary": "Time at which these statistics were last reset",
"vartype": 6,
"metric_type": 2,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.numbackends",
"summary": "Number of backends currently connected to this database. This is the only column in this view that returns a value reflecting current state; all other columns return the accumulated values since the last reset.",
"vartype": 2,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.datname",
"summary": "Name of this database",
"vartype": 1,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.blks_read",
"summary": "Number of disk blocks read in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.deadlocks",
"summary": "Number of deadlocks detected in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.tup_fetched",
"summary": "Number of rows fetched by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.tup_updated",
"summary": "Number of rows updated by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.stats_reset",
"summary": "Time at which these statistics were last reset",
"vartype": 6,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.tup_inserted",
"summary": "Number of rows inserted by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.datid",
"summary": "OID of a database",
"vartype": 2,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.xact_commit",
"summary": "Number of transactions in this database that have been committed",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.tup_deleted",
"summary": "Number of rows deleted by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.blk_read_time",
"summary": "\"Time spent reading data file blocks by backends in this database, in milliseconds\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.xact_rollback",
"summary": "Number of transactions in this database that have been rolled back",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.conflicts",
"summary": "\"Number of queries canceled due to conflicts with recovery in this database. (Conflicts occur only on standby servers; see pg_stat_database_conflicts for details.)\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.blks_hit",
"summary": "\"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the PostgreSQL buffer cache, not the operating system's file system cache)\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.tup_returned",
"summary": "Number of rows returned by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.temp_files",
"summary": "\"Number of temporary files created by queries in this database. All temporary files are counted, regardless of why the temporary file was created (e.g., sorting or hashing), and regardless of the log_temp_files setting.\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.blk_write_time",
"summary": "\"Time spent writing data file blocks by backends in this database, in milliseconds\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database.temp_bytes",
"summary": "\"Total amount of data written to temporary files by queries in this database. All temporary files are counted, regardless of why the temporary file was created, and regardless of the log_temp_files setting.\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database_conflicts.datname",
"summary": "Name of this database",
"vartype": 1,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database_conflicts.confl_deadlock",
"summary": "Number of queries in this database that have been canceled due to deadlocks",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database_conflicts.confl_bufferpin",
"summary": "Number of queries in this database that have been canceled due to pinned buffers",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database_conflicts.datid",
"summary": "OID of a database",
"vartype": 2,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database_conflicts.confl_tablespace",
"summary": "Number of queries in this database that have been canceled due to dropped tablespaces",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database_conflicts.confl_lock",
"summary": "Number of queries in this database that have been canceled due to lock timeouts",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_database_conflicts.confl_snapshot",
"summary": "Number of queries in this database that have been canceled due to old snapshots",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_indexes.indexrelid",
"summary": "OID of this index",
"vartype": 2,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_indexes.relid",
"summary": "OID of the table for this index",
"vartype": 2,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_indexes.indexrelname",
"summary": "Name of this index",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_indexes.relname",
"summary": "Name of the table for this index",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_indexes.idx_tup_fetch",
"summary": "Number of live table rows fetched by simple index scans using this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_indexes.idx_tup_read",
"summary": "Number of index entries returned by scans on this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_indexes.idx_scan",
"summary": "Number of index scans initiated on this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_indexes.schemaname",
"summary": "Name of the schema this index is in",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.last_vacuum",
"summary": "Last time at which this table was manually vacuumed (not counting VACUUM FULL)",
"vartype": 6,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.n_tup_ins",
"summary": "Number of rows inserted",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.n_dead_tup",
"summary": "Estimated number of dead rows",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.last_analyze",
"summary": "Last time at which this table was manually analyzed",
"vartype": 6,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.idx_tup_fetch",
"summary": "Number of live rows fetched by index scans",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.n_tup_upd",
"summary": "Number of rows updated",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.schemaname",
"summary": "Name of the schema that this table is in",
"vartype": 1,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.seq_tup_read",
"summary": "Number of live rows fetched by sequential scans",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.vacuum_count",
"summary": "Number of times this table has been manually vacuumed (not counting VACUUM FULL)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.n_mod_since_analyze",
"summary": "Estimated number of rows modified since this table was last analyzed",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.n_tup_del",
"summary": "Number of rows deleted",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.last_autovacuum",
"summary": "Last time at which this table was vacuumed by the autovacuum daemon",
"vartype": 6,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.seq_scan",
"summary": "Number of sequential scans initiated on this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.relid",
"summary": "OID of a table",
"vartype": 2,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.n_tup_hot_upd",
"summary": "\"Number of rows HOT updated (i.e., with no separate index update required)\"",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.autoanalyze_count",
"summary": "Number of times this table has been analyzed by the autovacuum daemon",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.n_live_tup",
"summary": "Estimated number of live rows",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.relname",
"summary": "Name of this table",
"vartype": 1,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.last_autoanalyze",
"summary": "Last time at which this table was analyzed by the autovacuum daemon",
"vartype": 6,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.idx_scan",
"summary": "Number of index scans initiated on this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.autovacuum_count",
"summary": "Number of times this table has been vacuumed by the autovacuum daemon",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_stat_user_tables.analyze_count",
"summary": "Number of times this table has been manually analyzed",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_indexes.indexrelid",
"summary": "OID of this index",
"vartype": 2,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_indexes.relid",
"summary": "OID of the table for this index",
"vartype": 2,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_indexes.indexrelname",
"summary": "Name of this index",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_indexes.idx_blks_hit",
"summary": "Number of buffer hits in this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_indexes.relname",
"summary": "Name of the table for this index",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_indexes.idx_blks_read",
"summary": "Number of disk blocks read from this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_indexes.schemaname",
"summary": "Name of the schema this index is in",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.relid",
"summary": "OID of a table",
"vartype": 2,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.heap_blks_hit",
"summary": "Number of buffer hits in this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.tidx_blks_read",
"summary": "Number of disk blocks read from this table's TOAST table index (if any)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.tidx_blks_hit",
"summary": "Number of buffer hits in this table's TOAST table index (if any)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.toast_blks_hit",
"summary": "Number of buffer hits in this table's TOAST table (if any)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.idx_blks_hit",
"summary": "Number of buffer hits in all indexes on this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.relname",
"summary": "Name of this table",
"vartype": 1,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.toast_blks_read",
"summary": "Number of disk blocks read from this table's TOAST table (if any)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.idx_blks_read",
"summary": "Number of disk blocks read from all indexes on this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.schemaname",
"summary": "Name of the schema that this table is in",
"vartype": 1,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 2,
"name": "pg_statio_user_tables.heap_blks_read",
"summary": "Number of disk blocks read from this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
}
]
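
The catalog above is a Django fixture for website.MetricCatalog (a near-identical catalog for another DBMS version follows below). Per the model definition later in this commit, vartype uses integer codes (1 = STRING, 2 = INTEGER, 6 = TIMESTAMP), metric_type distinguishes counters (1) from informational values (2), and scope records the granularity of the source statistics view. A minimal sketch, assuming the website app's models match that definition and that dbms id 2 refers to the PostgreSQL version this fixture targets, of querying the catalog once it is loaded:

from website.models import MetricCatalog

# Counter metrics (metric_type=1) collected at database scope for the DBMS
# whose catalog id is 2 in this fixture (assumed to be a PostgreSQL release).
counters = MetricCatalog.objects.filter(dbms=2, metric_type=1, scope='database')
for metric in counters:
    print(metric.name, '-', metric.summary)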

File diff suppressed because it is too large

View File

@@ -0,0 +1,937 @@
[
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.buffers_backend",
"summary": "Number of buffers written directly by a backend",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.checkpoints_timed",
"summary": "Number of scheduled checkpoints that have been performed",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.buffers_alloc",
"summary": "Number of buffers allocated",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.buffers_clean",
"summary": "Number of buffers written by the background writer",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.buffers_backend_fsync",
"summary": "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.checkpoint_sync_time",
"summary": "\"Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds\"",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.checkpoints_req",
"summary": "Number of requested checkpoints that have been performed",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.checkpoint_write_time",
"summary": "\"Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds\"",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.maxwritten_clean",
"summary": "Number of times the background writer stopped a cleaning scan because it had written too many buffers",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.buffers_checkpoint",
"summary": "Number of buffers written during checkpoints",
"vartype": 2,
"metric_type": 1,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_bgwriter.stats_reset",
"summary": "Time at which these statistics were last reset",
"vartype": 6,
"metric_type": 2,
"scope": "global"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.numbackends",
"summary": "Number of backends currently connected to this database. This is the only column in this view that returns a value reflecting current state; all other columns return the accumulated values since the last reset.",
"vartype": 2,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.datname",
"summary": "Name of this database",
"vartype": 1,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.blks_read",
"summary": "Number of disk blocks read in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.deadlocks",
"summary": "Number of deadlocks detected in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.tup_fetched",
"summary": "Number of rows fetched by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.tup_updated",
"summary": "Number of rows updated by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.stats_reset",
"summary": "Time at which these statistics were last reset",
"vartype": 6,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.tup_inserted",
"summary": "Number of rows inserted by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.datid",
"summary": "OID of a database",
"vartype": 2,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.xact_commit",
"summary": "Number of transactions in this database that have been committed",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.tup_deleted",
"summary": "Number of rows deleted by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.blk_read_time",
"summary": "\"Time spent reading data file blocks by backends in this database, in milliseconds\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.xact_rollback",
"summary": "Number of transactions in this database that have been rolled back",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.conflicts",
"summary": "\"Number of queries canceled due to conflicts with recovery in this database. (Conflicts occur only on standby servers; see pg_stat_database_conflicts for details.)\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.blks_hit",
"summary": "\"Number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the PostgreSQL buffer cache, not the operating system's file system cache)\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.tup_returned",
"summary": "Number of rows returned by queries in this database",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.temp_files",
"summary": "\"Number of temporary files created by queries in this database. All temporary files are counted, regardless of why the temporary file was created (e.g., sorting or hashing), and regardless of the log_temp_files setting.\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.blk_write_time",
"summary": "\"Time spent writing data file blocks by backends in this database, in milliseconds\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database.temp_bytes",
"summary": "\"Total amount of data written to temporary files by queries in this database. All temporary files are counted, regardless of why the temporary file was created, and regardless of the log_temp_files setting.\"",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database_conflicts.datname",
"summary": "Name of this database",
"vartype": 1,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database_conflicts.confl_deadlock",
"summary": "Number of queries in this database that have been canceled due to deadlocks",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database_conflicts.confl_bufferpin",
"summary": "Number of queries in this database that have been canceled due to pinned buffers",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database_conflicts.datid",
"summary": "OID of a database",
"vartype": 2,
"metric_type": 2,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database_conflicts.confl_tablespace",
"summary": "Number of queries in this database that have been canceled due to dropped tablespaces",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database_conflicts.confl_lock",
"summary": "Number of queries in this database that have been canceled due to lock timeouts",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_database_conflicts.confl_snapshot",
"summary": "Number of queries in this database that have been canceled due to old snapshots",
"vartype": 2,
"metric_type": 1,
"scope": "database"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_indexes.indexrelid",
"summary": "OID of this index",
"vartype": 2,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_indexes.relid",
"summary": "OID of the table for this index",
"vartype": 2,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_indexes.indexrelname",
"summary": "Name of this index",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_indexes.relname",
"summary": "Name of the table for this index",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_indexes.idx_tup_fetch",
"summary": "Number of live table rows fetched by simple index scans using this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_indexes.idx_tup_read",
"summary": "Number of index entries returned by scans on this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_indexes.idx_scan",
"summary": "Number of index scans initiated on this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_indexes.schemaname",
"summary": "Name of the schema this index is in",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.last_vacuum",
"summary": "Last time at which this table was manually vacuumed (not counting VACUUM FULL)",
"vartype": 6,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.n_tup_ins",
"summary": "Number of rows inserted",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.n_dead_tup",
"summary": "Estimated number of dead rows",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.last_analyze",
"summary": "Last time at which this table was manually analyzed",
"vartype": 6,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.idx_tup_fetch",
"summary": "Number of live rows fetched by index scans",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.n_tup_upd",
"summary": "Number of rows updated",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.schemaname",
"summary": "Name of the schema that this table is in",
"vartype": 1,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.seq_tup_read",
"summary": "Number of live rows fetched by sequential scans",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.vacuum_count",
"summary": "Number of times this table has been manually vacuumed (not counting VACUUM FULL)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.n_mod_since_analyze",
"summary": "Estimated number of rows modified since this table was last analyzed",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.n_tup_del",
"summary": "Number of rows deleted",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.last_autovacuum",
"summary": "Last time at which this table was vacuumed by the autovacuum daemon",
"vartype": 6,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.seq_scan",
"summary": "Number of sequential scans initiated on this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.relid",
"summary": "OID of a table",
"vartype": 2,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.n_tup_hot_upd",
"summary": "\"Number of rows HOT updated (i.e., with no separate index update required)\"",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.autoanalyze_count",
"summary": "Number of times this table has been analyzed by the autovacuum daemon",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.n_live_tup",
"summary": "Estimated number of live rows",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.relname",
"summary": "Name of this table",
"vartype": 1,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.last_autoanalyze",
"summary": "Last time at which this table was analyzed by the autovacuum daemon",
"vartype": 6,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.idx_scan",
"summary": "Number of index scans initiated on this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.autovacuum_count",
"summary": "Number of times this table has been vacuumed by the autovacuum daemon",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_stat_user_tables.analyze_count",
"summary": "Number of times this table has been manually analyzed",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_indexes.indexrelid",
"summary": "OID of this index",
"vartype": 2,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_indexes.relid",
"summary": "OID of the table for this index",
"vartype": 2,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_indexes.indexrelname",
"summary": "Name of this index",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_indexes.idx_blks_hit",
"summary": "Number of buffer hits in this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_indexes.relname",
"summary": "Name of the table for this index",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_indexes.idx_blks_read",
"summary": "Number of disk blocks read from this index",
"vartype": 2,
"metric_type": 1,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_indexes.schemaname",
"summary": "Name of the schema this index is in",
"vartype": 1,
"metric_type": 2,
"scope": "index"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.relid",
"summary": "OID of a table",
"vartype": 2,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.heap_blks_hit",
"summary": "Number of buffer hits in this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.tidx_blks_read",
"summary": "Number of disk blocks read from this table's TOAST table index (if any)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.tidx_blks_hit",
"summary": "Number of buffer hits in this table's TOAST table index (if any)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.toast_blks_hit",
"summary": "Number of buffer hits in this table's TOAST table (if any)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.idx_blks_hit",
"summary": "Number of buffer hits in all indexes on this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.relname",
"summary": "Name of this table",
"vartype": 1,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.toast_blks_read",
"summary": "Number of disk blocks read from this table's TOAST table (if any)",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.idx_blks_read",
"summary": "Number of disk blocks read from all indexes on this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.schemaname",
"summary": "Name of the schema that this table is in",
"vartype": 1,
"metric_type": 2,
"scope": "table"
},
"model": "website.MetricCatalog"
},
{
"fields": {
"dbms": 3,
"name": "pg_statio_user_tables.heap_blks_read",
"summary": "Number of disk blocks read from this table",
"vartype": 2,
"metric_type": 1,
"scope": "table"
},
"model": "website.MetricCatalog"
}
]

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,12 @@
[
{
"fields": {
"hardware": 16,
"dbms": 1,
"task_type": 1,
"creation_timestamp": "2016-12-04 11:00",
"value": "[\n \"throughput_txn_per_sec\", \n \"pg_stat_bgwriter.buffers_alloc\", \n \"pg_stat_bgwriter.buffers_checkpoint\", \n \"pg_stat_bgwriter.checkpoints_req\", \n \"pg_stat_bgwriter.maxwritten_clean\", \n \"pg_stat_database.blks_hit\", \n \"pg_stat_database.tup_deleted\", \n \"pg_stat_database.tup_inserted\", \n \"pg_stat_database.tup_returned\", \n \"pg_stat_database.tup_updated\", \n \"pg_stat_user_tables.autoanalyze_count\"\n]"
},
"model": "website.PipelineResult"
}
]

View File

@@ -0,0 +1,12 @@
[
{
"fields": {
"hardware": 16,
"dbms": 1,
"task_type": 2,
"creation_timestamp": "2016-12-04 11:00",
"value": "[\n \"global.shared_buffers\", \n \"global.effective_cache_size\", \n \"global.bgwriter_lru_maxpages\", \n \"global.bgwriter_delay\", \n \"global.checkpoint_completion_target\", \n \"global.deadlock_timeout\", \n \"global.default_statistics_target\", \n \"global.effective_io_concurrency\", \n \"global.checkpoint_timeout\", \n \"global.commit_delay\", \n \"global.commit_siblings\", \n \"global.wal_buffers\", \n \"global.temp_buffers\", \n \"global.from_collapse_limit\", \n \"global.join_collapse_limit\", \n \"global.bgwriter_lru_multiplier\", \n \"global.random_page_cost\", \n \"global.work_mem\", \n \"global.maintenance_work_mem\", \n \"global.min_wal_size\", \n \"global.max_parallel_workers_per_gather\", \n \"global.seq_page_cost\", \n \"global.max_worker_processes\", \n \"global.wal_sync_method\", \n \"global.checkpoint_flush_after\", \n \"global.wal_writer_delay\", \n \"global.backend_flush_after\", \n \"global.bgwriter_flush_after\", \n \"global.min_parallel_relation_size\", \n \"global.wal_writer_flush_after\", \n \"global.max_wal_size\"\n]"
},
"model": "website.PipelineResult"
}
]
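
Both PipelineResult fixtures above store their payload as a JSON-encoded string in the value field: task_type 1 holds the pruned metrics and task_type 2 the ranked knobs. A minimal sketch of decoding that field with the standard library; the literal below is abbreviated from the ranked-knobs entry:

import json

# Abbreviated copy of the "value" string from the ranked-knobs entry above.
value = ("[\n \"global.shared_buffers\", \n \"global.effective_cache_size\", "
         "\n \"global.work_mem\"\n]")
ranked_knobs = json.loads(value)
print(ranked_knobs[0])  # -> global.shared_buffers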

File diff suppressed because it is too large

View File

@@ -0,0 +1,21 @@
[
{
"model": "auth.user",
"pk": 1,
"fields": {
"password": "pbkdf2_sha256$30000$wefcDHxU5ctV$8NUjP4GZouOdr4LU47/WGZgqGU4V4brcS8Xt1Yx7ut0=",
"last_login": null,
"is_superuser": false,
"username": "user",
"first_name": "test",
"last_name": "user",
"email": "user@email.com",
"is_staff": false,
"is_active": true,
"date_joined": "2017-12-04T20:46:28.365Z",
"groups": [],
"user_permissions": []
}
}
]
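
The password value in this test-user fixture is a pbkdf2_sha256 hash, not a plaintext password. A hedged sketch of how Django verifies a candidate against such a hash (the candidate string is made up for illustration; run inside a configured Django project, e.g. via manage.py shell):

from django.contrib.auth.hashers import check_password

stored_hash = "pbkdf2_sha256$30000$wefcDHxU5ctV$8NUjP4GZouOdr4LU47/WGZgqGU4V4brcS8Xt1Yx7ut0="
# True only if the candidate matches the plaintext the hash was derived from.
print(check_password("not-the-real-password", stored_hash))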

View File

@@ -0,0 +1,62 @@
[
{
"fields": {
"user": 1,
"name": "test_project",
"description": "",
"creation_time": "2017-11-30T02:00:49.611Z",
"last_update": "2017-11-30T02:00:49.611Z"
},
"model": "website.Project",
"pk": 1
},
{
"fields": {
"type": 1,
"name": "New Hardware",
"cpu": 4,
"memory": 16.0,
"storage": "32",
"storage_type": "",
"additional_specs": ""
},
"model": "website.Hardware",
"pk": 1
},
{
"fields": {
"user": 1,
"name": "basic_session",
"description": "(no tuning)",
"dbms": 1,
"hardware": 1,
"project": 1,
"upload_code": "1234567890",
"tuning_session": "no_tuning_session",
"target_objective": null,
"nondefault_settings": null,
"creation_time": "2017-11-30T02:00:49.611Z",
"last_update": "2017-11-30T02:00:49.611Z"
},
"model": "website.Session",
"pk": 1
},
{
"fields": {
"user": 1,
"name": "tuning_session",
"description": "",
"dbms": 1,
"hardware": 1,
"project": 1,
"upload_code": "0987654321",
"tuning_session": "tuning_session",
"target_objective": "throughput_txn_per_sec",
"nondefault_settings": null,
"creation_time": "2017-11-30T02:00:49.611Z",
"last_update": "2017-11-30T02:00:49.611Z"
},
"model": "website.Session",
"pk": 2
}
]
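
These rows define one test project, one hardware profile, and two sessions: an untuned baseline and a tuning session targeting throughput_txn_per_sec. A sketch, with placeholder fixture file names, of how a test case might load and check them:

from django.test import TestCase

from website.models import Session

class SessionFixtureTest(TestCase):
    # Placeholder names; substitute the fixture files containing the user and
    # project/session data shown above.
    fixtures = ['test_user.json', 'test_website.json']

    def test_sessions_loaded(self):
        self.assertEqual(Session.objects.count(), 2)
        tuning = Session.objects.get(upload_code='0987654321')
        self.assertEqual(tuning.target_objective, 'throughput_txn_per_sec')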

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,116 @@
#
# OtterTune - forms.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#
'''
Created on Jul 25, 2017
@author: dvanaken
'''
from django import forms
from django.db.models import Max
from .models import Session, Project, Hardware, SessionKnob
class NewResultForm(forms.Form):
upload_code = forms.CharField(max_length=30)
metrics_before = forms.FileField()
metrics_after = forms.FileField()
knobs = forms.FileField()
summary = forms.FileField()
class ProjectForm(forms.ModelForm):
class Meta: # pylint: disable=old-style-class,no-init
model = Project
fields = ['name', 'description']
widgets = {
'name': forms.TextInput(attrs={'required': True}),
'description': forms.Textarea(attrs={'maxlength': 500,
'rows': 5}),
}
class SessionForm(forms.ModelForm):
gen_upload_code = forms.BooleanField(widget=forms.CheckboxInput,
initial=False,
required=False,
label='Get new upload code')
cpu = forms.IntegerField(label='Number of Processors')
memory = forms.FloatField(label='RAM (GB)')
storage = forms.IntegerField(label='Storage (GB)')
def __init__(self, *args, **kwargs):
super(SessionForm, self).__init__(*args, **kwargs)
self.fields['description'].required = False
self.fields['target_objective'].required = False
self.fields['tuning_session'].required = True
self.fields['cpu'].initial = 2
self.fields['memory'].initial = 16.0
self.fields['storage'].initial = 32
def save(self, commit=True):
model = super(SessionForm, self).save(commit=False)
cpu2 = self.cleaned_data['cpu']
memory2 = self.cleaned_data['memory']
storage2 = self.cleaned_data['storage']
if hasattr(model, 'hardware'):
model.hardware.cpu = cpu2
model.hardware.memory = memory2
model.hardware.storage = storage2
model.hardware.save()
else:
last_type = Hardware.objects.aggregate(Max('type'))['type__max']
if last_type is None:
last_type = 0
model.hardware = Hardware.objects.create(type=last_type + 1,
name='New Hardware',
cpu=cpu2,
memory=memory2,
storage=storage2,
storage_type='Default',
additional_specs='{}')
if commit:
model.save()
return model
class Meta: # pylint: disable=old-style-class,no-init
model = Session
fields = ('name', 'description', 'tuning_session', 'dbms', 'cpu', 'memory', 'storage',
'target_objective')
widgets = {
'name': forms.TextInput(attrs={'required': True}),
'description': forms.Textarea(attrs={'maxlength': 500,
'rows': 5}),
}
labels = {
'dbms': 'DBMS',
}
class SessionKnobForm(forms.ModelForm):
name = forms.CharField(max_length=128)
def __init__(self, *args, **kwargs):
super(SessionKnobForm, self).__init__(*args, **kwargs)
self.fields['session'].required = False
self.fields['knob'].required = False
self.fields['name'].widget.attrs['readonly'] = True
class Meta: # pylint: disable=old-style-class,no-init
model = SessionKnob
fields = ['session', 'knob', 'minval', 'maxval', 'tunable']
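
A hedged sketch of how a view might use NewResultForm to validate an uploaded result; the view function and variable names are illustrative, not code from this commit:

from django.http import HttpResponse

from .forms import NewResultForm

def new_result(request):
    # Illustrative handler; real processing of the uploaded files is omitted.
    form = NewResultForm(request.POST, request.FILES)
    if not form.is_valid():
        return HttpResponse('Invalid form: {}'.format(form.errors))
    upload_code = form.cleaned_data['upload_code']
    knobs_file = form.cleaned_data['knobs']  # uploaded knob configuration (unused in this sketch)
    return HttpResponse('Received result for upload code {}'.format(upload_code))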

View File

@@ -0,0 +1,255 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.1 on 2018-03-26 02:21
from django.conf import settings
import django.core.validators
from django.db import migrations, models
import django.db.models.deletion
import re
class Migration(migrations.Migration):
initial = True
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
]
operations = [
migrations.CreateModel(
name='BackupData',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('raw_knobs', models.TextField()),
('raw_initial_metrics', models.TextField()),
('raw_final_metrics', models.TextField()),
('raw_summary', models.TextField()),
('knob_log', models.TextField()),
('metric_log', models.TextField()),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='DBMSCatalog',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('type', models.IntegerField(choices=[(1, b'MySQL'), (2, b'Postgres'), (3, b'Db2'), (4, b'Oracle'), (5, b'SQL Server'), (6, b'SQLite'), (7, b'HStore'), (8, b'Vector'), (9, b'MyRocks')])),
('version', models.CharField(max_length=16)),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='Hardware',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('type', models.IntegerField()),
('name', models.CharField(max_length=32)),
('cpu', models.IntegerField()),
('memory', models.FloatField()),
('storage', models.CharField(max_length=64, validators=[django.core.validators.RegexValidator(re.compile('^\\d+(?:\\,\\d+)*\\Z'), code='invalid', message='Enter only digits separated by commas.')])),
('storage_type', models.CharField(max_length=16)),
('additional_specs', models.TextField(null=True)),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='KnobCatalog',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128)),
('vartype', models.IntegerField(choices=[(1, b'STRING'), (2, b'INTEGER'), (3, b'REAL'), (4, b'BOOL'), (5, b'ENUM'), (6, b'TIMESTAMP')], verbose_name=b'variable type')),
('unit', models.IntegerField(choices=[(1, b'bytes'), (2, b'milliseconds'), (3, b'other')])),
('category', models.TextField(null=True)),
('summary', models.TextField(null=True, verbose_name=b'description')),
('description', models.TextField(null=True)),
('scope', models.CharField(max_length=16)),
('minval', models.CharField(max_length=32, null=True, verbose_name=b'minimum value')),
('maxval', models.CharField(max_length=32, null=True, verbose_name=b'maximum value')),
('default', models.TextField(verbose_name=b'default value')),
('enumvals', models.TextField(null=True, verbose_name=b'valid values')),
('context', models.CharField(max_length=32)),
('tunable', models.BooleanField(verbose_name=b'tunable')),
('resource', models.IntegerField(choices=[(1, b'Memory'), (2, b'CPU'), (3, b'Storage'), (4, b'Other')], default=4)),
('dbms', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.DBMSCatalog')),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='KnobData',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=50)),
('creation_time', models.DateTimeField()),
('data', models.TextField()),
('knobs', models.TextField()),
('dbms', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.DBMSCatalog')),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='MetricCatalog',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128)),
('vartype', models.IntegerField(choices=[(1, b'STRING'), (2, b'INTEGER'), (3, b'REAL'), (4, b'BOOL'), (5, b'ENUM'), (6, b'TIMESTAMP')])),
('summary', models.TextField(null=True, verbose_name=b'description')),
('scope', models.CharField(max_length=16)),
('metric_type', models.IntegerField(choices=[(1, b'COUNTER'), (2, b'INFO'), (3, b'STATISTICS')])),
('dbms', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.DBMSCatalog')),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='MetricData',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=50)),
('creation_time', models.DateTimeField()),
('data', models.TextField()),
('metrics', models.TextField()),
('dbms', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.DBMSCatalog')),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='PipelineData',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('task_type', models.IntegerField(choices=[(1, b'Pruned Metrics'), (2, b'Ranked Knobs'), (3, b'Knob Data'), (4, b'Metric Data')])),
('data', models.TextField()),
('creation_time', models.DateTimeField()),
],
),
migrations.CreateModel(
name='PipelineRun',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('start_time', models.DateTimeField()),
('end_time', models.DateTimeField(null=True)),
],
options={
'ordering': ['-id'],
},
),
migrations.CreateModel(
name='Project',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=64, verbose_name=b'project name')),
('description', models.TextField(blank=True, null=True)),
('creation_time', models.DateTimeField()),
('last_update', models.DateTimeField()),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='Result',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('creation_time', models.DateTimeField()),
('observation_start_time', models.DateTimeField()),
('observation_end_time', models.DateTimeField()),
('observation_time', models.FloatField()),
('task_ids', models.CharField(max_length=180, null=True)),
('next_configuration', models.TextField(null=True)),
('dbms', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.DBMSCatalog')),
('knob_data', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.KnobData')),
('metric_data', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.MetricData')),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='Session',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=64, verbose_name=b'session name')),
('description', models.TextField(blank=True, null=True)),
('creation_time', models.DateTimeField()),
('last_update', models.DateTimeField()),
('upload_code', models.CharField(max_length=30, unique=True)),
('tuning_session', models.CharField(choices=[('tuning_session', 'Tuning Session'), ('no_tuning_session', 'No Tuning'), ('randomly_generate', 'Randomly Generate')], max_length=64)),
('target_objective', models.CharField(choices=[(b'throughput_txn_per_sec', b'Throughput'), (b'99th_lat_ms', b'99 Percentile Latency')], max_length=64, null=True)),
('nondefault_settings', models.TextField(null=True)),
('dbms', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.DBMSCatalog')),
('hardware', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.Hardware')),
('project', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.Project')),
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
],
options={
'abstract': False,
},
),
migrations.CreateModel(
name='Workload',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('name', models.CharField(max_length=128, verbose_name=b'workload name')),
('dbms', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.DBMSCatalog')),
('hardware', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.Hardware')),
],
),
migrations.AddField(
model_name='result',
name='session',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.Session', verbose_name=b'session name'),
),
migrations.AddField(
model_name='result',
name='workload',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.Workload'),
),
migrations.AddField(
model_name='pipelinedata',
name='pipeline_run',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.PipelineRun'),
),
migrations.AddField(
model_name='pipelinedata',
name='workload',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.Workload'),
),
migrations.AddField(
model_name='metricdata',
name='session',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.Session'),
),
migrations.AddField(
model_name='knobdata',
name='session',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.Session'),
),
migrations.AddField(
model_name='backupdata',
name='result',
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='website.Result'),
),
migrations.AlterUniqueTogether(
name='workload',
unique_together=set([('dbms', 'hardware', 'name')]),
),
migrations.AlterUniqueTogether(
name='pipelinedata',
unique_together=set([('pipeline_run', 'task_type', 'workload')]),
),
]
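
The schema above links each Result to its Session, Workload, and the KnobData/MetricData captured for that observation. A sketch of traversing those relationships with the ORM, assuming the website app's models mirror this migration:

from website.models import Result

latest = Result.objects.order_by('-creation_time').first()
if latest is not None:
    print(latest.session.name, latest.workload.name)
    print(latest.knob_data.knobs)      # knob configuration stored as text
    print(latest.metric_data.metrics)  # metric values stored as text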

View File

@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-
import logging
from django.conf import settings
#from django.core.exceptions import ProgrammingError
from django.db import connection, migrations, ProgrammingError
LOG = logging.getLogger(__name__)
TABLES_TO_COMPRESS = [
"website_backupdata",
"website_knobdata",
"website_metricdata",
"website_pipelinedata",
]
class Migration(migrations.Migration):
dependencies = [
('website', '0001_initial'),
]
try:
LOG.info('***** VENDOR: %s', connection.vendor)
if connection.vendor == 'mysql':
version = (0, 0, 0)
with connection.cursor() as cursor:
cursor.execute('SELECT VERSION()')
version = cursor.fetchone()[0]
version = version.split('-')[0]
version = version.split('.')
version = tuple(int(v) for v in version)
LOG.info('***** DB VERSION: %s', version)
if version >= (5, 7, 0):
operations = [
migrations.RunSQL(["ALTER TABLE " + table_name + " COMPRESSION='zlib';",
"OPTIMIZE TABLE " + table_name + ";"],
["ALTER TABLE " + table_name + " COMPRESSION='none';",
"OPTIMIZE TABLE " + table_name + ";"])
for table_name in TABLES_TO_COMPRESS
]
LOG.info('***** DONE ENABLING COMPRESSION')
else:
operations = []
LOG.info('***** COMPRESSION NOT SUPPORTED: %s < (5, 7, 0)', version)
else:
LOG.info('***** DB COMPRESSION NOT SUPPORTED: %s', connection.vendor)
except ProgrammingError as err:
LOG.warning("Error applying migration '0002_enable_compression'... Skipping: %s", err)
operations = []

View File

@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.1 on 2018-08-02 07:58
from __future__ import unicode_literals
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
('website', '0002_enable_compression'),
]
operations = [
migrations.AddField(
model_name='workload',
name='status',
field=models.IntegerField(choices=[(1, 'MODIFIED'), (2, 'PROCESSING'), (3, 'PROCESSED')], default=1, editable=False),
)
]

View File

@@ -0,0 +1,49 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.10.1 on 2017-12-07 19:51
from django.core.management import call_command
from django.db import migrations
def load_initial_data(apps, schema_editor):
initial_data_fixtures = [
"dbms_catalog.json",
"postgres-96_knobs.json",
"postgres-96_metrics.json",
"postgres-92_knobs.json",
"postgres-92_metrics.json",
"postgres-93_knobs.json",
"postgres-93_metrics.json",
"postgres-94_knobs.json",
"postgres-94_metrics.json",
"myrocks-5.6_knobs.json",
"myrocks-5.6_metrics.json",
"oracle_knobs.json",
"oracle_metrics.json"
]
for fixture in initial_data_fixtures:
call_command("loaddata", fixture, app_label="website")
def unload_initial_data(apps, schema_editor):
model_names = [
"DBMSCatalog",
"KnobCatalog",
"MetricCatalog",
"Hardware"
]
for model_name in model_names:
model = apps.get_model("website", model_name)
model.objects.all().delete()
class Migration(migrations.Migration):
dependencies = [
('website', '0003_background_task_optimization'),
]
operations = [
migrations.RunPython(load_initial_data, unload_initial_data)
]

Some files were not shown because too many files have changed in this diff