#!/usr/bin/env python
# encoding: utf-8
#
# OtterTune - source_validator.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#

# ==============================================
# SOURCE VALIDATOR
# ==============================================
#
# Adapted from the source validator used by Peloton.
# (see https://github.com/cmu-db/peloton/blob/master/script/validators/source_validator.py)

import argparse
import logging
import imp
import os
import re
import sys
import json
import functools
from collections import namedtuple
from fabric.api import lcd, local, settings, quiet

EXIT_SUCCESS = 0
EXIT_FAILURE = -1

# ==============================================
# CONFIGURATION
# ==============================================

# Logging
LOG = logging.getLogger(__name__)
LOG.addHandler(logging.StreamHandler())
LOG.setLevel(logging.INFO)

# NOTE: the absolute path to ottertune directory is calculated from current
# directory structure: ottertune/server/website/scripts/validators/<this_file>
# OTTERTUNE_DIR needs to be redefined if the directory structure is changed.
CODE_SOURCE_DIR = os.path.abspath(os.path.dirname(__file__))
OTTERTUNE_DIR = os.path.abspath(functools.reduce(os.path.join,
                                                 [CODE_SOURCE_DIR,
                                                  os.path.pardir,
                                                  os.path.pardir]))

# Other directory paths used are relative to OTTERTUNE_DIR
DEFAULT_DIRS = [
    OTTERTUNE_DIR
]

# Directories that should NOT be checked
EXCLUDE_DIRECTORIES = [
    # Django-generated directories
    os.path.join(OTTERTUNE_DIR, "server/website/website/migrations"),

    # Source code files from json.org
    os.path.join(OTTERTUNE_DIR, "client/controller/src/main/java/com/controller/util/json"),

    # Django settings
    os.path.join(OTTERTUNE_DIR, 'server/website/website/settings'),

    # Docker files
    os.path.join(OTTERTUNE_DIR, 'docker'),

    # Django manage.py extensions
    os.path.join(OTTERTUNE_DIR, "server/website/website/management"),

    # Stand-alone scripts
    os.path.join(OTTERTUNE_DIR, "server/website/script"),
]

# Files that should NOT be checked
EXCLUDE_FILES = [
    # Django-generated files
    os.path.join(OTTERTUNE_DIR, 'server/website/manage.py'),
    # file causing import error
    os.path.join(OTTERTUNE_DIR, 'server/analysis/simulation.py'),
]

# Regex patterns
PYCODESTYLE_COMMENT_PATTERN = re.compile(r'#\s*pycodestyle:\s*disable\s*=\s*[\w\,\s]+$')
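# Matches an inline suppression directive at the end of a source line,
# e.g.: some_code()  # pycodestyle: disable=E201,E226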

PYTHON_ILLEGAL_PATTERNS = [
    (re.compile(r'^print[ (]'), "Do not use 'print'. Use the logging module instead.")
]

JAVA_ILLEGAL_PATTERNS = [
    (re.compile(r'^System.out.println'), "Do not use println. Use the logging module instead.")
]

PYTHON_HEADER_PATTERN = re.compile(r'#\n#.*\n#\n# Copyright.*\n#\n')
JAVA_HEADER_PATTERN = re.compile(r'/\*\n \*.*\n \*\n \* Copyright.*\n \*/\n\n')
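# Both header patterns match a five-line comment block like the one at the top
# of this file; check_header() additionally verifies that the file's own name
# appears somewhere in the matched header.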

# Stdout format strings
SEPARATOR = 80 * '-'
OUTPUT_FMT = (
    '' + SEPARATOR + '\n\n'
    '\033[1m'  # start bold text
    '%s\n'
    'FAILED: %s\n\n'
    '\033[0m'  # end bold text
    '%s'
)
VALIDATOR_FMT = '{name}\n{u}\n{out}'.format
MSG_PREFIX_FMT = ' {filename}:{line:3d}: '.format
MSG_SUFFIX_FMT = ' ({symbol})'.format


# ==============================================
# UTILITY FUNCTION DEFINITIONS
# ==============================================

def format_message(filename, line, message, symbol=None):
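    """Format one validator message as '<filename>:<line>: <message> (<symbol>)'.

    The message is cropped so that the formatted line fits within 80 characters.
    """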
    out_prefix = MSG_PREFIX_FMT(filename=filename, line=line)
    out_suffix = '' if symbol is None else MSG_SUFFIX_FMT(symbol=symbol)

    # Crop the message details to make the output more readable
    max_msg_len = 80 - len(out_prefix) - len(out_suffix)
    if len(message) > max_msg_len:
        message = message[:max_msg_len - 3] + '...'
    output = (out_prefix + message + out_suffix).replace('\n', '')
    return output + '\n'


def validate_validator(modules, config_path):
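    """Check that a validator is usable: its required modules can be imported
    and its config file (if one is assigned) exists. Returns True on success.
    """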
    status = True

    # Check if required modules are installed
    for module in modules:
        if module is not None:
            try:
                imp.find_module(module)
            except ImportError:
                LOG.error("Cannot find module %s", module)
                status = False

    # Check that the config file exists if assigned
    if config_path is not None and not os.path.isfile(config_path):
        LOG.error("Cannot find config file %s", config_path)
        status = False
    return status


# Validate the file passed as argument
def validate_file(file_path):
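    """Run every validator in VALIDATORS on a single file.

    Excluded files and files that are not Python/Java sources are skipped
    (treated as valid). Returns True if all validators pass; otherwise the
    combined output of the failed validators is logged and False is returned.
    """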
    if file_path in EXCLUDE_FILES:
        return True
    if not file_path.endswith(".py") and not file_path.endswith(".java"):
        return True
    for exclude_dir in EXCLUDE_DIRECTORIES:
        if file_path.startswith(exclude_dir):
            return True

    LOG.debug("Validating file: %s", file_path)
    status = True
    output = []
    failed_validators = []
    for validator in VALIDATORS:
        val_status, val_output = validator.validate_fn(
            file_path, validator.config_path)
        if not val_status:
            status = False
            output.append(VALIDATOR_FMT(name=validator.name,
                                        u='-' * len(validator.name),
                                        out=val_output))
            failed_validators.append(validator.name)
    if not status:
        LOG.info(OUTPUT_FMT, file_path, ', '.join(failed_validators), '\n'.join(output))
    return status


# Validate all the files in the root_dir passed as argument
def validate_dir(root_dir):
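    """Recursively validate all files under root_dir, pruning excluded
    directories from the walk. Returns True only if every file passes.
    """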
    for exclude_dir in EXCLUDE_DIRECTORIES:
        if root_dir.startswith(exclude_dir):
            return True

    status = True
    for root, dirs, files in os.walk(root_dir):  # pylint: disable=not-an-iterable
        # Remove excluded dirs from list
        valid_dirs = []
        for d in dirs:
            valid = True
            for exclude_dir in EXCLUDE_DIRECTORIES:
                if d.startswith(exclude_dir):
                    valid = False
                    break
            if valid:
                valid_dirs.append(d)
        dirs[:] = valid_dirs

        # Validate files
        for file_path in files:
            file_path = os.path.join(root, file_path)

            if not validate_file(file_path):
                status = False
    return status


def get_git_files(state):
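    """Return the Python/Java files reported by git for the given state
    ('staged', 'unstaged', or 'untracked'). Exits with EXIT_FAILURE if the
    underlying git command fails.
    """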
    if state == 'staged':
        # Files staged for commit
        cmd = r"git diff --name-only --cached --diff-filter=d | grep -E '.*\.(py|java)$'"

    elif state == 'unstaged':
        # Tracked files not staged for commit
        cmd = r"git diff --name-only --diff-filter=d | grep -E '.*\.(py|java)$'"

    elif state == 'untracked':
        # Untracked files not staged for commit
        cmd = r"git ls-files --other --exclude-standard | grep -E '.*\.(py|java)$'"

    with settings(warn_only=True):
        res = local(cmd, capture=True)

    if res.succeeded:
        targets = res.stdout.strip().split('\n')

        if not targets:
            LOG.warning("No %s files found.", state)
    else:
        LOG.error("An error occurred while fetching %s files (exit code %d). "
                  "Exiting...\n\n%s\n", state, res.return_code, res.stderr)
        sys.exit(EXIT_FAILURE)

    return targets


# ==============================================
# VALIDATOR FUNCTION DEFINITIONS
# ==============================================

def check_pylint(file_path, config_path=None):
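    """Run pylint (JSON output) on a Python file and reformat its messages.

    Returns a (status, output) pair; output contains the formatted messages
    when pylint reports problems.
    """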
    if not file_path.endswith(".py"):
        return True, None

    options = [
        '--output-format=json',
        '--reports=yes',
    ]
    if config_path is not None:
        options.append('--rcfile=' + config_path)

    with settings(warn_only=True), quiet():
        res = local('pylint {} {}'.format(' '.join(options), file_path), capture=True)

    if res.stdout == '':
        if res.return_code != 0:
            raise Exception(
                'An error occurred while running pylint on {} (exit code {}).\n\n{}\n'.format(
                    file_path, res.return_code, res.stderr))
        return True, None

    output = []
    errors = json.loads(res.stdout)
    for entry in errors:
        # Remove extra whitespace and hints
        msg = entry['message'].replace('^', '').replace('|', '')
        msg = re.sub(' +', ' ', msg)
        msg = msg.strip()
        output.append(format_message(os.path.basename(file_path), entry['line'],
                                     msg, entry['symbol']))
    output = ''.join(output)
    return res.return_code == 0, output


def check_pycodestyle(file_path, config_path=None):
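    """Run pycodestyle on a Python file, honoring inline
    '# pycodestyle: disable=...' directives via a custom report class.

    Returns a (status, output) pair where output is None if no errors remain.
    """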
    import pycodestyle

    if not file_path.endswith(".py"):
        return True, None

    # A custom reporter class for pycodestyle that checks for disabled errors
    # and formats the style report output.
    class CustomReporter(pycodestyle.StandardReport):
        def get_file_results(self):
            # Iterates through the lines of code that generated lint errors and
            # checks if the given error has been disabled for that line via an
            # inline comment (e.g., # pycodestyle: disable=E201,E226). Those
            # that have been disabled are not treated as errors.
            self._deferred_print.sort()
            results = []
            prev_line_num = -1
            prev_line_errs = []
            for line_number, _, code, text, _ in self._deferred_print:
                if prev_line_num == line_number:
                    err_codes = prev_line_errs
                else:
                    line = self.lines[line_number - 1]
                    m = PYCODESTYLE_COMMENT_PATTERN.search(line)
                    if m and m.group(0):
                        err_codes = [ec.strip() for ec in m.group(0).split('=')[1].split(',')]
                    else:
                        err_codes = []
                prev_line_num = line_number
                prev_line_errs = err_codes
                if code in err_codes:
                    # Error is disabled in source
                    continue

                results.append(format_message(os.path.basename(file_path),
                                              self.line_offset + line_number,
                                              text, code))
            return results, len(results) == 0
    # END CustomReporter class

    options = {} if config_path is None else {'config_file': config_path}
    style = pycodestyle.StyleGuide(quiet=True, **options)

    # Set the reporter option to our custom one
    style.options.reporter = CustomReporter
    style.init_report()
    report = style.check_files([file_path])
    results, status = report.get_file_results()
    output = None if status else ''.join(results)
    return status, output


def check_java_checkstyle(file_path, config_path=None):
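    """Run checkstyle on a Java file and reformat its audit output.

    Returns (True, None) when no issues are reported, otherwise
    (False, formatted_output).
    """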
    if not file_path.endswith(".java"):
        return True, None

    options = '' if config_path is None else '-c ' + config_path
    with quiet():
        res = local("checkstyle {} {}".format(options, file_path), capture=True)
    lines = res.stdout.split('\n')
    assert len(lines) >= 2 and lines[0] == "Starting audit..." and lines[-1] == "Audit done."
    if len(lines) == 2:
        return True, None
    output = []
    for line in lines[1:-1]:
        parts = line.strip().split(':')
        line_number = int(parts[1])
        text, code = parts[-1].rsplit('[', 1)
        text = text.strip()
        code = code[:-1]
        output.append(format_message(os.path.basename(file_path), line_number, text, code))
    output = ''.join(output)
    return False, output


def check_illegal_patterns(file_path, config_path=None):  # pylint: disable=unused-argument
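    """Check a Python/Java file for disallowed patterns (e.g. bare 'print' or
    'System.out.println') on non-comment lines.

    Returns a (status, output) pair where output is None if no violations are found.
    """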
    if file_path.endswith(".py"):
        illegal_patterns = PYTHON_ILLEGAL_PATTERNS
        comment = "#"
    elif file_path.endswith(".java"):
        illegal_patterns = JAVA_ILLEGAL_PATTERNS
        comment = "//"
    else:
        return True, None

    line_num = 1
    output = []
    status = True
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            for pattern_info in illegal_patterns:
                if not line.startswith(comment) and pattern_info[0].search(line):
                    output.append(format_message(filename=os.path.basename(file_path),
                                                 line=line_num,
                                                 message=pattern_info[1]))
                    status = False
            line_num += 1
    output = None if status else ''.join(output)
    return status, output


def check_header(file_path, config_file=None):  # pylint: disable=unused-argument
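    """Check that a Python/Java file contains the expected copyright header
    and that the header mentions the file's own name.

    Returns a (status, output) pair where output is None if the header is valid.
    """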
    if file_path.endswith(".py"):
        header_pattern = PYTHON_HEADER_PATTERN
    elif file_path.endswith(".java"):
        header_pattern = JAVA_HEADER_PATTERN
    else:
        return True, None

    status = True
    output = None
    with open(file_path, 'r') as f:
        file_contents = f.read()

    header_match = header_pattern.search(file_contents)
    filename = os.path.basename(file_path)
    if header_match:
        if filename not in header_match.group(0):
            status = False
            output = format_message(filename=filename, line=2,
                                    message="Incorrect filename in header")

    else:
        status = False
        output = format_message(filename=filename, line=1,
                                message='Missing header')
    return status, output


# ==============================================
# VALIDATORS
# ==============================================

# Struct for storing validator metadata
Validator = namedtuple('Validator', 'name validate_fn modules config_path')

VALIDATORS = [
    # Runs pylint on python source
    Validator('check_pylint', check_pylint, ['pylint'],
              os.path.join(OTTERTUNE_DIR, "script/formatting/config/pylintrc")),

    # Runs pycodestyle on python source
    Validator('check_pycodestyle', check_pycodestyle, ['pycodestyle'],
              os.path.join(OTTERTUNE_DIR, "script/formatting/config/pycodestyle")),

    # Runs checkstyle on the java source
    Validator("check_java_checkstyle", check_java_checkstyle, [],
              os.path.join(OTTERTUNE_DIR, "script/formatting/config/google_checks.xml")),

    # Checks that the python/java source files do not use illegal patterns
    Validator('check_illegal_patterns', check_illegal_patterns, [], None),

    # Checks that the python/java source files have headers
    Validator('check_header', check_header, [], None)
]


# ==============================================
# MAIN FUNCTION
# ==============================================

def main():
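    """Parse the command-line arguments and validate the requested targets.

    With no arguments the entire OtterTune tree (DEFAULT_DIRS) is validated;
    otherwise the given paths and/or the files reported by git via
    --staged-files, --unstaged-files, and --untracked-files are checked.
    Exits with EXIT_SUCCESS or EXIT_FAILURE.
    """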
    parser = argparse.ArgumentParser(description="Validate OtterTune's source code")
    parser.add_argument('paths', metavar='PATH', type=str, nargs='*',
                        help='Files or directories to (recursively) validate')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Enable verbose output')
    parser.add_argument('--staged-files', action='store_true',
                        help='Apply the selected action(s) to all staged files (git)')
    parser.add_argument('--unstaged-files', action='store_true',
                        help='Apply the selected action(s) to all unstaged tracked files (git)')
    parser.add_argument('--untracked-files', action='store_true',
                        help='Apply the selected action(s) to all untracked files (git)')
    args = parser.parse_args()

    if args.verbose:
        LOG.setLevel(logging.DEBUG)

    LOG.info('\nRunning source validators:\n%s\n',
             '\n'.join(' ' + v.name for v in VALIDATORS))
    for validator in VALIDATORS:
        if not validate_validator(validator.modules, validator.config_path):
            sys.exit(EXIT_FAILURE)

    targets = []

    if args.paths or args.staged_files or args.unstaged_files or args.untracked_files:
        if args.paths:
            targets += args.paths

        if args.staged_files:
            targets += get_git_files('staged')

        if args.unstaged_files:
            targets += get_git_files('unstaged')

        if args.untracked_files:
            targets += get_git_files('untracked')

        if not targets:
            LOG.error("No files/directories found. Exiting...")
            sys.exit(EXIT_FAILURE)

    else:
        targets = DEFAULT_DIRS

    targets = sorted(os.path.abspath(t) for t in targets)
    LOG.info('\nFiles/directories to validate:\n%s\n',
             '\n'.join(' ' + t for t in targets))

    status = True
    for target in targets:
        if os.path.isfile(target):
            LOG.debug("Scanning file: %s\n", target)
            target_status = validate_file(target)
        elif os.path.isdir(target):
            LOG.debug("Scanning directory: %s\n", target)
            target_status = validate_dir(target)
        else:
            LOG.error("%s isn't a file or directory", target)
            sys.exit(EXIT_FAILURE)

        if not target_status:
            status = False

    if not status:
        LOG.info(SEPARATOR + '\n')
        LOG.info("Validation NOT successful\n")
        sys.exit(EXIT_FAILURE)

    LOG.info("Validation successful\n")
    sys.exit(EXIT_SUCCESS)


if __name__ == '__main__':
    main()