#!/usr/bin/env python
# encoding: utf-8
#
# OtterTune - source_validator.py
#
# Copyright (c) 2017-18, Carnegie Mellon University Database Group
#

# ==============================================
# SOURCE VALIDATOR
# ==============================================
#
# Adapted from the source validator used by Peloton.
# (see https://github.com/cmu-db/peloton/blob/master/script/validators/source_validator.py)

import argparse
import logging
import imp
import os
import re
import subprocess
import sys
import json
import functools
from collections import namedtuple
from fabric.api import lcd, local, settings, quiet

# Process exit codes handed to sys.exit()
EXIT_SUCCESS, EXIT_FAILURE = 0, -1

# ==============================================
# CONFIGURATION
# ==============================================

# Logging: module-level logger that emits INFO and above through a
# stream handler created with the logging module's defaults
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.INFO)
LOG.addHandler(logging.StreamHandler())

# NOTE: the absolute path to the ottertune directory is derived from the
# location of this file: it is the directory two levels above the directory
# that contains this file (<OTTERTUNE_DIR>/<dir>/<dir>/<this_file>).
# OTTERTUNE_DIR needs to be redefined if the directory structure is changed.
CODE_SOURCE_DIR = os.path.abspath(os.path.dirname(__file__))
OTTERTUNE_DIR = os.path.abspath(
    os.path.join(CODE_SOURCE_DIR, os.path.pardir, os.path.pardir))

# Targets validated when no path is given on the command line.
# Other directory paths used are relative to OTTERTUNE_DIR.
DEFAULT_DIRS = [OTTERTUNE_DIR]

# Directories that should NOT be checked (absolute paths under OTTERTUNE_DIR).
# validate_dir() compares full directory paths against this list.
EXCLUDE_DIRECTORIES = [os.path.join(OTTERTUNE_DIR, rel_path) for rel_path in (
    # Django-generated directories
    "server/website/website/migrations",

    # Source code files from json.org
    "client/controller/src/main/java/com/controller/util/json",

    # Django settings
    'server/website/website/settings',

    # Docker files
    'docker',

    # Django manage.py extensions
    "server/website/website/management",
)]

# Files that should NOT be checked (absolute paths under OTTERTUNE_DIR).
# validate_file() compares full file paths against this list.
EXCLUDE_FILES = [os.path.join(OTTERTUNE_DIR, rel_path) for rel_path in (
    # Django-generated files
    'server/website/manage.py',

    # Docker files
    'server/website/createadmin.py',
)]

# Location of the checkstyle jar executed by check_java_checkstyle()
CHECKSTYLE_JAR_PATH = os.path.join(
    OTTERTUNE_DIR, "client/controller/build/libs/checkstyle-8.8-all.jar")

# Regex patterns

# Matches an inline comment of the form "# pycodestyle: disable=E201,E226"
# at the end of a line; the text after '=' is a comma-separated code list.
PYCODESTYLE_COMMENT_PATTERN = re.compile(r'#\s*pycodestyle:\s*disable\s*=\s*[\w\,\s]+$')

# Each entry is a (compiled pattern, message) pair. The patterns are applied
# to whitespace-stripped, non-comment source lines by check_illegal_patterns.
PYTHON_ILLEGAL_PATTERNS = [
    (re.compile(r'^print[ (]'), "Do not use 'print'. Use the logging module instead.")
]

JAVA_ILLEGAL_PATTERNS = [
    # The dots are escaped so that they only match a literal '.', not any
    # character.
    (re.compile(r'^System\.out\.println'), "Do not use println. Use the logging module instead.")
]

# Expected layout of the comment header in python and java source files
PYTHON_HEADER_PATTERN = re.compile(r'#\n#.*\n#\n# Copyright.*\n#\n')
JAVA_HEADER_PATTERN = re.compile(r'/\*\n \*.*\n \*\n \* Copyright.*\n \*/\n\n')

# Stdout format strings
SEPARATOR = '-' * 80

# %-style template for the per-file failure report logged by validate_file()
OUTPUT_FMT = ''.join((
    SEPARATOR, '\n\n',
    '\033[1m',         # start bold text
    '%s\n',            # path of the file that failed
    'FAILED: %s\n\n',  # names of the validators that failed
    '\033[0m',         # end bold text
    '%s',              # the validators' reports
))

# Bound str.format methods; call them with keyword arguments
VALIDATOR_FMT = '{name}\n{u}\n{out}'.format
MSG_PREFIX_FMT = ' {filename}:{line:3d}: '.format
MSG_SUFFIX_FMT = ' ({symbol})'.format


# ==============================================
# UTILITY FUNCTION DEFINITIONS
# ==============================================

def format_message(filename, line, message, symbol=None):
    """Build a single report line for one validation failure.

    The result has the form ' <filename>:<line>: <message> (<symbol>)'
    followed by a newline; the ' (<symbol>)' part is omitted when symbol
    is None. Newlines embedded in the message are removed.

    Args:
        filename: name of the file the message refers to.
        line: integer line number the message refers to.
        message: description of the failure.
        symbol: optional identifier of the violated rule.

    Returns:
        The formatted, newline-terminated report line.
    """
    out_prefix = MSG_PREFIX_FMT(filename=filename, line=line)
    out_suffix = '' if symbol is None else MSG_SUFFIX_FMT(symbol=symbol)

    # Crop the message details to make the output more readable
    max_msg_len = 80 - len(out_prefix) - len(out_suffix)
    if len(message) > max_msg_len:
        # Clamp the cut-off at zero: with a very long prefix/suffix the
        # budget drops below the length of the ellipsis, and a negative
        # slice bound would count from the end of the message instead of
        # cropping it.
        message = message[:max(max_msg_len - 3, 0)] + '...'
    output = (out_prefix + message + out_suffix).replace('\n', '')
    return output + '\n'


def validate_validator(modules, config_path):
    """Check that a validator's prerequisites are available.

    Args:
        modules: iterable of module names that must be locatable; None
            entries are skipped.
        config_path: path of the validator's config file, or None if the
            validator does not use one.

    Returns:
        True if every named module can be found and the config file (when
        given) exists, False otherwise. Every problem found is logged.
    """
    # Check if required modules are installed
    missing_modules = []
    for module in modules:
        if module is None:
            continue
        try:
            imp.find_module(module)
        except ImportError:
            LOG.error("Cannot find module %s", module)
            missing_modules.append(module)

    # Check that the config file exists if assigned
    config_ok = config_path is None or os.path.isfile(config_path)
    if not config_ok:
        LOG.error("Cannot find config file %s", config_path)

    return not missing_modules and config_ok


def validate_file(file_path):
    """Validate the file passed as argument.

    Excluded files and files that are neither .py nor .java pass without
    being inspected. Otherwise every validator in VALIDATORS is run on the
    file; if any fail, one combined report is logged for the file.

    Returns:
        True if the file was skipped or passed every validator, else False.
    """
    if file_path in EXCLUDE_FILES:
        return True
    if not file_path.endswith((".py", ".java")):
        return True

    LOG.debug("Validating file: %s", file_path)
    failures = []  # (validator name, formatted report) for each failure
    for validator in VALIDATORS:
        passed, report = validator.validate_fn(file_path, validator.config_path)
        if passed:
            continue
        section = VALIDATOR_FMT(name=validator.name,
                                u='-' * len(validator.name),
                                out=report)
        failures.append((validator.name, section))

    if not failures:
        return True

    failed_names = ', '.join(name for name, _ in failures)
    sections = '\n'.join(section for _, section in failures)
    LOG.info(OUTPUT_FMT, file_path, failed_names, sections)
    return False


def validate_dir(root_dir):
    """Validate all the files in the root_dir passed as argument.

    The tree is walked recursively, skipping every directory listed in
    EXCLUDE_DIRECTORIES. All files are validated even after a failure.

    Returns:
        True if root_dir is excluded or every file passed, else False.
    """
    if root_dir in EXCLUDE_DIRECTORIES:
        return True

    all_passed = True
    for current, subdirs, filenames in os.walk(root_dir):  # pylint: disable=not-an-iterable
        # Prune excluded dirs in place so that os.walk does not descend into them
        subdirs[:] = [name for name in subdirs
                      if os.path.join(current, name) not in EXCLUDE_DIRECTORIES]
        for filename in filenames:
            # validate_file is evaluated first so that it runs for every file
            all_passed = validate_file(os.path.join(current, filename)) and all_passed
    return all_passed


# ==============================================
# VALIDATOR FUNCTION DEFINITIONS
# ==============================================

def check_pylint(file_path, config_path=None):
    """Run pylint on a python source file.

    Args:
        file_path: path of the file to check; files that do not end in
            '.py' pass without being checked.
        config_path: optional pylint rcfile.

    Returns:
        A (status, output) tuple. status is True when pylint exits with
        code 0. output is None when the file was skipped or pylint wrote
        nothing to stdout, otherwise one formatted line per reported entry.
    """
    if not file_path.endswith(".py"):
        return True, None

    pylint_args = ['--output-format=json', '--reports=yes']
    if config_path is not None:
        pylint_args.append('--rcfile=' + config_path)
    command = 'pylint {} {}'.format(' '.join(pylint_args), file_path)

    # warn_only: a non-zero exit code must not abort; it is inspected below
    with settings(warn_only=True), quiet():
        res = local(command, capture=True)

    if res.stdout == '':
        assert res.return_code == 0, 'return_code={}, expected=0\n{}'.format(
            res.return_code, res.stderr)
        return True, None

    basename = os.path.basename(file_path)
    report_lines = []
    for entry in json.loads(res.stdout):
        # Remove extra whitespace and hints
        text = entry['message'].replace('^', '').replace('|', '')
        text = re.sub(' +', ' ', text).strip()
        report_lines.append(format_message(basename, entry['line'], text, entry['symbol']))
    return res.return_code == 0, ''.join(report_lines)


def check_pycodestyle(file_path, config_path=None):
    """Run pycodestyle on a python source file, honoring inline disables.

    Args:
        file_path: path of the file to check; files that do not end in
            '.py' pass without being checked.
        config_path: optional pycodestyle config file.

    Returns:
        A (status, output) tuple. status is True when no error remains
        after dropping the ones disabled in the source. output is None on
        success, otherwise one formatted line per remaining error.
    """
    if not file_path.endswith(".py"):
        return True, None

    # Imported only once there is a python file to check, so that skipping
    # other file types does not require pycodestyle to be importable.
    import pycodestyle

    # A custom reporter class for pycodestyle that checks for disabled errors
    # and formats the style report output.
    class CustomReporter(pycodestyle.StandardReport):
        def get_file_results(self):
            # Iterates through the lines of code that generated lint errors and
            # checks if the given error has been disabled for that line via an
            # inline comment (e.g., # pycodestyle: disable=E201,E226). Those
            # that have been disabled are not treated as errors.
            self._deferred_print.sort()
            results = []
            prev_line_num = -1
            prev_line_errs = []
            for line_number, _, code, text, _ in self._deferred_print:
                if prev_line_num == line_number:
                    # Same source line as the previous error: reuse the
                    # disabled codes that were already parsed for it
                    err_codes = prev_line_errs
                else:
                    line = self.lines[line_number - 1]
                    m = PYCODESTYLE_COMMENT_PATTERN.search(line)
                    if m and m.group(0):
                        # Everything after '=' is a comma-separated code list
                        err_codes = [ec.strip() for ec in m.group(0).split('=')[1].split(',')]
                    else:
                        err_codes = []
                prev_line_num = line_number
                prev_line_errs = err_codes
                if code in err_codes:
                    # Error is disabled in source
                    continue

                results.append(format_message(os.path.basename(file_path),
                                              self.line_offset + line_number,
                                              text, code))
            return results, len(results) == 0
    # END CustomReporter class

    options = {} if config_path is None else {'config_file': config_path}
    style = pycodestyle.StyleGuide(quiet=True, **options)

    # Set the reporter option to our custom one
    style.options.reporter = CustomReporter
    style.init_report()
    report = style.check_files([file_path])
    results, status = report.get_file_results()
    output = None if status else ''.join(results)
    return status, output


def check_java_checkstyle(file_path, config_path=None):
    """Run checkstyle on a java source file.

    If the checkstyle jar is not present it is first fetched by running
    'gradle downloadJars' in the client/controller directory.

    Args:
        file_path: path of the file to check; files that do not end in
            '.java' pass without being checked.
        config_path: optional checkstyle configuration file (passed via -c).

    Returns:
        (True, None) when the file was skipped or checkstyle printed nothing
        between its start and end markers, otherwise (False, output) with
        one formatted line per reported violation.
    """
    if not file_path.endswith(".java"):
        return True, None

    if not os.path.exists(CHECKSTYLE_JAR_PATH):
        controller_dir = os.path.join(OTTERTUNE_DIR, "client/controller")
        with lcd(controller_dir):  # pylint: disable=not-context-manager
            local("gradle downloadJars")

    if config_path is None:
        options = ''
    else:
        options = '-c ' + config_path
    command = "java -jar {} {} {}".format(CHECKSTYLE_JAR_PATH, options, file_path)
    with quiet():
        res = local(command, capture=True)

    # The first and last lines of the output are the audit start/end markers
    lines = res.stdout.split('\n')
    assert len(lines) >= 2 and lines[0] == "Starting audit..." and lines[-1] == "Audit done."
    if len(lines) == 2:
        return True, None

    basename = os.path.basename(file_path)
    report_lines = []
    for raw_line in lines[1:-1]:
        # Colon-separated fields: the second one is the line number and the
        # last one holds the message followed by the bracketed check name
        fields = raw_line.strip().split(':')
        line_number = int(fields[1])
        text, code = fields[-1].rsplit('[', 1)
        report_lines.append(format_message(basename, line_number, text.strip(), code[:-1]))
    return False, ''.join(report_lines)


def check_illegal_patterns(file_path, config_path=None):  # pylint: disable=unused-argument
    """Scan a python/java source file for illegal patterns.

    Every line is stripped of surrounding whitespace; lines that start with
    the language's line-comment marker are ignored, all others are searched
    with each pattern configured for the language.

    Args:
        file_path: path of the file to scan; files that are neither '.py'
            nor '.java' pass without being read.
        config_path: unused.

    Returns:
        (True, None) if nothing was found, otherwise (False, output) with
        one formatted line per match.
    """
    if file_path.endswith(".py"):
        illegal_patterns, comment = PYTHON_ILLEGAL_PATTERNS, "#"
    elif file_path.endswith(".java"):
        illegal_patterns, comment = JAVA_ILLEGAL_PATTERNS, "//"
    else:
        return True, None

    basename = os.path.basename(file_path)
    violations = []
    with open(file_path, 'r') as f:
        for line_num, raw_line in enumerate(f, start=1):
            stripped = raw_line.strip()
            if stripped.startswith(comment):
                continue
            for regex, explanation in illegal_patterns:
                if regex.search(stripped):
                    violations.append(format_message(filename=basename,
                                                     line=line_num,
                                                     message=explanation))

    if violations:
        return False, ''.join(violations)
    return True, None


def check_header(file_path, config_file=None):  # pylint: disable=unused-argument
    """Check that a python/java source file carries the expected header.

    The header pattern for the file's language is searched for in the file
    contents, and the matched header must contain the file's own name.

    Args:
        file_path: path of the file to check; files that are neither '.py'
            nor '.java' pass without being read.
        config_file: unused.

    Returns:
        (True, None) if the header is present and names the file, otherwise
        (False, output) with a formatted message describing the problem.
    """
    if file_path.endswith(".py"):
        header_pattern = PYTHON_HEADER_PATTERN
    elif file_path.endswith(".java"):
        header_pattern = JAVA_HEADER_PATTERN
    else:
        return True, None

    with open(file_path, 'r') as f:
        file_contents = f.read()

    filename = os.path.basename(file_path)
    header_match = header_pattern.search(file_contents)
    if header_match is None:
        return False, format_message(filename=filename, line=1,
                                     message='Missing header')
    if filename not in header_match.group(0):
        return False, format_message(filename=filename, line=2,
                                     message="Incorrect filename in header")
    return True, None


# ==============================================
# VALIDATORS
# ==============================================

# Struct for storing validator metadata:
#   name        - label shown in the log output and in failure reports
#   validate_fn - callable taking (file_path, config_path) and returning a
#                 (status, output) tuple
#   modules     - names of the python modules that must be installed
#   config_path - config file handed to validate_fn, or None
Validator = namedtuple('Validator', ['name', 'validate_fn', 'modules', 'config_path'])

# Validators are run on each file in the order listed here
VALIDATORS = [
    # Runs pylint on python source
    Validator(name='check_pylint',
              validate_fn=check_pylint,
              modules=['pylint'],
              config_path=os.path.join(OTTERTUNE_DIR, "script/formatting/config/pylintrc")),

    # Runs pycodestyle on python source
    Validator(name='check_pycodestyle',
              validate_fn=check_pycodestyle,
              modules=['pycodestyle'],
              config_path=os.path.join(OTTERTUNE_DIR, "script/formatting/config/pycodestyle")),

    # Runs checkstyle on the java source
    Validator(name="check_java_checkstyle",
              validate_fn=check_java_checkstyle,
              modules=[],
              config_path=os.path.join(OTTERTUNE_DIR,
                                       "script/formatting/config/google_checks.xml")),

    # Checks that the python/java source files do not use illegal patterns
    Validator(name='check_illegal_patterns',
              validate_fn=check_illegal_patterns,
              modules=[],
              config_path=None),

    # Checks that the python/java source files have headers
    Validator(name='check_header',
              validate_fn=check_header,
              modules=[],
              config_path=None),
]


# ==============================================
# MAIN FUNCTION
# ==============================================

def main():
    """Validate the requested targets and exit with the overall result.

    Targets are chosen in this order: the files staged in git when
    --staged-files is given, else the PATH arguments, else DEFAULT_DIRS.
    Before anything is validated, the prerequisites of every validator are
    checked with validate_validator().

    Exits with EXIT_SUCCESS only if every target passes. Exits with
    EXIT_FAILURE if a prerequisite is missing, no staged files are found,
    a target is neither a file nor a directory, or any target fails.
    """
    parser = argparse.ArgumentParser(description="Validate OtterTune's source code")
    parser.add_argument('paths', metavar='PATH', type=str, nargs='*',
                        help='Files or directories to (recursively) validate')
    parser.add_argument('--staged-files', action='store_true',
                        help='Apply the selected action(s) to all staged files (git)')
    args = parser.parse_args()

    LOG.info('\nRunning source validators:\n%s\n',
             '\n'.join('  ' + v.name for v in VALIDATORS))
    for validator in VALIDATORS:
        if not validate_validator(validator.modules, validator.config_path):
            sys.exit(EXIT_FAILURE)

    if args.staged_files:
        try:
            staged = subprocess.check_output(["git", "diff", "--name-only", "HEAD",
                                              "--cached", "--diff-filter=d"])
        except (subprocess.CalledProcessError, OSError):
            # git is missing or failed (e.g. not inside a repository); fall
            # through to the "no staged files" error below
            staged = b''
        # check_output returns bytes on Python 3, which cannot be joined
        # with the (text) OTTERTUNE_DIR path
        if isinstance(staged, bytes):
            staged = staged.decode('utf-8')
        # git prints one path per line; splitting on lines keeps file names
        # that contain spaces intact
        targets = [os.path.abspath(os.path.join(OTTERTUNE_DIR, name))
                   for name in staged.splitlines() if name.strip()]
        if not targets:
            LOG.error("No staged files or not calling from a repository. Exiting...")
            sys.exit(EXIT_FAILURE)
    elif args.paths:
        targets = args.paths
    else:
        targets = DEFAULT_DIRS

    # Accumulate the result over all targets: a failure in any target must
    # fail the run, not only a failure in the last one
    all_passed = True
    for target in targets:
        target = os.path.abspath(target)
        if os.path.isfile(target):
            LOG.debug("Scanning file: %s\n", target)
            status = validate_file(target)
        elif os.path.isdir(target):
            LOG.debug("Scanning directory: %s\n", target)
            status = validate_dir(target)
        else:
            LOG.error("%s isn't a file or directory", target)
            sys.exit(EXIT_FAILURE)
        if not status:
            all_passed = False

    if not all_passed:
        LOG.info(SEPARATOR + '\n')
        LOG.info("Validation NOT successful\n")
        sys.exit(EXIT_FAILURE)

    LOG.info("Validation successful\n")
    sys.exit(EXIT_SUCCESS)


if __name__ == '__main__':
    main()