| # Copyright 2018 The LUCI Authors. All rights reserved. |
| # Use of this source code is governed under the Apache License, Version 2.0 |
| # that can be found in the LICENSE file. |
| |
"""API for Tricium analyzers to use.

This recipe module is intended to support different kinds of
analyzer recipes, including:
  * Recipes that wrap one or more legacy analyzers.
  * Recipes that accumulate comments one by one.
  * Recipes that wrap other tools and parse their output.
"""
| |
| import collections |
| import fnmatch |
| |
| from google.protobuf import json_format |
| |
| from recipe_engine import recipe_api |
| |
| from . import legacy_analyzers |
| |
| from PB.tricium.data import Data |
| |
# The maximum number of comments to post. The Tricium service will refuse to
# post more than this many comments, as it would be a poor user experience.
# This limit is checked per analyzer in run_legacy and once more on the
# combined results in emit_results.
MAX_NUM_COMMENTS = 50
| |
| |
class TriciumApi(recipe_api.RecipeApi):
  """TriciumApi provides basic support for Tricium.

  Two workflows are supported:
    * Accumulating comments one at a time with `add_comment` and then
      emitting them with `write_comments`.
    * Running packaged legacy analyzers with `run_legacy`, which collects
      their results and emits them.

  In both cases the results end up in the "tricium" output property, set by
  `emit_results`.
  """

  # Expose pre-defined analyzers, as well as the LegacyAnalyzer class.
  LegacyAnalyzer = legacy_analyzers.LegacyAnalyzer
  analyzers = legacy_analyzers.Analyzers

  def __init__(self, **kwargs):
    """Sets up the API.

    Initializes an empty list of comments for use with
    add_comment and write_comments.
    """
    super(TriciumApi, self).__init__(**kwargs)
    self._comments = []

  def add_comment(self,
                  category,
                  message,
                  path,
                  start_line=0,
                  end_line=0,
                  start_char=0,
                  end_char=0,
                  suggestions=()):
    """Adds one comment to accumulate.

    A comment that compares equal to one that was already added is dropped,
    so adding the same comment twice has no additional effect.

    Args:
      * category: Value for the comment's `category` field.
      * message: Value for the comment's `message` field.
      * path: Value for the comment's `path` field.
      * start_line: Value for the comment's `start_line` field.
      * end_line: Value for the comment's `end_line` field.
      * start_char: Value for the comment's `start_char` field.
      * end_char: Value for the comment's `end_char` field.
      * suggestions: Iterable of dicts; each one is converted into a
        suggestion message on the comment.
    """
    comment = Data.Comment()
    comment.category = category
    comment.message = message
    comment.path = path
    comment.start_line = start_line
    comment.end_line = end_line
    comment.start_char = start_char
    comment.end_char = end_char
    for s in suggestions:
      # Convert from dict to proto message by way of JSON.
      json_format.Parse(self.m.json.dumps(s), comment.suggestions.add())
    if comment not in self._comments:
      self._comments.append(comment)

  def write_comments(self):
    """Emits the results accumulated by `add_comment`."""
    results = Data.Results()
    results.comments.extend(self._comments)
    self.emit_results(results)

  def emit_results(self, results):
    """Sets the tricium output property with results.

    This overwrites any previous results; it is expected to be called only once
    in a recipe.

    If there are more than MAX_NUM_COMMENTS comments, the property is not set;
    the "write results" step is marked as a warning instead.

    Args:
      * results (Data.Results): The results message to emit.
    """
    step = self.m.step('write results', [])
    num_comments = len(results.comments)
    if num_comments > MAX_NUM_COMMENTS:
      # Tricium will refuse to post comments if there are too many; we can just
      # avoid emitting such results.
      step.presentation.status = self.m.step.WARNING
      step.presentation.step_text = (
          '%s comments, exceeded maximum %s comments' %
          (num_comments, MAX_NUM_COMMENTS))
      return
    # The "tricium" output property is read by the Tricium service.
    results_json = json_format.MessageToJson(results)
    step.presentation.properties['tricium'] = results_json

  def run_legacy(self, analyzers, input_base, affected_files, commit_message):
    """Runs legacy analyzers.

    Each analyzer runs inside its own nested step. An analyzer is skipped if
    none of the affected files match its path filters, and its results are
    discarded if it produces more than MAX_NUM_COMMENTS comments. A failure
    of one analyzer does not prevent the remaining analyzers from running.
    The combined results are emitted with `emit_results`.

    Args:
      * analyzers (List(LegacyAnalyzer)): Analyzers to run.
      * input_base (Path): The Tricium input dir, generally a checkout base.
      * affected_files (List(str)): Paths of files in the change, relative
        to input_base.
      * commit_message (str): Commit message from Gerrit.
    """
    self._write_files_data(affected_files, commit_message, input_base)
    # Accumulate all analyzer results together. Each comment individually
    # contains an analyzer/category name, so this won't cause confusion.
    all_results = Data.Results()
    # For each analyzer, download the CIPD package, run it and accumulate
    # results. Note: Each analyzer could potentially be run in parallel.
    for analyzer in analyzers:
      with self.m.step.nest(analyzer.name) as parent_step:
        # Check analyzer.path_filters and conditionally skip.
        if not _matches_path_filters(affected_files, analyzer.path_filters):
          parent_step.presentation.step_text = 'skipped due to path filters'
          # Actually skip: without this the analyzer would still be fetched
          # and run, and the "skipped" step text would be overwritten.
          continue
        try:
          analyzer_dir = self.m.path['cleanup'].join(analyzer.name)
          output_base = analyzer_dir.join('out')
          package_dir = analyzer_dir.join('package')
          self._fetch_legacy_analyzer(package_dir, analyzer)
          results = self._run_legacy_analyzer(
              package_dir,
              analyzer,
              input_dir=input_base,
              output_dir=output_base)
          # Show step results. If there are too many comments, don't include
          # them. If one analyzer fails, continue running the rest.
          num_comments = len(results.comments)
          parent_step.presentation.step_text = '%s comment(s)' % num_comments
          parent_step.presentation.logs['result'] = json_format.MessageToJson(
              results)
          if num_comments > MAX_NUM_COMMENTS:
            parent_step.presentation.status = self.m.step.WARNING
            parent_step.presentation.step_text += (' exceeds max %s' %
                                                   MAX_NUM_COMMENTS)
            continue
          all_results.comments.extend(results.comments)
        except self.m.step.StepFailure:
          parent_step.presentation.step_text = 'failed'
    # The tricium data dir with files.json is written in the checkout cache
    # directory and should be cleaned up.
    self.m.file.rmtree('clean up tricium data dir', input_base.join('tricium'))
    self.emit_results(all_results)

  def _write_files_data(self, affected_files, commit_message, base_dir):
    """Writes a Files input message to a file.

    The message is written as JSON to tricium/data/files.json under
    `base_dir`.

    Args:
      * affected_files (List(str)): File paths. This should
        be relative to `base_dir`.
      * commit_message (str): The commit message from Gerrit.
      * base_dir (Path): Input files base directory.
    """
    files = Data.Files()
    files.commit_message = commit_message
    for path in affected_files:
      # TODO(qyearsley): Set the is_binary and status fields for each file.
      # Analyzers use these fields to determine whether to skip files.
      f = files.files.add()
      f.path = path
    data_dir = self._ensure_data_dir(base_dir)
    # Note: The JSON written by self.m.file.write_proto doesn't work for what
    # Tricium analyzers expect, but json_format.MessageToJson does.
    files_json = json_format.MessageToJson(files)
    self.m.file.write_text('write files.json', data_dir.join('files.json'),
                           files_json)

  def _read_results(self, base_dir):
    """Reads a Tricium Results message from a file.

    The file read is tricium/data/results.json under `base_dir`.

    Args:
      * base_dir (Path): A directory. Generally this will
        be the same as the -output arg passed to the analyzer.

    Returns: Results protobuf message.
    """
    data_dir = self._ensure_data_dir(base_dir)
    results_json = self.m.file.read_text(
        'read results',
        data_dir.join('results.json'),
        test_data='{"comments":[]}')
    return json_format.Parse(results_json, Data.Results())

  def _ensure_data_dir(self, base_dir):
    """Creates the Tricium data directory if it doesn't exist.

    Simple Tricium analyzers assume that data is input/output from a
    particular subpath relative to the input/output paths passed.

    Args:
      * base_dir (Path): A directory, could be either the -input
        or -output passed to a Tricium analyzer.

    Returns: Tricium data file directory inside base_dir.
    """
    data_dir = base_dir.join('tricium', 'data')
    self.m.file.ensure_directory('ensure tricium data dir', data_dir)
    return data_dir

  def _fetch_legacy_analyzer(self, package_dir, analyzer):
    """Fetches an analyzer package from CIPD.

    The package is fetched at the 'live' version.

    Args:
      * package_dir (Path): The path to fetch to.
      * analyzer (LegacyAnalyzer): Analyzer package to fetch.
    """
    ensure_file = self.m.cipd.EnsureFile()
    ensure_file.add_package(analyzer.package, version='live')
    self.m.cipd.ensure(package_dir, ensure_file)

  def _run_legacy_analyzer(self, package_dir, analyzer, input_dir, output_dir):
    """Runs a simple legacy analyzer executable and returns the results.

    Args:
      * package_dir (Path): The directory where the analyzer CIPD package
        contents have been unpacked to.
      * analyzer (LegacyAnalyzer): Analyzer object to run.
      * input_dir (Path): The Tricium input dir, which is expected to contain
        files as well as the metadata at tricium/data/files.json.
      * output_dir (Path): The directory to write results into.

    Returns: Results protobuf message read back from `output_dir`.
    """
    # Some analyzers depend on other files in the CIPD package, so cwd is
    # expected to be the directory with the analyzer.
    with self.m.context(cwd=package_dir):
      cmd = [
          package_dir.join(analyzer.executable), '-input', input_dir, '-output',
          output_dir
      ] + analyzer.extra_args
      # Record the full command line as a step log to ease debugging.
      self.m.step('run analyzer',
                  cmd).presentation.logs['cmd'] = ' '.join(str(c) for c in cmd)
      return self._read_results(output_dir)
| |
| |
| def _matches_path_filters(files, patterns): |
| if len(patterns) == 0: |
| return True |
| for p in patterns: |
| if any(fnmatch.fnmatch(f, p) for f in files): |
| return True |
| return False |