# Copyright 2018 The LUCI Authors. All rights reserved.
# Use of this source code is governed under the Apache License, Version 2.0
# that can be found in the LICENSE file.
"""API for Tricium analyzers to use.

This recipe module is intended to support different kinds of
analyzer recipes, including:

* Recipes that wrap one or more legacy analyzers.
* Recipes that accumulate comments one by one.
* Recipes that wrap other tools and parse their output.
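
A minimal usage sketch (hypothetical recipe code; assumes this module is
available in DEPS as api.tricium):

    def RunSteps(api):
      api.tricium.add_comment(
          'Spellchecker/typo', 'Possible typo.', 'README.md', start_line=3)
      api.tricium.write_comments()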
"""
import fnmatch
from google.protobuf import json_format
from recipe_engine import recipe_api
from . import legacy_analyzers
from PB.tricium.data import Data
# The maximum number of comments to post. The Tricium service will refuse to
# post more than this many comments, as it would be a poor user experience.
MAX_NUM_COMMENTS = 50
class TriciumApi(recipe_api.RecipeApi):
"""TriciumApi provides basic support for Tricium."""
  # Expose pre-defined analyzers, as well as the LegacyAnalyzer class.
LegacyAnalyzer = legacy_analyzers.LegacyAnalyzer
analyzers = legacy_analyzers.Analyzers
def __init__(self, **kwargs):
"""Sets up the API.
Initializes an empty list of comments for use with
add_comment and write_comments.
"""
super(TriciumApi, self).__init__(**kwargs)
self._comments = []
def add_comment(self,
category,
message,
path,
start_line=0,
end_line=0,
start_char=0,
end_char=0,
                  suggestions=()):
    """Adds one comment to accumulate.

    Duplicate comments are only added once.

    Args:
      * category (str): Comment category name, e.g. an analyzer name,
        optionally with a subcategory ("Analyzer/subcategory").
      * message (str): Comment message text to show in Gerrit.
      * path (str): Path of the file the comment applies to.
      * start_line, end_line, start_char, end_char (int): Optional position
        of the comment; left at zero, the comment applies to the whole file.
      * suggestions: Optional suggested fixes, as dicts that can be parsed
        into tricium Data.Suggestion messages.
    """
comment = Data.Comment()
comment.category = category
comment.message = message
comment.path = path
comment.start_line = start_line
comment.end_line = end_line
comment.start_char = start_char
comment.end_char = end_char
for s in suggestions:
# Convert from dict to proto message by way of JSON.
json_format.Parse(self.m.json.dumps(s), comment.suggestions.add())
if comment not in self._comments:
self._comments.append(comment)
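
  # Example of a call with a suggested fix (hypothetical values; the
  # suggestion dict keys are assumed to mirror the tricium Data.Suggestion
  # and Data.Replacement proto fields, which json_format.Parse accepts):
  #
  #   api.tricium.add_comment(
  #       'Spellchecker/typo', 'Possible typo.', 'src/main.py',
  #       start_line=12, end_line=12, start_char=4, end_char=14,
  #       suggestions=[{
  #           'description': 'Fix the typo.',
  #           'replacements': [{
  #               'path': 'src/main.py',
  #               'replacement': 'accumulate',
  #               'startLine': 12,
  #               'endLine': 12,
  #               'startChar': 4,
  #               'endChar': 14,
  #           }],
  #       }])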
def write_comments(self):
"""Emit the results accumulated by `add_comment`."""
results = Data.Results()
results.comments.extend(self._comments)
self.emit_results(results)
def emit_results(self, results):
"""Sets the tricium output property with results.
This overwrites any previous results; it is expected to be called only once
in a recipe.
"""
step = self.m.step('write results', [])
num_comments = len(results.comments)
if num_comments > MAX_NUM_COMMENTS:
# Tricium will refuse to post comments if there are too many; we can just
# avoid emitting such results.
step.presentation.status = self.m.step.WARNING
step.presentation.step_text = (
'%s comments, exceeded maximum %s comments' %
(num_comments, MAX_NUM_COMMENTS))
return
# The "tricium" output property is read by the Tricium service.
results_json = json_format.MessageToJson(results)
step.presentation.properties['tricium'] = results_json
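
  # Example (hypothetical) of emitting a hand-built Results message rather
  # than going through add_comment/write_comments:
  #
  #   results = Data.Results()
  #   comment = results.comments.add()
  #   comment.category = 'MyAnalyzer'
  #   comment.message = 'Something looks off here.'
  #   comment.path = 'src/main.py'
  #   api.tricium.emit_results(results)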
def run_legacy(self, analyzers, input_base, affected_files, commit_message):
"""Runs legacy analyzers.

    Args:
      * analyzers (List(LegacyAnalyzer)): Analyzers to run.
      * input_base (Path): The Tricium input dir, generally a checkout base.
      * affected_files (List(str)): Paths of files in the change, relative
        to input_base.
      * commit_message (str): Commit message from Gerrit.
    """
self._write_files_data(affected_files, commit_message, input_base)
    # Accumulate all analyzer results together. Each comment individually
    # contains an analyzer/category name, so this won't cause confusion.
all_results = Data.Results()
# For each analyzer, download the CIPD package, run it and accumulate
# results. Note: Each analyzer could potentially be run in parallel.
for analyzer in analyzers:
with self.m.step.nest(analyzer.name) as parent_step:
        # Check analyzer.path_filters and conditionally skip.
        if not _matches_path_filters(affected_files, analyzer.path_filters):
          parent_step.presentation.step_text = 'skipped due to path filters'
          continue
try:
analyzer_dir = self.m.path['cleanup'].join(analyzer.name)
output_base = analyzer_dir.join('out')
package_dir = analyzer_dir.join('package')
self._fetch_legacy_analyzer(package_dir, analyzer)
results = self._run_legacy_analyzer(
package_dir,
analyzer,
input_dir=input_base,
output_dir=output_base)
# Show step results. If there are too many comments, don't include
# them. If one analyzer fails, continue running the rest.
num_comments = len(results.comments)
parent_step.presentation.step_text = '%s comment(s)' % num_comments
parent_step.presentation.logs['result'] = json_format.MessageToJson(
results)
if num_comments > MAX_NUM_COMMENTS:
parent_step.presentation.status = self.m.step.WARNING
parent_step.presentation.step_text += (' exceeds max %s' %
MAX_NUM_COMMENTS)
continue
all_results.comments.extend(results.comments)
        except self.m.step.StepFailure:
          parent_step.presentation.step_text = 'failed'
# The tricium data dir with files.json is written in the checkout cache
# directory and should be cleaned up.
self.m.file.rmtree('clean up tricium data dir', input_base.join('tricium'))
self.emit_results(all_results)
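
  # Example call from a wrapper recipe (hypothetical values; SPELLCHECKER is
  # assumed to be one of the analyzers defined in legacy_analyzers):
  #
  #   api.tricium.run_legacy(
  #       [api.tricium.analyzers.SPELLCHECKER],
  #       input_base=checkout_dir,
  #       affected_files=['README.md', 'docs/guide.md'],
  #       commit_message='Fix typos.')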
def _write_files_data(self, affected_files, commit_message, base_dir):
"""Writes a Files input message to a file.

    Args:
      * affected_files (List(str)): File paths, relative to `base_dir`.
      * commit_message (str): The commit message from Gerrit.
      * base_dir (Path): Input files base directory.
    """
"""
files = Data.Files()
files.commit_message = commit_message
for path in affected_files:
# TODO(qyearsley): Set the is_binary and status fields for each file.
# Analyzers use these fields to determine whether to skip files.
f = files.files.add()
f.path = path
data_dir = self._ensure_data_dir(base_dir)
    # Note: The JSON written by self.m.file.write_proto doesn't match what
    # Tricium analyzers expect, but json_format.MessageToJson does.
files_json = json_format.MessageToJson(files)
self.m.file.write_text('write files.json', data_dir.join('files.json'),
files_json)
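
  # The resulting tricium/data/files.json looks roughly like this
  # (illustrative; json_format.MessageToJson emits camelCase field names):
  #
  #   {
  #     "commitMessage": "Fix typos.",
  #     "files": [{"path": "README.md"}, {"path": "docs/guide.md"}]
  #   }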
def _read_results(self, base_dir):
"""Reads a Tricium Results message from a file.
Args:
* base_dir (Path): A directory. Generally this will
be the same as the -output arg passed to the analyzer.
Returns: Results protobuf message.
"""
data_dir = self._ensure_data_dir(base_dir)
results_json = self.m.file.read_text(
'read results',
data_dir.join('results.json'),
test_data='{"comments":[]}')
return json_format.Parse(results_json, Data.Results())
def _ensure_data_dir(self, base_dir):
"""Creates the Tricium data directory if it doesn't exist.

    Simple Tricium analyzers expect their input and output data at a fixed
    subpath ("tricium/data") relative to the input and output directories
    they are passed.

    Args:
      * base_dir (Path): A directory; either the -input or the -output
        directory passed to a Tricium analyzer.

Returns: Tricium data file directory inside base_dir.
"""
data_dir = base_dir.join('tricium', 'data')
self.m.file.ensure_directory('ensure tricium data dir', data_dir)
return data_dir
def _fetch_legacy_analyzer(self, package_dir, analyzer):
"""Fetches an analyzer package from CIPD.
Args:
      * package_dir (Path): The path to fetch to.
* analyzer (LegacyAnalyzer): Analyzer package to fetch.
"""
ensure_file = self.m.cipd.EnsureFile()
ensure_file.add_package(analyzer.package, version='live')
self.m.cipd.ensure(package_dir, ensure_file)
def _run_legacy_analyzer(self, package_dir, analyzer, input_dir, output_dir):
"""Runs a simple legacy analyzer executable and returns the results.
Args:
* package_dir (Path): The directory where the analyzer CIPD package
contents have been unpacked to.
* analyzer (LegacyAnalyzer): Analyzer object to run.
* input_dir (Path): The Tricium input dir, which is expected to contain
files as well as the metadata at tricium/data/files.json.
* output_dir (Path): The directory to write results into.
"""
# Some analyzers depend on other files in the CIPD package, so cwd is
# expected to be the directory with the analyzer.
with self.m.context(cwd=package_dir):
cmd = [
package_dir.join(analyzer.executable), '-input', input_dir, '-output',
output_dir
] + analyzer.extra_args
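      # The assembled command looks roughly like (illustrative paths):
      #   <package_dir>/<executable> -input <input_dir> -output <output_dir>
      #       [extra_args...]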
      step_result = self.m.step('run analyzer', cmd)
      step_result.presentation.logs['cmd'] = ' '.join(str(c) for c in cmd)
return self._read_results(output_dir)
def _matches_path_filters(files, patterns):
  """Returns True if any file matches any glob pattern.

  An empty pattern list matches all files.
  """
  if not patterns:
    return True
  return any(fnmatch.fnmatch(f, p) for p in patterns for f in files)
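
# For example (hypothetical values):
#   _matches_path_filters(['a/b.py', 'c.md'], ['*.md'])  # -> True
#   _matches_path_filters(['a/b.py'], [])  # -> True (no filters means match)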