blob: 748367ab1345f04b1eefaddb90bccab386b151ad [file] [log] [blame]
# Copyright (c) 2014 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json
import logging
import time
from autotest_lib.client.common_lib import error
from autotest_lib.server.cros import moblab_test
from autotest_lib.server.hosts import moblab_host
from autotest_lib.utils import labellib
# Timeout handed to host.run_as_moblab() for each run_suite.py invocation
# (presumably seconds, i.e. 3 hours -- TODO confirm against run_as_moblab).
SUITE_TIMEOUT = 3 * 60 * 60
# How long, in seconds, to poll for the suite job's first child job.
WAIT_FOR_CHILD_JOB_CREATION_TIMEOUT = 5 * 60
# We can't actually import run_suite here because importing run_suite pulls
# in certain MySQLdb dependencies that fail to load in the context of a
# test. The tokens that delimit the JSON payload in run_suite's output are
# therefore spelled out locally (presumably mirroring the values run_suite
# emits -- keep them in sync).
JSON_START_TOKEN = "#JSON_START#"
JSON_END_TOKEN = "#JSON_END#"
class moblab_RunSuite(moblab_test.MoblabTest):
    """
    Moblab run suite test. Ensures that a Moblab can run a suite from start
    to finish by kicking off a suite which will have the Moblab stage an
    image, provision its DUTs and run the tests.
    """
    version = 1

    def run_once(self, host, suite_name, moblab_suite_max_retries,
                 target_build=''):
        """Runs a suite on a Moblab Host against its test DUTS.

        run_suite.py is invoked twice on the moblab: first with
        --create_and_return --json_dump, from whose output the suite job id
        is parsed, and then again with --mock_job_id set to that job id once
        a child job of the suite job has been observed.

        @param host: Moblab Host that will run the suite.
        @param suite_name: Name of the suite to run.
        @param moblab_suite_max_retries: The maximum number of test retries
                allowed within the suite launched on moblab.
        @param target_build: Optional build to be use in the run_suite
                call on moblab. This argument is passed as is to run_suite. It
                must be a sensible build target for the board of the sub-DUTs
                attached to the moblab. When empty, the stable version
                registered in the AFE for the DUT's board is used.

        @raises TestFail if the AFE reports no hosts, if no job id can be
                parsed from the kickoff output, or if no child job shows up
                within WAIT_FOR_CHILD_JOB_CREATION_TIMEOUT seconds.
        @raises AutoservRunError if the suite does not complete successfully.
        """
        # Fetch the board of the DUT's assigned to this Moblab. There should
        # only be one type.
        try:
            dut = host.afe.get_hosts()[0]
        except IndexError:
            raise error.TestFail('All hosts for this MobLab are down. Please '
                                 'request the lab admins to take a look.')
        labels = labellib.LabelsMapping(dut.labels)
        board = labels['board']
        if not target_build:
            stable_version_map = host.afe.get_stable_version_map(
                    host.afe.CROS_IMAGE_TYPE)
            target_build = stable_version_map.get_image_name(board)

        logging.info('Running suite: %s.', suite_name)
        # Every fragment except the last must end with a space: adjacent
        # string literals are concatenated verbatim, and a missing separator
        # fuses two options into a single malformed argument.
        cmd = ("%s/site_utils/run_suite.py --pool='' --board=%s --build=%s "
               "--suite_name=%s --retry=True --max_retries=%d "
               "--create_and_return --json_dump" %
               (moblab_host.AUTOTEST_INSTALL_DIR, board, target_build,
                suite_name, moblab_suite_max_retries))
        logging.debug('Run suite command: %s', cmd)
        try:
            result = host.run_as_moblab(cmd, timeout=SUITE_TIMEOUT)
            logging.debug('Suite Kickoff Output:\n%s', result.stdout)
        except error.AutoservRunError as e:
            if _is_run_suite_error_critical(e.result_obj.exit_status):
                raise
            # No result object is available on this path, so there is no
            # output to parse a job id from; stop here.
            logging.exception(
                    'Ignoring a non-critical exception in run_suite.')
            return

        job_id = _parse_job_id(result.stdout)
        if job_id is None:
            raise error.TestFail('Could not start a suite job, or could not '
                                 'parse a job_id from the run_suite.py output.')

        found_child = self._wait_for_child_job_creation(
                job_id, timeout=WAIT_FOR_CHILD_JOB_CREATION_TIMEOUT)
        if not found_child:
            # See crbug.com/718618. If the scheduler crashes, no child job will
            # be created.
            raise error.TestFail(
                    'Failed to start a child job within the time limit '
                    '(%s minutes); probably this is due to a bug in the '
                    'scheduler.'
                    % (WAIT_FOR_CHILD_JOB_CREATION_TIMEOUT / 60.0))

        try:
            result = host.run_as_moblab(
                    '%s/site_utils/run_suite.py --board=%s --build=%s '
                    '--suite_name=%s --mock_job_id %s' %
                    (moblab_host.AUTOTEST_INSTALL_DIR, board, target_build,
                     suite_name, job_id),
                    timeout=SUITE_TIMEOUT)
            logging.debug('Suite Result Output:\n%s', result.stdout)
        except error.AutoservRunError as e:
            if _is_run_suite_error_critical(e.result_obj.exit_status):
                raise
            logging.exception(
                    'Ignoring a non-critical exception in run_suite.')

    def _wait_for_child_job_creation(self, job_id, timeout,
                                     poll_interval_seconds=10):
        """Waits |timeout| seconds for job |job_id| to have a child job.

        @param job_id: The job id of the suite job.
        @param timeout: The maximum amount of time in seconds to poll for.
        @param poll_interval_seconds: The number of seconds to sleep in between
                each query RPC.

        @returns True as soon as a query returns at least one job whose
                parent is |job_id|; False if |timeout| elapses first.
        """
        # NOTE(review): self._afe is not assigned anywhere in this class;
        # presumably the MoblabTest base class provides it -- confirm.
        start_time = time.time()
        while time.time() - start_time < timeout:
            jobs = self._afe.get_jobs(parent_job_id=job_id)
            if jobs:
                return True
            time.sleep(poll_interval_seconds)
        return False
def _parse_job_id(run_suite_output):
    """Parses the job id from the output of a run suite invocation.

    The output is expected to contain exactly one JSON document wrapped
    between JSON_START_TOKEN and JSON_END_TOKEN.

    @param run_suite_output: Text captured from run_suite.py's stdout.

    @returns The value stored under 'job_id' in the JSON document, or None
            if the delimiters are missing or repeated, the text between
            them is not valid JSON, the document is not a JSON object, or
            it has no 'job_id' key.
    """
    try:
        # Each split must yield exactly two pieces; a missing or repeated
        # token makes the unpacking raise ValueError.
        _, json_start = run_suite_output.split(JSON_START_TOKEN)
        json_body, _ = json_start.split(JSON_END_TOKEN)
        payload = json.loads(json_body)
    except ValueError:
        return None
    # Valid JSON that is not an object (list, string, number) has no .get().
    if not isinstance(payload, dict):
        return None
    return payload.get('job_id')
def _is_run_suite_error_critical(return_code):
# We can't actually import run_suite here because importing run_suite pulls
# in certain MySQLdb dependencies that fail to load in the context of a
# test.
# OTOH, these return codes are unlikely to change because external users /
# builders depend on them.
return return_code not in (
0, # run_suite.RETURN_CODES.OK
2, # run_suite.RETURN_CODES.WARNING
)