scripts/slave/recipe_modules/isolate/resources/compare_build_artifacts.py - chromium/tools/build - Git at Google

 #!/usr/bin/env python
 # Copyright 2014 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Compare the artifacts from two builds."""

 import difflib
 import json
 import optparse
 import os
 import struct
 import sys
 import time

 from infra.libs.infra_types import freeze

 BASE_DIR = os.path.dirname(os.path.abspath(__file__))


 # List of files that are known to be non deterministic. This is a "temporary"
 # workaround to find regression on the deterministic builders.
 #
 # PNaCl general bug: http://crbug.com/429358
 #
 # TODO(sebmarchand): Remove this once all the files are deterministic.
 WHITELIST = freeze({
   # http://crbug.com/383340
   'android': {
     # Completed.
   },

   # http://crbug.com/330263
   'linux': {
     # Completed.
   },

   # http://crbug.com/330262
   'mac': {
     'base_unittests',
     'base_unittests.isolated',
     'browser_tests',
     'browser_tests.isolated',
     'content_browsertests',
     'content_browsertests.isolated',
     'content_unittests',
     'content_unittests.isolated',
     'crash_inspector',
     'crashpad_handler',
     'd8',
     'exif.so',
     'ffmpegsumo.so',
     'genmacro',
     'genmodule',
     'genperf',
     'genstring',
     'genversion',
     'image_diff',
     'infoplist_strings_tool',
     'interactive_ui_tests',
     'interactive_ui_tests.isolated',
     'ipc_mojo_perftests',
     'ipc_mojo_unittests',
     'libclearkeycdm.dylib',
     'mksnapshot',
     'net_unittests',
     'net_unittests.isolated',
     'osmesa.so',
     'pdfsqueeze',
     'peerconnection_server',
     'protoc',
     're2c',
     'sync_integration_tests',
     'sync_integration_tests.isolated',
     'tls_edit',
     'unit_tests',
     'unit_tests.isolated',
     'yasm',
   },

   # http://crbug.com/330260
   'win': {
     'base_unittests.exe',
     'base_unittests.isolated',
     'browser_tests.exe',
     'browser_tests.isolated',
     'chrome.dll',
     'chrome.exe',
     'chrome_child.dll',
     'chrome_watcher.dll',
     'clearkeycdm.dll',
     'content_browsertests.exe',
     'content_browsertests.isolated',
     'content_unittests.exe',
     'content_unittests.isolated',
     'd8.exe',
     'delegate_execute.exe',
     'delegate_execute_unittests.exe',
     'ipc_mojo_perftests.exe',
     'ipc_mojo_unittests.exe',
     'interactive_ui_tests.exe',
     'interactive_ui_tests.isolated',
     'metro_driver.dll',
     'mksnapshot.exe',
     'mock_nacl_gdb.exe',
     'net_unittests.exe',
     'net_unittests.isolated',
     'np_test_netscape_plugin.dll',
     'npapi_test_plugin.dll',
     'peerconnection_server.exe',
     'sync_integration_tests.exe',
     'sync_integration_tests.isolated',
     'test_registrar.exe',
     'unit_tests.exe',
     'unit_tests.isolated',
   },
 })

 def get_files_to_compare(build_dir, recursive=False):
   """Get the list of files to compare."""
   allowed = frozenset(
       ('', '.apk', '.app', '.dll', '.dylib', '.exe', '.nexe', '.so'))
   non_x_ok_exts = frozenset(('.apk', '.isolated'))
   def check(f):
     if not os.path.isfile(f):
       return False
     if os.path.basename(f).startswith('.'):
       return False
     ext = os.path.splitext(f)[1]
     if ext in non_x_ok_exts:
       return True
     return ext in allowed and os.access(f, os.X_OK)

   ret_files = set()
   for root, dirs, files in os.walk(build_dir):
     if not recursive:
       dirs[:] = [d for d in dirs if d.endswith('_apk')]
     for f in (f for f in files if check(os.path.join(root, f))):
       ret_files.add(os.path.relpath(os.path.join(root, f), build_dir))
   return ret_files


 def diff_dict(a, b):
   """Returns a yaml-like textural diff of two dict.

   It is currently optimized for the .isolated format.
   """
   out = ''
   for key in set(a) | set(b):
     va = a.get(key)
     vb = b.get(key)
     if va.__class__ != vb.__class__:
       out += '- %s:  %r != %r\n' % (key, va, vb)
     elif isinstance(va, dict):
       c = diff_dict(va, vb)
       if c:
         out += '- %s:\n%s\n' % (
             key, '\n'.join('  ' + l for l in c.splitlines()))
     elif va != vb:
       out += '- %s:  %s != %s\n' % (key, va, vb)
   return out.rstrip()


 def diff_binary(first_filepath, second_filepath, file_len):
   """Returns a compact binary diff if the diff is small enough."""
   CHUNK_SIZE = 32
   MAX_STREAMS = 10
   diffs = 0
   streams = []
   offset = 0
   with open(first_filepath, 'rb') as lhs:
     with open(second_filepath, 'rb') as rhs:
       while True:
         lhs_data = lhs.read(CHUNK_SIZE)
         rhs_data = rhs.read(CHUNK_SIZE)
         if not lhs_data:
           break
         if lhs_data != rhs_data:
           diffs += sum(l != r for l, r in zip(lhs_data, rhs_data))
           if streams is not None:
             if len(streams) < MAX_STREAMS:
               streams.append((offset, lhs_data, rhs_data))
             else:
               streams = None
         offset += len(lhs_data)
         del lhs_data
         del rhs_data
   if not diffs:
     return None
   result = '%d out of %d bytes are different (%.2f%%)' % (
         diffs, file_len, 100.0 * diffs / file_len)
   if streams:
     encode = lambda text: ''.join(i if 31 < ord(i) < 128 else '.' for i in text)
     for offset, lhs_data, rhs_data in streams:
       lhs_line = '%s \'%s\'' % (lhs_data.encode('hex'), encode(lhs_data))
       rhs_line = '%s \'%s\'' % (rhs_data.encode('hex'), encode(rhs_data))
       diff = list(difflib.Differ().compare([lhs_line], [rhs_line]))[-1][2:-1]
       result += '\n  0x%-8x: %s\n              %s\n              %s' % (
             offset, lhs_line, rhs_line, diff)
   return result


 def compare_files(first_filepath, second_filepath):
   """Compares two binaries and return the number of differences between them.

   Returns None if the files are equal, a string otherwise.
   """
   if first_filepath.endswith('.isolated'):
     with open(first_filepath, 'rb') as f:
       lhs = json.load(f)
     with open(second_filepath, 'rb') as f:
       rhs = json.load(f)
     diff = diff_dict(lhs, rhs)
     if diff:
       return '\n' + '\n'.join('  ' + line for line in diff.splitlines())
     # else, falls through binary comparison, it must be binary equal too.

   file_len = os.stat(first_filepath).st_size
   if file_len != os.stat(second_filepath).st_size:
     return 'different size: %d != %d' % (
         file_len, os.stat(second_filepath).st_size)

   return diff_binary(first_filepath, second_filepath, file_len)


 def compare_build_artifacts(first_dir, second_dir, target_platform,
                             recursive=False):
   """Compares the artifacts from two distinct builds."""
   if not os.path.isdir(first_dir):
     print >> sys.stderr, '%s isn\'t a valid directory.' % first_dir
     return 1
   if not os.path.isdir(second_dir):
     print >> sys.stderr, '%s isn\'t a valid directory.' % second_dir
     return 1

   epoch_hex = struct.pack('<I', int(time.time())).encode('hex')
   print('Epoch: %s' %
       ' '.join(epoch_hex[i:i+2] for i in xrange(0, len(epoch_hex), 2)))

   with open(os.path.join(BASE_DIR, 'deterministic_build_blacklist.json')) as f:
     blacklist = frozenset(json.load(f))
   whitelist = WHITELIST[target_platform]

   # The two directories.
   first_list = get_files_to_compare(first_dir, recursive) - blacklist
   second_list = get_files_to_compare(second_dir, recursive) - blacklist

   equals = []
   expected_diffs = []
   unexpected_diffs = []
   unexpected_equals = []
   all_files = sorted(first_list & second_list)
   missing_files = sorted(first_list.symmetric_difference(second_list))
   if missing_files:
     print >> sys.stderr, 'Different list of files in both directories:'
     print >> sys.stderr, '\n'.join('  ' + i for i in missing_files)
     unexpected_diffs.extend(missing_files)

   max_filepath_len = max(len(n) for n in all_files)
   for f in all_files:
     first_file = os.path.join(first_dir, f)
     second_file = os.path.join(second_dir, f)
     result = compare_files(first_file, second_file)
     if not result:
       tag = 'equal'
       equals.append(f)
       if f in whitelist:
         unexpected_equals.append(f)
     else:
       if f in whitelist:
         expected_diffs.append(f)
         tag = 'expected'
       else:
         unexpected_diffs.append(f)
         tag = 'unexpected'
       result = 'DIFFERENT (%s): %s' % (tag, result)
     print('%-*s: %s' % (max_filepath_len, f, result))
   unexpected_diffs.sort()

   print('Equals:           %d' % len(equals))
   print('Expected diffs:   %d' % len(expected_diffs))
   print('Unexpected diffs: %d' % len(unexpected_diffs))
   if unexpected_diffs:
     print('Unexpected files with diffs:\n')
     for u in unexpected_diffs:
       print('  %s\n' % u)
   if unexpected_equals:
     print('Unexpected files with no diffs:\n')
     for u in unexpected_equals:
       print('  %s\n' % u)

   return int(bool(unexpected_diffs))


 def main():
   parser = optparse.OptionParser(usage='%prog [options]')
   parser.add_option(
       '-f', '--first-build-dir', help='The first build directory.')
   parser.add_option(
       '-s', '--second-build-dir', help='The second build directory.')
   parser.add_option('-r', '--recursive', action='store_true', default=False,
                     help='Indicates if the comparison should be recursive.')
   parser.add_option('-t', '--target-platform', help='The target platform.')
   options, _ = parser.parse_args()

   if not options.first_build_dir:
     parser.error('--first-build-dir is required')
   if not options.second_build_dir:
     parser.error('--second-build-dir is required')
   if not options.target_platform:
     parser.error('--target-platform is required')

   return compare_build_artifacts(os.path.abspath(options.first_build_dir),
                                  os.path.abspath(options.second_build_dir),
                                  options.target_platform,
                                  options.recursive)


 if __name__ == '__main__':
   sys.exit(main())
	#!/usr/bin/env python
	# Copyright 2014 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Compare the artifacts from two builds."""

	import difflib
	import json
	import optparse
	import os
	import struct
	import sys
	import time

	from infra.libs.infra_types import freeze

	BASE_DIR = os.path.dirname(os.path.abspath(__file__))


	# List of files that are known to be non deterministic. This is a "temporary"
	# workaround to find regression on the deterministic builders.
	#
	# PNaCl general bug: http://crbug.com/429358
	#
	# TODO(sebmarchand): Remove this once all the files are deterministic.
	WHITELIST = freeze({
	# http://crbug.com/383340
	'android': {
	# Completed.
	},

	# http://crbug.com/330263
	'linux': {
	# Completed.
	},

	# http://crbug.com/330262
	'mac': {
	'base_unittests',
	'base_unittests.isolated',
	'browser_tests',
	'browser_tests.isolated',
	'content_browsertests',
	'content_browsertests.isolated',
	'content_unittests',
	'content_unittests.isolated',
	'crash_inspector',
	'crashpad_handler',
	'd8',
	'exif.so',
	'ffmpegsumo.so',
	'genmacro',
	'genmodule',
	'genperf',
	'genstring',
	'genversion',
	'image_diff',
	'infoplist_strings_tool',
	'interactive_ui_tests',
	'interactive_ui_tests.isolated',
	'ipc_mojo_perftests',
	'ipc_mojo_unittests',
	'libclearkeycdm.dylib',
	'mksnapshot',
	'net_unittests',
	'net_unittests.isolated',
	'osmesa.so',
	'pdfsqueeze',
	'peerconnection_server',
	'protoc',
	're2c',
	'sync_integration_tests',
	'sync_integration_tests.isolated',
	'tls_edit',
	'unit_tests',
	'unit_tests.isolated',
	'yasm',
	},

	# http://crbug.com/330260
	'win': {
	'base_unittests.exe',
	'base_unittests.isolated',
	'browser_tests.exe',
	'browser_tests.isolated',
	'chrome.dll',
	'chrome.exe',
	'chrome_child.dll',
	'chrome_watcher.dll',
	'clearkeycdm.dll',
	'content_browsertests.exe',
	'content_browsertests.isolated',
	'content_unittests.exe',
	'content_unittests.isolated',
	'd8.exe',
	'delegate_execute.exe',
	'delegate_execute_unittests.exe',
	'ipc_mojo_perftests.exe',
	'ipc_mojo_unittests.exe',
	'interactive_ui_tests.exe',
	'interactive_ui_tests.isolated',
	'metro_driver.dll',
	'mksnapshot.exe',
	'mock_nacl_gdb.exe',
	'net_unittests.exe',
	'net_unittests.isolated',
	'np_test_netscape_plugin.dll',
	'npapi_test_plugin.dll',
	'peerconnection_server.exe',
	'sync_integration_tests.exe',
	'sync_integration_tests.isolated',
	'test_registrar.exe',
	'unit_tests.exe',
	'unit_tests.isolated',
	},
	})

	def get_files_to_compare(build_dir, recursive=False):
	"""Get the list of files to compare."""
	allowed = frozenset(
	('', '.apk', '.app', '.dll', '.dylib', '.exe', '.nexe', '.so'))
	non_x_ok_exts = frozenset(('.apk', '.isolated'))
	def check(f):
	if not os.path.isfile(f):
	return False
	if os.path.basename(f).startswith('.'):
	return False
	ext = os.path.splitext(f)[1]
	if ext in non_x_ok_exts:
	return True
	return ext in allowed and os.access(f, os.X_OK)

	ret_files = set()
	for root, dirs, files in os.walk(build_dir):
	if not recursive:
	dirs[:] = [d for d in dirs if d.endswith('_apk')]
	for f in (f for f in files if check(os.path.join(root, f))):
	ret_files.add(os.path.relpath(os.path.join(root, f), build_dir))
	return ret_files


	def diff_dict(a, b):
	"""Returns a yaml-like textural diff of two dict.

	It is currently optimized for the .isolated format.
	"""
	out = ''
	for key in set(a) \| set(b):
	va = a.get(key)
	vb = b.get(key)
	if va.__class__ != vb.__class__:
	out += '- %s: %r != %r\n' % (key, va, vb)
	elif isinstance(va, dict):
	c = diff_dict(va, vb)
	if c:
	out += '- %s:\n%s\n' % (
	key, '\n'.join(' ' + l for l in c.splitlines()))
	elif va != vb:
	out += '- %s: %s != %s\n' % (key, va, vb)
	return out.rstrip()


	def diff_binary(first_filepath, second_filepath, file_len):
	"""Returns a compact binary diff if the diff is small enough."""
	CHUNK_SIZE = 32
	MAX_STREAMS = 10
	diffs = 0
	streams = []
	offset = 0
	with open(first_filepath, 'rb') as lhs:
	with open(second_filepath, 'rb') as rhs:
	while True:
	lhs_data = lhs.read(CHUNK_SIZE)
	rhs_data = rhs.read(CHUNK_SIZE)
	if not lhs_data:
	break
	if lhs_data != rhs_data:
	diffs += sum(l != r for l, r in zip(lhs_data, rhs_data))
	if streams is not None:
	if len(streams) < MAX_STREAMS:
	streams.append((offset, lhs_data, rhs_data))
	else:
	streams = None
	offset += len(lhs_data)
	del lhs_data
	del rhs_data
	if not diffs:
	return None
	result = '%d out of %d bytes are different (%.2f%%)' % (
	diffs, file_len, 100.0 * diffs / file_len)
	if streams:
	encode = lambda text: ''.join(i if 31 < ord(i) < 128 else '.' for i in text)
	for offset, lhs_data, rhs_data in streams:
	lhs_line = '%s \'%s\'' % (lhs_data.encode('hex'), encode(lhs_data))
	rhs_line = '%s \'%s\'' % (rhs_data.encode('hex'), encode(rhs_data))
	diff = list(difflib.Differ().compare([lhs_line], [rhs_line]))[-1][2:-1]
	result += '\n 0x%-8x: %s\n %s\n %s' % (
	offset, lhs_line, rhs_line, diff)
	return result


	def compare_files(first_filepath, second_filepath):
	"""Compares two binaries and return the number of differences between them.

	Returns None if the files are equal, a string otherwise.
	"""
	if first_filepath.endswith('.isolated'):
	with open(first_filepath, 'rb') as f:
	lhs = json.load(f)
	with open(second_filepath, 'rb') as f:
	rhs = json.load(f)
	diff = diff_dict(lhs, rhs)
	if diff:
	return '\n' + '\n'.join(' ' + line for line in diff.splitlines())
	# else, falls through binary comparison, it must be binary equal too.

	file_len = os.stat(first_filepath).st_size
	if file_len != os.stat(second_filepath).st_size:
	return 'different size: %d != %d' % (
	file_len, os.stat(second_filepath).st_size)

	return diff_binary(first_filepath, second_filepath, file_len)


	def compare_build_artifacts(first_dir, second_dir, target_platform,
	recursive=False):
	"""Compares the artifacts from two distinct builds."""
	if not os.path.isdir(first_dir):
	print >> sys.stderr, '%s isn\'t a valid directory.' % first_dir
	return 1
	if not os.path.isdir(second_dir):
	print >> sys.stderr, '%s isn\'t a valid directory.' % second_dir
	return 1

	epoch_hex = struct.pack('<I', int(time.time())).encode('hex')
	print('Epoch: %s' %
	' '.join(epoch_hex[i:i+2] for i in xrange(0, len(epoch_hex), 2)))

	with open(os.path.join(BASE_DIR, 'deterministic_build_blacklist.json')) as f:
	blacklist = frozenset(json.load(f))
	whitelist = WHITELIST[target_platform]

	# The two directories.
	first_list = get_files_to_compare(first_dir, recursive) - blacklist
	second_list = get_files_to_compare(second_dir, recursive) - blacklist

	equals = []
	expected_diffs = []
	unexpected_diffs = []
	unexpected_equals = []
	all_files = sorted(first_list & second_list)
	missing_files = sorted(first_list.symmetric_difference(second_list))
	if missing_files:
	print >> sys.stderr, 'Different list of files in both directories:'
	print >> sys.stderr, '\n'.join(' ' + i for i in missing_files)
	unexpected_diffs.extend(missing_files)

	max_filepath_len = max(len(n) for n in all_files)
	for f in all_files:
	first_file = os.path.join(first_dir, f)
	second_file = os.path.join(second_dir, f)
	result = compare_files(first_file, second_file)
	if not result:
	tag = 'equal'
	equals.append(f)
	if f in whitelist:
	unexpected_equals.append(f)
	else:
	if f in whitelist:
	expected_diffs.append(f)
	tag = 'expected'
	else:
	unexpected_diffs.append(f)
	tag = 'unexpected'
	result = 'DIFFERENT (%s): %s' % (tag, result)
	print('%-*s: %s' % (max_filepath_len, f, result))
	unexpected_diffs.sort()

	print('Equals: %d' % len(equals))
	print('Expected diffs: %d' % len(expected_diffs))
	print('Unexpected diffs: %d' % len(unexpected_diffs))
	if unexpected_diffs:
	print('Unexpected files with diffs:\n')
	for u in unexpected_diffs:
	print(' %s\n' % u)
	if unexpected_equals:
	print('Unexpected files with no diffs:\n')
	for u in unexpected_equals:
	print(' %s\n' % u)

	return int(bool(unexpected_diffs))


	def main():
	parser = optparse.OptionParser(usage='%prog [options]')
	parser.add_option(
	'-f', '--first-build-dir', help='The first build directory.')
	parser.add_option(
	'-s', '--second-build-dir', help='The second build directory.')
	parser.add_option('-r', '--recursive', action='store_true', default=False,
	help='Indicates if the comparison should be recursive.')
	parser.add_option('-t', '--target-platform', help='The target platform.')
	options, _ = parser.parse_args()

	if not options.first_build_dir:
	parser.error('--first-build-dir is required')
	if not options.second_build_dir:
	parser.error('--second-build-dir is required')
	if not options.target_platform:
	parser.error('--target-platform is required')

	return compare_build_artifacts(os.path.abspath(options.first_build_dir),
	os.path.abspath(options.second_build_dir),
	options.target_platform,
	options.recursive)


	if __name__ == '__main__':
	sys.exit(main())