blob: 22c8c6110742cadf47cdfab9f97f31e1da5ea6c0 [file] [log] [blame]
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Classes that comprise the data model for binary size analysis.
The primary classes are Symbol, and SymbolGroup.
Description of common properties:
* address: The start address of the symbol.
May be 0 (e.g. for .bss or for SymbolGroups).
* size: The number of bytes this symbol takes up, including padding that comes
before |address|.
* num_aliases: The number of symbols with the same address (including self).
* pss: size / num_aliases.
* padding: The number of bytes of padding before |address| due to this symbol.
* name: Symbol names with parameter list removed.
Never None, but will be '' for anonymous symbols.
* full_name: Symbols names with parameter list left in.
Never None, but will be '' for anonymous symbols, and for symbols that do
not contain a parameter list.
* is_anonymous: True when the symbol exists in an anonymous namespace (which
are removed from both full_name and name during normalization).
* section_name: E.g. ".text", ".rodata", ".data.rel.local"
* section: The second character of |section_name|. E.g. "t", "r", "d".
"""
import collections
import logging
import os
import re
import cluster_symbols
import match_util
METADATA_GIT_REVISION = 'git_revision'
METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory.
METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory.
METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h
METADATA_ELF_FILENAME = 'elf_file_name' # Path relative to output_directory.
METADATA_ELF_MTIME = 'elf_mtime' # int timestamp in utc.
METADATA_ELF_BUILD_ID = 'elf_build_id'
METADATA_GN_ARGS = 'gn_args'
SECTION_TO_SECTION_NAME = {
'b': '.bss',
'd': '.data',
'r': '.rodata',
't': '.text',
}
FLAG_ANONYMOUS = 1
FLAG_STARTUP = 2
FLAG_UNLIKELY = 4
FLAG_REL = 8
FLAG_REL_LOCAL = 16
class SizeInfo(object):
"""Represents all size information for a single binary.
Fields:
section_sizes: A dict of section_name -> size.
symbols: A SymbolGroup containing all symbols, sorted by address.
metadata: A dict.
"""
__slots__ = (
'section_sizes',
'symbols',
'metadata',
)
"""Root size information."""
def __init__(self, section_sizes, symbols, metadata=None):
self.section_sizes = section_sizes # E.g. {'.text': 0}
self.symbols = symbols
self.metadata = metadata or {}
def Cluster(self):
"""Returns a new SizeInfo with some symbols moved into subgroups.
See SymbolGroup.Cluster() for more details.
"""
return SizeInfo(self.section_sizes, self.symbols.Cluster(), self.metadata)
class SizeInfoDiff(object):
"""What you get when you Diff() two SizeInfo objects.
Fields:
section_sizes: A dict of section_name -> size delta.
symbols: A SymbolDiff with all symbols in it.
before_metadata: metadata of the "before" SizeInfo.
after_metadata: metadata of the "after" SizeInfo.
"""
__slots__ = (
'section_sizes',
'symbols',
'before_metadata',
'after_metadata',
)
def __init__(self, section_sizes, symbols, before_metadata, after_metadata):
self.section_sizes = section_sizes
self.symbols = symbols
self.before_metadata = before_metadata
self.after_metadata = after_metadata
class BaseSymbol(object):
"""Base class for Symbol and SymbolGroup.
Refer to module docs for field descriptions.
"""
__slots__ = ()
@property
def section(self):
"""Returns the one-letter section.
E.g. If section_name == '.rodata', then section == 'r'.
"""
return self.section_name[1]
@property
def size_without_padding(self):
return self.size - self.padding
@property
def end_address(self):
return self.address + self.size_without_padding
@property
def is_anonymous(self):
return bool(self.flags & FLAG_ANONYMOUS)
@property
def num_aliases(self):
return len(self.aliases) if self.aliases else 1
def FlagsString(self):
# Most flags are 0.
flags = self.flags
if not flags and not self.aliases:
return '{}'
parts = []
if flags & FLAG_ANONYMOUS:
parts.append('anon')
if flags & FLAG_STARTUP:
parts.append('startup')
if flags & FLAG_UNLIKELY:
parts.append('unlikely')
if flags & FLAG_REL:
parts.append('rel')
if flags & FLAG_REL_LOCAL:
parts.append('rel.loc')
# Not actually a part of flags, but useful to show it here.
if self.aliases:
parts.append('{} aliases'.format(self.num_aliases))
return '{%s}' % ','.join(parts)
def IsBss(self):
return self.section_name == '.bss'
def IsGroup(self):
return False
def IsGenerated(self):
# TODO(agrieve): Also match generated functions such as:
# startup._GLOBAL__sub_I_page_allocator.cc
return self.name.endswith(']') and not self.name.endswith('[]')
class Symbol(BaseSymbol):
"""Represents a single symbol within a binary.
Refer to module docs for field descriptions.
"""
__slots__ = (
'address',
'full_name',
'flags',
'object_path',
'name',
'aliases',
'padding',
'section_name',
'source_path',
'size',
)
def __init__(self, section_name, size_without_padding, address=None,
name=None, source_path=None, object_path=None, full_name=None,
flags=0, aliases=None):
self.section_name = section_name
self.address = address or 0
self.name = name or ''
self.full_name = full_name or ''
self.source_path = source_path or ''
self.object_path = object_path or ''
self.size = size_without_padding
self.flags = flags
self.aliases = aliases
self.padding = 0
def __repr__(self):
template = ('{}@{:x}(size_without_padding={},padding={},name={},'
'object_path={},source_path={},flags={})')
return template.format(
self.section_name, self.address, self.size_without_padding,
self.padding, self.name, self.object_path, self.source_path,
self.FlagsString())
@property
def pss(self):
return float(self.size) / self.num_aliases
@property
def pss_without_padding(self):
return float(self.size_without_padding) / self.num_aliases
class SymbolGroup(BaseSymbol):
"""Represents a group of symbols using the same interface as Symbol.
SymbolGroups are immutable. All filtering / sorting will return new
SymbolGroups objects.
Overrides many __functions__. E.g. the following are all valid:
* len(group)
* iter(group)
* group[0]
* group['0x1234'] # By symbol address
* without_group2 = group1 - group2
* unioned = group1 + group2
"""
__slots__ = (
'_padding',
'_size',
'_pss',
'_symbols',
'_filtered_symbols',
'full_name',
'name',
'section_name',
'is_sorted',
)
def __init__(self, symbols, filtered_symbols=None, name=None,
full_name=None, section_name=None, is_sorted=False):
self._padding = None
self._size = None
self._pss = None
self._symbols = symbols
self._filtered_symbols = filtered_symbols or []
self.name = name or ''
self.full_name = full_name
self.section_name = section_name or '.*'
self.is_sorted = is_sorted
def __repr__(self):
return 'Group(name=%s,count=%d,size=%d)' % (
self.name, len(self), self.size)
def __iter__(self):
return iter(self._symbols)
def __len__(self):
return len(self._symbols)
def __eq__(self, other):
return self._symbols == other._symbols
def __getitem__(self, key):
"""|key| can be an index or an address.
Raises if multiple symbols map to the address.
"""
if isinstance(key, slice):
return self._symbols.__getitem__(key)
if isinstance(key, basestring) or key > len(self._symbols):
found = self.WhereAddressInRange(key)
if len(found) != 1:
raise KeyError('%d symbols found at address %s.' % (len(found), key))
return found[0]
return self._symbols[key]
def __sub__(self, other):
other_ids = set(id(s) for s in other)
after_symbols = [s for s in self if id(s) not in other_ids]
return self._CreateTransformed(after_symbols,
section_name=self.section_name)
def __add__(self, other):
self_ids = set(id(s) for s in self)
after_symbols = self._symbols + [s for s in other if id(s) not in self_ids]
return self._CreateTransformed(
after_symbols, section_name=self.section_name, is_sorted=False)
@property
def address(self):
first = self._symbols[0].address
return first if all(s.address == first for s in self._symbols) else 0
@property
def flags(self):
first = self._symbols[0].flags
return first if all(s.flags == first for s in self._symbols) else 0
@property
def object_path(self):
first = self._symbols[0].object_path
return first if all(s.object_path == first for s in self._symbols) else ''
@property
def source_path(self):
first = self._symbols[0].source_path
return first if all(s.source_path == first for s in self._symbols) else ''
def IterUniqueSymbols(self):
seen_aliases_lists = set()
for s in self:
if not s.aliases:
yield s
elif id(s.aliases) not in seen_aliases_lists:
seen_aliases_lists.add(id(s.aliases))
yield s
@property
def size(self):
if self._size is None:
if self.IsBss():
self._size = sum(s.size for s in self)
else:
self._size = sum(s.size for s in self.IterUniqueSymbols())
return self._size
@property
def pss(self):
if self._pss is None:
if self.IsBss():
self._pss = float(self.size)
else:
self._pss = sum(s.pss for s in self)
return self._pss
@property
def padding(self):
if self._padding is None:
self._padding = sum(s.padding for s in self.IterUniqueSymbols())
return self._padding
@property
def aliases(self):
return None
def IsGroup(self):
return True
def _CreateTransformed(self, symbols, filtered_symbols=None, name=None,
full_name=None, section_name=None, is_sorted=None):
if is_sorted is None:
is_sorted = self.is_sorted
return SymbolGroup(symbols, filtered_symbols=filtered_symbols, name=name,
full_name=full_name, section_name=section_name,
is_sorted=is_sorted)
def Cluster(self):
"""Returns a new SymbolGroup with some symbols moved into subgroups.
Subgroups include:
* Symbols that have [clone] in their name (created during inlining).
* Star symbols (such as "** merge strings", and "** symbol gap")
To view created groups:
Print(clustered.Filter(lambda s: s.IsGroup()), recursive=True)
"""
return self._CreateTransformed(cluster_symbols.ClusterSymbols(self))
def Sorted(self, cmp_func=None, key=None, reverse=False):
# Default to sorting by abs(size) then name.
if cmp_func is None and key is None:
cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.size), a.name),
(b.IsBss(), abs(a.size), b.name))
after_symbols = sorted(self._symbols, cmp_func, key, reverse)
return self._CreateTransformed(
after_symbols, filtered_symbols=self._filtered_symbols,
section_name=self.section_name, is_sorted=True)
def SortedByName(self, reverse=False):
return self.Sorted(key=(lambda s:s.name), reverse=reverse)
def SortedByAddress(self, reverse=False):
return self.Sorted(key=(lambda s:s.address), reverse=reverse)
def SortedByCount(self, reverse=False):
return self.Sorted(key=(lambda s:len(s) if s.IsGroup() else 1),
reverse=not reverse)
def Filter(self, func):
filtered_and_kept = ([], [])
symbol = None
try:
for symbol in self:
filtered_and_kept[int(bool(func(symbol)))].append(symbol)
except:
logging.warning('Filter failed on symbol %r', symbol)
raise
return self._CreateTransformed(filtered_and_kept[1],
filtered_symbols=filtered_and_kept[0],
section_name=self.section_name)
def WhereBiggerThan(self, min_size):
return self.Filter(lambda s: s.size >= min_size)
def WhereInSection(self, section):
if len(section) == 1:
ret = self.Filter(lambda s: s.section == section)
ret.section_name = SECTION_TO_SECTION_NAME[section]
else:
ret = self.Filter(lambda s: s.section_name == section)
ret.section_name = section
return ret
def WhereIsGenerated(self):
return self.Filter(lambda s: s.IsGenerated())
def WhereNameMatches(self, pattern):
regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern))
return self.Filter(lambda s: regex.search(s.name))
def WhereObjectPathMatches(self, pattern):
regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern))
return self.Filter(lambda s: regex.search(s.object_path))
def WhereSourcePathMatches(self, pattern):
regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern))
return self.Filter(lambda s: regex.search(s.source_path))
def WherePathMatches(self, pattern):
regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern))
return self.Filter(lambda s: (regex.search(s.source_path) or
regex.search(s.object_path)))
def WhereMatches(self, pattern):
"""Looks for |pattern| within all paths & names."""
regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern))
return self.Filter(lambda s: (regex.search(s.source_path) or
regex.search(s.object_path) or
regex.search(s.full_name or '') or
regex.search(s.name)))
def WhereAddressInRange(self, start, end=None):
"""Searches for addesses within [start, end).
Args may be ints or hex strings. Default value for |end| is |start| + 1.
"""
if isinstance(start, basestring):
start = int(start, 16)
if end is None:
end = start + 1
return self.Filter(lambda s: s.address >= start and s.address < end)
def WhereHasAnyAttribution(self):
return self.Filter(lambda s: s.name or s.source_path or s.object_path)
def Inverted(self):
"""Returns the symbols that were filtered out by the previous filter.
Applies only when the previous call was a filter.
Example:
# Symbols that do not have "third_party" in their path.
symbols.WherePathMatches(r'third_party').Inverted()
# Symbols within third_party that do not contain the string "foo".
symbols.WherePathMatches(r'third_party').WhereMatches('foo').Inverted()
"""
return self._CreateTransformed(
self._filtered_symbols, filtered_symbols=self._symbols, is_sorted=False)
def GroupBy(self, func, min_count=0):
"""Returns a SymbolGroup of SymbolGroups, indexed by |func|.
Args:
func: Grouping function. Passed a symbol and returns a string for the
name of the subgroup to put the symbol in. If None is returned, the
symbol is omitted.
min_count: Miniumum number of symbols for a group. If fewer than this many
symbols end up in a group, they will not be put within a group.
Use a negative value to omit symbols entirely rather than
include them outside of a group.
"""
after_syms = []
filtered_symbols = []
symbols_by_token = collections.defaultdict(list)
# Index symbols by |func|.
for symbol in self:
token = func(symbol)
if token is None:
filtered_symbols.append(symbol)
symbols_by_token[token].append(symbol)
# Create the subgroups.
include_singles = min_count >= 0
min_count = abs(min_count)
for token, symbols in symbols_by_token.iteritems():
if len(symbols) >= min_count:
after_syms.append(self._CreateTransformed(
symbols, name=token, section_name=self.section_name,
is_sorted=False))
elif include_singles:
after_syms.extend(symbols)
else:
filtered_symbols.extend(symbols)
return self._CreateTransformed(
after_syms, filtered_symbols=filtered_symbols,
section_name=self.section_name, is_sorted=False)
def GroupBySectionName(self):
return self.GroupBy(lambda s: s.section_name)
def GroupByNamespace(self, depth=0, fallback='{global}', min_count=0):
"""Groups by symbol namespace (as denoted by ::s).
Does not differentiate between C++ namespaces and C++ classes.
Args:
depth: When 0 (default), groups by entire namespace. When 1, groups by
top-level name, when 2, groups by top 2 names, etc.
fallback: Use this value when no namespace exists.
min_count: Miniumum number of symbols for a group. If fewer than this many
symbols end up in a group, they will not be put within a group.
Use a negative value to omit symbols entirely rather than
include them outside of a group.
"""
def extract_namespace(symbol):
# Remove template params.
name = symbol.name
template_idx = name.find('<')
if template_idx:
name = name[:template_idx]
# Remove after the final :: (not part of the namespace).
colon_idx = name.rfind('::')
if colon_idx == -1:
return fallback
name = name[:colon_idx]
return _ExtractPrefixBeforeSeparator(name, '::', depth)
return self.GroupBy(extract_namespace, min_count=min_count)
def GroupBySourcePath(self, depth=0, fallback='{no path}',
fallback_to_object_path=True, min_count=0):
"""Groups by source_path.
Args:
depth: When 0 (default), groups by entire path. When 1, groups by
top-level directory, when 2, groups by top 2 directories, etc.
fallback: Use this value when no namespace exists.
fallback_to_object_path: When True (default), uses object_path when
source_path is missing.
min_count: Miniumum number of symbols for a group. If fewer than this many
symbols end up in a group, they will not be put within a group.
Use a negative value to omit symbols entirely rather than
include them outside of a group.
"""
def extract_path(symbol):
path = symbol.source_path
if fallback_to_object_path and not path:
path = symbol.object_path
path = path or fallback
return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth)
return self.GroupBy(extract_path, min_count=min_count)
def GroupByObjectPath(self, depth=0, fallback='{no path}', min_count=0):
"""Groups by object_path.
Args:
depth: When 0 (default), groups by entire path. When 1, groups by
top-level directory, when 2, groups by top 2 directories, etc.
fallback: Use this value when no namespace exists.
min_count: Miniumum number of symbols for a group. If fewer than this many
symbols end up in a group, they will not be put within a group.
Use a negative value to omit symbols entirely rather than
include them outside of a group.
"""
def extract_path(symbol):
path = symbol.object_path or fallback
return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth)
return self.GroupBy(extract_path, min_count=min_count)
class SymbolDiff(SymbolGroup):
"""A SymbolGroup subclass representing a diff of two other SymbolGroups.
All Symbols contained within have a |size| which is actually the size delta.
Additionally, metadata is kept about which symbols were added / removed /
changed.
"""
__slots__ = (
'_added_ids',
'_removed_ids',
)
def __init__(self, added, removed, similar, name=None, full_name=None,
section_name=None):
self._added_ids = set(id(s) for s in added)
self._removed_ids = set(id(s) for s in removed)
symbols = []
symbols.extend(added)
symbols.extend(removed)
symbols.extend(similar)
super(SymbolDiff, self).__init__(symbols, name=name, full_name=full_name,
section_name=section_name)
def __repr__(self):
return '%s(%d added, %d removed, %d changed, %d unchanged, size=%d)' % (
'SymbolGroup', self.added_count, self.removed_count, self.changed_count,
self.unchanged_count, self.size)
def _CreateTransformed(self, symbols, filtered_symbols=None, name=None,
full_name=None, section_name=None, is_sorted=None):
# Printing sorts, so short-circuit the same symbols case.
if len(symbols) == len(self._symbols):
new_added_ids = self._added_ids
new_removed_ids = self._removed_ids
else:
old_added_ids = self._added_ids
old_removed_ids = self._removed_ids
def get_status(sym):
obj_id = id(sym)
if obj_id in old_added_ids:
return 0
if obj_id in old_removed_ids:
return 1
if sym.IsGroup():
first_status = get_status(sym[0])
if all(get_status(s) == first_status for s in sym[1:]):
return first_status
return 2
new_added_ids = set()
new_removed_ids = set()
for sym in symbols:
status = get_status(sym)
if status == 0:
new_added_ids.add(id(sym))
elif status == 1:
new_removed_ids.add(id(sym))
ret = SymbolDiff.__new__(SymbolDiff)
ret._added_ids = new_added_ids
ret._removed_ids = new_removed_ids
super(SymbolDiff, ret).__init__(
symbols, filtered_symbols=filtered_symbols, name=name,
full_name=full_name, section_name=section_name, is_sorted=is_sorted)
return ret
@property
def added_count(self):
return len(self._added_ids)
@property
def removed_count(self):
return len(self._removed_ids)
@property
def changed_count(self):
not_changed = self.unchanged_count + self.added_count + self.removed_count
return len(self) - not_changed
@property
def unchanged_count(self):
return sum(1 for s in self if self.IsSimilar(s) and s.size == 0)
def IsAdded(self, sym):
return id(sym) in self._added_ids
def IsSimilar(self, sym):
key = id(sym)
return key not in self._added_ids and key not in self._removed_ids
def IsRemoved(self, sym):
return id(sym) in self._removed_ids
def WhereNotUnchanged(self):
return self.Filter(lambda s: not self.IsSimilar(s) or s.size)
def _ExtractPrefixBeforeSeparator(string, separator, count=1):
idx = -len(separator)
prev_idx = None
for _ in xrange(count):
idx = string.find(separator, idx + len(separator))
if idx < 0:
break
prev_idx = idx
return string[:prev_idx]