| # Copyright 2017 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Classes that comprise the data model for binary size analysis. |
| |
| The primary classes are Symbol, and SymbolGroup. |
| |
| Description of common properties: |
| * address: The start address of the symbol. |
| May be 0 (e.g. for .bss or for SymbolGroups). |
| * size: The number of bytes this symbol takes up, including padding that comes |
| before |address|. |
| * num_aliases: The number of symbols with the same address (including self). |
| * pss: size / num_aliases. |
| * padding: The number of bytes of padding before |address| due to this symbol. |
| * name: Symbol names with parameter list removed. |
| Never None, but will be '' for anonymous symbols. |
| * full_name: Symbols names with parameter list left in. |
| Never None, but will be '' for anonymous symbols, and for symbols that do |
| not contain a parameter list. |
| * is_anonymous: True when the symbol exists in an anonymous namespace (which |
| are removed from both full_name and name during normalization). |
| * section_name: E.g. ".text", ".rodata", ".data.rel.local" |
| * section: The second character of |section_name|. E.g. "t", "r", "d". |
| """ |
| |
| import collections |
| import logging |
| import os |
| import re |
| |
| import cluster_symbols |
| import match_util |
| |
| |
| METADATA_GIT_REVISION = 'git_revision' |
| METADATA_APK_FILENAME = 'apk_file_name' # Path relative to output_directory. |
| METADATA_MAP_FILENAME = 'map_file_name' # Path relative to output_directory. |
| METADATA_ELF_ARCHITECTURE = 'elf_arch' # "Machine" field from readelf -h |
| METADATA_ELF_FILENAME = 'elf_file_name' # Path relative to output_directory. |
| METADATA_ELF_MTIME = 'elf_mtime' # int timestamp in utc. |
| METADATA_ELF_BUILD_ID = 'elf_build_id' |
| METADATA_GN_ARGS = 'gn_args' |
| |
| |
| SECTION_TO_SECTION_NAME = { |
| 'b': '.bss', |
| 'd': '.data', |
| 'r': '.rodata', |
| 't': '.text', |
| } |
| |
| FLAG_ANONYMOUS = 1 |
| FLAG_STARTUP = 2 |
| FLAG_UNLIKELY = 4 |
| FLAG_REL = 8 |
| FLAG_REL_LOCAL = 16 |
| |
| |
| class SizeInfo(object): |
| """Represents all size information for a single binary. |
| |
| Fields: |
| section_sizes: A dict of section_name -> size. |
| symbols: A SymbolGroup containing all symbols, sorted by address. |
| metadata: A dict. |
| """ |
| __slots__ = ( |
| 'section_sizes', |
| 'symbols', |
| 'metadata', |
| ) |
| |
| """Root size information.""" |
| def __init__(self, section_sizes, symbols, metadata=None): |
| self.section_sizes = section_sizes # E.g. {'.text': 0} |
| self.symbols = symbols |
| self.metadata = metadata or {} |
| |
| def Cluster(self): |
| """Returns a new SizeInfo with some symbols moved into subgroups. |
| |
| See SymbolGroup.Cluster() for more details. |
| """ |
| return SizeInfo(self.section_sizes, self.symbols.Cluster(), self.metadata) |
| |
| |
| class SizeInfoDiff(object): |
| """What you get when you Diff() two SizeInfo objects. |
| |
| Fields: |
| section_sizes: A dict of section_name -> size delta. |
| symbols: A SymbolDiff with all symbols in it. |
| before_metadata: metadata of the "before" SizeInfo. |
| after_metadata: metadata of the "after" SizeInfo. |
| """ |
| __slots__ = ( |
| 'section_sizes', |
| 'symbols', |
| 'before_metadata', |
| 'after_metadata', |
| ) |
| |
| def __init__(self, section_sizes, symbols, before_metadata, after_metadata): |
| self.section_sizes = section_sizes |
| self.symbols = symbols |
| self.before_metadata = before_metadata |
| self.after_metadata = after_metadata |
| |
| |
| class BaseSymbol(object): |
| """Base class for Symbol and SymbolGroup. |
| |
| Refer to module docs for field descriptions. |
| """ |
| __slots__ = () |
| |
| @property |
| def section(self): |
| """Returns the one-letter section. |
| |
| E.g. If section_name == '.rodata', then section == 'r'. |
| """ |
| return self.section_name[1] |
| |
| @property |
| def size_without_padding(self): |
| return self.size - self.padding |
| |
| @property |
| def end_address(self): |
| return self.address + self.size_without_padding |
| |
| @property |
| def is_anonymous(self): |
| return bool(self.flags & FLAG_ANONYMOUS) |
| |
| @property |
| def num_aliases(self): |
| return len(self.aliases) if self.aliases else 1 |
| |
| def FlagsString(self): |
| # Most flags are 0. |
| flags = self.flags |
| if not flags and not self.aliases: |
| return '{}' |
| parts = [] |
| if flags & FLAG_ANONYMOUS: |
| parts.append('anon') |
| if flags & FLAG_STARTUP: |
| parts.append('startup') |
| if flags & FLAG_UNLIKELY: |
| parts.append('unlikely') |
| if flags & FLAG_REL: |
| parts.append('rel') |
| if flags & FLAG_REL_LOCAL: |
| parts.append('rel.loc') |
| # Not actually a part of flags, but useful to show it here. |
| if self.aliases: |
| parts.append('{} aliases'.format(self.num_aliases)) |
| return '{%s}' % ','.join(parts) |
| |
| def IsBss(self): |
| return self.section_name == '.bss' |
| |
| def IsGroup(self): |
| return False |
| |
| def IsGenerated(self): |
| # TODO(agrieve): Also match generated functions such as: |
| # startup._GLOBAL__sub_I_page_allocator.cc |
| return self.name.endswith(']') and not self.name.endswith('[]') |
| |
| |
| class Symbol(BaseSymbol): |
| """Represents a single symbol within a binary. |
| |
| Refer to module docs for field descriptions. |
| """ |
| |
| __slots__ = ( |
| 'address', |
| 'full_name', |
| 'flags', |
| 'object_path', |
| 'name', |
| 'aliases', |
| 'padding', |
| 'section_name', |
| 'source_path', |
| 'size', |
| ) |
| |
| def __init__(self, section_name, size_without_padding, address=None, |
| name=None, source_path=None, object_path=None, full_name=None, |
| flags=0, aliases=None): |
| self.section_name = section_name |
| self.address = address or 0 |
| self.name = name or '' |
| self.full_name = full_name or '' |
| self.source_path = source_path or '' |
| self.object_path = object_path or '' |
| self.size = size_without_padding |
| self.flags = flags |
| self.aliases = aliases |
| self.padding = 0 |
| |
| def __repr__(self): |
| template = ('{}@{:x}(size_without_padding={},padding={},name={},' |
| 'object_path={},source_path={},flags={})') |
| return template.format( |
| self.section_name, self.address, self.size_without_padding, |
| self.padding, self.name, self.object_path, self.source_path, |
| self.FlagsString()) |
| |
| @property |
| def pss(self): |
| return float(self.size) / self.num_aliases |
| |
| @property |
| def pss_without_padding(self): |
| return float(self.size_without_padding) / self.num_aliases |
| |
| |
| class SymbolGroup(BaseSymbol): |
| """Represents a group of symbols using the same interface as Symbol. |
| |
| SymbolGroups are immutable. All filtering / sorting will return new |
| SymbolGroups objects. |
| |
| Overrides many __functions__. E.g. the following are all valid: |
| * len(group) |
| * iter(group) |
| * group[0] |
| * group['0x1234'] # By symbol address |
| * without_group2 = group1 - group2 |
| * unioned = group1 + group2 |
| """ |
| |
| __slots__ = ( |
| '_padding', |
| '_size', |
| '_pss', |
| '_symbols', |
| '_filtered_symbols', |
| 'full_name', |
| 'name', |
| 'section_name', |
| 'is_sorted', |
| ) |
| |
| def __init__(self, symbols, filtered_symbols=None, name=None, |
| full_name=None, section_name=None, is_sorted=False): |
| self._padding = None |
| self._size = None |
| self._pss = None |
| self._symbols = symbols |
| self._filtered_symbols = filtered_symbols or [] |
| self.name = name or '' |
| self.full_name = full_name |
| self.section_name = section_name or '.*' |
| self.is_sorted = is_sorted |
| |
| def __repr__(self): |
| return 'Group(name=%s,count=%d,size=%d)' % ( |
| self.name, len(self), self.size) |
| |
| def __iter__(self): |
| return iter(self._symbols) |
| |
| def __len__(self): |
| return len(self._symbols) |
| |
| def __eq__(self, other): |
| return self._symbols == other._symbols |
| |
| def __getitem__(self, key): |
| """|key| can be an index or an address. |
| |
| Raises if multiple symbols map to the address. |
| """ |
| if isinstance(key, slice): |
| return self._symbols.__getitem__(key) |
| if isinstance(key, basestring) or key > len(self._symbols): |
| found = self.WhereAddressInRange(key) |
| if len(found) != 1: |
| raise KeyError('%d symbols found at address %s.' % (len(found), key)) |
| return found[0] |
| return self._symbols[key] |
| |
| def __sub__(self, other): |
| other_ids = set(id(s) for s in other) |
| after_symbols = [s for s in self if id(s) not in other_ids] |
| return self._CreateTransformed(after_symbols, |
| section_name=self.section_name) |
| |
| def __add__(self, other): |
| self_ids = set(id(s) for s in self) |
| after_symbols = self._symbols + [s for s in other if id(s) not in self_ids] |
| return self._CreateTransformed( |
| after_symbols, section_name=self.section_name, is_sorted=False) |
| |
| @property |
| def address(self): |
| first = self._symbols[0].address |
| return first if all(s.address == first for s in self._symbols) else 0 |
| |
| @property |
| def flags(self): |
| first = self._symbols[0].flags |
| return first if all(s.flags == first for s in self._symbols) else 0 |
| |
| @property |
| def object_path(self): |
| first = self._symbols[0].object_path |
| return first if all(s.object_path == first for s in self._symbols) else '' |
| |
| @property |
| def source_path(self): |
| first = self._symbols[0].source_path |
| return first if all(s.source_path == first for s in self._symbols) else '' |
| |
| def IterUniqueSymbols(self): |
| seen_aliases_lists = set() |
| for s in self: |
| if not s.aliases: |
| yield s |
| elif id(s.aliases) not in seen_aliases_lists: |
| seen_aliases_lists.add(id(s.aliases)) |
| yield s |
| |
| @property |
| def size(self): |
| if self._size is None: |
| if self.IsBss(): |
| self._size = sum(s.size for s in self) |
| else: |
| self._size = sum(s.size for s in self.IterUniqueSymbols()) |
| return self._size |
| |
| @property |
| def pss(self): |
| if self._pss is None: |
| if self.IsBss(): |
| self._pss = float(self.size) |
| else: |
| self._pss = sum(s.pss for s in self) |
| return self._pss |
| |
| @property |
| def padding(self): |
| if self._padding is None: |
| self._padding = sum(s.padding for s in self.IterUniqueSymbols()) |
| return self._padding |
| |
| @property |
| def aliases(self): |
| return None |
| |
| def IsGroup(self): |
| return True |
| |
| def _CreateTransformed(self, symbols, filtered_symbols=None, name=None, |
| full_name=None, section_name=None, is_sorted=None): |
| if is_sorted is None: |
| is_sorted = self.is_sorted |
| return SymbolGroup(symbols, filtered_symbols=filtered_symbols, name=name, |
| full_name=full_name, section_name=section_name, |
| is_sorted=is_sorted) |
| |
| def Cluster(self): |
| """Returns a new SymbolGroup with some symbols moved into subgroups. |
| |
| Subgroups include: |
| * Symbols that have [clone] in their name (created during inlining). |
| * Star symbols (such as "** merge strings", and "** symbol gap") |
| |
| To view created groups: |
| Print(clustered.Filter(lambda s: s.IsGroup()), recursive=True) |
| """ |
| return self._CreateTransformed(cluster_symbols.ClusterSymbols(self)) |
| |
| def Sorted(self, cmp_func=None, key=None, reverse=False): |
| # Default to sorting by abs(size) then name. |
| if cmp_func is None and key is None: |
| cmp_func = lambda a, b: cmp((a.IsBss(), abs(b.size), a.name), |
| (b.IsBss(), abs(a.size), b.name)) |
| |
| after_symbols = sorted(self._symbols, cmp_func, key, reverse) |
| return self._CreateTransformed( |
| after_symbols, filtered_symbols=self._filtered_symbols, |
| section_name=self.section_name, is_sorted=True) |
| |
| def SortedByName(self, reverse=False): |
| return self.Sorted(key=(lambda s:s.name), reverse=reverse) |
| |
| def SortedByAddress(self, reverse=False): |
| return self.Sorted(key=(lambda s:s.address), reverse=reverse) |
| |
| def SortedByCount(self, reverse=False): |
| return self.Sorted(key=(lambda s:len(s) if s.IsGroup() else 1), |
| reverse=not reverse) |
| |
| def Filter(self, func): |
| filtered_and_kept = ([], []) |
| symbol = None |
| try: |
| for symbol in self: |
| filtered_and_kept[int(bool(func(symbol)))].append(symbol) |
| except: |
| logging.warning('Filter failed on symbol %r', symbol) |
| raise |
| |
| return self._CreateTransformed(filtered_and_kept[1], |
| filtered_symbols=filtered_and_kept[0], |
| section_name=self.section_name) |
| |
| def WhereBiggerThan(self, min_size): |
| return self.Filter(lambda s: s.size >= min_size) |
| |
| def WhereInSection(self, section): |
| if len(section) == 1: |
| ret = self.Filter(lambda s: s.section == section) |
| ret.section_name = SECTION_TO_SECTION_NAME[section] |
| else: |
| ret = self.Filter(lambda s: s.section_name == section) |
| ret.section_name = section |
| return ret |
| |
| def WhereIsGenerated(self): |
| return self.Filter(lambda s: s.IsGenerated()) |
| |
| def WhereNameMatches(self, pattern): |
| regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern)) |
| return self.Filter(lambda s: regex.search(s.name)) |
| |
| def WhereObjectPathMatches(self, pattern): |
| regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern)) |
| return self.Filter(lambda s: regex.search(s.object_path)) |
| |
| def WhereSourcePathMatches(self, pattern): |
| regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern)) |
| return self.Filter(lambda s: regex.search(s.source_path)) |
| |
| def WherePathMatches(self, pattern): |
| regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern)) |
| return self.Filter(lambda s: (regex.search(s.source_path) or |
| regex.search(s.object_path))) |
| |
| def WhereMatches(self, pattern): |
| """Looks for |pattern| within all paths & names.""" |
| regex = re.compile(match_util.ExpandRegexIdentifierPlaceholder(pattern)) |
| return self.Filter(lambda s: (regex.search(s.source_path) or |
| regex.search(s.object_path) or |
| regex.search(s.full_name or '') or |
| regex.search(s.name))) |
| |
| def WhereAddressInRange(self, start, end=None): |
| """Searches for addesses within [start, end). |
| |
| Args may be ints or hex strings. Default value for |end| is |start| + 1. |
| """ |
| if isinstance(start, basestring): |
| start = int(start, 16) |
| if end is None: |
| end = start + 1 |
| return self.Filter(lambda s: s.address >= start and s.address < end) |
| |
| def WhereHasAnyAttribution(self): |
| return self.Filter(lambda s: s.name or s.source_path or s.object_path) |
| |
| def Inverted(self): |
| """Returns the symbols that were filtered out by the previous filter. |
| |
| Applies only when the previous call was a filter. |
| |
| Example: |
| # Symbols that do not have "third_party" in their path. |
| symbols.WherePathMatches(r'third_party').Inverted() |
| # Symbols within third_party that do not contain the string "foo". |
| symbols.WherePathMatches(r'third_party').WhereMatches('foo').Inverted() |
| """ |
| return self._CreateTransformed( |
| self._filtered_symbols, filtered_symbols=self._symbols, is_sorted=False) |
| |
| def GroupBy(self, func, min_count=0): |
| """Returns a SymbolGroup of SymbolGroups, indexed by |func|. |
| |
| Args: |
| func: Grouping function. Passed a symbol and returns a string for the |
| name of the subgroup to put the symbol in. If None is returned, the |
| symbol is omitted. |
| min_count: Miniumum number of symbols for a group. If fewer than this many |
| symbols end up in a group, they will not be put within a group. |
| Use a negative value to omit symbols entirely rather than |
| include them outside of a group. |
| """ |
| after_syms = [] |
| filtered_symbols = [] |
| symbols_by_token = collections.defaultdict(list) |
| # Index symbols by |func|. |
| for symbol in self: |
| token = func(symbol) |
| if token is None: |
| filtered_symbols.append(symbol) |
| symbols_by_token[token].append(symbol) |
| # Create the subgroups. |
| include_singles = min_count >= 0 |
| min_count = abs(min_count) |
| for token, symbols in symbols_by_token.iteritems(): |
| if len(symbols) >= min_count: |
| after_syms.append(self._CreateTransformed( |
| symbols, name=token, section_name=self.section_name, |
| is_sorted=False)) |
| elif include_singles: |
| after_syms.extend(symbols) |
| else: |
| filtered_symbols.extend(symbols) |
| return self._CreateTransformed( |
| after_syms, filtered_symbols=filtered_symbols, |
| section_name=self.section_name, is_sorted=False) |
| |
| def GroupBySectionName(self): |
| return self.GroupBy(lambda s: s.section_name) |
| |
| def GroupByNamespace(self, depth=0, fallback='{global}', min_count=0): |
| """Groups by symbol namespace (as denoted by ::s). |
| |
| Does not differentiate between C++ namespaces and C++ classes. |
| |
| Args: |
| depth: When 0 (default), groups by entire namespace. When 1, groups by |
| top-level name, when 2, groups by top 2 names, etc. |
| fallback: Use this value when no namespace exists. |
| min_count: Miniumum number of symbols for a group. If fewer than this many |
| symbols end up in a group, they will not be put within a group. |
| Use a negative value to omit symbols entirely rather than |
| include them outside of a group. |
| """ |
| def extract_namespace(symbol): |
| # Remove template params. |
| name = symbol.name |
| template_idx = name.find('<') |
| if template_idx: |
| name = name[:template_idx] |
| |
| # Remove after the final :: (not part of the namespace). |
| colon_idx = name.rfind('::') |
| if colon_idx == -1: |
| return fallback |
| name = name[:colon_idx] |
| |
| return _ExtractPrefixBeforeSeparator(name, '::', depth) |
| return self.GroupBy(extract_namespace, min_count=min_count) |
| |
| def GroupBySourcePath(self, depth=0, fallback='{no path}', |
| fallback_to_object_path=True, min_count=0): |
| """Groups by source_path. |
| |
| Args: |
| depth: When 0 (default), groups by entire path. When 1, groups by |
| top-level directory, when 2, groups by top 2 directories, etc. |
| fallback: Use this value when no namespace exists. |
| fallback_to_object_path: When True (default), uses object_path when |
| source_path is missing. |
| min_count: Miniumum number of symbols for a group. If fewer than this many |
| symbols end up in a group, they will not be put within a group. |
| Use a negative value to omit symbols entirely rather than |
| include them outside of a group. |
| """ |
| def extract_path(symbol): |
| path = symbol.source_path |
| if fallback_to_object_path and not path: |
| path = symbol.object_path |
| path = path or fallback |
| return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth) |
| return self.GroupBy(extract_path, min_count=min_count) |
| |
| def GroupByObjectPath(self, depth=0, fallback='{no path}', min_count=0): |
| """Groups by object_path. |
| |
| Args: |
| depth: When 0 (default), groups by entire path. When 1, groups by |
| top-level directory, when 2, groups by top 2 directories, etc. |
| fallback: Use this value when no namespace exists. |
| min_count: Miniumum number of symbols for a group. If fewer than this many |
| symbols end up in a group, they will not be put within a group. |
| Use a negative value to omit symbols entirely rather than |
| include them outside of a group. |
| """ |
| def extract_path(symbol): |
| path = symbol.object_path or fallback |
| return _ExtractPrefixBeforeSeparator(path, os.path.sep, depth) |
| return self.GroupBy(extract_path, min_count=min_count) |
| |
| |
| class SymbolDiff(SymbolGroup): |
| """A SymbolGroup subclass representing a diff of two other SymbolGroups. |
| |
| All Symbols contained within have a |size| which is actually the size delta. |
| Additionally, metadata is kept about which symbols were added / removed / |
| changed. |
| """ |
| __slots__ = ( |
| '_added_ids', |
| '_removed_ids', |
| ) |
| |
| def __init__(self, added, removed, similar, name=None, full_name=None, |
| section_name=None): |
| self._added_ids = set(id(s) for s in added) |
| self._removed_ids = set(id(s) for s in removed) |
| symbols = [] |
| symbols.extend(added) |
| symbols.extend(removed) |
| symbols.extend(similar) |
| super(SymbolDiff, self).__init__(symbols, name=name, full_name=full_name, |
| section_name=section_name) |
| |
| def __repr__(self): |
| return '%s(%d added, %d removed, %d changed, %d unchanged, size=%d)' % ( |
| 'SymbolGroup', self.added_count, self.removed_count, self.changed_count, |
| self.unchanged_count, self.size) |
| |
| def _CreateTransformed(self, symbols, filtered_symbols=None, name=None, |
| full_name=None, section_name=None, is_sorted=None): |
| # Printing sorts, so short-circuit the same symbols case. |
| if len(symbols) == len(self._symbols): |
| new_added_ids = self._added_ids |
| new_removed_ids = self._removed_ids |
| else: |
| old_added_ids = self._added_ids |
| old_removed_ids = self._removed_ids |
| |
| def get_status(sym): |
| obj_id = id(sym) |
| if obj_id in old_added_ids: |
| return 0 |
| if obj_id in old_removed_ids: |
| return 1 |
| if sym.IsGroup(): |
| first_status = get_status(sym[0]) |
| if all(get_status(s) == first_status for s in sym[1:]): |
| return first_status |
| return 2 |
| |
| new_added_ids = set() |
| new_removed_ids = set() |
| for sym in symbols: |
| status = get_status(sym) |
| if status == 0: |
| new_added_ids.add(id(sym)) |
| elif status == 1: |
| new_removed_ids.add(id(sym)) |
| |
| ret = SymbolDiff.__new__(SymbolDiff) |
| ret._added_ids = new_added_ids |
| ret._removed_ids = new_removed_ids |
| super(SymbolDiff, ret).__init__( |
| symbols, filtered_symbols=filtered_symbols, name=name, |
| full_name=full_name, section_name=section_name, is_sorted=is_sorted) |
| return ret |
| |
| @property |
| def added_count(self): |
| return len(self._added_ids) |
| |
| @property |
| def removed_count(self): |
| return len(self._removed_ids) |
| |
| @property |
| def changed_count(self): |
| not_changed = self.unchanged_count + self.added_count + self.removed_count |
| return len(self) - not_changed |
| |
| @property |
| def unchanged_count(self): |
| return sum(1 for s in self if self.IsSimilar(s) and s.size == 0) |
| |
| def IsAdded(self, sym): |
| return id(sym) in self._added_ids |
| |
| def IsSimilar(self, sym): |
| key = id(sym) |
| return key not in self._added_ids and key not in self._removed_ids |
| |
| def IsRemoved(self, sym): |
| return id(sym) in self._removed_ids |
| |
| def WhereNotUnchanged(self): |
| return self.Filter(lambda s: not self.IsSimilar(s) or s.size) |
| |
| |
| def _ExtractPrefixBeforeSeparator(string, separator, count=1): |
| idx = -len(separator) |
| prev_idx = None |
| for _ in xrange(count): |
| idx = string.find(separator, idx + len(separator)) |
| if idx < 0: |
| break |
| prev_idx = idx |
| return string[:prev_idx] |