| #!/usr/bin/env python |
| # Copyright 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Describe the size difference of two binaries. |
| |
| Generates a description of the size difference of two binaries based |
| on the difference of the size of various symbols. |
| |
| This tool needs "nm" dumps of each binary with full symbol |
| information. You can obtain the necessary dumps by running the |
| run_binary_size_analysis.py script upon each binary, with the |
| "--nm-out" parameter set to the location in which you want to save the |
| dumps. Example: |
| |
| # obtain symbol data from first binary in /tmp/nm1.dump |
| cd $CHECKOUT1_SRC |
| ninja -C out/Release binary_size_tool |
| tools/binary_size/run_binary_size_analysis \ |
| --library <path_to_library> |
| --destdir /tmp/throwaway |
| --nm-out /tmp/nm1.dump |
| |
| # obtain symbol data from second binary in /tmp/nm2.dump |
| cd $CHECKOUT2_SRC |
| ninja -C out/Release binary_size_tool |
| tools/binary_size/run_binary_size_analysis \ |
| --library <path_to_library> |
| --destdir /tmp/throwaway |
| --nm-out /tmp/nm2.dump |
| |
| # cleanup useless files |
| rm -r /tmp/throwaway |
| |
| # run this tool |
| explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump |
| """ |
| |
| import collections |
| from collections import Counter |
| from math import ceil |
| import operator |
| import optparse |
| import os |
| import sys |
| |
| import binary_size_utils |
| |
| |
def CalculateSharedAddresses(symbols):
  """Counts how many symbols are located at each address.

  Args:
    symbols: iterable of 5-tuples whose last element is the symbol's address.

  Returns:
    A Counter where result[address] is the number of symbols that were seen
    at that address.
  """
  return Counter(address for _, _, _, _, address in symbols)
| |
| |
def CalculateEffectiveSize(share_count, address, symbol_size):
  """Returns the size that should be blamed on a single symbol.

  Several symbols may share the same machine code/data. Charging the raw
  symbol_size to each of them would overestimate the true cost of that
  block, so the size is split evenly (rounded up) between the sharers.

  Args:
    share_count: mapping of address -> number of symbols at that address.
    address: address of the symbol being sized.
    symbol_size: raw size of the symbol.

  Returns:
    symbol_size itself when the address is used by exactly one symbol,
    otherwise symbol_size divided by the number of sharers, rounded up to
    an int.
  """
  sharers = share_count[address]
  if sharers != 1:
    # Every address looked up here is expected to be counted at least once.
    assert sharers > 1
    return int(ceil(symbol_size / float(sharers)))

  return symbol_size
| |
class SymbolDelta(object):
  """Stores old size, new size and some metadata.

  Attributes:
    old_size: size before the change, or None if not set.
    new_size: size after the change, or None if not set.
    shares_space_with_other_symbols: the `shared` flag given at construction.
  """
  def __init__(self, shared):
    self.old_size = None
    self.new_size = None
    self.shares_space_with_other_symbols = shared

  def __eq__(self, other):
    # Comparing against an unrelated type must not blow up with an
    # AttributeError; let Python fall back to its default handling.
    if not isinstance(other, SymbolDelta):
      return NotImplemented
    return (self.old_size == other.old_size and
            self.new_size == other.new_size and
            self.shares_space_with_other_symbols ==
            other.shares_space_with_other_symbols)

  def __ne__(self, other):
    result = self.__eq__(other)
    if result is NotImplemented:
      # Never negate NotImplemented: it is not a boolean answer.
      return result
    return not result

  def copy_symbol_delta(self):
    """Returns a new SymbolDelta carrying the same sizes and shared flag."""
    symbol_delta = SymbolDelta(self.shares_space_with_other_symbols)
    symbol_delta.old_size = self.old_size
    symbol_delta.new_size = self.new_size
    return symbol_delta
| |
class DeltaInfo(SymbolDelta):
  """Summary of the change for one symbol between two instances.

  Adds the symbol's identity (file path, symbol type and symbol name) to
  the sizes and shared flag stored by SymbolDelta.
  """
  def __init__(self, file_path, symbol_type, symbol_name, shared):
    SymbolDelta.__init__(self, shared)
    self.file_path = file_path
    self.symbol_type = symbol_type
    self.symbol_name = symbol_name

  def __eq__(self, other):
    # The identity must match before the size data is worth comparing.
    same_identity = (self.file_path == other.file_path and
                     self.symbol_type == other.symbol_type and
                     self.symbol_name == other.symbol_name)
    if not same_identity:
      return False
    return SymbolDelta.__eq__(self, other)

  def __ne__(self, other):
    is_equal = self.__eq__(other)
    return not is_equal

  def ExtractSymbolDelta(self):
    """Returns a copy of the SymbolDelta for this DeltaInfo."""
    plain_delta = SymbolDelta.copy_symbol_delta(self)
    return plain_delta
| |
def _MakeDeltaInfo(file_path, symbol_type, symbol_name, shared,
                   old_size=None, new_size=None):
  """Returns a DeltaInfo whose old/new sizes are already filled in."""
  delta_info = DeltaInfo(file_path, symbol_type, symbol_name, shared)
  delta_info.old_size = old_size
  delta_info.new_size = new_size
  return delta_info


def Compare(symbols1, symbols2):
  """Executes a comparison of the symbols in symbols1 and symbols2.

  Symbols are matched on (file path, symbol type, symbol name). Sizes are
  the effective sizes, i.e. adjusted for symbols sharing an address.

  Args:
    symbols1: sequence of (symbol_name, symbol_type, symbol_size, file_path,
        address) tuples for the first (old) binary. It is iterated twice, so
        it must not be a one-shot iterator.
    symbols2: same as symbols1, for the second (new) binary.

  Returns:
    tuple of lists: (added_symbols, removed_symbols, changed_symbols,
    unchanged_symbols) where each list contains DeltaInfo objects.
  """
  added = []
  removed = []
  changed = []
  unchanged = []

  cache1 = {}
  cache2 = {}
  # For each binary, make a map of
  #   (file_path, symbol_type) : {symbol_name: [(effective_size, shared)]}
  # A name maps to a list because the same symbol name can occur several
  # times in one file.
  share_count1 = CalculateSharedAddresses(symbols1)
  share_count2 = CalculateSharedAddresses(symbols2)
  for cache, symbols, share_count in ((cache1, symbols1, share_count1),
                                      (cache2, symbols2, share_count2)):
    for symbol_name, symbol_type, symbol_size, file_path, address in symbols:
      if 'vtable for ' in symbol_name:
        symbol_type = '@'  # hack to categorize these separately
      if file_path:
        file_path = os.path.normpath(file_path)
        if sys.platform.startswith('win'):
          file_path = file_path.replace('\\', '/')
      else:
        file_path = '(No Path)'
      # Take into consideration that multiple symbols might share the same
      # block of code.
      effective_symbol_size = CalculateEffectiveSize(share_count, address,
                                                     symbol_size)
      key = (file_path, symbol_type)
      bucket = cache.setdefault(key, {})
      size_list = bucket.setdefault(symbol_name, [])
      size_list.append((effective_symbol_size,
                        effective_symbol_size != symbol_size))

  # Now diff them. We iterate over the elements in cache1. For each symbol
  # that we find in cache2, we record whether it was deleted, changed, or
  # unchanged. We then remove it from cache2; all the symbols that remain
  # in cache2 at the end of the iteration over cache1 are the 'new' symbols.
  # Only cache2 is mutated here: cache1 is the dict being iterated.
  for key, bucket1 in cache1.items():
    file_path, symbol_type = key
    bucket2 = cache2.get(key)
    if not bucket2:
      # A file was removed. Everything in bucket1 is dead.
      for symbol_name, symbol_size_list in bucket1.items():
        for symbol_size, shared in symbol_size_list:
          removed.append(_MakeDeltaInfo(file_path, symbol_type, symbol_name,
                                        shared, old_size=symbol_size))
      continue

    # File still exists, look for changes within.
    for symbol_name, symbol_size_list in bucket1.items():
      # Popping marks the symbol as "not new" in cache2.
      size_list2 = bucket2.pop(symbol_name, None)
      if size_list2 is None:
        # Symbol no longer exists in bucket2.
        for symbol_size, shared in symbol_size_list:
          removed.append(_MakeDeltaInfo(file_path, symbol_type, symbol_name,
                                        shared, old_size=symbol_size))
        continue

      if len(symbol_size_list) == 1 and len(size_list2) == 1:
        symbol_size, shared1 = symbol_size_list[0]
        size2, shared2 = size_list2[0]
        delta_info = _MakeDeltaInfo(file_path, symbol_type, symbol_name,
                                    shared1 or shared2,
                                    old_size=symbol_size, new_size=size2)
        if symbol_size != size2:
          # Symbol has changed size in bucket.
          changed.append(delta_info)
        else:
          # Symbol is unchanged.
          unchanged.append(delta_info)
        continue

      # Complex comparison for when a symbol exists multiple times
      # in the same file (where file can be "unknown file"). Occurrences
      # are matched on identical (size, shared) pairs: the common count is
      # unchanged, any surplus on the old side was removed and any surplus
      # on the new side was added.
      old_counter = collections.Counter(symbol_size_list)
      new_counter = collections.Counter(size_list2)
      for size_key in sorted(set(old_counter) | set(new_counter)):
        symbol_size, shared = size_key
        old_count = old_counter[size_key]
        new_count = new_counter[size_key]
        for _ in range(min(old_count, new_count)):
          unchanged.append(_MakeDeltaInfo(file_path, symbol_type,
                                          symbol_name, shared,
                                          old_size=symbol_size,
                                          new_size=symbol_size))
        # range() of a negative number is empty, so at most one of the two
        # loops below does anything.
        for _ in range(old_count - new_count):
          removed.append(_MakeDeltaInfo(file_path, symbol_type,
                                        symbol_name, shared,
                                        old_size=symbol_size))
        for _ in range(new_count - old_count):
          added.append(_MakeDeltaInfo(file_path, symbol_type,
                                      symbol_name, shared,
                                      new_size=symbol_size))

    if not bucket2:
      del cache2[key]  # Entire bucket is empty, delete from cache2

  # We have now analyzed all symbols that are in cache1 and removed all of
  # the encountered symbols from cache2. What's left in cache2 is the new
  # symbols.
  for key, bucket2 in cache2.items():
    file_path, symbol_type = key
    for symbol_name, symbol_size_list in bucket2.items():
      for symbol_size, shared in symbol_size_list:
        added.append(_MakeDeltaInfo(file_path, symbol_type, symbol_name,
                                    shared, new_size=symbol_size))
  return (added, removed, changed, unchanged)
| |
| |
def DeltaStr(number):
  """Formats number as a string with an explicit sign.

  Positive numbers get a '+' prefix. Negative numbers keep the '-' that
  str() already produces, and zero is returned without any sign.
  """
  sign = '+' if number > 0 else ''
  return sign + str(number)
| |
| |
def SharedInfoStr(symbol_info):
  """Returns a note (prefixed by a space) about adjusted sizes.

  The note says that the numbers were adjusted because the symbol shares
  space with other symbols. An empty string is returned when
  symbol_info.shares_space_with_other_symbols is false.
  """
  if not symbol_info.shares_space_with_other_symbols:
    return ""

  return " (adjusted sizes because of memory sharing)"
| |
class CrunchStatsData(object):
  """Holds the summary data for one category of symbols.

  Only `symbols` is supplied by the caller. The remaining attributes start
  out empty/zero and are meant to be filled in afterwards.
  """
  def __init__(self, symbols):
    self.symbols = symbols
    # Aggregates, initially empty.
    self.before_size = self.after_size = 0
    self.sources = set()
    self.symbols_by_path = {}
| |
| |
def _PrintSymbolsForPath(path, new_symbols, removed_symbols, grown_symbols,
                         shrunk_symbols):
  """Prints the new, removed, grown and shrunk symbols of one source path."""
  if path in new_symbols.symbols_by_path:
    print(' New symbols:')
    # Largest new symbols first.
    for symbol_name, symbol_type, symbol_delta in sorted(
        new_symbols.symbols_by_path[path],
        key=lambda item: item[2].new_size,
        reverse=True):
      print(' %8s: %s type=%s, size=%d bytes%s' %
            (DeltaStr(symbol_delta.new_size), symbol_name, symbol_type,
             symbol_delta.new_size, SharedInfoStr(symbol_delta)))
  if path in removed_symbols.symbols_by_path:
    print(' Removed symbols:')
    # Ordered by ascending old size.
    for symbol_name, symbol_type, symbol_delta in sorted(
        removed_symbols.symbols_by_path[path],
        key=lambda item: item[2].old_size):
      print(' %8s: %s type=%s, size=%d bytes%s' %
            (DeltaStr(-symbol_delta.old_size), symbol_name, symbol_type,
             symbol_delta.old_size, SharedInfoStr(symbol_delta)))

  def changed_symbol_sortkey(item):
    symbol_name, _symbol_type, symbol_delta = item
    return (symbol_delta.old_size - symbol_delta.new_size, symbol_name)

  for changed_symbols_by_path, type_str in (
      (grown_symbols.symbols_by_path, "Grown"),
      (shrunk_symbols.symbols_by_path, "Shrunk")):
    if path not in changed_symbols_by_path:
      continue
    print(' %s symbols:' % type_str)
    for symbol_name, symbol_type, symbol_delta in sorted(
        changed_symbols_by_path[path], key=changed_symbol_sortkey):
      print(' %8s: %s type=%s, (was %d bytes, now %d bytes)%s'
            % (DeltaStr(symbol_delta.new_size - symbol_delta.old_size),
               symbol_name, symbol_type,
               symbol_delta.old_size, symbol_delta.new_size,
               SharedInfoStr(symbol_delta)))


def CrunchStats(added, removed, changed, unchanged, showsources, showsymbols):
  """Outputs to stdout a summary of changes based on the symbol lists.

  Args:
    added: list of DeltaInfo for symbols that only have a new_size.
    removed: list of DeltaInfo for symbols that only have an old_size.
    changed: list of DeltaInfo whose old_size and new_size are both set.
    unchanged: list of DeltaInfo whose old_size equals new_size.
    showsources: if true, also print a per-source breakdown.
    showsymbols: if true, list the individual symbols of each source. Only
        has an effect when showsources is true as well.
  """
  # Split changed into grown and shrunk because that is easier to
  # discuss.
  grown = []
  shrunk = []
  for item in changed:
    if item.old_size < item.new_size:
      grown.append(item)
    else:
      shrunk.append(item)

  new_symbols = CrunchStatsData(added)
  removed_symbols = CrunchStatsData(removed)
  grown_symbols = CrunchStatsData(grown)
  shrunk_symbols = CrunchStatsData(shrunk)
  sections = [new_symbols, removed_symbols, grown_symbols, shrunk_symbols]
  for section in sections:
    for item in section.symbols:
      section.sources.add(item.file_path)
      if item.old_size is not None:
        section.before_size += item.old_size
      if item.new_size is not None:
        section.after_size += item.new_size
      bucket = section.symbols_by_path.setdefault(item.file_path, [])
      bucket.append((item.symbol_name, item.symbol_type,
                     item.ExtractSymbolDelta()))

  total_change = sum(s.after_size - s.before_size for s in sections)
  summary = 'Total change: %s bytes' % DeltaStr(total_change)
  print(summary)
  print('=' * len(summary))
  for section in sections:
    if not section.symbols:
      continue
    if section.before_size == 0:
      description = ('added, totalling %s bytes' % DeltaStr(section.after_size))
    elif section.after_size == 0:
      description = ('removed, totalling %s bytes' %
                     DeltaStr(-section.before_size))
    else:
      if section.after_size > section.before_size:
        type_str = 'grown'
      else:
        type_str = 'shrunk'
      description = ('%s, for a net change of %s bytes '
                     '(%d bytes before, %d bytes after)' %
                     (type_str,
                      DeltaStr(section.after_size - section.before_size),
                      section.before_size, section.after_size))
    print(' %d %s across %d sources' %
          (len(section.symbols), description, len(section.sources)))

  maybe_unchanged_sources = set()
  unchanged_symbols_size = 0
  for item in unchanged:
    maybe_unchanged_sources.add(item.file_path)
    unchanged_symbols_size += item.old_size  # == item.new_size
  print(' %d unchanged, totalling %d bytes' %
        (len(unchanged), unchanged_symbols_size))

  # High level analysis, always output.
  # A source is completely unchanged only if none of the sections touch it.
  unchanged_sources = maybe_unchanged_sources
  for section in sections:
    unchanged_sources = unchanged_sources - section.sources
  new_sources = (new_symbols.sources -
                 maybe_unchanged_sources -
                 removed_symbols.sources)
  removed_sources = (removed_symbols.sources -
                     maybe_unchanged_sources -
                     new_symbols.sources)
  partially_changed_sources = (grown_symbols.sources |
                               shrunk_symbols.sources | new_symbols.sources |
                               removed_symbols.sources
                              ) - removed_sources - new_sources
  all_files = set(maybe_unchanged_sources)
  for section in sections:
    all_files |= section.sources
  print('Source stats:')
  print(' %d sources encountered.' % len(all_files))
  print(' %d completely new.' % len(new_sources))
  print(' %d removed completely.' % len(removed_sources))
  print(' %d partially changed.' % len(partially_changed_sources))
  print(' %d completely unchanged.' % len(unchanged_sources))
  # Sanity check: every source must fall in one of the categories above.
  remainder = (all_files - new_sources - removed_sources -
               partially_changed_sources - unchanged_sources)
  assert len(remainder) == 0

  if not showsources:
    return  # Per-source analysis, only if requested
  print('Per-source Analysis:')
  # path -> {'plus': bytes gained, 'minus': bytes lost}
  delta_by_path = {}
  for section in sections:
    for path, path_symbols in section.symbols_by_path.items():
      entry = delta_by_path.setdefault(path, {'plus': 0, 'minus': 0})
      for _symbol_name, _symbol_type, symbol_delta in path_symbols:
        if symbol_delta.old_size is None:
          delta = symbol_delta.new_size
        elif symbol_delta.new_size is None:
          delta = -symbol_delta.old_size
        else:
          delta = symbol_delta.new_size - symbol_delta.old_size

        if delta > 0:
          entry['plus'] += delta
        else:
          entry['minus'] += (-1 * delta)

  def delta_sort_key(item):
    _path, size_data = item
    growth = size_data['plus'] - size_data['minus']
    return growth

  # Sources with the largest net growth come first.
  for path, size_data in sorted(delta_by_path.items(), key=delta_sort_key,
                                reverse=True):
    gain = size_data['plus']
    loss = size_data['minus']
    delta = gain - loss
    header = ' %s - Source: %s - (gained %d, lost %d)' % (DeltaStr(delta),
                                                          path, gain, loss)
    divider = '-' * len(header)
    # print('') rather than print(): the latter prints "()" on Python 2.
    print('')
    print(divider)
    print(header)
    print(divider)
    if showsymbols:
      _PrintSymbolsForPath(path, new_symbols, removed_symbols, grown_symbols,
                           shrunk_symbols)
| |
| |
def main():
  """Parses the command line, diffs the two nm dumps and prints the report.

  Both --nm1 and --nm2 are mandatory; parser.error() is called when either
  is missing. Returns None.
  """
  usage = """%prog [options]

Analyzes the symbolic differences between two binary files
(typically, not necessarily, two different builds of the same
library) and produces a detailed description of symbols that have
been added, removed, or whose size has changed.

Example:
explain_binary_size_delta.py --nm1 /tmp/nm1.dump --nm2 /tmp/nm2.dump

Options are available via '--help'.
"""
  parser = optparse.OptionParser(usage=usage)
  parser.add_option('--nm1', metavar='PATH',
                    help='the nm dump of the first library')
  parser.add_option('--nm2', metavar='PATH',
                    help='the nm dump of the second library')
  parser.add_option('--showsources', action='store_true', default=False,
                    help='show per-source statistics')
  parser.add_option('--showsymbols', action='store_true', default=False,
                    help='show all symbol information; implies --showsources')
  parser.add_option('--verbose', action='store_true', default=False,
                    help='output internal debugging stuff')
  opts, _args = parser.parse_args()

  if not opts.nm1:
    parser.error('--nm1 is required')
  if not opts.nm2:
    parser.error('--nm2 is required')
  symbols = []
  for path in [opts.nm1, opts.nm2]:
    # open() instead of the Python-2-only file() builtin.
    with open(path, 'r') as nm_input:
      if opts.verbose:
        print('parsing ' + path + '...')
      # Materialize the parse result while the file is still open.
      symbols.append(list(binary_size_utils.ParseNm(nm_input)))
  (added, removed, changed, unchanged) = Compare(symbols[0], symbols[1])
  # --showsymbols implies --showsources.
  CrunchStats(added, removed, changed, unchanged,
              opts.showsources or opts.showsymbols, opts.showsymbols)
| |
if __name__ == '__main__':
  # Only runs when executed as a script; main()'s return value becomes the
  # argument to sys.exit().
  exit_status = main()
  sys.exit(exit_status)