#! /usr/bin/python
"""Report symbols that are not reachable from an object's dynamic exports.

Usage: pass the path of the object file as the only argument.

The object is disassembled with ``objdump -D --demangle -l`` to learn which
symbols reference which other symbols, and ``nm -D`` supplies the exported
symbols that serve as roots.  Everything reachable from a root through
hard-coded references is marked; whatever stays unmarked is printed, grouped
by source file, together with an approximate size.

The code deliberately sticks to constructs that behave identically under
Python 2 and Python 3 (single-argument print(), ``in`` rather than
has_key(), int() rather than long()).
"""

import os
import re
import string
import subprocess
import sys

## hash from symbol name to list of symbols with that name,
## where the list of symbols contains a list representing each symbol
symbols = {}
roots = {}

## Positions inside one symbol record.  Records stay plain lists because
## they are mutated in place and printed verbatim for ambiguous references.
SYM_NAME = 0           ## symbol name
SYM_ADDR = 1           ## address as the hex string objdump printed
SYM_REFS = 2           ## names of the symbols this one references
SYM_FILE = 3           ## source file, or None when unknown
SYM_REACHED = 4        ## 1 once marked reachable from a root
SYM_REFERENCED_BY = 5  ## names of the symbols referencing this one
SYM_BACKLINKED = 6     ## initialised to 0 and not used in this file
SYM_SIZE = 7           ## approximate size in bytes

## 0001aa44 <_dbus_message_get_network_data>:
_SYM_RE = re.compile(r'([0-9a-f]+) <([^>]+)>:')
## 1aa49: e8 00 00 00 00 call 1aa4e <_dbus_message_get_network_data+0xa>
_REF_RE = re.compile(r' <([^>]+)> *$')
## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
_FILE_RE = re.compile(r'^(\/[^:].*):[0-9]+$')
## _dbus_message_get_network_data+0xa
_FUNCNAME_RE = re.compile(r'([^+]+)\+[0-9a-fx]+')
## 00005410 T dbus_address_entries_free
_DYNSYM_RE = re.compile(r'T ([^ \n]+)$')

_CAVEAT = (
    " The symbols mentioned below don't appear to be reachable starting"
    " from the dynamic exports of the library. However, this program is"
    " pretty dumb; a limitation that creates false positives is that it"
    " can only trace 'reachable' through hardcoded function calls, if a"
    " function is called only through a vtable, it won't be marked"
    " reachable (and neither will its children in the call graph). "
)


def createBacklinks(name, syms):
    """Register `name` as a referencer of everything `syms` refer to.

    `syms` is the list of records stored under `name`.  Each referenced
    name that is present in the global `symbols` table gets `name` added to
    the referenced-by list of all of its records, without duplicates.
    References to names missing from the table are ignored.
    """
    for sym in syms:
        for ref in sym[SYM_REFS]:
            ## for each ref, add ourselves as a referencer
            for target in symbols.get(ref, ()):
                referenced_by = target[SYM_REFERENCED_BY]
                if name not in referenced_by:
                    referenced_by.append(name)


def _markOne(frm, name):
    """Mark all records called `name`; yield the references to follow next.

    This is a generator so that the caller can drive the traversal with an
    explicit stack.  Each yielded item is a (referencer, target) pair.
    """
    if name not in symbols:
        ## Report the unknown name and stop instead of failing on the lookup.
        print("%s referenced but was not in the objdump" % name)
        return
    syms = symbols[name]
    ## print ambiguous references unless they are internal noise like ".L129"
    if len(syms) > 1 and not name.startswith('.'):
        print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'"
              % (name, frm, name))
        print(syms)
    for sym in syms:
        if sym[SYM_REACHED]:
            continue  ## already marked
        sym[SYM_REACHED] = 1
        for ref in sym[SYM_REFS]:
            yield (sym[SYM_NAME], ref)


def markSymbol(frm, name):
    """Mark `name`, and everything reachable from it, as reached.

    `frm` names the referencer and is only used in the diagnostic printed
    for ambiguous names.  A name that is absent from `symbols` is reported
    and skipped.  The walk is depth-first in the order the references were
    recorded, but it keeps its own stack of generators rather than
    recursing, so long reference chains cannot hit the recursion limit.
    """
    pending = [_markOne(frm, name)]
    while pending:
        edge = next(pending[-1], None)
        if edge is None:
            pending.pop()
        else:
            pending.append(_markOne(edge[0], edge[1]))


def cmpFilename(a, b):
    """Three-way compare two entries by element 1, then by element 0.

    Returns -1, 0 or 1.  printLost() builds entries whose element 1 is the
    filename and element 0 the symbol name.
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    return (key_a > key_b) - (key_a < key_b)


def sizeAsString(bytes):
    """Format a byte count as "N bytes", "N.NK" or "N.NM".

    The parameter keeps its original name, which shadows the builtin, so
    that existing keyword callers continue to work.  One fixed decimal is
    used for the K and M forms so that large values never fall into
    exponent notation.
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    elif bytes < 1024 * 1024:
        return "%.1fK" % (bytes / 1024.0)
    else:
        return "%.1fM" % (bytes / 1024.0 / 1024.0)


def _fileSummary(total, filename):
    """Build the per-file summary line."""
    return " %s may be unused in %s" % (sizeAsString(total), filename)


def printLost():
    """Print every unmarked symbol grouped by file, then the size totals.

    Only the first record of each name is inspected, and names starting
    with '.' are skipped.  After the per-symbol listing, one summary line is
    printed for each file with a non-zero total (including the last file),
    followed by the grand total.
    """
    lost = []
    for (name, syms) in symbols.items():
        sym = syms[0]  ## we always mark all or none for now
        ## skip .L129 type symbols
        if not sym[SYM_REACHED] and not name.startswith('.'):
            filename = sym[SYM_FILE] or "unknown file"
            lost.append((name, filename,
                         sym[SYM_REFERENCED_BY], sym[SYM_SIZE]))

    ## same ordering as cmpFilename: by filename, then by symbol name
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referenced_by, size) in lost:
        if next_filename != filename:
            if total_this_file > 0:
                file_summaries.append(_fileSummary(total_this_file, filename))
            print("%s has these symbols not reachable from exported symbols:"
                  % next_filename)
            filename = next_filename
            total_this_file = 0
        print(" %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referenced_by:
            print(" referenced from %s" % trace)

    ## The loop only flushes a summary when the filename changes, so the
    ## final file has to be flushed here.
    if total_this_file > 0:
        file_summaries.append(_fileSummary(total_this_file, filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))


def _runCommand(argv):
    """Run `argv` without a shell and return its stdout as a list of lines.

    Passing an argument list keeps file names containing spaces or shell
    metacharacters intact.  A non-zero exit status is reported on stderr
    but does not abort the run.
    """
    print("Running: %s" % ' '.join(argv))
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        lines = proc.stdout.readlines()
    finally:
        proc.stdout.close()
        status = proc.wait()
    if status != 0:
        sys.stderr.write("Warning: '%s' exited with status %d\n"
                         % (argv[0], status))
    return lines


def _addSymbol(name, addr, previous):
    """Create the record for `name`, store it in `symbols`, and return it.

    `previous` is the record of the symbol header seen just before this one
    (or None).  The distance between the two addresses is the extent of the
    previous symbol, so that is where the approximate size is stored; a
    negative distance is reported and clamped to 0.  The last symbol seen
    keeps a size of 0.
    """
    ## 0 symname, 1 address, 2 references, 3 filename, 4 reached,
    ## 5 referenced-by 6 backlinked 7 approx size
    item = [name, addr, [], None, 0, [], 0, 0]
    symbols.setdefault(name, []).append(item)
    if previous is not None:
        size = int(addr, 16) - int(previous[SYM_ADDR], 16)
        if size < 0:
            print("Computed negative size %d for %s"
                  % (size, previous[SYM_NAME]))
            size = 0
        previous[SYM_SIZE] = size
    return item


def _parseObjdump(lines):
    """Fill `symbols` from the lines of an ``objdump -D -l`` listing.

    Raises Exception when one symbol is attributed to two different source
    files (files under /usr/include and .debug* symbols are exempt).
    """
    current_sym = None
    for line in lines:
        match = _SYM_RE.match(line)
        if match:
            current_sym = _addSymbol(match.group(2), match.group(1),
                                     current_sym)
            continue
        if current_sym is None:
            ## references and file names only matter inside a symbol
            continue

        match = _REF_RE.search(line)
        if match:
            target = match.group(1)
            stripped = _FUNCNAME_RE.match(target)
            if stripped:
                ## dump the "+address"
                target = stripped.group(1)
            if target != current_sym[SYM_NAME]:  ## skip self-references
                current_sym[SYM_REFS].append(target)
            continue

        match = _FILE_RE.match(line)
        if not match:
            continue
        source_file = match.group(1)
        if source_file.startswith('/usr/include'):
            ## inlined libc thingy
            pass
        elif current_sym[SYM_NAME].startswith('.debug'):
            ## debug info
            pass
        elif current_sym[SYM_FILE] and current_sym[SYM_FILE] != source_file:
            raise Exception("%s in both %s and %s"
                            % (current_sym[SYM_NAME], current_sym[SYM_FILE],
                               source_file))
        else:
            current_sym[SYM_FILE] = source_file


def _readRoots(lines):
    """Add every 'T' symbol in the ``nm -D`` output `lines` to `roots`.

    Raises Exception when the same name is listed twice.
    """
    for line in lines:
        match = _DYNSYM_RE.search(line)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Analyse the object named by sys.argv[1] and print the report."""
    if len(sys.argv) < 2:
        sys.exit("Usage: %s OBJECT-FILE" % sys.argv[0])
    filename = sys.argv[1]

    ## first we find which functions reference which other functions
    _parseObjdump(_runCommand(['objdump', '-D', '--demangle', '-l',
                               filename]))

    ## now we need to find the roots (exported symbols)
    _readRoots(_runCommand(['nm', '-D', filename]))
    print("%d symbols exported from this object" % len(roots))

    ## these functions are used only indirectly, so we don't
    ## notice they are used. Manually add them as roots...
    vtable_roots = [
        'unix_finalize',
        'unix_handle_watch',
        'unix_disconnect',
        'unix_connection_set',
        'unix_do_iteration',
        'unix_live_messages_changed',
        'unix_get_unix_fd',
        'handle_client_data_cookie_sha1_mech',
        'handle_client_data_external_mech',
        'handle_server_data_cookie_sha1_mech',
        'handle_server_data_external_mech',
        'handle_client_initial_response_cookie_sha1_mech',
        'handle_client_initial_response_external_mech',
        'handle_client_shutdown_cookie_sha1_mech',
        'handle_client_shutdown_external_mech',
        'handle_server_shutdown_cookie_sha1_mech',
        'handle_server_shutdown_external_mech',
    ]
    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in roots:
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print(_CAVEAT)
    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()


if __name__ == "__main__":
    main()