diff options
Diffstat (limited to 'test/unused-code-gc.py')
-rwxr-xr-x | test/unused-code-gc.py | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/test/unused-code-gc.py b/test/unused-code-gc.py new file mode 100755 index 00000000..7bc1930b --- /dev/null +++ b/test/unused-code-gc.py @@ -0,0 +1,240 @@ +#! /usr/bin/python + +import os +import sys +import string +import re + +## hash from symbol name to list of symbols with that name, +## where the list of symbols contains a list representing each symbol +symbols = {} +roots = {} + +def createBacklinks(name, syms): + for s in syms: + refs = s[2] + for r in refs: + ## for each ref, add ourselves as a referencer + if symbols.has_key(r): + targets = symbols[r] + for t in targets: + if name not in t[5]: + t[5].append(name) + +def markSymbol(frm, name): + if not symbols.has_key(name): + print "%s referenced but was not in the objdump" + syms = symbols[name] + ## print ambiguous references unless they are internal noise like ".L129" + if len(syms) > 1 and name[0] != '.': + print "Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (name, frm, name) + print syms + for s in syms: + if s[4]: + pass ## already marked + else: + s[4] = 1 + refs = s[2] + for r in refs: + markSymbol(s[0], r) + +def cmpFilename(a, b): + v = cmp(a[1], b[1]) + if v == 0: + v = cmp(a[0], b[0]) + return v + +def sizeAsString(bytes): + if bytes < 1024: + return "%d bytes" % bytes + elif bytes < 1024*1024: + return "%.2gK" % (bytes / 1024.0) + else: + return "%.2gM" % (bytes / 1024.0 / 1024.0) + +def printLost(): + list = [] + filename = None + for (name, syms) in symbols.items(): + s = syms[0] ## we always mark all or none for now + if not s[4] and name[0] != '.': ## skip .L129 type symbols + filename = s[3] + if not filename: + filename = "unknown file" + list.append ((name, filename, s[5], s[7])) + + file_summaries = [] + total_unused = 0 + total_this_file = 0 + filename = None + list.sort(cmpFilename) + for l in list: + next_filename = l[1] + if next_filename != filename: + if total_this_file > 0: + file_summaries.append (" %s may be unused in %s" % (sizeAsString(total_this_file), filename)) + print "%s has these symbols not reachable from exported symbols:" % next_filename + filename = next_filename + total_this_file = 0 + print " %s %s" % (l[0], sizeAsString(l[3])) + total_unused = total_unused + l[3] + total_this_file = total_this_file + l[3] + for trace in l[2]: + print " referenced from %s" % trace + + for fs in file_summaries: + print fs + print "%s total may be unused" % sizeAsString(total_unused) + +def main(): + + ## 0001aa44 <_dbus_message_get_network_data>: + sym_re = re.compile ('([0-9a-f]+) <([^>]+)>:') + ## 1aa49: e8 00 00 00 00 call 1aa4e <_dbus_message_get_network_data+0xa> + ref_re = re.compile (' <([^>]+)> *$') + ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139 + file_re = re.compile ('^(\/[^:].*):[0-9]+$') + ## _dbus_message_get_network_data+0xa + funcname_re = re.compile ('([^+]+)\+[0-9a-fx]+') + ## 00005410 T dbus_address_entries_free + dynsym_re = re.compile ('T ([^ \n]+)$') + + filename = sys.argv[1] + + command = """ + objdump -D --demangle -l %s + """ % filename + + command = string.strip (command) + + print "Running: %s" % command + + f = os.popen(command) + + ## first we find which functions reference which other functions + current_sym = None + lines = f.readlines() + for l in lines: + addr = None + name = None + target = None + file = None + + match = sym_re.match(l) + if match: + addr = match.group(1) + name = match.group(2) + else: + match = ref_re.search(l) + if match: + target = match.group(1) + else: + match = file_re.match(l) + if match: + file = match.group(1) + + if name: + ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size + item = [name, addr, [], None, 0, [], 0, 0] + if symbols.has_key(name): + symbols[name].append(item) + else: + symbols[name] = [item] + + if current_sym: + prev_addr = long(current_sym[1], 16) + our_addr = long(item[1], 16) + item[7] = our_addr - prev_addr + if item[7] < 0: + print "Computed negative size %d for %s" % (item[7], item[0]) + item[7] = 0 + + current_sym = item + + elif target and current_sym: + match = funcname_re.match(target) + if match: + ## dump the "+address" + target = match.group(1) + if target == current_sym[0]: + pass ## skip self-references + else: + current_sym[2].append (target) + + elif file and current_sym: + if file.startswith('/usr/include'): + ## inlined libc thingy + pass + elif current_sym[0].startswith('.debug'): + ## debug info + pass + elif current_sym[3] and current_sym[3] != file: + raise Exception ("%s in both %s and %s" % (current_sym[0], current_sym[3], file)) + else: + current_sym[3] = file + + ## now we need to find the roots (exported symbols) + command = "nm -D %s" % filename + print "Running: %s" % command + f = os.popen(command) + lines = f.readlines () + for l in lines: + match = dynsym_re.search(l) + if match: + name = match.group(1) + if roots.has_key(name): + raise Exception("symbol %s exported twice?" % name) + else: + roots[name] = 1 + + print "%d symbols exported from this object" % len(roots) + + ## these functions are used only indirectly, so we don't + ## notice they are used. Manually add them as roots... + vtable_roots = ['unix_finalize', + 'unix_handle_watch', + 'unix_disconnect', + 'unix_connection_set', + 'unix_do_iteration', + 'unix_live_messages_changed', + 'unix_get_unix_fd', + 'handle_client_data_cookie_sha1_mech', + 'handle_client_data_external_mech', + 'handle_server_data_cookie_sha1_mech', + 'handle_server_data_external_mech', + 'handle_client_initial_response_cookie_sha1_mech', + 'handle_client_initial_response_external_mech', + 'handle_client_shutdown_cookie_sha1_mech', + 'handle_client_shutdown_external_mech', + 'handle_server_shutdown_cookie_sha1_mech', + 'handle_server_shutdown_external_mech' + ] + + for vr in vtable_roots: + if roots.has_key(vr): + raise Exception("%s is already a root" % vr) + roots[vr] = 1 + + for k in roots.keys(): + markSymbol("root", k) + + for (k, v) in symbols.items(): + createBacklinks(k, v) + + print """ + +The symbols mentioned below don't appear to be reachable starting from +the dynamic exports of the library. However, this program is pretty +dumb; a limitation that creates false positives is that it can only +trace 'reachable' through hardcoded function calls, if a function is +called only through a vtable, it won't be marked reachable (and +neither will its children in the call graph). + +""" + + print "The following are hardcoded in as vtable roots: %s" % vtable_roots + + printLost() + +if __name__ == "__main__": + main() |