summary refs log tree commit diff stats
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rwxr-xr-x test/unused-code-gc.py 240
1 files changed, 240 insertions, 0 deletions
diff --git a/test/unused-code-gc.py b/test/unused-code-gc.py
new file mode 100755
index 00000000..7bc1930b
--- /dev/null
+++ b/test/unused-code-gc.py
@@ -0,0 +1,240 @@
+#! /usr/bin/python
+
+import os
+import sys
+import string
+import re
+
+## symbols: hash from symbol name to list of symbols with that name,
+## where the list of symbols contains a list representing each symbol.
+## Each symbol is an 8-slot list:
+##   0 symname, 1 address, 2 references, 3 filename, 4 reached,
+##   5 referenced-by, 6 backlinked, 7 approx size
+symbols = {}
+## roots: hash from the name of each symbol used as a starting point of
+## the reachability walk (dynamic exports plus the hardcoded vtable
+## roots) to 1
+roots = {}
+
def createBacklinks(name, syms):
    """Register *name* as a referencer of everything *syms* refer to.

    name -- the symbol name shared by every record in *syms*
    syms -- list of symbol records (8-slot lists) stored under *name*

    For each reference (slot 2) of each record, *name* is appended to the
    referenced-by list (slot 5) of every record in the global ``symbols``
    table that carries the referenced name.  References to names that are
    not in the table are ignored, and a name is never added twice.
    """
    for s in syms:
        for r in s[2]:
            ## .get() instead of dict.has_key(), which Python 3 removed
            for t in symbols.get(r, ()):
                if name not in t[5]:
                    t[5].append(name)
+
def markSymbol(frm, name):
    """Mark *name*, and everything reachable from it, as reached.

    frm  -- name of the referencing symbol (only used in diagnostics)
    name -- name of the symbol to mark

    Every record stored under *name* in the global ``symbols`` table gets
    its reached flag (slot 4) set, and the walk recurses into that
    record's references (slot 2).  Records that are already marked are
    not walked again, so reference cycles terminate.

    A name that is missing from the table is reported and skipped.
    """
    if name not in symbols:
        ## The original printed the format string without its argument
        ## and then fell through to symbols[name], raising KeyError.
        print("%s referenced but was not in the objdump" % name)
        return

    syms = symbols[name]
    ## print ambiguous references unless they are internal noise like ".L129"
    if len(syms) > 1 and not name.startswith('.'):
        print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (name, frm, name))
        print(syms)

    for s in syms:
        if s[4]:
            continue  ## already marked
        s[4] = 1
        for r in s[2]:
            markSymbol(s[0], r)
+
def cmpFilename(a, b):
    """Three-way compare two entries by filename, then by symbol name.

    a, b -- sequences whose element 1 is a filename and element 0 a
            symbol name

    Returns -1, 0 or 1, like the Python 2 ``cmp`` builtin.
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    ## The cmp() builtin is gone in Python 3; this form works on both.
    return (key_a > key_b) - (key_a < key_b)
+
def sizeAsString(bytes):
    """Return a human-readable rendering of a byte count.

    bytes -- size in bytes

    Values below 1024 are shown as whole bytes, values below 1024*1024
    in K, everything else in M, each with one decimal place.
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    elif bytes < 1024*1024:
        ## "%.2g" switched to scientific notation from 100 upward,
        ## so 500K used to print as "5e+02K".
        return "%.1fK" % (bytes / 1024.0)
    else:
        return "%.1fM" % (bytes / 1024.0 / 1024.0)
+
def printLost():
    """Print every unreached symbol, grouped by source file.

    Reads the global ``symbols`` table.  A name is reported when the
    first record stored under it has no reached flag (slot 4) and the
    name does not start with '.'.  For each reported symbol this prints
    its size and referencers; afterwards it prints one summary line per
    file with a non-zero unused size, then a grand total.
    """
    lost = []
    for (name, syms) in symbols.items():
        s = syms[0] ## we always mark all or none for now
        if not s[4] and name[0] != '.': ## skip .L129 type symbols
            lost.append((name, s[3] or "unknown file", s[5], s[7]))

    ## order by filename, then by symbol name
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referencers, size) in lost:
        if next_filename != filename:
            if total_this_file > 0:
                file_summaries.append (" %s may be unused in %s" % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print(" %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referencers:
            print(" referenced from %s" % trace)

    ## The loop only flushes a file's summary when the next file starts,
    ## so the last file has to be flushed here or it is silently dropped.
    if total_this_file > 0:
        file_summaries.append (" %s may be unused in %s" % (sizeAsString(total_this_file), filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))
+
def _runCommand(args):
    """Run the command *args* (a list, no shell) and return its stdout lines."""
    ## Imported here so this block does not depend on a new file-level import.
    import subprocess

    command = " ".join(args)
    print("Running: %s" % command)
    ## An argument list avoids shell interpretation of the filename.
    proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                            universal_newlines=True)
    output = proc.communicate()[0]
    if proc.returncode != 0:
        raise Exception("'%s' failed with exit status %d" % (command, proc.returncode))
    return output.splitlines()


def _parseDisassembly(lines):
    """Fill the global ``symbols`` table from ``objdump -D -l`` output lines."""
    ## 0001aa44 <_dbus_message_get_network_data>:
    sym_re = re.compile (r'([0-9a-f]+) <([^>]+)>:')
    ## 1aa49: e8 00 00 00 00 call 1aa4e <_dbus_message_get_network_data+0xa>
    ref_re = re.compile (r' <([^>]+)> *$')
    ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
    file_re = re.compile (r'^(\/[^:].*):[0-9]+$')
    ## _dbus_message_get_network_data+0xa
    funcname_re = re.compile (r'([^+]+)\+[0-9a-fx]+')

    ## first we find which functions reference which other functions
    current_sym = None
    for l in lines:
        match = sym_re.match(l)
        if match:
            addr = match.group(1)
            name = match.group(2)
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
            item = [name, addr, [], None, 0, [], 0, 0]
            symbols.setdefault(name, []).append(item)

            if current_sym:
                ## The distance from the previous symbol's start to this
                ## one is the extent of the PREVIOUS symbol, so that is
                ## the record that receives the size.
                size = int(addr, 16) - int(current_sym[1], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[0]))
                    size = 0
                current_sym[7] = size

            current_sym = item
            continue

        if not current_sym:
            ## references and file lines only mean something inside a symbol
            continue

        match = ref_re.search(l)
        if match:
            target = match.group(1)
            offset_match = funcname_re.match(target)
            if offset_match:
                ## dump the "+address"
                target = offset_match.group(1)
            if target != current_sym[0]: ## skip self-references
                current_sym[2].append (target)
            continue

        match = file_re.match(l)
        if match:
            file = match.group(1)
            if file.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[0].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[3] and current_sym[3] != file:
                raise Exception ("%s in both %s and %s" % (current_sym[0], current_sym[3], file))
            else:
                current_sym[3] = file


def _collectRoots(lines):
    """Add every exported text symbol in ``nm -D`` output lines to ``roots``."""
    ## 00005410 T dbus_address_entries_free
    dynsym_re = re.compile (r'T ([^ \n]+)$')
    for l in lines:
        match = dynsym_re.search(l)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Report symbols of an object file not reachable from its exports.

    The object file is taken from ``sys.argv[1]``.  The steps are:
    disassemble it with objdump to learn which symbol references which,
    read the dynamic exports with nm, add the hardcoded vtable roots,
    mark everything reachable from the roots, build the referenced-by
    lists, and print the symbols that were never marked.

    Raises Exception when a symbol is attributed to two source files,
    when a symbol is exported twice, when a hardcoded vtable root is
    already an export, or when objdump/nm exit with a failure status.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % sys.argv[0])
        sys.exit(1)

    filename = sys.argv[1]

    _parseDisassembly(_runCommand(["objdump", "-D", "--demangle", "-l", filename]))

    ## now we need to find the roots (exported symbols)
    _collectRoots(_runCommand(["nm", "-D", filename]))

    print("%d symbols exported from this object" % len(roots))

    ## these functions are used only indirectly, so we don't
    ## notice they are used. Manually add them as roots...
    vtable_roots = ['unix_finalize',
                    'unix_handle_watch',
                    'unix_disconnect',
                    'unix_connection_set',
                    'unix_do_iteration',
                    'unix_live_messages_changed',
                    'unix_get_unix_fd',
                    'handle_client_data_cookie_sha1_mech',
                    'handle_client_data_external_mech',
                    'handle_server_data_cookie_sha1_mech',
                    'handle_server_data_external_mech',
                    'handle_client_initial_response_cookie_sha1_mech',
                    'handle_client_initial_response_external_mech',
                    'handle_client_shutdown_cookie_sha1_mech',
                    'handle_client_shutdown_external_mech',
                    'handle_server_shutdown_cookie_sha1_mech',
                    'handle_server_shutdown_external_mech'
                    ]

    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in list(roots):
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print("""

The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).

""")

    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()


if __name__ == "__main__":
    main()