diff options
Diffstat (limited to 'test')
| -rwxr-xr-x | test/unused-code-gc.py | 240 | 
1 files changed, 240 insertions, 0 deletions
#! /usr/bin/python
"""Report code in a shared object that looks unreachable from its exports.

Usage: unused-code-gc.py SHARED-OBJECT

The script disassembles the object with ``objdump -D --demangle -l``,
records which symbols each symbol references, takes the ``T`` entries of
``nm -D`` (plus a hardcoded list of functions only called through
vtables) as roots, marks everything reachable from the roots and prints
what is left over, grouped by source file.
"""

import os
import sys
import string
import re
import subprocess

## hash from symbol name to list of symbols with that name,
## where the list of symbols contains a list representing each symbol
symbols = {}
roots = {}

## indices into the list representing one symbol
SYM_NAME = 0        ## symbol name
SYM_ADDR = 1        ## address as the hex string printed by objdump
SYM_REFS = 2        ## names of the symbols this symbol references
SYM_FILENAME = 3    ## source file, or None if objdump never reported one
SYM_REACHED = 4     ## 1 once markSymbol() got here from a root
SYM_REFERRERS = 5   ## names of the symbols referencing this one
SYM_BACKLINKED = 6  ## unused
SYM_SIZE = 7        ## approx size: distance to the next symbol's address

## 0001aa44 <_dbus_message_get_network_data>:
_sym_re = re.compile(r'([0-9a-f]+) <([^>]+)>:')
## 1aa49:       e8 00 00 00 00          call   1aa4e <_dbus_message_get_network_data+0xa>
_ref_re = re.compile(r' <([^>]+)> *$')
## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
_file_re = re.compile(r'^(/[^:].*):[0-9]+$')
## _dbus_message_get_network_data+0xa
_funcname_re = re.compile(r'([^+]+)\+[0-9a-fx]+')
## 00005410 T dbus_address_entries_free
_dynsym_re = re.compile(r'T ([^ \n]+)$')


def createBacklinks(name, syms):
    """Add `name` to the referenced-by list of every symbol `syms` reference.

    `syms` is the list of symbol records sharing the name `name`.
    References to names that are not in `symbols` are ignored.
    """
    for s in syms:
        for r in s[SYM_REFS]:
            ## for each ref, add ourselves as a referencer
            for t in symbols.get(r, ()):
                if name not in t[SYM_REFERRERS]:
                    t[SYM_REFERRERS].append(name)


def markSymbol(frm, name):
    """Mark `name`, and everything reachable from it, as reached.

    `frm` is the name of the referencing symbol and is only used in
    diagnostics.  A name that is missing from `symbols` is reported (once
    per call) and skipped.  All symbols sharing one name are marked
    together.
    """
    ## An explicit stack instead of recursion: call chains in a real
    ## library can be deeper than the interpreter's recursion limit.
    pending = [(frm, name)]
    reported_missing = set()
    while pending:
        frm, name = pending.pop()
        if name not in symbols:
            if name not in reported_missing:
                reported_missing.add(name)
                print("%s referenced but was not in the objdump" % name)
            continue
        syms = symbols[name]
        ## print ambiguous references unless they are internal noise like ".L129"
        if len(syms) > 1 and name[0] != '.':
            print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (name, frm, name))
            print(syms)
        children = []
        for s in syms:
            if not s[SYM_REACHED]:
                s[SYM_REACHED] = 1
                for r in s[SYM_REFS]:
                    children.append((s[SYM_NAME], r))
        ## reversed so the first reference is popped (visited) first
        children.reverse()
        pending.extend(children)


def cmpFilename(a, b):
    """Three-way compare of (name, filename, ...) tuples.

    Orders by filename (index 1), then by symbol name (index 0), and
    returns -1, 0 or 1.  Written without the cmp() builtin, which
    Python 3 removed.
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    return (key_a > key_b) - (key_a < key_b)


def sizeAsString(bytes):
    """Format a byte count as "N bytes", "N.NK" or "N.NM"."""
    if bytes < 1024:
        return "%d bytes" % bytes
    elif bytes < 1024*1024:
        ## %.1f rather than %.2g: %g switches to exponent notation
        ## ("1.5e+02K") as soon as the value needs three digits
        return "%.1fK" % (bytes / 1024.0)
    else:
        return "%.1fM" % (bytes / 1024.0 / 1024.0)


def printLost():
    """Print every unreached symbol grouped by file, then the size totals.

    Symbols whose name starts with '.' (".L129" style labels) are
    skipped.  Only the first record of each name is inspected.
    """
    lost = []
    for (name, syms) in symbols.items():
        s = syms[0] ## we always mark all or none for now
        if not s[SYM_REACHED] and name[0] != '.': ## skip .L129 type symbols
            filename = s[SYM_FILENAME] or "unknown file"
            lost.append((name, filename, s[SYM_REFERRERS], s[SYM_SIZE]))

    ## same ordering as cmpFilename(): by filename, then by symbol name
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referrers, size) in lost:
        if next_filename != filename:
            if total_this_file > 0:
                file_summaries.append("  %s may be unused in %s" % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print("    %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referrers:
            print("       referenced from %s" % trace)

    ## the loop above only emits a summary when the file changes, so the
    ## last file still has to be flushed here
    if total_this_file > 0:
        file_summaries.append("  %s may be unused in %s" % (sizeAsString(total_this_file), filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))


def _runCommand(argv):
    """Run `argv` without a shell and return its stdout as a list of lines.

    Raises Exception if the command exits with a non-zero status.
    """
    print("Running: %s" % " ".join(argv))
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    output = proc.communicate()[0]
    if proc.returncode != 0:
        raise Exception("%s exited with status %d" % (argv[0], proc.returncode))
    if not isinstance(output, str):
        ## Python 3 hands back bytes; tolerate undecodable bytes
        output = output.decode("utf-8", "replace")
    return output.splitlines(True)


def _parseObjdump(lines):
    """Fill `symbols` from the lines of `objdump -D --demangle -l`."""
    current_sym = None
    for l in lines:
        match = _sym_re.match(l)
        if match:
            addr = match.group(1)
            name = match.group(2)
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
            item = [name, addr, [], None, 0, [], 0, 0]
            symbols.setdefault(name, []).append(item)

            if current_sym:
                ## the gap up to this symbol is the size of the symbol
                ## that just ended, not of the one that is starting
                size = int(addr, 16) - int(current_sym[SYM_ADDR], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[SYM_NAME]))
                    size = 0
                current_sym[SYM_SIZE] = size

            current_sym = item
            continue

        if not current_sym:
            continue

        match = _ref_re.search(l)
        if match:
            target = match.group(1)
            stripped = _funcname_re.match(target)
            if stripped:
                ## dump the "+address"
                target = stripped.group(1)
            if target != current_sym[SYM_NAME]: ## skip self-references
                current_sym[SYM_REFS].append(target)
            continue

        match = _file_re.match(l)
        if match:
            file = match.group(1)
            if file.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[SYM_NAME].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[SYM_FILENAME] and current_sym[SYM_FILENAME] != file:
                raise Exception("%s in both %s and %s" % (current_sym[SYM_NAME], current_sym[SYM_FILENAME], file))
            else:
                current_sym[SYM_FILENAME] = file


def _parseExports(lines):
    """Fill `roots` with the 'T' symbols found in the lines of `nm -D`."""
    for l in lines:
        match = _dynsym_re.search(l)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Analyse the object named by sys.argv[1] and print the report."""
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s SHARED-OBJECT\n" % sys.argv[0])
        sys.exit(1)

    filename = sys.argv[1]

    ## first we find which functions reference which other functions
    _parseObjdump(_runCommand(["objdump", "-D", "--demangle", "-l", filename]))

    ## now we need to find the roots (exported symbols)
    _parseExports(_runCommand(["nm", "-D", filename]))

    print("%d symbols exported from this object" % len(roots))

    ## these functions are used only indirectly, so we don't
    ## notice they are used. Manually add them as roots...
    vtable_roots = ['unix_finalize',
                    'unix_handle_watch',
                    'unix_disconnect',
                    'unix_connection_set',
                    'unix_do_iteration',
                    'unix_live_messages_changed',
                    'unix_get_unix_fd',
                    'handle_client_data_cookie_sha1_mech',
                    'handle_client_data_external_mech',
                    'handle_server_data_cookie_sha1_mech',
                    'handle_server_data_external_mech',
                    'handle_client_initial_response_cookie_sha1_mech',
                    'handle_client_initial_response_external_mech',
                    'handle_client_shutdown_cookie_sha1_mech',
                    'handle_client_shutdown_external_mech',
                    'handle_server_shutdown_cookie_sha1_mech',
                    'handle_server_shutdown_external_mech'
                    ]

    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    ## sorted so that the diagnostics come out in a reproducible order
    for k in sorted(roots):
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print("""

The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).

""")

    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()


if __name__ == "__main__":
    main()
