#!/usr/bin/python

import sse_feed, sse_db
from sse_config import *
from sse_defs import *

import tarfile, zipfile, sys, os, time, zlib, tempfile, struct

# hashlib supersedes the deprecated md5 module on Python >= 2.5
try:
    from hashlib import md5 as message_digest
except ImportError:
    from md5 import new as message_digest

tar_suffixes = [ ".tar.gz", ".tar.bz2", ".tgz", ".tbz2", ".tar" ]

def copy_file_to(dst, x):
    # dst may be a path name, a file descriptor or an open file object
    if isinstance(dst, str):
        o = file(dst, "wb")
    elif isinstance(dst, int):
        o = os.fdopen(dst, "wb")
    else:
        o = dst

    while True:
        data = x.read(SSE_BLOCK_SIZE)
        if len(data) <= 0:
            break
        o.write(data)

    o.close()

def uncompress_tar(archive, root, package_id, meta = {}, tmp = None, descend = True, subarchive = None):
    n = 0

    if tmp is None:
        f = tarfile.open(archive, "r")
        print "Processing TAR file %s." % archive
    else:
        f = tarfile.open(tmp, "r")
        print "Processing temporary TAR file %s." % tmp

    while True:
        i = f.next()
        if i is None:
            break

        if not i.isreg():
            continue

        if subarchive is None:
            dst = os.path.join(root, i.name)
        else:
            dst = os.path.join(root, subarchive, i.name)

        if os.access(dst, os.F_OK):
            print "WARNING: File '%s' already extracted." % dst
            continue

        # Unpack nested tarballs, but only one level deep: the
        # recursive call is made with descend = False
        if descend and any(i.name.lower().endswith(t) for t in tar_suffixes):
            print "Found subarchive '%s', descending recursively..." % i.name

            try:
                os.makedirs(dst)
            except OSError:
                pass

            o, tmp_archive = tempfile.mkstemp()
            copy_file_to(o, f.extractfile(i))

            try:
                n += uncompress_tar(archive, root, package_id, meta, tmp_archive, False, i.name)
            finally:
                os.unlink(tmp_archive)

            print "Subarchive ended, continuing with top level archive..."
            continue

        if not sse_feed.supported_source(i.name):
            continue

        try:
            os.makedirs(os.path.dirname(dst))
        except OSError:
            pass

        copy_file_to(dst, f.extractfile(i))
        os.utime(dst, (i.mtime, i.mtime))

        if subarchive is None:
            fn = i.name
        else:
            fn = os.path.join(subarchive, i.name)

        sse_feed.process_source(archive, root, fn, package_id, meta)
        n += 1

    f.close()
    return n

def uncompress_zip(archive, root, package_id, meta = {}):
    n = 0

    f = zipfile.ZipFile(archive, "r")
    print "Processing ZIP file %s." % archive

    for i in f.infolist():
        if not sse_feed.supported_source(i.filename):
            continue

        dst = os.path.join(root, i.filename)

        if os.access(dst, os.F_OK):
            print "WARNING: File '%s' already extracted." % dst
            continue

        try:
            os.makedirs(os.path.dirname(dst))
        except OSError:
            pass

        o = file(dst, "wb")
        o.write(f.read(i.filename))
        o.close()

        # mktime() requires a 9-element tuple; a DST flag of -1 lets
        # the library work the timezone offset out itself
        (year, month, day, hour, minute, second) = i.date_time
        t = time.mktime((year, month, day, hour, minute, second, 0, 0, -1))
        os.utime(dst, (t, t))

        sse_feed.process_source(archive, root, i.filename, package_id, meta)
        n += 1

    f.close()
    return n

def uncompress_archive(archive, root, package_id, meta = {}):
    n = -1

    # Try the archive as a tarball first; if that fails, as a ZIP file
    try:
        try:
            n = uncompress_tar(archive, root, package_id, meta)
        except tarfile.TarError:
            n = uncompress_zip(archive, root, package_id, meta)
    except (zipfile.error, zlib.error, EOFError, IOError, struct.error, AttributeError, TypeError), e:
        print "WARNING: Broken archive: %s" % e

    return n

def calc_md(fn):
    # Calculate the MD5 digest of a file, reading it in small blocks
    m = message_digest()

    f = file(fn, "rb")
    while True:
        data = f.read(1024)
        if len(data) <= 0:
            break
        m.update(data)
    f.close()

    return m.hexdigest()

def rm_rf(root):
    # Remove a directory tree bottom-up, like shutil.rmtree()
    for path, dirs, files in os.walk(root, topdown = False):
        for f in files:
            os.remove(os.path.join(path, f))
        for d in dirs:
            os.rmdir(os.path.join(path, d))
    os.rmdir(root)

def clear_dir(root):
    try:
        rm_rf(root)
    except OSError:
        pass
    os.makedirs(root)

def process_archive(archive, meta = None, recid = None, provider_id = SSE_PROVIDER_NONE):
    # A mutable default argument would be shared across calls and is
    # modified below, hence default to None and create the dict here
    if meta is None:
        meta = {}

    if recid is None:
        recid = os.path.basename(archive)

    if "md" not in meta:
        meta["md"] = calc_md(archive)
    md = meta["md"]

    sse_db.start_transaction()
    done = False

    try:
        package_id = sse_db.find_package(md)

        if package_id is not None:
            print "Package '%s' already in database." % recid

            # Update provider record
            sse_db.new_provider_record(recid, package_id, provider_id, meta)
            sse_db.commit()
            done = True
        else:
            root = os.path.join(HOME, "sources", md)
            package_id = sse_db.new_package(archive, root, meta)
            print "Package '%s' is new in database." % recid

            sse_db.new_provider_record(recid, package_id, provider_id, meta)

            clear_dir(root)
            n = uncompress_archive(archive, root, package_id, meta)

            if n >= 0:
                print "Successfully processed %i files." % n
                sse_db.commit()
                done = True
    finally:
        if not done:
            sse_db.rollback()

if __name__ == "__main__":
    process_archive(sys.argv[1])
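# A minimal usage sketch. The module name process_archive and the example
# path below are hypothetical; the script assumes sse_config provides HOME
# and SSE_BLOCK_SIZE, sse_defs provides SSE_PROVIDER_NONE, and the sse_db
# schema already exists. From the command line:
#
#   python process_archive.py /tmp/foo-1.0.tar.gz
#
# Programmatic use: a precomputed digest passed in meta["md"] skips the
# calc_md() pass over the archive.
#
#   import process_archive
#   process_archive.process_archive("/tmp/foo-1.0.tar.gz",
#       meta = { "md": "0123456789abcdef0123456789abcdef" })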