#!/usr/bin/python
"""Extract the supported source files of a TAR or ZIP archive and register them.

An archive is identified by the MD5 digest of its contents.  Archives that
``sse_db.find_package`` does not know yet are unpacked below
``HOME/sources/<digest>`` and every extracted file is handed to
``sse_feed.process_source``.
"""

# NOTE: the project imports deliberately stay ahead of the standard-library
# imports (as in the original file) so that a name exported by one of the
# star imports can never shadow a standard module used below.
import sse_feed
import sse_db
from sse_config import *
from sse_defs import *

import tarfile
import zipfile
import sys
import os
import time
import zlib

try:
    from hashlib import md5 as message_digest
except ImportError:
    # Interpreters older than Python 2.5 only ship the legacy md5 module.
    from md5 import new as message_digest


def _member_destination(root, name):
    """Return the extraction path for archive member `name`, or None.

    The path is ``os.path.join(root, name)``.  None is returned when that
    path would not lie strictly below `root` (absolute member names, ".."
    components), so a crafted archive cannot write outside its directory.
    """
    dst = os.path.join(root, name)
    # Joining with "" appends exactly one trailing separator to the base.
    base = os.path.join(os.path.abspath(root), "")
    if not os.path.abspath(dst).startswith(base):
        return None
    return dst


def _ensure_parent_dir(path):
    """Create the directory that will contain `path` (best effort)."""
    try:
        os.makedirs(os.path.dirname(path))
    except OSError:
        # Usually the directory already exists.  Any real problem surfaces
        # as soon as the caller tries to open `path` for writing.
        pass


def uncompress_tar(archive, root, package_id, meta=None):
    """Extract the supported regular files of a TAR archive below `root`.

    Every extracted file gets the member's modification time and is passed
    to ``sse_feed.process_source``.  Members that are not regular files,
    that ``sse_feed.supported_source`` rejects, that would be written
    outside `root`, or whose destination already exists are skipped.

    Parameters:
        archive    -- path of the TAR file (any compression tarfile detects).
        root       -- directory the members are extracted into.
        package_id -- forwarded unchanged to ``sse_feed.process_source``.
        meta       -- forwarded unchanged to ``sse_feed.process_source``;
                      a fresh dict is used when omitted.

    Returns the number of files extracted and processed.
    Raises tarfile.TarError if `archive` is not a readable TAR file.
    """
    if meta is None:
        meta = {}

    n = 0
    tar = tarfile.open(archive, "r")
    try:
        print("Processing TAR file %s." % archive)

        for member in tar:
            if not member.isreg():
                continue

            if not sse_feed.supported_source(member.name):
                continue

            dst = _member_destination(root, member.name)
            if dst is None:
                print("WARNING: Skipping '%s': path escapes the extraction directory." % member.name)
                continue

            if os.access(dst, os.F_OK):
                print("WARNING: File '%s' already extracted." % dst)
                continue

            _ensure_parent_dir(dst)

            src = tar.extractfile(member)
            # Binary mode: the payload must be stored byte for byte.
            out = open(dst, "wb")
            try:
                while True:
                    data = src.read(SSE_BLOCK_SIZE)
                    if not data:
                        break
                    out.write(data)
            finally:
                out.close()

            os.utime(dst, (member.mtime, member.mtime))

            sse_feed.process_source(archive, root, member.name, package_id, meta)
            n += 1
    finally:
        tar.close()

    return n


def uncompress_zip(archive, root, package_id, meta=None):
    """Extract the supported files of a ZIP archive below `root`.

    Every extracted file gets the timestamp stored in the archive and is
    passed to ``sse_feed.process_source``.  Members that
    ``sse_feed.supported_source`` rejects, that would be written outside
    `root`, or whose destination already exists are skipped.

    Parameters:
        archive    -- path of the ZIP file.
        root       -- directory the members are extracted into.
        package_id -- forwarded unchanged to ``sse_feed.process_source``.
        meta       -- forwarded unchanged to ``sse_feed.process_source``;
                      a fresh dict is used when omitted.

    Returns the number of files extracted and processed.
    Raises zipfile.error if `archive` is not a valid ZIP file.
    """
    if meta is None:
        meta = {}

    n = 0
    zf = zipfile.ZipFile(archive, "r")
    try:
        print("Processing ZIP file %s." % archive)

        for info in zf.infolist():
            if not sse_feed.supported_source(info.filename):
                continue

            dst = _member_destination(root, info.filename)
            if dst is None:
                print("WARNING: Skipping '%s': path escapes the extraction directory." % info.filename)
                continue

            if os.access(dst, os.F_OK):
                print("WARNING: File '%s' already extracted" % dst)
                continue

            # Read (and thereby validate) the member before creating the
            # output file, so a corrupt member leaves no empty file behind.
            data = zf.read(info.filename)

            _ensure_parent_dir(dst)

            out = open(dst, "wb")
            try:
                out.write(data)
            finally:
                out.close()

            (year, month, day, hour, minute, second) = info.date_time
            # Weekday and day-of-year are ignored by mktime(); the DST flag
            # is kept at 0 as in the original code.
            t = time.mktime((year, month, day, hour, minute, second, 0, 0, 0))
            os.utime(dst, (t, t))

            sse_feed.process_source(archive, root, info.filename, package_id, meta)
            n += 1
    finally:
        zf.close()

    return n


def uncompress_archive(archive, root, package_id, meta=None):
    """Extract `archive` as TAR or, if it is not a TAR file, as ZIP.

    Parameters are forwarded to ``uncompress_tar`` / ``uncompress_zip``.

    Returns the number of files processed, or -1 when the archive could not
    be unpacked (unknown format or corrupt data; a warning is printed).
    An IOError from the TAR path that is not a CRC failure is re-raised.
    """
    if meta is None:
        meta = {}

    n = -1

    try:
        n = uncompress_tar(archive, root, package_id, meta)
    except tarfile.TarError:
        # Not a TAR file: fall back to ZIP.
        try:
            n = uncompress_zip(archive, root, package_id, meta)
        except zipfile.error:
            print("WARNING: Unknown file format.")
        except (IOError, zlib.error):
            # zlib.error covers corrupt deflate streams, matching what the
            # TAR path below already treats as a broken archive.
            print("WARNING: Broken archive.")
    except IOError:
        # sys.exc_info() instead of "except IOError, e" keeps this valid on
        # every Python version.
        e = sys.exc_info()[1]
        # Older gzip modules raise exactly "CRC check failed", newer ones
        # append the two checksums, hence the prefix match.
        if e.args and str(e.args[0]).startswith("CRC check failed"):
            print("WARNING: Broken archive.")
        else:
            # Bare raise preserves the original traceback.
            raise
    except zlib.error:
        print("WARNING: Broken archive.")

    return n


def calc_md(fn):
    """Return the hexadecimal MD5 digest of the contents of file `fn`."""
    m = message_digest()
    # Binary mode so the digest covers the exact bytes on disk.
    f = open(fn, "rb")
    try:
        while True:
            data = f.read(1024)
            if not data:
                break
            m.update(data)
    finally:
        f.close()

    return m.hexdigest()


def rm_rf(root):
    """Recursively delete directory `root` and everything below it.

    Raises OSError if `root` does not exist or an entry cannot be removed.
    """
    # Bottom-up walk: a directory is visited only after its children, so it
    # is already empty when its parent removes it.
    for dirpath, dirs, files in os.walk(root, topdown=False):
        for f in files:
            os.remove(os.path.join(dirpath, f))

        for d in dirs:
            path = os.path.join(dirpath, d)
            if os.path.islink(path):
                # os.walk lists symlinks to directories under `dirs`, but
                # rmdir() refuses to remove a symlink.
                os.remove(path)
            else:
                os.rmdir(path)

    os.rmdir(root)


def clear_dir(root):
    """Make `root` an empty directory, removing any previous contents.

    Raises OSError if the directory cannot be (re)created, e.g. because the
    old contents could not be removed completely.
    """
    try:
        rm_rf(root)
    except OSError:
        # Typically `root` simply does not exist yet.
        pass

    os.makedirs(root)


def process_archive(archive, meta=None, recid=None, provider_id=SSE_PROVIDER_NONE):
    """Register `archive` in the database, unpacking it if it is new.

    The whole operation runs inside one ``sse_db`` transaction that is
    rolled back unless it completes successfully.

    Parameters:
        archive     -- path of the archive file.
        meta        -- metadata dict forwarded to sse_db / sse_feed.  If it
                       has no "md" entry, the MD5 digest of `archive` is
                       computed and stored under that key (the caller's dict
                       is updated in place).  A fresh dict is used when
                       omitted.
        recid       -- provider record id; defaults to the archive's
                       base name.
        provider_id -- forwarded to ``sse_db.new_provider_record``.
    """
    if meta is None:
        # A shared {} default would carry the digest of the first archive
        # over into every later call.
        meta = {}

    if recid is None:
        recid = os.path.basename(archive)

    if "md" not in meta:
        meta["md"] = calc_md(archive)
    # Always bind md, also when the caller supplied the digest.
    md = meta["md"]

    sse_db.start_transaction()
    done = False

    try:
        package_id = sse_db.find_package(md)

        if package_id is not None:
            print("Package '%s' already in database." % recid)

            # Update provider record
            sse_db.new_provider_record(recid, package_id, provider_id, meta)
            sse_db.commit()
            done = True
        else:
            root = os.path.join(HOME, "sources", md)

            package_id = sse_db.new_package(archive, root, meta)

            print("Package '%s' is new in database." % recid)

            sse_db.new_provider_record(recid, package_id, provider_id, meta)

            clear_dir(root)

            n = uncompress_archive(archive, root, package_id, meta)

            # n < 0 means the archive could not be unpacked; leave `done`
            # unset so the transaction is rolled back below.
            if n >= 0:
                print("Successfully processed %i files." % n)
                sse_db.commit()
                done = True
    finally:
        if not done:
            sse_db.rollback()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s ARCHIVE\n" % sys.argv[0])
        sys.exit(1)

    process_archive(sys.argv[1])