From b25ed52fcfd3cb9c81ae4ddf924b35adb6149e89 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 26 Nov 2005 18:51:19 +0000 Subject: deal with debian source files containing subarchives git-svn-id: file:///home/lennart/svn/public/sse/trunk@71 5fbabb74-0606-0410-a5e4-b5cc6a42724e --- feed/sse_config.py | 8 ++++++++ feed/sse_debian.py | 9 +++------ feed/sse_fm.py | 2 +- feed/sse_grab.py | 5 ++++- feed/sse_tar.py | 39 ++++++++++++++++++++++++++++++++------- 5 files changed, 48 insertions(+), 15 deletions(-) (limited to 'feed') diff --git a/feed/sse_config.py b/feed/sse_config.py index a882fbf..bb9ca59 100644 --- a/feed/sse_config.py +++ b/feed/sse_config.py @@ -10,6 +10,14 @@ SSE_DB_USER= "sse_web" SSE_DB_PASSWORD = "xxx" SSE_DB_DATABASE = "sse" +#SSE_DEBIAN_URL_DOWNLOAD = "http://update.alturo-server.de/debian/" +#SSE_DEBIAN_URL_DOWNLOAD = "http://ftp.fr.debian.org/debian/" +SSE_DEBIAN_URL_DOWNLOAD = "http://ftp.us.debian.org/debian/" +SSE_DEBIAN_URL = "http://ftp.us.debian.org/debian/" + +SSE_DISTRIBUTION = "unstable" +SSE_RELEASE = "main" + from sse_config_local import * assert SSE_CRAWLER_ID != 0 diff --git a/feed/sse_debian.py b/feed/sse_debian.py index 841c354..ff4fa9e 100755 --- a/feed/sse_debian.py +++ b/feed/sse_debian.py @@ -1,13 +1,9 @@ #!/usr/bin/python -#SSE_DEBIAN_URL = "http://ftp.us.debian.org/debian/" -SSE_DEBIAN_URL = "http://update.alturo-server.de/debian/" -SSE_DISTRIBUTION = "unstable" -SSE_RELEASE = "main" - import sys from sse_defs import * +from sse_config import * import sse_grab def process_entry(entry): @@ -28,10 +24,11 @@ def process_entry(entry): meta["name"] = entry["Package"] meta["version"] = entry["Version"] meta["archive-url"] = SSE_DEBIAN_URL + entry["Directory"] + "/" + meta["archive"] + meta["archive-url-download"] = SSE_DEBIAN_URL_DOWNLOAD + entry["Directory"] + "/" + meta["archive"] meta["license"] = "DFSG approved" meta["project-url"] = "http://packages.debian.org/"+ SSE_DISTRIBUTION + "/source/" + entry["Package"] - print "Next record '%s'" % meta["name"] + print "Next record '%s' (debian:%s)" % (meta["name"], meta["name"]) sse_grab.grab_archive(meta, "debian:" + meta["name"], SSE_PROVIDER_DEBIAN) diff --git a/feed/sse_fm.py b/feed/sse_fm.py index b73f93f..1243f7b 100755 --- a/feed/sse_fm.py +++ b/feed/sse_fm.py @@ -74,7 +74,7 @@ def process_record(meta): meta["id"] = int(meta["id"]) - print "Next record '%s'" % meta["name"] + print "Next record '%s' (freshmeat:%i)" % (meta["name"], meta["id"]) archive_url = None diff --git a/feed/sse_grab.py b/feed/sse_grab.py index c2b5d06..095081e 100644 --- a/feed/sse_grab.py +++ b/feed/sse_grab.py @@ -33,7 +33,10 @@ def grab_archive(meta, recid, provider_id = SSE_PROVIDER_NONE): download = f.read(SSE_BLOCK_SIZE) != meta["version"] f.close() - archive_url = meta["archive-url"] + try: + archive_url = meta["archive-url-download"] + except KeyError: + archive_url = meta["archive-url"] if not download: print "File %s up-to-date." % archive_url diff --git a/feed/sse_tar.py b/feed/sse_tar.py index 8a1a492..d6dcc2b 100755 --- a/feed/sse_tar.py +++ b/feed/sse_tar.py @@ -8,11 +8,19 @@ from sse_defs import * import tarfile, zipfile, sys, os, time, zlib from md5 import new as message_digest -def uncompress_tar(archive, root, package_id, meta = {}): +tar_suffixes = [ ".tar.gz", ".tar.bz2", ".tgz", ".tbz2", ".tar" ] + +def uncompress_tar(archive, root, package_id, meta = {}, fo = None, descend = True, subarchive = None): + global n_depth + n = 0 - f = tarfile.open(archive, "r") - print "Processing TAR file %s." % archive + if fo is None: + f = tarfile.open(archive, "r") + print "Processing TAR file %s." % archive + else: + f = tarfile.open(subarchive, "r", fo) + print "Processing subarchive TAR file %s." % subarchive while True: @@ -24,15 +32,32 @@ def uncompress_tar(archive, root, package_id, meta = {}): if not i.isreg(): continue - if not sse_feed.supported_source(i.name): - continue - - dst = os.path.join(root, i.name) + if subarchive is None: + dst = os.path.join(root, i.name) + else: + dst = os.path.join(root, subarchive, i.name) if os.access(dst, os.F_OK): print "WARNING: File '%s' already extracted." % dst continue + if descend: + for t in tar_suffixes: + if i.name.lower().endswith(t): + print "Found subarchive '%s', descending recursively..." % i.name + + try: + os.makedirs(dst) + except: + pass + + n += uncompress_tar(archive, root, package_id, meta, f.extractfile(i), False, i.name) + print "Subarchive ended, continuing with top level archive..." + continue + + if not sse_feed.supported_source(i.name): + continue + try: os.makedirs(os.path.dirname(dst)) except: -- cgit