summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Lennart Poettering <lennart@poettering.net> 2005-11-26 18:51:19 +0000
committer: Lennart Poettering <lennart@poettering.net> 2005-11-26 18:51:19 +0000
commit: b25ed52fcfd3cb9c81ae4ddf924b35adb6149e89 (patch)
tree: 1f455323a285774257f8ad9157af86ce016fc4ec
parent: 122016ccea9857c0099edbd3bd78b73d30304e34 (diff)
deal with debian source files containing subarchives
git-svn-id: file:///home/lennart/svn/public/sse/trunk@71 5fbabb74-0606-0410-a5e4-b5cc6a42724e
-rw-r--r--  feed/sse_config.py  8
-rwxr-xr-x  feed/sse_debian.py  9
-rwxr-xr-x  feed/sse_fm.py  2
-rw-r--r--  feed/sse_grab.py  5
-rwxr-xr-x  feed/sse_tar.py  39
5 files changed, 48 insertions, 15 deletions
diff --git a/feed/sse_config.py b/feed/sse_config.py
index a882fbf..bb9ca59 100644
--- a/feed/sse_config.py
+++ b/feed/sse_config.py
@@ -10,6 +10,14 @@ SSE_DB_USER= "sse_web"
SSE_DB_PASSWORD = "xxx"
SSE_DB_DATABASE = "sse"
+#SSE_DEBIAN_URL_DOWNLOAD = "http://update.alturo-server.de/debian/"
+#SSE_DEBIAN_URL_DOWNLOAD = "http://ftp.fr.debian.org/debian/"
+SSE_DEBIAN_URL_DOWNLOAD = "http://ftp.us.debian.org/debian/"
+SSE_DEBIAN_URL = "http://ftp.us.debian.org/debian/"
+
+SSE_DISTRIBUTION = "unstable"
+SSE_RELEASE = "main"
+
from sse_config_local import *
assert SSE_CRAWLER_ID != 0
diff --git a/feed/sse_debian.py b/feed/sse_debian.py
index 841c354..ff4fa9e 100755
--- a/feed/sse_debian.py
+++ b/feed/sse_debian.py
@@ -1,13 +1,9 @@
#!/usr/bin/python
-#SSE_DEBIAN_URL = "http://ftp.us.debian.org/debian/"
-SSE_DEBIAN_URL = "http://update.alturo-server.de/debian/"
-SSE_DISTRIBUTION = "unstable"
-SSE_RELEASE = "main"
-
import sys
from sse_defs import *
+from sse_config import *
import sse_grab
def process_entry(entry):
@@ -28,10 +24,11 @@ def process_entry(entry):
meta["name"] = entry["Package"]
meta["version"] = entry["Version"]
meta["archive-url"] = SSE_DEBIAN_URL + entry["Directory"] + "/" + meta["archive"]
+ meta["archive-url-download"] = SSE_DEBIAN_URL_DOWNLOAD + entry["Directory"] + "/" + meta["archive"]
meta["license"] = "DFSG approved"
meta["project-url"] = "http://packages.debian.org/"+ SSE_DISTRIBUTION + "/source/" + entry["Package"]
- print "Next record '%s'" % meta["name"]
+ print "Next record '%s' (debian:%s)" % (meta["name"], meta["name"])
sse_grab.grab_archive(meta, "debian:" + meta["name"], SSE_PROVIDER_DEBIAN)
diff --git a/feed/sse_fm.py b/feed/sse_fm.py
index b73f93f..1243f7b 100755
--- a/feed/sse_fm.py
+++ b/feed/sse_fm.py
@@ -74,7 +74,7 @@ def process_record(meta):
meta["id"] = int(meta["id"])
- print "Next record '%s'" % meta["name"]
+ print "Next record '%s' (freshmeat:%i)" % (meta["name"], meta["id"])
archive_url = None
diff --git a/feed/sse_grab.py b/feed/sse_grab.py
index c2b5d06..095081e 100644
--- a/feed/sse_grab.py
+++ b/feed/sse_grab.py
@@ -33,7 +33,10 @@ def grab_archive(meta, recid, provider_id = SSE_PROVIDER_NONE):
download = f.read(SSE_BLOCK_SIZE) != meta["version"]
f.close()
- archive_url = meta["archive-url"]
+ try:
+ archive_url = meta["archive-url-download"]
+ except KeyError:
+ archive_url = meta["archive-url"]
if not download:
print "File %s up-to-date." % archive_url
diff --git a/feed/sse_tar.py b/feed/sse_tar.py
index 8a1a492..d6dcc2b 100755
--- a/feed/sse_tar.py
+++ b/feed/sse_tar.py
@@ -8,11 +8,19 @@ from sse_defs import *
import tarfile, zipfile, sys, os, time, zlib
from md5 import new as message_digest
-def uncompress_tar(archive, root, package_id, meta = {}):
+tar_suffixes = [ ".tar.gz", ".tar.bz2", ".tgz", ".tbz2", ".tar" ]
+
+def uncompress_tar(archive, root, package_id, meta = {}, fo = None, descend = True, subarchive = None):
+ global n_depth
+
n = 0
- f = tarfile.open(archive, "r")
- print "Processing TAR file %s." % archive
+ if fo is None:
+ f = tarfile.open(archive, "r")
+ print "Processing TAR file %s." % archive
+ else:
+ f = tarfile.open(subarchive, "r", fo)
+ print "Processing subarchive TAR file %s." % subarchive
while True:
@@ -24,15 +32,32 @@ def uncompress_tar(archive, root, package_id, meta = {}):
if not i.isreg():
continue
- if not sse_feed.supported_source(i.name):
- continue
-
- dst = os.path.join(root, i.name)
+ if subarchive is None:
+ dst = os.path.join(root, i.name)
+ else:
+ dst = os.path.join(root, subarchive, i.name)
if os.access(dst, os.F_OK):
print "WARNING: File '%s' already extracted." % dst
continue
+ if descend:
+ for t in tar_suffixes:
+ if i.name.lower().endswith(t):
+ print "Found subarchive '%s', descending recursively..." % i.name
+
+ try:
+ os.makedirs(dst)
+ except:
+ pass
+
+ n += uncompress_tar(archive, root, package_id, meta, f.extractfile(i), False, i.name)
+ print "Subarchive ended, continuing with top level archive..."
+ continue
+
+ if not sse_feed.supported_source(i.name):
+ continue
+
try:
os.makedirs(os.path.dirname(dst))
except: