diff options
author | Lennart Poettering <lennart@poettering.net> | 2005-11-21 23:06:55 +0000 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2005-11-21 23:06:55 +0000 |
commit | 71e7248cba9a5b78531aeaac7a58e811ec701dba (patch) | |
tree | 85087eaf6464bb47c14419e61243f5d01dbf050f /feed/sse_fm.py | |
parent | e2df88d73130ed8237efeff3bdae9fd9f5e0c0a3 (diff) |
a days work
git-svn-id: file:///home/lennart/svn/public/sse/trunk@16 5fbabb74-0606-0410-a5e4-b5cc6a42724e
Diffstat (limited to 'feed/sse_fm.py')
-rwxr-xr-x | feed/sse_fm.py | 140 |
1 files changed, 64 insertions, 76 deletions
diff --git a/feed/sse_fm.py b/feed/sse_fm.py index dd45d58..7161d7a 100755 --- a/feed/sse_fm.py +++ b/feed/sse_fm.py @@ -1,89 +1,81 @@ #!/usr/bin/python - -import sys, urllib2, os, socket +import sys from xml.sax import ContentHandler, make_parser -from sse-config import * - -SSE_TAR = SSE_DIR + "sse-tar" -def process_tar(project, tar): - print "New tar %s" % tar +from sse_defs import * +import sse_grab + +license_blacklist = [ + "Other/Proprietary License with Free Trial", + "Free for non-commercial use", + "Free To Use But Restricted", + "Freely Distributable", + "Freeware", + "Shareware", + "Other/Proprietary License with Source", + "Other/Proprietary License", + ] + +license_whitelist = [ + "GNU General Public License (GPL)", + "GNU Lesser General Public License (LGPL)", + "OSI Approved", + "The Apache License", + "Q Public License (QPL)", + "Public Domain", + "BSD License (original)", + "Artistic License", + "MIT/X Consortium License", + "The Clarified Artistic License", + "BSD License (revised)" + ] + +def process_record(meta): - ret = os.system("%s '%s' '%s' '%s'" % (SSE_TAR, tar, project["name"], project["project-url"])) - - if ret != 0: - print "WARNING: Process returned %i" % ret + archive_url = None -def process_project(project): + for k, v in meta.items(): + meta[k] = v.strip() - archive_url = None + meta["id"] = int(meta["id"]) for a in ("archive-tgz-url", "archive-bz2-url", "archive-zip-url"): - if project.has_key(a) and project[a] != "": - archive_url = project[a] + if meta.has_key(a) and meta[a] != "": + archive_url = meta[a] break if archive_url is None: - print "WARNING: Ignoring project '%s' without archive URL!" % project["name"] + print "Ignoring project '%s' without archive URL!" % meta["name"] return - - fn = "%s/download/freshmeat-%i" % (HOME, int(project["id"])) - - download = False - - try: - f = open(fn+".release", "r") - except: - download = True - else: - download = f.read() != project["date"].strip() - - if not download: - print "File %s up-to-date." % archive_url - else: - print "Downloading %s..." % archive_url - - try: - dst = file(fn, "w") - src = urllib2.urlopen(archive_url) - - while True: - data = src.read(1024) + if meta["license"] in license_blacklist: + print "Ignoring project '%s' due to evil license '%s'!" % (meta["name"], meta["license"]) + return - if len(data) <= 0: - break + if meta["license"] not in license_whitelist: + print "WARNING: Unknown license '%s' for project '%s'!" % (meta["license"], meta["name"]) - dst.write(data) + f = file("graylist", "a") + f.write("%s\t%s\n" % (meta["name"], meta["license"])) + f.close() + return - del dst - del src - - except IOError, e: - os.unlink(fn) - print "WARNING: Failed to download %s!" % archive_url - return + meta["archive-url"] = archive_url - try: - f = open(fn+".release", "w") - except: - os.unlink(fn) + print "Next record '%s'" % meta["name"] - f.write(project["date"].strip()) - del f - - process_tar(project, fn) + sse_grab.grab_archive(meta, "freshmeat:%i" % meta["id"], SSE_PROVIDER_FRESHMEAT) class docHandler(ContentHandler): - project_data = {} + meta = {} field = None def startElement(self, name, attrs): if name == "project": - self.project_data = {} + self.meta = {} self.field = None elif name == "project_id": self.field = "id" @@ -100,35 +92,31 @@ class docHandler(ContentHandler): elif name == "license": self.field = "license" elif name == "latest_release_date": - self.field = "date" + self.field = "version" else: self.field = None def characters(self, data): if not self.field is None: - if self.project_data.has_key(self.field): - self.project_data[self.field] += data + if self.meta.has_key(self.field): + self.meta[self.field] += data else: - self.project_data[self.field] = data + self.meta[self.field] = data def endElement(self, name): if name == "project": - process_project(self.project_data) - self.project_data = None + process_record(self.meta) + self.meta = None self.field = None -try: - os.mkdir("%s/download" % HOME) -except: - pass - -socket.setdefaulttimeout(20) - -dh = docHandler() +def parse_xml(f): -parser = make_parser() + dh = docHandler() + parser = make_parser() + parser.setContentHandler(dh) + parser.parse(f) -parser.setContentHandler(dh) -parser.parse(sys.stdin) +if __name__ == "__main__": + parse_xml(sys.stdin) |