summary refs log tree commit diff stats
path: root/feed/sse_fm.py
diff options
context:
space:
mode:
Diffstat (limited to 'feed/sse_fm.py')
-rwxr-xr-x feed/sse_fm.py 140
1 files changed, 64 insertions, 76 deletions
diff --git a/feed/sse_fm.py b/feed/sse_fm.py
index dd45d58..7161d7a 100755
--- a/feed/sse_fm.py
+++ b/feed/sse_fm.py
@@ -1,89 +1,81 @@
#!/usr/bin/python
-
-import sys, urllib2, os, socket
+import sys
from xml.sax import ContentHandler, make_parser
-from sse-config import *
-
-SSE_TAR = SSE_DIR + "sse-tar"
-def process_tar(project, tar):
- print "New tar %s" % tar
+from sse_defs import *
+import sse_grab
+
+license_blacklist = [
+ "Other/Proprietary License with Free Trial",
+ "Free for non-commercial use",
+ "Free To Use But Restricted",
+ "Freely Distributable",
+ "Freeware",
+ "Shareware",
+ "Other/Proprietary License with Source",
+ "Other/Proprietary License",
+ ]
+
+license_whitelist = [
+ "GNU General Public License (GPL)",
+ "GNU Lesser General Public License (LGPL)",
+ "OSI Approved",
+ "The Apache License",
+ "Q Public License (QPL)",
+ "Public Domain",
+ "BSD License (original)",
+ "Artistic License",
+ "MIT/X Consortium License",
+ "The Clarified Artistic License",
+ "BSD License (revised)"
+ ]
+
+def process_record(meta):
- ret = os.system("%s '%s' '%s' '%s'" % (SSE_TAR, tar, project["name"], project["project-url"]))
-
- if ret != 0:
- print "WARNING: Process returned %i" % ret
+ archive_url = None
-def process_project(project):
+ for k, v in meta.items():
+ meta[k] = v.strip()
- archive_url = None
+ meta["id"] = int(meta["id"])
for a in ("archive-tgz-url", "archive-bz2-url", "archive-zip-url"):
- if project.has_key(a) and project[a] != "":
- archive_url = project[a]
+ if meta.has_key(a) and meta[a] != "":
+ archive_url = meta[a]
break
if archive_url is None:
- print "WARNING: Ignoring project '%s' without archive URL!" % project["name"]
+ print "Ignoring project '%s' without archive URL!" % meta["name"]
return
-
- fn = "%s/download/freshmeat-%i" % (HOME, int(project["id"]))
-
- download = False
-
- try:
- f = open(fn+".release", "r")
- except:
- download = True
- else:
- download = f.read() != project["date"].strip()
-
- if not download:
- print "File %s up-to-date." % archive_url
- else:
- print "Downloading %s..." % archive_url
-
- try:
- dst = file(fn, "w")
- src = urllib2.urlopen(archive_url)
-
- while True:
- data = src.read(1024)
+ if meta["license"] in license_blacklist:
+ print "Ignoring project '%s' due to evil license '%s'!" % (meta["name"], meta["license"])
+ return
- if len(data) <= 0:
- break
+ if meta["license"] not in license_whitelist:
+ print "WARNING: Unknown license '%s' for project '%s'!" % (meta["license"], meta["name"])
- dst.write(data)
+ f = file("graylist", "a")
+ f.write("%s\t%s\n" % (meta["name"], meta["license"]))
+ f.close()
+ return
- del dst
- del src
-
- except IOError, e:
- os.unlink(fn)
- print "WARNING: Failed to download %s!" % archive_url
- return
+ meta["archive-url"] = archive_url
- try:
- f = open(fn+".release", "w")
- except:
- os.unlink(fn)
+ print "Next record '%s'" % meta["name"]
- f.write(project["date"].strip())
- del f
-
- process_tar(project, fn)
+ sse_grab.grab_archive(meta, "freshmeat:%i" % meta["id"], SSE_PROVIDER_FRESHMEAT)
class docHandler(ContentHandler):
- project_data = {}
+ meta = {}
field = None
def startElement(self, name, attrs):
if name == "project":
- self.project_data = {}
+ self.meta = {}
self.field = None
elif name == "project_id":
self.field = "id"
@@ -100,35 +92,31 @@ class docHandler(ContentHandler):
elif name == "license":
self.field = "license"
elif name == "latest_release_date":
- self.field = "date"
+ self.field = "version"
else:
self.field = None
def characters(self, data):
if not self.field is None:
- if self.project_data.has_key(self.field):
- self.project_data[self.field] += data
+ if self.meta.has_key(self.field):
+ self.meta[self.field] += data
else:
- self.project_data[self.field] = data
+ self.meta[self.field] = data
def endElement(self, name):
if name == "project":
- process_project(self.project_data)
- self.project_data = None
+ process_record(self.meta)
+ self.meta = None
self.field = None
-try:
- os.mkdir("%s/download" % HOME)
-except:
- pass
-
-socket.setdefaulttimeout(20)
-
-dh = docHandler()
+def parse_xml(f):
-parser = make_parser()
+ dh = docHandler()
+ parser = make_parser()
+ parser.setContentHandler(dh)
+ parser.parse(f)
-parser.setContentHandler(dh)
-parser.parse(sys.stdin)
+if __name__ == "__main__":
+ parse_xml(sys.stdin)