summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2005-11-21 14:14:03 +0000
committer Lennart Poettering <lennart@poettering.net> 2005-11-21 14:14:03 +0000
commit 6a9e54615b4da50ad0f288e1bd5f0e3cea4a1fc9 (patch)
tree 832d1ab770167565e009eee089baca50da984c88
parent e8dd1f3633bc46cfddfc4045bb970f65876e4e4c (diff)
major work
git-svn-id: file:///home/lennart/svn/public/sse/trunk@5 5fbabb74-0606-0410-a5e4-b5cc6a42724e
-rwxr-xr-x sse-feed 62
-rwxr-xr-x sse-fm 133
-rwxr-xr-x sse-tar 25
-rw-r--r-- sse.sql 6
4 files changed, 205 insertions, 21 deletions
diff --git a/sse-feed b/sse-feed
index 8290f48..29718ca 100755
--- a/sse-feed
+++ b/sse-feed
@@ -1,36 +1,52 @@
#!/usr/bin/python
-import sys, os, MYSQLdb
+import sys, os, MySQLdb, stat
from popen2 import Popen3
-LEXER_PATH="."
+def last_insert_id(cursor):
+ cursor.execute("SELECT LAST_INSERT_ID()");
+ return cursor.fetchone()[0]
-db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse")
-
-def process_file(root, path):
+def process_file(package_id, root, path):
+ global cursor
print "Processing %s" % path
-
+ cursor.execute("INSERT INTO file (package_id, path, language_id) VALUES (%i, '%s', '0')" % (package_id, path));
- p = Popen3("%s/lex-c %s" % (LEXER_PATH, os.path.join(root, path)))
+ file_id = last_insert_id(cursor);
+
+ p = Popen3("lex-c %s" % (os.path.join(root, path)))
for identifier in p.fromchild:
- print "ID:", identifier.strip()
+ text = identifier.strip()
+
+ cursor.execute("INSERT IGNORE INTO word (text, type, file_id) VALUES ('%s', 'word', '%i')" % (text, file_id))
+ cursor.execute("UPDATE word SET cnt=cnt+1 WHERE text='%s' AND type='word' AND file_id=%i" % (text, file_id))
- if p.wait() != 0:
- print "WARNING: Subprocess failed!"
+ if p.wait() != 0:
+ print "WARNING: Subprocess failed!"
del p
-def handle_file(root, path, filename):
+def handle_file(package_id, root, path, filename):
+
+ t = sys.lstat(os.path.join(path, filename))
- extension = filename.split(".")[-1]
+ if stat.F_ISREG(t.st_mode):
- if extension in ("c", "h"):
- process_file(root, os.path.join(path, filename))
+ extension = filename.split(".")[-1]
+ if extension in ("c", "h"):
+ process_file(package_id, root, os.path.join(path, filename))
+ return
-def handle_tree(path):
+ os.unlink(os.path.join(root, path, filename))
+
+def handle_tree(path, name, url, md):
+ global cursor
+
+ cursor.execute("INSERT INTO package (path, name, url, timestamp, md) VALUES ('%s', '%s', '%s', NOW(), '%s')" % (path + "/%s", name, url, md));
+ package_id = last_insert_id(cursor);
path = os.path.realpath(path)
@@ -38,7 +54,17 @@ def handle_tree(path):
for f in files:
assert path + "/" == (dirpath + "/") [:len(path)+1]
- handle_file(path, dirpath[len(path)+1:], f)
+ handle_file(package_id, path, dirpath[len(path)+1:], f)
+
+db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse")
+cursor = db.cursor();
+cursor.execute("SET AUTOCOMMIT=0")
+cursor.execute("START TRANSACTION")
+
+assert len(sys.argv) == 5
+
+handle_tree(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
-for a in sys.argv[1:]:
- handle_tree(a)
+cursor.execute("COMMIT")
+cursor.close()
+db.close()
diff --git a/sse-fm b/sse-fm
new file mode 100755
index 0000000..f9fae44
--- /dev/null
+++ b/sse-fm
@@ -0,0 +1,133 @@
+#!/usr/bin/python
+
+import sys, urllib2, os, socket
+from xml.sax import ContentHandler, make_parser
+
+HOME = os.environ["HOME"]
+SSE_TAR = HOME + "/bin/sse-tar"
+
+def process_tar(project, tar):
+ print "New tar %s" % tar
+
+ ret = os.system("%s '%s' '%s' '%s'" % (SSE_TAR, tar, project["name"], project["project-url"]))
+
+ if ret != 0:
+ print "WARNING: Process returned %i" % ret
+
+def process_project(project):
+
+ archive_url = None
+
+ for a in ("archive-tgz-url", "archive-bz2-url", "archive-zip-url"):
+
+ if project.has_key(a) and project[a] != "":
+ archive_url = project[a]
+ break
+
+ if archive_url is None:
+ print "WARNING: Ignoring project '%s' without archive URL!" % project["name"]
+ return
+
+ fn = "%s/download/freshmeat-%i" % (HOME, int(project["id"]))
+
+ download = False
+
+ try:
+ f = open(fn+".release", "r")
+ except:
+ download = True
+ else:
+ download = f.read() != project["date"].strip()
+
+ if not download:
+ print "File %s up-to-date." % archive_url
+ else:
+
+ print "Downloading %s..." % archive_url
+
+ try:
+ dst = file(fn, "w")
+ src = urllib2.urlopen(archive_url)
+
+ while True:
+ data = src.read(1024)
+
+ if len(data) <= 0:
+ break
+
+ dst.write(data)
+
+ del dst
+ del src
+
+ except IOError, e:
+ os.unlink(fn)
+ print "WARNING: Failed to download %s!" % archive_url
+ return
+
+ try:
+ f = open(fn+".release", "w")
+ except:
+ os.unlink(fn)
+
+ f.write(project["date"].strip())
+ del f
+
+ process_tar(project, fn)
+
+class docHandler(ContentHandler):
+
+ project_data = {}
+ field = None
+
+ def startElement(self, name, attrs):
+ if name == "project":
+ self.project_data = {}
+ self.field = None
+ elif name == "project_id":
+ self.field = "id"
+ elif name == "projectname_full":
+ self.field = "name"
+ elif name == "url_project_page":
+ self.field = "project-url"
+ elif name == "url_tgz":
+ self.field = "archive-tgz-url"
+ elif name == "url_bz2":
+ self.field = "archive-bz2-url"
+ elif name == "url_zip":
+ self.field = "archive-zip-url"
+ elif name == "license":
+ self.field = "license"
+ elif name == "latest_release_date":
+ self.field = "date"
+ else:
+ self.field = None
+
+ def characters(self, data):
+
+ if not self.field is None:
+ if self.project_data.has_key(self.field):
+ self.project_data[self.field] += data
+ else:
+ self.project_data[self.field] = data
+
+ def endElement(self, name):
+ if name == "project":
+ process_project(self.project_data)
+ self.project_data = None
+
+ self.field = None
+
+try:
+ os.mkdir("%s/download" % HOME)
+except:
+ pass
+
+socket.setdefaulttimeout(20)
+
+dh = docHandler()
+
+parser = make_parser()
+
+parser.setContentHandler(dh)
+parser.parse(sys.stdin)
diff --git a/sse-tar b/sse-tar
new file mode 100755
index 0000000..1f27ab0
--- /dev/null
+++ b/sse-tar
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+set -ex
+
+renice +10 $$ > /dev/null
+
+PATH="$PATH:/home/lennart/sse"
+SOURCES="$HOME/sources"
+
+TAR="$1"
+NAME="$2"
+URL="$3"
+
+[ "x$NAME" = "x" ] && NAME="$TAR"
+
+test -f "$TAR"
+
+MD=$(md5sum "$1" | awk '{print$1}')
+
+mkdir -p "$SOURCES/$MD"
+
+tar -C "$SOURCES/$MD" -xzf "$TAR" || tar -C "$SOURCES/$MD" -xjf "$TAR"
+chmod -R a+rX "$SOURCES/$MD"
+
+exec sse-feed "$SOURCES/$MD" "$NAME" "$URL" "$MD"
diff --git a/sse.sql b/sse.sql
index 0b4b8e6..2e48816 100644
--- a/sse.sql
+++ b/sse.sql
@@ -8,7 +8,7 @@ CREATE TABLE word (
file_id INTEGER UNSIGNED NOT NULL,
cnt INTEGER UNSIGNED NOT NULL,
PRIMARY KEY (text, type, file_id)
-);
+) ENGINE=InnoDB;
CREATE TABLE file (
id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
@@ -16,7 +16,7 @@ CREATE TABLE file (
path VARBINARY(255) NOT NULL,
language_id TINYINT UNSIGNED NOT NULL,
PRIMARY KEY (id)
-);
+) ENGINE=InnoDB;
CREATE TABLE package (
id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT,
@@ -26,4 +26,4 @@ CREATE TABLE package (
md CHAR(32) NOT NULL,
PRIMARY KEY(id),
KEY (md)
-);
+) ENGINE=InnoDB;