diff options
author | Lennart Poettering <lennart@poettering.net> | 2005-11-21 14:14:03 +0000 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2005-11-21 14:14:03 +0000 |
commit | 6a9e54615b4da50ad0f288e1bd5f0e3cea4a1fc9 (patch) | |
tree | 832d1ab770167565e009eee089baca50da984c88 | |
parent | e8dd1f3633bc46cfddfc4045bb970f65876e4e4c (diff) |
major work
git-svn-id: file:///home/lennart/svn/public/sse/trunk@5 5fbabb74-0606-0410-a5e4-b5cc6a42724e
-rwxr-xr-x | sse-feed | 62 | ||||
-rwxr-xr-x | sse-fm | 133 | ||||
-rwxr-xr-x | sse-tar | 25 | ||||
-rw-r--r-- | sse.sql | 6 |
4 files changed, 205 insertions, 21 deletions
@@ -1,36 +1,52 @@ #!/usr/bin/python -import sys, os, MYSQLdb +import sys, os, MySQLdb, stat from popen2 import Popen3 -LEXER_PATH="." +def last_insert_id(cursor): + cursor.execute("SELECT LAST_INSERT_ID()"); + return cursor.fetchone()[0] -db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse") - -def process_file(root, path): +def process_file(package_id, root, path): + global cursor print "Processing %s" % path - + cursor.execute("INSERT INTO file (package_id, path, language_id) VALUES (%i, '%s', '0')" % (package_id, path)); - p = Popen3("%s/lex-c %s" % (LEXER_PATH, os.path.join(root, path))) + file_id = last_insert_id(cursor); + + p = Popen3("lex-c %s" % (os.path.join(root, path))) for identifier in p.fromchild: - print "ID:", identifier.strip() + text = identifier.strip() + + cursor.execute("INSERT IGNORE INTO word (text, type, file_id) VALUES ('%s', 'word', '%i')" % (text, file_id)) + cursor.execute("UPDATE word SET cnt=cnt+1 WHERE text='%s' AND type='word' AND file_id=%i" % (text, file_id)) - if p.wait() != 0: - print "WARNING: Subprocess failed!" + if p.wait() != 0: + print "WARNING: Subprocess failed!" del p -def handle_file(root, path, filename): +def handle_file(package_id, root, path, filename): + + t = sys.lstat(os.path.join(path, filename)) - extension = filename.split(".")[-1] + if stat.F_ISREG(t.st_mode): - if extension in ("c", "h"): - process_file(root, os.path.join(path, filename)) + extension = filename.split(".")[-1] + if extension in ("c", "h"): + process_file(package_id, root, os.path.join(path, filename)) + return -def handle_tree(path): + os.unlink(os.path.join(root, path, filename)) + +def handle_tree(path, name, url, md): + global cursor + + cursor.execute("INSERT INTO package (path, name, url, timestamp, md) VALUES ('%s', '%s', '%s', NOW(), '%s')" % (path + "/%s", name, url, md)); + package_id = last_insert_id(cursor); path = os.path.realpath(path) @@ -38,7 +54,17 @@ def handle_tree(path): for f in files: assert path + "/" == (dirpath + "/") [:len(path)+1] - handle_file(path, dirpath[len(path)+1:], f) + handle_file(package_id, path, dirpath[len(path)+1:], f) + +db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse") +cursor = db.cursor(); +cursor.execute("SET AUTOCOMMIT=0") +cursor.execute("START TRANSACTION") + +assert len(sys.argv) == 5 + +handle_tree(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) -for a in sys.argv[1:]: - handle_tree(a) +cursor.execute("COMMIT") +cursor.close() +db.close() @@ -0,0 +1,133 @@ +#!/usr/bin/python + +import sys, urllib2, os, socket +from xml.sax import ContentHandler, make_parser + +HOME = os.environ["HOME"] +SSE_TAR = HOME + "/bin/sse-tar" + +def process_tar(project, tar): + print "New tar %s" % tar + + ret = os.system("%s '%s' '%s' '%s'" % (SSE_TAR, tar, project["name"], project["project-url"])) + + if ret != 0: + print "WARNING: Process returned %i" % ret + +def process_project(project): + + archive_url = None + + for a in ("archive-tgz-url", "archive-bz2-url", "archive-zip-url"): + + if project.has_key(a) and project[a] != "": + archive_url = project[a] + break + + if archive_url is None: + print "WARNING: Ignoring project '%s' without archive URL!" % project["name"] + return + + fn = "%s/download/freshmeat-%i" % (HOME, int(project["id"])) + + download = False + + try: + f = open(fn+".release", "r") + except: + download = True + else: + download = f.read() != project["date"].strip() + + if not download: + print "File %s up-to-date." % archive_url + else: + + print "Downloading %s..." % archive_url + + try: + dst = file(fn, "w") + src = urllib2.urlopen(archive_url) + + while True: + data = src.read(1024) + + if len(data) <= 0: + break + + dst.write(data) + + del dst + del src + + except IOError, e: + os.unlink(fn) + print "WARNING: Failed to download %s!" % archive_url + return + + try: + f = open(fn+".release", "w") + except: + os.unlink(fn) + + f.write(project["date"].strip()) + del f + + process_tar(project, fn) + +class docHandler(ContentHandler): + + project_data = {} + field = None + + def startElement(self, name, attrs): + if name == "project": + self.project_data = {} + self.field = None + elif name == "project_id": + self.field = "id" + elif name == "projectname_full": + self.field = "name" + elif name == "url_project_page": + self.field = "project-url" + elif name == "url_tgz": + self.field = "archive-tgz-url" + elif name == "url_bz2": + self.field = "archive-bz2-url" + elif name == "url_zip": + self.field = "archive-zip-url" + elif name == "license": + self.field = "license" + elif name == "latest_release_date": + self.field = "date" + else: + self.field = None + + def characters(self, data): + + if not self.field is None: + if self.project_data.has_key(self.field): + self.project_data[self.field] += data + else: + self.project_data[self.field] = data + + def endElement(self, name): + if name == "project": + process_project(self.project_data) + self.project_data = None + + self.field = None + +try: + os.mkdir("%s/download" % HOME) +except: + pass + +socket.setdefaulttimeout(20) + +dh = docHandler() + +parser = make_parser() + +parser.setContentHandler(dh) +parser.parse(sys.stdin) @@ -0,0 +1,25 @@ +#!/bin/sh + +set -ex + +renice +10 $$ > /dev/null + +PATH="$PATH:/home/lennart/sse" +SOURCES="$HOME/sources" + +TAR="$1" +NAME="$2" +URL="$3" + +[ "x$NAME" = "x" ] && NAME="$TAR" + +test -f "$TAR" + +MD=$(md5sum "$1" | awk '{print$1}') + +mkdir -p "$SOURCES/$MD" + +tar -C "$SOURCES/$MD" -xzf "$TAR" || tar -C "$SOURCES/$MD" -xjf "$TAR" +chmod -R a+rX "$SOURCES/$MD" + +exec sse-feed "$SOURCES/$MD" "$NAME" "$URL" "$MD" @@ -8,7 +8,7 @@ CREATE TABLE word ( file_id INTEGER UNSIGNED NOT NULL, cnt INTEGER UNSIGNED NOT NULL, PRIMARY KEY (text, type, file_id) -); +) ENGINE=InnoDB; CREATE TABLE file ( id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT, @@ -16,7 +16,7 @@ CREATE TABLE file ( path VARBINARY(255) NOT NULL, language_id TINYINT UNSIGNED NOT NULL, PRIMARY KEY (id) -); +) ENGINE=InnoDB; CREATE TABLE package ( id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT, @@ -26,4 +26,4 @@ CREATE TABLE package ( md CHAR(32) NOT NULL, PRIMARY KEY(id), KEY (md) -); +) ENGINE=InnoDB; |