From 60cd2a763756529262ae7bbe58b3272ed51e5598 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 21 Nov 2005 14:15:24 +0000 Subject: move everything down a directory git-svn-id: file:///home/lennart/svn/public/sse/trunk@6 5fbabb74-0606-0410-a5e4-b5cc6a42724e --- Makefile | 15 ------- feed/Makefile | 15 +++++++ feed/lex-c.l | 102 ++++++++++++++++++++++++++++++++++++++++++++ feed/sse-feed | 70 +++++++++++++++++++++++++++++++ feed/sse-fm | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ feed/sse-tar | 25 +++++++++++ lex-c.l | 102 -------------------------------------------- sse-feed | 70 ------------------------------- sse-fm | 133 ---------------------------------------------------------- sse-tar | 25 ----------- sse.sql | 11 ++--- 11 files changed, 351 insertions(+), 350 deletions(-) delete mode 100644 Makefile create mode 100644 feed/Makefile create mode 100644 feed/lex-c.l create mode 100755 feed/sse-feed create mode 100755 feed/sse-fm create mode 100755 feed/sse-tar delete mode 100644 lex-c.l delete mode 100755 sse-feed delete mode 100755 sse-fm delete mode 100755 sse-tar diff --git a/Makefile b/Makefile deleted file mode 100644 index 3830946..0000000 --- a/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -CLAGS=-Wextra -g -O2 -pipe -LIBS=-lfl - -all: lex-c - -lex-c.yy.c: lex-c.l - flex -o $@ $^ - -lex-c: lex-c.yy.o - $(CC) -o $@ $^ $(LIBS) - -clean: - rm -f *.o lex-c.yy.c lex-c - -.PHONY: all clean diff --git a/feed/Makefile b/feed/Makefile new file mode 100644 index 0000000..3830946 --- /dev/null +++ b/feed/Makefile @@ -0,0 +1,15 @@ +CLAGS=-Wextra -g -O2 -pipe +LIBS=-lfl + +all: lex-c + +lex-c.yy.c: lex-c.l + flex -o $@ $^ + +lex-c: lex-c.yy.o + $(CC) -o $@ $^ $(LIBS) + +clean: + rm -f *.o lex-c.yy.c lex-c + +.PHONY: all clean diff --git a/feed/lex-c.l b/feed/lex-c.l new file mode 100644 index 0000000..243c9ab --- /dev/null +++ b/feed/lex-c.l @@ -0,0 +1,102 @@ +/* --*-c-mode-*-- */ + +%{ + +#include + + +%} + +%Start DEF CCOMMENT CPPCOMMENT STRING CHAR PREPROC + +IDCHAR [_a-zA-Z0-9] +NIDCHAR [^_a-zA-Z0-9] + +%% + + BEGIN DEF; + +^#ifn?def | +^#if | +^#define | +^#undef ; + +^#. { BEGIN CPPCOMMENT; } + +"/*" { BEGIN CCOMMENT; } +"*/" { BEGIN DEF; } +\n | +. ; + +"//" { BEGIN CPPCOMMENT; } +\n { BEGIN DEF; } +. ; + +"\"" { BEGIN STRING; } +"\"" { BEGIN DEF; } +. | +\n ; + +"'" { BEGIN CHAR; } +"'" { BEGIN DEF; } +\n | +. ; + +auto{NIDCHAR} | +break{NIDCHAR} | +case{NIDCHAR} | +char{NIDCHAR} | +const{NIDCHAR} | +continue{NIDCHAR} | +default{NIDCHAR} | +do{NIDCHAR} | +double{NIDCHAR} | +else{NIDCHAR} | +enum{NIDCHAR} | +extern{NIDCHAR} | +float{NIDCHAR} | +for{NIDCHAR} | +goto{NIDCHAR} | +if{NIDCHAR} | +int{NIDCHAR} | +long{NIDCHAR} | +register{NIDCHAR} | +return{NIDCHAR} | +short{NIDCHAR} | +signed{NIDCHAR} | +sizeof{NIDCHAR} | +static{NIDCHAR} | +struct{NIDCHAR} | +switch{NIDCHAR} | +typedef{NIDCHAR} | +union{NIDCHAR} | +unsigned{NIDCHAR} | +void{NIDCHAR} | +volatile{NIDCHAR} | +while{NIDCHAR} { yyless(yyleng-1); } + +[a-zA-Z_][a-zA-Z_0-9]{3,} { printf("%s\n", yytext); } + +"\n" | +. ; + +%% + +int main(int argc, char *argv[]) { + + if (argc <= 1) + yylex(); + else { + int i; + + for (i = 1; i < argc; i++) { + if (!(freopen(argv[i], "r", stdin))) { + fprintf(stderr, "Failed to open file: %s\n", strerror(errno)); + return 1; + } + yylex(); + } + } + + return 0; +} diff --git a/feed/sse-feed b/feed/sse-feed new file mode 100755 index 0000000..29718ca --- /dev/null +++ b/feed/sse-feed @@ -0,0 +1,70 @@ +#!/usr/bin/python + +import sys, os, MySQLdb, stat +from popen2 import Popen3 + +def last_insert_id(cursor): + cursor.execute("SELECT LAST_INSERT_ID()"); + return cursor.fetchone()[0] + +def process_file(package_id, root, path): + global cursor + print "Processing %s" % path + + cursor.execute("INSERT INTO file (package_id, path, language_id) VALUES (%i, '%s', '0')" % (package_id, path)); + + file_id = last_insert_id(cursor); + + p = Popen3("lex-c %s" % (os.path.join(root, path))) + + for identifier in p.fromchild: + text = identifier.strip() + + cursor.execute("INSERT IGNORE INTO word (text, type, file_id) VALUES ('%s', 'word', '%i')" % (text, file_id)) + cursor.execute("UPDATE word SET cnt=cnt+1 WHERE text='%s' AND type='word' AND file_id=%i" % (text, file_id)) + + if p.wait() != 0: + print "WARNING: Subprocess failed!" + + del p + +def handle_file(package_id, root, path, filename): + + t = sys.lstat(os.path.join(path, filename)) + + if stat.F_ISREG(t.st_mode): + + extension = filename.split(".")[-1] + + if extension in ("c", "h"): + process_file(package_id, root, os.path.join(path, filename)) + return + + os.unlink(os.path.join(root, path, filename)) + +def handle_tree(path, name, url, md): + global cursor + + cursor.execute("INSERT INTO package (path, name, url, timestamp, md) VALUES ('%s', '%s', '%s', NOW(), '%s')" % (path + "/%s", name, url, md)); + package_id = last_insert_id(cursor); + + path = os.path.realpath(path) + + for dirpath, dirs, files in os.walk(path): + for f in files: + assert path + "/" == (dirpath + "/") [:len(path)+1] + + handle_file(package_id, path, dirpath[len(path)+1:], f) + +db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse") +cursor = db.cursor(); +cursor.execute("SET AUTOCOMMIT=0") +cursor.execute("START TRANSACTION") + +assert len(sys.argv) == 5 + +handle_tree(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) + +cursor.execute("COMMIT") +cursor.close() +db.close() diff --git a/feed/sse-fm b/feed/sse-fm new file mode 100755 index 0000000..f9fae44 --- /dev/null +++ b/feed/sse-fm @@ -0,0 +1,133 @@ +#!/usr/bin/python + +import sys, urllib2, os, socket +from xml.sax import ContentHandler, make_parser + +HOME = os.environ["HOME"] +SSE_TAR = HOME + "/bin/sse-tar" + +def process_tar(project, tar): + print "New tar %s" % tar + + ret = os.system("%s '%s' '%s' '%s'" % (SSE_TAR, tar, project["name"], project["project-url"])) + + if ret != 0: + print "WARNING: Process returned %i" % ret + +def process_project(project): + + archive_url = None + + for a in ("archive-tgz-url", "archive-bz2-url", "archive-zip-url"): + + if project.has_key(a) and project[a] != "": + archive_url = project[a] + break + + if archive_url is None: + print "WARNING: Ignoring project '%s' without archive URL!" % project["name"] + return + + fn = "%s/download/freshmeat-%i" % (HOME, int(project["id"])) + + download = False + + try: + f = open(fn+".release", "r") + except: + download = True + else: + download = f.read() != project["date"].strip() + + if not download: + print "File %s up-to-date." % archive_url + else: + + print "Downloading %s..." % archive_url + + try: + dst = file(fn, "w") + src = urllib2.urlopen(archive_url) + + while True: + data = src.read(1024) + + if len(data) <= 0: + break + + dst.write(data) + + del dst + del src + + except IOError, e: + os.unlink(fn) + print "WARNING: Failed to download %s!" % archive_url + return + + try: + f = open(fn+".release", "w") + except: + os.unlink(fn) + + f.write(project["date"].strip()) + del f + + process_tar(project, fn) + +class docHandler(ContentHandler): + + project_data = {} + field = None + + def startElement(self, name, attrs): + if name == "project": + self.project_data = {} + self.field = None + elif name == "project_id": + self.field = "id" + elif name == "projectname_full": + self.field = "name" + elif name == "url_project_page": + self.field = "project-url" + elif name == "url_tgz": + self.field = "archive-tgz-url" + elif name == "url_bz2": + self.field = "archive-bz2-url" + elif name == "url_zip": + self.field = "archive-zip-url" + elif name == "license": + self.field = "license" + elif name == "latest_release_date": + self.field = "date" + else: + self.field = None + + def characters(self, data): + + if not self.field is None: + if self.project_data.has_key(self.field): + self.project_data[self.field] += data + else: + self.project_data[self.field] = data + + def endElement(self, name): + if name == "project": + process_project(self.project_data) + self.project_data = None + + self.field = None + +try: + os.mkdir("%s/download" % HOME) +except: + pass + +socket.setdefaulttimeout(20) + +dh = docHandler() + +parser = make_parser() + +parser.setContentHandler(dh) +parser.parse(sys.stdin) diff --git a/feed/sse-tar b/feed/sse-tar new file mode 100755 index 0000000..1f27ab0 --- /dev/null +++ b/feed/sse-tar @@ -0,0 +1,25 @@ +#!/bin/sh + +set -ex + +renice +10 $$ > /dev/null + +PATH="$PATH:/home/lennart/sse" +SOURCES="$HOME/sources" + +TAR="$1" +NAME="$2" +URL="$3" + +[ "x$NAME" = "x" ] && NAME="$TAR" + +test -f "$TAR" + +MD=$(md5sum "$1" | awk '{print$1}') + +mkdir -p "$SOURCES/$MD" + +tar -C "$SOURCES/$MD" -xzf "$TAR" || tar -C "$SOURCES/$MD" -xjf "$TAR" +chmod -R a+rX "$SOURCES/$MD" + +exec sse-feed "$SOURCES/$MD" "$NAME" "$URL" "$MD" diff --git a/lex-c.l b/lex-c.l deleted file mode 100644 index 243c9ab..0000000 --- a/lex-c.l +++ /dev/null @@ -1,102 +0,0 @@ -/* --*-c-mode-*-- */ - -%{ - -#include - - -%} - -%Start DEF CCOMMENT CPPCOMMENT STRING CHAR PREPROC - -IDCHAR [_a-zA-Z0-9] -NIDCHAR [^_a-zA-Z0-9] - -%% - - BEGIN DEF; - -^#ifn?def | -^#if | -^#define | -^#undef ; - -^#. { BEGIN CPPCOMMENT; } - -"/*" { BEGIN CCOMMENT; } -"*/" { BEGIN DEF; } -\n | -. ; - -"//" { BEGIN CPPCOMMENT; } -\n { BEGIN DEF; } -. ; - -"\"" { BEGIN STRING; } -"\"" { BEGIN DEF; } -. | -\n ; - -"'" { BEGIN CHAR; } -"'" { BEGIN DEF; } -\n | -. ; - -auto{NIDCHAR} | -break{NIDCHAR} | -case{NIDCHAR} | -char{NIDCHAR} | -const{NIDCHAR} | -continue{NIDCHAR} | -default{NIDCHAR} | -do{NIDCHAR} | -double{NIDCHAR} | -else{NIDCHAR} | -enum{NIDCHAR} | -extern{NIDCHAR} | -float{NIDCHAR} | -for{NIDCHAR} | -goto{NIDCHAR} | -if{NIDCHAR} | -int{NIDCHAR} | -long{NIDCHAR} | -register{NIDCHAR} | -return{NIDCHAR} | -short{NIDCHAR} | -signed{NIDCHAR} | -sizeof{NIDCHAR} | -static{NIDCHAR} | -struct{NIDCHAR} | -switch{NIDCHAR} | -typedef{NIDCHAR} | -union{NIDCHAR} | -unsigned{NIDCHAR} | -void{NIDCHAR} | -volatile{NIDCHAR} | -while{NIDCHAR} { yyless(yyleng-1); } - -[a-zA-Z_][a-zA-Z_0-9]{3,} { printf("%s\n", yytext); } - -"\n" | -. ; - -%% - -int main(int argc, char *argv[]) { - - if (argc <= 1) - yylex(); - else { - int i; - - for (i = 1; i < argc; i++) { - if (!(freopen(argv[i], "r", stdin))) { - fprintf(stderr, "Failed to open file: %s\n", strerror(errno)); - return 1; - } - yylex(); - } - } - - return 0; -} diff --git a/sse-feed b/sse-feed deleted file mode 100755 index 29718ca..0000000 --- a/sse-feed +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/python - -import sys, os, MySQLdb, stat -from popen2 import Popen3 - -def last_insert_id(cursor): - cursor.execute("SELECT LAST_INSERT_ID()"); - return cursor.fetchone()[0] - -def process_file(package_id, root, path): - global cursor - print "Processing %s" % path - - cursor.execute("INSERT INTO file (package_id, path, language_id) VALUES (%i, '%s', '0')" % (package_id, path)); - - file_id = last_insert_id(cursor); - - p = Popen3("lex-c %s" % (os.path.join(root, path))) - - for identifier in p.fromchild: - text = identifier.strip() - - cursor.execute("INSERT IGNORE INTO word (text, type, file_id) VALUES ('%s', 'word', '%i')" % (text, file_id)) - cursor.execute("UPDATE word SET cnt=cnt+1 WHERE text='%s' AND type='word' AND file_id=%i" % (text, file_id)) - - if p.wait() != 0: - print "WARNING: Subprocess failed!" - - del p - -def handle_file(package_id, root, path, filename): - - t = sys.lstat(os.path.join(path, filename)) - - if stat.F_ISREG(t.st_mode): - - extension = filename.split(".")[-1] - - if extension in ("c", "h"): - process_file(package_id, root, os.path.join(path, filename)) - return - - os.unlink(os.path.join(root, path, filename)) - -def handle_tree(path, name, url, md): - global cursor - - cursor.execute("INSERT INTO package (path, name, url, timestamp, md) VALUES ('%s', '%s', '%s', NOW(), '%s')" % (path + "/%s", name, url, md)); - package_id = last_insert_id(cursor); - - path = os.path.realpath(path) - - for dirpath, dirs, files in os.walk(path): - for f in files: - assert path + "/" == (dirpath + "/") [:len(path)+1] - - handle_file(package_id, path, dirpath[len(path)+1:], f) - -db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse") -cursor = db.cursor(); -cursor.execute("SET AUTOCOMMIT=0") -cursor.execute("START TRANSACTION") - -assert len(sys.argv) == 5 - -handle_tree(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]) - -cursor.execute("COMMIT") -cursor.close() -db.close() diff --git a/sse-fm b/sse-fm deleted file mode 100755 index f9fae44..0000000 --- a/sse-fm +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/python - -import sys, urllib2, os, socket -from xml.sax import ContentHandler, make_parser - -HOME = os.environ["HOME"] -SSE_TAR = HOME + "/bin/sse-tar" - -def process_tar(project, tar): - print "New tar %s" % tar - - ret = os.system("%s '%s' '%s' '%s'" % (SSE_TAR, tar, project["name"], project["project-url"])) - - if ret != 0: - print "WARNING: Process returned %i" % ret - -def process_project(project): - - archive_url = None - - for a in ("archive-tgz-url", "archive-bz2-url", "archive-zip-url"): - - if project.has_key(a) and project[a] != "": - archive_url = project[a] - break - - if archive_url is None: - print "WARNING: Ignoring project '%s' without archive URL!" % project["name"] - return - - fn = "%s/download/freshmeat-%i" % (HOME, int(project["id"])) - - download = False - - try: - f = open(fn+".release", "r") - except: - download = True - else: - download = f.read() != project["date"].strip() - - if not download: - print "File %s up-to-date." % archive_url - else: - - print "Downloading %s..." % archive_url - - try: - dst = file(fn, "w") - src = urllib2.urlopen(archive_url) - - while True: - data = src.read(1024) - - if len(data) <= 0: - break - - dst.write(data) - - del dst - del src - - except IOError, e: - os.unlink(fn) - print "WARNING: Failed to download %s!" % archive_url - return - - try: - f = open(fn+".release", "w") - except: - os.unlink(fn) - - f.write(project["date"].strip()) - del f - - process_tar(project, fn) - -class docHandler(ContentHandler): - - project_data = {} - field = None - - def startElement(self, name, attrs): - if name == "project": - self.project_data = {} - self.field = None - elif name == "project_id": - self.field = "id" - elif name == "projectname_full": - self.field = "name" - elif name == "url_project_page": - self.field = "project-url" - elif name == "url_tgz": - self.field = "archive-tgz-url" - elif name == "url_bz2": - self.field = "archive-bz2-url" - elif name == "url_zip": - self.field = "archive-zip-url" - elif name == "license": - self.field = "license" - elif name == "latest_release_date": - self.field = "date" - else: - self.field = None - - def characters(self, data): - - if not self.field is None: - if self.project_data.has_key(self.field): - self.project_data[self.field] += data - else: - self.project_data[self.field] = data - - def endElement(self, name): - if name == "project": - process_project(self.project_data) - self.project_data = None - - self.field = None - -try: - os.mkdir("%s/download" % HOME) -except: - pass - -socket.setdefaulttimeout(20) - -dh = docHandler() - -parser = make_parser() - -parser.setContentHandler(dh) -parser.parse(sys.stdin) diff --git a/sse-tar b/sse-tar deleted file mode 100755 index 1f27ab0..0000000 --- a/sse-tar +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/sh - -set -ex - -renice +10 $$ > /dev/null - -PATH="$PATH:/home/lennart/sse" -SOURCES="$HOME/sources" - -TAR="$1" -NAME="$2" -URL="$3" - -[ "x$NAME" = "x" ] && NAME="$TAR" - -test -f "$TAR" - -MD=$(md5sum "$1" | awk '{print$1}') - -mkdir -p "$SOURCES/$MD" - -tar -C "$SOURCES/$MD" -xzf "$TAR" || tar -C "$SOURCES/$MD" -xjf "$TAR" -chmod -R a+rX "$SOURCES/$MD" - -exec sse-feed "$SOURCES/$MD" "$NAME" "$URL" "$MD" diff --git a/sse.sql b/sse.sql index 2e48816..2e4e9cc 100644 --- a/sse.sql +++ b/sse.sql @@ -6,7 +6,7 @@ CREATE TABLE word ( text VARCHAR(40) NOT NULL, type ENUM ('word', 'subword') DEFAULT 'word' NOT NULL, file_id INTEGER UNSIGNED NOT NULL, - cnt INTEGER UNSIGNED NOT NULL, + cnt INTEGER UNSIGNED DEFAULT 0 NOT NULL, PRIMARY KEY (text, type, file_id) ) ENGINE=InnoDB; @@ -20,10 +20,11 @@ CREATE TABLE file ( CREATE TABLE package ( id INTEGER UNSIGNED NOT NULL AUTO_INCREMENT, - filename VARBINARY(255) NOT NULL, - url VARBINARY(255) NOT NULL, + path VARBINARY(255) NOT NULL, + name VARBINARY(255) NOT NULL DEFAULT 'noname', + url VARBINARY(255) NOT NULL DEFAULT '', timestamp TIMESTAMP NOT NULL, - md CHAR(32) NOT NULL, + md CHAR(32) NOT NULL DEFAULT '', PRIMARY KEY(id), - KEY (md) + UNIQUE KEY (md) ) ENGINE=InnoDB; -- cgit