summaryrefslogtreecommitdiffstats
path: root/feed
diff options
context:
space:
mode:
Diffstat (limited to 'feed')
-rw-r--r--feed/Makefile15
-rw-r--r--feed/lex-c.l102
-rwxr-xr-xfeed/sse-feed70
-rwxr-xr-xfeed/sse-fm133
-rwxr-xr-xfeed/sse-tar25
5 files changed, 345 insertions, 0 deletions
diff --git a/feed/Makefile b/feed/Makefile
new file mode 100644
index 0000000..3830946
--- /dev/null
+++ b/feed/Makefile
@@ -0,0 +1,15 @@
+# Build the lex-c identifier extractor from its flex specification.
+#
+# CFLAGS is picked up by make's built-in %.o: %.c rule, which is what
+# compiles lex-c.yy.c into lex-c.yy.o (there is no explicit rule for it).
+CFLAGS=-Wextra -g -O2 -pipe
+LIBS=-lfl
+
+all: lex-c
+
+# Generate the scanner's C source from the flex specification.
+lex-c.yy.c: lex-c.l
+	flex -o $@ $^
+
+# Link the scanner; -lfl supplies the default yywrap().
+lex-c: lex-c.yy.o
+	$(CC) -o $@ $^ $(LIBS)
+
+clean:
+	rm -f *.o lex-c.yy.c lex-c
+
+.PHONY: all clean
diff --git a/feed/lex-c.l b/feed/lex-c.l
new file mode 100644
index 0000000..243c9ab
--- /dev/null
+++ b/feed/lex-c.l
@@ -0,0 +1,102 @@
+/* --*-c-mode-*-- */
+
+/* Scanner that prints, one per line, every C identifier of four or more
+ * characters that is not a keyword and is not inside a comment, string,
+ * character literal or skipped preprocessor line. */
+
+%{
+
+/* errno and strerror() are used by main() in the user code section. */
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+%}
+
+/* DEF is the normal code state; the others track the construct currently
+ * being skipped.  PREPROC is declared but no rule uses it. */
+%Start DEF CCOMMENT CPPCOMMENT STRING CHAR PREPROC
+
+IDCHAR [_a-zA-Z0-9]
+NIDCHAR [^_a-zA-Z0-9]
+
+%%
+
+    /* Executed on every entry to yylex(): always start in plain-code state.
+     *
+     * Rule groups, in order:
+     *   - #if/#ifdef/#ifndef/#define/#undef: drop the directive word only, so
+     *     the identifiers that follow it are still reported;
+     *   - any other preprocessor line and // comments: skipped to end of line
+     *     (a backslash-newline continues the line);
+     *   - C comments, string literals and character literals: skipped, with
+     *     backslash escapes honoured so that \" and \' do not end the literal;
+     *   - keywords: matched together with the following non-identifier
+     *     character so they win over the identifier rule by length, then that
+     *     character is pushed back with yyless();
+     *   - identifiers of at least four characters: printed;
+     *   - everything else: discarded.
+     */
+    BEGIN DEF;
+
+<DEF>^#ifn?def          |
+<DEF>^#if               |
+<DEF>^#define           |
+<DEF>^#undef            ;
+
+<DEF>^#.                { BEGIN CPPCOMMENT; }
+
+<DEF>"/*"               { BEGIN CCOMMENT; }
+<CCOMMENT>"*/"          { BEGIN DEF; }
+<CCOMMENT>\n            |
+<CCOMMENT>.             ;
+
+<DEF>"//"               { BEGIN CPPCOMMENT; }
+<CPPCOMMENT>\\\n        ;
+<CPPCOMMENT>\n          { BEGIN DEF; }
+<CPPCOMMENT>.           ;
+
+<DEF>"\""               { BEGIN STRING; }
+<STRING>\\(.|\n)        ;
+<STRING>"\""            { BEGIN DEF; }
+<STRING>.               |
+<STRING>\n              ;
+
+<DEF>"'"                { BEGIN CHAR; }
+<CHAR>\\(.|\n)          ;
+<CHAR>"'"               { BEGIN DEF; }
+<CHAR>\n                |
+<CHAR>.                 ;
+
+<DEF>auto{NIDCHAR}      |
+<DEF>break{NIDCHAR}     |
+<DEF>case{NIDCHAR}      |
+<DEF>char{NIDCHAR}      |
+<DEF>const{NIDCHAR}     |
+<DEF>continue{NIDCHAR}  |
+<DEF>default{NIDCHAR}   |
+<DEF>do{NIDCHAR}        |
+<DEF>double{NIDCHAR}    |
+<DEF>else{NIDCHAR}      |
+<DEF>enum{NIDCHAR}      |
+<DEF>extern{NIDCHAR}    |
+<DEF>float{NIDCHAR}     |
+<DEF>for{NIDCHAR}       |
+<DEF>goto{NIDCHAR}      |
+<DEF>if{NIDCHAR}        |
+<DEF>int{NIDCHAR}       |
+<DEF>long{NIDCHAR}      |
+<DEF>register{NIDCHAR}  |
+<DEF>return{NIDCHAR}    |
+<DEF>short{NIDCHAR}     |
+<DEF>signed{NIDCHAR}    |
+<DEF>sizeof{NIDCHAR}    |
+<DEF>static{NIDCHAR}    |
+<DEF>struct{NIDCHAR}    |
+<DEF>switch{NIDCHAR}    |
+<DEF>typedef{NIDCHAR}   |
+<DEF>union{NIDCHAR}     |
+<DEF>unsigned{NIDCHAR}  |
+<DEF>void{NIDCHAR}      |
+<DEF>volatile{NIDCHAR}  |
+<DEF>while{NIDCHAR}     { yyless(yyleng-1); }
+
+<DEF>[a-zA-Z_][a-zA-Z_0-9]{3,} { printf("%s\n", yytext); }
+
+<DEF>"\n"               |
+<DEF>.                  ;
+
+%%
+
+/*
+ * Scan standard input when no arguments are given; otherwise scan each
+ * named file in turn.  Returns 1 as soon as a file cannot be opened,
+ * 0 otherwise.
+ */
+int main(int argc, char *argv[]) {
+    int i;
+
+    if (argc <= 1) {
+        yylex();
+        return 0;
+    }
+
+    for (i = 1; i < argc; i++) {
+        /* Re-point stdin at the file so the scanner's default input is used. */
+        if (!freopen(argv[i], "r", stdin)) {
+            fprintf(stderr, "Failed to open file %s: %s\n", argv[i], strerror(errno));
+            return 1;
+        }
+
+        /* The previous yylex() ran to EOF; discard its buffered state so
+         * scanning of the new file starts cleanly. */
+        yyrestart(stdin);
+        yylex();
+    }
+
+    return 0;
+}
diff --git a/feed/sse-feed b/feed/sse-feed
new file mode 100755
index 0000000..29718ca
--- /dev/null
+++ b/feed/sse-feed
@@ -0,0 +1,70 @@
+#!/usr/bin/python
+
+import sys, os, MySQLdb, stat
+from popen2 import Popen3
+
+def last_insert_id(cursor):
+ cursor.execute("SELECT LAST_INSERT_ID()");
+ return cursor.fetchone()[0]
+
+def process_file(package_id, root, path):
+ global cursor
+ print "Processing %s" % path
+
+ cursor.execute("INSERT INTO file (package_id, path, language_id) VALUES (%i, '%s', '0')" % (package_id, path));
+
+ file_id = last_insert_id(cursor);
+
+ p = Popen3("lex-c %s" % (os.path.join(root, path)))
+
+ for identifier in p.fromchild:
+ text = identifier.strip()
+
+ cursor.execute("INSERT IGNORE INTO word (text, type, file_id) VALUES ('%s', 'word', '%i')" % (text, file_id))
+ cursor.execute("UPDATE word SET cnt=cnt+1 WHERE text='%s' AND type='word' AND file_id=%i" % (text, file_id))
+
+ if p.wait() != 0:
+ print "WARNING: Subprocess failed!"
+
+ del p
+
+def handle_file(package_id, root, path, filename):
+
+ t = sys.lstat(os.path.join(path, filename))
+
+ if stat.F_ISREG(t.st_mode):
+
+ extension = filename.split(".")[-1]
+
+ if extension in ("c", "h"):
+ process_file(package_id, root, os.path.join(path, filename))
+ return
+
+ os.unlink(os.path.join(root, path, filename))
+
+def handle_tree(path, name, url, md):
+ global cursor
+
+ cursor.execute("INSERT INTO package (path, name, url, timestamp, md) VALUES ('%s', '%s', '%s', NOW(), '%s')" % (path + "/%s", name, url, md));
+ package_id = last_insert_id(cursor);
+
+ path = os.path.realpath(path)
+
+ for dirpath, dirs, files in os.walk(path):
+ for f in files:
+ assert path + "/" == (dirpath + "/") [:len(path)+1]
+
+ handle_file(package_id, path, dirpath[len(path)+1:], f)
+
+db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse")
+cursor = db.cursor();
+cursor.execute("SET AUTOCOMMIT=0")
+cursor.execute("START TRANSACTION")
+
+assert len(sys.argv) == 5
+
+handle_tree(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+
+cursor.execute("COMMIT")
+cursor.close()
+db.close()
diff --git a/feed/sse-fm b/feed/sse-fm
new file mode 100755
index 0000000..f9fae44
--- /dev/null
+++ b/feed/sse-fm
@@ -0,0 +1,133 @@
+#!/usr/bin/python
+
+import sys, urllib2, os, socket
+from xml.sax import ContentHandler, make_parser
+
+# Home directory of the invoking user; raises KeyError if $HOME is unset.
+HOME = os.environ["HOME"]
+# Path of the sse-tar helper that is run on each downloaded archive.
+SSE_TAR = HOME + "/bin/sse-tar"
+
+def process_tar(project, tar):
+ print "New tar %s" % tar
+
+ ret = os.system("%s '%s' '%s' '%s'" % (SSE_TAR, tar, project["name"], project["project-url"]))
+
+ if ret != 0:
+ print "WARNING: Process returned %i" % ret
+
+def process_project(project):
+
+ archive_url = None
+
+ for a in ("archive-tgz-url", "archive-bz2-url", "archive-zip-url"):
+
+ if project.has_key(a) and project[a] != "":
+ archive_url = project[a]
+ break
+
+ if archive_url is None:
+ print "WARNING: Ignoring project '%s' without archive URL!" % project["name"]
+ return
+
+ fn = "%s/download/freshmeat-%i" % (HOME, int(project["id"]))
+
+ download = False
+
+ try:
+ f = open(fn+".release", "r")
+ except:
+ download = True
+ else:
+ download = f.read() != project["date"].strip()
+
+ if not download:
+ print "File %s up-to-date." % archive_url
+ else:
+
+ print "Downloading %s..." % archive_url
+
+ try:
+ dst = file(fn, "w")
+ src = urllib2.urlopen(archive_url)
+
+ while True:
+ data = src.read(1024)
+
+ if len(data) <= 0:
+ break
+
+ dst.write(data)
+
+ del dst
+ del src
+
+ except IOError, e:
+ os.unlink(fn)
+ print "WARNING: Failed to download %s!" % archive_url
+ return
+
+ try:
+ f = open(fn+".release", "w")
+ except:
+ os.unlink(fn)
+
+ f.write(project["date"].strip())
+ del f
+
+ process_tar(project, fn)
+
+class docHandler(ContentHandler):
+
+ project_data = {}
+ field = None
+
+ def startElement(self, name, attrs):
+ if name == "project":
+ self.project_data = {}
+ self.field = None
+ elif name == "project_id":
+ self.field = "id"
+ elif name == "projectname_full":
+ self.field = "name"
+ elif name == "url_project_page":
+ self.field = "project-url"
+ elif name == "url_tgz":
+ self.field = "archive-tgz-url"
+ elif name == "url_bz2":
+ self.field = "archive-bz2-url"
+ elif name == "url_zip":
+ self.field = "archive-zip-url"
+ elif name == "license":
+ self.field = "license"
+ elif name == "latest_release_date":
+ self.field = "date"
+ else:
+ self.field = None
+
+ def characters(self, data):
+
+ if not self.field is None:
+ if self.project_data.has_key(self.field):
+ self.project_data[self.field] += data
+ else:
+ self.project_data[self.field] = data
+
+ def endElement(self, name):
+ if name == "project":
+ process_project(self.project_data)
+ self.project_data = None
+
+ self.field = None
+
+try:
+ os.mkdir("%s/download" % HOME)
+except:
+ pass
+
+socket.setdefaulttimeout(20)
+
+dh = docHandler()
+
+parser = make_parser()
+
+parser.setContentHandler(dh)
+parser.parse(sys.stdin)
diff --git a/feed/sse-tar b/feed/sse-tar
new file mode 100755
index 0000000..1f27ab0
--- /dev/null
+++ b/feed/sse-tar
@@ -0,0 +1,25 @@
+#!/bin/sh
+
+set -ex
+
+renice +10 $$ > /dev/null
+
+PATH="$PATH:/home/lennart/sse"
+SOURCES="$HOME/sources"
+
+TAR="$1"
+NAME="$2"
+URL="$3"
+
+[ "x$NAME" = "x" ] && NAME="$TAR"
+
+test -f "$TAR"
+
+MD=$(md5sum "$1" | awk '{print$1}')
+
+mkdir -p "$SOURCES/$MD"
+
+tar -C "$SOURCES/$MD" -xzf "$TAR" || tar -C "$SOURCES/$MD" -xjf "$TAR"
+chmod -R a+rX "$SOURCES/$MD"
+
+exec sse-feed "$SOURCES/$MD" "$NAME" "$URL" "$MD"