summaryrefslogtreecommitdiffstats
path: root/feed/sse_feed.py
diff options
context:
space:
mode:
Diffstat (limited to 'feed/sse_feed.py')
-rwxr-xr-xfeed/sse_feed.py121
1 files changed, 69 insertions, 52 deletions
diff --git a/feed/sse_feed.py b/feed/sse_feed.py
index c6f0de9..d31d826 100755
--- a/feed/sse_feed.py
+++ b/feed/sse_feed.py
@@ -1,81 +1,98 @@
#!/usr/bin/python
-import sys, os, MySQLdb, stat
+import sys, os, stat, string
from popen2 import Popen3
-supported = [".c", ".h"]
+import sse_db
+from sse_config import *
+from sse_defs import *
-def supported_source(fn):
+def camel_split(word):
- for e in supported:
- if fn.endswith(e):
- return True
+ if len(word) <= 0:
+ return []
- return False
+ r = []
+ last = 0
-def last_insert_id(cursor):
- cursor.execute("SELECT LAST_INSERT_ID()");
- return cursor.fetchone()[0]
+ for i in range(0, len(word)-1):
+
+ if word[i].islower() and word[i+1].isupper():
+ r.append(word[last:i+1])
+ last = i+1
-def process_file(package_id, root, path):
- global cursor
- print "Processing %s" % path
+ r.append(word[last:])
- cursor.execute("INSERT INTO file (package_id, path, language_id) VALUES (%i, '%s', '0')" % (package_id, path));
+ return r
+
- file_id = last_insert_id(cursor);
+def default_subword_split(word):
+ r = []
+
+ w = word.split("_")
- p = Popen3("lex-c %s" % (os.path.join(root, path)))
+ if len(w) > 1:
+ delimiter = "_"
+ else:
+ w = camel_split(word)
+ delimiter = ""
- for identifier in p.fromchild:
- text = identifier.strip()
-
- cursor.execute("INSERT IGNORE INTO word (text, type, file_id) VALUES ('%s', 'word', '%i')" % (text, file_id))
- cursor.execute("UPDATE word SET cnt=cnt+1 WHERE text='%s' AND type='word' AND file_id=%i" % (text, file_id))
+ if len(w) > 1:
- if p.wait() != 0:
- print "WARNING: Subprocess failed!"
+ for i in range(1, len(w)):
- del p
+ if len(w[i]) == 0:
+ continue
+
+ n = string.join(w[i:], delimiter)
-def handle_file(package_id, root, path, filename):
+ if len(n) >= 4:
+ r.append(n)
- t = sys.lstat(os.path.join(path, filename))
+ return r
+
+supported_languages = [ {
+ "extensions" : [".c", ".h", ".cc", ".hh", ".cpp", ".hpp"],
+ "subword_split" : default_subword_split,
+ "lexer" : SSE_DIR+"/sse_lex_c",
+ "language_id" : SSE_LANGUAGE_C
+ }]
- if stat.F_ISREG(t.st_mode):
+def find_language(fn):
+
+ for l in supported_languages:
+ for e in l["extensions"]:
+ if fn.lower().endswith(e):
+ return l
- extension = filename.split(".")[-1]
+ return None
- if extension in ("c", "h"):
- process_file(package_id, root, os.path.join(path, filename))
- return
+def supported_source(fn):
+ return not find_language(fn) is None
- os.unlink(os.path.join(root, path, filename))
+def process_source(archive, root, path, package_id, meta):
+ print "(%s) Processing %s" % (archive, path)
-def handle_tree(path, name, url, md):
- global cursor
+ language = find_language(path)
- cursor.execute("INSERT INTO package (path, name, url, timestamp, md) VALUES ('%s', '%s', '%s', NOW(), '%s')" % (path + "/%s", name, url, md));
- package_id = last_insert_id(cursor);
+ assert not language is None
- path = os.path.realpath(path)
-
- for dirpath, dirs, files in os.walk(path):
- for f in files:
- assert path + "/" == (dirpath + "/") [:len(path)+1]
+ file_id = sse_db.new_file(package_id, path, language["language_id"])
- handle_file(package_id, path, dirpath[len(path)+1:], f)
+ p = Popen3("%s %s" % (language["lexer"], os.path.join(root, path)))
-if __name__ == "__main__":
- db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse")
- cursor = db.cursor();
- cursor.execute("SET AUTOCOMMIT=0")
- cursor.execute("START TRANSACTION")
+ subword_split = language["subword_split"]
- assert len(sys.argv) == 5
+ for identifier in p.fromchild:
+
+ text = identifier.strip()
+ sse_db.new_word(file_id, text, False)
- handle_tree(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
+ subwords = subword_split(text)
+ for w in subwords:
+ sse_db.new_word(file_id, w, True)
+
+ if p.wait() != 0:
+ print "WARNING: Subprocess failed!"
- cursor.execute("COMMIT")
- cursor.close()
- db.close()
+ del p