diff options
author | Lennart Poettering <lennart@poettering.net> | 2005-11-22 00:06:03 +0000 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2005-11-22 00:06:03 +0000 |
commit | 84b8366e5b3027ccd74622b7e6a271f5c7a641fa (patch) | |
tree | 983c4b88a4c74843b44fa01f3a77cef705ca4d3f /feed | |
parent | b4b06a5275762a16e3f74fdc8626adff647a5129 (diff) |
lots of small fixes
git-svn-id: file:///home/lennart/svn/public/sse/trunk@19 5fbabb74-0606-0410-a5e4-b5cc6a42724e
Diffstat (limited to 'feed')
-rw-r--r-- | feed/sse_db.py | 3 | ||||
-rwxr-xr-x | feed/sse_feed.py | 58 | ||||
-rw-r--r-- | feed/sse_grab.py | 2 |
3 files changed, 7 insertions, 56 deletions
diff --git a/feed/sse_db.py b/feed/sse_db.py index 4cf2af3..e99c5d0 100644 --- a/feed/sse_db.py +++ b/feed/sse_db.py @@ -3,7 +3,6 @@ import sys, os, MySQLdb, stat db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse") cursor = db.cursor(); -cursor.execute("SET AUTOCOMMIT=0") def commit(): cursor.execute('COMMIT') @@ -70,6 +69,6 @@ def new_word(file_id, text, is_subword): t = "subword" else: t = "word" - + cursor.execute('INSERT IGNORE INTO word (text, type, file_id, cnt) VALUES (%s, %s, %s, 0)', (text, t, file_id)) cursor.execute('UPDATE word SET cnt=cnt+1 WHERE text=%s AND type=%s AND file_id=%s', (text, t, file_id)) diff --git a/feed/sse_feed.py b/feed/sse_feed.py index d31d826..7925c25 100755 --- a/feed/sse_feed.py +++ b/feed/sse_feed.py @@ -7,53 +7,8 @@ import sse_db from sse_config import * from sse_defs import * -def camel_split(word): - - if len(word) <= 0: - return [] - - r = [] - last = 0 - - for i in range(0, len(word)-1): - - if word[i].islower() and word[i+1].isupper(): - r.append(word[last:i+1]) - last = i+1 - - r.append(word[last:]) - - return r - - -def default_subword_split(word): - r = [] - - w = word.split("_") - - if len(w) > 1: - delimiter = "_" - else: - w = camel_split(word) - delimiter = "" - - if len(w) > 1: - - for i in range(1, len(w)): - - if len(w[i]) == 0: - continue - - n = string.join(w[i:], delimiter) - - if len(n) >= 4: - r.append(n) - - return r - supported_languages = [ { "extensions" : [".c", ".h", ".cc", ".hh", ".cpp", ".hpp"], - "subword_split" : default_subword_split, "lexer" : SSE_DIR+"/sse_lex_c", "language_id" : SSE_LANGUAGE_C }] @@ -74,24 +29,21 @@ def process_source(archive, root, path, package_id, meta): print "(%s) Processing %s" % (archive, path) language = find_language(path) - assert not language is None file_id = sse_db.new_file(package_id, path, language["language_id"]) p = Popen3("%s %s" % (language["lexer"], os.path.join(root, path))) - subword_split = language["subword_split"] - for identifier in p.fromchild: text = identifier.strip() - sse_db.new_word(file_id, text, False) - subwords = subword_split(text) - for w in subwords: - sse_db.new_word(file_id, w, True) - + if text.startswith("S:"): + sse_db.new_word(file_id, text[2:], True) + else: + sse_db.new_word(file_id, text, False) + if p.wait() != 0: print "WARNING: Subprocess failed!" diff --git a/feed/sse_grab.py b/feed/sse_grab.py index 6142f27..937aad4 100644 --- a/feed/sse_grab.py +++ b/feed/sse_grab.py @@ -61,7 +61,7 @@ def grab_archive(meta, recid, provider_id = SSE_PROVIDER_NONE): dst.close() del src - except IOError, e: + except (socket.timeout, IOError): os.unlink(fn) print "WARNING: Failed to download %s!" % archive_url return |