summary | refs | log | tree | commit | diff | stats
path: root/feed
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2005-11-22 00:06:03 +0000
committer Lennart Poettering <lennart@poettering.net> 2005-11-22 00:06:03 +0000
commit 84b8366e5b3027ccd74622b7e6a271f5c7a641fa (patch)
tree 983c4b88a4c74843b44fa01f3a77cef705ca4d3f /feed
parent b4b06a5275762a16e3f74fdc8626adff647a5129 (diff)
lots of small fixes
git-svn-id: file:///home/lennart/svn/public/sse/trunk@19 5fbabb74-0606-0410-a5e4-b5cc6a42724e
Diffstat (limited to 'feed')
-rw-r--r-- feed/sse_db.py 3
-rwxr-xr-x feed/sse_feed.py 58
-rw-r--r-- feed/sse_grab.py 2
3 files changed, 7 insertions, 56 deletions
diff --git a/feed/sse_db.py b/feed/sse_db.py
index 4cf2af3..e99c5d0 100644
--- a/feed/sse_db.py
+++ b/feed/sse_db.py
@@ -3,7 +3,6 @@ import sys, os, MySQLdb, stat
db = MySQLdb.connect(host = "localhost", user = "sse_web", passwd = "ece6Yoli", db = "sse")
cursor = db.cursor();
-cursor.execute("SET AUTOCOMMIT=0")
def commit():
cursor.execute('COMMIT')
@@ -70,6 +69,6 @@ def new_word(file_id, text, is_subword):
t = "subword"
else:
t = "word"
-
+
cursor.execute('INSERT IGNORE INTO word (text, type, file_id, cnt) VALUES (%s, %s, %s, 0)', (text, t, file_id))
cursor.execute('UPDATE word SET cnt=cnt+1 WHERE text=%s AND type=%s AND file_id=%s', (text, t, file_id))
diff --git a/feed/sse_feed.py b/feed/sse_feed.py
index d31d826..7925c25 100755
--- a/feed/sse_feed.py
+++ b/feed/sse_feed.py
@@ -7,53 +7,8 @@ import sse_db
from sse_config import *
from sse_defs import *
-def camel_split(word):
-
- if len(word) <= 0:
- return []
-
- r = []
- last = 0
-
- for i in range(0, len(word)-1):
-
- if word[i].islower() and word[i+1].isupper():
- r.append(word[last:i+1])
- last = i+1
-
- r.append(word[last:])
-
- return r
-
-
-def default_subword_split(word):
- r = []
-
- w = word.split("_")
-
- if len(w) > 1:
- delimiter = "_"
- else:
- w = camel_split(word)
- delimiter = ""
-
- if len(w) > 1:
-
- for i in range(1, len(w)):
-
- if len(w[i]) == 0:
- continue
-
- n = string.join(w[i:], delimiter)
-
- if len(n) >= 4:
- r.append(n)
-
- return r
-
supported_languages = [ {
"extensions" : [".c", ".h", ".cc", ".hh", ".cpp", ".hpp"],
- "subword_split" : default_subword_split,
"lexer" : SSE_DIR+"/sse_lex_c",
"language_id" : SSE_LANGUAGE_C
}]
@@ -74,24 +29,21 @@ def process_source(archive, root, path, package_id, meta):
print "(%s) Processing %s" % (archive, path)
language = find_language(path)
-
assert not language is None
file_id = sse_db.new_file(package_id, path, language["language_id"])
p = Popen3("%s %s" % (language["lexer"], os.path.join(root, path)))
- subword_split = language["subword_split"]
-
for identifier in p.fromchild:
text = identifier.strip()
- sse_db.new_word(file_id, text, False)
- subwords = subword_split(text)
- for w in subwords:
- sse_db.new_word(file_id, w, True)
-
+ if text.startswith("S:"):
+ sse_db.new_word(file_id, text[2:], True)
+ else:
+ sse_db.new_word(file_id, text, False)
+
if p.wait() != 0:
print "WARNING: Subprocess failed!"
diff --git a/feed/sse_grab.py b/feed/sse_grab.py
index 6142f27..937aad4 100644
--- a/feed/sse_grab.py
+++ b/feed/sse_grab.py
@@ -61,7 +61,7 @@ def grab_archive(meta, recid, provider_id = SSE_PROVIDER_NONE):
dst.close()
del src
- except IOError, e:
+ except (socket.timeout, IOError):
os.unlink(fn)
print "WARNING: Failed to download %s!" % archive_url
return