summary | refs | log | tree | commit | diff | stats
path: root/feed
diff options
context:
space:
mode:
author Lennart Poettering <lennart@poettering.net> 2005-11-23 03:10:22 +0000
committer Lennart Poettering <lennart@poettering.net> 2005-11-23 03:10:22 +0000
commit 64f14c6f2760f20417df31f543d03a08c59fe988 (patch)
tree 8bc662d9f8b69a973b788252c0bd0b8de8209c03 /feed
parent 3574f03545c00b33620d6c35cf7115310b66d264 (diff)
* add more licenses
* fix "unsubscriptable object" issue
* DB optimization

git-svn-id: file:///home/lennart/svn/public/sse/trunk@36 5fbabb74-0606-0410-a5e4-b5cc6a42724e
Diffstat (limited to 'feed')
-rw-r--r-- feed/sse_db.py 16
-rwxr-xr-x feed/sse_feed.py 19
-rwxr-xr-x feed/sse_fm.py 4
3 files changed, 26 insertions, 13 deletions
diff --git a/feed/sse_db.py b/feed/sse_db.py
index 16b07c7..bd74f15 100644
--- a/feed/sse_db.py
+++ b/feed/sse_db.py
@@ -30,8 +30,9 @@ def find_package(md):
if cursor.rowcount <= 0:
return None
-
- return (int(cursor.fetchone()[0]), int(cursor.fetchone()[1]))
+
+ r = cursor.fetchone()
+ return (int(r[0]), int(r[1]))
def new_provider_record(recid, package_id, provider_id, meta):
@@ -63,12 +64,13 @@ def new_file(package_id, path, language_id = 0):
return (SSE_CRAWLER_ID, last_insert_id())
-def new_word(file_id, text, is_subword):
+def new_word(file_id, text, is_subword, n):
if is_subword:
- t = "subword"
+ wtype = "subword"
else:
- t = "word"
+ wtype = "word"
+
+ assert n > 0
- cursor.execute('INSERT IGNORE INTO word (text, type, crawler_id, file_id, cnt) VALUES (%s, %s, %s, %s, 0)', (text, t, file_id[0], file_id[1]))
- cursor.execute('UPDATE word SET cnt=cnt+1 WHERE text=%s AND type=%s AND crawler_id=%s AND file_id=%s', (text, t, file_id[0], file_id[1]))
+ cursor.execute('INSERT INTO word (text, type, crawler_id, file_id, cnt) VALUES (%s, %s, %s, %s, %s)', (text, wtype, file_id[0], file_id[1], n))
diff --git a/feed/sse_feed.py b/feed/sse_feed.py
index 7925c25..6a34d69 100755
--- a/feed/sse_feed.py
+++ b/feed/sse_feed.py
@@ -33,16 +33,25 @@ def process_source(archive, root, path, package_id, meta):
file_id = sse_db.new_file(package_id, path, language["language_id"])
+ table = {}
+
p = Popen3("%s %s" % (language["lexer"], os.path.join(root, path)))
for identifier in p.fromchild:
-
- text = identifier.strip()
+ t = identifier.strip()
+
+ try:
+ table[t.lower()][1] += 1
+ except KeyError:
+ table[t.lower()] = [t, 1]
- if text.startswith("S:"):
- sse_db.new_word(file_id, text[2:], True)
+ for k, v in table.items():
+ if v[0].startswith("S:"):
+ sse_db.new_word(file_id, v[0][2:], True, v[1])
else:
- sse_db.new_word(file_id, text, False)
+ sse_db.new_word(file_id, v[0], False, v[1])
+
+ del table
if p.wait() != 0:
print "WARNING: Subprocess failed!"
diff --git a/feed/sse_fm.py b/feed/sse_fm.py
index 2d79c8c..467ed9f 100755
--- a/feed/sse_fm.py
+++ b/feed/sse_fm.py
@@ -32,7 +32,9 @@ license_whitelist = [
"BSD License (revised)",
"DFSG approved",
"GNU Free Documentation License (FDL)",
- "W3C License"
+ "W3C License",
+ "Mozilla Public License (MPL)",
+ "The Apache License 2.0"
]
def process_record(meta):