1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
import sys, os, MySQLdb, stat
from sse_config import *
db = MySQLdb.connect(SSE_DB_HOST, SSE_DB_USER, SSE_DB_PASSWORD, SSE_DB_DATABASE)
cursor = db.cursor();
def commit():
cursor.execute('COMMIT')
def rollback():
cursor.execute('ROLLBACK')
def start_transaction():
cursor.execute('START TRANSACTION')
def last_insert_id():
cursor.execute('SELECT LAST_INSERT_ID()')
return int(cursor.fetchone()[0])
def new_package(archive, root, meta):
cursor.execute('INSERT INTO package (crawler_id, path, timestamp, md) VALUES (%s, %s, NOW(), %s)', (SSE_CRAWLER_ID, root + '/%s', meta["md"]))
return (SSE_CRAWLER_ID, last_insert_id())
def find_package(md):
cursor.execute('SELECT crawler_id, id FROM package WHERE md=%s', md)
if cursor.rowcount <= 0:
return None
return (int(cursor.fetchone()[0]), int(cursor.fetchone()[1]))
def new_provider_record(recid, package_id, provider_id, meta):
try:
name = meta["name"]
except KeyError:
name = "noname"
try:
url = meta["project-url"]
except KeyError:
url = ""
try:
download_url = meta["archive-url"]
except KeyError:
download_url = ""
try:
l = meta["license"]
except KeyError:
l = ""
cursor.execute('REPLACE provider_record (id, crawler_id, package_id, provider_id, name, url, download_url, license) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)', (recid, package_id[0], package_id[1], provider_id, name, url, download_url, l))
def new_file(package_id, path, language_id = 0):
cursor.execute('INSERT INTO file (crawler_id, package_id, path, language_id) VALUES (%s, %s, %s)', (package_id[0], package_id[1], path, language_id));
return (SSE_CRAWLER_ID, last_insert_id())
def new_word(file_id, text, is_subword):
if is_subword:
t = "subword"
else:
t = "word"
cursor.execute('INSERT IGNORE INTO word (text, type, crawler_id, file_id, cnt) VALUES (%s, %s, %s, 0)', (text, t, crawler_id, file_id))
cursor.execute('UPDATE word SET cnt=cnt+1 WHERE text=%s AND type=%s AND crawler_id=%s AND file_id=%s', (text, t, crawler_id, file_id))
|