summaryrefslogtreecommitdiffstats
path: root/feed/sse_db.py
blob: 6a0f7e6d5d6cdd9bb38bafaa3bb5a08c8d09b0a8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74

import sys, os, MySQLdb, stat
from sse_config import *

db = MySQLdb.connect(SSE_DB_HOST, SSE_DB_USER, SSE_DB_PASSWORD, SSE_DB_DATABASE)
cursor = db.cursor();

def commit():
    cursor.execute('COMMIT')

def rollback():
    cursor.execute('ROLLBACK')

def start_transaction():
    cursor.execute('START TRANSACTION')

def last_insert_id():
    cursor.execute('SELECT LAST_INSERT_ID()')
    return int(cursor.fetchone()[0])

def new_package(archive, root, meta):

    cursor.execute('INSERT INTO package (crawler_id, path, timestamp, md) VALUES (%s, %s, NOW(), %s)', (SSE_CRAWLER_ID, root + '/%s', meta["md"]))

    return (SSE_CRAWLER_ID, last_insert_id())

def find_package(md):

    cursor.execute('SELECT crawler_id, id FROM package WHERE md=%s', md)

    if cursor.rowcount <= 0:
        return None
    
    return (int(cursor.fetchone()[0]), int(cursor.fetchone()[1]))

def new_provider_record(recid, package_id, provider_id, meta):

    try:
        name = meta["name"]
    except KeyError:
        name = "noname"

    try:
        url = meta["project-url"]
    except KeyError:
        url = ""

    try:
        download_url = meta["archive-url"]
    except KeyError:
        download_url = ""

    try:
        l = meta["license"]
    except KeyError:
        l = ""

    cursor.execute('REPLACE provider_record (id, crawler_id, package_id, provider_id, name, url, download_url, license) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)', (recid, package_id[0], package_id[1], provider_id, name, url, download_url, l))

def new_file(package_id, path, language_id = 0):

    cursor.execute('INSERT INTO file (crawler_id, package_id, path, language_id) VALUES (%s, %s, %s)', (package_id[0], package_id[1], path, language_id));

    return (SSE_CRAWLER_ID, last_insert_id())

def new_word(file_id, text, is_subword):

    if is_subword:
        t = "subword"
    else:
        t = "word"

    cursor.execute('INSERT IGNORE INTO word (text, type, crawler_id, file_id, cnt) VALUES (%s, %s, %s, 0)', (text, t, crawler_id, file_id))
    cursor.execute('UPDATE word SET cnt=cnt+1 WHERE text=%s AND type=%s AND crawler_id=%s AND file_id=%s', (text, t, crawler_id, file_id))