"""Thin MySQL helper functions built on a single module-level connection.

The connection parameters (SSE_DB_HOST, SSE_DB_USER, SSE_DB_PASSWORD,
SSE_DB_DATABASE) and SSE_CRAWLER_ID come from ``sse_config``.  Importing this
module opens the connection and creates the shared cursor used by every
function below.

Packages and files are identified by ``(crawler_id, id)`` tuples.
"""

import os
import stat
import sys

import MySQLdb

from sse_config import *

db = MySQLdb.connect(SSE_DB_HOST, SSE_DB_USER, SSE_DB_PASSWORD, SSE_DB_DATABASE)
cursor = db.cursor()


def commit():
    """Issue a COMMIT statement on the shared cursor."""
    cursor.execute('COMMIT')


def rollback():
    """Issue a ROLLBACK statement on the shared cursor."""
    cursor.execute('ROLLBACK')


def start_transaction():
    """Issue a START TRANSACTION statement on the shared cursor."""
    cursor.execute('START TRANSACTION')


def last_insert_id():
    """Return the value of MySQL's LAST_INSERT_ID() as an int."""
    cursor.execute('SELECT LAST_INSERT_ID()')
    return int(cursor.fetchone()[0])


def new_package(archive, root, meta):
    """Insert a row into ``package`` and return its ``(crawler_id, id)`` tuple.

    ``archive`` is accepted but not used by this function.
    ``root`` is stored in the ``path`` column with a literal ``/%s`` appended.
    ``meta`` must contain the key ``"md"`` (KeyError otherwise).
    """
    # The '%s' in the path is passed as parameter data, so it is stored
    # verbatim rather than being interpolated.
    # NOTE(review): presumably a template filled in by readers of the table
    # -- confirm against consumers.
    cursor.execute(
        'INSERT INTO package (crawler_id, path, timestamp, md) VALUES (%s, %s, NOW(), %s)',
        (SSE_CRAWLER_ID, root + '/%s', meta["md"]))

    return (SSE_CRAWLER_ID, last_insert_id())


def find_package(md):
    """Look up a package by its ``md`` value.

    Returns the ``(crawler_id, id)`` tuple of the first matching row, or
    ``None`` when no row matches.
    """
    # Parameters must be passed as a sequence; a bare string would be treated
    # as a sequence of characters by current MySQLdb/mysqlclient releases.
    cursor.execute('SELECT crawler_id, id FROM package WHERE md=%s', (md,))

    row = cursor.fetchone()
    if row is None:
        return None

    return (int(row[0]), int(row[1]))


def new_provider_record(recid, package_id, provider_id, meta):
    """Insert or replace a ``provider_record`` row for a package.

    ``package_id`` is a ``(crawler_id, id)`` tuple.  Optional keys read from
    ``meta`` and their fallbacks when absent:

    * ``"name"``        -> ``"noname"``
    * ``"project-url"`` -> ``""``
    * ``"archive-url"`` -> ``""``
    * ``"license"``     -> ``""``
    """
    name = meta.get("name", "noname")
    url = meta.get("project-url", "")
    download_url = meta.get("archive-url", "")
    license_text = meta.get("license", "")

    cursor.execute(
        'REPLACE provider_record (id, crawler_id, package_id, provider_id, name, url, download_url, license) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)',
        (recid, package_id[0], package_id[1], provider_id, name, url, download_url, license_text))


def new_file(package_id, path, language_id = 0):
    """Insert a row into ``file`` and return a ``(crawler_id, id)`` tuple.

    ``package_id`` is a ``(crawler_id, id)`` tuple; both parts are written to
    the new row together with ``path`` and ``language_id``.
    """
    cursor.execute(
        'INSERT INTO file (crawler_id, package_id, path, language_id) VALUES (%s, %s, %s, %s)',
        (package_id[0], package_id[1], path, language_id))

    # NOTE(review): the row is stored with package_id[0] as crawler_id, but
    # the returned tuple uses SSE_CRAWLER_ID; these differ if the package
    # belongs to another crawler -- confirm which is intended.
    return (SSE_CRAWLER_ID, last_insert_id())


def new_word(file_id, text, is_subword, n):
    """Insert a row into ``word`` for the given file.

    ``file_id`` is a ``(crawler_id, id)`` tuple.  The ``type`` column is set
    to ``"subword"`` when ``is_subword`` is true and ``"word"`` otherwise;
    ``n`` is stored in ``cnt`` and must be greater than zero.
    """
    wtype = "subword" if is_subword else "word"

    assert n > 0

    cursor.execute(
        'INSERT INTO word (text, type, crawler_id, file_id, cnt) VALUES (%s, %s, %s, %s, %s)',
        (text, wtype, file_id[0], file_id[1], n))