1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
#!/usr/bin/python
import sys, os, stat, string
from popen2 import Popen3
import sse_db
from sse_config import *
from sse_defs import *
def camel_split(word):
    """Split a camelCase identifier at each lower-to-upper case boundary.

    A boundary is placed between a lowercase character and an uppercase
    character that immediately follows it, so ``"fooBarBaz"`` yields
    ``["foo", "Bar", "Baz"]``.  Runs of uppercase letters are never split
    (``"HTTPServer"`` stays in one piece) because no lowercase character
    precedes the uppercase ones.

    word -- the identifier to split.

    Returns a list of the pieces in order; joining them with ``""`` gives
    back ``word``.  An empty ``word`` yields an empty list.
    """
    if not word:
        return []

    parts = []
    start = 0
    # Walk over adjacent character pairs (word[i], word[i + 1]).
    for i, (current, following) in enumerate(zip(word, word[1:])):
        if current.islower() and following.isupper():
            parts.append(word[start:i + 1])
            start = i + 1

    # Whatever remains after the last boundary is the final piece.
    parts.append(word[start:])
    return parts
def default_subword_split(word):
    """Return the trailing sub-identifiers contained in ``word``.

    If ``word`` contains an underscore it is split on ``"_"``; otherwise
    it is split at camelCase boundaries with camel_split().  For every
    non-empty part except the first, the remainder of the identifier
    starting at that part is rejoined (with ``"_"`` or ``""``
    respectively) and kept when it is at least 4 characters long.

    Examples: ``"parse_source_file"`` gives ``["source_file", "file"]``
    and ``"parseSourceFile"`` gives ``["SourceFile", "File"]``.

    word -- the identifier to derive subwords from.

    Returns a list of subword strings, possibly empty.  The full
    identifier itself is never included.
    """
    parts = word.split("_")
    if len(parts) > 1:
        delimiter = "_"
    else:
        parts = camel_split(word)
        delimiter = ""

    subwords = []
    # Start at 1: the suffix beginning at part 0 is the whole word.
    for i in range(1, len(parts)):
        if not parts[i]:
            # Empty parts come from leading, trailing or doubled "_".
            continue
        # str.join replaces string.join(), which no longer exists in
        # Python 3; the result is the same.
        suffix = delimiter.join(parts[i:])
        if len(suffix) >= 4:
            subwords.append(suffix)
    return subwords
# Table of the source languages that can be indexed.  Each entry is a
# dict with the keys read by find_language() and process_source():
#   "language_id"   -- passed to sse_db.new_file() for files of this language
#   "extensions"    -- file name suffixes, compared against the lower-cased
#                      file name, so they must be written in lower case
#   "lexer"         -- command run on a source file; its output is read
#                      line by line, one identifier per line
#   "subword_split" -- callable mapping an identifier to a list of subwords
supported_languages = [
    {
        "language_id": SSE_LANGUAGE_C,
        "extensions": [".c", ".h", ".cc", ".hh", ".cpp", ".hpp"],
        "lexer": SSE_DIR + "/sse_lex_c",
        "subword_split": default_subword_split,
    },
]
def find_language(fn):
    """Look up the supported_languages entry that matches a file name.

    The file name is lower-cased and tested against every suffix in each
    entry's "extensions" list; the first entry with a matching suffix
    wins.  The suffixes themselves are compared as written, so only
    lower-case suffixes in the table can ever match.

    fn -- file name or path to classify.

    Returns the matching language dict, or None when no suffix matches.
    """
    # Lower-case once up front instead of once per suffix tested.
    lowered = fn.lower()
    for language in supported_languages:
        for extension in language["extensions"]:
            if lowered.endswith(extension):
                return language
    return None
def supported_source(fn):
    """Return True when find_language() recognises ``fn``, else False."""
    language = find_language(fn)
    return language is not None
def process_source(archive, root, path, package_id, meta):
    """Run the language lexer over one source file and store its words.

    Registers the file through sse_db.new_file(), runs the lexer
    configured for the file's language on ``os.path.join(root, path)``
    and reads the lexer's output line by line.  Each stripped line is
    stored with sse_db.new_word(file_id, text, False), and each subword
    derived from it with sse_db.new_word(file_id, subword, True).
    NOTE(review): the boolean presumably marks the word as a subword --
    confirm against sse_db.

    archive    -- label used only in the progress message.
    root       -- directory that ``path`` is relative to.
    path       -- source file path relative to ``root``; also the name
                  recorded in the database.
    package_id -- passed through to sse_db.new_file().
    meta       -- unused here; kept for interface compatibility.

    Raises AssertionError when ``path`` has no supported language; callers
    are expected to check with supported_source() first.  A non-zero lexer
    exit status is only reported as a warning on stdout.
    """
    # Parenthesised single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print("(%s) Processing %s" % (archive, path))

    language = find_language(path)
    assert language is not None

    file_id = sse_db.new_file(package_id, path, language["language_id"])

    # Pass the command as an argument list rather than a formatted string:
    # Popen3 hands a string to /bin/sh, so a file name containing spaces
    # or shell metacharacters would be split or executed by the shell.
    # A list is exec'ed directly with the path as a single argument.
    p = Popen3([language["lexer"], os.path.join(root, path)])
    subword_split = language["subword_split"]

    for identifier in p.fromchild:
        text = identifier.strip()
        sse_db.new_word(file_id, text, False)
        for w in subword_split(text):
            sse_db.new_word(file_id, w, True)

    if p.wait() != 0:
        print("WARNING: Subprocess failed!")

    del p
|