-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathdiscern.py
More file actions
104 lines (88 loc) · 2.24 KB
/
discern.py
File metadata and controls
104 lines (88 loc) · 2.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import re
from cga import Buffer
# Token names for heading depths 1..5, indexed by (number of '#' characters - 1).
TITLE_LEVEL = ['h1', 'h2', 'h3', 'h4', 'h5']


def ll_title(buf: Buffer):
    """Recognise a heading marker (a run of '#' followed by one space).

    The marker must sit at the very start of ``buf.buf_str``.

    Returns a dict. On success it holds ``matched=True``, ``token`` (the
    entry of ``TITLE_LEVEL`` for the heading depth), ``sign`` (the run of
    '#' without the trailing space) and ``remain`` (the text after the
    marker). Otherwise it holds ``matched=False`` and ``remain`` set to the
    untouched buffer text.

    A marker with more '#' characters than ``TITLE_LEVEL`` has entries is
    reported as unmatched instead of raising ``IndexError``.
    """
    found = re.match(r'#+ ', buf.buf_str)
    if found:
        marker = found.group(0)
        depth = len(marker) - 1  # the marker is the '#' run plus one space
        # Only depths that have a token name count as headings.
        if depth <= len(TITLE_LEVEL):
            return {
                'matched': True,
                'token': TITLE_LEVEL[depth - 1],
                'sign': marker[:-1],
                'remain': buf.buf_str[len(marker):],
            }
    return {
        'matched': False,
        'remain': buf.buf_str,
    }
def ll_text_line(buf: Buffer):
    """Recognise a non-empty line of plain text at the end of the buffer.

    Returns a dict. On success it holds ``matched=True``,
    ``token='text_line'``, ``sign`` (the line text without its newline) and
    ``remain=''``. Otherwise it holds ``matched=False`` and ``remain`` set
    to the untouched buffer text.

    The trailing newline is optional, so a final line that is not
    newline-terminated is still recognised. An empty line does not match.
    """
    # Raw string: '\Z' is a regex anchor, not a valid Python string escape.
    found = re.search(r'(.+)\n?\Z', buf.buf_str)
    if found:
        return {
            'matched': True,
            'token': 'text_line',
            'sign': found.group(1),
            'remain': '',
        }
    return {
        'matched': False,
        'remain': buf.buf_str,
    }
def ll_code_start(buf: Buffer):
    """Recognise an opening code fence tagged with a C or C++ language name.

    The buffer must consist of three backticks followed by ``c``, ``C``,
    ``c++`` or ``C++``, optionally followed by a single newline.

    Returns a dict. On success it holds ``matched=True``,
    ``token='code_start'``, ``sign`` (the fence including the language tag,
    without the newline) and ``remain=''``. Otherwise it holds
    ``matched=False`` and ``remain`` set to the untouched buffer text.
    """
    # Raw string: '\+' and '\Z' are regex syntax, not valid Python string escapes.
    found = re.match(r'(```[cC](?:\+\+)?)\n?\Z', buf.buf_str)
    if found:
        return {
            'matched': True,
            'token': 'code_start',
            'sign': found.group(1),
            'remain': '',
        }
    return {
        'matched': False,
        'remain': buf.buf_str,
    }
def ll_code_end(buf: Buffer):
    """Recognise a closing code fence (three backticks) at the start of the buffer.

    The fence must be followed immediately by a newline or by the end of
    the text, so a fence on an unterminated final line is still recognised.

    Returns a dict. On success it holds ``matched=True``,
    ``token='code_end'``, ``sign='```'`` and ``remain=''``. Otherwise it
    holds ``matched=False`` and ``remain`` set to the untouched buffer text.
    """
    found = re.match(r'(```)(?:\n|\Z)', buf.buf_str)
    if found:
        return {
            'matched': True,
            'token': 'code_end',
            'sign': found.group(1),
            'remain': '',
        }
    return {
        'matched': False,
        'remain': buf.buf_str,
    }
# Recognisers in the order they are tried against a buffer.
PRIORITY_QUEUE = [
    ll_title,
    ll_code_start,
    ll_code_end,
    ll_text_line,
]


def lexer(buf: Buffer):
    """Run every recogniser in ``PRIORITY_QUEUE`` over ``buf`` and collect tokens.

    After each recogniser runs, ``buf.buf_str`` is overwritten with the
    ``remain`` value it returned, so the next recogniser only sees what is
    left. ``buf`` is therefore mutated by this call.

    Returns a list of single-entry dicts mapping token name to its sign,
    in the order the tokens were recognised.
    """
    found = []
    for recognise in PRIORITY_QUEUE:
        outcome = recognise(buf)
        # Hand the leftover text to the next recogniser.
        buf.buf_str = outcome.get('remain')
        if not outcome.get('matched'):
            continue
        found.append({outcome.get('token'): outcome.get('sign')})
    return found
def lex(file: str):
    """Tokenise the text file at path ``file`` one line at a time.

    The file is opened as UTF-8 text. Each line is wrapped in a ``Buffer``
    and passed to ``lexer``; the tokens from all lines are concatenated in
    file order.

    Returns the combined list of tokens.
    """
    res = []
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(file, mode='r', encoding='utf-8') as f:
        for line in f:
            res.extend(lexer(Buffer(line)))
    return res