--[[--- A Lua lexical scanner using LPeg.
= CREDITS
Written by Peter Odding, 2007/04/04
= THANKS TO
- the Lua authors for a wonderful language;
- Roberto for LPeg;
- caffeine for keeping me awake :)
= LICENSE
Shamelessly ripped from the SQLite[3] project:
The author disclaims copyright to this source code. In place of a legal
notice, here is a blessing:
May you do good and not evil.
May you find forgiveness for yourself and forgive others.
May you share freely, never taking more than you give.
@module macro.lexer
--]]
local lexer = {}
local lpeg = require 'lpeg'
local P, R, S, C, Cb, Cc, Cg, Cmt, Ct =
    lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Ct
-- create a pattern which captures the Lua value [id] and the input matching
-- [patt] in a table
local function token(id, patt) return Ct(Cc(id) * C(patt)) end
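-- as a sketch of the capture this helper produces:
--   lpeg.match(token('number', R('09')^1), '42')  --> { 'number', '42' }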
-- private interface
local table_of_tokens
local extra_tokens
function lexer.add_extra_tokens(extra)
    extra_tokens = extra_tokens or {}
    for _,t in ipairs(extra) do
        table.insert(extra_tokens,t)
    end
    table_of_tokens = nil -- re-initialize
end
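-- a usage sketch (the custom operator strings here are hypothetical examples):
--   lexer.add_extra_tokens { '!=', '=>' }
--   -- '!=' and '=>' will scan as single 'operator' tokens once the
--   -- scanner is re-initialized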
function lexer.init ()
    local digit = R('09')
    -- characters that may appear in an identifier; digits are also valid
    -- after the first character (see [ident] below)
    --local idsafe = R('AZ', 'az', '\127\255') + P '_'
    -- ASCII letters, '_' and two-byte UTF-8 sequences (roughly U+0380-U+07FF)
    local idsafe = R('AZ', 'az') + P '_' + R '\206\223' * R '\128\255'
    -- operators
    local OT = P '=='
    if extra_tokens then
        for _,ex in ipairs(extra_tokens) do
            OT = OT + P(ex)
        end
    end
    -- longer operators are listed before their prefixes, otherwise '...'
    -- could never match ('.' itself is covered by the trailing S set)
    local operator = token('operator', OT + P '~=' + P '<=' + P '>=' + P '...'
        + P '..' + S '+-*/%^#=<>;:,.{}[]()')
    -- identifiers
    local ident = token('iden', idsafe * (idsafe + digit) ^ 0)
    -- keywords; the trailing guard stops a keyword from matching a prefix
    -- of an identifier such as 'android'
    local keyword = token('keyword', (P 'and' + P 'break' + P 'do' + P 'elseif' +
        P 'else' + P 'end' + P 'false' + P 'for' + P 'function' + P 'if' +
        P 'in' + P 'local' + P 'nil' + P 'not' + P 'or' + P 'repeat' + P 'return' +
        P 'then' + P 'true' + P 'until' + P 'while') * -(idsafe + digit))
    -- numbers
    local number_sign = S'+-'^-1
    local number_decimal = digit ^ 1
    local number_hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
    local number_float = (digit^1 * P'.' * digit^0 + P'.' * digit^1) *
        (S'eE' * number_sign * digit^1)^-1
    local number = token('number', number_hexadecimal +
        number_float +
        number_decimal)
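    -- e.g. '0xFF', '3.14', '.5e-2' and '42' all scan as single 'number'
    -- tokens; a leading sign is scanned separately as an 'operator'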
    -- long strings of the form [=[ ... ]=]; the Cmt callback only accepts a
    -- closing bracket whose '=' count equals the back-captured "init" group
    -- ps. LPeg is for Lua what regex is for Perl, which makes me smile :)
    local equals = P '=' ^ 0
    local open = P '[' * Cg(equals, "init") * P '[' * P '\n' ^ -1
    local close = P ']' * C(equals) * P ']'
    local closeeq = Cmt(close * Cb "init", function (s, i, a, b) return a == b end)
    local longstring = open * C((P(1) - closeeq)^0) * close
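    -- e.g. '[==[ text ]=] still inside ]==]' matches as one long string:
    -- ']=]' fails the equality check and is consumed as string content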
    -- strings
    local singlequoted_string = P "'" * ((1 - S "'\r\n\f\\") + (P '\\' * 1)) ^ 0 * "'"
    local doublequoted_string = P '"' * ((1 - S '"\r\n\f\\') + (P '\\' * 1)) ^ 0 * '"'
    local string = token('string', singlequoted_string +
        doublequoted_string +
        longstring)
    -- comments
    local singleline_comment = P '--' * (1 - S '\r\n\f') ^ 0
    local multiline_comment = P '--' * longstring
    local comment = token('comment', multiline_comment + singleline_comment)
    -- whitespace
    local whitespace = token('space', S('\r\n\f\t ')^1)
    -- ordered choice of all tokens and last-resort error which consumes one character
    local any_token = whitespace + number + keyword + ident +
        string + comment + operator + token('error', 1)
    table_of_tokens = Ct(any_token ^ 0)
end
-- increment [line] by the number of line-ends in [text], counting a '\r\n'
-- pair as a single line-end
local function sync(line, text)
    local index = 1
    while index <= #text do
        local start, stop = text:find('[\r\n\f]', index)
        if not start then break end
        -- treat a '\r\n' pair as one line-end
        if text:sub(stop, stop + 1) == '\r\n' then stop = stop + 1 end
        index = stop + 1
        line = line + 1
    end
    return line
end
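-- a quick sanity check of the counting, as a sketch:
--   sync(1, 'a\nb\r\nc')  --> 3   (one '\n' plus one '\r\n' pair)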
lexer.sync = sync
lexer.line = 0
-- we only need to synchronize the line-counter for these token types
local multiline_tokens = { comment = true, string = true, space = true }
lexer.multiline_tokens = multiline_tokens
function lexer.scan_lua_tokenlist(input)
    if not table_of_tokens then
        lexer.init()
    end
    assert(type(input) == 'string', 'bad argument #1 (expected string)')
    local line = 1
    local tokens = lpeg.match(table_of_tokens, input)
    -- iterate in array order: the line counter below depends on it
    for _, token in ipairs(tokens) do
        local t = token[1]
        if t == 'operator' or t == 'error' then
            token[1] = token[2]
        end
        token[3] = line
        if multiline_tokens[t] then
            line = sync(line, token[2])
        end
    end
    return tokens
end
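-- each token is a { type, text, line } triple, except that operator and
-- error tokens carry their text as the type; as a sketch:
--   lexer.scan_lua_tokenlist('x = 1')
--   --> { {'iden','x',1}, {'space',' ',1}, {'=','=',1},
--   --    {'space',' ',1}, {'number','1',1} }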
--- get a token iterator from a source containing Lua code.
-- Note that this token iterator includes spaces and comments, and does not
-- convert string and number tokens - so e.g. a string token stays quoted and
-- a number token is an unconverted string.
-- @param input the source - either a string or a file-like object (anything
-- with a read method; it is read in full with read('*a'))
-- @param name an optional name for the source, stored in lexer.name
function lexer.scan_lua(input,name)
    if type(input) ~= 'string' and input.read then
        input = input:read('*a')
    end
    local tokens = lexer.scan_lua_tokenlist(input)
    local i, n = 1, #tokens
    return function(k)
        -- calling the iterator with a numeric offset peeks at the raw token
        -- table k positions ahead (tok(0) is the next token) without advancing
        if k ~= nil then
            k = i + k
            if k < 1 or k > n then return nil end
            return tokens[k]
        end
        local tok = tokens[i]
        i = i + 1
        if tok then
            lexer.line = tok[3]
            lexer.name = name
            return tok[1],tok[2]
        end
    end
end
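-- a usage sketch, assuming the module is on the package path as 'macro.lexer':
--   local lexer = require 'macro.lexer'
--   local tok = lexer.scan_lua 'local answer = 42 -- meaning'
--   for t, v in tok do
--       print(t, v)  -- e.g. 'keyword'/'local', 'iden'/'answer', 'number'/'42'
--   end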
return lexer