diff options
author | chai <chaifix@163.com> | 2021-11-17 23:03:07 +0800 |
---|---|---|
committer | chai <chaifix@163.com> | 2021-11-17 23:03:07 +0800 |
commit | 27d6efb5f5a076f825fe2da1875e0cabaf02b4e7 (patch) | |
tree | 44f301110bc2ea742908ed92a78eba0803cd3b60 /Tools/LuaMacro/macro/lexer.lua | |
parent | b34310c631989551054d456eb47aaab5ded266a4 (diff) |
+ LuaMacro
Diffstat (limited to 'Tools/LuaMacro/macro/lexer.lua')
-rw-r--r-- | Tools/LuaMacro/macro/lexer.lua | 179 |
1 files changed, 179 insertions, 0 deletions
--[[--- A Lua lexical scanner using LPeg.
= CREDITS
Written by Peter Odding, 2007/04/04

= THANKS TO
- the Lua authors for a wonderful language;
- Roberto for LPeg;
- caffeine for keeping me awake :)

= LICENSE
Shamelessly ripped from the SQLite[3] project:

   The author disclaims copyright to this source code.  In place of a legal
   notice, here is a blessing:

      May you do good and not evil.
      May you find forgiveness for yourself and forgive others.
      May you share freely, never taking more than you give.

@module macro.lexer
--]]

local lexer = {}
local lpeg = require 'lpeg'
local P, R, S, C, Cb, Cc, Cg, Cmt, Ct =
    lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Ct

-- create a pattern which captures the lua value [id] and the input matching
-- [patt] in a table
local function token(id, patt) return Ct(Cc(id) * C(patt)) end

-- private interface
local table_of_tokens   -- compiled grammar; nil forces a rebuild on the next scan
local extra_tokens      -- extra operator spellings registered by the user, or nil

--- register additional operator tokens.
-- The entries are appended to the list of extra operators and the compiled
-- grammar is discarded, so the next scan rebuilds it with the new operators.
-- @param extra array of values accepted by lpeg.P (typically operator strings)
function lexer.add_extra_tokens(extra)
    extra_tokens = extra_tokens or {}
    for _, t in ipairs(extra) do
        table.insert(extra_tokens, t)
    end
    table_of_tokens = nil -- re-initialize
end

--- build the LPeg grammar used by the scanner.
-- Called automatically by `scan_lua_tokenlist` when no grammar is compiled.
function lexer.init ()
    local digit = R('09')

    -- range of valid characters after first character of identifier
    --local idsafe = R('AZ', 'az', '\127\255') + P '_'
    -- ('*' binds tighter than '+': the last alternative is a two-byte sequence)
    local idsafe = R('AZ', 'az') + P '_' + R '\206\223' * R '\128\255'

    -- operators
    local OT = P '=='
    if extra_tokens then
        for _, ex in ipairs(extra_tokens) do
            OT = OT + P(ex)
        end
    end
    -- NOTE(review): LPeg choice is ordered and P '.' is tried before P '...'
    -- and P '..', so (unless registered as extra tokens) those two are emitted
    -- as separate '.' tokens. Left unchanged because callers may rely on the
    -- current token stream - confirm whether this is intentional.
    local operator = token('operator', OT + P '.' + P '~=' + P '<=' + P '>=' + P '...'
                                          + P '..' + S '+-*/%^#=<>;:,.{}[]()')

    -- identifiers
    local ident = token('iden', idsafe * (idsafe + digit) ^ 0)

    -- keywords (must not be followed by an identifier character)
    local keyword = token('keyword', (P 'and' + P 'break' + P 'do' + P 'elseif' +
        P 'else' + P 'end' + P 'false' + P 'for' + P 'function' + P 'if' +
        P 'in' + P 'local' + P 'nil' + P 'not' + P 'or' + P 'repeat' + P 'return' +
        P 'then' + P 'true' + P 'until' + P 'while') * -(idsafe + digit))

    -- numbers
    local number_sign = S'+-'^-1
    local number_decimal = digit ^ 1
    local number_hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
    local number_float = (digit^1 * P'.' * digit^0 + P'.' * digit^1) *
                         (S'eE' * number_sign * digit^1)^-1
    local number = token('number', number_hexadecimal +
                                   number_float +
                                   number_decimal)

    -- callback for [=[ long strings ]=]
    -- ps. LPeg is for Lua what regex is for Perl, which makes me smile :)
    -- The run of '=' in the opening bracket is stored in the named group
    -- "init"; closeeq only succeeds on a closing bracket with the same run.
    local equals = P '=' ^ 0
    local open = P '[' * Cg(equals, "init") * P '[' * P '\n' ^ -1
    local close = P ']' * C(equals) * P ']'
    local closeeq = Cmt(close * Cb "init", function (s, i, a, b) return a == b end)
    local longstring = open * C((P(1) - closeeq)^0) * close --/ 1

    -- strings
    local singlequoted_string = P "'" * ((1 - S "'\r\n\f\\") + (P '\\' * 1)) ^ 0 * "'"
    local doublequoted_string = P '"' * ((1 - S '"\r\n\f\\') + (P '\\' * 1)) ^ 0 * '"'
    -- named string_token so the standard `string` library is not shadowed
    local string_token = token('string', singlequoted_string +
                                         doublequoted_string +
                                         longstring)

    -- comments
    local singleline_comment = P '--' * (1 - S '\r\n\f') ^ 0
    local multiline_comment = P '--' * longstring
    local comment = token('comment', multiline_comment + singleline_comment)

    -- whitespace
    local whitespace = token('space', S('\r\n\f\t ')^1)

    -- ordered choice of all tokens and last-resort error which consumes one character
    local any_token = whitespace + number + keyword + ident +
                      string_token + comment + operator + token('error', 1)

    table_of_tokens = Ct(any_token ^ 0)
end

--- increment [line] by the number of line-ends in [text].
-- A line-end is "\r\n" (counted once) or a single "\r", "\n" or "\f".
-- The text is scanned strictly left to right, so texts that mix "\r\n" with
-- other line-ends are counted correctly.
-- @param line the starting line number
-- @param text the string to scan
-- @return line plus the number of line-ends found in text
local function sync(line, text)
    local index, limit = 1, #text
    while index <= limit do
        local start = text:find('[\r\n\f]', index)
        if not start then break end
        if text:sub(start, start + 1) == '\r\n' then
            index = start + 2   -- CR LF is a single line-end
        else
            index = start + 1
        end
        line = line + 1
    end
    return line
end
lexer.sync = sync

lexer.line = 0

-- we only need to synchronize the line-counter for these token types
local multiline_tokens = { comment = true, string = true, space = true }
lexer.multiline_tokens = multiline_tokens

--- scan a string of Lua source into a list of tokens.
-- Each token is a table: [1] is the token type ('space', 'number', 'keyword',
-- 'iden', 'string', 'comment'), or the token text itself for operators and
-- unrecognised characters; [2] is the matched text; [3] is the line on which
-- the token starts (counting from 1).
-- @param input the source string
-- @return array of token tables in source order
function lexer.scan_lua_tokenlist(input)
    assert(type(input) == 'string', 'bad argument #1 (expected string)')
    if not table_of_tokens then
        lexer.init()
    end
    local line = 1
    local tokens = lpeg.match(table_of_tokens, input)
    -- ipairs: the running line count depends on visiting tokens in order
    for _, tok in ipairs(tokens) do
        local t = tok[1]
        if t == 'operator' or t == 'error' then
            tok[1] = tok[2]
        end
        tok[3] = line
        if multiline_tokens[t] then
            line = sync(line, tok[2])
        end
    end
    return tokens
end

--- get a token iterator from a source containing Lua code.
-- Note that this token iterator includes spaces and comments, and does not convert
-- string and number tokens - so e.g. a string token is quoted and a number token is
-- an unconverted string.
--
-- Called without an argument, the iterator returns the type and text of the
-- next token and records its line and the source name in `lexer.line` and
-- `lexer.name`; it returns nothing once the tokens are exhausted. Called with
-- a number k, it does not advance: it returns the token table at offset k from
-- the next unread token (0 is the next token), or nil when out of range.
-- @param input the source - can be a string or a file-like object (i.e. read() returns line)
-- @param name for the source
-- @return the iterator function
function lexer.scan_lua(input,name)
    if type(input) ~= 'string' and input.read then
        input = input:read('*a')
    end
    local tokens = lexer.scan_lua_tokenlist(input)
    local i, n = 1, #tokens
    return function(k)
        if k ~= nil then
            k = i + k
            if k < 1 or k > n then return nil end
            return tokens[k]
        end
        local tok = tokens[i]
        i = i + 1
        if tok then
            lexer.line = tok[3]
            lexer.name = name
            return tok[1],tok[2]
        end
    end
end

return lexer