author    chai <chaifix@163.com>    2021-11-17 23:03:07 +0800
committer chai <chaifix@163.com>    2021-11-17 23:03:07 +0800
commit    27d6efb5f5a076f825fe2da1875e0cabaf02b4e7 (patch)
tree      44f301110bc2ea742908ed92a78eba0803cd3b60 /Tools/LuaMacro/macro/clexer.lua
parent    b34310c631989551054d456eb47aaab5ded266a4 (diff)
+ LuaMacro
Diffstat (limited to 'Tools/LuaMacro/macro/clexer.lua')
-rw-r--r--  Tools/LuaMacro/macro/clexer.lua  169
1 file changed, 169 insertions, 0 deletions
diff --git a/Tools/LuaMacro/macro/clexer.lua b/Tools/LuaMacro/macro/clexer.lua
new file mode 100644
index 0000000..fd859a8
--- /dev/null
+++ b/Tools/LuaMacro/macro/clexer.lua
@@ -0,0 +1,169 @@
+--[[--- A C lexical scanner using LPeg.
+= CREDITS
+= based on the C lexer in Peter Odding's lua-lxsh
+@module macro.clexer
+--]]
+
+local clexer = {}
+local lpeg = require 'lpeg'
+local P, R, S, C, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Ct
+
+-- create a pattern which captures the lua value [id] and the input matching
+-- [patt] in a table
+local function token(id, patt) return Ct(Cc(id) * C(patt)) end
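+-- for illustration (not part of the module): matching the pattern
+-- token('number', R'09'^1) against '42' yields the capture { 'number', '42' }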
+
+-- private interface
+local table_of_tokens
+local extra_tokens
+
+function clexer.add_extra_tokens(extra)
+    extra_tokens = extra_tokens or {}
+    for _,t in ipairs(extra) do
+        table.insert(extra_tokens,t)
+    end
+    table_of_tokens = nil -- re-initialize
+end
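+-- a minimal usage sketch (the extra token strings here are hypothetical
+-- examples, not defaults shipped with the module):
+--   clexer.add_extra_tokens { '::', '@' }
+--   -- the grammar is rebuilt on the next scan, so '::' then lexes as a
+--   -- single 'operator' token instead of two ':' tokens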
+
+function clexer.init ()
+    local digit = R('09')
+
+    local upp, low = R'AZ', R'az'
+    local oct, dec = R'07', R'09'
+    local hex = dec + R'AF' + R'af'
+    local letter = upp + low
+    local alnum = letter + dec + '_'
+    local endline = S'\r\n\f'
+    local newline = '\r\n' + endline
+    local escape = '\\' * ( newline
+        + S'\\"\'?abfnrtv'
+        + (#oct * oct^-3)
+        + ('x' * #hex * hex^-2))
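+    -- e.g. escape accepts \n, \\, a backslash-newline line continuation,
+    -- \033 (one to three octal digits) and \x41 (one or two hex digits)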
+
+    -- characters valid anywhere in an identifier; digits are additionally
+    -- allowed after the first character (see ident below)
+    local idsafe = R('AZ', 'az', '\127\255') + P '_'
+
+    -- operators
+    local OT = P '=='
+    if extra_tokens then
+        for _,ex in ipairs(extra_tokens) do
+            OT = OT + P(ex)
+        end
+    end
+    local operator = token('operator', OT + P '.' + P'>>=' + '<<=' + '--' + '>>' + '>=' + '/=' + '==' + '<='
+        + '+=' + '<<' + '*=' + '++' + '&&' + '|=' + '||' + '!=' + '&=' + '-='
+        + '^=' + '%=' + '->' + S',)*%+&(-~/^]{}|.[>!?:=<;')
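+    -- note: '+' is LPeg ordered choice, so three-character operators such as
+    -- '>>=' must be listed before their prefixes '>>' and '>', or the longer
+    -- operator would never match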
+    -- identifiers
+    local ident = token('iden', idsafe * (idsafe + digit) ^ 0)
+
+    -- keywords
+    local keyword = token('keyword', (P 'auto' + P 'break' + P 'case' + P 'char' +
+        P 'const' + P 'continue' + P 'default' +
+        P 'do' + P 'double' + P 'else' + P 'enum' + P 'extern' + P 'float' +
+        P 'for' + P 'goto' + P 'if' + P 'int' + P 'long' + P 'register' +
+        P 'return' + P 'short' + P 'signed' + P 'sizeof' + P 'static' +
+        P 'struct' + P 'switch' + P 'typedef' + P 'union' + P 'void' +
+        P 'volatile' + P 'while') * -(idsafe + digit))
+
+    -- numbers
+    local number_sign = S'+-'^-1
+    local number_decimal = digit ^ 1
+    local number_hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
+    local number_float = (digit^1 * P'.' * digit^0 + P'.' * digit^1) *
+        (S'eE' * number_sign * digit^1)^-1
+    local number = token('number', number_hexadecimal +
+        number_float +
+        number_decimal)
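+    -- ordered choice again: number_float is tried before number_decimal so
+    -- '3.14' scans as one number token rather than '3' followed by '.' and '14'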
+
+    local string = token('string', '"' * ((1 - S'\\\r\n\f"') + escape)^0 * '"')
+    local char = token('char', "'" * ((1 - S"\\\r\n\f'") + escape) * "'")
+
+    -- comments
+    local singleline_comment = P '//' * (1 - S '\r\n\f') ^ 0
+    local multiline_comment = '/*' * (1 - P'*/')^0 * '*/'
+    local comment = token('comment', multiline_comment + singleline_comment)
+    local prepro = token('prepro', P '#' * (1 - S '\r\n\f') ^ 0)
+
+    -- whitespace
+    local whitespace = token('space', S('\r\n\f\t ')^1)
+
+    -- ordered choice of all tokens and last-resort error which consumes one character
+    local any_token = whitespace + number + keyword + ident +
+        string + char + comment + prepro + operator + token('error', 1)
+
+    table_of_tokens = Ct(any_token ^ 0)
+end
+
+-- increment [line] by the number of line-ends in [text];
+-- a CRLF pair counts as a single line ending
+local function sync(line, text)
+    local index, limit = 1, #text
+    while index <= limit do
+        local start, stop = text:find('[\r\n\f]', index)
+        if not start then break end
+        if text:sub(start, start + 1) == '\r\n' then
+            stop = start + 1 -- consume the pair as one ending
+        end
+        index = stop + 1
+        line = line + 1
+    end
+    return line
+end
+clexer.sync = sync
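+-- for illustration: sync(1, 'a\nb\r\nc') --> 3
+-- (one lone '\n' plus one '\r\n' pair is two line-ends)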
+
+clexer.line = 0
+
+-- we only need to synchronize the line-counter for these token types
+local multiline_tokens = { comment = true, space = true }
+clexer.multiline_tokens = multiline_tokens
+
+function clexer.scan_c_tokenlist(input)
+    if not table_of_tokens then
+        clexer.init()
+    end
+    assert(type(input) == 'string', 'bad argument #1 (expected string)')
+    local line = 1
+    local tokens = lpeg.match(table_of_tokens, input)
+    -- ipairs, not pairs: the cumulative line counter requires array order
+    for _, token in ipairs(tokens) do
+        local t = token[1]
+        if t == 'operator' or t == 'error' then
+            token[1] = token[2] -- key operators and errors by their text
+        end
+        token[3] = line
+        if multiline_tokens[t] then
+            line = sync(line, token[2])
+        end
+    end
+    return tokens
+end
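+-- a minimal usage sketch; each token is a { type, text, line } triple:
+--   local toks = clexer.scan_c_tokenlist 'int x;'
+--   -- toks[1] --> { 'keyword', 'int', 1 }
+--   -- toks[3] --> { 'iden', 'x', 1 }
+--   -- toks[4] --> { ';', ';', 1 }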
+
+--- get a token iterator from a source containing C code.
+-- input is the source - can be a string or a file-like object (anything
+-- with a read() method; the whole source is read at once).
+-- Note that this token iterator includes spaces and comments, and does not
+-- convert string and number tokens - so e.g. a string token is quoted and a
+-- number token is an unconverted string.
+function clexer.scan_c(input,name)
+    if type(input) ~= 'string' and input.read then
+        input = input:read('*a')
+    end
+    local tokens = clexer.scan_c_tokenlist(input)
+    local i, n = 1, #tokens
+    return function(k)
+        if k ~= nil then -- numeric argument: peek relative to the cursor
+            k = i + k
+            if k < 1 or k > n then return nil end
+            return tokens[k]
+        end
+        local tok = tokens[i]
+        i = i + 1
+        if tok then
+            clexer.line = tok[3]
+            clexer.name = name
+            return tok[1], tok[2]
+        end
+    end
+end
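+-- a hedged usage sketch (the file name is hypothetical):
+--   local tok = clexer.scan_c('int x;', 'example.c')
+--   for t, v in tok do
+--       io.write(t, ' ')    -- prints: keyword space iden ;
+--   end
+--   -- calling the iterator with a number peeks without consuming:
+--   -- tok(0) returns the next token as its { type, text, line } table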
+
+return clexer