--[==[
Parser for Lua 5.3
Based on https://github.com/antlr/grammars-v4/blob/master/lua/Lua.g4 and https://github.com/andremm/lua-parser/blob/master/lua-parser/parser.lua
]==]
|
||
|
package.path = package.path .. ";../?.lua"
|
||
|
local pg = require "parser-gen"
|
||
|
-- Length comparison referenced by the CLOSEEQ rule (`=> equals`).
-- Only `a` and `b` are inspected; `s` and `i` are accepted but ignored
-- (presumably the subject and position of a match-time capture -- confirm
-- against parser-gen/LPeg).
-- Returns true when `a` and `b` have the same length, false otherwise.
function equals(s, i, a, b)
	local same_length = (#a == #b)
	return same_length
end
|
||
|
-- Normalises the values produced by an expression rule (used as `-> fixexp`).
--   * more than one value: the values are packed into a table tagged
--     rule = "exp"; the last value is tagged rule = "exp" as well, and the
--     packed table is returned.
--   * exactly one truthy value: the value itself when its rule is
--     "expTokens", otherwise its first child.
--   * anything else: returns no value at all (not nil).
function fixexp (...)
	local captures = {...}
	local count = #captures

	if count > 1 then
		captures.rule = "exp"
		captures[count].rule = "exp"
		return captures
	end

	local only = captures[1]
	if not only then
		-- deliberately return zero values, exactly like falling off the end
		return
	end

	if only.rule == "expTokens" then
		return only
	end
	return only[1]
end
|
||
|
-- Builds a left-nested tree out of a flat `operand (operator operand)*`
-- value list (used as `-> fold`).
--   * more than one value: starts from { rule = "exp", fixexp(first) } and,
--     for every following (operator, operand) pair, wraps the tree so far as
--     { rule = "exp", tree, operator, fixexp(operand) }.
--   * exactly one truthy value: returns its first child.
--   * anything else: returns no value at all (not nil).
function fold (...)
	local parts = {...}
	local count = #parts

	if count <= 1 then
		local only = parts[1]
		if only then
			return only[1]
		end
		-- deliberately return zero values, exactly like falling off the end
		return
	end

	local tree = { rule = "exp", fixexp(parts[1]) }
	local idx = 2
	while idx <= count do
		-- parts[idx] is the operator, parts[idx + 1] its right-hand operand
		tree = { rule = "exp", tree, parts[idx], fixexp(parts[idx + 1]) }
		idx = idx + 2
	end
	return tree
end
|
||
|
-- Error labels referenced from the grammar (`^Label` and `%{Label}`).
-- Messages are taken from
-- https://github.com/andremm/lua-parser/blob/master/lua-parser/parser.lua
--
-- Each entry is either a plain message string, or a two-element table
-- { message, pattern } whose second element is a PEG fragment
-- (presumably the recovery expression parser-gen uses for that label --
-- confirm against the parser-gen documentation).
local labels = {
	ErrExtra="unexpected character(s), expected EOF",
	ErrInvalidStat={"unexpected token, invalid start of statement",[[ (!%nl .)* ]]},

	-- if / elseif
	ErrEndIf="expected 'end' to close the if statement",
	ErrExprIf="expected a condition after 'if'",
	ErrThenIf="expected 'then' after the condition",
	ErrExprEIf="expected a condition after 'elseif'",
	ErrThenEIf="expected 'then' after the condition",

	-- do / while / repeat
	ErrEndDo="expected 'end' to close the do block",
	ErrExprWhile="expected a condition after 'while'",
	ErrDoWhile="expected 'do' after the condition",
	ErrEndWhile="expected 'end' to close the while loop",
	ErrUntilRep="expected 'until' at the end of the repeat loop",
	-- message previously read "a conditions"; now consistent with ErrExprIf/ErrExprWhile
	ErrExprRep="expected a condition after 'until'",

	-- for
	ErrForRange="expected a numeric or generic range after 'for'",
	ErrEndFor="expected 'end' to close the for loop",
	ErrExprFor1="expected a starting expression for the numeric range",
	ErrCommaFor="expected ',' to split the start and end of the range",
	ErrExprFor2="expected an ending expression for the numeric range",
	ErrExprFor3={"expected a step expression for the numeric range after ','",[[ (!'do' !%nl .)* ]]},
	ErrInFor="expected '=' or 'in' after the variable(s)",
	ErrEListFor="expected one or more expressions after 'in'",
	ErrDoFor="expected 'do' after the range of the for loop",

	-- local declarations and assignments
	ErrDefLocal="expected a function definition or assignment after local",
	ErrNameLFunc="expected a function name after 'function'",
	ErrEListLAssign="expected one or more expressions after '='",
	ErrEListAssign="expected one or more expressions after '='",

	-- function definitions
	ErrFuncName="expected a function name after 'function'",
	ErrNameFunc1="expected a function name after '.'",
	ErrNameFunc2="expected a method name after ':'",
	ErrOParenPList="expected '(' for the parameter list",
	ErrCParenPList="expected ')' to close the parameter list",
	ErrEndFunc="expected 'end' to close the function body",
	ErrParList="expected a variable name or '...' after ','",

	-- labels and goto
	ErrLabel="expected a label name after '::'",
	ErrCloseLabel="expected '::' after the label",
	ErrGoto="expected a label after 'goto'",

	-- lists
	ErrVarList={"expected a variable name after ','",[[ (!'=' !%nl .)* ]]},
	ErrExprList="expected an expression after ','",

	-- binary and unary expressions
	ErrOrExpr="expected an expression after 'or'",
	ErrAndExpr="expected an expression after 'and'",
	ErrRelExpr="expected an expression after the relational operator",

	ErrBitwiseExpr="expected an expression after bitwise operator",

	ErrConcatExpr="expected an expression after '..'",
	ErrAddExpr="expected an expression after the additive operator",
	ErrMulExpr="expected an expression after the multiplicative operator",
	ErrUnaryExpr="expected an expression after the unary operator",
	ErrPowExpr="expected an expression after '^'",

	-- primary expressions, indexing and calls
	ErrExprParen="expected an expression after '('",
	ErrCParenExpr="expected ')' to close the expression",
	ErrNameIndex="expected a field name after '.'",
	ErrExprIndex="expected an expression after '['",
	ErrCBracketIndex="expected ']' to close the indexing expression",
	ErrNameMeth="expected a method name after ':'",
	ErrMethArgs="expected some arguments for the method call (or '()')",

	ErrCParenArgs="expected ')' to close the argument list",

	-- table constructors
	ErrCBraceTable="expected '}' to close the table constructor",
	ErrEqField="expected '=' after the table key",
	ErrExprField="expected an expression after '='",
	ErrExprFKey={"expected an expression after '[' for the table key",[[ (!']' !%nl .)* ]] },
	ErrCBracketFKey={"expected ']' to close the table key",[[ (!'=' !%nl .)* ]]},

	-- numbers
	ErrDigitHex="expected one or more hexadecimal digits after '0x'",
	ErrDigitDeci="expected one or more digits after the decimal point",
	ErrDigitExpo="expected one or more digits for the exponent",

	-- strings
	ErrQuote="unclosed string",
	ErrHexEsc={"expected exactly two hexadecimal digits after '\\x'",[[ (!('"' / "'" / %nl) .)* ]]},
	ErrOBraceUEsc="expected '{' after '\\u'",
	ErrDigitUEsc={"expected one or more hexadecimal digits for the UTF-8 code point",[[ (!'}' !%nl .)* ]]},
	ErrCBraceUEsc={"expected '}' after the code point",[[ (!('"' / "'") .)* ]]},
	ErrEscSeq={"invalid escape sequence",[[ (!('"' / "'" / %nl) .)* ]]},
	ErrCloseLStr="unclosed long string",
	ErrEqAssign="expected '=' after variable list in assign statement"
}
|
||
|
-- Register the error labels, then compile the Lua 5.3 grammar.
--
-- The second argument to pg.compile exposes the helper functions that the
-- grammar refers to by name: `-> fixexp` / `-> fold` on the expression rules
-- and `=> equals` on CLOSEEQ.
--
-- Layout of the grammar text:
--   * lower-case rules: statements, expressions, operators;
--   * after the "-- lexer" marker: upper-case token rules, several of them
--     preceded by the `fragment` keyword;
--   * SKIP, HELPER and SYNC at the end (presumably the whitespace/comment
--     and error-synchronisation rules parser-gen looks up by name --
--     confirm against the parser-gen documentation).
--
-- DECESC lists the three-digit alternative first: PEG choice is ordered, so
-- with `DIGIT DIGIT?` in front the `[0-2] DIGIT DIGIT` alternative could
-- never be tried and an escape such as \123 was split into \12 followed by
-- a stray digit.
pg.setlabels(labels)

local grammar = pg.compile([==[
	chunk		<-	block (!.)^ErrExtra
	block		<-	stat* retstat?
	stat		<-	';' /
				functioncall /
				varlist '='^ErrEqAssign explist^ErrEListAssign /
				'break' /
				'goto' NAME^ErrGoto /
				'do' block 'end'^ErrEndDo /
				'while' exp^ErrExprWhile 'do'^ErrDoWhile block 'end'^ErrEndWhile /
				'repeat' block 'until'^ErrUntilRep exp^ErrExprRep /
				'if' exp^ErrExprIf 'then'^ErrThenIf block ('elseif' exp^ErrExprEIf 'then'^ErrThenEIf block)* ('else' block)? 'end'^ErrEndIf /
				'for' (forNum / forIn)^ErrForRange 'do'^ErrDoFor block 'end'^ErrEndFor /

				'function' funcname^ErrFuncName funcbody /
				'local' (localAssign / localFunc)^ErrDefLocal /
				label /
				!blockEnd %{ErrInvalidStat}
	blockEnd	<-	'return' / 'end' / 'elseif' / 'else' / 'until' / !.
	retstat		<-	'return' explist? ';'?
	forNum		<-	NAME '=' exp^ErrExprFor1 ','^ErrCommaFor exp^ErrExprFor2 (',' exp^ErrExprFor3)?
	forIn		<-	namelist 'in'^ErrInFor explist^ErrEListFor
	localFunc	<-	'function' NAME^ErrNameLFunc funcbody
	localAssign	<-	namelist ('=' explist^ErrEListLAssign)?
	label		<-	'::' NAME^ErrLabel '::'^ErrCloseLabel
	funcname	<-	NAME ('.' NAME^ErrNameFunc1)* (':' NAME^ErrNameFunc2)?
	varlist		<-	var (',' var^ErrVarList)*
	namelist	<-	NAME (',' NAME)*
	explist		<-	exp (',' exp^ErrExprList )*

	exp		<-	expOR -> fixexp
	expOR		<-	(expAND (operatorOr expAND^ErrOrExpr)*) -> fold
	expAND		<-	(expREL (operatorAnd expREL^ErrAndExpr)*) -> fold
	expREL		<-	(expBIT (operatorComparison expBIT^ErrRelExpr)*) -> fold
	expBIT		<-	(expCAT (operatorBitwise expCAT^ErrBitwiseExpr)*) -> fold
	expCAT		<-	(expADD (operatorStrcat expCAT^ErrConcatExpr)?) -> fixexp
	expADD		<-	(expMUL (operatorAddSub expMUL^ErrAddExpr)*) -> fold
	expMUL		<-	(expUNA (operatorMulDivMod expUNA^ErrMulExpr)*) -> fold
	expUNA		<-	((operatorUnary expUNA^ErrUnaryExpr) / expPOW) -> fixexp
	expPOW		<-	(expTokens (operatorPower expUNA^ErrPowExpr)?) -> fixexp

	expTokens	<-	'nil' / 'false' / 'true' /
				number /
				string /
				'...' /
				'function' funcbody /
				tableconstructor /
				prefixexp

	prefixexp	<-	varOrExp nameAndArgs*
	functioncall	<-	varOrExp nameAndArgs+
	varOrExp	<-	var / brackexp
	brackexp	<-	'(' exp^ErrExprParen ')'^ErrCParenExpr
	var		<-	(NAME / brackexp varSuffix) varSuffix*
	varSuffix	<-	nameAndArgs* ('[' exp^ErrExprIndex ']'^ErrCBracketIndex / '.' !'.' NAME^ErrNameIndex)
	nameAndArgs	<-	(':' !':' NAME^ErrNameMeth args^ErrMethArgs) /
				args
	args		<-	'(' explist? ')'^ErrCParenArgs / tableconstructor / string
	funcbody	<-	'('^ErrOParenPList parlist? ')'^ErrCParenPList block 'end'^ErrEndFunc
	parlist		<-	namelist (',' '...'^ErrParList)? / '...'
	tableconstructor<-	'{' fieldlist? '}'^ErrCBraceTable
	fieldlist	<-	field (fieldsep field)* fieldsep?
	field		<-	!OPEN '[' exp^ErrExprFKey ']'^ErrCBracketFKey '='^ErrEqField exp^ErrExprField /
				NAME '=' exp /
				exp
	fieldsep	<-	',' / ';'
	operatorOr	<-	'or'
	operatorAnd	<-	'and'
	operatorComparison<-	'<=' / '>=' / '~=' / '==' / '<' !'<' / '>' !'>'
	operatorStrcat	<-	!'...' '..'
	operatorAddSub	<-	'+' / '-'
	operatorMulDivMod<-	'*' / '%' / '//' / '/'
	operatorBitwise	<-	'&' / '|' / !'~=' '~' / '<<' / '>>'
	operatorUnary	<-	'not' / '#' / '-' / !'~=' '~'
	operatorPower	<-	'^'
	number		<-	FLOAT / HEX_FLOAT / HEX / INT
	string		<-	NORMALSTRING / CHARSTRING / LONGSTRING
	-- lexer
	fragment
	RESERVED	<-	KEYWORDS !IDREST
	fragment
	IDREST		<-	[a-zA-Z_0-9]
	fragment
	KEYWORDS	<-	'and' / 'break' / 'do' / 'elseif' / 'else' / 'end' /
				'false' / 'for' / 'function' / 'goto' / 'if' / 'in' /
				'local' / 'nil' / 'not' / 'or' / 'repeat' / 'return' /
				'then' / 'true' / 'until' / 'while'
	NAME		<-	!RESERVED [a-zA-Z_] [a-zA-Z_0-9]*
	fragment
	NORMALSTRING	<-	'"' {( ESC / [^"\] )*} '"'^ErrQuote
	fragment
	CHARSTRING	<-	"'" {( ESC / [^\'] )*} "'"^ErrQuote
	fragment
	LONGSTRING	<-	(OPEN {(!CLOSEEQ .)*} CLOSE^ErrCloseLStr) -> 1 -- capture only the string

	fragment
	OPEN		<-	'[' {:openEq: EQUALS :} '[' %nl?
	fragment
	CLOSE		<-	']' {EQUALS} ']'
	fragment
	EQUALS		<-	'='*
	fragment
	CLOSEEQ		<-	(CLOSE =openEq) => equals

	INT		<-	DIGIT+
	HEX		<-	'0' [xX] HEXDIGIT+^ErrDigitHex
	FLOAT		<-	DIGIT+ '.' DIGIT* ExponentPart? /
				'.' !'.' DIGIT+^ErrDigitDeci ExponentPart? /
				DIGIT+ ExponentPart
	HEX_FLOAT	<-	'0' [xX] HEXDIGIT+ '.' HEXDIGIT* HexExponentPart? /
				'0' [xX] '.' HEXDIGIT+ HexExponentPart? /
				'0' [xX] HEXDIGIT+^ErrDigitHex HexExponentPart
	fragment
	ExponentPart	<-	[eE] [+-]? DIGIT+^ErrDigitExpo
	fragment
	HexExponentPart	<-	[pP] [+-]? DIGIT+^ErrDigitExpo
	fragment
	ESC		<-	'\' [abfnrtvz"'\] /
				'\' %nl /
				DECESC /
				HEXESC/
				UTFESC/
				'\' %{ErrEscSeq}
	fragment
	DECESC		<-	'\' ( [0-2] DIGIT DIGIT / DIGIT DIGIT? )
	fragment
	HEXESC		<-	'\' 'x' (HEXDIGIT HEXDIGIT)^ErrHexEsc
	fragment
	UTFESC		<-	'\' 'u' '{'^ErrOBraceUEsc HEXDIGIT+^ErrDigitUEsc '}'^ErrCBraceUEsc
	fragment
	DIGIT		<-	[0-9]
	fragment
	HEXDIGIT	<-	[0-9a-fA-F]


	fragment
	COMMENT		<-	'--' LONGSTRING -> 0 -- skip this
	fragment
	LINE_COMMENT	<-	'--' COM_TYPES ( %nl / !.)
	fragment
	COM_TYPES	<-	'[' '='* [^[=%nl] [^%nl]* /
				'[' '='* /
				[^[%nl] [^%nl]* /
				''
	fragment
	SHEBANG		<-	'#' '!' [^%nl]*


	SKIP		<-	%nl / %s / COMMENT / LINE_COMMENT / SHEBANG
	fragment
	HELPER		<-	RESERVED / '(' / ')' -- for sync expression
	SYNC		<-	((!HELPER !SKIP .)+ / .?) SKIP* -- either sync to reserved keyword or skip characters and consume them

]==],{ equals = equals, fixexp = fixexp, fold = fold })
|
||
|
-- Running number of the next syntax error to report (1-based).
local errnr = 1

-- Error callback handed to pg.parse: prints one line per syntax error and
-- advances the running error number.
--   desc       error description text
--   line, col  location included in the printed message
--   sfail, recexp  accepted but not used here
local function err (desc, line, col, sfail, recexp)
	local location = " at line "..line.."(col "..col..")"
	local message = "Syntax error #"..errnr..": "..desc..location
	print(message)
	errnr = errnr + 1
end
|
||
|
-- Parses `input` with the compiled grammar, reporting syntax errors through
-- `err`. The error numbering restarts at 1 on every call.
-- Returns the first two results of pg.parse (named ast and errs here).
local function parse (input)
	-- restart the "Syntax error #N" numbering for this run
	errnr = 1
	local tree, problems = pg.parse(input, grammar, err)
	return tree, problems
end
|
||
|
-- Module export: a table whose single field `parse` is the parse function.
return {parse=parse}
|