lang-bootstrap/06/parser-gen/parsers/lua-parser.lua
Dawid Sobczak e6b88d5a0f Add stage 06: Lua bootstrap
The goal of stage 06 is to try to parse Zig syntax in Lua. I pulled in
lpeglabel 1.2.0 and parser-gen from GitHub to get started. All of this
needs to be cleaned up rather soon.

Lua bootstraps using tcc and musl from the previous stage. Since musl
0.6.0 doesn't support dynamic linking, this build of Lua doesn't support
shared libraries. I couldn't easily patch musl with dlopen and friends,
so instead I link statically and call the dependencies through the C API.
2023-07-06 12:32:47 +01:00

291 lines
11 KiB
Lua

--[==[
Parser for Lua 5.3
Based on https://github.com/antlr/grammars-v4/blob/master/lua/Lua.g4 and https://github.com/andremm/lua-parser/blob/master/lua-parser/parser.lua
]==]
package.path = package.path .. ";../?.lua"
local pg = require "parser-gen"
--- Predicate that is true when two captured strings have the same length.
-- The grammar's CLOSEEQ rule invokes it as `=> equals`.
-- @param s unused
-- @param i unused
-- @param a first value whose length is compared
-- @param b second value whose length is compared
-- @return boolean true when #a equals #b
function equals(s, i, a, b)
  local first_len, second_len = #a, #b
  return first_len == second_len
end
--- Normalise the captures of an expression rule.
-- With more than one capture, the captures are returned as a single table
-- tagged rule = "exp", and the last capture is tagged rule = "exp" as well.
-- With exactly one capture, an "expTokens" node is returned unchanged;
-- any other single capture is unwrapped to its first element.
-- With no (truthy) first capture the function returns no values at all.
-- @param ... captured values
-- @return the normalised node, or nothing
function fixexp(...)
  local parts = {...}
  local count = #parts

  if count > 1 then
    parts.rule = "exp"
    parts[count].rule = "exp"
    return parts
  end

  local only = parts[1]
  if not only then
    -- deliberately returns zero values rather than nil
    return
  end

  if only.rule == "expTokens" then
    return only
  end
  return only[1]
end
--- Fold a flat capture list `operand, op, operand, op, operand, ...` into
-- nested tables tagged rule = "exp", nesting towards the left.
-- Every operand is passed through the global `fixexp` before being stored.
-- With a single capture, its first element is returned; with no (truthy)
-- first capture the function returns no values at all.
-- @param ... captured operands and operators, alternating
-- @return the folded node, or nothing
function fold(...)
  local items = {...}
  local total = #items

  if total <= 1 then
    local first = items[1]
    if first then
      return first[1]
    end
    -- deliberately returns zero values rather than nil
    return
  end

  local acc = { rule = "exp", fixexp(items[1]) }
  local pos = 2
  while pos <= total do
    -- items[pos] is the operator, items[pos + 1] the right-hand operand
    acc = { rule = "exp", acc, items[pos], fixexp(items[pos + 1]) }
    pos = pos + 2
  end
  return acc
end
-- from https://github.com/andremm/lua-parser/blob/master/lua-parser/parser.lua
-- Error labels referenced by the grammar (as `^Label` or `%{Label}`), keyed by
-- label name. Each value is either a plain message string or a two-element
-- table { message, pattern }.
-- NOTE(review): the second element looks like a parser-gen recovery
-- expression for that label — confirm against the parser-gen documentation.
local labels = {
  ErrExtra = "unexpected character(s), expected EOF",
  ErrInvalidStat = {"unexpected token, invalid start of statement",[[ (!%nl .)* ]]},

  -- if / do / while / repeat
  ErrEndIf = "expected 'end' to close the if statement",
  ErrExprIf = "expected a condition after 'if'",
  ErrThenIf = "expected 'then' after the condition",
  ErrExprEIf = "expected a condition after 'elseif'",
  ErrThenEIf = "expected 'then' after the condition",
  ErrEndDo = "expected 'end' to close the do block",
  ErrExprWhile = "expected a condition after 'while'",
  ErrDoWhile = "expected 'do' after the condition",
  ErrEndWhile = "expected 'end' to close the while loop",
  ErrUntilRep = "expected 'until' at the end of the repeat loop",
  -- message corrected: previously read "a conditions"
  ErrExprRep = "expected a condition after 'until'",

  -- for loops
  ErrForRange = "expected a numeric or generic range after 'for'",
  ErrEndFor = "expected 'end' to close the for loop",
  ErrExprFor1 = "expected a starting expression for the numeric range",
  ErrCommaFor = "expected ',' to split the start and end of the range",
  ErrExprFor2 = "expected an ending expression for the numeric range",
  ErrExprFor3 = {"expected a step expression for the numeric range after ','",[[ (!'do' !%nl .)* ]]},
  ErrInFor = "expected '=' or 'in' after the variable(s)",
  ErrEListFor = "expected one or more expressions after 'in'",
  ErrDoFor = "expected 'do' after the range of the for loop",

  -- local declarations, assignments and function definitions
  ErrDefLocal = "expected a function definition or assignment after local",
  ErrNameLFunc = "expected a function name after 'function'",
  ErrEListLAssign = "expected one or more expressions after '='",
  ErrEListAssign = "expected one or more expressions after '='",
  ErrFuncName = "expected a function name after 'function'",
  ErrNameFunc1 = "expected a function name after '.'",
  ErrNameFunc2 = "expected a method name after ':'",
  ErrOParenPList = "expected '(' for the parameter list",
  ErrCParenPList = "expected ')' to close the parameter list",
  ErrEndFunc = "expected 'end' to close the function body",
  ErrParList = "expected a variable name or '...' after ','",

  -- labels and goto
  ErrLabel = "expected a label name after '::'",
  ErrCloseLabel = "expected '::' after the label",
  ErrGoto = "expected a label after 'goto'",

  -- lists and expressions
  ErrVarList = {"expected a variable name after ','",[[ (!'=' !%nl .)* ]]},
  ErrExprList = "expected an expression after ','",
  ErrOrExpr = "expected an expression after 'or'",
  ErrAndExpr = "expected an expression after 'and'",
  ErrRelExpr = "expected an expression after the relational operator",
  ErrBitwiseExpr = "expected an expression after bitwise operator",
  ErrConcatExpr = "expected an expression after '..'",
  ErrAddExpr = "expected an expression after the additive operator",
  ErrMulExpr = "expected an expression after the multiplicative operator",
  ErrUnaryExpr = "expected an expression after the unary operator",
  ErrPowExpr = "expected an expression after '^'",
  ErrExprParen = "expected an expression after '('",
  ErrCParenExpr = "expected ')' to close the expression",

  -- indexing, method calls and arguments
  ErrNameIndex = "expected a field name after '.'",
  ErrExprIndex = "expected an expression after '['",
  ErrCBracketIndex = "expected ']' to close the indexing expression",
  ErrNameMeth = "expected a method name after ':'",
  ErrMethArgs = "expected some arguments for the method call (or '()')",
  ErrCParenArgs = "expected ')' to close the argument list",

  -- table constructors
  ErrCBraceTable = "expected '}' to close the table constructor",
  ErrEqField = "expected '=' after the table key",
  ErrExprField = "expected an expression after '='",
  ErrExprFKey = {"expected an expression after '[' for the table key",[[ (!']' !%nl .)* ]] },
  ErrCBracketFKey = {"expected ']' to close the table key",[[ (!'=' !%nl .)* ]]},

  -- numbers and strings
  ErrDigitHex = "expected one or more hexadecimal digits after '0x'",
  ErrDigitDeci = "expected one or more digits after the decimal point",
  ErrDigitExpo = "expected one or more digits for the exponent",
  ErrQuote = "unclosed string",
  ErrHexEsc = {"expected exactly two hexadecimal digits after '\\x'",[[ (!('"' / "'" / %nl) .)* ]]},
  ErrOBraceUEsc = "expected '{' after '\\u'",
  ErrDigitUEsc = {"expected one or more hexadecimal digits for the UTF-8 code point",[[ (!'}' !%nl .)* ]]},
  ErrCBraceUEsc = {"expected '}' after the code point",[[ (!('"' / "'") .)* ]]},
  ErrEscSeq = {"invalid escape sequence",[[ (!('"' / "'" / %nl) .)* ]]},
  ErrCloseLStr = "unclosed long string",

  ErrEqAssign = "expected '=' after variable list in assign statement"
}
-- Hand the label table to parser-gen before compiling the grammar that uses
-- those labels.
pg.setlabels(labels)
-- Compile the Lua 5.3 grammar. `chunk` is the first rule. The grammar text
-- refers to the error labels above (`^ErrXxx` and `%{ErrXxx}`) and to three
-- Lua callbacks supplied in the second argument of pg.compile:
--   * equals - used as `=> equals` in CLOSEEQ so that a long-string closer
--              matches only when its '=' run has the same length as the opener;
--   * fixexp - used as `-> fixexp` on exp, expCAT, expUNA and expPOW;
--   * fold   - used as `-> fold` on the operator rules that repeat with `*`
--              (expOR, expAND, expREL, expBIT, expADD, expMUL).
-- NOTE(review): `fragment`, SKIP and SYNC appear to be parser-gen conventions
-- (token-level rules, whitespace/comment skipping, error-recovery
-- synchronisation) — confirm against the parser-gen documentation.
-- NOTE(review): in DECESC the first alternative `DIGIT DIGIT?` always succeeds
-- when a digit follows the backslash, so the three-digit alternative looks
-- unreachable and the third digit of e.g. \065 would be read as an ordinary
-- string character — verify whether this is intended.
local grammar = pg.compile([==[
chunk <- block (!.)^ErrExtra
block <- stat* retstat?
stat <- ';' /
functioncall /
varlist '='^ErrEqAssign explist^ErrEListAssign /
'break' /
'goto' NAME^ErrGoto /
'do' block 'end'^ErrEndDo /
'while' exp^ErrExprWhile 'do'^ErrDoWhile block 'end'^ErrEndWhile /
'repeat' block 'until'^ErrUntilRep exp^ErrExprRep /
'if' exp^ErrExprIf 'then'^ErrThenIf block ('elseif' exp^ErrExprEIf 'then'^ErrThenEIf block)* ('else' block)? 'end'^ErrEndIf /
'for' (forNum / forIn)^ErrForRange 'do'^ErrDoFor block 'end'^ErrEndFor /
'function' funcname^ErrFuncName funcbody /
'local' (localAssign / localFunc)^ErrDefLocal /
label /
!blockEnd %{ErrInvalidStat}
blockEnd <- 'return' / 'end' / 'elseif' / 'else' / 'until' / !.
retstat <- 'return' explist? ';'?
forNum <- NAME '=' exp^ErrExprFor1 ','^ErrCommaFor exp^ErrExprFor2 (',' exp^ErrExprFor3)?
forIn <- namelist 'in'^ErrInFor explist^ErrEListFor
localFunc <- 'function' NAME^ErrNameLFunc funcbody
localAssign <- namelist ('=' explist^ErrEListLAssign)?
label <- '::' NAME^ErrLabel '::'^ErrCloseLabel
funcname <- NAME ('.' NAME^ErrNameFunc1)* (':' NAME^ErrNameFunc2)?
varlist <- var (',' var^ErrVarList)*
namelist <- NAME (',' NAME)*
explist <- exp (',' exp^ErrExprList )*
exp <- expOR -> fixexp
expOR <- (expAND (operatorOr expAND^ErrOrExpr)*) -> fold
expAND <- (expREL (operatorAnd expREL^ErrAndExpr)*) -> fold
expREL <- (expBIT (operatorComparison expBIT^ErrRelExpr)*) -> fold
expBIT <- (expCAT (operatorBitwise expCAT^ErrBitwiseExpr)*) -> fold
expCAT <- (expADD (operatorStrcat expCAT^ErrConcatExpr)?) -> fixexp
expADD <- (expMUL (operatorAddSub expMUL^ErrAddExpr)*) -> fold
expMUL <- (expUNA (operatorMulDivMod expUNA^ErrMulExpr)*) -> fold
expUNA <- ((operatorUnary expUNA^ErrUnaryExpr) / expPOW) -> fixexp
expPOW <- (expTokens (operatorPower expUNA^ErrPowExpr)?) -> fixexp
expTokens <- 'nil' / 'false' / 'true' /
number /
string /
'...' /
'function' funcbody /
tableconstructor /
prefixexp
prefixexp <- varOrExp nameAndArgs*
functioncall <- varOrExp nameAndArgs+
varOrExp <- var / brackexp
brackexp <- '(' exp^ErrExprParen ')'^ErrCParenExpr
var <- (NAME / brackexp varSuffix) varSuffix*
varSuffix <- nameAndArgs* ('[' exp^ErrExprIndex ']'^ErrCBracketIndex / '.' !'.' NAME^ErrNameIndex)
nameAndArgs <- (':' !':' NAME^ErrNameMeth args^ErrMethArgs) /
args
args <- '(' explist? ')'^ErrCParenArgs / tableconstructor / string
funcbody <- '('^ErrOParenPList parlist? ')'^ErrCParenPList block 'end'^ErrEndFunc
parlist <- namelist (',' '...'^ErrParList)? / '...'
tableconstructor<- '{' fieldlist? '}'^ErrCBraceTable
fieldlist <- field (fieldsep field)* fieldsep?
field <- !OPEN '[' exp^ErrExprFKey ']'^ErrCBracketFKey '='^ErrEqField exp^ErrExprField /
NAME '=' exp /
exp
fieldsep <- ',' / ';'
operatorOr <- 'or'
operatorAnd <- 'and'
operatorComparison<- '<=' / '>=' / '~=' / '==' / '<' !'<' / '>' !'>'
operatorStrcat <- !'...' '..'
operatorAddSub <- '+' / '-'
operatorMulDivMod<- '*' / '%' / '//' / '/'
operatorBitwise <- '&' / '|' / !'~=' '~' / '<<' / '>>'
operatorUnary <- 'not' / '#' / '-' / !'~=' '~'
operatorPower <- '^'
number <- FLOAT / HEX_FLOAT / HEX / INT
string <- NORMALSTRING / CHARSTRING / LONGSTRING
-- lexer
fragment
RESERVED <- KEYWORDS !IDREST
fragment
IDREST <- [a-zA-Z_0-9]
fragment
KEYWORDS <- 'and' / 'break' / 'do' / 'elseif' / 'else' / 'end' /
'false' / 'for' / 'function' / 'goto' / 'if' / 'in' /
'local' / 'nil' / 'not' / 'or' / 'repeat' / 'return' /
'then' / 'true' / 'until' / 'while'
NAME <- !RESERVED [a-zA-Z_] [a-zA-Z_0-9]*
fragment
NORMALSTRING <- '"' {( ESC / [^"\] )*} '"'^ErrQuote
fragment
CHARSTRING <- "'" {( ESC / [^\'] )*} "'"^ErrQuote
fragment
LONGSTRING <- (OPEN {(!CLOSEEQ .)*} CLOSE^ErrCloseLStr) -> 1 -- capture only the string
fragment
OPEN <- '[' {:openEq: EQUALS :} '[' %nl?
fragment
CLOSE <- ']' {EQUALS} ']'
fragment
EQUALS <- '='*
fragment
CLOSEEQ <- (CLOSE =openEq) => equals
INT <- DIGIT+
HEX <- '0' [xX] HEXDIGIT+^ErrDigitHex
FLOAT <- DIGIT+ '.' DIGIT* ExponentPart? /
'.' !'.' DIGIT+^ErrDigitDeci ExponentPart? /
DIGIT+ ExponentPart
HEX_FLOAT <- '0' [xX] HEXDIGIT+ '.' HEXDIGIT* HexExponentPart? /
'0' [xX] '.' HEXDIGIT+ HexExponentPart? /
'0' [xX] HEXDIGIT+^ErrDigitHex HexExponentPart
fragment
ExponentPart <- [eE] [+-]? DIGIT+^ErrDigitExpo
fragment
HexExponentPart <- [pP] [+-]? DIGIT+^ErrDigitExpo
fragment
ESC <- '\' [abfnrtvz"'\] /
'\' %nl /
DECESC /
HEXESC/
UTFESC/
'\' %{ErrEscSeq}
fragment
DECESC <- '\' ( DIGIT DIGIT? / [0-2] DIGIT DIGIT)
fragment
HEXESC <- '\' 'x' (HEXDIGIT HEXDIGIT)^ErrHexEsc
fragment
UTFESC <- '\' 'u' '{'^ErrOBraceUEsc HEXDIGIT+^ErrDigitUEsc '}'^ErrCBraceUEsc
fragment
DIGIT <- [0-9]
fragment
HEXDIGIT <- [0-9a-fA-F]
fragment
COMMENT <- '--' LONGSTRING -> 0 -- skip this
fragment
LINE_COMMENT <- '--' COM_TYPES ( %nl / !.)
fragment
COM_TYPES <- '[' '='* [^[=%nl] [^%nl]* /
'[' '='* /
[^[%nl] [^%nl]* /
''
fragment
SHEBANG <- '#' '!' [^%nl]*
SKIP <- %nl / %s / COMMENT / LINE_COMMENT / SHEBANG
fragment
HELPER <- RESERVED / '(' / ')' -- for sync expression
SYNC <- ((!HELPER !SKIP .)+ / .?) SKIP* -- either sync to reserved keyword or skip characters and consume them
]==],{ equals = equals, fixexp = fixexp, fold = fold })
-- Sequence number of the next syntax error to be printed.
local errnr = 1

--- Error callback passed to pg.parse: prints one numbered diagnostic line
-- and advances the error counter.
-- @param desc   description text placed after the error number
-- @param line   line number shown in the message
-- @param col    column number shown in the message
-- @param sfail  unused
-- @param recexp unused
local function err(desc, line, col, sfail, recexp)
  local where = " at line " .. line .. "(col " .. col .. ")"
  local prefix = "Syntax error #" .. errnr .. ": "
  print(prefix .. desc .. where)
  errnr = errnr + 1
end
--- Parse a string with the compiled grammar.
-- The error counter is reset first, so the diagnostics printed by `err`
-- are numbered from 1 on every call.
-- @param input text to parse
-- @return the first two results of pg.parse, in order
local function parse(input)
  errnr = 1
  local tree, problems = pg.parse(input, grammar, err)
  return tree, problems
end

-- Module interface: `parse` is the only exported function.
local exports = { parse = parse }
return exports