The goal of stage 06 is to try to parse Zig syntax in Lua. I pulled in lpeglabel 1.2.0 and parser-gen from GitHub to get started. All of this needs to be cleaned up rather soon. Lua bootstraps using tcc and musl from the previous stage. Since musl 0.6.0 doesn't support dynamic linking, this build of Lua doesn't support shared libraries. I couldn't easily patch musl with dlopen and friends, so instead I link statically and call the dependencies through the C API.
401 lines
7.2 KiB
Lua
401 lines
7.2 KiB
Lua
-- Error generation code for LL(1) grammars
|
|
-- AST funcs:
|
|
|
|
--- Tell whether an AST node is a leaf (terminal, nonterminal, function,
-- string or number node).
-- @param t AST node table
-- @return the first truthy value among the fields `t`, `nt`, `func`, `s`;
--         otherwise the (possibly nil) `num` field
local function isfinal(t)
  local leaf = t.t or t.nt or t.func
  if leaf then
    return leaf
  end
  return t.s or t.num
end
|
|
|
|
--- Tell whether an AST node is an operator node.
-- @param t AST node table
-- @return the node's `action` field (nil when the node has none)
local function isaction(t)
  local action = t.action
  return action
end
|
|
|
|
|
|
--- Tell whether a value is a grammar rule entry.
-- @param t candidate value (may be nil or false)
-- @return `t` itself when it is falsy, otherwise its `rulename` field
local function isrule(t)
  if not t then
    return t
  end
  return t.rulename
end
|
|
|
|
--- Tell whether a value is a grammar, i.e. a table without an `action`
-- field whose first element is a rule.
-- @param t candidate value
-- @return false when `t` is not a table or is an operator node; otherwise
--         whatever isrule() returns for `t[1]`
local function isgrammar(t)
  if type(t) ~= "table" or t.action then
    return false
  end
  return isrule(t[1])
end
|
|
--- Tell whether a node is flagged as a token.
-- @param t AST node or rule table
-- @return true only when the `token` field is exactly the string "1"
local function istoken (t)
  local flag = t.token
  return flag == "1"
end
|
|
|
|
--- Classify a leaf node.
-- The fields are probed in the fixed order `t`, `nt`, `func`, `s`, `num`.
-- @param t AST node table
-- @return kind string ("t", "nt", "func", "s" or "num") followed by the
--         field's value; for "nt" a third result, istoken(t), is added.
--         A single nil is returned when none of the fields is set.
local function finalNode (t)
  local value = t.t
  if value then
    return "t", value -- terminal
  end

  value = t.nt
  if value then
    return "nt", value, istoken(t) -- nonterminal
  end

  value = t.func
  if value then
    return "func", value -- function
  end

  value = t.s
  if value then
    return "s", value
  end

  value = t.num
  if value then
    return "num", value
  end

  return nil
end
|
|
|
|
--[[

function rightleaf:

returns the right-most leaves of the AST as a list of string keys.
used for followset keys

input: ((A B) C)
output: {"nt_C"}

input: (A / B / C) (D / 'e')
output: {"nt_D","t_e"}

input: A*
output: {"nt_A",''}

input: !A
output: {"not_nt_A"}

]]
|
|
--- Build a copy of a key list in which every entry is prefixed with "not_".
-- The input table is left untouched.
-- @param t table of strings
-- @return new table with the same keys and "not_"-prefixed values
local function addnot(t)
  local negated = {}
  for key, name in pairs(t) do
    negated[key] = "not_" .. name
  end
  return negated
end
|
|
--- Append the empty string (epsilon) to a list.
-- The list is modified in place; no copy is made.
-- @param t list to extend
-- @return the same table `t`
local function addepsilon(t)
  table.insert(t, '')
  return t
end
|
|
--- Append every value of `second` to `first`.
-- `first` is modified in place; keys of `second` are ignored and its values
-- are appended in pairs() traversal order.
-- @param first list that receives the values
-- @param second table whose values are appended
-- @return the same table `first`
local function mergetables(first,second)
  for _, item in pairs(second) do
    table.insert(first, item)
  end
  return first
end
|
|
|
|
--- Collect the string keys of the right-most leaves of an AST.
-- The keys are used to index the FOLLOW table. A leaf yields
-- "<kind>_<value>", where <kind> is the first result of finalNode()
-- (e.g. "t_if" for a terminal, "nt_if" for a nonterminal). Operators that
-- may match nothing contribute the empty string '', negations prefix their
-- operand's keys with "not_", and any-character / range nodes yield
-- "_anychar".
-- @param t AST node table
-- @return list of string keys
-- @raise error "Unsupported action '<action>'" for an unknown operator
local function rightleaf(t)
  if isfinal(t) then
    -- todo: replace nt_A with FIRST(A)
    local kind, value = finalNode(t)
    return {kind .. "_" .. value} -- terminals: t_if, nonterminals: nt_if
  end

  local action = t.action
  local op1 = t.op1
  local op2 = t.op2

  if action == "or" then
    return mergetables(rightleaf(op1), rightleaf(op2))

  elseif action == "and" then -- consider only RHS
    return rightleaf(op2)

  elseif action == "&" or action == "+" then
    return rightleaf(op1)

  elseif action == "!" or action == "invert" then
    return addnot(rightleaf(op1))

  elseif action == "*" or action == "?" then
    return addepsilon(rightleaf(op1))

  elseif action == "^" then
    local count = op2["num"] -- second operand is number
    if count >= 1 then
      return rightleaf(op1)
    else
      return addepsilon(rightleaf(op1))
    end

  elseif action == "^LABEL" or action == "->" or action == "=>" or action == "tcap" or action == "gcap" or action == "subcap" or action == "scap" then
    return rightleaf(op1)

  elseif action == "bref" or action == "poscap" or action == "label" or action == "%" then
    return addepsilon({}) -- only empty string

  elseif action == "anychar" or action == "range" then
    return {"_anychar"}

  else
    -- tostring() keeps this diagnostic intact when the node has no action
    -- at all; concatenating nil would raise an unrelated error instead.
    error("Unsupported action '"..tostring(action).."'")
  end
end
|
|
|
|
|
|
-- Follow table of the rule currently being processed, keyed by rightleaf()
-- strings; reassigned by follow() and adderrors().
local FOLLOW = {}
|
|
|
|
--- Walk an AST and record, for every concatenation, what follows the
-- right-most leaves of its left operand.
-- Side effect: for each "and" node, the result computed for the right
-- operand is appended to FOLLOW[key] for every key in rightleaf(op1).
-- @param t AST node table
-- @param dontsplit when truthy, an "or" node is returned whole instead of
--        being split into its alternatives
--        NOTE(review): no call visible in this file passes a truthy value
-- @return for most nodes a list whose entries are AST nodes, '' (epsilon)
--         or "_anychar"; for "!" a single node table
--         {action="!", op1=<result for op1>}
-- @raise error "Unsupported action '<action>'" for an unknown operator
local function follow_aux(t, dontsplit)
  if isfinal(t) then
    return {t}
  end

  local action = t.action
  local op1 = t.op1
  local op2 = t.op2

  if action == "or" then
    if dontsplit then -- do not split "(B / C)" in "A (B / C)"
      return {t}
    else -- return both
      return mergetables(follow_aux(op1), follow_aux(op2))
    end

  elseif action == "and" then -- magic happens here
    -- (A (B / D)) (!E C / D)

    -- 1) FOLLOW(B) = FOLLOW(D) = {(!E C / D)}
    local rightset = rightleaf(op1)
    local rhs = follow_aux(op2)
    for _, key in pairs(rightset) do
      if not FOLLOW[key] then
        FOLLOW[key] = {}
      end
      -- TODO: check if rhs already exists in FOLLOW(key)
      table.insert(FOLLOW[key], rhs)
    end

    -- 2) FOLLOW(A) = {(B / D)}
    return follow_aux(op1)

  elseif action == "&" or action == "+" then
    return follow_aux(op1)

  elseif action == "!" then
    return {action="!", op1=follow_aux(op1)}

  elseif action == "*" or action == "?" then
    return addepsilon(follow_aux(op1))

  elseif action == "^" then
    local count = op2["num"] -- second operand is number
    if count >= 1 then
      return follow_aux(op1)
    else
      return addepsilon(follow_aux(op1))
    end

  elseif action == "^LABEL" or action == "->" or action == "=>" or action == "tcap" or action == "gcap" or action == "subcap" or action == "scap" then
    return follow_aux(op1)

  elseif action == "bref" or action == "poscap" or action == "label" or action == "%" then
    return addepsilon({}) -- only empty string

  elseif action == "anychar" or action == "range" then
    return {"_anychar"}

  elseif action == "invert" then
    return {t} -- whole table

  else
    -- tostring() keeps this diagnostic intact when the node has no action
    -- at all; concatenating nil would raise an unrelated error instead.
    error("Unsupported action '"..tostring(action).."'")
  end
end
|
|
|
|
-- function: follow
|
|
-- finds follow set for the whole AST, with key (rule, term)
|
|
--- Compute the follow tables for a grammar or for a single expression.
-- The module-level FOLLOW table is reset before each rule is traversed and
-- is left pointing at the last table built.
-- @param t grammar (list of entries with `rulename` and `rule` fields) or a
--        single AST
-- @return table mapping each rule name to its FOLLOW table; a non-grammar
--         input is stored under the key ''
local function follow (t)
  local followset = {}

  if not isgrammar(t) then
    FOLLOW = {}
    follow_aux(t)
    followset[''] = FOLLOW
    return followset
  end

  for _, entry in pairs(t) do
    local name = entry.rulename
    FOLLOW = {} -- reset for each rule
    follow_aux(entry.rule) -- apply recursive function
    followset[name] = FOLLOW
  end
  return followset
end
|
|
|
|
-- functions to add errors
|
|
-- find size of table
|
|
--- Count every entry of a table (array and hash part alike).
-- @param t table to measure
-- @return number of key/value pairs visited by pairs()
local function getn (t)
  local count = 0
  for _ in pairs(t) do
    count = count + 1
  end
  return count
end
|
|
-- generate error message by traversing table to the left
|
|
--- Describe what an expression expects by descending through `op1` links
-- until a leaf is reached.
-- @param op AST node table
-- @return a terminal's text wrapped in single quotes; for other leaves the
--         first truthy value among the `nt`, `func`, `s`, `num` fields; or
--         the `action` of the first non-leaf node that has no `op1`
local function printexpect(op)
  local node = op
  while not isfinal(node) do
    local child = node.op1
    if not child then
      return node.action
    end
    node = child
  end

  if node.t then
    return "'" .. node.t .. "'"
  end
  return node.nt or node.func or node.s or node.num
end
|
|
-- Number of error labels generated so far; reset to 0 by adderrors().
local GENERATED_ERRORS = 0
|
|
-- Maps generated label names ("errorgen<N>") to their "Expected ..."
-- descriptions; replaced with a fresh table by adderrors().
local TERRS = {}
|
|
--- Register a new generated error label for an expression.
-- Stores "Expected <printexpect(op)>" in TERRS under the new label name and
-- increments GENERATED_ERRORS.
-- @param op AST node the label describes
-- @param after not used by this function
-- @return label name of the form "errorgen<N>"
-- @raise error "Error label limit reached(255)" when the next index would
--        be 255 or more
local function generateerror(op, after)
  local message = "Expected " .. printexpect(op)

  local index = GENERATED_ERRORS + 1
  if index >= 255 then
    error("Error label limit reached(255)")
  end

  local label = "errorgen" .. index
  TERRS[label] = message
  GENERATED_ERRORS = index
  return label
end
|
|
|
|
|
|
--- Wrap an expression in an error label when the current FOLLOW table
-- allows it.
-- (A / B) C
-- A label is generated iff FOLLOW holds exactly one entry for at least one
-- of the keys in rightleaf(after).
-- @param op AST node that may receive a label
-- @param after AST node whose right-most leaves are looked up in FOLLOW
-- @return {action="^LABEL", op1=op, op2={s=<label>}} when a label was
--         generated, otherwise `op` unchanged
local function tryadderror(op, after)
  if not FOLLOW then
    return op
  end

  local should_label = false
  for _, key in pairs(rightleaf(after)) do
    local entries = FOLLOW[key]
    if entries and getn(entries) == 1 then
      should_label = true
    end
  end

  if not should_label then
    return op
  end

  local label = generateerror(op, after)
  return {action="^LABEL", op1=op, op2={s=label}}
end
|
|
|
|
|
|
-- function: adderrors
|
|
-- traverses the AST and adds error labels where possible
|
|
|
|
--- Recursively add error labels to an AST, modifying it in place.
-- For every "and" node outside a token rule, the right operand is passed
-- through tryadderror() before both operands are processed recursively.
-- @param ast AST node (may be nil)
-- @param tokenrule truthy when the enclosing rule is a token rule, in which
--        case no labels are added
-- @return nil for a falsy `ast`, otherwise the same `ast` table
local function adderrors_aux(ast,tokenrule)
  if not ast then
    return nil
  end

  if not isaction(ast) then
    return ast
  end

  local left = ast.op1
  local right = ast.op2

  if not tokenrule and ast.action == "and" then
    right = tryadderror(right, left)
  end

  ast.op1 = adderrors_aux(left, tokenrule)
  ast.op2 = adderrors_aux(right, tokenrule)
  return ast
end
|
|
--- Add generated error labels to a grammar or to a single expression.
-- Resets the label counter and description table, then processes each rule
-- with the FOLLOW table stored under its name in `followset`. The AST is
-- modified in place.
-- @param t grammar (list of entries with `rulename`, `rule` and optional
--        `token` fields) or a single AST
-- @param followset table indexed by rule name ('' for a single AST)
-- @return table mapping generated label names to their descriptions
local function adderrors(t, followset)
  GENERATED_ERRORS = 0
  TERRS = {}

  if not isgrammar(t) then
    FOLLOW = followset['']
    adderrors_aux(t, false)
    return TERRS
  end

  for _, entry in pairs(t) do
    FOLLOW = followset[entry.rulename]
    -- a rule whose `token` field is "1" is a token rule
    adderrors_aux(entry.rule, entry.token == "1")
  end
  return TERRS
end
|
|
|
|
-- Module interface: follow-table computation and automatic error labelling.
return {follow=follow,adderrors=adderrors}
|