lang-bootstrap/06/parser-gen/errorgen.lua
Dawid Sobczak e6b88d5a0f Add stage 06: Lua bootstrap
The goal of stage 06 is to try to parse Zig syntax in Lua. I pulled in
lpeglabel 1.2.0 and parser-gen off GitHub to get started. All of this
needs to be cleaned up rather soon.

Lua bootstraps using tcc and musl from the previous stage. Since musl
0.6.0 doesn't support dynamic linking, this build of Lua doesn't support
shared libraries. I couldn't easily patch musl with dlopen and friends,
so instead I link statically and call deps with the C API.
2023-07-06 12:32:47 +01:00

401 lines
7.2 KiB
Lua

-- Error generation code for LL(1) grammars
-- AST funcs:
-- Returns the payload of a leaf node: the first truthy value among the
-- "t", "nt", "func" and "s" fields, otherwise whatever the "num" field holds
-- (nil when the node has none of these fields).
local function isfinal(t)
  for _, key in ipairs({"t", "nt", "func", "s"}) do
    local payload = t[key]
    if payload then
      return payload
    end
  end
  return t["num"]
end
-- Returns the node's "action" field (truthy for operator nodes, nil when the
-- field is absent).
local function isaction(t)
  local operator = t.action
  return operator
end
-- Returns t's "rulename" field when t is truthy; a falsy t (nil/false) is
-- handed back unchanged.
local function isrule(t)
  if not t then
    return t
  end
  return t.rulename
end
-- Tells whether t looks like a grammar: a table without an "action" field
-- whose first element is a rule. Returns false for non-tables and for
-- operator nodes; otherwise returns whatever isrule reports for t[1].
local function isgrammar(t)
  if type(t) ~= "table" or t.action then
    return false
  end
  return isrule(t[1])
end
-- True exactly when the node's "token" field is the string "1".
local function istoken(t)
  local flag = t.token
  return flag == "1"
end
-- Classifies a leaf node. Returns the kind tag followed by the payload:
--   "t", value          terminal
--   "nt", value, token  nonterminal (third result: istoken(t))
--   "func", value       function
--   "s", value
--   "num", value
-- Kinds are probed in that order; returns nil when none of the fields is set.
local function finalNode(t)
  if t.t then
    return "t", t.t
  end
  if t.nt then
    return "nt", t.nt, istoken(t)
  end
  for _, kind in ipairs({"func", "s", "num"}) do
    local value = t[kind]
    if value then
      return kind, value
    end
  end
  return nil
end
--[[
function rightleaf:
returns the right-most leaves of a concatenation in the AST, as a list of
strings prefixed with the leaf kind ("t_", "nt_", ...).
used as keys of the follow set
input: ((A B) C)
output: {"nt_C"}
input: (A / B / C) (D / 'e')
output: {"nt_D","t_e"}
input: A*
output: {"nt_A",''}
input: !A
output: {"not_nt_A"}
]]
-- Builds a new table with the same keys as t in which every value is
-- prefixed with "not_"; t itself is not modified.
local function addnot(t)
  local negated = {}
  for key, leaf in pairs(t) do
    negated[key] = "not_" .. leaf
  end
  return negated
end
-- Appends the empty string (epsilon) to t and returns that same table.
-- The argument is modified in place; no copy is made.
local function addepsilon(t)
  table.insert(t, '')
  return t
end
-- Appends every value of `second` to `first` (in pairs() order) and returns
-- `first`. `first` is extended in place; `second` is left untouched.
local function mergetables(first, second)
  for _, item in pairs(second) do
    table.insert(first, item)
  end
  return first
end
-- Computes the right-most leaves of expression node t as a list of strings.
-- A leaf is encoded as "<kind>_<value>" (terminals: t_if, nonterminals: nt_if);
-- the empty string stands for epsilon, "_anychar" for any character, and a
-- "not_" prefix marks leaves under a negation.
-- Raises an error when t carries an action this function does not know,
-- including a node with no action at all.
local function rightleaf(t)
  local action = t.action
  local op1 = t.op1
  local op2 = t.op2
  if isfinal(t) then
    -- todo: replace nt_A with FIRST(A)
    local typefn, fn = finalNode(t)
    return {typefn .. "_" .. fn}
  end
  if action == "or" then
    -- either alternative may end the match
    return mergetables(rightleaf(op1), rightleaf(op2))
  elseif action == "and" then -- consider only RHS
    return rightleaf(op2)
  elseif action == "&" then
    return rightleaf(op1)
  elseif action == "!" then
    return addnot(rightleaf(op1))
  elseif action == "+" then
    return rightleaf(op1)
  elseif action == "*" or action == "?" then
    -- may match nothing, so epsilon is one of the leaves
    return addepsilon(rightleaf(op1))
  elseif action == "^" then
    -- second operand is a number node; assumes its "num" field is numeric
    local count = op2["num"]
    if count >= 1 then
      return rightleaf(op1)
    else
      return addepsilon(rightleaf(op1))
    end
  elseif action == "^LABEL" or action == "->" or action == "=>" or action == "tcap" or action == "gcap" or action == "subcap" or action == "scap" then
    -- wrappers: the leaves are those of the wrapped expression
    return rightleaf(op1)
  elseif action == "bref" or action == "poscap" then
    return addepsilon({}) -- only empty string
  elseif action == "anychar" then
    return {"_anychar"}
  elseif action == "label" then
    return addepsilon({})
  elseif action == "%" then
    return addepsilon({})
  elseif action == "invert" then
    return addnot(rightleaf(op1))
  elseif action == "range" then
    return {"_anychar"}
  else
    -- tostring keeps the message intact even when action is nil
    error("Unsupported action '" .. tostring(action) .. "'")
  end
end
-- Module-level follow table for the rule currently being processed. follow_aux
-- fills it: each key is a leaf string produced by rightleaf (e.g. "nt_A") and
-- each value is a list of follow_aux results recorded for that leaf.
local FOLLOW = {}
-- Recursive worker behind follow(). Walks expression node t and, for every
-- concatenation it meets, records in the module-level FOLLOW table what comes
-- after each right-most leaf of the left operand.
--   t         expression node (leaf node or operator node)
--   dontsplit when truthy, a choice node is returned whole instead of being
--             split into its alternatives
-- Returns a table describing t that callers store in FOLLOW: leaf nodes come
-- back wrapped in a list, a concatenation yields the result for its left
-- operand, and repetitions that may match nothing get '' appended.
-- Raises an error when t carries an action this function does not know,
-- including a node with no action at all.
local function follow_aux(t, dontsplit)
  local action = t.action
  local op1 = t.op1
  local op2 = t.op2
  if isfinal(t) then
    return {t}
  end
  if action == "or" then
    if dontsplit then -- do not split "(B / C)" in "A (B / C)"
      return {t}
    else -- return both
      return mergetables(follow_aux(op1), follow_aux(op2))
    end
  elseif action == "and" then -- magic happens here
    -- (A (B / D)) (!E C / D)
    -- 1) FOLLOW(B) = FOLLOW(D) = {(!E C / D)}
    local rightset = rightleaf(op1)
    local rhs = follow_aux(op2)
    for k, v in pairs(rightset) do
      if not FOLLOW[v] then
        FOLLOW[v] = {}
      end
      -- TODO: check if rhs already exists in FOLLOW(v)
      table.insert(FOLLOW[v], rhs)
    end
    -- 2) FOLLOW(A) = {(B / D)}
    return follow_aux(op1)
  elseif action == "&" then
    return follow_aux(op1)
  elseif action == "!" then
    -- NOTE(review): unlike the other branches this returns a keyed node table,
    -- not a list
    return {action="!", op1=follow_aux(op1)}
  elseif action == "+" then
    return follow_aux(op1)
  elseif action == "*" then
    return addepsilon(follow_aux(op1))
  elseif action == "?" then
    return addepsilon(follow_aux(op1))
  elseif action == "^" then
    -- second operand is a number node; assumes its "num" field is numeric
    op2 = op2["num"]
    if op2 >= 1 then
      return follow_aux(op1)
    else
      return addepsilon(follow_aux(op1))
    end
  elseif action == "^LABEL" or action == "->" or action == "=>" or action == "tcap" or action == "gcap" or action == "subcap" or action == "scap" then
    return follow_aux(op1)
  elseif action == "bref" or action == "poscap" then
    return addepsilon({}) -- only empty string
  elseif action == "anychar" then
    return {"_anychar"}
  elseif action == "label" then
    return addepsilon({})
  elseif action == "%" then
    return addepsilon({})
  elseif action == "invert" then
    return {t} -- whole table
  elseif action == "range" then
    return {"_anychar"}
  else
    -- tostring keeps the message intact even when action is nil
    error("Unsupported action '" .. tostring(action) .. "'")
  end
end
-- function: follow
-- Computes the follow tables for a whole AST. For a grammar the result maps
-- each rule name to the FOLLOW table built from that rule's body; for a bare
-- expression the single table is stored under the key ''.
local function follow(t)
  local followset = {}
  if not isgrammar(t) then
    FOLLOW = {}
    follow_aux(t)
    followset[''] = FOLLOW
    return followset
  end
  for _, entry in pairs(t) do
    local name = entry.rulename
    FOLLOW = {} -- every rule starts from an empty table
    follow_aux(entry.rule) -- fills FOLLOW as a side effect
    followset[name] = FOLLOW
  end
  return followset
end
-- functions to add errors
-- Counts every key/value pair stored in t (array part and hash part alike).
local function getn(t)
  local count = 0
  for _ in pairs(t) do
    count = count + 1
  end
  return count
end
-- Produces the text naming what was expected, by descending along the left
-- operands (op1) of op until a leaf is reached. Terminals are wrapped in
-- single quotes; other leaves yield their nt/func/s/num payload as is. When
-- an operator node without op1 is reached first, its action is returned.
local function printexpect(op)
  local node = op
  while not isfinal(node) do
    local left = node.op1
    if not left then
      return node.action
    end
    node = left
  end
  if node.t then
    return "'" .. node.t .. "'"
  end
  return node.nt or node.func or node.s or node.num
end
-- Number of error labels generated so far; reset to 0 at the start of adderrors.
local GENERATED_ERRORS = 0
-- Maps each generated label name ("errorgen<N>") to its description text;
-- replaced by a fresh table at the start of adderrors, which returns it.
local TERRS = {}
-- Registers a new error label for expression op and returns its name.
-- The description "Expected <printexpect(op)>" is stored in TERRS under the
-- name "errorgen<N>", where N is the incremented GENERATED_ERRORS counter.
-- `after` is accepted but not used.
-- Raises an error once the next label number would reach 255.
local function generateerror(op, after)
  local message = "Expected " .. printexpect(op)
  local index = GENERATED_ERRORS + 1
  if index >= 255 then
    error("Error label limit reached(255)")
  end
  local label = "errorgen" .. index
  TERRS[label] = message
  GENERATED_ERRORS = index
  return label
end
-- Wraps op in an error-label node when that is considered safe.
--   op    expression that may receive a label
--   after expression preceding op in the concatenation
-- (A / B) C
-- A label is generated iff at least one right-most leaf of `after` has
-- exactly one entry in FOLLOW. Returns {action="^LABEL", op1=op, op2={s=name}}
-- in that case, and op unchanged otherwise (also when FOLLOW is unset).
local function tryadderror(op, after)
  if not FOLLOW then
    return op
  end
  local unique = false
  for _, key in pairs(rightleaf(after)) do
    local followers = FOLLOW[key]
    if followers and getn(followers) == 1 then
      unique = true
    end
  end
  if not unique then
    return op
  end
  return {action = "^LABEL", op1 = op, op2 = {s = generateerror(op, after)}}
end
-- function: adderrors_aux
-- Recursive worker behind adderrors. Traverses the AST in place and, for each
-- concatenation outside token rules, lets tryadderror decide whether the right
-- operand gets an error label. Operands are then processed recursively and
-- written back into the node.
--   ast       node to process (nil/false yields nil)
--   tokenrule truthy while inside a token rule; no labels are added there
-- Returns the same ast table; nodes without an action are returned untouched.
local function adderrors_aux(ast, tokenrule)
  if not ast then
    return nil
  end
  if not isaction(ast) then
    return ast
  end
  local left, right = ast.op1, ast.op2
  if ast.action == "and" and not tokenrule then
    right = tryadderror(right, left)
  end
  ast.op1 = adderrors_aux(left, tokenrule)
  ast.op2 = adderrors_aux(right, tokenrule)
  return ast
end
-- Adds error labels to AST t in place, using the follow tables computed by
-- follow().
--   t         grammar (list of rules) or a single expression
--   followset for a grammar, indexed by rule name; for an expression the
--             table stored under '' is used
-- Resets the label counter and description table first. Rules whose "token"
-- field is "1" are traversed as token rules. Returns the table mapping each
-- generated label name to its description.
local function adderrors(t, followset)
  GENERATED_ERRORS = 0
  TERRS = {}
  if not isgrammar(t) then
    FOLLOW = followset['']
    adderrors_aux(t, false)
    return TERRS
  end
  for _, entry in pairs(t) do
    FOLLOW = followset[entry.rulename]
    adderrors_aux(entry.rule, entry.token == "1")
  end
  return TERRS
end
-- Module interface: follow builds the follow tables for an AST, adderrors
-- uses them to insert error labels into the AST and returns the label
-- descriptions.
return {follow=follow,adderrors=adderrors}