The goal of stage 06 is to try parse zig synax in lua. I pulled in lpeglable 1.2.0 and parser-gen off github to get started. All of this needs to be cleaned up rather soon. Lua boostraps using tcc and musl from the previous stage. Since musl 0.6.0 doesn't support dynamic linking this build of lua doesn't support shared libraries. I couldn't easily patch musl with dlopen and friends so instead I link statically and call deps with c api.
566 lines
12 KiB
Lua
566 lines
12 KiB
Lua
local peg = require "parser-gen.peg-parser"
|
|
local eg = require "parser-gen.errorgen"
|
|
local s = require "parser-gen.stack"
|
|
|
|
-- create stack for tokens inside captures. nil - not inside capture, 0 - inside capture, 1 - token found inside capture
|
|
local tokenstack = s.Stack:Create()
|
|
|
|
|
|
|
|
local subject, errors, errorfunc
|
|
|
|
-- Lua 5.1 compatibility:
|
|
local unpack = unpack or table.unpack
|
|
|
|
|
|
local Predef = { nl = m.P"\n", cr = m.P"\r", tab = m.P"\t" }
|
|
|
|
|
|
local mem = {} -- for compiled grammars
|
|
|
|
local function updatelocale()
|
|
m.locale(Predef)
|
|
local any = m.P(1)
|
|
Predef.a = Predef.alpha
|
|
Predef.c = Predef.cntrl
|
|
Predef.d = Predef.digit
|
|
Predef.g = Predef.graph
|
|
Predef.l = Predef.lower
|
|
Predef.p = Predef.punct
|
|
Predef.s = Predef.space
|
|
Predef.u = Predef.upper
|
|
Predef.w = Predef.alnum
|
|
Predef.x = Predef.xdigit
|
|
Predef.A = any - Predef.a
|
|
Predef.C = any - Predef.c
|
|
Predef.D = any - Predef.d
|
|
Predef.G = any - Predef.g
|
|
Predef.L = any - Predef.l
|
|
Predef.P = any - Predef.p
|
|
Predef.S = any - Predef.s
|
|
Predef.U = any - Predef.u
|
|
Predef.W = any - Predef.w
|
|
Predef.X = any - Predef.x
|
|
mem = {}
|
|
end
|
|
|
|
updatelocale()
|
|
|
|
local definitions = {}
|
|
local tlabels = {}
|
|
local totallabels = 0
|
|
local tlabelnames = {} -- reverse table
|
|
local tdescs = {}
|
|
local trecs = {} -- recovery for each error
|
|
|
|
|
|
local function defaultsync(patt)
|
|
return (m.P(1)^-1) * (-patt * m.P(1))^0
|
|
end
|
|
|
|
-- TODO: store these variables for each grammar
|
|
local SKIP = (Predef.space + Predef.nl)
|
|
local SYNC = defaultsync(SKIP)
|
|
|
|
|
|
local recovery = true
|
|
local skipspaces = true
|
|
local buildast = true
|
|
|
|
local function sync (patt)
|
|
return patt --(-patt * m.P(1))^0 * patt^0 -- skip until we find the pattern and consume it(if we do)
|
|
end
|
|
|
|
|
|
local function pattspaces (patt)
|
|
if skipspaces then
|
|
return patt * SKIP^0
|
|
else
|
|
return patt
|
|
end
|
|
end
|
|
|
|
local function token (patt)
|
|
local incapture = tokenstack:pop() -- returns nil if not in capture
|
|
if not incapture then
|
|
return pattspaces(patt)
|
|
end
|
|
tokenstack:push(1)
|
|
return patt
|
|
end
|
|
|
|
|
|
|
|
|
|
-- functions used by the tool
|
|
|
|
local function iscompiled (gr)
|
|
return m.type(gr) == "pattern"
|
|
end
|
|
|
|
local function istoken (t)
|
|
return t["token"] == "1"
|
|
end
|
|
|
|
local function isfinal(t)
|
|
return t["t"] or t["nt"] or t["func"] or t["s"] or t["num"]
|
|
end
|
|
|
|
local function isaction(t)
|
|
return t["action"]
|
|
end
|
|
|
|
|
|
local function isrule(t)
|
|
return t and t["rulename"]
|
|
end
|
|
local function isgrammar(t)
|
|
if type(t) == "table" and not(t["action"]) then
|
|
return isrule(t[1])
|
|
end
|
|
return false
|
|
end
|
|
|
|
local function iscapture (action)
|
|
return action == "=>" or action == "gcap" or action == "scap" or action == "subcap" or action == "poscap"
|
|
end
|
|
|
|
local function finalNode (t)
|
|
if t["t"] then
|
|
return"t",t["t"] -- terminal
|
|
elseif t["nt"] then
|
|
return "nt", t["nt"], istoken(t) -- nonterminal
|
|
elseif t["func"] then
|
|
return "func", t["func"] -- function
|
|
elseif t["s"] then
|
|
return "s", t["s"]
|
|
elseif t["num"] then
|
|
return "num", t["num"]
|
|
end
|
|
return nil
|
|
end
|
|
local bg = {} -- local variable to keep global function buildgrammar
|
|
|
|
|
|
local function addspaces (caps)
|
|
local hastoken = tokenstack:pop()
|
|
if hastoken == 1 then
|
|
return pattspaces(caps)
|
|
end
|
|
return caps
|
|
end
|
|
|
|
local function applyaction(action, op1, op2, labels,tokenrule)
|
|
if action == "or" then
|
|
if labels then -- labels = {{s="errName"},{s="errName2"}}
|
|
for i, v in ipairs(labels) do
|
|
local labname = v["s"]
|
|
local lab = tlabels[labname]
|
|
if not lab then
|
|
error("Label '"..labname.."' undefined")
|
|
end
|
|
labels[i] = lab
|
|
end
|
|
return m.Rec(op1,op2,unpack(labels))
|
|
end
|
|
return op1 + op2
|
|
elseif action == "and" then
|
|
|
|
return op1 * op2
|
|
elseif action == "&" then
|
|
return #op1
|
|
elseif action == "!" then
|
|
return -op1
|
|
elseif action == "+" then
|
|
return op1^1
|
|
elseif action == "*" then
|
|
return op1^0
|
|
elseif action == "?" then
|
|
return op1^-1
|
|
elseif action == "^" then
|
|
return op1^op2
|
|
elseif action == "^LABEL" then
|
|
local lab = tlabels[op2]
|
|
if not lab then
|
|
error("Label '"..op2.."' unspecified using setlabels()")
|
|
end
|
|
return op1 + m.T(lab)
|
|
elseif action == "->" then
|
|
return op1 / op2
|
|
-- in captures we add SPACES^0
|
|
elseif action == "=>" then
|
|
return addspaces(m.Cmt(op1,op2))
|
|
elseif action == "tcap" then
|
|
return m.Ct(op1) -- nospaces
|
|
elseif action == "gcap" then
|
|
return addspaces(m.Cg(op1, op2))
|
|
elseif action == "bref" then
|
|
return m.Cb(op1) --m.Cmt(m.Cb(op1), equalcap) -- do we need to add spaces to bcap?
|
|
elseif action == "poscap" then
|
|
return addspaces(m.Cp())
|
|
elseif action == "subcap" then
|
|
return addspaces(m.Cs(op1))
|
|
elseif action == "scap" then
|
|
return addspaces(m.C(op1))
|
|
elseif action == "anychar" then
|
|
if buildast and not tokenrule then
|
|
return m.C(m.P(1))
|
|
end
|
|
return m.P(1)
|
|
elseif action == "label" then
|
|
local lab = tlabels[op1]
|
|
if not lab then
|
|
error("Label '"..op1.."' unspecified using setlabels()")
|
|
end
|
|
return m.T(lab) -- lpeglabel
|
|
elseif action == "%" then
|
|
if definitions[op1] then
|
|
return definitions[op1]
|
|
elseif Predef[op1] then
|
|
return Predef[op1]
|
|
else
|
|
error("Definition for '%"..op1.."' unspecified(use second parameter of parser-gen.compile())")
|
|
end
|
|
elseif action == "invert" then
|
|
return m.P(1) - op1
|
|
elseif action == "range" then
|
|
local res = m.R(op1)
|
|
if not tokenrule then
|
|
if buildast then
|
|
res = m.C(res)
|
|
end
|
|
res = token(res)
|
|
end
|
|
return res
|
|
else
|
|
error("Unsupported action '"..action.."'")
|
|
end
|
|
end
|
|
|
|
local function applyfinal(action, term, tokenterm, tokenrule)
|
|
|
|
if action == "t" then
|
|
local res = m.P(term)
|
|
if not tokenrule then
|
|
if buildast then
|
|
res = m.C(res)
|
|
end
|
|
if skipspaces then
|
|
res = token(res)
|
|
end
|
|
end
|
|
return res
|
|
elseif action == "nt" then
|
|
if skipspaces and tokenterm and (not tokenrule) then
|
|
return token(m.V(term))
|
|
else
|
|
return m.V(term)
|
|
end
|
|
elseif action == "func" then
|
|
if definitions[term] then
|
|
return definitions[term]
|
|
else
|
|
error("Definition for function '"..term.."' unspecified (use second parameter of parser-gen.compile())")
|
|
end
|
|
elseif action == "s" then -- simple string
|
|
return term
|
|
elseif action == "num" then -- numbered string
|
|
return tonumber(term)
|
|
end
|
|
end
|
|
|
|
|
|
local function applygrammar(gram)
|
|
return m.P(gram)
|
|
end
|
|
|
|
local function traverse (ast, tokenrule)
|
|
if not ast then
|
|
return nil
|
|
end
|
|
|
|
if isfinal(ast) then
|
|
local typefn, fn, tok = finalNode(ast)
|
|
return applyfinal(typefn, fn, tok, tokenrule)
|
|
|
|
elseif isaction(ast) then
|
|
|
|
local act, op1, op2, labs, ret1, ret2
|
|
act = ast["action"]
|
|
op1 = ast["op1"]
|
|
op2 = ast["op2"]
|
|
labs = ast["condition"] -- recovery operations
|
|
|
|
-- post-order traversal
|
|
if iscapture(act) then
|
|
tokenstack:push(0) -- not found any tokens yet
|
|
end
|
|
|
|
ret1 = traverse(op1, tokenrule)
|
|
ret2 = traverse(op2, tokenrule)
|
|
|
|
|
|
return applyaction(act, ret1, ret2, labs, tokenrule)
|
|
|
|
elseif isgrammar(ast) then
|
|
--
|
|
local g = bg.buildgrammar (ast)
|
|
return applygrammar (g)
|
|
|
|
else
|
|
peg.print_r(ast)
|
|
error("Unsupported AST")
|
|
end
|
|
|
|
end
|
|
|
|
local function specialrules(ast, builder)
|
|
-- initialize values
|
|
SKIP = (Predef.space + Predef.nl)
|
|
skipspaces = true
|
|
SYNC = nil
|
|
recovery = true
|
|
-- find SPACE and SYNC rules
|
|
for i, v in ipairs(ast) do
|
|
local name = v["rulename"]
|
|
local rule
|
|
if name == "SKIP" then
|
|
rule = traverse(v["rule"], true)
|
|
if v["rule"]["t"] == '' then
|
|
skipspaces = false
|
|
else
|
|
skipspaces = true
|
|
SKIP = rule
|
|
end
|
|
builder[name] = rule
|
|
elseif name == "SYNC" then
|
|
rule = traverse(v["rule"], true)
|
|
if v["rule"]["t"] == '' then-- SYNC <- ''
|
|
recovery=false
|
|
else
|
|
recovery= true
|
|
SYNC = rule
|
|
end
|
|
builder[name] = rule
|
|
end
|
|
end
|
|
if not SYNC and recovery then
|
|
SYNC = defaultsync(SKIP)
|
|
end
|
|
end
|
|
|
|
local function recorderror(position,label)
|
|
-- call error function here
|
|
local line, col = peg.calcline(subject, position)
|
|
local desc
|
|
if label == 0 then
|
|
desc = "Syntax error"
|
|
else
|
|
desc = tdescs[label]
|
|
end
|
|
if errorfunc then
|
|
local temp = string.sub(subject,position)
|
|
local strend = string.find(temp, "\n")
|
|
local sfail = string.sub(temp, 1, strend)
|
|
errorfunc(desc,line,col,sfail,trecs[label])
|
|
end
|
|
|
|
local err = { line = line, col = col, label=tlabelnames[label], msg = desc }
|
|
table.insert(errors, err)
|
|
|
|
end
|
|
local function record(label)
|
|
return (m.Cp() * m.Cc(label)) / recorderror
|
|
end
|
|
|
|
local function buildrecovery(grammar)
|
|
|
|
local synctoken = pattspaces(sync(SYNC))
|
|
local grec = grammar
|
|
|
|
for k,v in pairs(tlabels) do
|
|
|
|
if trecs[v] then -- custom sync token
|
|
grec = m.Rec(grec,record(v) * pattspaces(trecs[v]), v)
|
|
else -- use global sync token
|
|
grec = m.Rec(grec,record(v) * synctoken, v)
|
|
end
|
|
end
|
|
return grec
|
|
|
|
end
|
|
local usenode = false
|
|
|
|
local function usenodes(val)
|
|
usenode = val
|
|
end
|
|
|
|
|
|
function bg.buildgrammar (ast)
|
|
local builder = {}
|
|
specialrules(ast, builder)
|
|
local initialrule
|
|
for i, v in ipairs(ast) do
|
|
local istokenrule = v["token"] == "1"
|
|
local isfragment = v["fragment"] == "1"
|
|
local isnode = v["node"] == "1"
|
|
|
|
if isnode and not usenodes then
|
|
error("Node mode disabled - please use parser-gen.usenodes(true) before compiling the grammar")
|
|
end
|
|
|
|
local name = v["rulename"]
|
|
local isspecial = name == "SKIP" or name == "SYNC"
|
|
local rule = v["rule"]
|
|
if i == 1 then
|
|
initialrule = name
|
|
table.insert(builder, name) -- lpeg syntax
|
|
builder[name] = traverse(rule, istokenrule)
|
|
else
|
|
if not builder[name] then -- dont traverse rules for SKIP and SYNC twice
|
|
builder[name] = traverse(rule, istokenrule)
|
|
end
|
|
end
|
|
if buildast and not isfragment and not isspecial and ((not usenode) or (usenode and isnode)) then
|
|
if istokenrule then
|
|
builder[name] = m.C(builder[name])
|
|
end
|
|
builder[name] = m.Ct(m.Cg(m.Cc(name),"rule") * m.Cg(m.Cp(),"pos") * builder[name])
|
|
end
|
|
end
|
|
|
|
if skipspaces then
|
|
builder[initialrule] = SKIP^0 * builder[initialrule] -- skip spaces at the beginning of the input
|
|
end
|
|
if recovery then
|
|
builder[initialrule] = buildrecovery(builder[initialrule]) -- build recovery on top of initial rule
|
|
end
|
|
return builder
|
|
end
|
|
|
|
|
|
|
|
|
|
local function build(ast, defs)
|
|
if defs then
|
|
definitions = defs
|
|
end
|
|
if isgrammar(ast) then
|
|
return traverse(ast)
|
|
else
|
|
SKIP = (Predef.space + Predef.nl)
|
|
skipspaces = true
|
|
SYNC = nil
|
|
recovery = true
|
|
SYNC = defaultsync(SKIP)
|
|
local res = SKIP ^0 * traverse(ast)
|
|
if buildast then
|
|
res = m.Ct(res)
|
|
end
|
|
return res -- input is not a grammar - skip spaces and sync by default
|
|
end
|
|
end
|
|
|
|
|
|
|
|
-- recovery grammar
|
|
|
|
|
|
|
|
|
|
-- end
|
|
|
|
|
|
|
|
-- t = {errName="Error description",...}
|
|
local function setlabels (t, errorgen)
|
|
local index
|
|
if errorgen then
|
|
index = totallabels + 1
|
|
else
|
|
-- reset error tables
|
|
index = 1
|
|
tlabels = {}
|
|
|
|
tdescs = {}
|
|
trecs = {}
|
|
end
|
|
for key,value in pairs(t) do
|
|
if index >= 255 then
|
|
error("Error label limit reached(255)")
|
|
end
|
|
if type(value) == "table" then -- we have a recovery expression
|
|
tdescs[index] = value[1]
|
|
|
|
trecs[index] = traverse(peg.pegToAST(value[2]), true)-- PEG to LPEG
|
|
else
|
|
tdescs[index] = value
|
|
end
|
|
tlabels[key] = index
|
|
tlabelnames[index] = key -- reverse table
|
|
index = index + 1
|
|
end
|
|
totallabels = index-1
|
|
end
|
|
|
|
|
|
local function compile (input, defs, generrors, nocaptures)
|
|
if iscompiled(input) then
|
|
return input
|
|
end
|
|
if not mem[input] then
|
|
buildast = true
|
|
if nocaptures then
|
|
buildast=false
|
|
end
|
|
--re.setlabels(tlabels)
|
|
--re.compile(input,defs)
|
|
-- build ast
|
|
local ast = peg.pegToAST(input)
|
|
if generrors then
|
|
local follow = eg.follow(ast)
|
|
local errors = eg.adderrors(ast, follow)
|
|
setlabels (errors, true) -- add errors generated by errorgen
|
|
end
|
|
local gram = build(ast,defs)
|
|
if not gram then
|
|
-- find error using relabel module
|
|
|
|
end
|
|
mem[input] = gram-- store if the user forgets to compile it
|
|
end
|
|
return mem[input]
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
local function parse (input, grammar, errorfunction)
|
|
if not iscompiled(grammar) then
|
|
|
|
local cp = compile(grammar)
|
|
grammar = cp
|
|
end
|
|
-- set up recovery table
|
|
errorfunc = errorfunction
|
|
subject = input
|
|
errors = {}
|
|
-- end
|
|
local r, e, sfail = m.match(grammar,input)
|
|
if not r then
|
|
recorderror(#input - #sfail, e)
|
|
end
|
|
if #errors == 0 then errors=nil end
|
|
return r, errors
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
local pg = {compile=compile, setlabels=setlabels, parse=parse,follow=follow, calcline = peg.calcline, usenodes = usenodes}
|
|
|
|
return pg
|