Add stage 06: Lua bootstrap
The goal of stage 06 is to try parse zig synax in lua. I pulled in lpeglable 1.2.0 and parser-gen off github to get started. All of this needs to be cleaned up rather soon. Lua boostraps using tcc and musl from the previous stage. Since musl 0.6.0 doesn't support dynamic linking this build of lua doesn't support shared libraries. I couldn't easily patch musl with dlopen and friends so instead I link statically and call deps with c api.
This commit is contained in:
parent
2ae045cf8a
commit
e6b88d5a0f
170 changed files with 72518 additions and 2 deletions
334
06/parser-gen/peg-parser.lua
Normal file
334
06/parser-gen/peg-parser.lua
Normal file
|
@ -0,0 +1,334 @@
|
|||
local re = require "lpeglabel.relabel"
|
||||
|
||||
local peg = {}
|
||||
|
||||
-- from relabel.lua
|
||||
|
||||
local errinfo = {
|
||||
{"NoPatt", "no pattern found"},
|
||||
{"ExtraChars", "unexpected characters after the pattern"},
|
||||
|
||||
{"ExpPatt1", "expected a pattern after '/' or '//{...}'"},
|
||||
|
||||
{"ExpPatt2", "expected a pattern after '&'"},
|
||||
{"ExpPatt3", "expected a pattern after '!'"},
|
||||
|
||||
{"ExpPatt4", "expected a pattern after '('"},
|
||||
{"ExpPatt5", "expected a pattern after ':'"},
|
||||
{"ExpPatt6", "expected a pattern after '{~'"},
|
||||
{"ExpPatt7", "expected a pattern after '{|'"},
|
||||
|
||||
{"ExpPatt8", "expected a pattern after '<-'"},
|
||||
|
||||
{"ExpPattOrClose", "expected a pattern or closing '}' after '{'"},
|
||||
|
||||
{"ExpNum", "expected a number after '^', '+' or '-' (no space)"},
|
||||
{"ExpNumOrLab", "expected a number or a label after ^"},
|
||||
{"ExpCap", "expected a string, number, '{}' or name after '->'"},
|
||||
|
||||
{"ExpName1", "expected the name of a rule after '=>'"},
|
||||
{"ExpName2", "expected the name of a rule after '=' (no space)"},
|
||||
{"ExpName3", "expected the name of a rule after '<' (no space)"},
|
||||
|
||||
{"ExpLab1", "expected at least one label after '{'"},
|
||||
{"ExpLab2", "expected a label after the comma"},
|
||||
|
||||
{"ExpNameOrLab", "expected a name or label after '%' (no space)"},
|
||||
|
||||
{"ExpItem", "expected at least one item after '[' or '^'"},
|
||||
|
||||
{"MisClose1", "missing closing ')'"},
|
||||
{"MisClose2", "missing closing ':}'"},
|
||||
{"MisClose3", "missing closing '~}'"},
|
||||
{"MisClose4", "missing closing '|}'"},
|
||||
{"MisClose5", "missing closing '}'"}, -- for the captures
|
||||
|
||||
{"MisClose6", "missing closing '>'"},
|
||||
{"MisClose7", "missing closing '}'"}, -- for the labels
|
||||
|
||||
{"MisClose8", "missing closing ']'"},
|
||||
|
||||
{"MisTerm1", "missing terminating single quote"},
|
||||
{"MisTerm2", "missing terminating double quote"},
|
||||
}
|
||||
|
||||
local errmsgs = {}
|
||||
local labels = {}
|
||||
|
||||
for i, err in ipairs(errinfo) do
|
||||
errmsgs[i] = err[2]
|
||||
labels[err[1]] = i
|
||||
end
|
||||
|
||||
re.setlabels(labels)
|
||||
|
||||
local function concat(a,b)
|
||||
return a..b
|
||||
end
|
||||
local function foldtable(action,t)
|
||||
local re
|
||||
local first = true
|
||||
for key,value in pairs(t) do
|
||||
if first then
|
||||
re = value
|
||||
first = false
|
||||
else
|
||||
|
||||
local temp = re
|
||||
if action == "suf" then -- suffix actions
|
||||
local act = value[1]
|
||||
if act == "*" or act == "?" or act == "+" then
|
||||
re = {action=act, op1=temp}
|
||||
else
|
||||
re = {action=act, op1=temp, op2=value[2]}
|
||||
end
|
||||
elseif action == "or" and #value == 2 then -- recovery expression
|
||||
local labels = value[1]
|
||||
local op2 = value[2]
|
||||
re = {action=action, op1=temp, op2=op2, condition=labels}
|
||||
else
|
||||
re = {action=action, op1=temp, op2=value}
|
||||
end
|
||||
end
|
||||
end
|
||||
return re
|
||||
end
|
||||
|
||||
|
||||
local gram = [=[
|
||||
|
||||
pattern <- (exp / %{NoPatt}) (!. / %{ExtraChars})
|
||||
exp <- S (grammar / alternative)
|
||||
|
||||
labels <- {| '{' {: (label / %{ExpLab1}) :} (',' {: (label / %{ExpLab2}) :})* ('}' / %{MisClose7}) |}
|
||||
|
||||
|
||||
alternative <- ( {:''->'or':} {| {: seq :} ('/' (('/' {| {: labels :} S {: (seq / %{ExpPatt1}) :} |}) / (S {: (seq / %{ExpPatt1}) :} ) ) )* |} ) -> foldtable
|
||||
|
||||
|
||||
seq <- ( {:''->'and':} {| {: prefix :}+ |} ) -> foldtable
|
||||
|
||||
|
||||
prefix <- {| {:action: '&' :} S {:op1: (prefix / %{ExpPatt2}) :} |}
|
||||
/ {| {:action: '!' :} S {:op1: (prefix / %{ExpPatt3}) :} |}
|
||||
/ suffix
|
||||
|
||||
suffix <- ( {:''->'suf':} {| primary S {| suffixaction S |}* |} ) -> foldtable
|
||||
|
||||
|
||||
suffixaction <- {[+*?]}
|
||||
/ {'^'} {| {:num: [+-]? NUM:} |}
|
||||
/ '^'->'^LABEL' (label / %{ExpNumOrLab})
|
||||
/ {'->'} S ((string / {| {:action:'{}'->'poscap':} |} / funcname / {|{:num: NUM :} |}) / %{ExpCap})
|
||||
/ {'=>'} S (funcname / %{ExpName1})
|
||||
|
||||
|
||||
|
||||
|
||||
primary <- '(' (exp / %{ExpPatt4}) (')' / %{MisClose1})
|
||||
/ term
|
||||
/ class
|
||||
/ defined
|
||||
/ {| {:action: '%'->'label':} ('{' / %{ExpNameOrLab}) S ({:op1: label:} / %{ExpLab1}) S ('}' / %{MisClose7}) |}
|
||||
/ {| {:action: '{:'->'gcap':} {:op2: defname:} ':' !'}' ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
|
||||
/ {| {:action: '{:'->'gcap':} ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
|
||||
/ {| {:action: '='->'bref':} ({:op1: defname:} / %{ExpName2}) |}
|
||||
/ {| {:action: '{}'->'poscap':} |}
|
||||
/ {| {:action: '{~'->'subcap':} ({:op1: exp:} / %{ExpPatt6}) ('~}' / %{MisClose3}) |}
|
||||
/ {| {:action: '{|'->'tcap':} ({:op1: exp:} / %{ExpPatt7}) ('|}' / %{MisClose4}) |}
|
||||
/ {| {:action: '{'->'scap':} ({:op1: exp:} / %{ExpPattOrClose}) ('}' / %{MisClose5}) |}
|
||||
/ {| {:action: '.'->'anychar':} |}
|
||||
/ !frag !nodee name S !ARROW
|
||||
/ '<' (name / %{ExpName3}) ('>' / %{MisClose6}) -- old-style non terminals
|
||||
|
||||
grammar <- {| definition+ |}
|
||||
definition <- {| (frag / nodee)? (token / nontoken) S ARROW ({:rule: exp :} / %{ExpPatt8}) |}
|
||||
|
||||
label <- {| {:s: ERRORNAME :} |}
|
||||
|
||||
frag <- {:fragment: 'fragment'->'1' :} ![0-9_a-z] S !ARROW
|
||||
nodee <- {:node: 'node'->'1' :} ![0-9_a-z] S !ARROW
|
||||
token <- {:rulename: TOKENNAME :} {:token:''->'1':}
|
||||
nontoken <- {:rulename: NAMESTRING :}
|
||||
|
||||
class <- '[' ( ('^' {| {:action:''->'invert':} {:op1: classset :} |} ) / classset ) (']' / %{MisClose8})
|
||||
classset <- ( {:''->'or':} {| {: (item / %{ExpItem}) :} (!']' {: (item / %{ExpItem}) :})* |} ) -> foldtable
|
||||
item <- defined / range / {| {:t: . :} |}
|
||||
range <- {| {:action:''->'range':} {:op1: {| {:s: ({: . :} ('-') {: [^]] :} ) -> concat :} |} :} |}
|
||||
|
||||
S <- (%s / '--' [^%nl]*)* -- spaces and comments
|
||||
name <- {| {:nt: TOKENNAME :} {:token:''->'1':} / {:nt: NAMESTRING :} |}
|
||||
|
||||
funcname <- {| {:func: NAMESTRING :} |}
|
||||
ERRORNAME <- NAMESTRING
|
||||
NAMESTRING <- [A-Za-z][A-Za-z0-9_]*
|
||||
TOKENNAME <- [A-Z_]+ ![0-9a-z]
|
||||
defname <- {| {:s: NAMESTRING :} |}
|
||||
ARROW <- '<-'
|
||||
NUM <- [0-9]+
|
||||
term <- {| '"' {:t: [^"]* :} ('"' / %{MisTerm2}) / "'" {:t: [^']* :} ("'" / %{MisTerm1}) |}
|
||||
string <- {| '"' {:s: [^"]* :} ('"' / %{MisTerm2}) / "'" {:s: [^']* :} ("'" / %{MisTerm1}) |}
|
||||
defined <- {| {:action: '%':} {:op1: defname :} |}
|
||||
]=]
|
||||
|
||||
local defs = {foldtable=foldtable, concat=concat}
|
||||
peg.gram = gram
|
||||
peg.defs = defs
|
||||
peg.labels = labels
|
||||
local p = re.compile ( gram, defs)
|
||||
|
||||
|
||||
|
||||
|
||||
--[[
|
||||
Function: pegToAST(input)
|
||||
|
||||
Input: a grammar in PEG format, described in https://github.com/vsbenas/parser-gen
|
||||
|
||||
Output: if parsing successful - a table of grammar rules, else - runtime error
|
||||
|
||||
Example input: "
|
||||
|
||||
Program <- stmt* / SPACE
|
||||
stmt <- ('a' / 'b')+
|
||||
SPACE <- ''
|
||||
|
||||
"
|
||||
|
||||
Example output: {
|
||||
{rulename = "Program", rule = {action = "or", op1 = {action = "*", op1 = {nt = "stmt"}}, op2 = {nt = "SPACE", token="1"}}},
|
||||
{rulename = "stmt", rule = {action = "+", op1 = {action="or", op1 = {t = "a"}, op2 = {t = "b"}}}},
|
||||
{rulename = "SPACE", rule = {t=""}, token=1},
|
||||
}
|
||||
|
||||
The rules are further processed and turned into lpeg compatible format in parser-gen.lua
|
||||
|
||||
Action names:
|
||||
or (has parameter condition for recovery expresions)
|
||||
and
|
||||
&
|
||||
!
|
||||
+
|
||||
*
|
||||
?
|
||||
^num (num is a number with an optional plus or minus sign)
|
||||
^label (label is an error label set with setlabels)
|
||||
->
|
||||
=>
|
||||
tcap
|
||||
gcap (op2= name, anonymous otherwise)
|
||||
bref
|
||||
poscap
|
||||
subcap
|
||||
scap
|
||||
anychar
|
||||
label
|
||||
%
|
||||
range
|
||||
|
||||
Final token actions:
|
||||
t - terminal
|
||||
nt - non terminal
|
||||
func - function definition
|
||||
s - literal string
|
||||
num - literal number
|
||||
]]--
|
||||
local function splitlines(str)
|
||||
local t = {}
|
||||
local function helper(line) table.insert(t, line) return "" end
|
||||
helper((str:gsub("(.-)\r?\n", helper)))
|
||||
return t
|
||||
end
|
||||
function peg.pegToAST(input, defs)
|
||||
local r, e, sfail = p:match(input, defs)
|
||||
if not r then
|
||||
local lab
|
||||
if e == 0 then
|
||||
lab = "Syntax error"
|
||||
else
|
||||
lab = errmsgs[e]
|
||||
end
|
||||
local lines = splitlines(input)
|
||||
local line, col = re.calcline(input, #input - #sfail + 1)
|
||||
local err = {}
|
||||
table.insert(err, "L" .. line .. ":C" .. col .. ": " .. lab)
|
||||
table.insert(err, lines[line])
|
||||
table.insert(err, string.rep(" ", col-1) .. "^")
|
||||
error("syntax error(s) in pattern\n" .. table.concat(err, "\n"), 3)
|
||||
end
|
||||
return r
|
||||
end
|
||||
|
||||
|
||||
function peg.print_r ( t ) -- for debugging
|
||||
local print_r_cache={}
|
||||
local function sub_print_r(t,indent)
|
||||
if (print_r_cache[tostring(t)]) then
|
||||
print(indent.."*"..tostring(t))
|
||||
else
|
||||
print_r_cache[tostring(t)]=true
|
||||
if (type(t)=="table") then
|
||||
for pos,val in pairs(t) do
|
||||
if (type(val)=="table") then
|
||||
print(indent.."["..pos.."] => {")
|
||||
sub_print_r(val,indent..string.rep(" ",string.len(pos)+8))
|
||||
print(indent..string.rep(" ",string.len(pos)+6).."}")
|
||||
else
|
||||
print(indent.."["..pos.."] => '"..tostring(val).."'")
|
||||
end
|
||||
end
|
||||
else
|
||||
print(indent..tostring(t))
|
||||
end
|
||||
end
|
||||
end
|
||||
sub_print_r(t,"")
|
||||
end
|
||||
function peg.print_t ( t ) -- for debugging
|
||||
local print_r_cache={}
|
||||
local function sub_print_r (t,indent)
|
||||
if (print_r_cache[tostring(t)]) then
|
||||
print(indent.."*"..tostring(t))
|
||||
else
|
||||
print_r_cache[tostring(t)]=true
|
||||
if (type(t)=="table") then
|
||||
local function subprint (pos,val,indent)
|
||||
if (type(val)=="table") then
|
||||
print(indent.."{")
|
||||
sub_print_r(val,indent..string.rep(" ",string.len(pos)+8))
|
||||
print(indent..string.rep(" ",string.len(pos)-1).."},")
|
||||
else
|
||||
if type(val) ~= "number" then
|
||||
val = "'"..tostring(val).."'"
|
||||
end
|
||||
|
||||
if tonumber(pos) then
|
||||
print(indent..val..",")
|
||||
else
|
||||
print(indent..pos.."="..val..",")
|
||||
end
|
||||
end
|
||||
end
|
||||
if t["rule"] then
|
||||
subprint("rule",t["rule"],indent)
|
||||
end
|
||||
if t["pos"] then
|
||||
subprint("pos",t["pos"],indent)
|
||||
end
|
||||
for pos,val in pairs(t) do
|
||||
if pos ~= "rule" and pos ~= "pos" then
|
||||
subprint(pos,val,indent)
|
||||
end
|
||||
end
|
||||
else
|
||||
print(indent..tostring(t))
|
||||
end
|
||||
end
|
||||
end
|
||||
sub_print_r(t,"")
|
||||
end
|
||||
|
||||
function peg.calcline(subject, pos)
|
||||
return re.calcline(subject,pos)
|
||||
end
|
||||
return peg
|
Loading…
Add table
Add a link
Reference in a new issue