I accidentally made this commit really big after some git mishaps. Working on the zig grammar; it finally kind of works. Fixed some diagnostics warnings. Moved examples to tofix because fixing them is beside the point right now.
336 lines
No EOL
9.7 KiB
Lua
336 lines
No EOL
9.7 KiB
Lua
local re = require("deps.lpeglabel.relabel")
|
|
local util = require("src.util")
|
|
|
|
local peg = {}
|
|
|
|
-- from relabel.lua
|
|
|
|
local errinfo = {
|
|
{"NoPatt", "no pattern found"},
|
|
{"ExtraChars", "unexpected characters after the pattern"},
|
|
|
|
{"ExpPatt1", "expected a pattern after '/' or '//{...}'"},
|
|
|
|
{"ExpPatt2", "expected a pattern after '&'"},
|
|
{"ExpPatt3", "expected a pattern after '!'"},
|
|
|
|
{"ExpPatt4", "expected a pattern after '('"},
|
|
{"ExpPatt5", "expected a pattern after ':'"},
|
|
{"ExpPatt6", "expected a pattern after '{~'"},
|
|
{"ExpPatt7", "expected a pattern after '{|'"},
|
|
|
|
{"ExpPatt8", "expected a pattern after '<-'"},
|
|
|
|
{"ExpPattOrClose", "expected a pattern or closing '}' after '{'"},
|
|
|
|
{"ExpNum", "expected a number after '^', '+' or '-' (no space)"},
|
|
{"ExpNumOrLab", "expected a number or a label after ^"},
|
|
{"ExpCap", "expected a string, number, '{}' or name after '->'"},
|
|
|
|
{"ExpName1", "expected the name of a rule after '=>'"},
|
|
{"ExpName2", "expected the name of a rule after '=' (no space)"},
|
|
{"ExpName3", "expected the name of a rule after '<' (no space)"},
|
|
|
|
{"ExpLab1", "expected at least one label after '{'"},
|
|
{"ExpLab2", "expected a label after the comma"},
|
|
|
|
{"ExpNameOrLab", "expected a name or label after '%' (no space)"},
|
|
|
|
{"ExpItem", "expected at least one item after '[' or '^'"},
|
|
|
|
{"MisClose1", "missing closing ')'"},
|
|
{"MisClose2", "missing closing ':}'"},
|
|
{"MisClose3", "missing closing '~}'"},
|
|
{"MisClose4", "missing closing '|}'"},
|
|
{"MisClose5", "missing closing '}'"}, -- for the captures
|
|
|
|
{"MisClose6", "missing closing '>'"},
|
|
{"MisClose7", "missing closing '}'"}, -- for the labels
|
|
|
|
{"MisClose8", "missing closing ']'"},
|
|
|
|
{"MisTerm1", "missing terminating single quote"},
|
|
{"MisTerm2", "missing terminating double quote"},
|
|
}
|
|
|
|
-- Derive two lookup tables from errinfo:
--   errmsgs: numeric label id -> human readable message
--   labels:  label name       -> numeric label id
-- The id of a label is its position in errinfo.
local errmsgs = {}
local labels = {}

for id = 1, #errinfo do
  local entry = errinfo[id]
  local name, message = entry[1], entry[2]
  errmsgs[id] = message
  labels[name] = id
end

-- Register the name -> id mapping with relabel before the grammar is compiled.
re.setlabels(labels)
|
|
|
|
--- Join two values with Lua's concatenation operator.
-- Used as a capture function by the grammar (see the `range` rule).
-- @param a left operand
-- @param b right operand
-- @return the result of `a .. b`
local function concat(a, b)
  local joined = a .. b
  return joined
end
|
|
--- Left-fold a sequence of parsed operands into a nested AST node.
--
-- The first element of `t` seeds the accumulator; every following element is
-- combined with the accumulator into a new node whose `op1` is the previous
-- accumulator. How the element is combined depends on `action`:
--
--   * "suf": the element is `{act, arg}`. For "*", "?" and "+" the node is
--     `{action=act, op1=acc}`; for any other suffix it also carries
--     `op2=arg`.
--   * "or" with a two-element array value: the element is treated as
--     `{labels, expr}` (a recovery expression) and the node is
--     `{action="or", op1=acc, op2=expr, condition=labels}`.
--   * anything else: `{action=action, op1=acc, op2=element}`.
--
-- The fold is order-dependent, so the elements are walked with `ipairs`
-- (indices 1..n) rather than `pairs`, whose traversal order is unspecified.
--
-- @param action string: "suf", "or", "and", ...
-- @param t sequence of operands (array part only is considered)
-- @return the folded node, the sole element when `t` has one entry,
--         or nil when `t` is empty
local function foldtable(action, t)
  local acc
  for index, value in ipairs(t) do
    if index == 1 then
      acc = value
    elseif action == "suf" then -- suffix actions
      local act = value[1]
      if act == "*" or act == "?" or act == "+" then
        acc = {action = act, op1 = acc}
      else
        acc = {action = act, op1 = acc, op2 = value[2]}
      end
    elseif action == "or" and #value == 2 then -- recovery expression
      local condition = value[1]
      local op2 = value[2]
      acc = {action = action, op1 = acc, op2 = op2, condition = condition}
    else
      acc = {action = action, op1 = acc, op2 = value}
    end
  end
  return acc
end
|
|
|
|
|
|
local gram = [=[
|
|
|
|
pattern <- (exp / %{NoPatt}) (!. / %{ExtraChars})
|
|
exp <- S (grammar / alternative)
|
|
|
|
labels <- {| '{' {: (label / %{ExpLab1}) :} (',' {: (label / %{ExpLab2}) :})* ('}' / %{MisClose7}) |}
|
|
|
|
|
|
alternative <- ( {:''->'or':} {| {: seq :} ('/' (('/' {| {: labels :} S {: (seq / %{ExpPatt1}) :} |}) / (S {: (seq / %{ExpPatt1}) :} ) ) )* |} ) -> foldtable
|
|
|
|
|
|
seq <- ( {:''->'and':} {| {: prefix :}+ |} ) -> foldtable
|
|
|
|
|
|
prefix <- {| {:action: '&' :} S {:op1: (prefix / %{ExpPatt2}) :} |}
|
|
/ {| {:action: '!' :} S {:op1: (prefix / %{ExpPatt3}) :} |}
|
|
/ suffix
|
|
|
|
suffix <- ( {:''->'suf':} {| primary S {| suffixaction S |}* |} ) -> foldtable
|
|
|
|
|
|
suffixaction <- {[+*?]}
|
|
/ {'^'} {| {:num: [+-]? NUM:} |}
|
|
/ '^'->'^LABEL' (label / %{ExpNumOrLab})
|
|
/ {'->'} S ((string / {| {:action:'{}'->'poscap':} |} / funcname / {|{:num: NUM :} |}) / %{ExpCap})
|
|
/ {'=>'} S (funcname / %{ExpName1})
|
|
|
|
|
|
|
|
|
|
primary <- '(' (exp / %{ExpPatt4}) (')' / %{MisClose1})
|
|
/ term
|
|
/ class
|
|
/ defined
|
|
/ {| {:action: '%'->'label':} ('{' / %{ExpNameOrLab}) S ({:op1: label:} / %{ExpLab1}) S ('}' / %{MisClose7}) |}
|
|
/ {| {:action: '{:'->'gcap':} {:op2: defname:} ':' !'}' ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
|
|
/ {| {:action: '{:'->'gcap':} ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
|
|
/ {| {:action: '='->'bref':} ({:op1: defname:} / %{ExpName2}) |}
|
|
/ {| {:action: '{}'->'poscap':} |}
|
|
/ {| {:action: '{~'->'subcap':} ({:op1: exp:} / %{ExpPatt6}) ('~}' / %{MisClose3}) |}
|
|
/ {| {:action: '{|'->'tcap':} ({:op1: exp:} / %{ExpPatt7}) ('|}' / %{MisClose4}) |}
|
|
/ {| {:action: '{'->'scap':} ({:op1: exp:} / %{ExpPattOrClose}) ('}' / %{MisClose5}) |}
|
|
/ {| {:action: '.'->'anychar':} |}
|
|
/ !frag !nodee name S !ARROW
|
|
/ '<' (name / %{ExpName3}) ('>' / %{MisClose6}) -- old-style non terminals
|
|
|
|
grammar <- {| definition+ |}
|
|
definition <- {| (frag / nodee)? (token / nontoken) S ARROW ({:rule: exp :} / %{ExpPatt8}) |}
|
|
|
|
label <- {| {:s: ERRORNAME :} |}
|
|
|
|
frag <- {:fragment: 'fragment'->'1' :} ![0-9_a-z] S !ARROW
|
|
nodee <- {:node: 'node'->'1' :} ![0-9_a-z] S !ARROW
|
|
token <- {:rulename: TOKENNAME :} {:token:''->'1':}
|
|
nontoken <- {:rulename: NAMESTRING :}
|
|
|
|
class <- '[' ( ('^' {| {:action:''->'invert':} {:op1: classset :} |} ) / classset ) (']' / %{MisClose8})
|
|
classset <- ( {:''->'or':} {| {: (item / %{ExpItem}) :} (!']' {: (item / %{ExpItem}) :})* |} ) -> foldtable
|
|
item <- defined / range / {| {:t: . :} |}
|
|
range <- {| {:action:''->'range':} {:op1: {| {:s: ({: . :} ('-') {: [^]] :} ) -> concat :} |} :} |}
|
|
|
|
S <- (%s / '--' [^%nl]*)* -- spaces and comments
|
|
name <- {| {:nt: TOKENNAME :} {:token:''->'1':} / {:nt: NAMESTRING :} |}
|
|
|
|
funcname <- {| {:func: NAMESTRING :} |}
|
|
ERRORNAME <- NAMESTRING
|
|
NAMESTRING <- [A-Za-z][A-Za-z0-9_]*
|
|
TOKENNAME <- [A-Z_]+ ![0-9a-z]
|
|
defname <- {| {:s: NAMESTRING :} |}
|
|
ARROW <- '<-'
|
|
NUM <- [0-9]+
|
|
term <- {| '"' {:t: [^"]* :} ('"' / %{MisTerm2}) / "'" {:t: [^']* :} ("'" / %{MisTerm1}) |}
|
|
string <- {| '"' {:s: [^"]* :} ('"' / %{MisTerm2}) / "'" {:s: [^']* :} ("'" / %{MisTerm1}) |}
|
|
defined <- {| {:action: '%':} {:op1: defname :} |}
|
|
]=]
|
|
|
|
-- Capture functions referenced by name (`-> foldtable`, `-> concat`) in gram.
local defs = {
  foldtable = foldtable,
  concat = concat,
}

-- Expose the grammar source, its capture functions and the label ids.
peg.gram, peg.defs, peg.labels = gram, defs, labels

-- Compiled once at load time; used by peg.pegToAST.
local p = re.compile(gram, defs)
|
|
|
|
|
|
|
|
|
|
--[[
|
|
Function: pegToAST(input)
|
|
|
|
Input: a grammar in PEG format, described in https://github.com/vsbenas/parser-gen
|
|
|
|
Output: if parsing successful - a table of grammar rules, else - runtime error
|
|
|
|
Example input: "
|
|
|
|
Program <- stmt* / SPACE
|
|
stmt <- ('a' / 'b')+
|
|
SPACE <- ''
|
|
|
|
"
|
|
|
|
Example output: {
|
|
{rulename = "Program", rule = {action = "or", op1 = {action = "*", op1 = {nt = "stmt"}}, op2 = {nt = "SPACE", token="1"}}},
|
|
{rulename = "stmt", rule = {action = "+", op1 = {action="or", op1 = {t = "a"}, op2 = {t = "b"}}}},
|
|
{rulename = "SPACE", rule = {t=""}, token="1"},
|
|
}
|
|
|
|
The rules are further processed and turned into lpeg compatible format in parser-gen.lua
|
|
|
|
Action names:
|
|
or (has parameter condition for recovery expressions)
|
|
and
|
|
&
|
|
!
|
|
+
|
|
*
|
|
?
|
|
^num (num is a number with an optional plus or minus sign)
|
|
^label (label is an error label set with setlabels)
|
|
->
|
|
=>
|
|
tcap
|
|
gcap (op2= name, anonymous otherwise)
|
|
bref
|
|
poscap
|
|
subcap
|
|
scap
|
|
anychar
|
|
label
|
|
%
|
|
range
|
|
|
|
Final token actions:
|
|
t - terminal
|
|
nt - non terminal
|
|
func - function definition
|
|
s - literal string
|
|
num - literal number
|
|
]]--
|
|
--- Split a string into lines on "\n" (an optional preceding "\r" is dropped).
-- The text after the last newline is always appended as the final entry, so
-- a string ending in a newline yields a trailing empty string and an empty
-- input yields `{""}`.
-- @param str string to split
-- @return array of line strings
local function splitlines(str)
  local lines = {}
  -- gsub consumes every newline-terminated line; what is left over is the
  -- unterminated tail of the input.
  local rest = str:gsub("(.-)\r?\n", function(line)
    lines[#lines + 1] = line
    return ""
  end)
  lines[#lines + 1] = rest
  return lines
end
|
|
--- Parse a grammar written in PEG notation into its AST.
--
-- On success the capture produced by the compiled grammar `p` is returned.
-- On failure an error is raised (level 3) whose message contains the
-- line/column, the error description, the offending source line and a caret
-- under the failing column.
--
-- @param input string: the grammar text
-- @param defs passed through unchanged as the second argument of `p:match`
--        NOTE(review): lpeg-style `match` takes an initial position in that
--        slot; confirm callers only pass nil or a number here.
-- @return the parsed grammar
function peg.pegToAST(input, defs)
  local r, e, sfail = p:match(input, defs)
  if r then
    return r
  end

  -- Label 0 is treated as a generic failure. Also fall back to the generic
  -- text when the label has no registered message, so that building the
  -- report below cannot fail on a nil description and hide the real error.
  local lab = (e ~= 0 and errmsgs[e]) or "Syntax error"

  local lines = splitlines(input)
  -- sfail is used as the unmatched suffix of input: its length gives the
  -- 1-based position at which matching failed.
  local line, col = re.calcline(input, #input - #sfail + 1)

  local err = {}
  err[#err + 1] = "L" .. line .. ":C" .. col .. ": " .. lab
  err[#err + 1] = lines[line]
  err[#err + 1] = string.rep(" ", col - 1) .. "^"
  error("syntax error(s) in pattern\n" .. table.concat(err, "\n"), 3)
end
|
|
|
|
|
|
--- Debugging aid: recursively print a value with `print`.
-- Tables are printed one `[key] => value` entry per line, nested tables are
-- indented below their key. A value whose `tostring` form was already seen
-- is printed as `*<tostring>` instead of being expanded again.
-- @param t value to print
function peg.print_r(t) -- for debugging
  local visited = {}

  local function dump(node, pad)
    local id = tostring(node)
    if visited[id] then
      print(pad .. "*" .. id)
      return
    end
    visited[id] = true

    if type(node) ~= "table" then
      print(pad .. id)
      return
    end

    for key, value in pairs(node) do
      if type(value) == "table" then
        print(pad .. "[" .. key .. "] => {")
        local width = string.len(key)
        dump(value, pad .. string.rep(" ", width + 8))
        print(pad .. string.rep(" ", width + 6) .. "}")
      else
        print(pad .. "[" .. key .. "] => '" .. tostring(value) .. "'")
      end
    end
  end

  dump(t, "")
end
|
|
--- Debugging aid: print a value in a compact, table-constructor-like layout.
-- For tables, the "rule" and "pos" entries (when truthy) are printed first,
-- then every other entry in `pairs` order. Entries whose key converts to a
-- number are printed without the `key=` prefix; non-number values are wrapped
-- in single quotes. A value whose `tostring` form was already seen is printed
-- as `*<tostring>` instead of being expanded again.
-- @param t value to print
function peg.print_t(t) -- for debugging
  local seen = {}
  local visit -- forward declaration: visit and emit call each other

  -- Print one key/value entry of a table at the given indentation.
  local function emit(key, value, pad)
    if type(value) == "table" then
      print(pad .. "{")
      visit(value, pad .. string.rep(" ", string.len(key) + 8))
      print(pad .. string.rep(" ", string.len(key) - 1) .. "},")
      return
    end

    local text = value
    if type(text) ~= "number" then
      text = "'" .. tostring(text) .. "'"
    end

    if tonumber(key) then
      print(pad .. text .. ",")
    else
      print(pad .. key .. "=" .. text .. ",")
    end
  end

  function visit(node, pad)
    local id = tostring(node)
    if seen[id] then
      print(pad .. "*" .. id)
      return
    end
    seen[id] = true

    if type(node) ~= "table" then
      print(pad .. id)
      return
    end

    if node["rule"] then
      emit("rule", node["rule"], pad)
    end
    if node["pos"] then
      emit("pos", node["pos"], pad)
    end
    for key, value in pairs(node) do
      if key ~= "rule" and key ~= "pos" then
        emit(key, value, pad)
      end
    end
  end

  visit(t, "")
end
|
|
|
|
--- Forward to `re.calcline` so callers need not require relabel themselves.
-- @param subject the text that was parsed
-- @param pos position within `subject`
-- @return whatever `re.calcline(subject, pos)` returns (used elsewhere in
--         this file as `line, col`)
function peg.calcline(subject, pos)
  return re.calcline(subject, pos)
end
|
|
return peg |