2023-09-19 11:42:10 +01:00
|
|
|
local re = require("deps.lpeglabel.relabel")
|
2023-09-19 11:42:10 +01:00
|
|
|
local util = require("src.util")
|
2023-07-06 11:48:59 +01:00
|
|
|
|
|
|
|
-- Module table: the public functions and fields are attached to it below,
-- and it is the value returned at the end of this file.
local peg = {}
|
|
|
|
|
|
|
|
-- from relabel.lua
|
|
|
|
|
|
|
|
-- Error catalogue. Each entry is { label name, message }: the name is what
-- the grammar refers to as %{Name}, the message is what pegToAST reports.
-- The position of an entry in this list becomes its numeric label id.
local errinfo = {
  { "NoPatt", "no pattern found" },
  { "ExtraChars", "unexpected characters after the pattern" },

  { "ExpPatt1", "expected a pattern after '/' or '//{...}'" },

  { "ExpPatt2", "expected a pattern after '&'" },
  { "ExpPatt3", "expected a pattern after '!'" },

  { "ExpPatt4", "expected a pattern after '('" },
  { "ExpPatt5", "expected a pattern after ':'" },
  { "ExpPatt6", "expected a pattern after '{~'" },
  { "ExpPatt7", "expected a pattern after '{|'" },

  { "ExpPatt8", "expected a pattern after '<-'" },

  { "ExpPattOrClose", "expected a pattern or closing '}' after '{'" },

  { "ExpNum", "expected a number after '^', '+' or '-' (no space)" },
  { "ExpNumOrLab", "expected a number or a label after ^" },
  { "ExpCap", "expected a string, number, '{}' or name after '->'" },

  { "ExpName1", "expected the name of a rule after '=>'" },
  { "ExpName2", "expected the name of a rule after '=' (no space)" },
  { "ExpName3", "expected the name of a rule after '<' (no space)" },

  { "ExpLab1", "expected at least one label after '{'" },
  { "ExpLab2", "expected a label after the comma" },

  { "ExpNameOrLab", "expected a name or label after '%' (no space)" },

  { "ExpItem", "expected at least one item after '[' or '^'" },

  { "MisClose1", "missing closing ')'" },
  { "MisClose2", "missing closing ':}'" },
  { "MisClose3", "missing closing '~}'" },
  { "MisClose4", "missing closing '|}'" },
  { "MisClose5", "missing closing '}'" }, -- for the captures

  { "MisClose6", "missing closing '>'" },
  { "MisClose7", "missing closing '}'" }, -- for the labels

  { "MisClose8", "missing closing ']'" },

  { "MisTerm1", "missing terminating single quote" },
  { "MisTerm2", "missing terminating double quote" },
}

-- errmsgs: numeric label id -> message
-- labels:  label name       -> numeric label id
local errmsgs, labels = {}, {}

for id = 1, #errinfo do
  local entry = errinfo[id]
  local name, message = entry[1], entry[2]
  errmsgs[id] = message
  labels[name] = id
end

-- Hand the name -> id mapping to relabel.
re.setlabels(labels)
|
|
|
|
|
|
|
|
-- Semantic action: glues two captured values together with `..`.
-- Referenced from the grammar's `range` rule as `-> concat`.
local function concat(a, b)
  local joined = a .. b
  return joined
end
|
|
|
|
--- Semantic action: left-folds a list of captures into one nested AST node.
--
-- The first element of `t` is the seed; every following element wraps the
-- node built so far as `op1` of a new node.
--
-- @param action  "suf", "or", "and" (or any other binary action name)
-- @param t       sequence of captures produced by a table capture
-- @return        the folded node; the first element itself when `t` has one
--                element; nil when `t` is empty
--
-- Shapes produced:
--   action == "suf": each element is { act [, operand] }. For "*", "?" and
--     "+" the node is {action=act, op1=prev}; otherwise the operand is kept
--     as op2.
--   action == "or" and the element is a two-item list: treated as a recovery
--     expression { labels, pattern } -> {action="or", op1=prev, op2=pattern,
--     condition=labels}.
--   anything else: {action=action, op1=prev, op2=element}.
local function foldtable(action, t)
  local node
  -- ipairs, not pairs: the fold is order-sensitive and pairs gives no
  -- ordering guarantee.
  for index, value in ipairs(t) do
    if index == 1 then
      node = value
    elseif action == "suf" then -- suffix actions
      local act = value[1]
      if act == "*" or act == "?" or act == "+" then
        node = { action = act, op1 = node }
      else
        node = { action = act, op1 = node, op2 = value[2] }
      end
    elseif action == "or" and #value == 2 then -- recovery expression
      node = { action = action, op1 = node, op2 = value[2], condition = value[1] }
    else
      node = { action = action, op1 = node, op2 = value }
    end
  end
  return node
end
|
|
|
|
|
|
|
|
|
|
|
|
-- Source text of the grammar-description language accepted by peg.pegToAST,
-- written in relabel syntax. It is compiled with re.compile further down and
-- also exported unchanged as peg.gram. The names after `->` (foldtable,
-- concat) are looked up in the `defs` table passed to re.compile; the
-- %{Name} markers use the label names registered through re.setlabels.
local gram = [=[
|
|
|
|
|
|
|
|
pattern <- (exp / %{NoPatt}) (!. / %{ExtraChars})
|
|
|
|
exp <- S (grammar / alternative)
|
|
|
|
|
|
|
|
labels <- {| '{' {: (label / %{ExpLab1}) :} (',' {: (label / %{ExpLab2}) :})* ('}' / %{MisClose7}) |}
|
|
|
|
|
|
|
|
|
|
|
|
alternative <- ( {:''->'or':} {| {: seq :} ('/' (('/' {| {: labels :} S {: (seq / %{ExpPatt1}) :} |}) / (S {: (seq / %{ExpPatt1}) :} ) ) )* |} ) -> foldtable
|
|
|
|
|
|
|
|
|
|
|
|
seq <- ( {:''->'and':} {| {: prefix :}+ |} ) -> foldtable
|
|
|
|
|
|
|
|
|
|
|
|
prefix <- {| {:action: '&' :} S {:op1: (prefix / %{ExpPatt2}) :} |}
|
|
|
|
/ {| {:action: '!' :} S {:op1: (prefix / %{ExpPatt3}) :} |}
|
|
|
|
/ suffix
|
|
|
|
|
|
|
|
suffix <- ( {:''->'suf':} {| primary S {| suffixaction S |}* |} ) -> foldtable
|
|
|
|
|
|
|
|
|
|
|
|
suffixaction <- {[+*?]}
|
|
|
|
/ {'^'} {| {:num: [+-]? NUM:} |}
|
|
|
|
/ '^'->'^LABEL' (label / %{ExpNumOrLab})
|
|
|
|
/ {'->'} S ((string / {| {:action:'{}'->'poscap':} |} / funcname / {|{:num: NUM :} |}) / %{ExpCap})
|
|
|
|
/ {'=>'} S (funcname / %{ExpName1})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
primary <- '(' (exp / %{ExpPatt4}) (')' / %{MisClose1})
|
|
|
|
/ term
|
|
|
|
/ class
|
|
|
|
/ defined
|
|
|
|
/ {| {:action: '%'->'label':} ('{' / %{ExpNameOrLab}) S ({:op1: label:} / %{ExpLab1}) S ('}' / %{MisClose7}) |}
|
|
|
|
/ {| {:action: '{:'->'gcap':} {:op2: defname:} ':' !'}' ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
|
|
|
|
/ {| {:action: '{:'->'gcap':} ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
|
|
|
|
/ {| {:action: '='->'bref':} ({:op1: defname:} / %{ExpName2}) |}
|
|
|
|
/ {| {:action: '{}'->'poscap':} |}
|
|
|
|
/ {| {:action: '{~'->'subcap':} ({:op1: exp:} / %{ExpPatt6}) ('~}' / %{MisClose3}) |}
|
|
|
|
/ {| {:action: '{|'->'tcap':} ({:op1: exp:} / %{ExpPatt7}) ('|}' / %{MisClose4}) |}
|
|
|
|
/ {| {:action: '{'->'scap':} ({:op1: exp:} / %{ExpPattOrClose}) ('}' / %{MisClose5}) |}
|
|
|
|
/ {| {:action: '.'->'anychar':} |}
|
|
|
|
/ !frag !nodee name S !ARROW
|
|
|
|
/ '<' (name / %{ExpName3}) ('>' / %{MisClose6}) -- old-style non terminals
|
|
|
|
|
|
|
|
grammar <- {| definition+ |}
|
|
|
|
definition <- {| (frag / nodee)? (token / nontoken) S ARROW ({:rule: exp :} / %{ExpPatt8}) |}
|
|
|
|
|
|
|
|
label <- {| {:s: ERRORNAME :} |}
|
|
|
|
|
|
|
|
frag <- {:fragment: 'fragment'->'1' :} ![0-9_a-z] S !ARROW
|
|
|
|
nodee <- {:node: 'node'->'1' :} ![0-9_a-z] S !ARROW
|
|
|
|
token <- {:rulename: TOKENNAME :} {:token:''->'1':}
|
|
|
|
nontoken <- {:rulename: NAMESTRING :}
|
|
|
|
|
|
|
|
class <- '[' ( ('^' {| {:action:''->'invert':} {:op1: classset :} |} ) / classset ) (']' / %{MisClose8})
|
|
|
|
classset <- ( {:''->'or':} {| {: (item / %{ExpItem}) :} (!']' {: (item / %{ExpItem}) :})* |} ) -> foldtable
|
|
|
|
item <- defined / range / {| {:t: . :} |}
|
|
|
|
range <- {| {:action:''->'range':} {:op1: {| {:s: ({: . :} ('-') {: [^]] :} ) -> concat :} |} :} |}
|
|
|
|
|
|
|
|
S <- (%s / '--' [^%nl]*)* -- spaces and comments
|
|
|
|
name <- {| {:nt: TOKENNAME :} {:token:''->'1':} / {:nt: NAMESTRING :} |}
|
|
|
|
|
|
|
|
funcname <- {| {:func: NAMESTRING :} |}
|
|
|
|
ERRORNAME <- NAMESTRING
|
|
|
|
NAMESTRING <- [A-Za-z][A-Za-z0-9_]*
|
|
|
|
TOKENNAME <- [A-Z_]+ ![0-9a-z]
|
|
|
|
defname <- {| {:s: NAMESTRING :} |}
|
|
|
|
ARROW <- '<-'
|
|
|
|
NUM <- [0-9]+
|
|
|
|
term <- {| '"' {:t: [^"]* :} ('"' / %{MisTerm2}) / "'" {:t: [^']* :} ("'" / %{MisTerm1}) |}
|
|
|
|
string <- {| '"' {:s: [^"]* :} ('"' / %{MisTerm2}) / "'" {:s: [^']* :} ("'" / %{MisTerm1}) |}
|
|
|
|
defined <- {| {:action: '%':} {:op1: defname :} |}
|
|
|
|
]=]
|
|
|
|
|
|
|
|
-- Semantic actions that `gram` refers to by name (`-> foldtable`, `-> concat`).
local defs = { concat = concat, foldtable = foldtable }

-- Expose the grammar source, its actions and the label table on the module.
peg.labels = labels
peg.defs = defs
peg.gram = gram

-- Compiled matcher for `gram`; peg.pegToAST runs it against its input.
local p = re.compile(gram, defs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
--[[
|
|
|
|
Function: pegToAST(input)
|
|
|
|
|
|
|
|
Input: a grammar in PEG format, described in https://github.com/vsbenas/parser-gen
|
|
|
|
|
|
|
|
Output: if parsing successful - a table of grammar rules, else - runtime error
|
|
|
|
|
|
|
|
Example input: "
|
|
|
|
|
|
|
|
Program <- stmt* / SPACE
|
|
|
|
stmt <- ('a' / 'b')+
|
|
|
|
SPACE <- ''
|
|
|
|
|
|
|
|
"
|
|
|
|
|
|
|
|
Example output: {
|
|
|
|
{rulename = "Program", rule = {action = "or", op1 = {action = "*", op1 = {nt = "stmt"}}, op2 = {nt = "SPACE", token="1"}}},
|
|
|
|
{rulename = "stmt", rule = {action = "+", op1 = {action="or", op1 = {t = "a"}, op2 = {t = "b"}}}},
|
|
|
|
{rulename = "SPACE", rule = {t=""}, token="1"},
|
|
|
|
}
|
|
|
|
|
|
|
|
The rules are further processed and turned into lpeg compatible format in parser-gen.lua
|
|
|
|
|
|
|
|
Action names:
|
|
|
|
or (has parameter condition for recovery expressions)
|
|
|
|
and
|
|
|
|
&
|
|
|
|
!
|
|
|
|
+
|
|
|
|
*
|
|
|
|
?
|
|
|
|
^num (num is a number with an optional plus or minus sign)
|
|
|
|
^label (label is an error label set with setlabels)
|
|
|
|
->
|
|
|
|
=>
|
|
|
|
tcap
|
|
|
|
gcap (op2= name, anonymous otherwise)
|
|
|
|
bref
|
|
|
|
poscap
|
|
|
|
subcap
|
|
|
|
scap
|
|
|
|
anychar
|
|
|
|
label
|
|
|
|
%
|
|
|
|
range
|
|
|
|
|
|
|
|
Final token actions:
|
|
|
|
t - terminal
|
|
|
|
nt - non terminal
|
|
|
|
func - function definition
|
|
|
|
s - literal string
|
|
|
|
num - literal number
|
|
|
|
]]--
|
|
|
|
-- Splits `str` into a list of lines.
-- A line ends at "\n"; one "\r" directly before that "\n" is dropped too.
-- Whatever follows the last "\n" (possibly the empty string) is always
-- appended as the final entry, so the result has (number of "\n") + 1 items.
local function splitlines(str)
  local lines = {}
  local start = 1
  while true do
    local nl = str:find("\n", start, true)
    if not nl then
      break
    end
    local stop = nl - 1
    -- strip a single carriage return belonging to this line ending
    if stop >= start and str:sub(stop, stop) == "\r" then
      stop = stop - 1
    end
    lines[#lines + 1] = str:sub(start, stop)
    start = nl + 1
  end
  lines[#lines + 1] = str:sub(start)
  return lines
end
|
|
|
|
--- Parses a grammar description and returns its AST.
--
-- @param input  grammar text in the dialect described by `gram`
-- @param defs   optional; forwarded to the matcher as an extra argument
-- @return       the capture produced by the compiled grammar
--
-- On a failed match an error is raised (level 3) whose message contains
-- "L<line>:C<col>: <message>", the offending source line and a caret under
-- the failing column.
function peg.pegToAST(input, defs)
  -- match takes (subject, init, ...). Give the start position explicitly so
  -- that `defs` is passed as an extra argument rather than being taken for
  -- `init`, which would raise a bad-argument error for any non-nil `defs`.
  local r, e, sfail = p:match(input, 1, defs)
  if r then
    return r
  end

  -- Label 0 is the generic failure; also fall back to the generic text when
  -- the label has no registered message instead of crashing on a nil concat.
  local lab = (e ~= 0 and errmsgs[e]) or "Syntax error"

  local lines = splitlines(input)
  -- `sfail` is the unmatched suffix, so its length locates the error position.
  local line, col = re.calcline(input, #input - #sfail + 1)

  local err = {
    "L" .. line .. ":C" .. col .. ": " .. lab,
    lines[line] or "",
    string.rep(" ", col - 1) .. "^",
  }
  error("syntax error(s) in pattern\n" .. table.concat(err, "\n"), 3)
end
|
|
|
|
|
|
|
|
|
|
|
|
--- Debug helper: prints `t` recursively with print(), one entry per line,
-- as "[key] => 'value'" or "[key] => {" ... "}" for nested tables.
-- A table that was already printed is shown as "*" followed by its tostring
-- form instead of being expanded again, so cyclic structures terminate.
-- @param t  any value; non-tables are printed with tostring
function peg.print_r(t) -- for debugging
  -- Visited set keyed by the table itself, so tables whose __tostring
  -- happens to produce equal text are not confused with each other.
  local seen = {}

  local function sub_print_r(value, indent)
    if type(value) ~= "table" then
      print(indent .. tostring(value))
      return
    end
    if seen[value] then
      print(indent .. "*" .. tostring(value))
      return
    end
    seen[value] = true

    for pos, val in pairs(value) do
      -- tostring: keys may be booleans or tables, which cannot be
      -- concatenated or measured with string.len directly.
      local key = tostring(pos)
      if type(val) == "table" then
        print(indent .. "[" .. key .. "] => {")
        sub_print_r(val, indent .. string.rep(" ", #key + 8))
        print(indent .. string.rep(" ", #key + 6) .. "}")
      else
        print(indent .. "[" .. key .. "] => '" .. tostring(val) .. "'")
      end
    end
  end

  sub_print_r(t, "")
end
|
|
|
|
--- Debug helper: prints `t` recursively with print() in a form that
-- resembles a Lua table constructor ("key='value'," / "{" ... "},").
-- The "rule" and "pos" fields of each table, when truthy, are printed
-- first; the remaining entries follow in pairs() order. Entries whose key
-- converts to a number are printed without the "key=" prefix. A table that
-- was already printed is shown as "*" followed by its tostring form.
-- @param t  any value; non-tables are printed with tostring
function peg.print_t(t) -- for debugging
  -- Visited set keyed by the table itself rather than by its tostring text.
  local seen = {}
  local sub_print_r -- forward declaration: subprint and sub_print_r recurse into each other

  -- Prints a single key/value entry at the given indentation.
  local function subprint(pos, val, indent)
    -- tostring: keys may be booleans or tables, which cannot be
    -- concatenated or measured with string.len directly.
    local key = tostring(pos)
    if type(val) == "table" then
      print(indent .. "{")
      sub_print_r(val, indent .. string.rep(" ", #key + 8))
      print(indent .. string.rep(" ", #key - 1) .. "},")
    else
      if type(val) ~= "number" then
        val = "'" .. tostring(val) .. "'"
      end
      if tonumber(pos) then
        print(indent .. val .. ",")
      else
        print(indent .. key .. "=" .. val .. ",")
      end
    end
  end

  sub_print_r = function(value, indent)
    if type(value) ~= "table" then
      print(indent .. tostring(value))
      return
    end
    if seen[value] then
      print(indent .. "*" .. tostring(value))
      return
    end
    seen[value] = true

    if value["rule"] then
      subprint("rule", value["rule"], indent)
    end
    if value["pos"] then
      subprint("pos", value["pos"], indent)
    end
    for pos, val in pairs(value) do
      if pos ~= "rule" and pos ~= "pos" then
        subprint(pos, val, indent)
      end
    end
  end

  sub_print_r(t, "")
end
|
|
|
|
|
|
|
|
-- Forwards to re.calcline and returns whatever it returns
-- (pegToAST uses that result as a line number and a column).
peg.calcline = function(subject, pos)
  return re.calcline(subject, pos)
end
|
|
|
|
return peg
|