lang-bootstrap/06/deps/parser-gen/peg-parser.lua

336 lines
9.7 KiB
Lua
Raw Normal View History

local re = require("deps.lpeglabel.relabel")
local util = require("src.util")
local peg = {}
-- Error labels and their messages (taken from relabel.lua).
-- Every entry is {label name, message}; an entry's position in this list
-- is the numeric label handed to re.setlabels below.
local errinfo = {
  {"NoPatt", "no pattern found"},
  {"ExtraChars", "unexpected characters after the pattern"},

  {"ExpPatt1", "expected a pattern after '/' or '//{...}'"},
  {"ExpPatt2", "expected a pattern after '&'"},
  {"ExpPatt3", "expected a pattern after '!'"},
  {"ExpPatt4", "expected a pattern after '('"},
  {"ExpPatt5", "expected a pattern after ':'"},
  {"ExpPatt6", "expected a pattern after '{~'"},
  {"ExpPatt7", "expected a pattern after '{|'"},
  {"ExpPatt8", "expected a pattern after '<-'"},
  {"ExpPattOrClose", "expected a pattern or closing '}' after '{'"},

  {"ExpNum", "expected a number after '^', '+' or '-' (no space)"},
  {"ExpNumOrLab", "expected a number or a label after ^"},
  {"ExpCap", "expected a string, number, '{}' or name after '->'"},
  {"ExpName1", "expected the name of a rule after '=>'"},
  {"ExpName2", "expected the name of a rule after '=' (no space)"},
  {"ExpName3", "expected the name of a rule after '<' (no space)"},
  {"ExpLab1", "expected at least one label after '{'"},
  {"ExpLab2", "expected a label after the comma"},
  {"ExpNameOrLab", "expected a name or label after '%' (no space)"},
  {"ExpItem", "expected at least one item after '[' or '^'"},

  {"MisClose1", "missing closing ')'"},
  {"MisClose2", "missing closing ':}'"},
  {"MisClose3", "missing closing '~}'"},
  {"MisClose4", "missing closing '|}'"},
  {"MisClose5", "missing closing '}'"}, -- for the captures
  {"MisClose6", "missing closing '>'"},
  {"MisClose7", "missing closing '}'"}, -- for the labels
  {"MisClose8", "missing closing ']'"},

  {"MisTerm1", "missing terminating single quote"},
  {"MisTerm2", "missing terminating double quote"},
}

-- errmsgs maps numeric label -> message; labels maps label name -> numeric label.
local errmsgs = {}
local labels = {}
for index = 1, #errinfo do
  local entry = errinfo[index]
  local name, message = entry[1], entry[2]
  errmsgs[index] = message
  labels[name] = index
end
re.setlabels(labels)
-- Joins its two arguments with the `..` operator.
-- Made available to the grammar through the `defs` table.
local function concat(a,b)
  local joined = a .. b
  return joined
end
--- Folds a sequence of captured values into a left-nested AST node.
-- The first element seeds the result; each following element wraps the
-- result built so far as `op1` of a new node.
-- @param action "suf" (suffix list), "or" (alternatives), or any other
--   action name, which is stored verbatim in the `action` field
-- @param t sequence of captured values, in match order
-- @return the folded node; the sole element itself when `t` has exactly one
--   element; nil when `t` is empty
local function foldtable(action,t)
  -- ipairs rather than pairs: the fold depends on left-to-right order and
  -- pairs gives no ordering guarantee, even for integer keys.
  local result
  for index, value in ipairs(t) do
    if index == 1 then
      result = value
    elseif action == "suf" then -- suffix actions
      -- value is {operator} or {operator, operand}
      local act = value[1]
      if act == "*" or act == "?" or act == "+" then
        result = {action=act, op1=result}
      else
        result = {action=act, op1=result, op2=value[2]}
      end
    elseif action == "or" and #value == 2 then -- recovery expression
      -- value is {labels, expression}; the labels become the condition
      result = {action=action, op1=result, op2=value[2], condition=value[1]}
    else
      result = {action=action, op1=result, op2=value}
    end
  end
  return result
end
-- Grammar text describing the PEG dialect this module parses.
-- It is compiled once, further down, with `re.compile`; the `-> foldtable`
-- and `-> concat` actions are looked up in the `defs` table passed to that
-- call. The `%{Name}` entries use the label names listed in `errinfo`.
-- The same text is exported unchanged as `peg.gram`.
local gram = [=[
pattern <- (exp / %{NoPatt}) (!. / %{ExtraChars})
exp <- S (grammar / alternative)
labels <- {| '{' {: (label / %{ExpLab1}) :} (',' {: (label / %{ExpLab2}) :})* ('}' / %{MisClose7}) |}
alternative <- ( {:''->'or':} {| {: seq :} ('/' (('/' {| {: labels :} S {: (seq / %{ExpPatt1}) :} |}) / (S {: (seq / %{ExpPatt1}) :} ) ) )* |} ) -> foldtable
seq <- ( {:''->'and':} {| {: prefix :}+ |} ) -> foldtable
prefix <- {| {:action: '&' :} S {:op1: (prefix / %{ExpPatt2}) :} |}
/ {| {:action: '!' :} S {:op1: (prefix / %{ExpPatt3}) :} |}
/ suffix
suffix <- ( {:''->'suf':} {| primary S {| suffixaction S |}* |} ) -> foldtable
suffixaction <- {[+*?]}
/ {'^'} {| {:num: [+-]? NUM:} |}
/ '^'->'^LABEL' (label / %{ExpNumOrLab})
/ {'->'} S ((string / {| {:action:'{}'->'poscap':} |} / funcname / {|{:num: NUM :} |}) / %{ExpCap})
/ {'=>'} S (funcname / %{ExpName1})
primary <- '(' (exp / %{ExpPatt4}) (')' / %{MisClose1})
/ term
/ class
/ defined
/ {| {:action: '%'->'label':} ('{' / %{ExpNameOrLab}) S ({:op1: label:} / %{ExpLab1}) S ('}' / %{MisClose7}) |}
/ {| {:action: '{:'->'gcap':} {:op2: defname:} ':' !'}' ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
/ {| {:action: '{:'->'gcap':} ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
/ {| {:action: '='->'bref':} ({:op1: defname:} / %{ExpName2}) |}
/ {| {:action: '{}'->'poscap':} |}
/ {| {:action: '{~'->'subcap':} ({:op1: exp:} / %{ExpPatt6}) ('~}' / %{MisClose3}) |}
/ {| {:action: '{|'->'tcap':} ({:op1: exp:} / %{ExpPatt7}) ('|}' / %{MisClose4}) |}
/ {| {:action: '{'->'scap':} ({:op1: exp:} / %{ExpPattOrClose}) ('}' / %{MisClose5}) |}
/ {| {:action: '.'->'anychar':} |}
/ !frag !nodee name S !ARROW
/ '<' (name / %{ExpName3}) ('>' / %{MisClose6}) -- old-style non terminals
grammar <- {| definition+ |}
definition <- {| (frag / nodee)? (token / nontoken) S ARROW ({:rule: exp :} / %{ExpPatt8}) |}
label <- {| {:s: ERRORNAME :} |}
frag <- {:fragment: 'fragment'->'1' :} ![0-9_a-z] S !ARROW
nodee <- {:node: 'node'->'1' :} ![0-9_a-z] S !ARROW
token <- {:rulename: TOKENNAME :} {:token:''->'1':}
nontoken <- {:rulename: NAMESTRING :}
class <- '[' ( ('^' {| {:action:''->'invert':} {:op1: classset :} |} ) / classset ) (']' / %{MisClose8})
classset <- ( {:''->'or':} {| {: (item / %{ExpItem}) :} (!']' {: (item / %{ExpItem}) :})* |} ) -> foldtable
item <- defined / range / {| {:t: . :} |}
range <- {| {:action:''->'range':} {:op1: {| {:s: ({: . :} ('-') {: [^]] :} ) -> concat :} |} :} |}
S <- (%s / '--' [^%nl]*)* -- spaces and comments
name <- {| {:nt: TOKENNAME :} {:token:''->'1':} / {:nt: NAMESTRING :} |}
funcname <- {| {:func: NAMESTRING :} |}
ERRORNAME <- NAMESTRING
NAMESTRING <- [A-Za-z][A-Za-z0-9_]*
TOKENNAME <- [A-Z_]+ ![0-9a-z]
defname <- {| {:s: NAMESTRING :} |}
ARROW <- '<-'
NUM <- [0-9]+
term <- {| '"' {:t: [^"]* :} ('"' / %{MisTerm2}) / "'" {:t: [^']* :} ("'" / %{MisTerm1}) |}
string <- {| '"' {:s: [^"]* :} ('"' / %{MisTerm2}) / "'" {:s: [^']* :} ("'" / %{MisTerm1}) |}
defined <- {| {:action: '%':} {:op1: defname :} |}
]=]
-- Functions the grammar refers to by name (`-> foldtable`, `-> concat`).
local defs = {foldtable=foldtable, concat=concat}
-- Export the grammar text, its definitions and the label-name table.
peg.gram = gram
peg.defs = defs
peg.labels = labels
-- Compiled pattern; peg.pegToAST matches its input against it.
local p = re.compile ( gram, defs)
--[[
Function: pegToAST(input)
Input: a grammar in PEG format, described in https://github.com/vsbenas/parser-gen
Output: if parsing successful - a table of grammar rules, else - runtime error
Example input: "
Program <- stmt* / SPACE
stmt <- ('a' / 'b')+
SPACE <- ''
"
Example output: {
{rulename = "Program", rule = {action = "or", op1 = {action = "*", op1 = {nt = "stmt"}}, op2 = {nt = "SPACE", token="1"}}},
{rulename = "stmt", rule = {action = "+", op1 = {action="or", op1 = {t = "a"}, op2 = {t = "b"}}}},
{rulename = "SPACE", rule = {t=""}, token="1"},
}
The rules are further processed and turned into lpeg compatible format in parser-gen.lua
Action names:
or (has parameter condition for recovery expressions)
and
&
!
+
*
?
^ (op2 = num, a number with an optional plus or minus sign)
^LABEL (op2 = label, an error label set with setlabels)
->
=>
tcap
gcap (op2= name, anonymous otherwise)
bref
poscap
subcap
scap
anychar
label
%
range
Final token actions:
t - terminal
nt - non terminal
func - function definition
s - literal string
num - literal number
]]--
--- Splits a string into a list of lines.
-- Lines are terminated by "\n" or "\r\n"; the terminator is dropped.
-- Whatever follows the last terminator (possibly the empty string) is
-- always appended as the final element.
-- @param str text to split
-- @return sequence of line strings
local function splitlines(str)
  local lines = {}
  -- gsub strips every terminated line from the text and hands it to the
  -- callback; only its first result, the leftover tail, is kept.
  local tail = str:gsub("(.-)\r?\n", function(line)
    lines[#lines + 1] = line
    return ""
  end)
  lines[#lines + 1] = tail
  return lines
end
--- Parses a PEG grammar description into its AST.
-- @param input grammar text
-- @param defs forwarded unchanged as the second argument of `p:match`.
--   NOTE(review): LPeg documents that argument as the numeric start
--   position (`init`), so a table here would presumably be rejected.
--   Confirm the intent against callers before changing the call.
-- @return the value produced by the compiled grammar on a successful match
-- On failure raises, at level 3, "syntax error(s) in pattern" followed by
-- the line/column, the message, the offending line and a caret marker.
function peg.pegToAST(input, defs)
  local r, e, sfail = p:match(input, defs)
  if r then
    return r
  end

  local lab
  if e == 0 then
    lab = "Syntax error"
  else
    -- Fall back to a generic text so an unknown label cannot turn the
    -- report into an unrelated "attempt to concatenate a nil value" error.
    lab = errmsgs[e] or ("unknown error label " .. tostring(e))
  end

  local lines = splitlines(input)
  -- sfail is used as the unconsumed tail of input: its length gives the
  -- offset at which matching failed.
  local line, col = re.calcline(input, #input - #sfail + 1)
  local err = {
    "L" .. line .. ":C" .. col .. ": " .. lab,
    lines[line],
  }
  err[#err + 1] = string.rep(" ", col-1) .. "^"
  error("syntax error(s) in pattern\n" .. table.concat(err, "\n"), 3)
end
--- Debugging aid: prints a value recursively in "[key] => value" form.
-- A value whose tostring() was already printed is shown as "*<tostring>"
-- instead of being expanded again.
-- @param t value (normally a table) to print
function peg.print_r ( t )
  local seen = {}

  local function dump(node, indent)
    local id = tostring(node)
    if seen[id] then
      print(indent .. "*" .. id)
      return
    end
    seen[id] = true

    if type(node) ~= "table" then
      print(indent .. id)
      return
    end

    for key, val in pairs(node) do
      if type(val) == "table" then
        print(indent .. "[" .. key .. "] => {")
        local width = string.len(key)
        dump(val, indent .. string.rep(" ", width + 8))
        print(indent .. string.rep(" ", width + 6) .. "}")
      else
        print(indent .. "[" .. key .. "] => '" .. tostring(val) .. "'")
      end
    end
  end

  dump(t, "")
end
--- Debugging aid: prints a value recursively in a table-constructor-like form.
-- The "rule" and "pos" fields of a table are printed before its other
-- fields. A value whose tostring() was already printed is shown as
-- "*<tostring>" instead of being expanded again.
-- @param t value (normally a table) to print
function peg.print_t ( t )
  local visited = {}
  local dump -- forward declaration; emit and dump call each other

  -- Prints one field: nested tables are expanded between "{" and "},",
  -- other values are printed as "key=value," (or "value," for numeric keys).
  local function emit(key, val, indent)
    if type(val) == "table" then
      print(indent .. "{")
      dump(val, indent .. string.rep(" ", string.len(key) + 8))
      print(indent .. string.rep(" ", string.len(key) - 1) .. "},")
      return
    end

    if type(val) ~= "number" then
      val = "'" .. tostring(val) .. "'"
    end
    if tonumber(key) then
      print(indent .. val .. ",")
    else
      print(indent .. key .. "=" .. val .. ",")
    end
  end

  dump = function(node, indent)
    local id = tostring(node)
    if visited[id] then
      print(indent .. "*" .. id)
      return
    end
    visited[id] = true

    if type(node) ~= "table" then
      print(indent .. id)
      return
    end

    if node["rule"] then
      emit("rule", node["rule"], indent)
    end
    if node["pos"] then
      emit("pos", node["pos"], indent)
    end
    for key, val in pairs(node) do
      if key ~= "rule" and key ~= "pos" then
        emit(key, val, indent)
      end
    end
  end

  dump(t, "")
end
--- Thin wrapper that forwards to `re.calcline`.
-- @param subject passed through as the first argument
-- @param pos passed through as the second argument
-- @return whatever `re.calcline` returns (peg.pegToAST in this file reads
--   the result as a line number followed by a column number)
function peg.calcline(subject, pos)
return re.calcline(subject,pos)
end
-- Module table: gram, defs, labels, pegToAST, print_r, print_t, calcline.
return peg