local re = require("deps.lpeglabel.relabel") local util = require("src.util") local peg = {} -- from relabel.lua local errinfo = { {"NoPatt", "no pattern found"}, {"ExtraChars", "unexpected characters after the pattern"}, {"ExpPatt1", "expected a pattern after '/' or '//{...}'"}, {"ExpPatt2", "expected a pattern after '&'"}, {"ExpPatt3", "expected a pattern after '!'"}, {"ExpPatt4", "expected a pattern after '('"}, {"ExpPatt5", "expected a pattern after ':'"}, {"ExpPatt6", "expected a pattern after '{~'"}, {"ExpPatt7", "expected a pattern after '{|'"}, {"ExpPatt8", "expected a pattern after '<-'"}, {"ExpPattOrClose", "expected a pattern or closing '}' after '{'"}, {"ExpNum", "expected a number after '^', '+' or '-' (no space)"}, {"ExpNumOrLab", "expected a number or a label after ^"}, {"ExpCap", "expected a string, number, '{}' or name after '->'"}, {"ExpName1", "expected the name of a rule after '=>'"}, {"ExpName2", "expected the name of a rule after '=' (no space)"}, {"ExpName3", "expected the name of a rule after '<' (no space)"}, {"ExpLab1", "expected at least one label after '{'"}, {"ExpLab2", "expected a label after the comma"}, {"ExpNameOrLab", "expected a name or label after '%' (no space)"}, {"ExpItem", "expected at least one item after '[' or '^'"}, {"MisClose1", "missing closing ')'"}, {"MisClose2", "missing closing ':}'"}, {"MisClose3", "missing closing '~}'"}, {"MisClose4", "missing closing '|}'"}, {"MisClose5", "missing closing '}'"}, -- for the captures {"MisClose6", "missing closing '>'"}, {"MisClose7", "missing closing '}'"}, -- for the labels {"MisClose8", "missing closing ']'"}, {"MisTerm1", "missing terminating single quote"}, {"MisTerm2", "missing terminating double quote"}, } local errmsgs = {} local labels = {} for i, err in ipairs(errinfo) do errmsgs[i] = err[2] labels[err[1]] = i end re.setlabels(labels) local function concat(a,b) return a..b end local function foldtable(action,t) local re local first = true for key,value in pairs(t) do if first then re = value first = false else local temp = re if action == "suf" then -- suffix actions local act = value[1] if act == "*" or act == "?" or act == "+" then re = {action=act, op1=temp} else re = {action=act, op1=temp, op2=value[2]} end elseif action == "or" and #value == 2 then -- recovery expression local labels = value[1] local op2 = value[2] re = {action=action, op1=temp, op2=op2, condition=labels} else re = {action=action, op1=temp, op2=value} end end end return re end local gram = [=[ pattern <- (exp / %{NoPatt}) (!. / %{ExtraChars}) exp <- S (grammar / alternative) labels <- {| '{' {: (label / %{ExpLab1}) :} (',' {: (label / %{ExpLab2}) :})* ('}' / %{MisClose7}) |} alternative <- ( {:''->'or':} {| {: seq :} ('/' (('/' {| {: labels :} S {: (seq / %{ExpPatt1}) :} |}) / (S {: (seq / %{ExpPatt1}) :} ) ) )* |} ) -> foldtable seq <- ( {:''->'and':} {| {: prefix :}+ |} ) -> foldtable prefix <- {| {:action: '&' :} S {:op1: (prefix / %{ExpPatt2}) :} |} / {| {:action: '!' :} S {:op1: (prefix / %{ExpPatt3}) :} |} / suffix suffix <- ( {:''->'suf':} {| primary S {| suffixaction S |}* |} ) -> foldtable suffixaction <- {[+*?]} / {'^'} {| {:num: [+-]? NUM:} |} / '^'->'^LABEL' (label / %{ExpNumOrLab}) / {'->'} S ((string / {| {:action:'{}'->'poscap':} |} / funcname / {|{:num: NUM :} |}) / %{ExpCap}) / {'=>'} S (funcname / %{ExpName1}) primary <- '(' (exp / %{ExpPatt4}) (')' / %{MisClose1}) / term / class / defined / {| {:action: '%'->'label':} ('{' / %{ExpNameOrLab}) S ({:op1: label:} / %{ExpLab1}) S ('}' / %{MisClose7}) |} / {| {:action: '{:'->'gcap':} {:op2: defname:} ':' !'}' ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |} / {| {:action: '{:'->'gcap':} ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |} / {| {:action: '='->'bref':} ({:op1: defname:} / %{ExpName2}) |} / {| {:action: '{}'->'poscap':} |} / {| {:action: '{~'->'subcap':} ({:op1: exp:} / %{ExpPatt6}) ('~}' / %{MisClose3}) |} / {| {:action: '{|'->'tcap':} ({:op1: exp:} / %{ExpPatt7}) ('|}' / %{MisClose4}) |} / {| {:action: '{'->'scap':} ({:op1: exp:} / %{ExpPattOrClose}) ('}' / %{MisClose5}) |} / {| {:action: '.'->'anychar':} |} / !frag !nodee name S !ARROW / '<' (name / %{ExpName3}) ('>' / %{MisClose6}) -- old-style non terminals grammar <- {| definition+ |} definition <- {| (frag / nodee)? (token / nontoken) S ARROW ({:rule: exp :} / %{ExpPatt8}) |} label <- {| {:s: ERRORNAME :} |} frag <- {:fragment: 'fragment'->'1' :} ![0-9_a-z] S !ARROW nodee <- {:node: 'node'->'1' :} ![0-9_a-z] S !ARROW token <- {:rulename: TOKENNAME :} {:token:''->'1':} nontoken <- {:rulename: NAMESTRING :} class <- '[' ( ('^' {| {:action:''->'invert':} {:op1: classset :} |} ) / classset ) (']' / %{MisClose8}) classset <- ( {:''->'or':} {| {: (item / %{ExpItem}) :} (!']' {: (item / %{ExpItem}) :})* |} ) -> foldtable item <- defined / range / {| {:t: . :} |} range <- {| {:action:''->'range':} {:op1: {| {:s: ({: . :} ('-') {: [^]] :} ) -> concat :} |} :} |} S <- (%s / '--' [^%nl]*)* -- spaces and comments name <- {| {:nt: TOKENNAME :} {:token:''->'1':} / {:nt: NAMESTRING :} |} funcname <- {| {:func: NAMESTRING :} |} ERRORNAME <- NAMESTRING NAMESTRING <- [A-Za-z][A-Za-z0-9_]* TOKENNAME <- [A-Z_]+ ![0-9a-z] defname <- {| {:s: NAMESTRING :} |} ARROW <- '<-' NUM <- [0-9]+ term <- {| '"' {:t: [^"]* :} ('"' / %{MisTerm2}) / "'" {:t: [^']* :} ("'" / %{MisTerm1}) |} string <- {| '"' {:s: [^"]* :} ('"' / %{MisTerm2}) / "'" {:s: [^']* :} ("'" / %{MisTerm1}) |} defined <- {| {:action: '%':} {:op1: defname :} |} ]=] local defs = {foldtable=foldtable, concat=concat} peg.gram = gram peg.defs = defs peg.labels = labels local p = re.compile ( gram, defs) --[[ Function: pegToAST(input) Input: a grammar in PEG format, described in https://github.com/vsbenas/parser-gen Output: if parsing successful - a table of grammar rules, else - runtime error Example input: " Program <- stmt* / SPACE stmt <- ('a' / 'b')+ SPACE <- '' " Example output: { {rulename = "Program", rule = {action = "or", op1 = {action = "*", op1 = {nt = "stmt"}}, op2 = {nt = "SPACE", token="1"}}}, {rulename = "stmt", rule = {action = "+", op1 = {action="or", op1 = {t = "a"}, op2 = {t = "b"}}}}, {rulename = "SPACE", rule = {t=""}, token=1}, } The rules are further processed and turned into lpeg compatible format in parser-gen.lua Action names: or (has parameter condition for recovery expresions) and & ! + * ? ^num (num is a number with an optional plus or minus sign) ^label (label is an error label set with setlabels) -> => tcap gcap (op2= name, anonymous otherwise) bref poscap subcap scap anychar label % range Final token actions: t - terminal nt - non terminal func - function definition s - literal string num - literal number ]]-- local function splitlines(str) local t = {} local function helper(line) table.insert(t, line) return "" end helper((str:gsub("(.-)\r?\n", helper))) return t end function peg.pegToAST(input, defs) local r, e, sfail = p:match(input, defs) if not r then local lab if e == 0 then lab = "Syntax error" else lab = errmsgs[e] end local lines = splitlines(input) local line, col = re.calcline(input, #input - #sfail + 1) local err = {} table.insert(err, "L" .. line .. ":C" .. col .. ": " .. lab) table.insert(err, lines[line]) table.insert(err, string.rep(" ", col-1) .. "^") error("syntax error(s) in pattern\n" .. table.concat(err, "\n"), 3) end return r end function peg.print_r ( t ) -- for debugging local print_r_cache={} local function sub_print_r(t,indent) if (print_r_cache[tostring(t)]) then print(indent.."*"..tostring(t)) else print_r_cache[tostring(t)]=true if (type(t)=="table") then for pos,val in pairs(t) do if (type(val)=="table") then print(indent.."["..pos.."] => {") sub_print_r(val,indent..string.rep(" ",string.len(pos)+8)) print(indent..string.rep(" ",string.len(pos)+6).."}") else print(indent.."["..pos.."] => '"..tostring(val).."'") end end else print(indent..tostring(t)) end end end sub_print_r(t,"") end function peg.print_t ( t ) -- for debugging local print_r_cache={} local function sub_print_r (t,indent) if (print_r_cache[tostring(t)]) then print(indent.."*"..tostring(t)) else print_r_cache[tostring(t)]=true if (type(t)=="table") then local function subprint (pos,val,indent) if (type(val)=="table") then print(indent.."{") sub_print_r(val,indent..string.rep(" ",string.len(pos)+8)) print(indent..string.rep(" ",string.len(pos)-1).."},") else if type(val) ~= "number" then val = "'"..tostring(val).."'" end if tonumber(pos) then print(indent..val..",") else print(indent..pos.."="..val..",") end end end if t["rule"] then subprint("rule",t["rule"],indent) end if t["pos"] then subprint("pos",t["pos"],indent) end for pos,val in pairs(t) do if pos ~= "rule" and pos ~= "pos" then subprint(pos,val,indent) end end else print(indent..tostring(t)) end end end sub_print_r(t,"") end function peg.calcline(subject, pos) return re.calcline(subject,pos) end return peg