Fixed some diagnostics warnings

Moved examples to tofix because fixing them is besides the point right
now.
This commit is contained in:
Dawid Sobczak 2023-09-19 11:42:10 +01:00
parent 52164c82e3
commit 858fe11666
166 changed files with 68 additions and 264 deletions

View file

@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2017 Benas Vaitkevicius
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -0,0 +1,397 @@
# parser-gen
A Lua parser generator that makes it possible to describe grammars in a [PEG](https://en.wikipedia.org/wiki/Parsing_expression_grammar) syntax. The tool will parse a given input using a provided grammar and if the matching is successful produce an AST as an output with the captured values using [Lpeg](http://www.inf.puc-rio.br/~roberto/lpeg/). If the matching fails, labelled errors can be used in the grammar to indicate failure position, and recovery grammars are generated to continue parsing the input using [LpegLabel](https://github.com/sqmedeiros/lpeglabel). The tool can also automatically generate error labels and recovery grammars for LL(1) grammars.
parser-gen is a [GSoC 2017](https://developers.google.com/open-source/gsoc/) project, and was completed with the help of my mentor [@sqmedeiros](https://github.com/sqmedeiros) from [LabLua](http://www.lua.inf.puc-rio.br/). A blog documenting the progress of the project can be found [here](https://parsergen.blogspot.com/2017/08/parser-generator-based-on-lpeglabel.html).
---
# Table of contents
* [Requirements](#requirements)
* [Syntax](#syntax)
* [Grammar Syntax](#grammar-syntax)
* [Example: Tiny Parser](#example-tiny-parser)
# Requirements
```
lua >= 5.1
lpeglabel >= 1.2.0
```
# Syntax
### compile
This function generates a PEG parser from the grammar description.
```lua
local pg = require "parser-gen"
grammar = pg.compile(input,definitions [, errorgen, noast])
```
*Arguments*:
`input` - A string containing a PEG grammar description. For complete PEG syntax see the grammar section of this document.
`definitions` - table of custom functions and definitions used inside the grammar, for example {equals=equals}, where equals is a function.
`errorgen` - **EXPERIMENTAL** optional boolean parameter(default:false), when enabled generates error labels automatically. Works well only on LL(1) grammars. Custom error labels have precedence over automatically generated ones.
`noast` - optional boolean parameter(default:false), when enabled does not generate an AST for the parse.
*Output*:
`grammar` - a compiled grammar on success, throws error on failure.
### setlabels
If custom error labels are used, the function *setlabels* allows setting their description (and custom recovery pattern):
```lua
pg.setlabels(t)
```
Example table of a simple error and one with a custom recovery expression:
```lua
-- grammar rule: " ifexp <- 'if' exp 'then'^missingThen stmt 'end'^missingEnd "
local t = {
missingEnd = "Missing 'end' in if expression",
missingThen = {"Missing 'then' in if expression", " (!stmt .)* "} -- a custom recovery pattern
}
pg.setlabels(t)
```
If the recovery pattern is not set, then the one specified by the rule SYNC will be used. It is by default set to:
```lua
SKIP <- %s / %nl -- a space ' ' or newline '\n' character
SYNC <- .? (!SKIP .)*
```
Learn more about special rules in the grammar section.
### parse
This operation attempts to match a grammar to the given input.
```lua
result, errors = pg.parse(input, grammar [, errorfunction])
```
*Arguments*:
`input` - an input string that the tool will attempt to parse.
`grammar` - a compiled grammar.
`errorfunction` - an optional function that will be called if an error is encountered, with the arguments `desc` for the error description set using `setlabels()`; location indicators `line` and `col`; the remaining string before failure `sfail` and a custom recovery expression `trec` if available.
Example:
```lua
local errs = 0
local function printerror(desc,line,col,sfail,trec)
errs = errs+1
print("Error #"..errs..": "..desc.." before '"..sfail.."' on line "..line.."(col "..col..")")
end
result, errors = pg.parse(input,grammar,printerror)
```
*Output*:
If the parse is successful, the function returns an abstract syntax tree containing the captures `result` and a table of any encountered `errors`. If the parse was unsuccessful, `result` will be **nil**.
Also, if the `noast` option is enabled when compiling the grammar, the function will then produce the longest match length or any custom captures used.
### calcline
Calculates line and column information regarding position i of the subject (exported from the relabel module).
```lua
line, col = pg.calcline(subject, position)
```
*Arguments*:
`subject` - subject string
`position` - position inside the string, for example, the one given by automatic AST generation.
### usenodes
When AST generation is enabled, this function will enable the "node" mode, where only rules tagged with a `node` prefix will generate AST entries. Must be used before compiling the grammar.
```lua
pg.usenodes(value)
```
*Arguments*:
`value` - a boolean value that enables or disables this function
# Grammar Syntax
The grammar used for this tool is described using a PEG-like syntax, that is identical to the one provided by the [re](http://www.inf.puc-rio.br/~roberto/lpeg/re.html) module, with an extension of labelled failures provided by [relabel](https://github.com/sqmedeiros/lpeglabel) module (except numbered labels). That is, all grammars that work with relabel should work with parser-gen as long as numbered error labels are not used, as they are not supported by parser-gen.
Since a parser generated with parser-gen automatically consumes space characters, builds ASTs and generates errors, additional extensions have been added based on the [ANTLR](http://www.antlr.org/) syntax.
### Basic syntax
The syntax of parser-gen grammars is somewhat similar to regex syntax. The next table summarizes the tool's syntax. A p represents an arbitrary pattern; num represents a number (`[0-9]+`); name represents an identifier (`[a-zA-Z][a-zA-Z0-9_]*`). `defs` is the definitions table provided when compiling the grammar. Note that error names must be set using `setlabels()` before compiling the grammar. Constructions are listed in order of decreasing precedence.
<table border="1">
<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
<tr><td><code>( p )</code></td> <td>grouping</td></tr>
<tr><td><code>'string'</code></td> <td>literal string</td></tr>
<tr><td><code>"string"</code></td> <td>literal string</td></tr>
<tr><td><code>[class]</code></td> <td>character class</td></tr>
<tr><td><code>.</code></td> <td>any character</td></tr>
<tr><td><code>%name</code></td>
<td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
<tr><td><code>name</code></td><td>non terminal</td></tr>
<tr><td><code>&lt;name&gt;</code></td><td>non terminal</td></tr>
<tr><td><code>%{name}</code></td> <td>error label</td></tr>
<tr><td><code>{}</code></td> <td>position capture</td></tr>
<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
<tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr>
<tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr>
<tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr>
<tr><td><code>{| p |}</code></td> <td>table capture</td></tr>
<tr><td><code>=name</code></td> <td>back reference
</td></tr>
<tr><td><code>p ?</code></td> <td>optional match</td></tr>
<tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr>
<tr><td><code>p +</code></td> <td>one or more repetitions</td></tr>
<tr><td><code>p^num</code></td> <td>exactly <code>n</code> repetitions</td></tr>
<tr><td><code>p^+num</code></td>
<td>at least <code>n</code> repetitions</td></tr>
<tr><td><code>p^-num</code></td>
<td>at most <code>n</code> repetitions</td></tr>
<tr><td><code>p^name</code></td> <td>match p or throw error label name.</td></tr>
<tr><td><code>p -&gt; 'string'</code></td> <td>string capture</td></tr>
<tr><td><code>p -&gt; "string"</code></td> <td>string capture</td></tr>
<tr><td><code>p -&gt; num</code></td> <td>numbered capture</td></tr>
<tr><td><code>p -&gt; name</code></td> <td>function/query/string capture
equivalent to <code>p / defs[name]</code></td></tr>
<tr><td><code>p =&gt; name</code></td> <td>match-time capture
equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
<tr><td><code>&amp; p</code></td> <td>and predicate</td></tr>
<tr><td><code>! p</code></td> <td>not predicate</td></tr>
<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
<tr><td><code>p1 //{name [, name, ...]} p2</code></td> <td>specifies recovery pattern p2 for p1
when one of the labels is thrown</td></tr>
<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
<tr><td>(<code>name &lt;- p</code>)<sup>+</sup></td> <td>grammar</td></tr>
</tbody></table>
The grammar below is used to match balanced parenthesis
```lua
balanced <- "(" ([^()] / balanced)* ")"
```
For more examples check out the [re](http://www.inf.puc-rio.br/~roberto/lpeg/re.html) page, see the Tiny parser below or the [Lua parser](https://github.com/vsbenas/parser-gen/blob/master/parsers/lua-parser.lua) written with this tool.
### Error labels
Error labels are provided by the relabel function %{errorname} (errorname must follow `[A-Za-z][A-Za-z0-9_]*` format). Usually we use error labels in a syntax like `'a' ('b' / %{errB}) 'c'`, which throws an error label if `'b'` is not matched. This syntax is quite complicated so an additional syntax is allowed `'a' 'b'^errB 'c'`, which allows cleaner description of grammars. Note: all errors must be defined in a table using parser-gen.setlabels() before compiling and parsing the grammar.
### Tokens
Non-terminals with names in all capital letters, i.e. `[A-Z]+`, are considered tokens and are treated as a single object in parsing. That is, the whole string matched by a token is captured in a single AST entry and space characters are not consumed. Consider two examples:
```lua
-- a token non-terminal
grammar = pg.compile [[
WORD <- [A-Z]+
]]
res, _ = pg.parse("AA A", grammar) -- outputs {rule="WORD", "AA"}
```
```lua
-- a non-token non-terminal
grammar = pg.compile [[
word <- [A-Z]+
]]
res, _ = pg.parse("AA A", grammar) -- outputs {rule="word", "A", "A", "A"}
```
### Fragments
If a token definition is followed by a `fragment` keyword, then the parser does not build an AST entry for that token. Essentially, these rules are used to simplify grammars without building unnecessarily complicated ASTS. Example of `fragment` usage:
```lua
grammar = pg.compile [[
WORD <- LETTER+
fragment LETTER <- [A-Z]
]]
res, _ = pg.parse("AA A", grammar) -- outputs {rule="WORD", "AA"}
```
Without using `fragment`:
```lua
grammar = pg.compile [[
WORD <- LETTER+
LETTER <- [A-Z]
]]
res, _ = pg.parse("AA A", grammar) -- outputs {rule="WORD", {rule="LETTER", "A"}, {rule="LETTER", "A"}}
```
### Nodes
When node mode is enabled using `pg.usenodes(true)` only rules prefixed with a `node` keyword will generate AST entries:
```lua
grammar = pg.compile [[
node WORD <- LETTER+
LETTER <- [A-Z]
]]
res, _ = pg.parse("AA A", grammar) -- outputs {rule="WORD", "AA"}
```
### Special rules
There are two special rules used by the grammar:
#### SKIP
The `SKIP` rule identifies which characters to skip in a grammar. For example, most programming languages do not take into account any space or newline characters. By default, SKIP is set to:
```lua
SKIP <- %s / %nl
```
This rule can be extended to contain semicolons `';'`, comments, or any other patterns that the parser can safely ignore.
Character skipping can be disabled by using:
```lua
SKIP <- ''
```
#### SYNC
This rule specifies the general recovery expression both for custom errors and automatically generated ones. By default:
```lua
SYNC <- .? (!SKIP .)*
```
The default SYNC rule consumes any characters until the next character matched by SKIP, usually a space or a newline. That means, if some statement in a program is invalid, the parser will continue parsing after a space or a newline character.
For some programming languages it might be useful to skip to a semicolon or a keyword, since they usually indicate the end of a statement, so SYNC could be something like:
```lua
HELPER <- ';' / 'end' / SKIP -- etc
SYNC <- (!HELPER .)* SKIP* -- we can consume the spaces after syncing with them as well
```
Recovery grammars can be disabled by using:
```lua
SYNC <- ''
```
# Example: Tiny parser
Below is the full code from *parsers/tiny-parser.lua*:
```lua
local pg = require "parser-gen"
local peg = require "peg-parser"
local errs = {errMissingThen = "Missing Then"} -- one custom error
pg.setlabels(errs)
--warning: experimental error generation function is enabled. If the grammar isn't LL(1), set errorgen to false
local errorgen = true
local grammar = pg.compile([[
program <- stmtsequence !.
stmtsequence <- statement (';' statement)*
statement <- ifstmt / repeatstmt / assignstmt / readstmt / writestmt
ifstmt <- 'if' exp 'then'^errMissingThen stmtsequence elsestmt? 'end'
elsestmt <- ('else' stmtsequence)
repeatstmt <- 'repeat' stmtsequence 'until' exp
assignstmt <- IDENTIFIER ':=' exp
readstmt <- 'read' IDENTIFIER
writestmt <- 'write' exp
exp <- simpleexp (COMPARISONOP simpleexp)*
COMPARISONOP <- '<' / '='
simpleexp <- term (ADDOP term)*
ADDOP <- [+-]
term <- factor (MULOP factor)*
MULOP <- [*/]
factor <- '(' exp ')' / NUMBER / IDENTIFIER
NUMBER <- '-'? [0-9]+
KEYWORDS <- 'if' / 'repeat' / 'read' / 'write' / 'then' / 'else' / 'end' / 'until'
RESERVED <- KEYWORDS ![a-zA-Z]
IDENTIFIER <- !RESERVED [a-zA-Z]+
HELPER <- ';' / %nl / %s / KEYWORDS / !.
SYNC <- (!HELPER .)*
]], _, errorgen)
local errors = 0
local function printerror(desc,line,col,sfail,trec)
errors = errors+1
print("Error #"..errors..": "..desc.." on line "..line.."(col "..col..")")
end
local function parse(input)
errors = 0
result, errors = pg.parse(input,grammar,printerror)
return result, errors
end
if arg[1] then
-- argument must be in quotes if it contains spaces
res, errs = parse(arg[1])
peg.print_t(res)
peg.print_r(errs)
end
local ret = {parse=parse}
return ret
```
For input: `lua tiny-parser-nocap.lua "if a b:=1"` we get:
```lua
Error #1: Missing Then on line 1(col 6)
Error #2: Expected stmtsequence on line 1(col 9)
Error #3: Expected 'end' on line 1(col 9)
-- ast:
rule='program',
pos=1,
{
rule='stmtsequence',
pos=1,
{
rule='statement',
pos=1,
{
rule='ifstmt',
pos=1,
'if',
{
rule='exp',
pos=4,
{
rule='simpleexp',
pos=4,
{
rule='term',
pos=4,
{
rule='factor',
pos=4,
{
rule='IDENTIFIER',
pos=4,
'a',
},
},
},
},
},
},
},
},
-- error table:
[1] => {
[msg] => 'Missing Then' -- custom error is used over the automatically generated one
[line] => '1'
[col] => '6'
[label] => 'errMissingThen'
}
[2] => {
[msg] => 'Expected stmtsequence' -- automatically generated errors
[line] => '1'
[col] => '9'
[label] => 'errorgen6'
}
[3] => {
[msg] => 'Expected 'end''
[line] => '1'
[col] => '9'
[label] => 'errorgen4'
}
```

View file

@ -0,0 +1,32 @@
-- Deep structural comparison of two values.
-- Returns true when o1 and o2 are equal: identical primitive values, or
-- tables with matching key sets whose values are recursively equal.
-- When ignore_mt is falsy and o1's metatable defines __eq, that
-- metamethod decides equality instead of the structural walk.
local function equals(o1, o2, ignore_mt)
  if o1 == o2 then
    return true
  end
  -- Past this point only two tables of the same type can still be equal.
  if type(o1) ~= type(o2) or type(o1) ~= "table" then
    return false
  end
  if not ignore_mt then
    local mt = getmetatable(o1)
    if mt and mt.__eq then
      -- Defer to the metamethod-based comparison.
      return o1 == o2
    end
  end
  local seen = {}
  -- Every key of o1 must exist in o2 with a recursively equal value.
  for key, v1 in pairs(o1) do
    local v2 = o2[key]
    if v2 == nil or not equals(v1, v2, ignore_mt) then
      return false
    end
    seen[key] = true
  end
  -- o2 must not carry any key that o1 lacks.
  for key in pairs(o2) do
    if not seen[key] then
      return false
    end
  end
  return true
end
return {equals=equals}

View file

@ -0,0 +1,401 @@
-- Error generation code for LL(1) grammars
-- AST funcs:
-- AST node classification helpers.

-- Leaf test: returns the node's payload (terminal, nonterminal, function,
-- string or number field) when it is a leaf, nil otherwise.
local function isfinal(node)
  for _, field in ipairs({ "t", "nt", "func", "s", "num" }) do
    if node[field] then
      return node[field]
    end
  end
  return nil
end
-- Returns the node's action tag, or nil for non-action nodes.
local function isaction(node)
  return node.action
end
-- Returns the node's rule name; nil/false inputs are passed through.
local function isrule(node)
  if not node then
    return node
  end
  return node["rulename"]
end
-- A grammar is a non-action table whose first element is a rule.
local function isgrammar(node)
  if type(node) ~= "table" or node["action"] then
    return false
  end
  return isrule(node[1])
end
-- True when the node carries the token flag ("1").
local function istoken (node)
  local flag = node["token"]
  return flag == "1"
end
-- Decompose a leaf node into (kind, value[, istoken]).
-- The extra third result is only produced for nonterminals.
local function finalNode (node)
  if node["t"] then
    return "t", node["t"] -- terminal
  end
  if node["nt"] then
    return "nt", node["nt"], istoken(node) -- nonterminal
  end
  for _, field in ipairs({ "func", "s", "num" }) do
    if node[field] then
      return field, node[field]
    end
  end
  return nil
end
--[[
function rightleaf:
returns the right-most concatenation in the AST.
used for followset keys
input: ((A B) C)
output: {"nt_C"}
input: (A / B / C) (D / 'e')
output: {"nt_D","t_e"}
input: A*
output: {'',"nt_A"}
input: !A
output: {"not_nt_A"}
]]
-- Prefix every entry of a right-leaf set with "not_" (negative predicate).
-- Returns a new table; the input is not modified.
local function addnot(set)
  local negated = {}
  for key, entry in pairs(set) do
    negated[key] = "not_" .. entry
  end
  return negated
end
-- Append the empty string to a set (marks the pattern as nullable).
-- Mutates and returns the same table.
local function addepsilon(set)
  set[#set + 1] = ''
  return set
end
-- Append every value of `second` onto `first`.
-- Mutates and returns `first`.
local function mergetables(first, second)
  for _, entry in pairs(second) do
    table.insert(first, entry)
  end
  return first
end
-- Compute the "right leaf" set of an AST node: a list of string keys
-- describing the right-most pattern(s) a match of this node can end with
-- (see the comment block above for examples). Keys look like "t_if",
-- "nt_stmt" or "_anychar"; "not_" prefixes mark negative predicates and
-- the empty string '' marks a nullable node.
local function rightleaf(t)
local action = t.action
local op1 = t.op1
local op2 = t.op2
if isfinal(t) then
-- todo: replace nt_A with FIRST(A)
local typefn, fn, tok = finalNode(t)
local ret = typefn .. "_" .. fn -- terminals: t_if, nonterminals: nt_if
return {ret}
end
if action == "or" then
-- either branch can end the match, so merge both sets
return mergetables(rightleaf(op1), rightleaf(op2))
elseif action == "and" then -- consider only RHS
return rightleaf(op2)
elseif action == "&" then
return rightleaf(op1)
elseif action == "!" then
return addnot(rightleaf(op1))
elseif action == "+" then
return rightleaf(op1)
elseif action == "*" or action == "?" then
-- may match nothing: the set also contains ''
return addepsilon(rightleaf(op1))
elseif action == "^" then
op2 = op2["num"] -- second operand is number
if op2 >= 1 then
return rightleaf(op1)
else
return addepsilon(rightleaf(op1))
end
elseif action == "^LABEL" or action == "->" or action == "=>" or action == "tcap" or action == "gcap" or action == "subcap" or action == "scap" then
-- labels and captures do not change what the pattern ends with
return rightleaf(op1)
elseif action == "bref" or action == "poscap" then
return addepsilon({}) -- only empty string
elseif action == "anychar" then
return {"_anychar"}
elseif action == "label" then
return addepsilon({})
elseif action == "%" then
return addepsilon({})
elseif action == "invert" then
return addnot(rightleaf(op1))
elseif action == "range" then
return {"_anychar"}
else
error("Unsupported action '"..action.."'")
end
end
-- FOLLOW maps a right-leaf key (e.g. "nt_A") to a list of the pattern
-- ASTs that can follow it; it is filled per rule by follow_aux().
local FOLLOW = {}
-- Recursive helper: walks the AST, recording FOLLOW entries at every
-- concatenation, and returns the set of patterns this node can start
-- with (used as the "what follows" value for the node to its left).
-- `dontsplit` keeps an ordered choice intact instead of splitting it.
local function follow_aux(t, dontsplit)
local action = t.action
local op1 = t.op1
local op2 = t.op2
if isfinal(t) then
return {t}
end
if action == "or" then
if dontsplit then -- do not split "(B / C)" in "A (B / C)"
return {t}
else -- return both
return mergetables(follow_aux(op1), follow_aux(op2))
end
elseif action == "and" then -- magic happens here
-- (A (B / D)) (!E C / D)
-- 1) FOLLOW(B) = FOLLOW(D) = {(!E C / D)}
local rightset = rightleaf(op1)
local rhs = follow_aux(op2)
for k,v in pairs(rightset) do
if not FOLLOW[v] then
FOLLOW[v] = {}
end
-- TODO: check if rhs already exists in FOLLOW(v)
table.insert(FOLLOW[v],rhs)
end
-- 2) FOLLOW(A) = {(B / D)}
return follow_aux(op1)
elseif action == "&" then
return follow_aux(op1)
elseif action == "!" then
return {action="!", op1=follow_aux(op1)}
elseif action == "+" then
return follow_aux(op1)
elseif action == "*" then
return addepsilon(follow_aux(op1))
elseif action == "?" then
return addepsilon(follow_aux(op1))
elseif action == "^" then
op2 = op2["num"]
if op2 >= 1 then
return follow_aux(op1)
else
return addepsilon(follow_aux(op1))
end
elseif action == "^LABEL" or action == "->" or action == "=>" or action == "tcap" or action == "gcap" or action == "subcap" or action == "scap" then
return follow_aux(op1)
elseif action == "bref" or action == "poscap" then
return addepsilon({}) -- only empty string
elseif action == "anychar" then
return {"_anychar"}
elseif action == "label" then
return addepsilon({})
elseif action == "%" then
return addepsilon({})
elseif action == "invert" then
return {t} -- whole table
elseif action == "range" then
return {"_anychar"}
else
error("Unsupported action '"..action.."'")
end
end
-- function: follow
-- finds follow set for the whole AST, with key (rule, term)
-- Compute FOLLOW sets for every rule of a grammar AST, or for a bare
-- pattern. Returns a table keyed by rule name ('' for a bare pattern);
-- each value is the module-level FOLLOW table filled by follow_aux().
local function follow (ast)
  local followset = {}
  if not isgrammar(ast) then
    FOLLOW = {}
    follow_aux(ast)
    followset[''] = FOLLOW
    return followset
  end
  for _, ruledef in pairs(ast) do
    FOLLOW = {} -- fresh set for every rule
    follow_aux(ruledef.rule) -- apply recursive function
    followset[ruledef.rulename] = FOLLOW
  end
  return followset
end
-- functions to add errors
-- find size of table
-- Count all entries of a table (array and hash parts alike).
local function getn (t)
  local count = 0
  for _ in pairs(t) do
    count = count + 1
  end
  return count
end
-- generate error message by traversing table to the left
-- Render a human-readable description of what pattern `op` expects;
-- used when building the "Expected X" auto-generated error messages.
-- Literal terminals are quoted; composite nodes are described by their
-- leftmost operand (or by the action tag when there is none).
local function printexpect(op)
  if isfinal(op) then
    local term = op["t"]
    if term then
      return "'" .. term .. "'"
    end
    return op["nt"] or op["func"] or op["s"] or op["num"]
  end
  local lhs = op.op1
  if lhs then
    return printexpect(lhs)
  end
  return op.action
end
-- Auto-generated label state: count of labels handed out so far and the
-- name -> description table returned by adderrors().
local GENERATED_ERRORS = 0
local TERRS = {}
-- Allocate a fresh auto-generated error label for pattern `op`.
-- `after` is the preceding pattern (unused here; kept for the interface).
-- Registers a description in TERRS and returns the new label name.
local function generateerror(op, after)
  local desc = "Expected " .. printexpect(op)
  local id = GENERATED_ERRORS + 1
  if id >= 255 then
    error("Error label limit reached(255)")
  end
  local name = "errorgen" .. id
  TERRS[name] = desc
  GENERATED_ERRORS = id
  return name
end
-- Try to wrap pattern `op` (the RHS of the concatenation "after op") in
-- an auto-generated error label. A label is only generated when, for
-- some right leaf of `after`, the FOLLOW set has exactly one entry —
-- i.e. the grammar is locally LL(1) there, so failing to match `op` is a
-- definite error rather than a viable alternative.
local function tryadderror(op, after)
if FOLLOW then
local rhs = rightleaf(after)
-- (A / B) C
-- generate error iff #FOLLOW(A) OR #FOLLOW(B) = 1
local generate = false
for k,v in pairs(rhs) do
if FOLLOW[v] then
local n = getn(FOLLOW[v])
generate = generate or n==1
end
end
if generate then
-- rewrite "op" as "op^label" in the AST
local lab = generateerror(op, after)
return {action="^LABEL",op1=op,op2={s=lab}}
end
end
return op
end
-- function: adderrors
-- traverses the AST and adds error labels where possible
-- Walk the rule AST depth-first; after every concatenation (A B) try to
-- wrap B in an auto-generated error label. Token rules are skipped.
-- Mutates the AST in place and returns it (nil input yields nil).
local function adderrors_aux(ast,tokenrule)
  if not ast then
    return nil
  end
  if isaction(ast) then
    local rhs = ast["op2"]
    if ast["action"] == "and" and not tokenrule then
      rhs = tryadderror(rhs, ast["op1"])
    end
    ast["op1"] = adderrors_aux(ast["op1"], tokenrule)
    ast["op2"] = adderrors_aux(rhs, tokenrule)
  end
  return ast
end
-- Entry point: reset the generated-label state, then add error labels to
-- every rule of the grammar (or to a bare pattern), using the supplied
-- FOLLOW sets. Returns the table of generated label descriptions.
local function adderrors(t, followset)
  GENERATED_ERRORS = 0
  TERRS = {}
  if not isgrammar(t) then
    FOLLOW = followset['']
    adderrors_aux(t, false)
    return TERRS
  end
  for _, ruledef in pairs(t) do
    FOLLOW = followset[ruledef.rulename]
    adderrors_aux(ruledef.rule, ruledef.token == "1")
  end
  return TERRS
end
return {follow=follow,adderrors=adderrors}

View file

@ -0,0 +1,149 @@
-- Test suite for parser-gen: exercises space handling, captures, error
-- labels, error recovery, automatic error generation and the grammar's
-- self-description. NOTE(review): results are deliberately kept in
-- globals (rule, str, res, ...) so intermediate values can be inspected.
local pg = require("parser-gen")
local equals = require("equals").equals
-- terminals
-- space allowed
rule = pg.compile [[
rule <- 'a'
]]
str = "a a aa "
res = pg.parse(str,rule)
assert(res)
-- space not allowed (capitalized rule name = token rule)
rule = pg.compile [[
RULE <- 'a' 'b'
]]
str = "a b"
res = pg.parse(str,rule)
assert(not res)
-- space not allowed 2
rule = pg.compile [[
rule <- 'a' 'b'
SKIP <- ''
SYNC <- ''
]]
str = "a b"
res = pg.parse(str,rule)
assert(not res)
-- custom space
rule = pg.compile [[
rule <- 'a' 'b'
SKIP <- DOT
DOT <- '.'
]]
str = "a...b"
res = pg.parse(str,rule)
assert(res)
-- non terminals
-- space allowed
rule = pg.compile [[
rule <- A B
A <- 'a'
B <- 'b'
]]
str = "a b"
res, err = pg.parse(str,rule)
assert(res)
-- no spaces allowed
rule = pg.compile [[
RULE <- A B
A <- 'a'
B <- 'b'
]]
str = "a b"
res = pg.parse(str,rule)
assert(not res)
-- space in the beginning and end of string
rule = pg.compile [[
rule <- A B
A <- 'a'
B <- 'b'
]]
str = " a b "
res = pg.parse(str,rule)
assert(res)
-- TESTING CAPTURES (noast enabled via 4th compile argument)
r = pg.compile([[ rule <- {| {:'a' 'b':}* |}
]],_,_,true)
res = pg.parse("ababab", r)
assert(equals(res,{"ab","ab","ab"}))
-- space in capture
rule = pg.compile([[ rule <- {| {: 'a' :}* |}
]],_,_,true)
str = " a a a "
res = pg.parse(str,rule)
assert(equals(res,{"a","a","a"})) -- fails
-- TESTING ERROR LABELS
local labs = {errName = "Error number 1",errName2 = "Error number 2"}
pg.setlabels(labs)
rule = pg.compile [[ rule <- 'a' / %{errName}
SYNC <- ''
]]
local errorcalled = false
local function err(desc, line, col, sfail, recexp)
errorcalled = true
assert(desc == "Error number 1")
end
res = pg.parse("b",rule,err)
assert(errorcalled)
-- TESTING ERROR RECOVERY
local labs = {errName = "Error number 1",errName2 = "Error number 2"}
pg.setlabels(labs)
rule = pg.compile [[
rule <- As //{errName,errName2} Bs
As <- 'a'* / %{errName2}
Bs <- 'b'*
]]
res1 = pg.parse(" a a a",rule)
res2 = pg.parse("b b b ",rule)
assert(res1 and res2)
-- TESTING ERROR GENERATION (errorgen enabled via 3rd compile argument)
pg.setlabels({})
rule = pg.compile([[
rule <- A B C
A <- 'a'
B <- 'b'
C <- 'c'
]],_,true)
res1, errs = pg.parse("ab",rule)
assert(errs[1]["msg"] == "Expected C")
-- TESTING RECOVERY GENERATION
-- SELF-DESCRIPTION: the tool's own grammar must parse itself
pg.setlabels(pg.labels)
gram = pg.compile(pg.gram, pg.defs,_,true)
res1, errs = pg.parse(pg.gram,gram)
assert(res1) -- parse successful
--[[ this test is invalid since tool added ^LABEL syntax
r = re.compile(pg.gram,pg.defs)
res2 = r:match(pg.gram)
--pg.print_r(res2)
assert(equals(res1, res2))
]]--
print("all tests succesful")

View file

@ -0,0 +1,563 @@
local f = (...):match("(.-)[^%.]+$")
local m = require("lpeglabel") -- NOTE(review): `m` (m.P, m.Rec, m.T, ...) is used throughout this file but was not required anywhere visible; confirm against the original module layout
local peg = require(f .. "peg-parser")
local eg = require(f .. "errorgen")
local s = require(f .. "stack")
-- Create stack for tokens inside captures.
-- nil - not inside capture,
-- 0 - inside capture,
-- 1 - token found inside capture
local tokenstack = s.Stack:Create()
local subject, errors, errorfunc
local unpack = table.unpack;
local Predef = { nl = m.P"\n", cr = m.P"\r", tab = m.P"\t" }
local mem = {} -- for compiled grammars
-- Refresh Predef with the current locale's character classes and install
-- the single-letter aliases (%a, %d, ...) plus their uppercase
-- complements (%A = any char not in %a, ...). Also clears the
-- compiled-grammar cache, which depends on the locale.
local function updatelocale()
  m.locale(Predef)
  local any = m.P(1)
  local classes = {
    a = "alpha", c = "cntrl", d = "digit", g = "graph", l = "lower",
    p = "punct", s = "space", u = "upper", w = "alnum", x = "xdigit",
  }
  for short, class in pairs(classes) do
    Predef[short] = Predef[class]
    Predef[short:upper()] = any - Predef[class]
  end
  mem = {}
end
updatelocale()
local definitions = {}
local tlabels = {}
local totallabels = 0
local tlabelnames = {} -- reverse table
local tdescs = {}
local trecs = {} -- recovery for each error
-- Default recovery pattern: consume one character (if any remain), then
-- everything up to the next occurrence of `patt`.
local function defaultsync(patt)
  local onechar = m.P(1) ^ -1
  local upto = (-patt * m.P(1)) ^ 0
  return onechar * upto
end
-- TODO: store these variables for each grammar
local SKIP = (Predef.space + Predef.nl)
local SYNC = defaultsync(SKIP)
local recovery = true
local skipspaces = true
local buildast = true
-- Recovery-sync wrapper; currently the pattern is used as-is.
-- (Earlier behaviour, kept for reference: (-patt * m.P(1))^0 * patt^0 —
-- skip until the pattern is found and consume it if present.)
local function sync (patt)
  return patt
end
-- Append optional skip-pattern repetition after `patt`, unless space
-- skipping has been disabled via SKIP <- ''.
local function pattspaces (patt)
  if not skipspaces then
    return patt
  end
  return patt * SKIP ^ 0
end
-- Wrap a token pattern: outside captures, trailing skip characters are
-- consumed right after the token; inside a capture we only flag that a
-- token was seen — the capture wrapper adds the spaces once, after the
-- whole capture (see addspaces()).
local function token (patt)
local incapture = tokenstack:pop() -- returns nil if not in capture
if not incapture then
return pattspaces(patt)
end
tokenstack:push(1) -- mark: a token was found inside the current capture
return patt
end
-- functions used by the tool
-- Pattern/AST classification helpers used by the tool.

-- True when `gr` is an already-compiled lpeglabel pattern.
local function iscompiled (gr)
  return m.type(gr) == "pattern"
end
-- True when the node carries the token flag ("1").
local function istoken (t)
  local flag = t["token"]
  return flag == "1"
end
-- Leaf test: returns the node's payload (terminal, nonterminal, function,
-- string or number field) when it is a leaf, nil otherwise.
local function isfinal(t)
  for _, field in ipairs({ "t", "nt", "func", "s", "num" }) do
    if t[field] then
      return t[field]
    end
  end
  return nil
end
-- Returns the node's action tag, or nil for non-action nodes.
local function isaction(t)
  return t.action
end
-- Returns the node's rule name; nil/false inputs are passed through.
local function isrule(t)
  if not t then
    return t
  end
  return t["rulename"]
end
-- A grammar is a non-action table whose first element is a rule.
local function isgrammar(t)
  if type(t) ~= "table" or t["action"] then
    return false
  end
  return isrule(t[1])
end
-- Actions whose captures receive trailing-space handling.
local function iscapture (action)
  local capturing = {
    ["=>"] = true, gcap = true, scap = true, subcap = true, poscap = true,
  }
  return capturing[action] or false
end
-- Decompose a leaf node into (kind, value[, istoken]).
-- The extra third result is only produced for nonterminals.
local function finalNode (t)
  if t["t"] then
    return "t", t["t"] -- terminal
  end
  if t["nt"] then
    return "nt", t["nt"], istoken(t) -- nonterminal
  end
  for _, field in ipairs({ "func", "s", "num" }) do
    if t[field] then
      return field, t[field]
    end
  end
  return nil
end
local bg = {} -- local variable to keep global function buildgrammar
-- Called when a capture closes: if a token was found inside it, attach
-- the skip pattern to the capture as a whole (tokens inside captures do
-- not consume their own trailing spaces — see token()).
local function addspaces (caps)
  local sawtoken = tokenstack:pop()
  if sawtoken ~= 1 then
    return caps
  end
  return pattspaces(caps)
end
-- Build an lpeglabel pattern for an AST action node.
-- action: the node's tag; op1/op2: already-built operand patterns;
-- labels: recovery label names for the //{} operator; tokenrule: true
-- while inside a token (capital-letter) rule, which suppresses the
-- automatic capture/space handling.
local function applyaction(action, op1, op2, labels,tokenrule)
if action == "or" then
if labels then -- labels = {{s="errName"},{s="errName2"}}
-- translate label names to their numeric ids, then build a
-- recovery pattern: op2 recovers op1 when one of the labels is thrown
for i, v in ipairs(labels) do
local labname = v["s"]
local lab = tlabels[labname]
if not lab then
error("Label '"..labname.."' undefined")
end
labels[i] = lab
end
return m.Rec(op1,op2,unpack(labels))
end
return op1 + op2
elseif action == "and" then
return op1 * op2
elseif action == "&" then
return #op1
elseif action == "!" then
return -op1
elseif action == "+" then
return op1^1
elseif action == "*" then
return op1^0
elseif action == "?" then
return op1^-1
elseif action == "^" then
return op1^op2
elseif action == "^LABEL" then
-- p^label: match p or throw the label
local lab = tlabels[op2]
if not lab then
error("Label '"..op2.."' unspecified using setlabels()")
end
return op1 + m.T(lab)
elseif action == "->" then
return op1 / op2
-- in captures we add SPACES^0
elseif action == "=>" then
return addspaces(m.Cmt(op1,op2))
elseif action == "tcap" then
return m.Ct(op1) -- nospaces
elseif action == "gcap" then
return addspaces(m.Cg(op1, op2))
elseif action == "bref" then
return m.Cb(op1) --m.Cmt(m.Cb(op1), equalcap) -- do we need to add spaces to bcap?
elseif action == "poscap" then
return addspaces(m.Cp())
elseif action == "subcap" then
return addspaces(m.Cs(op1))
elseif action == "scap" then
return addspaces(m.C(op1))
elseif action == "anychar" then
-- '.' is captured for the AST outside token rules
if buildast and not tokenrule then
return m.C(m.P(1))
end
return m.P(1)
elseif action == "label" then
local lab = tlabels[op1]
if not lab then
error("Label '"..op1.."' unspecified using setlabels()")
end
return m.T(lab) -- lpeglabel
elseif action == "%" then
-- %name: user-supplied definition first, then predefined classes
if definitions[op1] then
return definitions[op1]
elseif Predef[op1] then
return Predef[op1]
else
error("Definition for '%"..op1.."' unspecified(use second parameter of parser-gen.compile())")
end
elseif action == "invert" then
return m.P(1) - op1
elseif action == "range" then
-- character range; captured and tokenized outside token rules
local res = m.R(op1)
if not tokenrule then
if buildast then
res = m.C(res)
end
res = token(res)
end
return res
else
error("Unsupported action '"..action.."'")
end
end
-- Build an lpeglabel pattern for a leaf AST node.
-- action: leaf kind from finalNode(); term: its payload; tokenterm: true
-- when a nonterminal refers to a token rule; tokenrule: true while
-- inside a token rule definition (no capture/space handling there).
local function applyfinal(action, term, tokenterm, tokenrule)
if action == "t" then
local res = m.P(term)
if not tokenrule then
if buildast then
res = m.C(res)
end
if skipspaces then
res = token(res)
end
end
return res
elseif action == "nt" then
-- references to token rules consume trailing spaces at the call site
if skipspaces and tokenterm and (not tokenrule) then
return token(m.V(term))
else
return m.V(term)
end
elseif action == "func" then
if definitions[term] then
return definitions[term]
else
error("Definition for function '"..term.."' unspecified (use second parameter of parser-gen.compile())")
end
elseif action == "s" then -- simple string
return term
elseif action == "num" then -- numbered string
return tonumber(term)
end
end
-- Wrap the assembled rule table into an lpeglabel grammar pattern.
local function applygrammar(gram)
  local compiled = m.P(gram)
  return compiled
end
-- Recursively translate a parsed grammar AST into an lpeglabel pattern.
-- tokenrule is true while translating a token rule (suppresses the
-- capture/space handling in the leaves). Returns nil for a nil AST.
local function traverse (ast, tokenrule)
if not ast then
return nil
end
if isfinal(ast) then
local typefn, fn, tok = finalNode(ast)
return applyfinal(typefn, fn, tok, tokenrule)
elseif isaction(ast) then
local act, op1, op2, labs, ret1, ret2
act = ast["action"]
op1 = ast["op1"]
op2 = ast["op2"]
labs = ast["condition"] -- recovery operations
-- post-order traversal
if iscapture(act) then
tokenstack:push(0) -- not found any tokens yet
end
ret1 = traverse(op1, tokenrule)
ret2 = traverse(op2, tokenrule)
return applyaction(act, ret1, ret2, labs, tokenrule)
elseif isgrammar(ast) then
--
local g = bg.buildgrammar (ast)
return applygrammar (g)
else
peg.print_r(ast)
error("Unsupported AST")
end
end
-- Scan the grammar for the special SKIP and SYNC rules, updating the
-- module-level skipspaces/recovery flags and the SKIP/SYNC patterns, and
-- store the traversed rules in `builder`. An empty literal ('') disables
-- the corresponding feature; when SYNC is absent but recovery is on, the
-- default sync pattern over SKIP is used.
local function specialrules(ast, builder)
-- initialize values
SKIP = (Predef.space + Predef.nl)
skipspaces = true
SYNC = nil
recovery = true
-- find SPACE and SYNC rules
for i, v in ipairs(ast) do
local name = v["rulename"]
local rule
if name == "SKIP" then
rule = traverse(v["rule"], true)
if v["rule"]["t"] == '' then
skipspaces = false
else
skipspaces = true
SKIP = rule
end
builder[name] = rule
elseif name == "SYNC" then
rule = traverse(v["rule"], true)
if v["rule"]["t"] == '' then-- SYNC <- ''
recovery=false
else
recovery= true
SYNC = rule
end
builder[name] = rule
end
end
if not SYNC and recovery then
SYNC = defaultsync(SKIP)
end
end
-- Record one parse error at `position` with numeric label `label`:
-- resolves line/column, appends an entry to `errors`, and invokes the
-- user-supplied errorfunc (if any) with the failing line remainder and
-- the label's custom recovery pattern.
local function recorderror(position,label)
-- call error function here
local line, col = peg.calcline(subject, position)
local desc
if label == 0 then
desc = "Syntax error" -- label 0 = generic failure, no registered label
else
desc = tdescs[label]
end
if errorfunc then
-- sfail = remainder of the current line after the failure point
local temp = string.sub(subject,position)
local strend = string.find(temp, "\n")
local sfail = string.sub(temp, 1, strend)
errorfunc(desc,line,col,sfail,trecs[label])
end
local err = { line = line, col = col, label=tlabelnames[label], msg = desc }
table.insert(errors, err)
end
-- Build a pattern that captures the current position plus the label and
-- feeds them to recorderror; spliced into recovery patterns.
local function record(label)
return (m.Cp() * m.Cc(label)) / recorderror
end
-- Wrap `grammar` with lpeglabel Rec rules so that every registered error
-- label records the error and then resynchronizes the input, using the
-- label's custom recovery pattern when one was given to setlabels(), or
-- the global SYNC token otherwise.
local function buildrecovery(grammar)
local synctoken = pattspaces(sync(SYNC))
local grec = grammar
for k,v in pairs(tlabels) do
if trecs[v] then -- custom sync token
grec = m.Rec(grec,record(v) * pattspaces(trecs[v]), v)
else -- use global sync token
grec = m.Rec(grec,record(v) * synctoken, v)
end
end
return grec
end
-- Node mode flag: when true, only rules prefixed with `node` produce AST
-- entries (see README "Nodes").
local usenode = false
-- Enable/disable node mode; must be called before compiling the grammar.
local function usenodes(val)
usenode = val
end
-- Compiles a grammar AST into an lpeg grammar table.
-- Handles the special SKIP/SYNC rules, token rules (token="1"), fragments
-- (which get no AST node of their own) and node mode; wraps captured rules
-- in an AST node table ({rule=..., pos=..., captures...}), prepends initial
-- whitespace skipping and layers error recovery on top of the initial rule.
function bg.buildgrammar (ast)
	local builder = {}
	specialrules(ast, builder)
	local initialrule
	for i, v in ipairs(ast) do
		local istokenrule = v["token"] == "1"
		local isfragment = v["fragment"] == "1"
		local isnode = v["node"] == "1"
		-- BUG FIX: the original tested `not usenodes` -- the setter function,
		-- which is always truthy -- so this guard could never fire. Test the
		-- boolean flag `usenode` instead (as the capture condition below does).
		if isnode and not usenode then
			error("Node mode disabled - please use parser-gen.usenodes(true) before compiling the grammar")
		end
		local name = v["rulename"]
		local isspecial = name == "SKIP" or name == "SYNC"
		local rule = v["rule"]
		if i == 1 then
			initialrule = name
			table.insert(builder, name) -- lpeg syntax: builder[1] names the initial rule
			builder[name] = traverse(rule, istokenrule)
		elseif not builder[name] then -- dont traverse rules for SKIP and SYNC twice
			builder[name] = traverse(rule, istokenrule)
		end
		-- attach AST-node captures unless the rule is a fragment/special rule,
		-- or node mode is on and the rule is not flagged as a node
		if buildast and not isfragment and not isspecial and ((not usenode) or (usenode and isnode)) then
			if istokenrule then
				builder[name] = m.C(builder[name])
			end
			builder[name] = m.Ct(m.Cg(m.Cc(name),"rule") * m.Cg(m.Cp(),"pos") * builder[name])
		end
	end
	if skipspaces then
		builder[initialrule] = SKIP^0 * builder[initialrule] -- skip spaces at the beginning of the input
	end
	if recovery then
		builder[initialrule] = buildrecovery(builder[initialrule]) -- build recovery on top of initial rule
	end
	return builder
end
-- Translates a parsed PEG AST into an lpeg pattern. `defs` (optional)
-- replaces the module-level definitions table used for %name lookups.
-- A full grammar is delegated to the grammar builder via traverse(); a bare
-- pattern gets the default whitespace skipping and sync behaviour and, when
-- AST building is enabled, a surrounding table capture.
local function build(ast, defs)
	if defs then
		definitions = defs
	end
	if isgrammar(ast) then
		return traverse(ast)
	end
	-- input is not a grammar: skip spaces and sync by default
	SKIP = (Predef.space + Predef.nl)
	skipspaces = true
	recovery = true
	SYNC = defaultsync(SKIP)
	local pattern = SKIP^0 * traverse(ast)
	if buildast then
		pattern = m.Ct(pattern)
	end
	return pattern
end
-- recovery grammar
-- end
-- t = {errName="Error description",...}
-- Registers error labels from t, a map of errName -> description or
-- errName -> {description, recoveryPEG}. When `errorgen` is true the new
-- labels are appended after the existing ones (used for automatically
-- generated errors); otherwise all label tables are reset first. Raises
-- when the lpeglabel label limit would be exceeded.
local function setlabels (t, errorgen)
	local nextindex
	if errorgen then
		nextindex = totallabels + 1
	else
		-- reset error tables
		nextindex = 1
		tlabels = {}
		tdescs = {}
		trecs = {}
	end
	for name, spec in pairs(t) do
		if nextindex >= 255 then
			error("Error label limit reached(255)")
		end
		if type(spec) == "table" then
			-- {description, recovery expression}: compile the PEG recovery to LPEG
			tdescs[nextindex] = spec[1]
			trecs[nextindex] = traverse(peg.pegToAST(spec[2]), true)
		else
			tdescs[nextindex] = spec
		end
		tlabels[name] = nextindex
		tlabelnames[nextindex] = name -- reverse lookup: number -> name
		nextindex = nextindex + 1
	end
	totallabels = nextindex - 1
end
-- Compiles a PEG grammar string into an lpeg pattern, memoizing the result
-- per input string. `defs`: table of definitions for %name references;
-- `generrors`: also derive error labels and recovery patterns from the
-- grammar's FOLLOW sets (LL(1) grammars); `nocaptures`: suppress the
-- AST-building captures. Already-compiled grammars pass through unchanged.
local function compile (input, defs, generrors, nocaptures)
	if iscompiled(input) then
		return input
	end
	local cached = mem[input]
	if not cached then
		buildast = not nocaptures
		local ast = peg.pegToAST(input)
		if generrors then
			-- register the labels generated by the errorgen module
			setlabels(eg.adderrors(ast, eg.follow(ast)), true)
		end
		cached = build(ast, defs)
		if not cached then
			-- TODO: find error using relabel module
		end
		mem[input] = cached -- store in case the user forgets to compile it
	end
	return cached
end
-- Matches `input` against `grammar` (compiling it first when it is still a
-- PEG string). `errorfunction`, if given, is invoked once per recorded
-- syntax error. Returns the match/capture result (nil on failure) and the
-- list of recorded errors, or nil in place of the list when no errors
-- occurred.
local function parse (input, grammar, errorfunction)
if not iscompiled(grammar) then
local cp = compile(grammar)
grammar = cp
end
-- set up recovery table
errorfunc = errorfunction
subject = input
errors = {}
-- end
local r, e, sfail = m.match(grammar,input)
if not r then
-- NOTE(review): lpeglabel examples compute the failure position as
-- #input - #sfail + 1; this omits the +1 -- confirm the intended offset.
recorderror(#input - #sfail, e)
end
if #errors == 0 then errors=nil end
return r, errors
end
-- NOTE(review): original comment said "could be wrong" -- the direct
-- re-export of eg.follow has not been verified against the errorgen module.
local follow = eg.follow
-- public module interface
local pg = {compile=compile, setlabels=setlabels, parse=parse,follow=follow, calcline = peg.calcline, usenodes = usenodes}
return pg

View file

@ -0,0 +1,799 @@
local lua = require "lua-parser"
local peg = require "peg-parser"
local eq = require "equals"
local equals = eq.equals
-- Parse every file of the bundled Lua 5.3.4 test suite and require that all
-- of them succeed; only success/failure is checked, since the grammar
-- guarantees the whole file is consumed.
print("\n\n [[ PARSING LUA TEST SUITE FILES ]] \n\n")
local filenames = {
	'all.lua',
	'main.lua',
	'gc.lua',
	'db.lua',
	'calls.lua',
	'strings.lua',
	'literals.lua',
	'tpack.lua',
	'attrib.lua',
	'locals.lua',
	'constructs.lua',
	'code.lua',
	'big.lua',
	'nextvar.lua',
	'pm.lua',
	'utf8.lua',
	'api.lua',
	'events.lua',
	'vararg.lua',
	'closure.lua',
	'coroutine.lua',
	'goto.lua',
	'errors.lua',
	'math.lua',
	'sort.lua',
	'bitwise.lua',
	'verybig.lua',
	'files.lua',
}
local failures = 0
for _, name in ipairs(filenames) do
	local handle = assert(io.open("lua-5.3.4-tests/"..name, "r"))
	local source = handle:read("*all")
	handle:close()
	local tree, parseerrs = lua.parse(source)
	local status = tree and "OK" or "FAIL"
	print("Testing file '"..name.."': ["..status.."]")
	if not tree then
		failures = failures + 1
		print("Error: "..parseerrs[1]["msg"])
	end
end
assert(failures == 0)
print("\n\n Test suite files compiled successfully")
print("\n\n [[ TESTING ERROR LABELS ]] ")
-- Error-label regression tests, rewritten table-driven: each entry pairs the
-- expected message of the first reported error with a source fragment that
-- triggers it. Behaviour (prints and asserts, in order) is unchanged from
-- the original one-case-per-paragraph form; the unused `pr` alias was
-- dropped. `s`, `res` and `err` stay chunk-local because the AST tests
-- further down in this file reuse them.
local s, res, err
local errorcases = {
	{"unexpected character(s), expected EOF", [[ return; ! ]]},
	{"unexpected token, invalid start of statement", [[ ! ]]},
	{"expected 'end' to close the if statement", [[ if c then b=1 ]]},
	{"expected a condition after 'if'", [[ if then b=1 end]]},
	{"expected 'then' after the condition", [[ if c b=1 end ]]},
	{"expected a condition after 'elseif'", [[ if a then b=1 elseif then d=1 end ]]},
	{"expected 'then' after the condition", [[ if a b=1 end]]},
	{"expected 'end' to close the do block", [[ do x=1 ]]},
	{"expected a condition after 'while'", [[ while do c=1 end]]},
	{"expected 'do' after the condition", [[ while a c=1 end ]]},
	{"expected 'end' to close the while loop", [[ while a do b=1]]},
	{"expected 'until' at the end of the repeat loop", [[ repeat c=1 ]]},
	{"expected a conditions after 'until'", [[ repeat c=1 until ]]},
	{"expected a numeric or generic range after 'for'", [[ for 3,4 do x=1 end]]},
	{"expected 'end' to close the for loop", [[ for c=1,3 do a=1 ]]},
	{"expected a starting expression for the numeric range", [[ for a=,4 do a=1 end]]},
	{"expected ',' to split the start and end of the range", [[ for a=4 5 do a=1 end]]},
	{"expected an ending expression for the numeric range", [[ for a=4, do a=1 end]]},
	{"expected a step expression for the numeric range after ','", [[ for a=1,2, do a=1 end ]]},
	{"expected '=' or 'in' after the variable(s)", [[ for a of 1 do a=1 end]]},
	{"expected one or more expressions after 'in'", [[ for a in do a=1 end ]]},
	{"expected 'do' after the range of the for loop", [[ for a=1,2 a=1 end ]]},
	{"expected a function definition or assignment after local", [[ local return c ]]},
	{"expected a function name after 'function'", [[ local function() c=1 end ]]},
	{"expected one or more expressions after '='", [[ local a = return b ]]},
	{"expected one or more expressions after '='", [[ a = return b ]]},
	{"expected a function name after 'function'", [[ function () a=1 end ]]},
	{"expected a function name after '.'", [[ function a.() a=1 end ]]},
	{"expected a method name after ':'", [[ function a:() a=1 end ]]},
	{"expected '(' for the parameter list", [[ function a b=1 end]]},
	{"expected ')' to close the parameter list", [[
function a(
b=1
end
]]},
	{"expected 'end' to close the function body", [[ function a() b=1 ]]},
	{"expected a variable name or '...' after ','", [[ function a(b, ) b=1 end ]]},
	{"expected a label name after '::'", [[ :: return b ]]},
	{"expected '::' after the label", [[ :: abc return a]]},
	{"expected a label after 'goto'", [[ goto return c]]},
	{"expected a variable name after ','", [[ abc,
= 3]]},
	{"expected an expression after ','", [[ return a,;]]},
	{"expected an expression after 'or'", [[ return a or; ]]},
	{"expected an expression after 'and'", [[ return a and;]]},
	{"expected an expression after the relational operator", [[ return a >;]]},
	{"expected an expression after bitwise operator", [[ return b & ; ]]},
	{"expected an expression after '..'", [[ print(a..) ]]},
	{"expected an expression after the additive operator", [[ return a - ]]},
	{"expected an expression after the multiplicative operator", [[ return a/ ]]},
	{"expected an expression after the unary operator", [[ return # ]]},
	{"expected an expression after '^'", [[ return a^ ]]},
	{"expected an expression after '('", [[ return a + () ]]},
	{"expected ')' to close the expression", [[ return a + (a ]]},
	{"expected a field name after '.'", [[ return a. ]]},
	{"expected an expression after '['", [[ return a [ ]]},
	{"expected ']' to close the indexing expression", [[ return a[1 ]]},
	{"expected a method name after ':'", [[ return a: ]]},
	{"expected some arguments for the method call (or '()')", [[ a:b ]]},
	{"expected ')' to close the argument list", [[ return a(c ]]},
	{"expected '}' to close the table constructor", [[ return { ]]},
	{"expected '=' after the table key", [[ a = {[b] b} ]]},
	{"expected an expression after '='", [[ a = {[a] = } ]]},
	{"expected an expression after '[' for the table key", [[ a = {[ = b} ]]},
	{"expected ']' to close the table key", [[ a = {[a = b} ]]},
	{"expected one or more hexadecimal digits after '0x'", [[ a = 0x ]]},
	{"expected one or more digits after the decimal point", [[ a = . ]]},
	{"expected one or more digits for the exponent", [[ a = 1.0e ]]},
	{"unclosed string", [[ a = ";]]},
	{"expected exactly two hexadecimal digits after '\\x'", [[ a = "a\x1" ]]},
	{"expected '{' after '\\u'", [[ a = "a\u" ]]},
	{"expected one or more hexadecimal digits for the UTF-8 code point", [[ a = "\u{}"]]},
	{"expected '}' after the code point", [[ a = "\u{12" ]]},
	{"invalid escape sequence", [[ a = "\;" ]]},
	{"unclosed long string", [==[ a = [[ abc return; ]==]},
}
for _, case in ipairs(errorcases) do
	s = case[2]
	print("Parsing '"..s.."'")
	res, err = lua.parse(s)
	assert(err[1]["msg"] == case[1], "wrong or missing error for: "..s)
end
print("\n\n All error labels generated successfully")
print("\n\n [[ TESTING AST GENERATION ]]\n\n ")
-- TODO: AST
s = [[
if abc > 123 then
abc=123
end]]
rez = {
rule='chunk',
pos=3,
{
rule='block',
pos=3,
{
rule='stat',
pos=3,
'if',
{
rule='exp',
pos=6,
{
rule='exp',
{
rule='exp',
{
rule='expTokens',
pos=6,
{
rule='prefixexp',
pos=6,
{
rule='varOrExp',
pos=6,
{
rule='var',
pos=6,
{
rule='NAME',
pos=6,
'abc',
},
},
},
},
},
},
{
rule='operatorComparison',
pos=10,
'>',
},
{
rule='expTokens',
pos=12,
{
rule='number',
pos=12,
{
rule='INT',
pos=12,
'123',
},
},
},
},
},
'then',
{
rule='block',
pos=23,
{
rule='stat',
pos=23,
{
rule='varlist',
pos=23,
{
rule='var',
pos=23,
{
rule='NAME',
pos=23,
'abc',
},
},
},
'=',
{
rule='explist',
pos=27,
{
rule='exp',
pos=27,
{
rule='expTokens',
pos=27,
{
rule='number',
pos=27,
{
rule='INT',
pos=27,
'123',
},
},
},
},
},
},
},
'end',
},
},
}
print("Parsing '"..s.."'")
res, err = lua.parse(s)
peg.print_t(res)
assert(equals(res,rez))
s = [[
local a = [=[ long string ]=]
-- aaa
return a
--[==[ hi
]==]
]]
rez = {
rule='chunk',
pos=3,
{
rule='block',
pos=3,
{
rule='stat',
pos=3,
'local',
{
rule='localAssign',
pos=9,
{
rule='namelist',
pos=9,
{
rule='NAME',
pos=9,
'a',
},
},
'=',
{
rule='explist',
pos=13,
{
rule='exp',
pos=13,
{
rule='expTokens',
pos=13,
{
rule='string',
pos=13,
' long string ',
},
},
},
},
},
},
{
rule='retstat',
pos=41,
'return',
{
rule='explist',
pos=48,
{
rule='exp',
pos=48,
{
rule='expTokens',
pos=48,
{
rule='prefixexp',
pos=48,
{
rule='varOrExp',
pos=48,
{
rule='var',
pos=48,
{
rule='NAME',
pos=48,
'a',
},
},
},
},
},
},
},
},
},
}
print("Parsing '"..s.."'")
res, err = lua.parse(s)
peg.print_t(res)
assert(equals(res,rez))
print("\n\n All AST's generated successfully")
print("\n\nAll tests passed!")

View file

@ -0,0 +1,291 @@
--[==[
Parser for Lua 5.3
Based on https://github.com/antlr/grammars-v4/blob/master/lua/Lua.g4 and https://github.com/andremm/lua-parser/blob/master/lua-parser/parser.lua
]==]
package.path = package.path .. ";../?.lua"
local pg = require "parser-gen"
-- Match-time helper for the =openEq back-reference in the grammar: succeeds
-- only when the closing long-bracket level (b) has the same number of '='
-- signs as the opening one (a). Subject `s` and position `i` are unused.
function equals(s,i,a,b)
	return #a == #b
end
-- Capture post-processor that flattens the expression nodes produced by the
-- precedence-layered exp rules:
--   * several captures -> an operator chain: tag the wrapper table and the
--     last operand as "exp" nodes and return the whole list;
--   * a single "expTokens" capture -> returned unchanged;
--   * any other single capture -> unwrapped by one level;
--   * no captures -> nil.
function fixexp (...)
	local parts = {...}
	local count = #parts
	if count > 1 then
		parts.rule = "exp"
		parts[count].rule = "exp"
		return parts
	end
	local only = parts[1]
	if not only then
		return nil
	end
	if only.rule == "expTokens" then
		return only
	end
	return only[1]
end
-- Capture post-processor for left-associative operator chains: folds
-- "operand (op operand)*" captures into a left-leaning tree of "exp" nodes,
-- running each operand through fixexp. A single capture is unwrapped by one
-- level; no captures yield nil.
function fold (...)
	local parts = {...}
	local count = #parts
	if count <= 1 then
		if parts[1] then
			return parts[1][1]
		end
		return nil
	end
	local tree = { rule = "exp", fixexp(parts[1]) }
	for i = 2, count, 2 do
		tree = { rule = "exp", tree, parts[i], fixexp(parts[i+1]) }
	end
	return tree
end
-- from https://github.com/andremm/lua-parser/blob/master/lua-parser/parser.lua
local labels = {
ErrExtra="unexpected character(s), expected EOF",
ErrInvalidStat={"unexpected token, invalid start of statement",[[ (!%nl .)* ]]},
ErrEndIf="expected 'end' to close the if statement",
ErrExprIf="expected a condition after 'if'",
ErrThenIf="expected 'then' after the condition",
ErrExprEIf="expected a condition after 'elseif'",
ErrThenEIf="expected 'then' after the condition",
ErrEndDo="expected 'end' to close the do block",
ErrExprWhile="expected a condition after 'while'",
ErrDoWhile="expected 'do' after the condition",
ErrEndWhile="expected 'end' to close the while loop",
ErrUntilRep="expected 'until' at the end of the repeat loop",
ErrExprRep="expected a conditions after 'until'",
ErrForRange="expected a numeric or generic range after 'for'",
ErrEndFor="expected 'end' to close the for loop",
ErrExprFor1="expected a starting expression for the numeric range",
ErrCommaFor="expected ',' to split the start and end of the range",
ErrExprFor2="expected an ending expression for the numeric range",
ErrExprFor3={"expected a step expression for the numeric range after ','",[[ (!'do' !%nl .)* ]]},
ErrInFor="expected '=' or 'in' after the variable(s)",
ErrEListFor="expected one or more expressions after 'in'",
ErrDoFor="expected 'do' after the range of the for loop",
ErrDefLocal="expected a function definition or assignment after local",
ErrNameLFunc="expected a function name after 'function'",
ErrEListLAssign="expected one or more expressions after '='",
ErrEListAssign="expected one or more expressions after '='",
ErrFuncName="expected a function name after 'function'",
ErrNameFunc1="expected a function name after '.'",
ErrNameFunc2="expected a method name after ':'",
ErrOParenPList="expected '(' for the parameter list",
ErrCParenPList="expected ')' to close the parameter list",
ErrEndFunc="expected 'end' to close the function body",
ErrParList="expected a variable name or '...' after ','",
ErrLabel="expected a label name after '::'",
ErrCloseLabel="expected '::' after the label",
ErrGoto="expected a label after 'goto'",
ErrVarList={"expected a variable name after ','",[[ (!'=' !%nl .)* ]]},
ErrExprList="expected an expression after ','",
ErrOrExpr="expected an expression after 'or'",
ErrAndExpr="expected an expression after 'and'",
ErrRelExpr="expected an expression after the relational operator",
ErrBitwiseExpr="expected an expression after bitwise operator",
ErrConcatExpr="expected an expression after '..'",
ErrAddExpr="expected an expression after the additive operator",
ErrMulExpr="expected an expression after the multiplicative operator",
ErrUnaryExpr="expected an expression after the unary operator",
ErrPowExpr="expected an expression after '^'",
ErrExprParen="expected an expression after '('",
ErrCParenExpr="expected ')' to close the expression",
ErrNameIndex="expected a field name after '.'",
ErrExprIndex="expected an expression after '['",
ErrCBracketIndex="expected ']' to close the indexing expression",
ErrNameMeth="expected a method name after ':'",
ErrMethArgs="expected some arguments for the method call (or '()')",
ErrCParenArgs="expected ')' to close the argument list",
ErrCBraceTable="expected '}' to close the table constructor",
ErrEqField="expected '=' after the table key",
ErrExprField="expected an expression after '='",
ErrExprFKey={"expected an expression after '[' for the table key",[[ (!']' !%nl .)* ]] },
ErrCBracketFKey={"expected ']' to close the table key",[[ (!'=' !%nl .)* ]]},
ErrDigitHex="expected one or more hexadecimal digits after '0x'",
ErrDigitDeci="expected one or more digits after the decimal point",
ErrDigitExpo="expected one or more digits for the exponent",
ErrQuote="unclosed string",
ErrHexEsc={"expected exactly two hexadecimal digits after '\\x'",[[ (!('"' / "'" / %nl) .)* ]]},
ErrOBraceUEsc="expected '{' after '\\u'",
ErrDigitUEsc={"expected one or more hexadecimal digits for the UTF-8 code point",[[ (!'}' !%nl .)* ]]},
ErrCBraceUEsc={"expected '}' after the code point",[[ (!('"' / "'") .)* ]]},
ErrEscSeq={"invalid escape sequence",[[ (!('"' / "'" / %nl) .)* ]]},
ErrCloseLStr="unclosed long string",
ErrEqAssign="expected '=' after variable list in assign statement"
}
pg.setlabels(labels)
local grammar = pg.compile([==[
chunk <- block (!.)^ErrExtra
block <- stat* retstat?
stat <- ';' /
functioncall /
varlist '='^ErrEqAssign explist^ErrEListAssign /
'break' /
'goto' NAME^ErrGoto /
'do' block 'end'^ErrEndDo /
'while' exp^ErrExprWhile 'do'^ErrDoWhile block 'end'^ErrEndWhile /
'repeat' block 'until'^ErrUntilRep exp^ErrExprRep /
'if' exp^ErrExprIf 'then'^ErrThenIf block ('elseif' exp^ErrExprEIf 'then'^ErrThenEIf block)* ('else' block)? 'end'^ErrEndIf /
'for' (forNum / forIn)^ErrForRange 'do'^ErrDoFor block 'end'^ErrEndFor /
'function' funcname^ErrFuncName funcbody /
'local' (localAssign / localFunc)^ErrDefLocal /
label /
!blockEnd %{ErrInvalidStat}
blockEnd <- 'return' / 'end' / 'elseif' / 'else' / 'until' / !.
retstat <- 'return' explist? ';'?
forNum <- NAME '=' exp^ErrExprFor1 ','^ErrCommaFor exp^ErrExprFor2 (',' exp^ErrExprFor3)?
forIn <- namelist 'in'^ErrInFor explist^ErrEListFor
localFunc <- 'function' NAME^ErrNameLFunc funcbody
localAssign <- namelist ('=' explist^ErrEListLAssign)?
label <- '::' NAME^ErrLabel '::'^ErrCloseLabel
funcname <- NAME ('.' NAME^ErrNameFunc1)* (':' NAME^ErrNameFunc2)?
varlist <- var (',' var^ErrVarList)*
namelist <- NAME (',' NAME)*
explist <- exp (',' exp^ErrExprList )*
exp <- expOR -> fixexp
expOR <- (expAND (operatorOr expAND^ErrOrExpr)*) -> fold
expAND <- (expREL (operatorAnd expREL^ErrAndExpr)*) -> fold
expREL <- (expBIT (operatorComparison expBIT^ErrRelExpr)*) -> fold
expBIT <- (expCAT (operatorBitwise expCAT^ErrBitwiseExpr)*) -> fold
expCAT <- (expADD (operatorStrcat expCAT^ErrConcatExpr)?) -> fixexp
expADD <- (expMUL (operatorAddSub expMUL^ErrAddExpr)*) -> fold
expMUL <- (expUNA (operatorMulDivMod expUNA^ErrMulExpr)*) -> fold
expUNA <- ((operatorUnary expUNA^ErrUnaryExpr) / expPOW) -> fixexp
expPOW <- (expTokens (operatorPower expUNA^ErrPowExpr)?) -> fixexp
expTokens <- 'nil' / 'false' / 'true' /
number /
string /
'...' /
'function' funcbody /
tableconstructor /
prefixexp
prefixexp <- varOrExp nameAndArgs*
functioncall <- varOrExp nameAndArgs+
varOrExp <- var / brackexp
brackexp <- '(' exp^ErrExprParen ')'^ErrCParenExpr
var <- (NAME / brackexp varSuffix) varSuffix*
varSuffix <- nameAndArgs* ('[' exp^ErrExprIndex ']'^ErrCBracketIndex / '.' !'.' NAME^ErrNameIndex)
nameAndArgs <- (':' !':' NAME^ErrNameMeth args^ErrMethArgs) /
args
args <- '(' explist? ')'^ErrCParenArgs / tableconstructor / string
funcbody <- '('^ErrOParenPList parlist? ')'^ErrCParenPList block 'end'^ErrEndFunc
parlist <- namelist (',' '...'^ErrParList)? / '...'
tableconstructor<- '{' fieldlist? '}'^ErrCBraceTable
fieldlist <- field (fieldsep field)* fieldsep?
field <- !OPEN '[' exp^ErrExprFKey ']'^ErrCBracketFKey '='^ErrEqField exp^ErrExprField /
NAME '=' exp /
exp
fieldsep <- ',' / ';'
operatorOr <- 'or'
operatorAnd <- 'and'
operatorComparison<- '<=' / '>=' / '~=' / '==' / '<' !'<' / '>' !'>'
operatorStrcat <- !'...' '..'
operatorAddSub <- '+' / '-'
operatorMulDivMod<- '*' / '%' / '//' / '/'
operatorBitwise <- '&' / '|' / !'~=' '~' / '<<' / '>>'
operatorUnary <- 'not' / '#' / '-' / !'~=' '~'
operatorPower <- '^'
number <- FLOAT / HEX_FLOAT / HEX / INT
string <- NORMALSTRING / CHARSTRING / LONGSTRING
-- lexer
fragment
RESERVED <- KEYWORDS !IDREST
fragment
IDREST <- [a-zA-Z_0-9]
fragment
KEYWORDS <- 'and' / 'break' / 'do' / 'elseif' / 'else' / 'end' /
'false' / 'for' / 'function' / 'goto' / 'if' / 'in' /
'local' / 'nil' / 'not' / 'or' / 'repeat' / 'return' /
'then' / 'true' / 'until' / 'while'
NAME <- !RESERVED [a-zA-Z_] [a-zA-Z_0-9]*
fragment
NORMALSTRING <- '"' {( ESC / [^"\] )*} '"'^ErrQuote
fragment
CHARSTRING <- "'" {( ESC / [^\'] )*} "'"^ErrQuote
fragment
LONGSTRING <- (OPEN {(!CLOSEEQ .)*} CLOSE^ErrCloseLStr) -> 1 -- capture only the string
fragment
OPEN <- '[' {:openEq: EQUALS :} '[' %nl?
fragment
CLOSE <- ']' {EQUALS} ']'
fragment
EQUALS <- '='*
fragment
CLOSEEQ <- (CLOSE =openEq) => equals
INT <- DIGIT+
HEX <- '0' [xX] HEXDIGIT+^ErrDigitHex
FLOAT <- DIGIT+ '.' DIGIT* ExponentPart? /
'.' !'.' DIGIT+^ErrDigitDeci ExponentPart? /
DIGIT+ ExponentPart
HEX_FLOAT <- '0' [xX] HEXDIGIT+ '.' HEXDIGIT* HexExponentPart? /
'0' [xX] '.' HEXDIGIT+ HexExponentPart? /
'0' [xX] HEXDIGIT+^ErrDigitHex HexExponentPart
fragment
ExponentPart <- [eE] [+-]? DIGIT+^ErrDigitExpo
fragment
HexExponentPart <- [pP] [+-]? DIGIT+^ErrDigitExpo
fragment
ESC <- '\' [abfnrtvz"'\] /
'\' %nl /
DECESC /
HEXESC/
UTFESC/
'\' %{ErrEscSeq}
fragment
DECESC <- '\' ( DIGIT DIGIT? / [0-2] DIGIT DIGIT)
fragment
HEXESC <- '\' 'x' (HEXDIGIT HEXDIGIT)^ErrHexEsc
fragment
UTFESC <- '\' 'u' '{'^ErrOBraceUEsc HEXDIGIT+^ErrDigitUEsc '}'^ErrCBraceUEsc
fragment
DIGIT <- [0-9]
fragment
HEXDIGIT <- [0-9a-fA-F]
fragment
COMMENT <- '--' LONGSTRING -> 0 -- skip this
fragment
LINE_COMMENT <- '--' COM_TYPES ( %nl / !.)
fragment
COM_TYPES <- '[' '='* [^[=%nl] [^%nl]* /
'[' '='* /
[^[%nl] [^%nl]* /
''
fragment
SHEBANG <- '#' '!' [^%nl]*
SKIP <- %nl / %s / COMMENT / LINE_COMMENT / SHEBANG
fragment
HELPER <- RESERVED / '(' / ')' -- for sync expression
SYNC <- ((!HELPER !SKIP .)+ / .?) SKIP* -- either sync to reserved keyword or skip characters and consume them
]==],{ equals = equals, fixexp = fixexp, fold = fold })
-- Sequence number of the next reported syntax error; reset by parse() below.
local errnr = 1
-- Error callback handed to parser-gen: prints one numbered diagnostic line
-- per syntax error. recexp (the recovery pattern) is accepted but unused.
local function err (desc, line, col, sfail, recexp)
print("Syntax error #"..errnr..": "..desc.." at line "..line.."(col "..col..")")
errnr = errnr+1
end
-- Parses a Lua source string: resets the error counter, then returns the
-- AST (or nil on failure) and the list of recorded syntax errors.
local function parse (input)
	errnr = 1
	local tree, errlist = pg.parse(input, grammar, err)
	return tree, errlist
end
return {parse=parse}

View file

@ -0,0 +1,55 @@
package.path = package.path .. ";../?.lua"
local pg = require "parser-gen"
local peg = require "peg-parser"
-- Custom error label for the TINY grammar; further labels are generated
-- automatically because error generation is enabled in the compile call.
local errs = {errMissingThen = "Missing Then"}
pg.setlabels(errs)
-- TINY language grammar. The third argument (true) turns on automatic error
-- generation; `_` (nil here) stands in for the optional definitions table.
local grammar = pg.compile([[
program <- stmtsequence !.
stmtsequence <- statement (';' statement)*
statement <- ifstmt / repeatstmt / assignstmt / readstmt / writestmt
ifstmt <- 'if' exp 'then'^errMissingThen stmtsequence elsestmt? 'end'
elsestmt <- ('else' stmtsequence)
repeatstmt <- 'repeat' stmtsequence 'until' exp
assignstmt <- IDENTIFIER ':=' exp
readstmt <- 'read' IDENTIFIER
writestmt <- 'write' exp
exp <- simpleexp (COMPARISONOP simpleexp)*
COMPARISONOP <- '<' / '='
simpleexp <- term (ADDOP term)*
ADDOP <- [+-]
term <- factor (MULOP factor)*
MULOP <- [*/]
factor <- '(' exp ')' / NUMBER / IDENTIFIER
NUMBER <- '-'? [0-9]+
KEYWORDS <- 'if' / 'repeat' / 'read' / 'write' / 'then' / 'else' / 'end' / 'until'
RESERVED <- KEYWORDS ![a-zA-Z]
IDENTIFIER <- !RESERVED [a-zA-Z]+
HELPER <- ';' / %nl / %s / KEYWORDS / !.
SYNC <- (!HELPER .)*
]], _, true)
-- Number of syntax errors seen during the current parse (printerror upvalue).
local errors = 0
-- Error callback passed to pg.parse: numbers and prints each syntax error.
-- sfail (failing fragment) and trec (recovery pattern) are unused here.
local function printerror(desc,line,col,sfail,trec)
errors = errors+1
print("Error #"..errors..": "..desc.." on line "..line.."(col "..col..")")
end
-- Parses `input` with the TINY grammar: resets the error counter, then
-- returns the parse result (nil on failure) and parser-gen's error list.
-- Fixes two diagnostics from the original: `result` was an accidental
-- global, and the second return value was assigned to the `errors` counter
-- upvalue, clobbering it with the error *list* -- a later parse would then
-- have attempted `errors+1` on a table inside printerror and crashed.
local function parse(input)
	errors = 0
	local result, errlist = pg.parse(input, grammar, printerror)
	return result, errlist
end
-- Command-line entry point: when invoked as a script with an argument,
-- parse it and dump the resulting AST and error list.
-- Fixes: guard against `arg` being nil when this file is require()d; keep
-- the results in locals instead of leaking a global `res` and clobbering
-- the `errs` label table declared above.
if arg and arg[1] then
	-- argument must be in quotes if it contains spaces
	local res, errlist = parse(arg[1])
	peg.print_t(res)
	peg.print_r(errlist)
end

-- public module interface
local ret = {parse=parse}
return ret

View file

@ -0,0 +1,260 @@
-- Regression tests for peg-parser's pegToAST: each case feeds one PEG
-- construct to the parser and deep-compares the resulting AST with the
-- expected table via the `equals` helper.
local peg = require("peg-parser")
local f = peg.pegToAST
local eq = require "equals"
local equals = eq.equals
-- Scratch variables reused by every case; declared local so the suite
-- does not leak accidental globals (fixes luacheck warnings).
local e, res
-- self-description of peg-parser:
--assert(f(peg.gram))
-- ( p ) grouping
e = f("('a')")
res = {t="a"}
assert(equals(e,res))
-- 'string' literal string
e = f("'string'")
res = {t="string"}
assert(equals(e,res))
-- "string" literal string
e = f('"string"')
res = {t="string"}
assert(equals(e,res))
--[class] character class
e = f("[^a-zA-Z01]")
res = {
	action = "invert",
	op1 = {
		action = "or",
		op1 = {
			action = "or",
			op1 = {
				action = "or",
				op1 = {
					action = "range",
					op1 = {
						s = "az"
					}
				},
				op2 = {
					action = "range",
					op1 = {
						s = "AZ"
					}
				}
			},
			op2 = {
				t = "0"
			}
		},
		op2 = {
			t = "1"
		}
	}
}
assert(equals(e,res))
--. any character
e = f(".")
res = {action="anychar"}
assert(equals(e,res))
--%name pattern defs[name] or a pre-defined pattern
e = f("%name")
res = {action="%", op1={s="name"}}
assert(equals(e,res))
--name non terminal
e = f("name")
res = {nt="name"}
assert(equals(e,res))
--<name> non terminal
e = f("<name>")
res = {nt="name"}
assert(equals(e,res))
--{} position capture
e = f("{}")
res = {action="poscap"}
assert(equals(e,res))
--{ p } simple capture
e = f("{name}")
res = {action="scap", op1= {nt="name"}}
assert(equals(e,res))
--{: p :} anonymous group capture
e = f("{:name:}")
res = {action="gcap", op1= {nt="name"}}
assert(equals(e,res))
--{:name: p :} named group capture
e = f("{:g: name:}")
res = {action="gcap", op1= {nt="name"} , op2={s="g"}}
assert(equals(e,res))
--{~ p ~} substitution capture
e = f("{~ name ~}")
res = {action="subcap", op1= {nt="name"}}
assert(equals(e,res))
--{| p |} table capture
e = f("{| name |}")
res = {action="tcap", op1= {nt="name"}}
assert(equals(e,res))
--=name back reference
e = f("=name")
res = {action="bref", op1= {s="name"}}
assert(equals(e,res))
--p ? optional match
e = f("name?")
res = {action="?", op1= {nt="name"}}
assert(equals(e,res))
--p * zero or more repetitions
e = f("name*")
res = {action="*", op1= {nt="name"}}
assert(equals(e,res))
--p + one or more repetitions
e = f("name+")
res = {action="+", op1= {nt="name"}}
assert(equals(e,res))
--p^num exactly n repetitions
e = f("name^3")
res = {action="^", op1= {nt="name"}, op2 = {num="3"}}
assert(equals(e,res))
--p^+num at least n repetitions
e = f("name^+3")
res = {action="^", op1= {nt="name"}, op2 = {num="+3"}}
assert(equals(e,res))
--p^-num at most n repetitions
e = f("name^-3")
res = {action="^", op1= {nt="name"}, op2 = {num="-3"}}
assert(equals(e,res))
--p^LABEL error label
e = f("name^err")
res = {action = "^LABEL", op1= {nt="name"}, op2 = {s="err"}}
assert(equals(e,res))
--p -> 'string' string capture
e = f("name -> 'a'")
res = {action="->", op1= {nt="name"}, op2 = {s="a"}}
assert(equals(e,res))
--p -> "string" string capture
e = f('name -> "a"')
res = {action="->", op1= {nt="name"}, op2 = {s="a"}}
assert(equals(e,res))
--p -> num numbered capture
e = f('name -> 3')
res = {action="->", op1= {nt="name"}, op2 = {num="3"}}
assert(equals(e,res))
--p -> name function/query/string capture equivalent to p / defs[name]
e = f('name -> func')
res = {action="->", op1= {nt="name"}, op2 = {func="func"}}
assert(equals(e,res))
--p => name match-time capture equivalent to lpeg.Cmt(p, defs[name])
e = f('name => func')
res = {action="=>", op1= {nt="name"}, op2 = {func="func"}}
assert(equals(e,res))
--& p and predicate
e = f('&name')
res = {action="&", op1= {nt="name"}}
assert(equals(e,res))
--! p not predicate
e = f('!name')
res = {action="!", op1= {nt="name"}}
assert(equals(e,res))
--p1 p2 p3 concatenation with left association
e = f('name name2 name3')
res = {action="and", op1= {action = "and", op1={nt="name"}, op2={nt="name2"}}, op2={nt="name3"}}
assert(equals(e,res))
--p1 / p2 / p3 ordered choice with left association
e = f('name / name2 / name3')
res = {action="or", op1= {action = "or", op1={nt="name"}, op2={nt="name2"}}, op2={nt="name3"}}
assert(equals(e,res))
--(name <- p)+ grammar
e = f('a <- b b <- c')
res = {
	{rulename = "a", rule = {nt="b"}},
	{rulename = "b", rule = {nt="c"}}
}
assert(equals(e,res))
-- error labels
-- %{errName}
--peg.setlabels({errName=1})
e = f('%{errName}')
res = {action="label", op1={s="errName"}}
assert(equals(e,res))
-- a //{errName,errName2} b
--peg.setlabels({errName=1, errName2=2})
e = f('a //{errName,errName2} b')
res = {action="or", condition={{s="errName"},{s="errName2"}}, op1={nt="a"}, op2={nt="b"}}
assert(equals(e,res))
-- (typo "succesful" fixed in the status message)
print("all tests successful")

View file

@ -0,0 +1,334 @@
local re = require("deps.lpeglabel.relabel")
local peg = {}
-- from relabel.lua
local errinfo = {
{"NoPatt", "no pattern found"},
{"ExtraChars", "unexpected characters after the pattern"},
{"ExpPatt1", "expected a pattern after '/' or '//{...}'"},
{"ExpPatt2", "expected a pattern after '&'"},
{"ExpPatt3", "expected a pattern after '!'"},
{"ExpPatt4", "expected a pattern after '('"},
{"ExpPatt5", "expected a pattern after ':'"},
{"ExpPatt6", "expected a pattern after '{~'"},
{"ExpPatt7", "expected a pattern after '{|'"},
{"ExpPatt8", "expected a pattern after '<-'"},
{"ExpPattOrClose", "expected a pattern or closing '}' after '{'"},
{"ExpNum", "expected a number after '^', '+' or '-' (no space)"},
{"ExpNumOrLab", "expected a number or a label after ^"},
{"ExpCap", "expected a string, number, '{}' or name after '->'"},
{"ExpName1", "expected the name of a rule after '=>'"},
{"ExpName2", "expected the name of a rule after '=' (no space)"},
{"ExpName3", "expected the name of a rule after '<' (no space)"},
{"ExpLab1", "expected at least one label after '{'"},
{"ExpLab2", "expected a label after the comma"},
{"ExpNameOrLab", "expected a name or label after '%' (no space)"},
{"ExpItem", "expected at least one item after '[' or '^'"},
{"MisClose1", "missing closing ')'"},
{"MisClose2", "missing closing ':}'"},
{"MisClose3", "missing closing '~}'"},
{"MisClose4", "missing closing '|}'"},
{"MisClose5", "missing closing '}'"}, -- for the captures
{"MisClose6", "missing closing '>'"},
{"MisClose7", "missing closing '}'"}, -- for the labels
{"MisClose8", "missing closing ']'"},
{"MisTerm1", "missing terminating single quote"},
{"MisTerm2", "missing terminating double quote"},
}
-- Build the numeric message list and the name->number label map that
-- relabel expects: label numbers are 1-based indices into errinfo.
local errmsgs = {}
local labels = {}
for i, err in ipairs(errinfo) do
	errmsgs[i] = err[2]
	labels[err[1]] = i
end
-- Register the labels so the meta-grammar below can raise them by name.
re.setlabels(labels)
-- Join two strings; used as a capture helper (`-> concat`) by the
-- character-range rule in the PEG meta-grammar below.
local function concat(a,b)
	return table.concat({a, b})
end
local function foldtable(action,t)
	-- Left-fold the captured list `t` into a nested binary AST.
	-- `action` names the node kind: "and"/"or" for sequencing/choice,
	-- "suf" for suffix operators (*, +, ?, ^n, ->, => ...).
	-- Renamed the accumulator from `re` (which shadowed the relabel
	-- module local) and `labels` (which shadowed the module-level label
	-- table) to silence diagnostics warnings; logic is unchanged.
	local result
	local first = true
	for _, value in pairs(t) do
		if first then
			result = value
			first = false
		else
			local temp = result
			if action == "suf" then -- suffix actions
				local act = value[1]
				if act == "*" or act == "?" or act == "+" then
					result = {action=act, op1=temp}
				else
					result = {action=act, op1=temp, op2=value[2]}
				end
			elseif action == "or" and #value == 2 then -- recovery expression
				-- value = { label-list, recovery-pattern }
				local conds = value[1]
				local op2 = value[2]
				result = {action=action, op1=temp, op2=op2, condition=conds}
			else
				result = {action=action, op1=temp, op2=value}
			end
		end
	end
	return result
end
local gram = [=[
pattern <- (exp / %{NoPatt}) (!. / %{ExtraChars})
exp <- S (grammar / alternative)
labels <- {| '{' {: (label / %{ExpLab1}) :} (',' {: (label / %{ExpLab2}) :})* ('}' / %{MisClose7}) |}
alternative <- ( {:''->'or':} {| {: seq :} ('/' (('/' {| {: labels :} S {: (seq / %{ExpPatt1}) :} |}) / (S {: (seq / %{ExpPatt1}) :} ) ) )* |} ) -> foldtable
seq <- ( {:''->'and':} {| {: prefix :}+ |} ) -> foldtable
prefix <- {| {:action: '&' :} S {:op1: (prefix / %{ExpPatt2}) :} |}
/ {| {:action: '!' :} S {:op1: (prefix / %{ExpPatt3}) :} |}
/ suffix
suffix <- ( {:''->'suf':} {| primary S {| suffixaction S |}* |} ) -> foldtable
suffixaction <- {[+*?]}
/ {'^'} {| {:num: [+-]? NUM:} |}
/ '^'->'^LABEL' (label / %{ExpNumOrLab})
/ {'->'} S ((string / {| {:action:'{}'->'poscap':} |} / funcname / {|{:num: NUM :} |}) / %{ExpCap})
/ {'=>'} S (funcname / %{ExpName1})
primary <- '(' (exp / %{ExpPatt4}) (')' / %{MisClose1})
/ term
/ class
/ defined
/ {| {:action: '%'->'label':} ('{' / %{ExpNameOrLab}) S ({:op1: label:} / %{ExpLab1}) S ('}' / %{MisClose7}) |}
/ {| {:action: '{:'->'gcap':} {:op2: defname:} ':' !'}' ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
/ {| {:action: '{:'->'gcap':} ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
/ {| {:action: '='->'bref':} ({:op1: defname:} / %{ExpName2}) |}
/ {| {:action: '{}'->'poscap':} |}
/ {| {:action: '{~'->'subcap':} ({:op1: exp:} / %{ExpPatt6}) ('~}' / %{MisClose3}) |}
/ {| {:action: '{|'->'tcap':} ({:op1: exp:} / %{ExpPatt7}) ('|}' / %{MisClose4}) |}
/ {| {:action: '{'->'scap':} ({:op1: exp:} / %{ExpPattOrClose}) ('}' / %{MisClose5}) |}
/ {| {:action: '.'->'anychar':} |}
/ !frag !nodee name S !ARROW
/ '<' (name / %{ExpName3}) ('>' / %{MisClose6}) -- old-style non terminals
grammar <- {| definition+ |}
definition <- {| (frag / nodee)? (token / nontoken) S ARROW ({:rule: exp :} / %{ExpPatt8}) |}
label <- {| {:s: ERRORNAME :} |}
frag <- {:fragment: 'fragment'->'1' :} ![0-9_a-z] S !ARROW
nodee <- {:node: 'node'->'1' :} ![0-9_a-z] S !ARROW
token <- {:rulename: TOKENNAME :} {:token:''->'1':}
nontoken <- {:rulename: NAMESTRING :}
class <- '[' ( ('^' {| {:action:''->'invert':} {:op1: classset :} |} ) / classset ) (']' / %{MisClose8})
classset <- ( {:''->'or':} {| {: (item / %{ExpItem}) :} (!']' {: (item / %{ExpItem}) :})* |} ) -> foldtable
item <- defined / range / {| {:t: . :} |}
range <- {| {:action:''->'range':} {:op1: {| {:s: ({: . :} ('-') {: [^]] :} ) -> concat :} |} :} |}
S <- (%s / '--' [^%nl]*)* -- spaces and comments
name <- {| {:nt: TOKENNAME :} {:token:''->'1':} / {:nt: NAMESTRING :} |}
funcname <- {| {:func: NAMESTRING :} |}
ERRORNAME <- NAMESTRING
NAMESTRING <- [A-Za-z][A-Za-z0-9_]*
TOKENNAME <- [A-Z_]+ ![0-9a-z]
defname <- {| {:s: NAMESTRING :} |}
ARROW <- '<-'
NUM <- [0-9]+
term <- {| '"' {:t: [^"]* :} ('"' / %{MisTerm2}) / "'" {:t: [^']* :} ("'" / %{MisTerm1}) |}
string <- {| '"' {:s: [^"]* :} ('"' / %{MisTerm2}) / "'" {:s: [^']* :} ("'" / %{MisTerm1}) |}
defined <- {| {:action: '%':} {:op1: defname :} |}
]=]
-- Capture helpers referenced by name inside the meta-grammar string.
local defs = {foldtable=foldtable, concat=concat}
-- Expose the raw grammar, helper table and label map for reuse/testing.
peg.gram = gram
peg.defs = defs
peg.labels = labels
-- Compile the PEG meta-grammar once at module load time.
local p = re.compile ( gram, defs)
--[[
Function: pegToAST(input)
Input: a grammar in PEG format, described in https://github.com/vsbenas/parser-gen
Output: if parsing successful - a table of grammar rules, else - runtime error
Example input: "
Program <- stmt* / SPACE
stmt <- ('a' / 'b')+
SPACE <- ''
"
Example output: {
{rulename = "Program", rule = {action = "or", op1 = {action = "*", op1 = {nt = "stmt"}}, op2 = {nt = "SPACE", token="1"}}},
{rulename = "stmt", rule = {action = "+", op1 = {action="or", op1 = {t = "a"}, op2 = {t = "b"}}}},
{rulename = "SPACE", rule = {t=""}, token=1},
}
The rules are further processed and turned into lpeg compatible format in parser-gen.lua
Action names:
or (has parameter condition for recovery expresions)
and
&
!
+
*
?
^num (num is a number with an optional plus or minus sign)
^label (label is an error label set with setlabels)
->
=>
tcap
gcap (op2= name, anonymous otherwise)
bref
poscap
subcap
scap
anychar
label
%
range
Final token actions:
t - terminal
nt - non terminal
func - function definition
s - literal string
num - literal number
]]--
-- Split `str` into a 1-based array of lines, accepting both "\n" and
-- "\r\n" terminators. Mirrors the original gsub trick: the text after
-- the last newline is always appended as a final element, so input
-- ending in a newline yields a trailing "" entry.
local function splitlines(str)
	local lines = {}
	for line in (str .. "\n"):gmatch("(.-)\r?\n") do
		lines[#lines + 1] = line
	end
	return lines
end
function peg.pegToAST(input, defs)
	-- Parse a PEG grammar string into an AST table (node shapes are
	-- documented in the comment block above). On failure, raises a Lua
	-- error that pinpoints the offending line/column of `input`.
	-- NOTE(review): the `defs` parameter shadows the module-level `defs`
	-- table; it is forwarded to the compiled pattern as match-time defs.
	local r, e, sfail = p:match(input, defs)
	if not r then
		-- Map the label number back to a human-readable message;
		-- label 0 means an unlabelled (generic) failure.
		local lab
		if e == 0 then
			lab = "Syntax error"
		else
			lab = errmsgs[e]
		end
		local lines = splitlines(input)
		-- sfail is the unconsumed suffix, so its length gives the
		-- failure position counted from the end of the input.
		local line, col = re.calcline(input, #input - #sfail + 1)
		local err = {}
		table.insert(err, "L" .. line .. ":C" .. col .. ": " .. lab)
		table.insert(err, lines[line])
		-- Caret marker under the failing column.
		table.insert(err, string.rep(" ", col-1) .. "^")
		-- Level 3 blames the caller of pegToAST's caller.
		error("syntax error(s) in pattern\n" .. table.concat(err, "\n"), 3)
	end
	return r
end
function peg.print_r ( t ) -- for debugging
	-- Recursively pretty-print a table (or scalar) to stdout in a
	-- "[key] => value" outline. Already-visited tables (cycles/shared
	-- subtables) are keyed by tostring(t) and printed as "*<address>".
	local print_r_cache={}
	local function sub_print_r(t,indent)
		if (print_r_cache[tostring(t)]) then
			print(indent.."*"..tostring(t))
		else
			print_r_cache[tostring(t)]=true
			if (type(t)=="table") then
				for pos,val in pairs(t) do
					if (type(val)=="table") then
						print(indent.."["..pos.."] => {")
						-- widen the indent by the key width so nested
						-- entries line up under their parent
						sub_print_r(val,indent..string.rep(" ",string.len(pos)+8))
						print(indent..string.rep(" ",string.len(pos)+6).."}")
					else
						print(indent.."["..pos.."] => '"..tostring(val).."'")
					end
				end
			else
				print(indent..tostring(t))
			end
		end
	end
	sub_print_r(t,"")
end
function peg.print_t ( t ) -- for debugging
	-- Pretty-print an AST table in a roughly Lua-literal style.
	-- "rule" and "pos" keys are printed before all other keys; cycles
	-- print as "*<address>" via the tostring-keyed cache.
	local print_r_cache={}
	local function sub_print_r (t,indent)
		if (print_r_cache[tostring(t)]) then
			print(indent.."*"..tostring(t))
		else
			print_r_cache[tostring(t)]=true
			if (type(t)=="table") then
				-- Print one key/value pair, recursing into table values.
				local function subprint (pos,val,indent)
					if (type(val)=="table") then
						print(indent.."{")
						sub_print_r(val,indent..string.rep(" ",string.len(pos)+8))
						print(indent..string.rep(" ",string.len(pos)-1).."},")
					else
						-- quote everything except numbers
						if type(val) ~= "number" then
							val = "'"..tostring(val).."'"
						end
						-- numeric keys print as bare values, named keys
						-- as "key=value"
						if tonumber(pos) then
							print(indent..val..",")
						else
							print(indent..pos.."="..val..",")
						end
					end
				end
				if t["rule"] then
					subprint("rule",t["rule"],indent)
				end
				if t["pos"] then
					subprint("pos",t["pos"],indent)
				end
				for pos,val in pairs(t) do
					if pos ~= "rule" and pos ~= "pos" then
						subprint(pos,val,indent)
					end
				end
			else
				print(indent..tostring(t))
			end
		end
	end
	sub_print_r(t,"")
end
function peg.calcline(subject, pos)
	-- Thin wrapper over relabel's calcline: converts a byte position in
	-- `subject` into a (line, column) pair.
	return re.calcline(subject,pos)
end
return peg

View file

@ -0,0 +1,27 @@
-- LuaRocks rockspec for parser-gen 1.0-7.
package = "parser-gen"
version = "1.0-7"
source = {
	url = "git://github.com/vsbenas/parser-gen",
	tag = "v1.0"
}
description = {
	summary = "A PEG parser generator that handles space characters, generates ASTs and adds error labels automatically.",
	homepage = "https://github.com/vsbenas/parser-gen",
	license = "MIT/X11"
}
dependencies = {
	"lua >= 5.1, < 5.4",
	"lpeglabel >= 0.12.2"
}
build = {
	type = "builtin",
	modules = {
		["parser-gen"] = "parser-gen.lua",
		["peg-parser"] = "peg-parser.lua",
		["stack"] = "stack.lua",
		["equals"] = "equals.lua",
		["errorgen"] = "errorgen.lua",
	}
}

View file

@ -0,0 +1,27 @@
package = "parser-gen"
version = "1.1-0"
source = {
url = "git://github.com/vsbenas/parser-gen",
tag = "v1.1"
}
description = {
summary = "A PEG parser generator that handles space characters, generates ASTs and adds error labels automatically.",
homepage = "https://github.com/vsbenas/parser-gen",
license = "MIT/X11"
}
dependencies = {
"lua >= 5.1, < 5.4",
"lpeglabel >= 0.12.2"
}
build = {
type = "builtin",
modules = {
["parser-gen"] = "parser-gen.lua",
["peg-parser"] = "peg-parser.lua",
["stack"] = "stack.lua",
["equals"] = "equals.lua",
["errorgen"] = "errorgen.lua",
}
}

View file

@ -0,0 +1,27 @@
package = "parser-gen"
version = "1.2-0"
source = {
url = "git://github.com/vsbenas/parser-gen",
tag = "v1.2"
}
description = {
summary = "A PEG parser generator that handles space characters, generates ASTs and adds error labels automatically.",
homepage = "https://github.com/vsbenas/parser-gen",
license = "MIT/X11"
}
dependencies = {
"lua >= 5.1, < 5.4",
"lpeglabel >= 0.12.2"
}
build = {
type = "builtin",
modules = {
["parser-gen"] = "parser-gen.lua",
["peg-parser"] = "peg-parser.lua",
["stack"] = "stack.lua",
["equals"] = "equals.lua",
["errorgen"] = "errorgen.lua",
}
}

View file

@ -0,0 +1,66 @@
-- Stack Table
-- Uses a table as stack, use <table>:push(value) and <table>:pop()

-- table.unpack exists on 5.2+; fall back to the global `unpack` on
-- Lua 5.1/LuaJIT (the rockspec advertises 5.1 support).
local unpack = table.unpack or unpack

-- GLOBAL
local Stack = {}

--- Create a new stack instance.
-- Returns a table with push/pop/getn/list methods; the values live in
-- the private array `_et` (top of stack = end of the array).
function Stack:Create()

	-- stack object
	local t = {}

	-- entry table
	t._et = {}

	--- Push one or more values onto the stack.
	-- Counts arguments with select("#", ...) instead of `if ... then`,
	-- which silently dropped all arguments whenever the first one was
	-- false or nil. ipairs still stops at the first nil value, so nils
	-- (and anything after them) are not stored.
	function t:push(...)
		if select("#", ...) > 0 then
			local targs = {...}
			-- add values
			for _, v in ipairs(targs) do
				table.insert(self._et, v)
			end
		end
	end

	--- Pop up to `num` values (default 1) from the stack.
	-- Returns them top-first; stops early if the stack runs empty.
	function t:pop(num)
		-- default count; assign the parameter directly instead of
		-- shadowing it with a second `local num`
		num = num or 1
		-- return table
		local entries = {}
		-- get values into entries
		for _ = 1, num do
			if #self._et == 0 then
				break
			end
			-- take the last entry (top of the stack), then remove it
			table.insert(entries, self._et[#self._et])
			table.remove(self._et)
		end
		-- return unpacked entries
		return unpack(entries)
	end

	--- Number of values currently on the stack.
	function t:getn()
		return #self._et
	end

	--- Debug helper: print each stored value with its index.
	function t:list()
		for i, v in pairs(self._et) do
			print(i, v)
		end
	end

	return t
end

return {Stack=Stack}
-- CHILLCODE™