Fixed some diagnostics warnings
Moved examples to tofix because fixing them is beside the point right now.
This commit is contained in:
parent
52164c82e3
commit
858fe11666
166 changed files with 68 additions and 264 deletions
20
06/deps/parser-gen/LICENSE
Normal file
20
06/deps/parser-gen/LICENSE
Normal file
|
@ -0,0 +1,20 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2017 Benas Vaitkevicius
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
397
06/deps/parser-gen/README.md
Normal file
397
06/deps/parser-gen/README.md
Normal file
|
@ -0,0 +1,397 @@
|
|||
# parser-gen
|
||||
|
||||
A Lua parser generator that makes it possible to describe grammars in a [PEG](https://en.wikipedia.org/wiki/Parsing_expression_grammar) syntax. The tool will parse a given input using a provided grammar and if the matching is successful produce an AST as an output with the captured values using [Lpeg](http://www.inf.puc-rio.br/~roberto/lpeg/). If the matching fails, labelled errors can be used in the grammar to indicate failure position, and recovery grammars are generated to continue parsing the input using [LpegLabel](https://github.com/sqmedeiros/lpeglabel). The tool can also automatically generate error labels and recovery grammars for LL(1) grammars.
|
||||
|
||||
parser-gen is a [GSoC 2017](https://developers.google.com/open-source/gsoc/) project, and was completed with the help of my mentor [@sqmedeiros](https://github.com/sqmedeiros) from [LabLua](http://www.lua.inf.puc-rio.br/). A blog documenting the progress of the project can be found [here](https://parsergen.blogspot.com/2017/08/parser-generator-based-on-lpeglabel.html).
|
||||
|
||||
---
|
||||
# Table of contents
|
||||
|
||||
* [Requirements](#requirements)
|
||||
|
||||
* [Syntax](#syntax)
|
||||
|
||||
* [Grammar Syntax](#grammar-syntax)
|
||||
|
||||
* [Example: Tiny Parser](#example-tiny-parser)
|
||||
|
||||
# Requirements
|
||||
```
|
||||
lua >= 5.1
|
||||
lpeglabel >= 1.2.0
|
||||
```
|
||||
# Syntax
|
||||
|
||||
### compile
|
||||
|
||||
This function generates a PEG parser from the grammar description.
|
||||
|
||||
```lua
|
||||
local pg = require "parser-gen"
|
||||
grammar = pg.compile(input,definitions [, errorgen, noast])
|
||||
```
|
||||
*Arguments*:
|
||||
|
||||
`input` - A string containing a PEG grammar description. For complete PEG syntax see the grammar section of this document.
|
||||
|
||||
`definitions` - table of custom functions and definitions used inside the grammar, for example {equals=equals}, where equals is a function.
|
||||
|
||||
`errorgen` - **EXPERIMENTAL** optional boolean parameter(default:false), when enabled generates error labels automatically. Works well only on LL(1) grammars. Custom error labels have precedence over automatically generated ones.
|
||||
|
||||
`noast` - optional boolean parameter(default:false), when enabled does not generate an AST for the parse.
|
||||
|
||||
*Output*:
|
||||
|
||||
`grammar` - a compiled grammar on success, throws error on failure.
|
||||
|
||||
### setlabels
|
||||
|
||||
If custom error labels are used, the function *setlabels* allows setting their description (and custom recovery pattern):
|
||||
```lua
|
||||
pg.setlabels(t)
|
||||
```
|
||||
Example table of a simple error and one with a custom recovery expression:
|
||||
```lua
|
||||
-- grammar rule: " ifexp <- 'if' exp 'then'^missingThen stmt 'end'^missingEnd "
|
||||
local t = {
|
||||
missingEnd = "Missing 'end' in if expression",
|
||||
missingThen = {"Missing 'then' in if expression", " (!stmt .)* "} -- a custom recovery pattern
|
||||
}
|
||||
pg.setlabels(t)
|
||||
```
|
||||
If the recovery pattern is not set, then the one specified by the rule SYNC will be used. It is by default set to:
|
||||
```lua
|
||||
SKIP <- %s / %nl -- a space ' ' or newline '\n' character
|
||||
SYNC <- .? (!SKIP .)*
|
||||
```
|
||||
Learn more about special rules in the grammar section.
|
||||
|
||||
### parse
|
||||
|
||||
This operation attempts to match a grammar to the given input.
|
||||
|
||||
```lua
|
||||
result, errors = pg.parse(input, grammar [, errorfunction])
|
||||
```
|
||||
*Arguments*:
|
||||
|
||||
`input` - an input string that the tool will attempt to parse.
|
||||
|
||||
`grammar` - a compiled grammar.
|
||||
|
||||
`errorfunction` - an optional function that will be called if an error is encountered, with the arguments `desc` for the error description set using `setlabels()`; location indicators `line` and `col`; the remaining string before failure `sfail` and a custom recovery expression `trec` if available.
|
||||
Example:
|
||||
```lua
|
||||
local errs = 0
|
||||
local function printerror(desc,line,col,sfail,trec)
|
||||
errs = errs+1
|
||||
print("Error #"..errs..": "..desc.." before '"..sfail.."' on line "..line.."(col "..col..")")
|
||||
end
|
||||
|
||||
result, errors = pg.parse(input,grammar,printerror)
|
||||
```
|
||||
*Output*:
|
||||
|
||||
If the parse is successful, the function returns an abstract syntax tree containing the captures `result` and a table of any encountered `errors`. If the parse was unsuccessful, `result` is going to be **nil**.
|
||||
Also, if the `noast` option is enabled when compiling the grammar, the function will then produce the longest match length or any custom captures used.
|
||||
|
||||
### calcline
|
||||
|
||||
Calculates line and column information regarding position i of the subject (exported from the relabel module).
|
||||
|
||||
```lua
|
||||
line, col = pg.calcline(subject, position)
|
||||
```
|
||||
*Arguments*:
|
||||
|
||||
`subject` - subject string
|
||||
|
||||
`position` - position inside the string, for example, the one given by automatic AST generation.
|
||||
|
||||
### usenodes
|
||||
|
||||
When AST generation is enabled, this function will enable the "node" mode, where only rules tagged with a `node` prefix will generate AST entries. Must be used before compiling the grammar.
|
||||
|
||||
```lua
|
||||
pg.usenodes(value)
|
||||
```
|
||||
*Arguments*:
|
||||
|
||||
`value` - a boolean value that enables or disables this function
|
||||
|
||||
# Grammar Syntax
|
||||
|
||||
The grammar used for this tool is described using a PEG-like syntax, that is identical to the one provided by the [re](http://www.inf.puc-rio.br/~roberto/lpeg/re.html) module, with an extension of labelled failures provided by [relabel](https://github.com/sqmedeiros/lpeglabel) module (except numbered labels). That is, all grammars that work with relabel should work with parser-gen as long as numbered error labels are not used, as they are not supported by parser-gen.
|
||||
|
||||
Since a parser generated with parser-gen automatically consumes space characters, builds ASTs and generates errors, additional extensions have been added based on the [ANTLR](http://www.antlr.org/) syntax.
|
||||
|
||||
### Basic syntax
|
||||
|
||||
The syntax of parser-gen grammars is somewhat similar to regex syntax. The next table summarizes the tool's syntax. A p represents an arbitrary pattern; num represents a number (`[0-9]+`); name represents an identifier (`[a-zA-Z][a-zA-Z0-9_]*`).`defs` is the definitions table provided when compiling the grammar. Note that error names must be set using `setlabels()` before compiling the grammar. Constructions are listed in order of decreasing precedence.
|
||||
|
||||
<table border="1">
|
||||
<tbody><tr><td><b>Syntax</b></td><td><b>Description</b></td></tr>
|
||||
<tr><td><code>( p )</code></td> <td>grouping</td></tr>
|
||||
<tr><td><code>'string'</code></td> <td>literal string</td></tr>
|
||||
<tr><td><code>"string"</code></td> <td>literal string</td></tr>
|
||||
<tr><td><code>[class]</code></td> <td>character class</td></tr>
|
||||
<tr><td><code>.</code></td> <td>any character</td></tr>
|
||||
<tr><td><code>%name</code></td>
|
||||
<td>pattern <code>defs[name]</code> or a pre-defined pattern</td></tr>
|
||||
<tr><td><code>name</code></td><td>non terminal</td></tr>
|
||||
<tr><td><code><name></code></td><td>non terminal</td></tr>
|
||||
<tr><td><code>%{name}</code></td> <td>error label</td></tr>
|
||||
<tr><td><code>{}</code></td> <td>position capture</td></tr>
|
||||
<tr><td><code>{ p }</code></td> <td>simple capture</td></tr>
|
||||
<tr><td><code>{: p :}</code></td> <td>anonymous group capture</td></tr>
|
||||
<tr><td><code>{:name: p :}</code></td> <td>named group capture</td></tr>
|
||||
<tr><td><code>{~ p ~}</code></td> <td>substitution capture</td></tr>
|
||||
<tr><td><code>{| p |}</code></td> <td>table capture</td></tr>
|
||||
<tr><td><code>=name</code></td> <td>back reference
|
||||
</td></tr>
|
||||
<tr><td><code>p ?</code></td> <td>optional match</td></tr>
|
||||
<tr><td><code>p *</code></td> <td>zero or more repetitions</td></tr>
|
||||
<tr><td><code>p +</code></td> <td>one or more repetitions</td></tr>
|
||||
<tr><td><code>p^num</code></td> <td>exactly <code>n</code> repetitions</td></tr>
|
||||
<tr><td><code>p^+num</code></td>
|
||||
<td>at least <code>n</code> repetitions</td></tr>
|
||||
<tr><td><code>p^-num</code></td>
|
||||
<td>at most <code>n</code> repetitions</td></tr>
|
||||
<tr><td><code>p^name</code></td> <td>match p or throw error label name.</td></tr>
|
||||
<tr><td><code>p -> 'string'</code></td> <td>string capture</td></tr>
|
||||
<tr><td><code>p -> "string"</code></td> <td>string capture</td></tr>
|
||||
<tr><td><code>p -> num</code></td> <td>numbered capture</td></tr>
|
||||
<tr><td><code>p -> name</code></td> <td>function/query/string capture
|
||||
equivalent to <code>p / defs[name]</code></td></tr>
|
||||
<tr><td><code>p => name</code></td> <td>match-time capture
|
||||
equivalent to <code>lpeg.Cmt(p, defs[name])</code></td></tr>
|
||||
<tr><td><code>& p</code></td> <td>and predicate</td></tr>
|
||||
<tr><td><code>! p</code></td> <td>not predicate</td></tr>
|
||||
<tr><td><code>p1 p2</code></td> <td>concatenation</td></tr>
|
||||
<tr><td><code>p1 //{name [, name, ...]} p2</code></td> <td>specifies recovery pattern p2 for p1
|
||||
when one of the labels is thrown</td></tr>
|
||||
<tr><td><code>p1 / p2</code></td> <td>ordered choice</td></tr>
|
||||
<tr><td>(<code>name <- p</code>)<sup>+</sup></td> <td>grammar</td></tr>
|
||||
</tbody></table>
|
||||
|
||||
|
||||
The grammar below is used to match balanced parenthesis
|
||||
|
||||
```lua
|
||||
balanced <- "(" ([^()] / balanced)* ")"
|
||||
```
|
||||
For more examples check out the [re](http://www.inf.puc-rio.br/~roberto/lpeg/re.html) page, see the Tiny parser below or the [Lua parser](https://github.com/vsbenas/parser-gen/blob/master/parsers/lua-parser.lua) written with this tool.
|
||||
|
||||
### Error labels
|
||||
|
||||
Error labels are provided by the relabel function %{errorname} (errorname must follow `[A-Za-z][A-Za-z0-9_]*` format). Usually we use error labels in a syntax like `'a' ('b' / %{errB}) 'c'`, which throws an error label if `'b'` is not matched. This syntax is quite complicated so an additional syntax is allowed `'a' 'b'^errB 'c'`, which allows cleaner description of grammars. Note: all errors must be defined in a table using parser-gen.setlabels() before compiling and parsing the grammar.
|
||||
|
||||
### Tokens
|
||||
|
||||
Non-terminals with names in all capital letters, i.e. `[A-Z]+`, are considered tokens and are treated as a single object in parsing. That is, the whole string matched by a token is captured in a single AST entry and space characters are not consumed. Consider two examples:
|
||||
```lua
|
||||
-- a token non-terminal
|
||||
grammar = pg.compile [[
|
||||
WORD <- [A-Z]+
|
||||
]]
|
||||
res, _ = pg.parse("AA A", grammar) -- outputs {rule="WORD", "AA"}
|
||||
```
|
||||
```lua
|
||||
-- a non-token non-terminal
|
||||
grammar = pg.compile [[
|
||||
word <- [A-Z]+
|
||||
]]
|
||||
res, _ = pg.parse("AA A", grammar) -- outputs {rule="word", "A", "A", "A"}
|
||||
```
|
||||
|
||||
### Fragments
|
||||
|
||||
If a token definition is followed by a `fragment` keyword, then the parser does not build an AST entry for that token. Essentially, these rules are used to simplify grammars without building unnecessarily complicated ASTS. Example of `fragment` usage:
|
||||
```lua
|
||||
grammar = pg.compile [[
|
||||
WORD <- LETTER+
|
||||
fragment LETTER <- [A-Z]
|
||||
]]
|
||||
res, _ = pg.parse("AA A", grammar) -- outputs {rule="WORD", "AA"}
|
||||
```
|
||||
Without using `fragment`:
|
||||
```lua
|
||||
grammar = pg.compile [[
|
||||
WORD <- LETTER+
|
||||
LETTER <- [A-Z]
|
||||
]]
|
||||
res, _ = pg.parse("AA A", grammar) -- outputs {rule="WORD", {rule="LETTER", "A"}, {rule="LETTER", "A"}}
|
||||
|
||||
```
|
||||
|
||||
### Nodes
|
||||
|
||||
When node mode is enabled using `pg.usenodes(true)` only rules prefixed with a `node` keyword will generate AST entries:
|
||||
```lua
|
||||
grammar = pg.compile [[
|
||||
node WORD <- LETTER+
|
||||
LETTER <- [A-Z]
|
||||
]]
|
||||
res, _ = pg.parse("AA A", grammar) -- outputs {rule="WORD", "AA"}
|
||||
```
|
||||
### Special rules
|
||||
|
||||
There are two special rules used by the grammar:
|
||||
|
||||
#### SKIP
|
||||
|
||||
The `SKIP` rule identifies which characters to skip in a grammar. For example, most programming languages do not take into account any space or newline characters. By default, SKIP is set to:
|
||||
```lua
|
||||
SKIP <- %s / %nl
|
||||
```
|
||||
This rule can be extended to contain semicolons `';'`, comments, or any other patterns that the parser can safely ignore.
|
||||
|
||||
Character skipping can be disabled by using:
|
||||
```lua
|
||||
SKIP <- ''
|
||||
```
|
||||
|
||||
#### SYNC
|
||||
|
||||
This rule specifies the general recovery expression both for custom errors and automatically generated ones. By default:
|
||||
```lua
|
||||
SYNC <- .? (!SKIP .)*
|
||||
```
|
||||
The default SYNC rule consumes any characters until the next character matched by SKIP, usually a space or a newline. That means, if some statement in a program is invalid, the parser will continue parsing after a space or a newline character.
|
||||
|
||||
For some programming languages it might be useful to skip to a semicolon or a keyword, since they usually indicate the end of a statement, so SYNC could be something like:
|
||||
```lua
|
||||
HELPER <- ';' / 'end' / SKIP -- etc
|
||||
SYNC <- (!HELPER .)* SKIP* -- we can consume the spaces after syncing with them as well
|
||||
```
|
||||
|
||||
Recovery grammars can be disabled by using:
|
||||
```lua
|
||||
SYNC <- ''
|
||||
```
|
||||
# Example: Tiny parser
|
||||
|
||||
Below is the full code from *parsers/tiny-parser.lua*:
|
||||
```lua
|
||||
local pg = require "parser-gen"
|
||||
local peg = require "peg-parser"
|
||||
local errs = {errMissingThen = "Missing Then"} -- one custom error
|
||||
pg.setlabels(errs)
|
||||
|
||||
--warning: experimental error generation function is enabled. If the grammar isn't LL(1), set errorgen to false
|
||||
local errorgen = true
|
||||
|
||||
local grammar = pg.compile([[
|
||||
|
||||
program <- stmtsequence !.
|
||||
stmtsequence <- statement (';' statement)*
|
||||
statement <- ifstmt / repeatstmt / assignstmt / readstmt / writestmt
|
||||
ifstmt <- 'if' exp 'then'^errMissingThen stmtsequence elsestmt? 'end'
|
||||
elsestmt <- ('else' stmtsequence)
|
||||
repeatstmt <- 'repeat' stmtsequence 'until' exp
|
||||
assignstmt <- IDENTIFIER ':=' exp
|
||||
readstmt <- 'read' IDENTIFIER
|
||||
writestmt <- 'write' exp
|
||||
exp <- simpleexp (COMPARISONOP simpleexp)*
|
||||
COMPARISONOP <- '<' / '='
|
||||
simpleexp <- term (ADDOP term)*
|
||||
ADDOP <- [+-]
|
||||
term <- factor (MULOP factor)*
|
||||
MULOP <- [*/]
|
||||
factor <- '(' exp ')' / NUMBER / IDENTIFIER
|
||||
|
||||
NUMBER <- '-'? [0-9]+
|
||||
KEYWORDS <- 'if' / 'repeat' / 'read' / 'write' / 'then' / 'else' / 'end' / 'until'
|
||||
RESERVED <- KEYWORDS ![a-zA-Z]
|
||||
IDENTIFIER <- !RESERVED [a-zA-Z]+
|
||||
HELPER <- ';' / %nl / %s / KEYWORDS / !.
|
||||
SYNC <- (!HELPER .)*
|
||||
|
||||
]], _, errorgen)
|
||||
|
||||
local errors = 0
|
||||
local function printerror(desc,line,col,sfail,trec)
|
||||
errors = errors+1
|
||||
print("Error #"..errors..": "..desc.." on line "..line.."(col "..col..")")
|
||||
end
|
||||
|
||||
|
||||
local function parse(input)
|
||||
errors = 0
|
||||
result, errors = pg.parse(input,grammar,printerror)
|
||||
return result, errors
|
||||
end
|
||||
|
||||
if arg[1] then
|
||||
-- argument must be in quotes if it contains spaces
|
||||
res, errs = parse(arg[1])
|
||||
peg.print_t(res)
|
||||
peg.print_r(errs)
|
||||
end
|
||||
local ret = {parse=parse}
|
||||
return ret
|
||||
```
|
||||
For input: `lua tiny-parser.lua "if a b:=1"` we get:
|
||||
```lua
|
||||
Error #1: Missing Then on line 1(col 6)
|
||||
Error #2: Expected stmtsequence on line 1(col 9)
|
||||
Error #3: Expected 'end' on line 1(col 9)
|
||||
-- ast:
|
||||
rule='program',
|
||||
pos=1,
|
||||
{
|
||||
rule='stmtsequence',
|
||||
pos=1,
|
||||
{
|
||||
rule='statement',
|
||||
pos=1,
|
||||
{
|
||||
rule='ifstmt',
|
||||
pos=1,
|
||||
'if',
|
||||
{
|
||||
rule='exp',
|
||||
pos=4,
|
||||
{
|
||||
rule='simpleexp',
|
||||
pos=4,
|
||||
{
|
||||
rule='term',
|
||||
pos=4,
|
||||
{
|
||||
rule='factor',
|
||||
pos=4,
|
||||
{
|
||||
rule='IDENTIFIER',
|
||||
pos=4,
|
||||
'a',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
-- error table:
|
||||
[1] => {
|
||||
[msg] => 'Missing Then' -- custom error is used over the automatically generated one
|
||||
[line] => '1'
|
||||
[col] => '6'
|
||||
[label] => 'errMissingThen'
|
||||
}
|
||||
[2] => {
|
||||
[msg] => 'Expected stmtsequence' -- automatically generated errors
|
||||
[line] => '1'
|
||||
[col] => '9'
|
||||
[label] => 'errorgen6'
|
||||
}
|
||||
[3] => {
|
||||
[msg] => 'Expected 'end''
|
||||
[line] => '1'
|
||||
[col] => '9'
|
||||
[label] => 'errorgen4'
|
||||
}
|
||||
```
|
||||
|
||||
|
32
06/deps/parser-gen/equals.lua
Normal file
32
06/deps/parser-gen/equals.lua
Normal file
|
@ -0,0 +1,32 @@
|
|||
-- Deep structural comparison of two values.
-- Returns true when o1 and o2 are equal by value; tables are compared
-- key-by-key, recursively. When ignore_mt is falsy and o1 carries an
-- __eq metamethod, that metamethod's verdict is used instead of the
-- structural walk.
local function equals(o1, o2, ignore_mt)
	-- Identical references (or equal primitives) short-circuit.
	if o1 == o2 then
		return true
	end

	-- Different types can never be equal; non-table values that were
	-- not caught by the == check above are unequal too.
	if type(o1) ~= type(o2) then
		return false
	end
	if type(o1) ~= "table" then
		return false
	end

	if not ignore_mt then
		local meta = getmetatable(o1)
		if meta and meta.__eq then
			-- Defer to the metamethod; the == at the top already
			-- invoked it, so reaching here means it answered false.
			return o1 == o2
		end
	end

	-- Every key of o1 must exist in o2 with a recursively equal value.
	local seen = {}
	for key, lhs in pairs(o1) do
		local rhs = o2[key]
		if rhs == nil then
			return false
		end
		if not equals(lhs, rhs, ignore_mt) then
			return false
		end
		seen[key] = true
	end

	-- o2 must not contain keys that o1 lacks.
	for key in pairs(o2) do
		if not seen[key] then
			return false
		end
	end

	return true
end
return {equals=equals}
|
401
06/deps/parser-gen/errorgen.lua
Normal file
401
06/deps/parser-gen/errorgen.lua
Normal file
|
@ -0,0 +1,401 @@
|
|||
-- Error generation code for LL(1) grammars
|
||||
-- AST funcs:
|
||||
|
||||
local function isfinal(t)
|
||||
return t["t"] or t["nt"] or t["func"] or t["s"] or t["num"]
|
||||
end
|
||||
|
||||
local function isaction(t)
|
||||
return t["action"]
|
||||
end
|
||||
|
||||
|
||||
local function isrule(t)
|
||||
return t and t["rulename"]
|
||||
end
|
||||
|
||||
local function isgrammar(t)
|
||||
if type(t) == "table" and not(t["action"]) then
|
||||
return isrule(t[1])
|
||||
end
|
||||
return false
|
||||
end
|
||||
local function istoken (t)
|
||||
return t["token"] == "1"
|
||||
end
|
||||
|
||||
local function finalNode (t)
|
||||
if t["t"] then
|
||||
return"t",t["t"] -- terminal
|
||||
elseif t["nt"] then
|
||||
return "nt", t["nt"], istoken(t) -- nonterminal
|
||||
elseif t["func"] then
|
||||
return "func", t["func"] -- function
|
||||
elseif t["s"] then
|
||||
return "s", t["s"]
|
||||
elseif t["num"] then
|
||||
return "num", t["num"]
|
||||
end
|
||||
return nil
|
||||
end
|
||||
|
||||
--[[
|
||||
|
||||
function rightleaf:
|
||||
|
||||
returns the right-most concatenation in the AST.
|
||||
used for followset keys
|
||||
|
||||
input: ((A B) C)
|
||||
output: {"nt_C"}
|
||||
|
||||
input: (A / B / C) (D / 'e')
|
||||
output: {"nt_D","t_e"}
|
||||
|
||||
input: A*
|
||||
output: {'',"nt_A"}
|
||||
|
||||
input: !A
|
||||
output: {"not_nt_A"}
|
||||
]]
|
||||
-- Return a new table mirroring t's keys, with every value prefixed
-- by "not_" (e.g. "nt_A" becomes "not_nt_A"). Used to negate the
-- entries of a follow-set. The input table is not modified.
local function addnot(t)
	local negated = {}
	for key, entry in pairs(t) do
		negated[key] = "not_" .. entry
	end
	return negated
end
|
||||
-- Append the empty string (epsilon) to the set t, in place, and
-- return the same table. Marks a follow-set entry as nullable.
local function addepsilon(t)
	t[#t + 1] = ''
	return t
end
|
||||
-- Append every value of `second` onto the array part of `first`,
-- in place, and return `first`. The keys of `second` are ignored;
-- `second` itself is left untouched.
local function mergetables(first, second)
	for _, value in pairs(second) do
		first[#first + 1] = value
	end
	return first
end
|
||||
|
||||
local function rightleaf(t)
|
||||
local action = t.action
|
||||
local op1 = t.op1
|
||||
local op2 = t.op2
|
||||
|
||||
if isfinal(t) then
|
||||
|
||||
-- todo: replace nt_A with FIRST(A)
|
||||
local typefn, fn, tok = finalNode(t)
|
||||
local ret = typefn .. "_" .. fn -- terminals: t_if, nonterminals: nt_if
|
||||
return {ret}
|
||||
|
||||
end
|
||||
|
||||
|
||||
if action == "or" then
|
||||
|
||||
return mergetables(rightleaf(op1), rightleaf(op2))
|
||||
|
||||
elseif action == "and" then -- consider only RHS
|
||||
|
||||
return rightleaf(op2)
|
||||
|
||||
elseif action == "&" then
|
||||
|
||||
return rightleaf(op1)
|
||||
|
||||
elseif action == "!" then
|
||||
|
||||
return addnot(rightleaf(op1))
|
||||
|
||||
elseif action == "+" then
|
||||
|
||||
return rightleaf(op1)
|
||||
|
||||
elseif action == "*" or action == "?" then
|
||||
|
||||
return addepsilon(rightleaf(op1))
|
||||
|
||||
elseif action == "^" then
|
||||
|
||||
op2 = op2["num"] -- second operand is number
|
||||
if op2 >= 1 then
|
||||
return rightleaf(op1)
|
||||
else
|
||||
return addepsilon(rightleaf(op1))
|
||||
end
|
||||
|
||||
elseif action == "^LABEL" or action == "->" or action == "=>" or action == "tcap" or action == "gcap" or action == "subcap" or action == "scap" then
|
||||
|
||||
return rightleaf(op1)
|
||||
|
||||
elseif action == "bref" or action == "poscap" then
|
||||
|
||||
return addepsilon({}) -- only empty string
|
||||
|
||||
elseif action == "anychar" then
|
||||
|
||||
return {"_anychar"}
|
||||
|
||||
elseif action == "label" then
|
||||
|
||||
return addepsilon({})
|
||||
|
||||
elseif action == "%" then
|
||||
|
||||
return addepsilon({})
|
||||
|
||||
elseif action == "invert" then
|
||||
|
||||
return addnot(rightleaf(op1))
|
||||
|
||||
elseif action == "range" then
|
||||
|
||||
return {"_anychar"}
|
||||
|
||||
else
|
||||
error("Unsupported action '"..action.."'")
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
local FOLLOW = {}
|
||||
|
||||
local function follow_aux(t, dontsplit)
|
||||
|
||||
local action = t.action
|
||||
local op1 = t.op1
|
||||
local op2 = t.op2
|
||||
|
||||
|
||||
if isfinal(t) then
|
||||
|
||||
return {t}
|
||||
|
||||
end
|
||||
|
||||
if action == "or" then
|
||||
|
||||
if dontsplit then -- do not split "(B / C)" in "A (B / C)"
|
||||
return {t}
|
||||
else -- return both
|
||||
return mergetables(follow_aux(op1), follow_aux(op2))
|
||||
end
|
||||
|
||||
elseif action == "and" then -- magic happens here
|
||||
|
||||
-- (A (B / D)) (!E C / D)
|
||||
|
||||
-- 1) FOLLOW(B) = FOLLOW(D) = {(!E C / D)}
|
||||
local rightset = rightleaf(op1)
|
||||
local rhs = follow_aux(op2)
|
||||
for k,v in pairs(rightset) do
|
||||
if not FOLLOW[v] then
|
||||
FOLLOW[v] = {}
|
||||
end
|
||||
-- TODO: check if rhs already exists in FOLLOW(v)
|
||||
table.insert(FOLLOW[v],rhs)
|
||||
|
||||
end
|
||||
|
||||
-- 2) FOLLOW(A) = {(B / D)}
|
||||
|
||||
return follow_aux(op1)
|
||||
|
||||
|
||||
elseif action == "&" then
|
||||
|
||||
return follow_aux(op1)
|
||||
|
||||
elseif action == "!" then
|
||||
|
||||
return {action="!", op1=follow_aux(op1)}
|
||||
|
||||
elseif action == "+" then
|
||||
|
||||
return follow_aux(op1)
|
||||
|
||||
elseif action == "*" then
|
||||
|
||||
return addepsilon(follow_aux(op1))
|
||||
|
||||
elseif action == "?" then
|
||||
|
||||
return addepsilon(follow_aux(op1))
|
||||
|
||||
elseif action == "^" then
|
||||
|
||||
op2 = op2["num"]
|
||||
|
||||
if op2 >= 1 then
|
||||
return follow_aux(op1)
|
||||
else
|
||||
return addepsilon(follow_aux(op1))
|
||||
end
|
||||
|
||||
elseif action == "^LABEL" or action == "->" or action == "=>" or action == "tcap" or action == "gcap" or action == "subcap" or action == "scap" then
|
||||
|
||||
return follow_aux(op1)
|
||||
|
||||
elseif action == "bref" or action == "poscap" then
|
||||
|
||||
return addepsilon({}) -- only empty string
|
||||
|
||||
elseif action == "anychar" then
|
||||
|
||||
return {"_anychar"}
|
||||
|
||||
elseif action == "label" then
|
||||
|
||||
return addepsilon({})
|
||||
|
||||
elseif action == "%" then
|
||||
|
||||
return addepsilon({})
|
||||
|
||||
elseif action == "invert" then
|
||||
|
||||
return {t} -- whole table
|
||||
|
||||
elseif action == "range" then
|
||||
|
||||
return {"_anychar"}
|
||||
|
||||
else
|
||||
error("Unsupported action '"..action.."'")
|
||||
end
|
||||
end
|
||||
|
||||
-- function: follow
|
||||
-- finds follow set for the whole AST, with key (rule, term)
|
||||
local function follow (t)
|
||||
local followset = {}
|
||||
if isgrammar(t) then
|
||||
for pos,val in pairs(t) do
|
||||
local rule = val.rulename
|
||||
FOLLOW = {} -- reset for each rule
|
||||
follow_aux(val.rule) -- apply recursive function
|
||||
followset[rule] = FOLLOW
|
||||
end
|
||||
else
|
||||
FOLLOW = {}
|
||||
follow_aux(t)
|
||||
followset[''] = FOLLOW
|
||||
end
|
||||
return followset
|
||||
end
|
||||
|
||||
-- functions to add errors
|
||||
-- find size of table
|
||||
-- Count the number of key/value pairs in t. Unlike the # operator,
-- this also counts entries in the hash part of the table.
local function getn (t)
	local count = 0
	for _ in pairs(t) do
		count = count + 1
	end
	return count
end
|
||||
-- generate error message by traversing table to the left
|
||||
-- Build a human-readable description of what the pattern node `op`
-- expects, for use in generated error messages. Final nodes print
-- their own symbol (terminals are quoted); composite nodes recurse
-- into their left operand, falling back to the action name when no
-- operand is present.
local function printexpect(op)
	if isfinal(op) then
		if op.t then
			-- terminal: quote the literal text
			return "'" .. op.t .. "'"
		end
		return op.nt or op.func or op.s or op.num
	end
	local left = op.op1
	if left then
		return printexpect(left)
	end
	return op.action
end
|
||||
local GENERATED_ERRORS = 0
|
||||
local TERRS = {}
|
||||
local function generateerror(op, after)
|
||||
|
||||
local desc = "Expected "..printexpect(op)
|
||||
|
||||
local err = GENERATED_ERRORS+1
|
||||
if err >= 255 then
|
||||
error("Error label limit reached(255)")
|
||||
end
|
||||
local name = "errorgen"..err
|
||||
TERRS[name] = desc
|
||||
GENERATED_ERRORS = GENERATED_ERRORS+1
|
||||
return name
|
||||
end
|
||||
|
||||
|
||||
local function tryadderror(op, after)
|
||||
|
||||
if FOLLOW then
|
||||
|
||||
local rhs = rightleaf(after)
|
||||
-- (A / B) C
|
||||
-- generate error iff #FOLLOW(A) OR #FOLLOW(B) = 1
|
||||
local generate = false
|
||||
for k,v in pairs(rhs) do
|
||||
if FOLLOW[v] then
|
||||
local n = getn(FOLLOW[v])
|
||||
generate = generate or n==1
|
||||
end
|
||||
end
|
||||
if generate then
|
||||
local lab = generateerror(op, after)
|
||||
return {action="^LABEL",op1=op,op2={s=lab}}
|
||||
end
|
||||
end
|
||||
return op
|
||||
end
|
||||
|
||||
|
||||
-- function: adderrors
|
||||
-- traverses the AST and adds error labels where possible
|
||||
|
||||
local function adderrors_aux(ast,tokenrule)
|
||||
|
||||
if not ast then
|
||||
return nil
|
||||
end
|
||||
|
||||
if isaction(ast) then
|
||||
|
||||
local act, op1, op2
|
||||
act = ast["action"]
|
||||
op1 = ast["op1"]
|
||||
op2 = ast["op2"]
|
||||
|
||||
if act == "and" and not tokenrule then
|
||||
|
||||
op2 = tryadderror(op2, op1)
|
||||
|
||||
end
|
||||
|
||||
ast["op1"] = adderrors_aux(op1,tokenrule)
|
||||
ast["op2"] = adderrors_aux(op2,tokenrule)
|
||||
end
|
||||
return ast
|
||||
end
|
||||
local function adderrors(t, followset)
|
||||
GENERATED_ERRORS = 0
|
||||
TERRS = {}
|
||||
if isgrammar(t) then
|
||||
for pos,val in pairs(t) do
|
||||
local currentrule = val.rulename
|
||||
FOLLOW = followset[currentrule]
|
||||
local rule = val.rule
|
||||
local istokenrule = val.token == "1"
|
||||
adderrors_aux(rule,istokenrule)
|
||||
end
|
||||
else
|
||||
FOLLOW = followset['']
|
||||
adderrors_aux(t,false)
|
||||
end
|
||||
return TERRS
|
||||
end
|
||||
|
||||
return {follow=follow,adderrors=adderrors}
|
149
06/deps/parser-gen/parser-gen-tests.lua
Normal file
149
06/deps/parser-gen/parser-gen-tests.lua
Normal file
|
@ -0,0 +1,149 @@
|
|||
local pg = require("parser-gen")
|
||||
local equals = require("equals").equals
|
||||
|
||||
-- terminals
|
||||
-- space allowed
|
||||
rule = pg.compile [[
|
||||
rule <- 'a'
|
||||
]]
|
||||
str = "a a aa "
|
||||
res = pg.parse(str,rule)
|
||||
assert(res)
|
||||
|
||||
-- space not allowed
|
||||
rule = pg.compile [[
|
||||
RULE <- 'a' 'b'
|
||||
]]
|
||||
str = "a b"
|
||||
res = pg.parse(str,rule)
|
||||
assert(not res)
|
||||
|
||||
-- space not allowed 2
|
||||
rule = pg.compile [[
|
||||
rule <- 'a' 'b'
|
||||
SKIP <- ''
|
||||
SYNC <- ''
|
||||
]]
|
||||
str = "a b"
|
||||
res = pg.parse(str,rule)
|
||||
assert(not res)
|
||||
|
||||
-- custom space
|
||||
rule = pg.compile [[
|
||||
rule <- 'a' 'b'
|
||||
SKIP <- DOT
|
||||
DOT <- '.'
|
||||
]]
|
||||
str = "a...b"
|
||||
res = pg.parse(str,rule)
|
||||
assert(res)
|
||||
|
||||
-- non terminals
|
||||
-- space allowed
|
||||
rule = pg.compile [[
|
||||
rule <- A B
|
||||
A <- 'a'
|
||||
B <- 'b'
|
||||
]]
|
||||
str = "a b"
|
||||
res, err = pg.parse(str,rule)
|
||||
assert(res)
|
||||
-- no spaces allowed
|
||||
rule = pg.compile [[
|
||||
RULE <- A B
|
||||
A <- 'a'
|
||||
B <- 'b'
|
||||
]]
|
||||
str = "a b"
|
||||
res = pg.parse(str,rule)
|
||||
assert(not res)
|
||||
|
||||
-- space in the beginning and end of string
|
||||
rule = pg.compile [[
|
||||
rule <- A B
|
||||
A <- 'a'
|
||||
B <- 'b'
|
||||
]]
|
||||
str = " a b "
|
||||
res = pg.parse(str,rule)
|
||||
assert(res)
|
||||
|
||||
|
||||
|
||||
-- TESTING CAPTURES
|
||||
|
||||
r = pg.compile([[ rule <- {| {:'a' 'b':}* |}
|
||||
|
||||
]],_,_,true)
|
||||
res = pg.parse("ababab", r)
|
||||
|
||||
assert(equals(res,{"ab","ab","ab"}))
|
||||
-- space in capture
|
||||
|
||||
rule = pg.compile([[ rule <- {| {: 'a' :}* |}
|
||||
]],_,_,true)
|
||||
str = " a a a "
|
||||
res = pg.parse(str,rule)
|
||||
|
||||
assert(equals(res,{"a","a","a"})) -- fails
|
||||
|
||||
-- TESTING ERROR LABELS
|
||||
local labs = {errName = "Error number 1",errName2 = "Error number 2"}
|
||||
pg.setlabels(labs)
|
||||
rule = pg.compile [[ rule <- 'a' / %{errName}
|
||||
SYNC <- ''
|
||||
]]
|
||||
local errorcalled = false
|
||||
local function err(desc, line, col, sfail, recexp)
|
||||
errorcalled = true
|
||||
assert(desc == "Error number 1")
|
||||
end
|
||||
res = pg.parse("b",rule,err)
|
||||
assert(errorcalled)
|
||||
|
||||
-- TESTING ERROR RECOVERY
|
||||
|
||||
local labs = {errName = "Error number 1",errName2 = "Error number 2"}
|
||||
pg.setlabels(labs)
|
||||
|
||||
rule = pg.compile [[
|
||||
rule <- As //{errName,errName2} Bs
|
||||
As <- 'a'* / %{errName2}
|
||||
Bs <- 'b'*
|
||||
]]
|
||||
res1 = pg.parse(" a a a",rule)
|
||||
res2 = pg.parse("b b b ",rule)
|
||||
assert(res1 and res2)
|
||||
|
||||
-- TESTING ERROR GENERATION
|
||||
|
||||
pg.setlabels({})
|
||||
rule = pg.compile([[
|
||||
rule <- A B C
|
||||
A <- 'a'
|
||||
B <- 'b'
|
||||
C <- 'c'
|
||||
|
||||
]],_,true)
|
||||
res1, errs = pg.parse("ab",rule)
|
||||
assert(errs[1]["msg"] == "Expected C")
|
||||
|
||||
-- TESTING RECOVERY GENERATION
|
||||
|
||||
|
||||
-- SELF-DESCRIPTION
|
||||
pg.setlabels(pg.labels)
|
||||
gram = pg.compile(pg.gram, pg.defs,_,true)
|
||||
res1, errs = pg.parse(pg.gram,gram)
|
||||
assert(res1) -- parse succesful
|
||||
|
||||
--[[ this test is invalid since tool added ^LABEL syntax
|
||||
r = re.compile(pg.gram,pg.defs)
|
||||
res2 = r:match(pg.gram)
|
||||
|
||||
--pg.print_r(res2)
|
||||
|
||||
assert(equals(res1, res2))
|
||||
]]--
|
||||
|
||||
print("all tests succesful")
|
563
06/deps/parser-gen/parser-gen.lua
Normal file
563
06/deps/parser-gen/parser-gen.lua
Normal file
|
@ -0,0 +1,563 @@
|
|||
local f = (...):match("(.-)[^%.]+$")
|
||||
local peg = require(f .. "peg-parser")
|
||||
local eg = require(f .. "errorgen")
|
||||
local s = require(f .. "stack")
|
||||
|
||||
-- Create stack for tokens inside captures.
-- nil - not inside capture,
-- 0 - inside capture,
-- 1 - token found inside capture
local tokenstack = s.Stack:Create()

-- Per-parse state: the input string, the collected error records and the
-- user-supplied error callback. All three are (re)assigned by parse().
local subject, errors, errorfunc

local unpack = table.unpack;

-- Predefined patterns usable as %nl, %cr, %tab in grammars; locale character
-- classes are added by updatelocale() below.
-- NOTE(review): `m` is assumed to be the lpeglabel module — confirm it is
-- required/provided before this file runs.
local Predef = { nl = m.P"\n", cr = m.P"\r", tab = m.P"\t" }
local mem = {} -- cache of compiled grammars, keyed by grammar source string
|
||||
|
||||
-- Refresh Predef with the current locale's character classes and install the
-- single-letter aliases (%a, %d, ...) plus their uppercase complements
-- (%A = anything that is not %a, etc.). Invalidates the grammar cache,
-- since compiled grammars may embed the old locale classes.
local function updatelocale()
  m.locale(Predef)
  local any = m.P(1)
  local aliases = {
    a = "alpha", c = "cntrl", d = "digit", g = "graph", l = "lower",
    p = "punct", s = "space", u = "upper", w = "alnum", x = "xdigit",
  }
  for short, class in pairs(aliases) do
    Predef[short] = Predef[class]
    Predef[short:upper()] = any - Predef[class]
  end
  mem = {}
end

updatelocale()
|
||||
|
||||
-- Grammar-wide state filled in by setlabels()/compile():
local definitions = {}   -- user-supplied functions/patterns referenced as %name
local tlabels = {}       -- error label name -> numeric label
local totallabels = 0    -- number of registered labels
local tlabelnames = {}   -- reverse table: numeric label -> name
local tdescs = {}        -- numeric label -> human-readable description
local trecs = {}         -- numeric label -> recovery pattern for that error
|
||||
|
||||
|
||||
-- Default synchronization pattern for error recovery: consume one character
-- (if any), then everything up to (but excluding) the next match of `stop`.
local function defaultsync(stop)
  local skip_one = m.P(1)^-1
  local until_stop = (-stop * m.P(1))^0
  return skip_one * until_stop
end
|
||||
|
||||
-- TODO: store these variables for each grammar
local SKIP = (Predef.space + Predef.nl)  -- pattern skipped between tokens
local SYNC = defaultsync(SKIP)           -- synchronization pattern for error recovery

-- Global compilation switches; reset per grammar by specialrules()/build():
local recovery = true   -- wrap the initial rule with recovery patterns
local skipspaces = true -- append SKIP^0 after tokens
local buildast = true   -- add captures so parse() returns an AST
|
||||
|
||||
-- Hook for transforming the synchronization pattern. Currently the identity:
-- an earlier variant skipped input until the pattern matched and consumed it.
local function sync (patt)
  return patt
end
|
||||
|
||||
|
||||
-- Append optional space skipping (SKIP^0) to `patt`, unless space skipping
-- has been disabled for the current grammar.
local function pattspaces (patt)
  if not skipspaces then
    return patt
  end
  return patt * SKIP^0
end
|
||||
|
||||
-- Wrap a terminal pattern. Outside captures, trailing spaces are consumed
-- after the token; inside a capture the pattern is left bare (so the captured
-- text contains no skipped spaces) and the capture is flagged as containing
-- a token, so addspaces() can re-attach space skipping to the whole capture.
local function token (patt)
  local incapture = tokenstack:pop() -- returns nil if not in capture
  if not incapture then
    return pattspaces(patt)
  end
  tokenstack:push(1) -- mark: a token was found inside the current capture
  return patt
end
|
||||
|
||||
|
||||
|
||||
|
||||
-- functions used by the tool
|
||||
|
||||
-- True when `gr` is already a compiled LPeg pattern (as opposed to a grammar
-- source string or an AST table).
local function iscompiled (gr)
  local kind = m.type(gr)
  return kind == "pattern"
end
|
||||
|
||||
-- True when the AST node `t` is marked as a token rule ("1" flag set by
-- the grammar parser).
local function istoken (t)
  return t.token == "1"
end
|
||||
|
||||
-- Truthy when the AST node is a leaf: a terminal, nonterminal, function,
-- simple string, or numbered string. Returns the leaf's value itself.
local function isfinal(t)
  return t.t or t.nt or t.func or t.s or t.num
end
|
||||
|
||||
-- Truthy when the AST node is an operator/action node; returns the action
-- name (e.g. "or", "and", "*").
local function isaction(t)
  return t.action
end
|
||||
|
||||
|
||||
-- Truthy when `t` is a rule node (has a rule name); returns the name.
local function isrule(t)
  if not t then
    return t
  end
  return t.rulename
end

-- Truthy when `t` is a grammar: a plain table (not an action node) whose
-- first element is a rule.
local function isgrammar(t)
  local is_plain_table = type(t) == "table" and not t.action
  if is_plain_table then
    return isrule(t[1])
  end
  return false
end
|
||||
|
||||
-- Actions that produce a capture and therefore need token/space bookkeeping.
local capture_actions = {
  ["=>"] = true, gcap = true, scap = true, subcap = true, poscap = true,
}

-- True when `action` is one of the capture-producing operators.
local function iscapture (action)
  return capture_actions[action] ~= nil
end
|
||||
|
||||
-- Classify a leaf AST node.
-- Returns: kind ("t" | "nt" | "func" | "s" | "num"), the node's value, and
-- for nonterminals a third result telling whether it is a token rule.
-- Returns nil for non-leaf nodes.
local function finalNode (t)
  if t["t"] then
    return "t", t["t"] -- terminal
  elseif t["nt"] then
    return "nt", t["nt"], istoken(t) -- nonterminal
  elseif t["func"] then
    return "func", t["func"] -- function
  elseif t["s"] then
    return "s", t["s"] -- simple string
  elseif t["num"] then
    return "num", t["num"] -- numbered string
  end
  return nil
end
|
||||
-- Forward-declared table holding buildgrammar (needed because traverse()
-- and bg.buildgrammar are mutually recursive).
local bg = {}

-- If the capture just processed contained a token (flag 1 left on the
-- token stack by token()), re-attach trailing space skipping to the capture
-- as a whole; otherwise return it unchanged.
local function addspaces (caps)
  local hastoken = tokenstack:pop()
  if hastoken == 1 then
    return pattspaces(caps)
  end
  return caps
end
|
||||
|
||||
-- Map one AST operator node onto the corresponding LPeg combinator.
-- op1/op2 are already-built LPeg patterns (or raw values for "%", "label",
-- "^LABEL", "range"); labels is the recovery-label list for "or"; tokenrule
-- tells whether we are inside a token rule (no implicit captures/spaces).
local function applyaction(action, op1, op2, labels, tokenrule)
  if action == "or" then
    if labels then -- labels = {{s="errName"},{s="errName2"}}
      -- Resolve label names to their numeric codes, then build a labelled
      -- recovery choice (lpeglabel Rec).
      for i, v in ipairs(labels) do
        local labname = v["s"]
        local lab = tlabels[labname]
        if not lab then
          error("Label '"..labname.."' undefined")
        end
        labels[i] = lab
      end
      return m.Rec(op1, op2, unpack(labels))
    end
    return op1 + op2
  elseif action == "and" then
    return op1 * op2
  elseif action == "&" then
    return #op1  -- and-predicate (lookahead)
  elseif action == "!" then
    return -op1  -- not-predicate
  elseif action == "+" then
    return op1^1
  elseif action == "*" then
    return op1^0
  elseif action == "?" then
    return op1^-1
  elseif action == "^" then
    return op1^op2
  elseif action == "^LABEL" then
    -- p^label: throw `label` when p fails.
    local lab = tlabels[op2]
    if not lab then
      error("Label '"..op2.."' unspecified using setlabels()")
    end
    return op1 + m.T(lab)
  elseif action == "->" then
    return op1 / op2
  -- in captures we add SPACES^0 (via addspaces)
  elseif action == "=>" then
    return addspaces(m.Cmt(op1, op2))
  elseif action == "tcap" then
    return m.Ct(op1) -- table capture: no space handling
  elseif action == "gcap" then
    return addspaces(m.Cg(op1, op2))
  elseif action == "bref" then
    return m.Cb(op1) --m.Cmt(m.Cb(op1), equalcap) -- do we need to add spaces to bcap?
  elseif action == "poscap" then
    return addspaces(m.Cp())
  elseif action == "subcap" then
    return addspaces(m.Cs(op1))
  elseif action == "scap" then
    return addspaces(m.C(op1))
  elseif action == "anychar" then
    -- '.' captures the matched char when building an AST outside token rules.
    if buildast and not tokenrule then
      return m.C(m.P(1))
    end
    return m.P(1)
  elseif action == "label" then
    -- %{label}: unconditionally throw the label.
    local lab = tlabels[op1]
    if not lab then
      error("Label '"..op1.."' unspecified using setlabels()")
    end
    return m.T(lab) -- lpeglabel
  elseif action == "%" then
    -- %name: user definition first, then predefined/locale classes.
    if definitions[op1] then
      return definitions[op1]
    elseif Predef[op1] then
      return Predef[op1]
    else
      error("Definition for '%"..op1.."' unspecified(use second parameter of parser-gen.compile())")
    end
  elseif action == "invert" then
    return m.P(1) - op1
  elseif action == "range" then
    local res = m.R(op1)
    if not tokenrule then
      if buildast then
        res = m.C(res)
      end
      res = token(res)
    end
    return res
  else
    error("Unsupported action '"..action.."'")
  end
end
|
||||
|
||||
-- Convert a leaf AST node (classified by finalNode) into an LPeg pattern.
-- action: node kind; term: its value; tokenterm: nonterminal is a token
-- rule; tokenrule: we are currently inside a token rule.
local function applyfinal(action, term, tokenterm, tokenrule)
  if action == "t" then
    -- Literal terminal: capture and space-skip only outside token rules.
    local res = m.P(term)
    if not tokenrule then
      if buildast then
        res = m.C(res)
      end
      if skipspaces then
        res = token(res)
      end
    end
    return res
  elseif action == "nt" then
    -- Nonterminal reference; token nonterminals get trailing space skipping.
    if skipspaces and tokenterm and (not tokenrule) then
      return token(m.V(term))
    else
      return m.V(term)
    end
  elseif action == "func" then
    if definitions[term] then
      return definitions[term]
    else
      error("Definition for function '"..term.."' unspecified (use second parameter of parser-gen.compile())")
    end
  elseif action == "s" then -- simple string
    return term
  elseif action == "num" then -- numbered string
    return tonumber(term)
  end
end
|
||||
|
||||
|
||||
-- Turn a finished grammar table (rule name -> pattern) into a single
-- compiled LPeg grammar pattern.
local function applygrammar(grammar_table)
  return m.P(grammar_table)
end
|
||||
|
||||
-- Recursively compile an AST node into an LPeg pattern.
-- tokenrule: true while inside a token rule (suppresses implicit captures
-- and space skipping). Returns nil for a nil node.
local function traverse (ast, tokenrule)
  if not ast then
    return nil
  end

  if isfinal(ast) then
    local typefn, fn, tok = finalNode(ast)
    return applyfinal(typefn, fn, tok, tokenrule)

  elseif isaction(ast) then

    local act, op1, op2, labs, ret1, ret2
    act = ast["action"]
    op1 = ast["op1"]
    op2 = ast["op2"]
    labs = ast["condition"] -- recovery operations

    -- post-order traversal: open a token-tracking frame before descending
    -- into capture operands so token() can flag tokens found inside.
    if iscapture(act) then
      tokenstack:push(0) -- not found any tokens yet
    end

    ret1 = traverse(op1, tokenrule)
    ret2 = traverse(op2, tokenrule)

    return applyaction(act, ret1, ret2, labs, tokenrule)

  elseif isgrammar(ast) then
    -- Whole grammar: build the rule table, then compile it.
    local g = bg.buildgrammar (ast)
    return applygrammar (g)

  else
    peg.print_r(ast)
    error("Unsupported AST")
  end

end
|
||||
|
||||
-- Scan the rule list for the special SKIP and SYNC rules, update the
-- module-level skipping/recovery settings accordingly, and register those
-- rules in `builder`. Conventions: SKIP <- '' disables space skipping;
-- SYNC <- '' disables error recovery.
local function specialrules(ast, builder)
  -- initialize values (defaults for grammars without SKIP/SYNC)
  SKIP = (Predef.space + Predef.nl)
  skipspaces = true
  SYNC = nil
  recovery = true
  -- find SKIP and SYNC rules
  for i, v in ipairs(ast) do
    local name = v["rulename"]
    local rule
    if name == "SKIP" then
      rule = traverse(v["rule"], true)
      if v["rule"]["t"] == '' then
        skipspaces = false
      else
        skipspaces = true
        SKIP = rule
      end
      builder[name] = rule
    elseif name == "SYNC" then
      rule = traverse(v["rule"], true)
      if v["rule"]["t"] == '' then -- SYNC <- ''
        recovery = false
      else
        recovery = true
        SYNC = rule
      end
      builder[name] = rule
    end
  end
  -- no custom SYNC rule but recovery wanted: derive one from SKIP
  if not SYNC and recovery then
    SYNC = defaultsync(SKIP)
  end
end
|
||||
|
||||
-- Record one parse error: translate the failure position + numeric label
-- into a human-readable record, invoke the user callback (if any), and
-- append the record to the module-level `errors` list.
local function recorderror(position, label)
  -- call error function here
  local line, col = peg.calcline(subject, position)
  local desc
  if label == 0 then
    -- label 0 is lpeglabel's generic failure
    desc = "Syntax error"
  else
    desc = tdescs[label]
  end
  if errorfunc then
    -- sfail: the remainder of the failing line (up to and including "\n",
    -- or to end of input when there is no newline)
    local temp = string.sub(subject, position)
    local strend = string.find(temp, "\n")
    local sfail = string.sub(temp, 1, strend)
    errorfunc(desc, line, col, sfail, trecs[label])
  end

  local err = { line = line, col = col, label = tlabelnames[label], msg = desc }
  table.insert(errors, err)

end
|
||||
-- Build a pattern that, when reached, records an error for `label` at the
-- current position (capture-division runs recorderror for its side effect).
local function record(label)
  return (m.Cp() * m.Cc(label)) / recorderror
end
|
||||
|
||||
-- Wrap `grammar` with a recovery rule for every registered error label:
-- on a thrown label, record the error and resynchronize (per-label recovery
-- pattern when one was supplied, otherwise the global SYNC token).
local function buildrecovery(grammar)

  local synctoken = pattspaces(sync(SYNC))
  local grec = grammar

  for k, v in pairs(tlabels) do

    if trecs[v] then -- custom sync token for this label
      grec = m.Rec(grec, record(v) * pattspaces(trecs[v]), v)
    else -- use global sync token
      grec = m.Rec(grec, record(v) * synctoken, v)
    end
  end
  return grec

end
|
||||
-- "Node mode" flag: when true, only rules annotated with `node` produce
-- AST entries.
local usenode = false

-- Public setter for node mode (exported as pg.usenodes).
local function usenodes(val)
  usenode = val
end
|
||||
|
||||
|
||||
-- Build an LPeg grammar table from the parsed rule list `ast`.
-- The first rule becomes the initial rule; SKIP/SYNC are handled by
-- specialrules(); when AST building is on, each (non-fragment, non-special)
-- rule is wrapped in a table capture carrying its rule name and position.
function bg.buildgrammar (ast)
  local builder = {}
  specialrules(ast, builder)
  local initialrule
  for i, v in ipairs(ast) do
    local istokenrule = v["token"] == "1"
    local isfragment = v["fragment"] == "1"
    local isnode = v["node"] == "1"

    -- BUG FIX: this previously tested `not usenodes` (the setter function,
    -- which is always truthy), so the error below could never fire; the
    -- intended check is the `usenode` flag (as used further down).
    if isnode and not usenode then
      error("Node mode disabled - please use parser-gen.usenodes(true) before compiling the grammar")
    end

    local name = v["rulename"]
    local isspecial = name == "SKIP" or name == "SYNC"
    local rule = v["rule"]
    if i == 1 then
      -- lpeg grammar syntax: entry [1] names the initial rule
      initialrule = name
      table.insert(builder, name)
      builder[name] = traverse(rule, istokenrule)
    else
      if not builder[name] then -- dont traverse rules for SKIP and SYNC twice
        builder[name] = traverse(rule, istokenrule)
      end
    end
    -- Wrap rules that should appear in the AST; in node mode only `node`
    -- rules qualify.
    if buildast and not isfragment and not isspecial and ((not usenode) or (usenode and isnode)) then
      if istokenrule then
        builder[name] = m.C(builder[name])
      end
      builder[name] = m.Ct(m.Cg(m.Cc(name), "rule") * m.Cg(m.Cp(), "pos") * builder[name])
    end
  end

  if skipspaces then
    builder[initialrule] = SKIP^0 * builder[initialrule] -- skip spaces at the beginning of the input
  end
  if recovery then
    builder[initialrule] = buildrecovery(builder[initialrule]) -- build recovery on top of initial rule
  end
  return builder
end
|
||||
|
||||
|
||||
|
||||
|
||||
-- Compile an AST into an LPeg pattern. Grammars are handled by traverse()
-- (which dispatches to buildgrammar); a bare pattern gets default space
-- skipping and, when AST building is on, a top-level table capture.
local function build(ast, defs)
  if defs then
    definitions = defs
  end
  if isgrammar(ast) then
    return traverse(ast)
  else
    -- not a grammar: reset to default skip/sync behaviour
    SKIP = (Predef.space + Predef.nl)
    skipspaces = true
    SYNC = nil
    recovery = true
    SYNC = defaultsync(SKIP)
    local res = SKIP^0 * traverse(ast)
    if buildast then
      res = m.Ct(res)
    end
    return res -- input is not a grammar - skip spaces and sync by default
  end
end
|
||||
|
||||
|
||||
|
||||
-- recovery grammar
|
||||
|
||||
|
||||
|
||||
|
||||
-- end
|
||||
|
||||
|
||||
|
||||
-- t = {errName="Error description",...}
-- Register error labels (max 254). When `errorgen` is true, labels are
-- appended after the existing ones (used for auto-generated labels);
-- otherwise the label tables are reset first. A value may also be a pair
-- {description, recoveryPEG}: the recovery PEG is compiled and stored as
-- that label's custom synchronization pattern.
local function setlabels (t, errorgen)
  local index
  if errorgen then
    index = totallabels + 1
  else
    -- reset error tables
    index = 1
    tlabels = {}

    tdescs = {}
    trecs = {}
  end
  for key, value in pairs(t) do
    if index >= 255 then
      error("Error label limit reached(255)")
    end
    if type(value) == "table" then -- we have a recovery expression
      tdescs[index] = value[1]

      trecs[index] = traverse(peg.pegToAST(value[2]), true) -- PEG to LPEG
    else
      tdescs[index] = value
    end
    tlabels[key] = index
    tlabelnames[index] = key -- reverse table
    index = index + 1
  end
  totallabels = index - 1
end
|
||||
|
||||
|
||||
-- Compile a PEG grammar string into an LPeg pattern (memoized in `mem`).
-- defs: user functions/patterns for %name references; generrors: auto-add
-- error labels via the errorgen module (LL(1) grammars); nocaptures: do not
-- build an AST. Already-compiled patterns are returned unchanged.
local function compile (input, defs, generrors, nocaptures)
  if iscompiled(input) then
    return input
  end
  if not mem[input] then
    buildast = true
    if nocaptures then
      buildast = false
    end
    --re.setlabels(tlabels)
    --re.compile(input,defs)
    -- build ast
    local ast = peg.pegToAST(input)
    if generrors then
      local follow = eg.follow(ast)
      local errors = eg.adderrors(ast, follow)
      setlabels (errors, true) -- add errors generated by errorgen
    end
    local gram = build(ast, defs)
    if not gram then
      -- find error using relabel module
      -- (TODO: no diagnostics are produced here yet)
    end
    mem[input] = gram -- store if the user forgets to compile it
  end
  return mem[input]
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
-- Parse `input` with `grammar`, compiling the grammar first if needed.
-- errorfunction(desc, line, col, sfail, recoverypattern) is invoked for
-- each recorded error. Returns the match result (AST when captures are on)
-- and a list of error records, or nil instead of the list when no errors
-- occurred.
local function parse (input, grammar, errorfunction)
  if not iscompiled(grammar) then
    local cp = compile(grammar)
    grammar = cp
  end
  -- set up per-parse state used by recorderror()
  errorfunc = errorfunction
  subject = input
  errors = {}
  local r, e, sfail = m.match(grammar, input)
  if not r then
    -- unrecovered failure: position is input length minus unconsumed suffix
    recorderror(#input - #sfail, e)
  end
  if #errors == 0 then errors = nil end
  return r, errors
end
|
||||
|
||||
|
||||
|
||||
|
||||
-- NOTE(review): original comment here said "could be wrong" — confirm that
-- eg.follow is the intended public follow-set computation.
local follow = eg.follow

-- Public module interface.
local pg = {compile = compile, setlabels = setlabels, parse = parse, follow = follow, calcline = peg.calcline, usenodes = usenodes}

return pg
|
799
06/deps/parser-gen/parsers/lua-parser-tests.lua
Normal file
799
06/deps/parser-gen/parsers/lua-parser-tests.lua
Normal file
|
@ -0,0 +1,799 @@
|
|||
local lua = require "lua-parser"
|
||||
local peg = require "peg-parser"
|
||||
|
||||
local eq = require "equals"
|
||||
local equals = eq.equals
|
||||
print("\n\n [[ PARSING LUA TEST SUITE FILES ]] \n\n")
|
||||
local filenames = {
|
||||
'all.lua',
|
||||
'main.lua',
|
||||
'gc.lua',
|
||||
'db.lua',
|
||||
'calls.lua',
|
||||
'strings.lua',
|
||||
'literals.lua',
|
||||
'tpack.lua',
|
||||
'attrib.lua',
|
||||
'locals.lua',
|
||||
'constructs.lua',
|
||||
'code.lua',
|
||||
'big.lua',
|
||||
'nextvar.lua',
|
||||
'pm.lua',
|
||||
'utf8.lua',
|
||||
'api.lua',
|
||||
'events.lua',
|
||||
'vararg.lua',
|
||||
'closure.lua',
|
||||
'coroutine.lua',
|
||||
'goto.lua',
|
||||
'errors.lua',
|
||||
'math.lua',
|
||||
'sort.lua',
|
||||
'bitwise.lua',
|
||||
'verybig.lua',
|
||||
'files.lua',
|
||||
}
|
||||
local errs = 0
|
||||
for k,v in ipairs(filenames) do
|
||||
local filename = "lua-5.3.4-tests/"..v
|
||||
local f = assert(io.open(filename, "r"))
|
||||
|
||||
local t = f:read("*all")
|
||||
|
||||
local res, err = lua.parse(t)
|
||||
local s = "OK"
|
||||
if not res then s = "FAIL" end -- only check if succesful since grammar ensures whole file is read
|
||||
print("Testing file '"..v.."': ["..s.."]")
|
||||
if not res then
|
||||
errs = errs + 1
|
||||
print("Error: "..err[1]["msg"])
|
||||
end
|
||||
f:close()
|
||||
end
|
||||
assert(errs == 0)
|
||||
|
||||
print("\n\n Test suite files compiled successfully")
|
||||
|
||||
|
||||
print("\n\n [[ TESTING ERROR LABELS ]] ")
|
||||
local pr = peg.print_r
|
||||
-- test errors
|
||||
local s,res, err
|
||||
local ErrExtra="unexpected character(s), expected EOF"
|
||||
s = [[ return; ! ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExtra)
|
||||
|
||||
local ErrInvalidStat="unexpected token, invalid start of statement"
|
||||
s = [[ ! ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrInvalidStat)
|
||||
|
||||
|
||||
local ErrEndIf="expected 'end' to close the if statement"
|
||||
|
||||
s = [[ if c then b=1 ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEndIf)
|
||||
|
||||
local ErrExprIf="expected a condition after 'if'"
|
||||
|
||||
s = [[ if then b=1 end]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprIf)
|
||||
|
||||
local ErrThenIf="expected 'then' after the condition"
|
||||
|
||||
s = [[ if c b=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrThenIf)
|
||||
|
||||
local ErrExprEIf="expected a condition after 'elseif'"
|
||||
|
||||
s = [[ if a then b=1 elseif then d=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprEIf)
|
||||
|
||||
local ErrThenEIf="expected 'then' after the condition"
|
||||
|
||||
s = [[ if a b=1 end]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrThenEIf)
|
||||
|
||||
local ErrEndDo="expected 'end' to close the do block"
|
||||
|
||||
s = [[ do x=1 ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEndDo)
|
||||
|
||||
local ErrExprWhile="expected a condition after 'while'"
|
||||
|
||||
s = [[ while do c=1 end]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprWhile)
|
||||
|
||||
local ErrDoWhile="expected 'do' after the condition"
|
||||
|
||||
s = [[ while a c=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrDoWhile)
|
||||
|
||||
local ErrEndWhile="expected 'end' to close the while loop"
|
||||
|
||||
s = [[ while a do b=1]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEndWhile)
|
||||
|
||||
local ErrUntilRep="expected 'until' at the end of the repeat loop"
|
||||
|
||||
s = [[ repeat c=1 ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrUntilRep)
|
||||
|
||||
local ErrExprRep="expected a conditions after 'until'"
|
||||
|
||||
s = [[ repeat c=1 until ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprRep)
|
||||
|
||||
local ErrForRange="expected a numeric or generic range after 'for'"
|
||||
|
||||
s = [[ for 3,4 do x=1 end]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrForRange)
|
||||
|
||||
local ErrEndFor="expected 'end' to close the for loop"
|
||||
|
||||
s = [[ for c=1,3 do a=1 ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEndFor)
|
||||
|
||||
local ErrExprFor1="expected a starting expression for the numeric range"
|
||||
|
||||
s = [[ for a=,4 do a=1 end]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprFor1)
|
||||
|
||||
local ErrCommaFor="expected ',' to split the start and end of the range"
|
||||
|
||||
s = [[ for a=4 5 do a=1 end]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCommaFor)
|
||||
|
||||
local ErrExprFor2="expected an ending expression for the numeric range"
|
||||
|
||||
s = [[ for a=4, do a=1 end]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprFor2)
|
||||
|
||||
local ErrExprFor3="expected a step expression for the numeric range after ','"
|
||||
|
||||
s = [[ for a=1,2, do a=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprFor3)
|
||||
|
||||
local ErrInFor="expected '=' or 'in' after the variable(s)"
|
||||
|
||||
s = [[ for a of 1 do a=1 end]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrInFor)
|
||||
|
||||
local ErrEListFor="expected one or more expressions after 'in'"
|
||||
|
||||
s = [[ for a in do a=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEListFor)
|
||||
|
||||
local ErrDoFor="expected 'do' after the range of the for loop"
|
||||
|
||||
s = [[ for a=1,2 a=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrDoFor)
|
||||
|
||||
local ErrDefLocal="expected a function definition or assignment after local"
|
||||
|
||||
s = [[ local return c ]]
|
||||
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
|
||||
assert(err[1]["msg"] == ErrDefLocal)
|
||||
|
||||
|
||||
local ErrNameLFunc="expected a function name after 'function'"
|
||||
|
||||
s = [[ local function() c=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrNameLFunc)
|
||||
|
||||
|
||||
local ErrEListLAssign="expected one or more expressions after '='"
|
||||
|
||||
s = [[ local a = return b ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEListLAssign)
|
||||
|
||||
local ErrEListAssign="expected one or more expressions after '='"
|
||||
|
||||
s = [[ a = return b ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEListAssign)
|
||||
|
||||
|
||||
local ErrFuncName="expected a function name after 'function'"
|
||||
|
||||
s = [[ function () a=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrFuncName)
|
||||
|
||||
local ErrNameFunc1="expected a function name after '.'"
|
||||
|
||||
s = [[ function a.() a=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrNameFunc1)
|
||||
|
||||
local ErrNameFunc2="expected a method name after ':'"
|
||||
|
||||
s = [[ function a:() a=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrNameFunc2)
|
||||
|
||||
local ErrOParenPList="expected '(' for the parameter list"
|
||||
|
||||
s = [[ function a b=1 end]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrOParenPList)
|
||||
|
||||
local ErrCParenPList="expected ')' to close the parameter list"
|
||||
|
||||
s = [[
|
||||
function a(
|
||||
b=1
|
||||
|
||||
end
|
||||
]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCParenPList)
|
||||
|
||||
local ErrEndFunc="expected 'end' to close the function body"
|
||||
|
||||
s = [[ function a() b=1 ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEndFunc)
|
||||
|
||||
local ErrParList="expected a variable name or '...' after ','"
|
||||
|
||||
s = [[ function a(b, ) b=1 end ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrParList)
|
||||
|
||||
|
||||
local ErrLabel="expected a label name after '::'"
|
||||
|
||||
s = [[ :: return b ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrLabel)
|
||||
|
||||
local ErrCloseLabel="expected '::' after the label"
|
||||
|
||||
s = [[ :: abc return a]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCloseLabel)
|
||||
|
||||
local ErrGoto="expected a label after 'goto'"
|
||||
|
||||
s = [[ goto return c]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrGoto)
|
||||
|
||||
|
||||
|
||||
local ErrVarList="expected a variable name after ','"
|
||||
|
||||
s = [[ abc,
|
||||
= 3]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
|
||||
assert(err[1]["msg"] == ErrVarList)
|
||||
|
||||
local ErrExprList="expected an expression after ','"
|
||||
|
||||
s = [[ return a,;]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprList)
|
||||
|
||||
|
||||
local ErrOrExpr="expected an expression after 'or'"
|
||||
|
||||
s = [[ return a or; ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrOrExpr)
|
||||
|
||||
local ErrAndExpr="expected an expression after 'and'"
|
||||
|
||||
s = [[ return a and;]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrAndExpr)
|
||||
|
||||
local ErrRelExpr="expected an expression after the relational operator"
|
||||
|
||||
s = [[ return a >;]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrRelExpr)
|
||||
|
||||
|
||||
local ErrBitwiseExpr="expected an expression after bitwise operator"
|
||||
|
||||
s = [[ return b & ; ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrBitwiseExpr)
|
||||
|
||||
local ErrConcatExpr="expected an expression after '..'"
|
||||
|
||||
s = [[ print(a..) ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrConcatExpr)
|
||||
|
||||
local ErrAddExpr="expected an expression after the additive operator"
|
||||
|
||||
s = [[ return a - ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrAddExpr)
|
||||
|
||||
local ErrMulExpr="expected an expression after the multiplicative operator"
|
||||
|
||||
s = [[ return a/ ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrMulExpr)
|
||||
|
||||
local ErrUnaryExpr="expected an expression after the unary operator"
|
||||
|
||||
s = [[ return # ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrUnaryExpr)
|
||||
|
||||
local ErrPowExpr="expected an expression after '^'"
|
||||
|
||||
s = [[ return a^ ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrPowExpr)
|
||||
|
||||
|
||||
local ErrExprParen="expected an expression after '('"
|
||||
|
||||
s = [[ return a + () ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprParen)
|
||||
|
||||
local ErrCParenExpr="expected ')' to close the expression"
|
||||
|
||||
s = [[ return a + (a ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCParenExpr)
|
||||
|
||||
local ErrNameIndex="expected a field name after '.'"
|
||||
|
||||
s = [[ return a. ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrNameIndex)
|
||||
|
||||
local ErrExprIndex="expected an expression after '['"
|
||||
|
||||
s = [[ return a [ ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprIndex)
|
||||
|
||||
local ErrCBracketIndex="expected ']' to close the indexing expression"
|
||||
|
||||
s = [[ return a[1 ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCBracketIndex)
|
||||
|
||||
local ErrNameMeth="expected a method name after ':'"
|
||||
|
||||
s = [[ return a: ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrNameMeth)
|
||||
|
||||
local ErrMethArgs="expected some arguments for the method call (or '()')"
|
||||
s = [[ a:b ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrMethArgs)
|
||||
|
||||
|
||||
|
||||
local ErrCParenArgs="expected ')' to close the argument list"
|
||||
|
||||
s = [[ return a(c ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCParenArgs)
|
||||
|
||||
|
||||
local ErrCBraceTable="expected '}' to close the table constructor"
|
||||
|
||||
s = [[ return { ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCBraceTable)
|
||||
|
||||
local ErrEqField="expected '=' after the table key"
|
||||
|
||||
s = [[ a = {[b] b} ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEqField)
|
||||
|
||||
local ErrExprField="expected an expression after '='"
|
||||
|
||||
s = [[ a = {[a] = } ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprField)
|
||||
|
||||
local ErrExprFKey="expected an expression after '[' for the table key"
|
||||
|
||||
s = [[ a = {[ = b} ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrExprFKey)
|
||||
|
||||
local ErrCBracketFKey="expected ']' to close the table key"
|
||||
|
||||
s = [[ a = {[a = b} ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCBracketFKey)
|
||||
|
||||
|
||||
local ErrDigitHex="expected one or more hexadecimal digits after '0x'"
|
||||
|
||||
s = [[ a = 0x ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrDigitHex)
|
||||
|
||||
local ErrDigitDeci="expected one or more digits after the decimal point"
|
||||
|
||||
s = [[ a = . ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrDigitDeci)
|
||||
|
||||
local ErrDigitExpo="expected one or more digits for the exponent"
|
||||
|
||||
|
||||
s = [[ a = 1.0e ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrDigitExpo)
|
||||
|
||||
local ErrQuote="unclosed string"
|
||||
|
||||
s = [[ a = ";]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrQuote)
|
||||
|
||||
local ErrHexEsc="expected exactly two hexadecimal digits after '\\x'"
|
||||
|
||||
s = [[ a = "a\x1" ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrHexEsc)
|
||||
|
||||
local ErrOBraceUEsc="expected '{' after '\\u'"
|
||||
|
||||
s = [[ a = "a\u" ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrOBraceUEsc)
|
||||
|
||||
local ErrDigitUEsc="expected one or more hexadecimal digits for the UTF-8 code point"
|
||||
|
||||
s = [[ a = "\u{}"]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrDigitUEsc)
|
||||
|
||||
local ErrCBraceUEsc="expected '}' after the code point"
|
||||
|
||||
s = [[ a = "\u{12" ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCBraceUEsc)
|
||||
|
||||
local ErrEscSeq="invalid escape sequence"
|
||||
|
||||
s = [[ a = "\;" ]]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrEscSeq)
|
||||
|
||||
local ErrCloseLStr="unclosed long string"
|
||||
|
||||
|
||||
s = [==[ a = [[ abc return; ]==]
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
assert(err[1]["msg"] == ErrCloseLStr)
|
||||
|
||||
print("\n\n All error labels generated successfully")
|
||||
|
||||
print("\n\n [[ TESTING AST GENERATION ]]\n\n ")
|
||||
|
||||
-- TODO: AST
|
||||
|
||||
s = [[
|
||||
if abc > 123 then
|
||||
abc=123
|
||||
end]]
|
||||
rez = {
|
||||
rule='chunk',
|
||||
pos=3,
|
||||
{
|
||||
rule='block',
|
||||
pos=3,
|
||||
{
|
||||
rule='stat',
|
||||
pos=3,
|
||||
'if',
|
||||
{
|
||||
rule='exp',
|
||||
pos=6,
|
||||
{
|
||||
rule='exp',
|
||||
{
|
||||
rule='exp',
|
||||
{
|
||||
rule='expTokens',
|
||||
pos=6,
|
||||
{
|
||||
rule='prefixexp',
|
||||
pos=6,
|
||||
{
|
||||
rule='varOrExp',
|
||||
pos=6,
|
||||
{
|
||||
rule='var',
|
||||
pos=6,
|
||||
{
|
||||
rule='NAME',
|
||||
pos=6,
|
||||
'abc',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
rule='operatorComparison',
|
||||
pos=10,
|
||||
'>',
|
||||
},
|
||||
{
|
||||
rule='expTokens',
|
||||
pos=12,
|
||||
{
|
||||
rule='number',
|
||||
pos=12,
|
||||
{
|
||||
rule='INT',
|
||||
pos=12,
|
||||
'123',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
'then',
|
||||
{
|
||||
rule='block',
|
||||
pos=23,
|
||||
{
|
||||
rule='stat',
|
||||
pos=23,
|
||||
{
|
||||
rule='varlist',
|
||||
pos=23,
|
||||
{
|
||||
rule='var',
|
||||
pos=23,
|
||||
{
|
||||
rule='NAME',
|
||||
pos=23,
|
||||
'abc',
|
||||
},
|
||||
},
|
||||
},
|
||||
'=',
|
||||
{
|
||||
rule='explist',
|
||||
pos=27,
|
||||
{
|
||||
rule='exp',
|
||||
pos=27,
|
||||
{
|
||||
rule='expTokens',
|
||||
pos=27,
|
||||
{
|
||||
rule='number',
|
||||
pos=27,
|
||||
{
|
||||
rule='INT',
|
||||
pos=27,
|
||||
'123',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
'end',
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
peg.print_t(res)
|
||||
assert(equals(res,rez))
|
||||
|
||||
s = [[
|
||||
local a = [=[ long string ]=]
|
||||
|
||||
-- aaa
|
||||
return a
|
||||
--[==[ hi
|
||||
|
||||
]==]
|
||||
]]
|
||||
rez = {
|
||||
|
||||
rule='chunk',
|
||||
pos=3,
|
||||
{
|
||||
rule='block',
|
||||
pos=3,
|
||||
{
|
||||
rule='stat',
|
||||
pos=3,
|
||||
'local',
|
||||
{
|
||||
rule='localAssign',
|
||||
pos=9,
|
||||
{
|
||||
rule='namelist',
|
||||
pos=9,
|
||||
{
|
||||
rule='NAME',
|
||||
pos=9,
|
||||
'a',
|
||||
},
|
||||
},
|
||||
'=',
|
||||
{
|
||||
rule='explist',
|
||||
pos=13,
|
||||
{
|
||||
rule='exp',
|
||||
pos=13,
|
||||
{
|
||||
rule='expTokens',
|
||||
pos=13,
|
||||
{
|
||||
rule='string',
|
||||
pos=13,
|
||||
' long string ',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
rule='retstat',
|
||||
pos=41,
|
||||
'return',
|
||||
{
|
||||
rule='explist',
|
||||
pos=48,
|
||||
{
|
||||
rule='exp',
|
||||
pos=48,
|
||||
{
|
||||
rule='expTokens',
|
||||
pos=48,
|
||||
{
|
||||
rule='prefixexp',
|
||||
pos=48,
|
||||
{
|
||||
rule='varOrExp',
|
||||
pos=48,
|
||||
{
|
||||
rule='var',
|
||||
pos=48,
|
||||
{
|
||||
rule='NAME',
|
||||
pos=48,
|
||||
'a',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
||||
|
||||
|
||||
}
|
||||
print("Parsing '"..s.."'")
|
||||
res, err = lua.parse(s)
|
||||
peg.print_t(res)
|
||||
assert(equals(res,rez))
|
||||
|
||||
print("\n\n All AST's generated successfully")
|
||||
|
||||
print("\n\nAll tests passed!")
|
291
06/deps/parser-gen/parsers/lua-parser.lua
Normal file
291
06/deps/parser-gen/parsers/lua-parser.lua
Normal file
|
@ -0,0 +1,291 @@
|
|||
--[==[
|
||||
Parser for Lua 5.3
|
||||
Based on https://github.com/antlr/grammars-v4/blob/master/lua/Lua.g4 and https://github.com/andremm/lua-parser/blob/master/lua-parser/parser.lua
|
||||
]==]
|
||||
package.path = package.path .. ";../?.lua"
|
||||
local pg = require "parser-gen"
|
||||
-- lpeglabel match-time predicate (used via `=> equals`): succeed only when
-- the two captured '=' runs of a long-string open/close bracket pair have
-- the same length.  `s` (subject) and `i` (position) are required by the
-- Cmt contract but are not needed here.
function equals(s, i, a, b)
  return #b == #a
end
|
||||
-- Capture handler: normalise an expression capture produced by the grammar.
-- With two or more captures, tag both the wrapper table and the last operand
-- as "exp" nodes and return the wrapper.  With exactly one capture, return it
-- as-is when it is already a raw "expTokens" node, otherwise unwrap its first
-- child.  With no captures, fall through (produces no capture value).
function fixexp (...)
  local parts = {...}
  local count = #parts
  if count > 1 then
    parts.rule = "exp"
    parts[count].rule = "exp"
    return parts
  elseif parts[1] then
    local only = parts[1]
    if only.rule == "expTokens" then
      return only
    end
    return only[1]
  end
end
|
||||
-- Capture handler: left-fold an (operand, operator, operand, operator, ...)
-- capture sequence into a left-associative binary "exp" tree, normalising
-- each operand through fixexp.  A lone capture is unwrapped to its first
-- child instead; no captures produce no value.
function fold (...)
  local parts = {...}
  local count = #parts
  if count > 1 then
    local tree = { rule = "exp", fixexp(parts[1]) }
    for i = 2, count, 2 do
      tree = { rule = "exp", tree, parts[i], fixexp(parts[i + 1]) }
    end
    return tree
  elseif parts[1] then
    return parts[1][1]
  end
end
|
||||
-- from https://github.com/andremm/lua-parser/blob/master/lua-parser/parser.lua
|
||||
local labels = {
|
||||
ErrExtra="unexpected character(s), expected EOF",
|
||||
ErrInvalidStat={"unexpected token, invalid start of statement",[[ (!%nl .)* ]]},
|
||||
|
||||
ErrEndIf="expected 'end' to close the if statement",
|
||||
ErrExprIf="expected a condition after 'if'",
|
||||
ErrThenIf="expected 'then' after the condition",
|
||||
ErrExprEIf="expected a condition after 'elseif'",
|
||||
ErrThenEIf="expected 'then' after the condition",
|
||||
|
||||
ErrEndDo="expected 'end' to close the do block",
|
||||
ErrExprWhile="expected a condition after 'while'",
|
||||
ErrDoWhile="expected 'do' after the condition",
|
||||
ErrEndWhile="expected 'end' to close the while loop",
|
||||
ErrUntilRep="expected 'until' at the end of the repeat loop",
|
||||
ErrExprRep="expected a conditions after 'until'",
|
||||
|
||||
ErrForRange="expected a numeric or generic range after 'for'",
|
||||
ErrEndFor="expected 'end' to close the for loop",
|
||||
ErrExprFor1="expected a starting expression for the numeric range",
|
||||
ErrCommaFor="expected ',' to split the start and end of the range",
|
||||
ErrExprFor2="expected an ending expression for the numeric range",
|
||||
ErrExprFor3={"expected a step expression for the numeric range after ','",[[ (!'do' !%nl .)* ]]},
|
||||
ErrInFor="expected '=' or 'in' after the variable(s)",
|
||||
ErrEListFor="expected one or more expressions after 'in'",
|
||||
ErrDoFor="expected 'do' after the range of the for loop",
|
||||
|
||||
ErrDefLocal="expected a function definition or assignment after local",
|
||||
ErrNameLFunc="expected a function name after 'function'",
|
||||
ErrEListLAssign="expected one or more expressions after '='",
|
||||
ErrEListAssign="expected one or more expressions after '='",
|
||||
|
||||
ErrFuncName="expected a function name after 'function'",
|
||||
ErrNameFunc1="expected a function name after '.'",
|
||||
ErrNameFunc2="expected a method name after ':'",
|
||||
ErrOParenPList="expected '(' for the parameter list",
|
||||
ErrCParenPList="expected ')' to close the parameter list",
|
||||
ErrEndFunc="expected 'end' to close the function body",
|
||||
ErrParList="expected a variable name or '...' after ','",
|
||||
|
||||
ErrLabel="expected a label name after '::'",
|
||||
ErrCloseLabel="expected '::' after the label",
|
||||
ErrGoto="expected a label after 'goto'",
|
||||
|
||||
ErrVarList={"expected a variable name after ','",[[ (!'=' !%nl .)* ]]},
|
||||
ErrExprList="expected an expression after ','",
|
||||
|
||||
ErrOrExpr="expected an expression after 'or'",
|
||||
ErrAndExpr="expected an expression after 'and'",
|
||||
ErrRelExpr="expected an expression after the relational operator",
|
||||
|
||||
ErrBitwiseExpr="expected an expression after bitwise operator",
|
||||
|
||||
ErrConcatExpr="expected an expression after '..'",
|
||||
ErrAddExpr="expected an expression after the additive operator",
|
||||
ErrMulExpr="expected an expression after the multiplicative operator",
|
||||
ErrUnaryExpr="expected an expression after the unary operator",
|
||||
ErrPowExpr="expected an expression after '^'",
|
||||
|
||||
ErrExprParen="expected an expression after '('",
|
||||
ErrCParenExpr="expected ')' to close the expression",
|
||||
ErrNameIndex="expected a field name after '.'",
|
||||
ErrExprIndex="expected an expression after '['",
|
||||
ErrCBracketIndex="expected ']' to close the indexing expression",
|
||||
ErrNameMeth="expected a method name after ':'",
|
||||
ErrMethArgs="expected some arguments for the method call (or '()')",
|
||||
|
||||
|
||||
ErrCParenArgs="expected ')' to close the argument list",
|
||||
|
||||
ErrCBraceTable="expected '}' to close the table constructor",
|
||||
ErrEqField="expected '=' after the table key",
|
||||
ErrExprField="expected an expression after '='",
|
||||
ErrExprFKey={"expected an expression after '[' for the table key",[[ (!']' !%nl .)* ]] },
|
||||
ErrCBracketFKey={"expected ']' to close the table key",[[ (!'=' !%nl .)* ]]},
|
||||
|
||||
ErrDigitHex="expected one or more hexadecimal digits after '0x'",
|
||||
ErrDigitDeci="expected one or more digits after the decimal point",
|
||||
ErrDigitExpo="expected one or more digits for the exponent",
|
||||
|
||||
ErrQuote="unclosed string",
|
||||
ErrHexEsc={"expected exactly two hexadecimal digits after '\\x'",[[ (!('"' / "'" / %nl) .)* ]]},
|
||||
ErrOBraceUEsc="expected '{' after '\\u'",
|
||||
ErrDigitUEsc={"expected one or more hexadecimal digits for the UTF-8 code point",[[ (!'}' !%nl .)* ]]},
|
||||
ErrCBraceUEsc={"expected '}' after the code point",[[ (!('"' / "'") .)* ]]},
|
||||
ErrEscSeq={"invalid escape sequence",[[ (!('"' / "'" / %nl) .)* ]]},
|
||||
ErrCloseLStr="unclosed long string",
|
||||
ErrEqAssign="expected '=' after variable list in assign statement"
|
||||
}
|
||||
pg.setlabels(labels)
|
||||
local grammar = pg.compile([==[
|
||||
chunk <- block (!.)^ErrExtra
|
||||
block <- stat* retstat?
|
||||
stat <- ';' /
|
||||
functioncall /
|
||||
varlist '='^ErrEqAssign explist^ErrEListAssign /
|
||||
'break' /
|
||||
'goto' NAME^ErrGoto /
|
||||
'do' block 'end'^ErrEndDo /
|
||||
'while' exp^ErrExprWhile 'do'^ErrDoWhile block 'end'^ErrEndWhile /
|
||||
'repeat' block 'until'^ErrUntilRep exp^ErrExprRep /
|
||||
'if' exp^ErrExprIf 'then'^ErrThenIf block ('elseif' exp^ErrExprEIf 'then'^ErrThenEIf block)* ('else' block)? 'end'^ErrEndIf /
|
||||
'for' (forNum / forIn)^ErrForRange 'do'^ErrDoFor block 'end'^ErrEndFor /
|
||||
|
||||
'function' funcname^ErrFuncName funcbody /
|
||||
'local' (localAssign / localFunc)^ErrDefLocal /
|
||||
label /
|
||||
!blockEnd %{ErrInvalidStat}
|
||||
blockEnd <- 'return' / 'end' / 'elseif' / 'else' / 'until' / !.
|
||||
retstat <- 'return' explist? ';'?
|
||||
forNum <- NAME '=' exp^ErrExprFor1 ','^ErrCommaFor exp^ErrExprFor2 (',' exp^ErrExprFor3)?
|
||||
forIn <- namelist 'in'^ErrInFor explist^ErrEListFor
|
||||
localFunc <- 'function' NAME^ErrNameLFunc funcbody
|
||||
localAssign <- namelist ('=' explist^ErrEListLAssign)?
|
||||
label <- '::' NAME^ErrLabel '::'^ErrCloseLabel
|
||||
funcname <- NAME ('.' NAME^ErrNameFunc1)* (':' NAME^ErrNameFunc2)?
|
||||
varlist <- var (',' var^ErrVarList)*
|
||||
namelist <- NAME (',' NAME)*
|
||||
explist <- exp (',' exp^ErrExprList )*
|
||||
|
||||
exp <- expOR -> fixexp
|
||||
expOR <- (expAND (operatorOr expAND^ErrOrExpr)*) -> fold
|
||||
expAND <- (expREL (operatorAnd expREL^ErrAndExpr)*) -> fold
|
||||
expREL <- (expBIT (operatorComparison expBIT^ErrRelExpr)*) -> fold
|
||||
expBIT <- (expCAT (operatorBitwise expCAT^ErrBitwiseExpr)*) -> fold
|
||||
expCAT <- (expADD (operatorStrcat expCAT^ErrConcatExpr)?) -> fixexp
|
||||
expADD <- (expMUL (operatorAddSub expMUL^ErrAddExpr)*) -> fold
|
||||
expMUL <- (expUNA (operatorMulDivMod expUNA^ErrMulExpr)*) -> fold
|
||||
expUNA <- ((operatorUnary expUNA^ErrUnaryExpr) / expPOW) -> fixexp
|
||||
expPOW <- (expTokens (operatorPower expUNA^ErrPowExpr)?) -> fixexp
|
||||
|
||||
expTokens <- 'nil' / 'false' / 'true' /
|
||||
number /
|
||||
string /
|
||||
'...' /
|
||||
'function' funcbody /
|
||||
tableconstructor /
|
||||
prefixexp
|
||||
|
||||
prefixexp <- varOrExp nameAndArgs*
|
||||
functioncall <- varOrExp nameAndArgs+
|
||||
varOrExp <- var / brackexp
|
||||
brackexp <- '(' exp^ErrExprParen ')'^ErrCParenExpr
|
||||
var <- (NAME / brackexp varSuffix) varSuffix*
|
||||
varSuffix <- nameAndArgs* ('[' exp^ErrExprIndex ']'^ErrCBracketIndex / '.' !'.' NAME^ErrNameIndex)
|
||||
nameAndArgs <- (':' !':' NAME^ErrNameMeth args^ErrMethArgs) /
|
||||
args
|
||||
args <- '(' explist? ')'^ErrCParenArgs / tableconstructor / string
|
||||
funcbody <- '('^ErrOParenPList parlist? ')'^ErrCParenPList block 'end'^ErrEndFunc
|
||||
parlist <- namelist (',' '...'^ErrParList)? / '...'
|
||||
tableconstructor<- '{' fieldlist? '}'^ErrCBraceTable
|
||||
fieldlist <- field (fieldsep field)* fieldsep?
|
||||
field <- !OPEN '[' exp^ErrExprFKey ']'^ErrCBracketFKey '='^ErrEqField exp^ErrExprField /
|
||||
NAME '=' exp /
|
||||
exp
|
||||
fieldsep <- ',' / ';'
|
||||
operatorOr <- 'or'
|
||||
operatorAnd <- 'and'
|
||||
operatorComparison<- '<=' / '>=' / '~=' / '==' / '<' !'<' / '>' !'>'
|
||||
operatorStrcat <- !'...' '..'
|
||||
operatorAddSub <- '+' / '-'
|
||||
operatorMulDivMod<- '*' / '%' / '//' / '/'
|
||||
operatorBitwise <- '&' / '|' / !'~=' '~' / '<<' / '>>'
|
||||
operatorUnary <- 'not' / '#' / '-' / !'~=' '~'
|
||||
operatorPower <- '^'
|
||||
number <- FLOAT / HEX_FLOAT / HEX / INT
|
||||
string <- NORMALSTRING / CHARSTRING / LONGSTRING
|
||||
-- lexer
|
||||
fragment
|
||||
RESERVED <- KEYWORDS !IDREST
|
||||
fragment
|
||||
IDREST <- [a-zA-Z_0-9]
|
||||
fragment
|
||||
KEYWORDS <- 'and' / 'break' / 'do' / 'elseif' / 'else' / 'end' /
|
||||
'false' / 'for' / 'function' / 'goto' / 'if' / 'in' /
|
||||
'local' / 'nil' / 'not' / 'or' / 'repeat' / 'return' /
|
||||
'then' / 'true' / 'until' / 'while'
|
||||
NAME <- !RESERVED [a-zA-Z_] [a-zA-Z_0-9]*
|
||||
fragment
|
||||
NORMALSTRING <- '"' {( ESC / [^"\] )*} '"'^ErrQuote
|
||||
fragment
|
||||
CHARSTRING <- "'" {( ESC / [^\'] )*} "'"^ErrQuote
|
||||
fragment
|
||||
LONGSTRING <- (OPEN {(!CLOSEEQ .)*} CLOSE^ErrCloseLStr) -> 1 -- capture only the string
|
||||
|
||||
fragment
|
||||
OPEN <- '[' {:openEq: EQUALS :} '[' %nl?
|
||||
fragment
|
||||
CLOSE <- ']' {EQUALS} ']'
|
||||
fragment
|
||||
EQUALS <- '='*
|
||||
fragment
|
||||
CLOSEEQ <- (CLOSE =openEq) => equals
|
||||
|
||||
INT <- DIGIT+
|
||||
HEX <- '0' [xX] HEXDIGIT+^ErrDigitHex
|
||||
FLOAT <- DIGIT+ '.' DIGIT* ExponentPart? /
|
||||
'.' !'.' DIGIT+^ErrDigitDeci ExponentPart? /
|
||||
DIGIT+ ExponentPart
|
||||
HEX_FLOAT <- '0' [xX] HEXDIGIT+ '.' HEXDIGIT* HexExponentPart? /
|
||||
'0' [xX] '.' HEXDIGIT+ HexExponentPart? /
|
||||
'0' [xX] HEXDIGIT+^ErrDigitHex HexExponentPart
|
||||
fragment
|
||||
ExponentPart <- [eE] [+-]? DIGIT+^ErrDigitExpo
|
||||
fragment
|
||||
HexExponentPart <- [pP] [+-]? DIGIT+^ErrDigitExpo
|
||||
fragment
|
||||
ESC <- '\' [abfnrtvz"'\] /
|
||||
'\' %nl /
|
||||
DECESC /
|
||||
HEXESC/
|
||||
UTFESC/
|
||||
'\' %{ErrEscSeq}
|
||||
fragment
|
||||
DECESC <- '\' ( DIGIT DIGIT? / [0-2] DIGIT DIGIT)
|
||||
fragment
|
||||
HEXESC <- '\' 'x' (HEXDIGIT HEXDIGIT)^ErrHexEsc
|
||||
fragment
|
||||
UTFESC <- '\' 'u' '{'^ErrOBraceUEsc HEXDIGIT+^ErrDigitUEsc '}'^ErrCBraceUEsc
|
||||
fragment
|
||||
DIGIT <- [0-9]
|
||||
fragment
|
||||
HEXDIGIT <- [0-9a-fA-F]
|
||||
|
||||
|
||||
fragment
|
||||
COMMENT <- '--' LONGSTRING -> 0 -- skip this
|
||||
fragment
|
||||
LINE_COMMENT <- '--' COM_TYPES ( %nl / !.)
|
||||
fragment
|
||||
COM_TYPES <- '[' '='* [^[=%nl] [^%nl]* /
|
||||
'[' '='* /
|
||||
[^[%nl] [^%nl]* /
|
||||
''
|
||||
fragment
|
||||
SHEBANG <- '#' '!' [^%nl]*
|
||||
|
||||
|
||||
SKIP <- %nl / %s / COMMENT / LINE_COMMENT / SHEBANG
|
||||
fragment
|
||||
HELPER <- RESERVED / '(' / ')' -- for sync expression
|
||||
SYNC <- ((!HELPER !SKIP .)+ / .?) SKIP* -- either sync to reserved keyword or skip characters and consume them
|
||||
|
||||
]==],{ equals = equals, fixexp = fixexp, fold = fold })
|
||||
local errnr = 1
|
||||
-- Error callback handed to pg.parse: print a numbered syntax-error report
-- and bump the file-level counter `errnr`.  `sfail` (failing substring) and
-- `recexp` (recovery expression) are part of the parser-gen callback
-- signature but are intentionally unused here.
local function err (desc, line, col, sfail, recexp)
  print(string.format("Syntax error #%s: %s at line %s(col %s)",
    errnr, desc, line, col))
  errnr = errnr + 1
end
|
||||
-- Parse a Lua 5.3 source string with the compiled grammar.
-- Resets the error counter so reports restart at #1, then delegates to
-- parser-gen with `err` as the error callback.
-- Returns the AST (or nil on failure) and the collected error list.
local function parse (input)
  errnr = 1
  local tree, errlist = pg.parse(input, grammar, err)
  return tree, errlist
end
|
||||
return {parse=parse}
|
55
06/deps/parser-gen/parsers/tiny-parser.lua
Normal file
55
06/deps/parser-gen/parsers/tiny-parser.lua
Normal file
|
@ -0,0 +1,55 @@
|
|||
package.path = package.path .. ";../?.lua"
|
||||
local pg = require "parser-gen"
|
||||
local peg = require "peg-parser"
|
||||
local errs = {errMissingThen = "Missing Then"}
|
||||
pg.setlabels(errs)
|
||||
|
||||
|
||||
local grammar = pg.compile([[
|
||||
|
||||
program <- stmtsequence !.
|
||||
stmtsequence <- statement (';' statement)*
|
||||
statement <- ifstmt / repeatstmt / assignstmt / readstmt / writestmt
|
||||
ifstmt <- 'if' exp 'then'^errMissingThen stmtsequence elsestmt? 'end'
|
||||
elsestmt <- ('else' stmtsequence)
|
||||
repeatstmt <- 'repeat' stmtsequence 'until' exp
|
||||
assignstmt <- IDENTIFIER ':=' exp
|
||||
readstmt <- 'read' IDENTIFIER
|
||||
writestmt <- 'write' exp
|
||||
exp <- simpleexp (COMPARISONOP simpleexp)*
|
||||
COMPARISONOP <- '<' / '='
|
||||
simpleexp <- term (ADDOP term)*
|
||||
ADDOP <- [+-]
|
||||
term <- factor (MULOP factor)*
|
||||
MULOP <- [*/]
|
||||
factor <- '(' exp ')' / NUMBER / IDENTIFIER
|
||||
|
||||
NUMBER <- '-'? [0-9]+
|
||||
KEYWORDS <- 'if' / 'repeat' / 'read' / 'write' / 'then' / 'else' / 'end' / 'until'
|
||||
RESERVED <- KEYWORDS ![a-zA-Z]
|
||||
IDENTIFIER <- !RESERVED [a-zA-Z]+
|
||||
HELPER <- ';' / %nl / %s / KEYWORDS / !.
|
||||
SYNC <- (!HELPER .)*
|
||||
|
||||
]], _, true)
|
||||
local errors = 0
|
||||
-- Error callback for pg.parse: increment the file-level error counter and
-- print a numbered report.  `sfail` and `trec` are required by the
-- parser-gen callback contract but are not used here.
local function printerror(desc, line, col, sfail, trec)
  errors = errors + 1
  print(string.format("Error #%s: %s on line %s(col %s)",
    errors, desc, line, col))
end
|
||||
|
||||
|
||||
-- Parse `input` with the tiny-language grammar.
-- Resets the error counter, then returns the parse result together with
-- the error list produced by parser-gen.
-- Fix: the previous version assigned to an accidental global `result` and
-- clobbered the upvalue `errors` (the error *counter* used by printerror)
-- with pg.parse's error-list return value; both are now proper locals, so
-- the counter keeps its meaning and no global leaks.
local function parse(input)
  errors = 0
  local result, syntax_errors = pg.parse(input, grammar, printerror)
  return result, syntax_errors
end
|
||||
|
||||
if arg[1] then
|
||||
-- argument must be in quotes if it contains spaces
|
||||
res, errs = parse(arg[1])
|
||||
peg.print_t(res)
|
||||
peg.print_r(errs)
|
||||
end
|
||||
local ret = {parse=parse}
|
||||
return ret
|
260
06/deps/parser-gen/peg-parser-tests.lua
Normal file
260
06/deps/parser-gen/peg-parser-tests.lua
Normal file
|
@ -0,0 +1,260 @@
|
|||
local peg = require("peg-parser")
|
||||
local f = peg.pegToAST
|
||||
|
||||
local eq = require "equals"
|
||||
local equals = eq.equals
|
||||
|
||||
|
||||
-- self-description of peg-parser:
|
||||
|
||||
--assert(f(peg.gram))
|
||||
|
||||
-- ( p ) grouping
|
||||
e = f("('a')")
|
||||
res = {t="a"}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
-- 'string' literal string
|
||||
|
||||
e = f("'string'")
|
||||
res = {t="string"}
|
||||
assert(equals(e,res))
|
||||
|
||||
-- "string" literal string
|
||||
e = f('"string"')
|
||||
res = {t="string"}
|
||||
|
||||
assert(equals(e,res))
|
||||
--[class] character class
|
||||
e = f("[^a-zA-Z01]")
|
||||
res = {
|
||||
action = "invert",
|
||||
op1 = {
|
||||
action = "or",
|
||||
op1 = {
|
||||
action = "or",
|
||||
op1 = {
|
||||
action = "or",
|
||||
op1 = {
|
||||
action = "range",
|
||||
op1 = {
|
||||
s = "az"
|
||||
}
|
||||
},
|
||||
op2 = {
|
||||
action = "range",
|
||||
op1 = {
|
||||
s = "AZ"
|
||||
}
|
||||
}
|
||||
},
|
||||
op2 = {
|
||||
t = "0"
|
||||
}
|
||||
},
|
||||
op2 = {
|
||||
t = "1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
--. any character
|
||||
e = f(".")
|
||||
res = {action="anychar"}
|
||||
|
||||
assert(equals(e,res))
|
||||
--%name pattern defs[name] or a pre-defined pattern
|
||||
e = f("%name")
|
||||
res = {action="%", op1={s="name"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
--name non terminal
|
||||
e = f("name")
|
||||
res = {nt="name"}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
--<name> non terminal
|
||||
e = f("<name>")
|
||||
res = {nt="name"}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
--{} position capture
|
||||
e = f("{}")
|
||||
|
||||
res = {action="poscap"}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
--{ p } simple capture
|
||||
e = f("{name}")
|
||||
res = {action="scap", op1= {nt="name"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
--{: p :} anonymous group capture
|
||||
e = f("{:name:}")
|
||||
res = {action="gcap", op1= {nt="name"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
--{:name: p :} named group capture
|
||||
e = f("{:g: name:}")
|
||||
res = {action="gcap", op1= {nt="name"} , op2={s="g"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
--{~ p ~} substitution capture
|
||||
e = f("{~ name ~}")
|
||||
|
||||
res = {action="subcap", op1= {nt="name"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
--{| p |} table capture
|
||||
e = f("{| name |}")
|
||||
res = {action="tcap", op1= {nt="name"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--=name back reference
|
||||
e = f("=name")
|
||||
res = {action="bref", op1= {s="name"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p ? optional match
|
||||
e = f("name?")
|
||||
res = {action="?", op1= {nt="name"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p * zero or more repetitions
|
||||
e = f("name*")
|
||||
res = {action="*", op1= {nt="name"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p + one or more repetitions
|
||||
e = f("name+")
|
||||
res = {action="+", op1= {nt="name"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p^num exactly n repetitions
|
||||
e = f("name^3")
|
||||
res = {action="^", op1= {nt="name"}, op2 = {num="3"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p^+num at least n repetitions
|
||||
e = f("name^+3")
|
||||
res = {action="^", op1= {nt="name"}, op2 = {num="+3"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p^-num at most n repetitions
|
||||
e = f("name^-3")
|
||||
res = {action="^", op1= {nt="name"}, op2 = {num="-3"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p^LABEL error label
|
||||
e = f("name^err")
|
||||
res = {action = "^LABEL", op1= {nt="name"}, op2 = {s="err"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p -> 'string' string capture
|
||||
e = f("name -> 'a'")
|
||||
res = {action="->", op1= {nt="name"}, op2 = {s="a"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p -> "string" string capture
|
||||
e = f('name -> "a"')
|
||||
res = {action="->", op1= {nt="name"}, op2 = {s="a"}}
|
||||
assert(equals(e,res))
|
||||
|
||||
--p -> num numbered capture
|
||||
|
||||
e = f('name -> 3')
|
||||
|
||||
res = {action="->", op1= {nt="name"}, op2 = {num="3"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
--p -> name function/query/string capture equivalent to p / defs[name]
|
||||
|
||||
e = f('name -> func')
|
||||
res = {action="->", op1= {nt="name"}, op2 = {func="func"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
|
||||
|
||||
--p => name match-time capture equivalent to lpeg.Cmt(p, defs[name])
|
||||
|
||||
e = f('name => func')
|
||||
res = {action="=>", op1= {nt="name"}, op2 = {func="func"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
|
||||
--& p and predicate
|
||||
|
||||
e = f('&name')
|
||||
res = {action="&", op1= {nt="name"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
|
||||
--! p not predicate
|
||||
|
||||
|
||||
e = f('!name')
|
||||
res = {action="!", op1= {nt="name"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
|
||||
--p1 p2 p3 concatenation with left association
|
||||
|
||||
e = f('name name2 name3')
|
||||
res = {action="and", op1= {action = "and", op1={nt="name"}, op2={nt="name2"}}, op2={nt="name3"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
--p1 / p2 / p3 ordered choice with left association
|
||||
|
||||
e = f('name / name2 / name3')
|
||||
res = {action="or", op1= {action = "or", op1={nt="name"}, op2={nt="name2"}}, op2={nt="name3"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
|
||||
--(name <- p)+ grammar
|
||||
|
||||
e = f('a <- b b <- c')
|
||||
res = {
|
||||
{rulename = "a", rule = {nt="b"}},
|
||||
{rulename = "b", rule = {nt="c"}}
|
||||
}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
-- error labels
|
||||
-- %{errName}
|
||||
|
||||
--peg.setlabels({errName=1})
|
||||
e = f('%{errName}')
|
||||
|
||||
res = {action="label", op1={s="errName"}}
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
-- a //{errName,errName2} b
|
||||
|
||||
--peg.setlabels({errName=1, errName2=2})
|
||||
e = f('a //{errName,errName2} b')
|
||||
|
||||
res = {action="or", condition={{s="errName"},{s="errName2"}}, op1={nt="a"}, op2={nt="b"}}
|
||||
|
||||
|
||||
assert(equals(e,res))
|
||||
|
||||
print("all tests succesful")
|
334
06/deps/parser-gen/peg-parser.lua
Normal file
334
06/deps/parser-gen/peg-parser.lua
Normal file
|
@ -0,0 +1,334 @@
|
|||
local re = require("deps.lpeglabel.relabel")
|
||||
|
||||
local peg = {}
|
||||
|
||||
-- from relabel.lua
|
||||
|
||||
local errinfo = {
|
||||
{"NoPatt", "no pattern found"},
|
||||
{"ExtraChars", "unexpected characters after the pattern"},
|
||||
|
||||
{"ExpPatt1", "expected a pattern after '/' or '//{...}'"},
|
||||
|
||||
{"ExpPatt2", "expected a pattern after '&'"},
|
||||
{"ExpPatt3", "expected a pattern after '!'"},
|
||||
|
||||
{"ExpPatt4", "expected a pattern after '('"},
|
||||
{"ExpPatt5", "expected a pattern after ':'"},
|
||||
{"ExpPatt6", "expected a pattern after '{~'"},
|
||||
{"ExpPatt7", "expected a pattern after '{|'"},
|
||||
|
||||
{"ExpPatt8", "expected a pattern after '<-'"},
|
||||
|
||||
{"ExpPattOrClose", "expected a pattern or closing '}' after '{'"},
|
||||
|
||||
{"ExpNum", "expected a number after '^', '+' or '-' (no space)"},
|
||||
{"ExpNumOrLab", "expected a number or a label after ^"},
|
||||
{"ExpCap", "expected a string, number, '{}' or name after '->'"},
|
||||
|
||||
{"ExpName1", "expected the name of a rule after '=>'"},
|
||||
{"ExpName2", "expected the name of a rule after '=' (no space)"},
|
||||
{"ExpName3", "expected the name of a rule after '<' (no space)"},
|
||||
|
||||
{"ExpLab1", "expected at least one label after '{'"},
|
||||
{"ExpLab2", "expected a label after the comma"},
|
||||
|
||||
{"ExpNameOrLab", "expected a name or label after '%' (no space)"},
|
||||
|
||||
{"ExpItem", "expected at least one item after '[' or '^'"},
|
||||
|
||||
{"MisClose1", "missing closing ')'"},
|
||||
{"MisClose2", "missing closing ':}'"},
|
||||
{"MisClose3", "missing closing '~}'"},
|
||||
{"MisClose4", "missing closing '|}'"},
|
||||
{"MisClose5", "missing closing '}'"}, -- for the captures
|
||||
|
||||
{"MisClose6", "missing closing '>'"},
|
||||
{"MisClose7", "missing closing '}'"}, -- for the labels
|
||||
|
||||
{"MisClose8", "missing closing ']'"},
|
||||
|
||||
{"MisTerm1", "missing terminating single quote"},
|
||||
{"MisTerm2", "missing terminating double quote"},
|
||||
}
|
||||
|
||||
local errmsgs = {}
|
||||
local labels = {}
|
||||
|
||||
for i, err in ipairs(errinfo) do
|
||||
errmsgs[i] = err[2]
|
||||
labels[err[1]] = i
|
||||
end
|
||||
|
||||
re.setlabels(labels)
|
||||
|
||||
-- Capture helper: glue two captured strings together (used by the `range`
-- rule to turn the two endpoint characters of 'a-z' into one "az" string).
local function concat(a, b)
  return a .. b
end
|
||||
-- Capture handler: left-fold a list of captures into a binary AST node.
--   action == "suf"  : each extra capture is {suffix-op, operand?}; '*', '?'
--                      and '+' are unary, everything else carries an op2.
--   action == "or" with a 2-element extra capture: labelled recovery
--                      expression — value = {label-list, pattern}, and the
--                      label list is stored in `condition`.
--   otherwise        : plain left-associative {action, op1, op2} chaining.
-- Fixes: iterate with ipairs — the tables come from lpeg table captures and
-- are sequences, and the old pairs+`first`-flag loop relied on unspecified
-- traversal order — and stop shadowing the module-level locals `re` (the
-- relabel module) and `labels` (the label map), which triggered
-- shadowing diagnostics.
local function foldtable(action, t)
  local acc
  for i, value in ipairs(t) do
    if i == 1 then
      acc = value
    elseif action == "suf" then
      -- postfix operators: *, ? and + take no operand, the rest take one
      local act = value[1]
      if act == "*" or act == "?" or act == "+" then
        acc = {action = act, op1 = acc}
      else
        acc = {action = act, op1 = acc, op2 = value[2]}
      end
    elseif action == "or" and #value == 2 then
      -- recovery expression: value = {label-list, pattern}
      acc = {action = action, op1 = acc, op2 = value[2], condition = value[1]}
    else
      acc = {action = action, op1 = acc, op2 = value}
    end
  end
  return acc
end
|
||||
|
||||
|
||||
local gram = [=[
|
||||
|
||||
pattern <- (exp / %{NoPatt}) (!. / %{ExtraChars})
|
||||
exp <- S (grammar / alternative)
|
||||
|
||||
labels <- {| '{' {: (label / %{ExpLab1}) :} (',' {: (label / %{ExpLab2}) :})* ('}' / %{MisClose7}) |}
|
||||
|
||||
|
||||
alternative <- ( {:''->'or':} {| {: seq :} ('/' (('/' {| {: labels :} S {: (seq / %{ExpPatt1}) :} |}) / (S {: (seq / %{ExpPatt1}) :} ) ) )* |} ) -> foldtable
|
||||
|
||||
|
||||
seq <- ( {:''->'and':} {| {: prefix :}+ |} ) -> foldtable
|
||||
|
||||
|
||||
prefix <- {| {:action: '&' :} S {:op1: (prefix / %{ExpPatt2}) :} |}
|
||||
/ {| {:action: '!' :} S {:op1: (prefix / %{ExpPatt3}) :} |}
|
||||
/ suffix
|
||||
|
||||
suffix <- ( {:''->'suf':} {| primary S {| suffixaction S |}* |} ) -> foldtable
|
||||
|
||||
|
||||
suffixaction <- {[+*?]}
|
||||
/ {'^'} {| {:num: [+-]? NUM:} |}
|
||||
/ '^'->'^LABEL' (label / %{ExpNumOrLab})
|
||||
/ {'->'} S ((string / {| {:action:'{}'->'poscap':} |} / funcname / {|{:num: NUM :} |}) / %{ExpCap})
|
||||
/ {'=>'} S (funcname / %{ExpName1})
|
||||
|
||||
|
||||
|
||||
|
||||
primary <- '(' (exp / %{ExpPatt4}) (')' / %{MisClose1})
|
||||
/ term
|
||||
/ class
|
||||
/ defined
|
||||
/ {| {:action: '%'->'label':} ('{' / %{ExpNameOrLab}) S ({:op1: label:} / %{ExpLab1}) S ('}' / %{MisClose7}) |}
|
||||
/ {| {:action: '{:'->'gcap':} {:op2: defname:} ':' !'}' ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
|
||||
/ {| {:action: '{:'->'gcap':} ({:op1:exp:} / %{ExpPatt5}) (':}' / %{MisClose2}) |}
|
||||
/ {| {:action: '='->'bref':} ({:op1: defname:} / %{ExpName2}) |}
|
||||
/ {| {:action: '{}'->'poscap':} |}
|
||||
/ {| {:action: '{~'->'subcap':} ({:op1: exp:} / %{ExpPatt6}) ('~}' / %{MisClose3}) |}
|
||||
/ {| {:action: '{|'->'tcap':} ({:op1: exp:} / %{ExpPatt7}) ('|}' / %{MisClose4}) |}
|
||||
/ {| {:action: '{'->'scap':} ({:op1: exp:} / %{ExpPattOrClose}) ('}' / %{MisClose5}) |}
|
||||
/ {| {:action: '.'->'anychar':} |}
|
||||
/ !frag !nodee name S !ARROW
|
||||
/ '<' (name / %{ExpName3}) ('>' / %{MisClose6}) -- old-style non terminals
|
||||
|
||||
grammar <- {| definition+ |}
|
||||
definition <- {| (frag / nodee)? (token / nontoken) S ARROW ({:rule: exp :} / %{ExpPatt8}) |}
|
||||
|
||||
label <- {| {:s: ERRORNAME :} |}
|
||||
|
||||
frag <- {:fragment: 'fragment'->'1' :} ![0-9_a-z] S !ARROW
|
||||
nodee <- {:node: 'node'->'1' :} ![0-9_a-z] S !ARROW
|
||||
token <- {:rulename: TOKENNAME :} {:token:''->'1':}
|
||||
nontoken <- {:rulename: NAMESTRING :}
|
||||
|
||||
class <- '[' ( ('^' {| {:action:''->'invert':} {:op1: classset :} |} ) / classset ) (']' / %{MisClose8})
|
||||
classset <- ( {:''->'or':} {| {: (item / %{ExpItem}) :} (!']' {: (item / %{ExpItem}) :})* |} ) -> foldtable
|
||||
item <- defined / range / {| {:t: . :} |}
|
||||
range <- {| {:action:''->'range':} {:op1: {| {:s: ({: . :} ('-') {: [^]] :} ) -> concat :} |} :} |}
|
||||
|
||||
S <- (%s / '--' [^%nl]*)* -- spaces and comments
|
||||
name <- {| {:nt: TOKENNAME :} {:token:''->'1':} / {:nt: NAMESTRING :} |}
|
||||
|
||||
funcname <- {| {:func: NAMESTRING :} |}
|
||||
ERRORNAME <- NAMESTRING
|
||||
NAMESTRING <- [A-Za-z][A-Za-z0-9_]*
|
||||
TOKENNAME <- [A-Z_]+ ![0-9a-z]
|
||||
defname <- {| {:s: NAMESTRING :} |}
|
||||
ARROW <- '<-'
|
||||
NUM <- [0-9]+
|
||||
term <- {| '"' {:t: [^"]* :} ('"' / %{MisTerm2}) / "'" {:t: [^']* :} ("'" / %{MisTerm1}) |}
|
||||
string <- {| '"' {:s: [^"]* :} ('"' / %{MisTerm2}) / "'" {:s: [^']* :} ("'" / %{MisTerm1}) |}
|
||||
defined <- {| {:action: '%':} {:op1: defname :} |}
|
||||
]=]
|
||||
|
||||
-- Capture helpers referenced by name from inside the meta-grammar above.
local defs = {foldtable=foldtable, concat=concat}

-- Expose the meta-grammar, its capture definitions and error labels so that
-- callers (e.g. parser-gen.lua) can inspect or reuse them.
peg.gram = gram
peg.defs = defs
peg.labels = labels

-- Compile the PEG meta-grammar once at load time; `p` is reused by every
-- call to peg.pegToAST below.
local p = re.compile ( gram, defs)
|
||||
|
||||
|
||||
|
||||
|
||||
--[[
|
||||
Function: pegToAST(input, defs)
|
||||
|
||||
Input: a grammar in PEG format, described in https://github.com/vsbenas/parser-gen
|
||||
|
||||
Output: if parsing successful - a table of grammar rules, else - runtime error
|
||||
|
||||
Example input: "
|
||||
|
||||
Program <- stmt* / SPACE
|
||||
stmt <- ('a' / 'b')+
|
||||
SPACE <- ''
|
||||
|
||||
"
|
||||
|
||||
Example output: {
|
||||
{rulename = "Program", rule = {action = "or", op1 = {action = "*", op1 = {nt = "stmt"}}, op2 = {nt = "SPACE", token="1"}}},
|
||||
{rulename = "stmt", rule = {action = "+", op1 = {action="or", op1 = {t = "a"}, op2 = {t = "b"}}}},
|
||||
{rulename = "SPACE", rule = {t=""}, token=1},
|
||||
}
|
||||
|
||||
The rules are further processed and turned into lpeg compatible format in parser-gen.lua
|
||||
|
||||
Action names:
|
||||
or (has parameter condition for recovery expressions)
|
||||
and
|
||||
&
|
||||
!
|
||||
+
|
||||
*
|
||||
?
|
||||
^num (num is a number with an optional plus or minus sign)
|
||||
^label (label is an error label set with setlabels)
|
||||
->
|
||||
=>
|
||||
tcap
|
||||
gcap (op2= name, anonymous otherwise)
|
||||
bref
|
||||
poscap
|
||||
subcap
|
||||
scap
|
||||
anychar
|
||||
label
|
||||
%
|
||||
range
|
||||
|
||||
Final token actions:
|
||||
t - terminal
|
||||
nt - non terminal
|
||||
func - function definition
|
||||
s - literal string
|
||||
num - literal number
|
||||
]]--
|
||||
-- Split `str` on LF / CRLF line endings into an array of lines.
-- The remainder after the final newline is appended as the last entry,
-- so a string that ends in "\n" yields a trailing "" element.
local function splitlines(str)
	local lines = {}
	local function collect(line)
		lines[#lines + 1] = line
		return ""
	end
	-- gsub consumes each "line\n" while collecting it; the gsub result is
	-- the leftover tail after the last newline, collected by the outer call.
	-- (Extra parentheses truncate gsub's two results to one.)
	collect((str:gsub("(.-)\r?\n", collect)))
	return lines
end
|
||||
--- Parse a grammar written in PEG format into a table of grammar rules.
-- @param input grammar text in the PEG format described in the project README
-- @param defs  optional table of capture functions referenced by the grammar
-- @return the AST (table of rules) on success; raises an error (level 3,
--         reported at the caller's caller) with a "L<line>:C<col>" message,
--         the offending source line and a caret marker on failure
function peg.pegToAST(input, defs)
	local r, e, sfail = p:match(input, defs)
	if not r then
		-- e is the numeric error label from LPegLabel; 0 means a plain
		-- syntax failure with no specific label attached.
		local lab
		if e == 0 then
			lab = "Syntax error"
		else
			lab = errmsgs[e]
		end
		local lines = splitlines(input)
		-- sfail is the unconsumed suffix at the failure point; its length
		-- gives the absolute failure position within `input`.
		local line, col = re.calcline(input, #input - #sfail + 1)
		local err = {}
		table.insert(err, "L" .. line .. ":C" .. col .. ": " .. lab)
		table.insert(err, lines[line])
		-- Caret under the failing column of the quoted source line.
		table.insert(err, string.rep(" ", col-1) .. "^")
		error("syntax error(s) in pattern\n" .. table.concat(err, "\n"), 3)
	end
	return r
end
|
||||
|
||||
|
||||
-- Recursively pretty-print a table (or scalar) to stdout, for debugging.
-- Cycles are broken by caching tostring() identities: a table seen before
-- is printed as "*<address>" instead of being expanded again.
-- NOTE: iteration uses pairs(), so key order in the output is unspecified.
function peg.print_r ( t ) -- for debugging
	local print_r_cache={}
	local function sub_print_r(t,indent)
		if (print_r_cache[tostring(t)]) then
			-- already visited: print a back-reference marker, do not recurse
			print(indent.."*"..tostring(t))
		else
			print_r_cache[tostring(t)]=true
			if (type(t)=="table") then
				for pos,val in pairs(t) do
					if (type(val)=="table") then
						print(indent.."["..pos.."] => {")
						-- indent children by the key's width so nested values line up
						sub_print_r(val,indent..string.rep(" ",string.len(pos)+8))
						print(indent..string.rep(" ",string.len(pos)+6).."}")
					else
						print(indent.."["..pos.."] => '"..tostring(val).."'")
					end
				end
			else
				-- non-table leaf: print the value itself
				print(indent..tostring(t))
			end
		end
	end
	sub_print_r(t,"")
end
|
||||
-- Pretty-print a grammar AST as a nested, Lua-literal-like dump (debugging).
-- Unlike print_r, "rule" and "pos" keys are printed first so rule dumps read
-- naturally; remaining keys follow in pairs() order (unspecified). Cycles are
-- marked with "*<address>" via the same tostring() cache technique.
function peg.print_t ( t ) -- for debugging
	local print_r_cache={}
	local function sub_print_r (t,indent)
		if (print_r_cache[tostring(t)]) then
			-- already visited: print a back-reference marker, do not recurse
			print(indent.."*"..tostring(t))
		else
			print_r_cache[tostring(t)]=true
			if (type(t)=="table") then
				-- print one key/value pair in literal-ish syntax
				local function subprint (pos,val,indent)
					if (type(val)=="table") then
						print(indent.."{")
						sub_print_r(val,indent..string.rep(" ",string.len(pos)+8))
						print(indent..string.rep(" ",string.len(pos)-1).."},")
					else
						-- quote everything that is not a number
						if type(val) ~= "number" then
							val = "'"..tostring(val).."'"
						end

						if tonumber(pos) then
							-- array-style entry: value only
							print(indent..val..",")
						else
							-- hash-style entry: key=value
							print(indent..pos.."="..val..",")
						end
					end
				end
				-- emit "rule" and "pos" first, then everything else
				if t["rule"] then
					subprint("rule",t["rule"],indent)
				end
				if t["pos"] then
					subprint("pos",t["pos"],indent)
				end
				for pos,val in pairs(t) do
					if pos ~= "rule" and pos ~= "pos" then
						subprint(pos,val,indent)
					end
				end
			else
				-- non-table leaf: print the value itself
				print(indent..tostring(t))
			end
		end
	end
	sub_print_r(t,"")
end
|
||||
|
||||
--- Convert an absolute byte position in `subject` to a (line, column) pair.
-- Thin wrapper over the relabel (LPegLabel `re`) module's calcline.
function peg.calcline(subject, pos)
	return re.calcline(subject,pos)
end
|
||||
return peg
|
27
06/deps/parser-gen/rockspecs/parser-gen-1.0.rockspec
Normal file
27
06/deps/parser-gen/rockspecs/parser-gen-1.0.rockspec
Normal file
|
@ -0,0 +1,27 @@
|
|||
-- LuaRocks rockspec for parser-gen 1.0 (rockspec revision 7).
package = "parser-gen"
version = "1.0-7"
source = {
	url = "git://github.com/vsbenas/parser-gen",
	tag = "v1.0"
}
description = {
	summary = "A PEG parser generator that handles space characters, generates ASTs and adds error labels automatically.",
	homepage = "https://github.com/vsbenas/parser-gen",
	license = "MIT/X11"
}
dependencies = {
	"lua >= 5.1, < 5.4",
	"lpeglabel >= 0.12.2"
}
build = {
	type = "builtin",
	modules = {
		["parser-gen"] = "parser-gen.lua",
		["peg-parser"] = "peg-parser.lua",
		["stack"] = "stack.lua",
		["equals"] = "equals.lua",
		["errorgen"] = "errorgen.lua",

	}
}
|
27
06/deps/parser-gen/rockspecs/parser-gen-1.1.rockspec
Normal file
27
06/deps/parser-gen/rockspecs/parser-gen-1.1.rockspec
Normal file
|
@ -0,0 +1,27 @@
|
|||
-- LuaRocks rockspec for parser-gen 1.1.
package = "parser-gen"
version = "1.1-0"
source = {
	url = "git://github.com/vsbenas/parser-gen",
	tag = "v1.1"
}
description = {
	summary = "A PEG parser generator that handles space characters, generates ASTs and adds error labels automatically.",
	homepage = "https://github.com/vsbenas/parser-gen",
	license = "MIT/X11"
}
dependencies = {
	"lua >= 5.1, < 5.4",
	"lpeglabel >= 0.12.2"
}
build = {
	type = "builtin",
	modules = {
		["parser-gen"] = "parser-gen.lua",
		["peg-parser"] = "peg-parser.lua",
		["stack"] = "stack.lua",
		["equals"] = "equals.lua",
		["errorgen"] = "errorgen.lua",

	}
}
|
27
06/deps/parser-gen/rockspecs/parser-gen-1.2.rockspec
Normal file
27
06/deps/parser-gen/rockspecs/parser-gen-1.2.rockspec
Normal file
|
@ -0,0 +1,27 @@
|
|||
-- LuaRocks rockspec for parser-gen 1.2.
package = "parser-gen"
version = "1.2-0"
source = {
	url = "git://github.com/vsbenas/parser-gen",
	tag = "v1.2"
}
description = {
	summary = "A PEG parser generator that handles space characters, generates ASTs and adds error labels automatically.",
	homepage = "https://github.com/vsbenas/parser-gen",
	license = "MIT/X11"
}
dependencies = {
	"lua >= 5.1, < 5.4",
	"lpeglabel >= 0.12.2"
}
build = {
	type = "builtin",
	modules = {
		["parser-gen"] = "parser-gen.lua",
		["peg-parser"] = "peg-parser.lua",
		["stack"] = "stack.lua",
		["equals"] = "equals.lua",
		["errorgen"] = "errorgen.lua",

	}
}
|
66
06/deps/parser-gen/stack.lua
Normal file
66
06/deps/parser-gen/stack.lua
Normal file
|
@ -0,0 +1,66 @@
|
|||
-- Stack Table
-- Uses a table as stack, use <table>:push(value) and <table>:pop()

-- Lua 5.2+ provides table.unpack; Lua 5.1 only has the global `unpack`.
-- The rockspec declares "lua >= 5.1", so fall back accordingly.
local unpack = table.unpack or unpack

-- GLOBAL
local Stack = {}

-- Create a Table with stack functions
function Stack:Create()

	-- stack table
	local t = {}
	-- entry table (holds the actual values, oldest first)
	t._et = {}

	-- Push one or more values onto the stack, in argument order.
	-- NOTE(review): the truthiness guard means push(false, ...) and push()
	-- are silently ignored -- preserved as-is; confirm callers never push
	-- a leading false/nil.
	function t:push(...)
		if ... then
			local targs = {...}
			-- add values
			for _, v in ipairs(targs) do
				table.insert(self._et, v)
			end
		end
	end

	-- Pop `num` values (default 1) from the stack and return them,
	-- newest first. Stops early if the stack runs out.
	function t:pop(num)
		-- reassign instead of re-declaring a shadowing `local num`
		-- (fixes a diagnostics warning)
		num = num or 1

		-- return table
		local entries = {}

		-- get values into entries (`_`: index itself is unused)
		for _ = 1, num do
			-- get last entry
			if #self._et ~= 0 then
				table.insert(entries, self._et[#self._et])
				-- remove last value
				table.remove(self._et)
			else
				break
			end
		end
		-- return unpacked entries
		return unpack(entries)
	end

	-- number of entries currently on the stack
	function t:getn()
		return #self._et
	end

	-- print every index/value pair (debugging helper)
	function t:list()
		for i, v in pairs(self._et) do
			print(i, v)
		end
	end
	return t
end
|
||||
|
||||
return {Stack=Stack}
|
||||
|
||||
-- CHILLCODE™
|
Loading…
Add table
Add a link
Reference in a new issue