The goal of stage 06 is to try parse zig synax in lua. I pulled in lpeglable 1.2.0 and parser-gen off github to get started. All of this needs to be cleaned up rather soon. Lua boostraps using tcc and musl from the previous stage. Since musl 0.6.0 doesn't support dynamic linking this build of lua doesn't support shared libraries. I couldn't easily patch musl with dlopen and friends so instead I link statically and call deps with c api.
374 lines
12 KiB
Lua
374 lines
12 KiB
Lua
-- $Id: pm.lua,v 1.48 2016/11/07 13:11:28 roberto Exp $
|
||
-- See Copyright Notice in file all.lua
|
||
|
||
print('testing pattern matching')
|
||
|
||
local function checkerror (msg, f, ...)
|
||
local s, err = pcall(f, ...)
|
||
assert(not s and string.find(err, msg))
|
||
end
|
||
|
||
|
||
function f(s, p)
|
||
local i,e = string.find(s, p)
|
||
if i then return string.sub(s, i, e) end
|
||
end
|
||
|
||
a,b = string.find('', '') -- empty patterns are tricky
|
||
assert(a == 1 and b == 0);
|
||
a,b = string.find('alo', '')
|
||
assert(a == 1 and b == 0)
|
||
a,b = string.find('a\0o a\0o a\0o', 'a', 1) -- first position
|
||
assert(a == 1 and b == 1)
|
||
a,b = string.find('a\0o a\0o a\0o', 'a\0o', 2) -- starts in the midle
|
||
assert(a == 5 and b == 7)
|
||
a,b = string.find('a\0o a\0o a\0o', 'a\0o', 9) -- starts in the midle
|
||
assert(a == 9 and b == 11)
|
||
a,b = string.find('a\0a\0a\0a\0\0ab', '\0ab', 2); -- finds at the end
|
||
assert(a == 9 and b == 11);
|
||
a,b = string.find('a\0a\0a\0a\0\0ab', 'b') -- last position
|
||
assert(a == 11 and b == 11)
|
||
assert(string.find('a\0a\0a\0a\0\0ab', 'b\0') == nil) -- check ending
|
||
assert(string.find('', '\0') == nil)
|
||
assert(string.find('alo123alo', '12') == 4)
|
||
assert(string.find('alo123alo', '^12') == nil)
|
||
|
||
assert(string.match("aaab", ".*b") == "aaab")
|
||
assert(string.match("aaa", ".*a") == "aaa")
|
||
assert(string.match("b", ".*b") == "b")
|
||
|
||
assert(string.match("aaab", ".+b") == "aaab")
|
||
assert(string.match("aaa", ".+a") == "aaa")
|
||
assert(not string.match("b", ".+b"))
|
||
|
||
assert(string.match("aaab", ".?b") == "ab")
|
||
assert(string.match("aaa", ".?a") == "aa")
|
||
assert(string.match("b", ".?b") == "b")
|
||
|
||
assert(f('aloALO', '%l*') == 'alo')
|
||
assert(f('aLo_ALO', '%a*') == 'aLo')
|
||
|
||
assert(f(" \n\r*&\n\r xuxu \n\n", "%g%g%g+") == "xuxu")
|
||
|
||
assert(f('aaab', 'a*') == 'aaa');
|
||
assert(f('aaa', '^.*$') == 'aaa');
|
||
assert(f('aaa', 'b*') == '');
|
||
assert(f('aaa', 'ab*a') == 'aa')
|
||
assert(f('aba', 'ab*a') == 'aba')
|
||
assert(f('aaab', 'a+') == 'aaa')
|
||
assert(f('aaa', '^.+$') == 'aaa')
|
||
assert(f('aaa', 'b+') == nil)
|
||
assert(f('aaa', 'ab+a') == nil)
|
||
assert(f('aba', 'ab+a') == 'aba')
|
||
assert(f('a$a', '.$') == 'a')
|
||
assert(f('a$a', '.%$') == 'a$')
|
||
assert(f('a$a', '.$.') == 'a$a')
|
||
assert(f('a$a', '$$') == nil)
|
||
assert(f('a$b', 'a$') == nil)
|
||
assert(f('a$a', '$') == '')
|
||
assert(f('', 'b*') == '')
|
||
assert(f('aaa', 'bb*') == nil)
|
||
assert(f('aaab', 'a-') == '')
|
||
assert(f('aaa', '^.-$') == 'aaa')
|
||
assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab')
|
||
assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab')
|
||
assert(f('alo xo', '.o$') == 'xo')
|
||
assert(f(' \n isto <20> assim', '%S%S*') == 'isto')
|
||
assert(f(' \n isto <20> assim', '%S*$') == 'assim')
|
||
assert(f(' \n isto <20> assim', '[a-z]*$') == 'assim')
|
||
assert(f('um caracter ? extra', '[^%sa-z]') == '?')
|
||
assert(f('', 'a?') == '')
|
||
assert(f('<EFBFBD>', '<EFBFBD>?') == '<EFBFBD>')
|
||
assert(f('<EFBFBD>bl', '<EFBFBD>?b?l?') == '<EFBFBD>bl')
|
||
assert(f(' <20>bl', '<EFBFBD>?b?l?') == '')
|
||
assert(f('aa', '^aa?a?a') == 'aa')
|
||
assert(f(']]]<5D>b', '[^]]') == '<EFBFBD>')
|
||
assert(f("0alo alo", "%x*") == "0a")
|
||
assert(f("alo alo", "%C+") == "alo alo")
|
||
print('+')
|
||
|
||
|
||
function f1(s, p)
|
||
p = string.gsub(p, "%%([0-9])", function (s)
|
||
return "%" .. (tonumber(s)+1)
|
||
end)
|
||
p = string.gsub(p, "^(^?)", "%1()", 1)
|
||
p = string.gsub(p, "($?)$", "()%1", 1)
|
||
local t = {string.match(s, p)}
|
||
return string.sub(s, t[1], t[#t] - 1)
|
||
end
|
||
|
||
assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o")
|
||
assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3')
|
||
assert(f1('=======', '^(=*)=%1$') == '=======')
|
||
assert(string.match('==========', '^([=]*)=%1$') == nil)
|
||
|
||
local function range (i, j)
|
||
if i <= j then
|
||
return i, range(i+1, j)
|
||
end
|
||
end
|
||
|
||
local abc = string.char(range(0, 255));
|
||
|
||
assert(string.len(abc) == 256)
|
||
|
||
function strset (p)
|
||
local res = {s=''}
|
||
string.gsub(abc, p, function (c) res.s = res.s .. c end)
|
||
return res.s
|
||
end;
|
||
|
||
assert(string.len(strset('[\200-\210]')) == 11)
|
||
|
||
assert(strset('[a-z]') == "abcdefghijklmnopqrstuvwxyz")
|
||
assert(strset('[a-z%d]') == strset('[%da-uu-z]'))
|
||
assert(strset('[a-]') == "-a")
|
||
assert(strset('[^%W]') == strset('[%w]'))
|
||
assert(strset('[]%%]') == '%]')
|
||
assert(strset('[a%-z]') == '-az')
|
||
assert(strset('[%^%[%-a%]%-b]') == '-[]^ab')
|
||
assert(strset('%Z') == strset('[\1-\255]'))
|
||
assert(strset('.') == strset('[\1-\255%z]'))
|
||
print('+');
|
||
|
||
assert(string.match("alo xyzK", "(%w+)K") == "xyz")
|
||
assert(string.match("254 K", "(%d*)K") == "")
|
||
assert(string.match("alo ", "(%w*)$") == "")
|
||
assert(string.match("alo ", "(%w+)$") == nil)
|
||
assert(string.find("(<28>lo)", "%(<28>") == 1)
|
||
local a, b, c, d, e = string.match("<EFBFBD>lo alo", "^(((.).).* (%w*))$")
|
||
assert(a == '<EFBFBD>lo alo' and b == '<EFBFBD>l' and c == '<EFBFBD>' and d == 'alo' and e == nil)
|
||
a, b, c, d = string.match('0123456789', '(.+(.?)())')
|
||
assert(a == '0123456789' and b == '' and c == 11 and d == nil)
|
||
print('+')
|
||
|
||
assert(string.gsub('<EFBFBD>lo <20>lo', '<EFBFBD>', 'x') == 'xlo xlo')
|
||
assert(string.gsub('alo <20>lo ', ' +$', '') == 'alo <20>lo') -- trim
|
||
assert(string.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo') -- double trim
|
||
assert(string.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ')
|
||
t = "ab<EFBFBD> d"
|
||
a, b = string.gsub(t, '(.)', '%1@')
|
||
assert('@'..a == string.gsub(t, '', '@') and b == 5)
|
||
a, b = string.gsub('ab<EFBFBD>d', '(.)', '%0@', 2)
|
||
assert(a == 'a@b@<40>d' and b == 2)
|
||
assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o')
|
||
assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
|
||
"xyz=abc-abc=xyz")
|
||
assert(string.gsub("abc", "%w", "%1%0") == "aabbcc")
|
||
assert(string.gsub("abc", "%w+", "%0%1") == "abcabc")
|
||
assert(string.gsub('<EFBFBD><EFBFBD><EFBFBD>', '$', '\0<EFBFBD><EFBFBD>') == '<EFBFBD><EFBFBD><EFBFBD>\0<EFBFBD><EFBFBD>')
|
||
assert(string.gsub('', '^', 'r') == 'r')
|
||
assert(string.gsub('', '$', 'r') == 'r')
|
||
print('+')
|
||
|
||
|
||
do -- new (5.3.3) semantics for empty matches
|
||
assert(string.gsub("a b cd", " *", "-") == "-a-b-c-d-")
|
||
|
||
local res = ""
|
||
local sub = "a \nbc\t\td"
|
||
local i = 1
|
||
for p, e in string.gmatch(sub, "()%s*()") do
|
||
res = res .. string.sub(sub, i, p - 1) .. "-"
|
||
i = e
|
||
end
|
||
assert(res == "-a-b-c-d-")
|
||
end
|
||
|
||
|
||
assert(string.gsub("um (dois) tres (quatro)", "(%(%w+%))", string.upper) ==
|
||
"um (DOIS) tres (QUATRO)")
|
||
|
||
do
|
||
local function setglobal (n,v) rawset(_G, n, v) end
|
||
string.gsub("a=roberto,roberto=a", "(%w+)=(%w%w*)", setglobal)
|
||
assert(_G.a=="roberto" and _G.roberto=="a")
|
||
end
|
||
|
||
function f(a,b) return string.gsub(a,'.',b) end
|
||
assert(string.gsub("trocar tudo em |teste|b| <20> |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
|
||
"trocar tudo em bbbbb <20> alalalalalal")
|
||
|
||
local function dostring (s) return load(s, "")() or "" end
|
||
assert(string.gsub("alo $a='x'$ novamente $return a$",
|
||
"$([^$]*)%$",
|
||
dostring) == "alo novamente x")
|
||
|
||
x = string.gsub("$x=string.gsub('alo', '.', string.upper)$ assim vai para $return x$",
|
||
"$([^$]*)%$", dostring)
|
||
assert(x == ' assim vai para ALO')
|
||
|
||
t = {}
|
||
s = 'a alo jose joao'
|
||
r = string.gsub(s, '()(%w+)()', function (a,w,b)
|
||
assert(string.len(w) == b-a);
|
||
t[a] = b-a;
|
||
end)
|
||
assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[13] == 4)
|
||
|
||
|
||
function isbalanced (s)
|
||
return string.find(string.gsub(s, "%b()", ""), "[()]") == nil
|
||
end
|
||
|
||
assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a"))
|
||
assert(not isbalanced("(9 ((8) 7) a b (\0 c) a"))
|
||
assert(string.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo')
|
||
|
||
|
||
local t = {"apple", "orange", "lime"; n=0}
|
||
assert(string.gsub("x and x and x", "x", function () t.n=t.n+1; return t[t.n] end)
|
||
== "apple and orange and lime")
|
||
|
||
t = {n=0}
|
||
string.gsub("first second word", "%w%w*", function (w) t.n=t.n+1; t[t.n] = w end)
|
||
assert(t[1] == "first" and t[2] == "second" and t[3] == "word" and t.n == 3)
|
||
|
||
t = {n=0}
|
||
assert(string.gsub("first second word", "%w+",
|
||
function (w) t.n=t.n+1; t[t.n] = w end, 2) == "first second word")
|
||
assert(t[1] == "first" and t[2] == "second" and t[3] == nil)
|
||
|
||
checkerror("invalid replacement value %(a table%)",
|
||
string.gsub, "alo", ".", {a = {}})
|
||
checkerror("invalid capture index %%2", string.gsub, "alo", ".", "%2")
|
||
checkerror("invalid capture index %%0", string.gsub, "alo", "(%0)", "a")
|
||
checkerror("invalid capture index %%1", string.gsub, "alo", "(%1)", "a")
|
||
checkerror("invalid use of '%%'", string.gsub, "alo", ".", "%x")
|
||
|
||
-- bug since 2.5 (C-stack overflow)
|
||
do
|
||
local function f (size)
|
||
local s = string.rep("a", size)
|
||
local p = string.rep(".?", size)
|
||
return pcall(string.match, s, p)
|
||
end
|
||
local r, m = f(80)
|
||
assert(r and #m == 80)
|
||
r, m = f(200000)
|
||
assert(not r and string.find(m, "too complex"))
|
||
end
|
||
|
||
if not _soft then
|
||
print("big strings")
|
||
local a = string.rep('a', 300000)
|
||
assert(string.find(a, '^a*.?$'))
|
||
assert(not string.find(a, '^a*.?b$'))
|
||
assert(string.find(a, '^a-.?$'))
|
||
|
||
-- bug in 5.1.2
|
||
a = string.rep('a', 10000) .. string.rep('b', 10000)
|
||
assert(not pcall(string.gsub, a, 'b'))
|
||
end
|
||
|
||
-- recursive nest of gsubs
|
||
function rev (s)
|
||
return string.gsub(s, "(.)(.+)", function (c,s1) return rev(s1)..c end)
|
||
end
|
||
|
||
local x = "abcdef"
|
||
assert(rev(rev(x)) == x)
|
||
|
||
|
||
-- gsub with tables
|
||
assert(string.gsub("alo alo", ".", {}) == "alo alo")
|
||
assert(string.gsub("alo alo", "(.)", {a="AA", l=""}) == "AAo AAo")
|
||
assert(string.gsub("alo alo", "(.).", {a="AA", l="K"}) == "AAo AAo")
|
||
assert(string.gsub("alo alo", "((.)(.?))", {al="AA", o=false}) == "AAo AAo")
|
||
|
||
assert(string.gsub("alo alo", "().", {'x','yy','zzz'}) == "xyyzzz alo")
|
||
|
||
t = {}; setmetatable(t, {__index = function (t,s) return string.upper(s) end})
|
||
assert(string.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI")
|
||
|
||
|
||
-- tests for gmatch
|
||
local a = 0
|
||
for i in string.gmatch('abcde', '()') do assert(i == a+1); a=i end
|
||
assert(a==6)
|
||
|
||
t = {n=0}
|
||
for w in string.gmatch("first second word", "%w+") do
|
||
t.n=t.n+1; t[t.n] = w
|
||
end
|
||
assert(t[1] == "first" and t[2] == "second" and t[3] == "word")
|
||
|
||
t = {3, 6, 9}
|
||
for i in string.gmatch ("xuxx uu ppar r", "()(.)%2") do
|
||
assert(i == table.remove(t, 1))
|
||
end
|
||
assert(#t == 0)
|
||
|
||
t = {}
|
||
for i,j in string.gmatch("13 14 10 = 11, 15= 16, 22=23", "(%d+)%s*=%s*(%d+)") do
|
||
t[tonumber(i)] = tonumber(j)
|
||
end
|
||
a = 0
|
||
for k,v in pairs(t) do assert(k+1 == v+0); a=a+1 end
|
||
assert(a == 3)
|
||
|
||
|
||
-- tests for `%f' (`frontiers')
|
||
|
||
assert(string.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x")
|
||
assert(string.gsub("[[]] [][] [[[[", "%f[[].", "x") == "x[]] x]x] x[[[")
|
||
assert(string.gsub("01abc45de3", "%f[%d]", ".") == ".01abc.45de.3")
|
||
assert(string.gsub("01abc45 de3x", "%f[%D]%w", ".") == "01.bc45 de3.")
|
||
assert(string.gsub("function", "%f[\1-\255]%w", ".") == ".unction")
|
||
assert(string.gsub("function", "%f[^\1-\255]", ".") == "function.")
|
||
|
||
assert(string.find("a", "%f[a]") == 1)
|
||
assert(string.find("a", "%f[^%z]") == 1)
|
||
assert(string.find("a", "%f[^%l]") == 2)
|
||
assert(string.find("aba", "%f[a%z]") == 3)
|
||
assert(string.find("aba", "%f[%z]") == 4)
|
||
assert(not string.find("aba", "%f[%l%z]"))
|
||
assert(not string.find("aba", "%f[^%l%z]"))
|
||
|
||
local i, e = string.find(" alo aalo allo", "%f[%S].-%f[%s].-%f[%S]")
|
||
assert(i == 2 and e == 5)
|
||
local k = string.match(" alo aalo allo", "%f[%S](.-%f[%s].-%f[%S])")
|
||
assert(k == 'alo ')
|
||
|
||
local a = {1, 5, 9, 14, 17,}
|
||
for k in string.gmatch("alo alo th02 is 1hat", "()%f[%w%d]") do
|
||
assert(table.remove(a, 1) == k)
|
||
end
|
||
assert(#a == 0)
|
||
|
||
|
||
-- malformed patterns
|
||
local function malform (p, m)
|
||
m = m or "malformed"
|
||
local r, msg = pcall(string.find, "a", p)
|
||
assert(not r and string.find(msg, m))
|
||
end
|
||
|
||
malform("(.", "unfinished capture")
|
||
malform(".)", "invalid pattern capture")
|
||
malform("[a")
|
||
malform("[]")
|
||
malform("[^]")
|
||
malform("[a%]")
|
||
malform("[a%")
|
||
malform("%b")
|
||
malform("%ba")
|
||
malform("%")
|
||
malform("%f", "missing")
|
||
|
||
-- \0 in patterns
|
||
assert(string.match("ab\0\1\2c", "[\0-\2]+") == "\0\1\2")
|
||
assert(string.match("ab\0\1\2c", "[\0-\0]+") == "\0")
|
||
assert(string.find("b$a", "$\0?") == 2)
|
||
assert(string.find("abc\0efg", "%\0") == 4)
|
||
assert(string.match("abc\0efg\0\1e\1g", "%b\0\1") == "\0efg\0\1e\1")
|
||
assert(string.match("abc\0\0\0", "%\0+") == "\0\0\0")
|
||
assert(string.match("abc\0\0\0", "%\0%\0?") == "\0\0")
|
||
|
||
-- magic char after \0
|
||
assert(string.find("abc\0\0","\0.") == 4)
|
||
assert(string.find("abcx\0\0abc\0abc","x\0\0abc\0a.") == 4)
|
||
|
||
print('OK')
|
||
|