diff options
| author | unwox <me@unwox.com> | 2024-09-26 17:46:38 +0600 |
|---|---|---|
| committer | unwox <me@unwox.com> | 2024-09-26 17:46:38 +0600 |
| commit | 9b82db238f9e2e02a76f95c793f8d6ef2387ecfd (patch) | |
| tree | cdb2a16d01f09553b560ab1034d53392d07bae42 /vendor/lpeglj/lpeglj.lua | |
init
Diffstat (limited to 'vendor/lpeglj/lpeglj.lua')
| -rw-r--r-- | vendor/lpeglj/lpeglj.lua | 1373 |
1 files changed, 1373 insertions, 0 deletions
diff --git a/vendor/lpeglj/lpeglj.lua b/vendor/lpeglj/lpeglj.lua new file mode 100644 index 0000000..de4fca3 --- /dev/null +++ b/vendor/lpeglj/lpeglj.lua @@ -0,0 +1,1373 @@ +--[[ +LPEGLJ +lpeglj.lua +Main module and tree generation +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] + +assert(jit.version_num > 20000, "Use LuaJIT v2.0.1 or higher.") + +local ffi = require "ffi" +local lpcode = require "lpcode" +local lpprint = require "lpprint" +local lpvm = require "lpvm" + +local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift + +ffi.cdef [[ + int isalnum(int c); + int isalpha(int c); + int iscntrl(int c); + int isdigit(int c); + int isgraph(int c); + int islower(int c); + int isprint(int c); + int ispunct(int c); + int isspace(int c); + int isupper(int c); + int isxdigit(int c); +]] + +local MAXBEHIND = 255 +local MAXRULES = 200 +local VERSION = "1.0.0.0LJ" + +local TChar = 0 +local TSet = 1 +local TAny = 2 -- standard PEG elements +local TTrue = 3 +local TFalse = 4 +local TRep = 5 +local TSeq = 6 +local TChoice = 7 +local TNot = 8 +local TAnd = 9 +local TCall = 10 +local TOpenCall = 11 +local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule +local TGrammar = 13 -- sib1 is initial (and first) rule +local TBehind = 14 -- match behind +local TCapture = 15 -- regular capture +local TRunTime = 16 -- run-time capture + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != val, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != val, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'val' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice to current position and jump +local IBackCommit = 16 -- "fails" but jump to its own 'offset' +local IFailTwice = 17 -- pop one choice and then fail +local IFail = 18 -- go back to saved state on choice and jump to saved offset +local IGiveup = 19 -- internal use +local IFullCapture = 20 -- complete capture of last 'off' chars +local IOpenCapture = 21 -- start a capture +local ICloseCapture = 22 +local ICloseRunTime = 23 + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + +local PEnullable = 0 +local PEnofail = 1 +local PEleftrecursion = 2 + +local newgrammar + +local RuleLR = 0x10000 +local Ruleused = 0x20000 +local BCapcandelete = 0x30000 + +local LREnable = false + +-- number of siblings for each tree +local numsiblings = { + 0, 0, 0, -- char, set, any + 0, 0, -- true, false + 1, -- rep + 2, 2, -- seq, choice + 1, 1, -- not, and + 0, 0, 2, 1, -- call, opencall, rule, grammar + 1, -- behind + 1, 1 -- capture, runtime capture +} + + + +local patternid = 0 +local valuetable = {} + +local funcnames = setmetatable({}, { __mode = 'k' }) + +local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT') +local treepattern = ffi.typeof('TREEPATTERN') +local patternelement = ffi.typeof('PATTERN_ELEMENT') +local pattern = ffi.typeof('PATTERN') +local settype = ffi.typeof('int32_t[8]') +local uint32 = ffi.typeof('uint32_t[1]') + +-- Fix a TOpenCall into a TCall node, using table 'postable' to +-- translate a key to its rule address in the tree. Raises an +-- error if key does not exist. + +local function fixonecall(postable, grammar, index, valuetable) + local name = valuetable[grammar.p[index].val] -- get rule's name + local n = postable[name] -- query name in position table + -- no position? + if not n then + error(("rule '%s' undefined in given grammar"):format(type(name) == 'table' and '(a table)' or name), 0) + end + grammar.p[index].tag = TCall; + grammar.p[index].ps = n - index -- position relative to node + grammar.p[index + grammar.p[index].ps].cap = bit.bor(grammar.p[index + grammar.p[index].ps].cap, Ruleused) +end + + +-- Transform left associative constructions into right +-- associative ones, for sequence and choice; that is: +-- (t11 + t12) + t2 => t11 + (t12 + t2) +-- (t11 * t12) * t2 => t11 * (t12 * t2) +-- (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) + +local function correctassociativity(tree, index) + local t1 = index + 1 + assert(tree.p[index].tag == TChoice or tree.p[index].tag == TSeq) + while tree.p[t1].tag == tree.p[index].tag do + local n1size = tree.p[index].ps - 1; -- t1 == Op t11 t12 + local n11size = tree.p[t1].ps - 1; + local n12size = n1size - n11size - 1 + for i = 1, n11size do + ffi.copy(tree.p + index + i, tree.p + t1 + i, ffi.sizeof(treepatternelement)) + end + tree.p[index].ps = n11size + 1 + tree.p[index + tree.p[index].ps].tag = tree.p[index].tag + tree.p[index + tree.p[index].ps].ps = n12size + 1 + end +end + + +-- Make final adjustments in a tree. Fix open calls in tree, +-- making them refer to their respective rules or raising appropriate +-- errors (if not inside a grammar). Correct associativity of associative +-- constructions (making them right associative). + +local function finalfix(fix, postable, grammar, index, valuetable) + + local tag = grammar.p[index].tag + --subgrammars were already fixed + if tag == TGrammar then + return + elseif tag == TOpenCall then + -- inside a grammar? + if fix then + fixonecall(postable, grammar, index, valuetable) + -- open call outside grammar + else + error(("rule '%s' used outside a grammar"):format(tostring(valuetable[grammar.p[index].val])), 0) + end + elseif tag == TSeq or tag == TChoice then + correctassociativity(grammar, index) + end + local ns = numsiblings[tag + 1] + if ns == 0 then + elseif ns == 1 then + return finalfix(fix, postable, grammar, index + 1, valuetable) + elseif ns == 2 then + finalfix(fix, postable, grammar, index + 1, valuetable) + return finalfix(fix, postable, grammar, index + grammar.p[index].ps, valuetable) + else + assert(false) + end +end + + +-- {====================================================== +-- Tree generation +-- ======================================================= + +local function newcharset() + local tree = treepattern(1) + valuetable[tree.id] = { settype() } + tree.p[0].tag = TSet + tree.p[0].val = 1 + return tree, valuetable[tree.id][1] +end + + +-- add to tree a sequence where first sibling is 'sib' (with size +-- 'sibsize') + +local function seqaux(tree, sib, start, sibsize) + tree.p[start].tag = TSeq; + tree.p[start].ps = sibsize + 1 + ffi.copy(tree.p + start + 1, sib.p, ffi.sizeof(treepatternelement) * sibsize) +end + + +-- Build a sequence of 'n' nodes, each with tag 'tag' and 'val' got +-- from the array 's' (or 0 if array is NULL). (TSeq is binary, so it +-- must build a sequence of sequence of sequence...) + +local function fillseq(tree, tag, start, n, s) + -- initial n-1 copies of Seq tag; Seq ... + for i = 1, n - 1 do + tree.p[start].tag = TSeq + tree.p[start].ps = 2 + tree.p[start + 1].tag = tag + if s then + tree.p[start + 1].val = s:sub(i, i):byte() + end + start = start + tree.p[start].ps + end + tree.p[start].tag = tag -- last one does not need TSeq + if s then + tree.p[start].val = s:sub(n, n):byte() + end +end + + +-- Numbers as patterns: +-- 0 == true (always match); n == TAny repeated 'n' times; +-- -n == not (TAny repeated 'n' times) + +local function numtree(n) + if n == 0 then + local tree = treepattern(1) + tree.p[0].tag = TTrue + return tree + else + local tree, start + if n > 0 then + tree = treepattern(2 * n - 1) + start = 0 + -- negative: code it as !(-n) + else + n = -n; + tree = treepattern(2 * n) + tree.p[0].tag = TNot + start = 1 + end + fillseq(tree, TAny, start, n) -- sequence of 'n' any's + return tree; + end +end + + +-- Convert value to a pattern + +local function getpatt(val, name) + local typ = type(val) + if typ == 'string' then + -- empty? + if #val == 0 then + local pat = treepattern(1) + pat.p[0].tag = TTrue -- always match + return pat + else + local tree = treepattern(2 * (#val - 1) + 1) + fillseq(tree, TChar, 0, #val, val) -- sequence of '#val' chars + return tree + end + elseif typ == 'number' then + return numtree(val) + elseif typ == 'boolean' then + local pat = treepattern(1) + pat.p[0].tag = val and TTrue or TFalse + return pat + elseif typ == 'table' then + return newgrammar(val) + elseif typ == 'function' then + if name and type(name) == 'string' then + funcnames[val] = name + end + local pat = treepattern(2) + valuetable[pat.id] = { val } + pat.p[0].tag = TRunTime + pat.p[0].val = 1 + pat.p[1].tag = TTrue + return pat + elseif ffi.istype(treepattern, val) then + assert(val.treesize > 0) + return val + end + assert(false) +end + +local function copykeys(ktable1, ktable2) + local ktable, offset = {}, 0 + if not ktable1 and not ktable2 then + return ktable, 0 + elseif ktable1 then + for i = 1, #ktable1 do + ktable[#ktable + 1] = ktable1[i] + end + offset = #ktable1 + if not ktable2 then + return ktable, 0 + end + end + if ktable2 then + for i = 1, #ktable2 do + ktable[#ktable + 1] = ktable2[i] + end + end + assert(#ktable < 65536, "too many Lua values in pattern") + return ktable, offset +end + +local function correctkeys(tree, index, offset) + local tag = tree.p[index].tag + if (tag == TSet or tag == TRule or tag == TCall or tag == TRunTime or tag == TOpenCall or tag == TCapture) and + tree.p[index].val ~= 0 then + tree.p[index].val = tree.p[index].val + offset + end + local ns = numsiblings[tag + 1] + if ns == 0 then + elseif ns == 1 then + return correctkeys(tree, index + 1, offset) + elseif ns == 2 then + correctkeys(tree, index + 1, offset) + return correctkeys(tree, index + tree.p[index].ps, offset) + else + assert(false) + end +end + + + +-- create a new tree, with a new root and one sibling. + +local function newroot1sib(tag, pat) + local tree1 = getpatt(pat) + local tree = treepattern(1 + tree1.treesize) -- create new tree + valuetable[tree.id] = copykeys(valuetable[tree1.id]) + tree.p[0].tag = tag + ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + return tree +end + + +-- create a new tree, with a new root and 2 siblings. + +local function newroot2sib(tag, pat1, pat2) + local tree1 = getpatt(pat1) + local tree2 = getpatt(pat2) + local tree = treepattern(1 + tree1.treesize + tree2.treesize) -- create new tree + local ktable, offset = copykeys(valuetable[tree1.id], valuetable[tree2.id]) + valuetable[tree.id] = ktable + tree.p[0].tag = tag + tree.p[0].ps = 1 + tree1.treesize + ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + ffi.copy(tree.p + 1 + tree1.treesize, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize) + if offset > 0 then + correctkeys(tree, 1 + tree1.treesize, offset) + end + return tree; +end + + +local function lp_P(val, name) + assert(type(val) ~= 'nil') + return getpatt(val, name) +end + + +-- sequence operator; optimizations: +-- false x => false, x true => x, true x => x +-- (cannot do x . false => false because x may have runtime captures) + +local function lp_seq(pat1, pat2) + local tree1 = getpatt(pat1) + local tree2 = getpatt(pat2) + -- false . x == false, x . true = x + if tree1.p[0].tag == TFalse or tree2.p[0].tag == TTrue then + return tree1 + -- true . x = x + elseif tree1.p[0].tag == TTrue then + return tree2 + else + return newroot2sib(TSeq, tree1, tree2) + end +end + + +-- choice operator; optimizations: +-- charset / charset => charset +-- true / x => true, x / false => x, false / x => x +-- (x / true is not equivalent to true) + +local function lp_choice(pat1, pat2) + local tree1 = getpatt(pat1) + local tree2 = getpatt(pat2) + local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id]) + local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id]) + if charset1 and charset2 then + local t, set = newcharset() + for i = 0, 7 do + set[i] = bor(charset1[i], charset2[i]) + end + return t + elseif lpcode.checkaux(tree1, PEnofail, 0) or tree2.p[0].tag == TFalse then + return tree1 -- true / x => true, x / false => x + elseif tree1.p[0].tag == TFalse then + return tree2 -- false / x => x + else + return newroot2sib(TChoice, tree1, tree2) + end +end + + +-- p^n + +local function lp_star(tree1, n) + local tree + n = tonumber(n) + assert(type(n) == 'number') + -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) + if n >= 0 then + tree = treepattern((n + 1) * (tree1.treesize + 1)) + if lpcode.checkaux(tree1, PEnullable, 0) then + error("loop body may accept empty string", 0) + end + valuetable[tree.id] = copykeys(valuetable[tree1.id]) + local start = 0 + -- repeat 'n' times + for i = 1, n do + seqaux(tree, tree1, start, tree1.treesize) + start = start + tree.p[start].ps + end + tree.p[start].tag = TRep + ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + -- choice (seq tree1 ... choice tree1 true ...) true + else + n = -n; + -- size = (choice + seq + tree1 + true) * n, but the last has no seq + tree = treepattern(n * (tree1.treesize + 3) - 1) + valuetable[tree.id] = copykeys(valuetable[tree1.id]) + local start = 0 + -- repeat (n - 1) times + for i = n, 2, -1 do + tree.p[start].tag = TChoice; + tree.p[start].ps = i * (tree1.treesize + 3) - 2 + tree.p[start + tree.p[start].ps].tag = TTrue; + start = start + 1 + seqaux(tree, tree1, start, tree1.treesize) + start = start + tree.p[start].ps + end + tree.p[start].tag = TChoice; + tree.p[start].ps = tree1.treesize + 1 + tree.p[start + tree.p[start].ps].tag = TTrue + ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + end + return tree +end + + +-- #p == &p + +local function lp_and(pat) + return newroot1sib(TAnd, pat) +end + + +-- -p == !p + +local function lp_not(pat) + return newroot1sib(TNot, pat) +end + + +-- [t1 - t2] == Seq (Not t2) t1 +-- If t1 and t2 are charsets, make their difference. + +local function lp_sub(pat1, pat2) + local tree1 = getpatt(pat1) + local tree2 = getpatt(pat2) + local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id]) + local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id]) + if charset1 and charset2 then + local tree, set = newcharset() + for i = 0, 7 do + set[i] = band(charset1[i], bnot(charset2[i])) + end + return tree + else + local tree = treepattern(2 + tree1.treesize + tree2.treesize) + local ktable, offset = copykeys(valuetable[tree2.id], valuetable[tree1.id]) + valuetable[tree.id] = ktable + tree.p[0].tag = TSeq; -- sequence of... + tree.p[0].ps = 2 + tree2.treesize + tree.p[1].tag = TNot; -- ...not... + ffi.copy(tree.p + 2, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize) + ffi.copy(tree.p + tree2.treesize + 2, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + if offset > 0 then + correctkeys(tree, 2 + tree2.treesize, offset) + end + return tree + end +end + + +local function lp_set(val) + assert(type(val) == 'string') + local tree, set = newcharset() + for i = 1, #val do + local b = val:sub(i, i):byte() + set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31))) + end + return tree +end + + +local function lp_range(...) + local args = { ... } + local top = #args + local tree, set = newcharset() + for i = 1, top do + assert(#args[i] == 2, args[i] .. " range must have two characters") + for b = args[i]:sub(1, 1):byte(), args[i]:sub(2, 2):byte() do + set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31))) + end + end + return tree +end + + +-- Look-behind predicate + +local function lp_behind(pat) + local tree1 = getpatt(pat) + local n = lpcode.fixedlenx(tree1, 0, 0, 0) + assert(not lpcode.hascaptures(tree1, 0), "pattern have captures") + assert(n >= 0, "pattern may not have fixed length") + assert(n <= MAXBEHIND, "pattern too long to look behind") + local tree = newroot1sib(TBehind, pat) + tree.p[0].val = n; + return tree +end + + +-- Create a non-terminal + +local function lp_V(val, p) + assert(val, "non-nil value expected") + local tree = treepattern(1) + valuetable[tree.id] = { val } + tree.p[0].tag = TOpenCall + tree.p[0].val = 1 + tree.p[0].cap = p or 0 + return tree +end + + +-- Create a tree for a non-empty capture, with a body and +-- optionally with an associated value + +local function capture_aux(cap, pat, val) + local tree = newroot1sib(TCapture, pat) + tree.p[0].cap = cap + if val then + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern" .. ind) + valuetable[tree.id][ind] = val + tree.p[0].val = ind + end + return tree +end + + +-- Fill a tree with an empty capture, using an empty (TTrue) sibling. + +local function auxemptycap(tree, cap, par, start) + tree.p[start].tag = TCapture; + tree.p[start].cap = cap + if type(par) ~= 'nil' then + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[tree.id][ind] = par + tree.p[start].val = ind + end + tree.p[start + 1].tag = TTrue; +end + + +-- Create a tree for an empty capture + +local function newemptycap(cap, par) + local tree = treepattern(2) + if type(par) ~= 'nil' then valuetable[tree.id] = {} end + auxemptycap(tree, cap, par, 0) + return tree +end + + +-- Captures with syntax p / v +-- (function capture, query capture, string capture, or number capture) + +local function lp_divcapture(pat, par, xxx) + local typ = type(par) + if typ == "function" then + return capture_aux(Cfunction, pat, par) + elseif typ == "table" then + return capture_aux(Cquery, pat, par) + elseif typ == "string" then + return capture_aux(Cstring, pat, par) + elseif typ == "number" then + local tree = newroot1sib(TCapture, pat) + assert(0 <= par and par <= 0xffff, "invalid number") + tree.p[0].cap = Cnum; + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[tree.id][ind] = par + tree.p[0].val = ind + return tree + else + error("invalid replacement value", 0) + end +end + + +local function lp_substcapture(pat) + return capture_aux(Csubst, pat) +end + + +local function lp_tablecapture(pat) + return capture_aux(Ctable, pat, 0) +end + + +local function lp_groupcapture(pat, val) + if not val then + return capture_aux(Cgroup, pat) + else + return capture_aux(Cgroup, pat, val) + end +end + + +local function lp_foldcapture(pat, fce) + assert(type(fce) == 'function') + return capture_aux(Cfold, pat, fce) +end + + +local function lp_simplecapture(pat) + return capture_aux(Csimple, pat) +end + + +local function lp_poscapture() + return newemptycap(Cposition) +end + + +local function lp_argcapture(val) + assert(type(val) == 'number') + local tree = newemptycap(Carg, 0) + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[tree.id][ind] = val + tree.p[0].val = ind + assert(0 < val and val <= 0xffff, "invalid argument index") + return tree +end + + +local function lp_backref(val) + return newemptycap(Cbackref, val) +end + + +-- Constant capture + +local function lp_constcapture(...) + local tree + local args = { ... } + local n = select('#', ...) -- number of values + -- no values? + if n == 0 then + tree = treepattern(1) -- no capture + tree.p[0].tag = TTrue + elseif n == 1 then + tree = newemptycap(Cconst, args[1]) -- single constant capture + -- create a group capture with all values + else + tree = treepattern(3 + 3 * (n - 1)) + valuetable[tree.id] = {} + tree.p[0].tag = TCapture + tree.p[0].cap = Cgroup + local start = 1 + for i = 1, n - 1 do + tree.p[start].tag = TSeq + tree.p[start].ps = 3 + auxemptycap(tree, Cconst, args[i], start + 1) + start = start + tree.p[start].ps + end + auxemptycap(tree, Cconst, args[n], start) + end + return tree +end + + +local function lp_matchtime(pat, fce, name) + assert(type(fce) == 'function') + if name and type(name) == 'string' then + funcnames[fce] = name + end + local tree = newroot1sib(TRunTime, pat) + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[tree.id][ind] = fce + tree.p[0].val = ind + return tree +end + +-- ====================================================== + + + +-- ====================================================== +-- Grammar - Tree generation +-- ======================================================= + + +-- return index and the pattern for the +-- initial rule of grammar; +-- also add that index into position table. + +local function getfirstrule(pat, postab) + local key + -- access first element + if type(pat[1]) == 'string' then + key = pat[1] + else + key = 1 + end + local rule = pat[key] + if not rule then + error("grammar has no initial rule", 0) + end + -- initial rule not a pattern? + if not ffi.istype(treepattern, rule) then + error(("initial rule '%s' is not a pattern"):format(tostring(key)), 0) + end + postab[key] = 1 + return key, rule +end + + +-- traverse grammar, collect all its keys and patterns +-- into rule table. Create a new table (before all pairs key-pattern) to +-- collect all keys and their associated positions in the final tree +-- (the "position table"). +-- Return the number of rules and the total size +-- for the new tree. + +local function collectrules(pat) + local n = 1; -- to count number of rules + local postab = {} + local firstkeyrule, firstrule = getfirstrule(pat, postab) + local rules = { firstkeyrule, firstrule } + local size = 2 + firstrule.treesize -- TGrammar + TRule + rule + for key, val in pairs(pat) do + -- initial rule? + if key ~= 1 and tostring(val) ~= tostring(firstrule) then + -- value is not a pattern? + if not ffi.istype(treepattern, val) then + error(("rule '%s' is not a pattern"):format(tostring(key)), 0) + end + rules[#rules + 1] = key + rules[#rules + 1] = val + postab[key] = size + size = 1 + size + val.treesize + n = n + 1 + end + end + size = size + 1; -- TTrue to finish list of rules + return n, size, rules, postab +end + + +local function buildgrammar(grammar, rules, n, index, valuetable) + local ktable, offset = {}, 0 + -- add each rule into new tree + for i = 1, n do + local size = rules[i * 2].treesize + grammar.p[index].tag = TRule; + grammar.p[index].cap = i; -- rule number + grammar.p[index].ps = size + 1; -- point to next rule + local ind = #ktable + 1 + ktable[ind] = rules[i * 2 - 1] + grammar.p[index].val = ind + ffi.copy(grammar.p + index + 1, rules[i * 2].p, ffi.sizeof(treepatternelement) * size) -- copy rule + ktable, offset = copykeys(ktable, valuetable[rules[i * 2].id]) + if offset > 0 then + correctkeys(grammar, index + 1, offset) + end + index = index + grammar.p[index].ps; -- move to next rule + end + grammar.p[index].tag = TTrue; -- finish list of rules + return ktable +end + + +-- Check whether a tree has potential infinite loops + +local function checkloops(tree, index) + local tag = tree.p[index].tag + if tag == TRep and lpcode.checkaux(tree, PEnullable, index + 1) then + return true + elseif tag == TGrammar then + return -- sub-grammars already checked + else + local tag = numsiblings[tree.p[index].tag + 1] + if tag == 0 then + return + elseif tag == 1 then + return checkloops(tree, index + 1) + elseif tag == 2 then + if checkloops(tree, index + 1) then + return true + else + return checkloops(tree, index + tree.p[index].ps) + end + else + assert(false) + end + end +end + +-- Check whether a rule can be left recursive; returns PEleftrecursion in that +-- case; otherwise return 1 iff pattern is nullable. + +local function verifyrule(rulename, tree, passed, nullable, index, valuetable) + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then + return nullable; -- cannot pass from here + elseif tag == TTrue or tag == TBehind then + return true; + elseif tag == TNot or tag == TAnd or tag == TRep then + return verifyrule(rulename, tree, passed, true, index + 1, valuetable) + elseif tag == TCapture or tag == TRunTime then + return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable) + elseif tag == TCall then + local rule = valuetable[tree.p[index].val] + if rule == rulename then return PEleftrecursion end + if passed[rule] and passed[rule] > MAXRULES then + return nullable + end + return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable) + -- only check 2nd child if first is nullable + elseif tag == TSeq then + local res = verifyrule(rulename, tree, passed, false, index + 1, valuetable) + if res == PEleftrecursion then + return res + elseif not res then + return nullable + else + return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable) + end + -- must check both children + elseif tag == TChoice then + nullable = verifyrule(rulename, tree, passed, nullable, index + 1, valuetable) + if nullable == PEleftrecursion then return nullable end + return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable) + elseif tag == TRule then + local rule = valuetable[tree.p[index].val] + passed[rule] = (passed[rule] or 0) + 1 + return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable) + elseif tag == TGrammar then + return lpcode.checkaux(tree, PEnullable, index) -- sub-grammar cannot be left recursive + else + assert(false) + end +end + + +local function verifygrammar(rule, index, valuetable) + -- check left-recursive rules + local LR = {} + local ind = index + 1 + while rule.p[ind].tag == TRule do + local rulename = valuetable[rule.p[ind].val] + -- used rule + if rulename then + if verifyrule(rulename, rule, {}, false, ind + 1, valuetable) == PEleftrecursion then + if not LREnable then + error(("rule '%s' may be left recursive"):format(rulename), 0) + end + LR[rulename] = true + end + end + ind = ind + rule.p[ind].ps + end + assert(rule.p[ind].tag == TTrue) + + for i = 0, rule.treesize - 1 do + if rule.p[i].tag == TRule and LR[valuetable[rule.p[i].val]] then + rule.p[i].cap = bor(rule.p[i].cap, RuleLR) --TRule can be left recursive + end + if rule.p[i].tag == TCall and LR[valuetable[rule.p[i].val]] then + if rule.p[i].cap == 0 then + rule.p[i].cap = 1 --TCall can be left recursive + end + end + end + + -- check infinite loops inside rules + ind = index + 1 + while rule.p[ind].tag == TRule do + -- used rule + if rule.p[ind].val then + if checkloops(rule, ind + 1) then + error(("empty loop in rule '%s'"):format(tostring(valuetable[rule.p[ind].val])), 0) + end + end + ind = ind + rule.p[ind].ps + end + assert(rule.p[ind].tag == TTrue) +end + + +-- Give a name for the initial rule if it is not referenced + +local function initialrulename(grammar, val, valuetable) + grammar.p[1].cap = bit.bor(grammar.p[1].cap, Ruleused) + -- initial rule is not referenced? + if grammar.p[1].val == 0 then + local ind = #valuetable + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[ind] = val + grammar.p[1].val = ind + end +end + + +function newgrammar(pat) + -- traverse grammar. Create a new table (before all pairs key-pattern) to + -- collect all keys and their associated positions in the final tree + -- (the "position table"). + -- Return new tree. + + local n, size, rules, postab = collectrules(pat) + local grammar = treepattern(size) + local start = 0 + grammar.p[start].tag = TGrammar + grammar.p[start].val = n + valuetable[grammar.id] = buildgrammar(grammar, rules, n, start + 1, valuetable) + finalfix(true, postab, grammar, start + 1, valuetable[grammar.id]) + initialrulename(grammar, rules[1], valuetable[grammar.id]) + verifygrammar(grammar, 0, valuetable[grammar.id]) + return grammar +end + +-- ====================================================== + +-- remove duplicity from value table + +local function reducevaluetable(p) + local vtable = valuetable[p.id] + local value = {} + local newvaluetable = {} + + local function check(v) + if v > 0 then + local ord = value[vtable[v]] + if not ord then + newvaluetable[#newvaluetable + 1] = vtable[v] + ord = #newvaluetable + value[vtable[v]] = ord + end + return ord + end + return 0 + end + + local function itertree(p, index) + local tag = p.p[index].tag + if tag == TSet or tag == TCall or tag == TOpenCall or + tag == TRule or tag == TCapture or tag == TRunTime then + p.p[index].val = check(p.p[index].val) + end + local ns = numsiblings[tag + 1] + if ns == 0 then + elseif ns == 1 then + return itertree(p, index + 1) + elseif ns == 2 then + itertree(p, index + 1) + return itertree(p, index + p.p[index].ps) + else + assert(false) + end + end + + if p.treesize > 0 then + itertree(p, 0) + end + if p.code ~= nil then + for i = 0, p.code.size - 1 do + local code = p.code.p[i].code + if code == ICall or code == IJmp then + p.code.p[i].aux = check(p.code.p[i].aux) + elseif code == ISet or code == ITestSet or code == ISpan then + p.code.p[i].val = check(p.code.p[i].val) + elseif code == IOpenCapture or code == IFullCapture then + p.code.p[i].offset = check(p.code.p[i].offset) + end + end + end + valuetable[p.id] = newvaluetable +end + + +local function checkalt(tree) + local notchecked = {} + local notinalternativerules = {} + + local function iter(tree, index, choice, rule) + local tag = tree[index].tag + if tag == TCapture and bit.band(tree[index].cap, 0xffff) == Cgroup then + if not choice then + if rule then + notchecked[rule] = index + end + else + tree[index].cap = bit.bor(tree[index].cap, BCapcandelete) + end + elseif tag == TChoice then + choice = true + elseif tag == TRule then + rule = tree[index].val + if bit.band(tree[index].cap, 0xffff) - 1 == 0 then + notinalternativerules[rule] = notinalternativerules[rule] or true + end + elseif tag == TCall then + local r = tree[index].val + if not choice then + notinalternativerules[r] = notinalternativerules[r] or true + end + end + local sibs = numsiblings[tree[index].tag + 1] or 0 + if sibs >= 1 then + iter(tree, index + 1, choice, rule) + if sibs >= 2 then + return iter(tree, index + tree[index].ps, choice, rule) + end + end + end + + iter(tree, 0) + for k, v in pairs(notchecked) do + if not notinalternativerules[k] then + tree[v].cap = bit.bor(tree[v].cap, BCapcandelete) + end + end +end + + +local function prepcompile(p, index) + finalfix(false, nil, p, index, valuetable[p.id]) + checkalt(p.p) + lpcode.compile(p, index, valuetable[p.id]) + reducevaluetable(p) + return p.code +end + + +local function lp_printtree(pat, c) + assert(pat.treesize > 0) + if c then + finalfix(false, nil, pat, 0, valuetable[pat.id]) + end + lpprint.printtree(pat.p, 0, 0, valuetable[pat.id]) +end + + +local function lp_printcode(pat) + -- not compiled yet? + if pat.code == nil then + prepcompile(pat, 0) + end + lpprint.printpatt(pat.code, valuetable[pat.id]) +end + + +-- Main match function + +local function lp_match(pat, s, init, ...) + local p = ffi.istype(treepattern, pat) and pat or getpatt(pat) + p.code = p.code ~= nil and p.code or prepcompile(p, 0) + return lpvm.match(p, s, init, valuetable[p.id], ...) +end + +local function lp_streammatch(pat, init, ...) + local p = ffi.istype(treepattern, pat) and pat or getpatt(pat) + p.code = p.code ~= nil and p.code or prepcompile(p, 0) + return lpvm.streammatch(p, init, valuetable[p.id], ...) +end + +-- Only for testing purpose +-- stream emulation (send all chars from string one char after char) +local function lp_emulatestreammatch(pat, s, init, ...) + local p = ffi.istype(treepattern, pat) and pat or getpatt(pat) + p.code = p.code ~= nil and p.code or prepcompile(p, 0) + return lpvm.emulatestreammatch(p, s, init, valuetable[p.id], ...) +end + +-- {====================================================== +-- Library creation and functions not related to matching +-- ======================================================= + +local function lp_setmax(val) + lpvm.setmax(val) +end + +local function lp_setmaxbehind(val) + lpvm.setmaxbehind(val) +end + +local function lp_enableleftrecursion(val) + LREnable = val +end + +local function lp_version() + return VERSION +end + + +local function lp_type(pat) + if ffi.istype(treepattern, pat) then + return "pattern" + end +end + + +local function createcat(tab, catname, catfce) + local t, set = newcharset() + for i = 0, 255 do + if catfce(i) ~= 0 then + set[rshift(i, 5)] = bor(set[rshift(i, 5)], lshift(1, band(i, 31))) + end + end + tab[catname] = t +end + + +local function lp_locale(tab) + tab = tab or {} + createcat(tab, "alnum", function(c) return ffi.C.isalnum(c) end) + createcat(tab, "alpha", function(c) return ffi.C.isalpha(c) end) + createcat(tab, "cntrl", function(c) return ffi.C.iscntrl(c) end) + createcat(tab, "digit", function(c) return ffi.C.isdigit(c) end) + createcat(tab, "graph", function(c) return ffi.C.isgraph(c) end) + createcat(tab, "lower", function(c) return ffi.C.islower(c) end) + createcat(tab, "print", function(c) return ffi.C.isprint(c) end) + createcat(tab, "punct", function(c) return ffi.C.ispunct(c) end) + createcat(tab, "space", function(c) return ffi.C.isspace(c) end) + createcat(tab, "upper", function(c) return ffi.C.isupper(c) end) + createcat(tab, "xdigit", function(c) return ffi.C.isxdigit(c) end) + return tab +end + + +local function lp_new(ct, size) + local pat = ffi.new(ct, size) + pat.treesize = size + patternid = patternid + 1 + pat.id = patternid + return pat +end + + +local function lp_gc(ct) + valuetable[ct.id] = nil + if ct.code ~= nil then + ffi.C.free(ct.code.p) + ffi.C.free(ct.code) + end +end + +local function lp_eq(ct1, ct2) + return tostring(ct1) == tostring(ct2) +end + +local function lp_load(str, fcetab) + local pat, t = lpvm.load(str, fcetab, true) + valuetable[pat.id] = t + return pat +end + +local function lp_loadfile(fname, fcetab) + local pat, t = lpvm.loadfile(fname, fcetab, true) + valuetable[pat.id] = t + return pat +end + +local function lp_dump(ct, tree) + local funccount = 0 + -- not compiled yet? + if ct.code == nil then + prepcompile(ct, 0) + end + local out = {} + if tree then + out[#out + 1] = ffi.string(uint32(ct.treesize), 4) + out[#out + 1] = ffi.string(ct.p, ffi.sizeof(treepatternelement) * ct.treesize) + else + out[#out + 1] = ffi.string(uint32(0), 4) + end + out[#out + 1] = ffi.string(uint32(ct.code.size), 4) + out[#out + 1] = ffi.string(ct.code.p, ct.code.size * ffi.sizeof(patternelement)) + local t = valuetable[ct.id] + local len = t and #t or 0 + out[#out + 1] = ffi.string(uint32(len), 4) + if len > 0 then + for _, val in ipairs(t) do + local typ = type(val) + if typ == 'string' then + out[#out + 1] = 'str' + out[#out + 1] = ffi.string(uint32(#val), 4) + out[#out + 1] = val + elseif typ == 'number' then + local val = tostring(val) + out[#out + 1] = 'num' + out[#out + 1] = ffi.string(uint32(#val), 4) + out[#out + 1] = val + elseif typ == 'cdata' then + out[#out + 1] = 'cdt' + out[#out + 1] = ffi.string(val, ffi.sizeof(val)) + elseif typ == 'function' then + out[#out + 1] = 'fnc' + funccount = funccount + 1 + local name = funcnames[val] or ('FNAME%03d'):format(funccount) + out[#out + 1] = ffi.string(uint32(#name), 4) + out[#out + 1] = name + if not funcnames[val] and debug.getupvalue(val, 1) then + io.write(("Patterns function (%d) contains upvalue (%s) - use symbol name for function (%s).\n"):format(funccount, debug.getupvalue(val, 1), name), 0) + end + local data = string.dump(val, true) + out[#out + 1] = ffi.string(uint32(#data), 4) + out[#out + 1] = data + else + error(("Type '%s' NYI for dump"):format(typ), 0) + end + end + end + return table.concat(out) +end + +local function lp_save(ct, fname, tree) + local file = assert(io.open(fname, 'wb')) + file:write(lp_dump(ct, tree)) + file:close() +end + + +local pattreg = { + ["ptree"] = lp_printtree, + ["pcode"] = lp_printcode, + ["match"] = lp_match, + ["streammatch"] = lp_streammatch, + ["emulatestreammatch"] = lp_emulatestreammatch, + ["setmaxbehind"] = lp_setmaxbehind, + ["B"] = lp_behind, + ["V"] = lp_V, + ["C"] = lp_simplecapture, + ["Cc"] = lp_constcapture, + ["Cmt"] = lp_matchtime, + ["Cb"] = lp_backref, + ["Carg"] = lp_argcapture, + ["Cp"] = lp_poscapture, + ["Cs"] = lp_substcapture, + ["Ct"] = lp_tablecapture, + ["Cf"] = lp_foldcapture, + ["Cg"] = lp_groupcapture, + ["P"] = lp_P, + ["S"] = lp_set, + ["R"] = lp_range, + ["L"] = lp_and, + ["locale"] = lp_locale, + ["version"] = lp_version, + ["setmaxstack"] = lp_setmax, + ["type"] = lp_type, + ["enableleftrecursion"] = lp_enableleftrecursion, + ["enablememoization"] = lpvm.enablememoization, + ["enabletracing"] = lpvm.enabletracing, + ["save"] = lp_save, + ["dump"] = lp_dump, + ["load"] = lp_load, + ["loadfile"] = lp_loadfile, + ["__mul"] = lp_seq, + ["__add"] = lp_choice, + ["__pow"] = lp_star, + ["__len"] = lp_and, + ["__div"] = lp_divcapture, + ["__unm"] = lp_not, + ["__sub"] = lp_sub, +} + +local metareg = { + ["__gc"] = lp_gc, + ["__new"] = lp_new, + ["__mul"] = lp_seq, + ["__add"] = lp_choice, + ["__pow"] = lp_star, + ["__len"] = lp_and, + ["__div"] = lp_divcapture, + ["__unm"] = lp_not, + ["__sub"] = lp_sub, + ["__eq"] = lp_eq, + ["__index"] = pattreg +} + +ffi.metatype(treepattern, metareg) + +return pattreg |
