summaryrefslogtreecommitdiff
path: root/vendor/lpeglj/lpeglj.lua
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/lpeglj/lpeglj.lua')
-rw-r--r--vendor/lpeglj/lpeglj.lua1373
1 files changed, 1373 insertions, 0 deletions
diff --git a/vendor/lpeglj/lpeglj.lua b/vendor/lpeglj/lpeglj.lua
new file mode 100644
index 0000000..de4fca3
--- /dev/null
+++ b/vendor/lpeglj/lpeglj.lua
@@ -0,0 +1,1373 @@
+--[[
+LPEGLJ
+lpeglj.lua
+Main module and tree generation
+Copyright (C) 2014 Rostislav Sacek.
+based on LPeg v1.0 - PEG pattern matching for Lua
+Lua.org & PUC-Rio written by Roberto Ierusalimschy
+http://www.inf.puc-rio.br/~roberto/lpeg/
+
+** Permission is hereby granted, free of charge, to any person obtaining
+** a copy of this software and associated documentation files (the
+** "Software"), to deal in the Software without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Software, and to
+** permit persons to whom the Software is furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be
+** included in all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**
+** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+--]]
+
+assert(jit.version_num > 20000, "Use LuaJIT v2.0.1 or higher.")
+
+local ffi = require "ffi"
+local lpcode = require "lpcode"
+local lpprint = require "lpprint"
+local lpvm = require "lpvm"
+
+local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift
+
+ffi.cdef [[
+ int isalnum(int c);
+ int isalpha(int c);
+ int iscntrl(int c);
+ int isdigit(int c);
+ int isgraph(int c);
+ int islower(int c);
+ int isprint(int c);
+ int ispunct(int c);
+ int isspace(int c);
+ int isupper(int c);
+ int isxdigit(int c);
+]]
+
+local MAXBEHIND = 255
+local MAXRULES = 200
+local VERSION = "1.0.0.0LJ"
+
+local TChar = 0
+local TSet = 1
+local TAny = 2 -- standard PEG elements
+local TTrue = 3
+local TFalse = 4
+local TRep = 5
+local TSeq = 6
+local TChoice = 7
+local TNot = 8
+local TAnd = 9
+local TCall = 10
+local TOpenCall = 11
+local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule
+local TGrammar = 13 -- sib1 is initial (and first) rule
+local TBehind = 14 -- match behind
+local TCapture = 15 -- regular capture
+local TRunTime = 16 -- run-time capture
+
+local IAny = 0 -- if no char, fail
+local IChar = 1 -- if char != val, fail
+local ISet = 2 -- if char not in val, fail
+local ITestAny = 3 -- in no char, jump to 'offset'
+local ITestChar = 4 -- if char != val, jump to 'offset'
+local ITestSet = 5 -- if char not in val, jump to 'offset'
+local ISpan = 6 -- read a span of chars in val
+local IBehind = 7 -- walk back 'val' characters (fail if not possible)
+local IRet = 8 -- return from a rule
+local IEnd = 9 -- end of pattern
+local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
+local IJmp = 11 -- jump to 'offset'
+local ICall = 12 -- call rule at 'offset'
+local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
+local ICommit = 14 -- pop choice and jump to 'offset'
+local IPartialCommit = 15 -- update top choice to current position and jump
+local IBackCommit = 16 -- "fails" but jump to its own 'offset'
+local IFailTwice = 17 -- pop one choice and then fail
+local IFail = 18 -- go back to saved state on choice and jump to saved offset
+local IGiveup = 19 -- internal use
+local IFullCapture = 20 -- complete capture of last 'off' chars
+local IOpenCapture = 21 -- start a capture
+local ICloseCapture = 22
+local ICloseRunTime = 23
+
+local Cclose = 0
+local Cposition = 1
+local Cconst = 2
+local Cbackref = 3
+local Carg = 4
+local Csimple = 5
+local Ctable = 6
+local Cfunction = 7
+local Cquery = 8
+local Cstring = 9
+local Cnum = 10
+local Csubst = 11
+local Cfold = 12
+local Cruntime = 13
+local Cgroup = 14
+
+local PEnullable = 0
+local PEnofail = 1
+local PEleftrecursion = 2
+
+local newgrammar
+
+local RuleLR = 0x10000
+local Ruleused = 0x20000
+local BCapcandelete = 0x30000
+
+local LREnable = false
+
+-- number of siblings for each tree
+local numsiblings = {
+ 0, 0, 0, -- char, set, any
+ 0, 0, -- true, false
+ 1, -- rep
+ 2, 2, -- seq, choice
+ 1, 1, -- not, and
+ 0, 0, 2, 1, -- call, opencall, rule, grammar
+ 1, -- behind
+ 1, 1 -- capture, runtime capture
+}
+
+
+
+local patternid = 0
+local valuetable = {}
+
+local funcnames = setmetatable({}, { __mode = 'k' })
+
+local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT')
+local treepattern = ffi.typeof('TREEPATTERN')
+local patternelement = ffi.typeof('PATTERN_ELEMENT')
+local pattern = ffi.typeof('PATTERN')
+local settype = ffi.typeof('int32_t[8]')
+local uint32 = ffi.typeof('uint32_t[1]')
+
+-- Fix a TOpenCall into a TCall node, using table 'postable' to
+-- translate a key to its rule address in the tree. Raises an
+-- error if key does not exist.
+
+local function fixonecall(postable, grammar, index, valuetable)
+ local name = valuetable[grammar.p[index].val] -- get rule's name
+ local n = postable[name] -- query name in position table
+ -- no position?
+ if not n then
+ error(("rule '%s' undefined in given grammar"):format(type(name) == 'table' and '(a table)' or name), 0)
+ end
+ grammar.p[index].tag = TCall;
+ grammar.p[index].ps = n - index -- position relative to node
+ grammar.p[index + grammar.p[index].ps].cap = bit.bor(grammar.p[index + grammar.p[index].ps].cap, Ruleused)
+end
+
+
+-- Transform left associative constructions into right
+-- associative ones, for sequence and choice; that is:
+-- (t11 + t12) + t2 => t11 + (t12 + t2)
+-- (t11 * t12) * t2 => t11 * (t12 * t2)
+-- (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2))
+
+local function correctassociativity(tree, index)
+ local t1 = index + 1
+ assert(tree.p[index].tag == TChoice or tree.p[index].tag == TSeq)
+ while tree.p[t1].tag == tree.p[index].tag do
+ local n1size = tree.p[index].ps - 1; -- t1 == Op t11 t12
+ local n11size = tree.p[t1].ps - 1;
+ local n12size = n1size - n11size - 1
+ for i = 1, n11size do
+ ffi.copy(tree.p + index + i, tree.p + t1 + i, ffi.sizeof(treepatternelement))
+ end
+ tree.p[index].ps = n11size + 1
+ tree.p[index + tree.p[index].ps].tag = tree.p[index].tag
+ tree.p[index + tree.p[index].ps].ps = n12size + 1
+ end
+end
+
+
+-- Make final adjustments in a tree. Fix open calls in tree,
+-- making them refer to their respective rules or raising appropriate
+-- errors (if not inside a grammar). Correct associativity of associative
+-- constructions (making them right associative).
+
+local function finalfix(fix, postable, grammar, index, valuetable)
+
+ local tag = grammar.p[index].tag
+ --subgrammars were already fixed
+ if tag == TGrammar then
+ return
+ elseif tag == TOpenCall then
+ -- inside a grammar?
+ if fix then
+ fixonecall(postable, grammar, index, valuetable)
+ -- open call outside grammar
+ else
+ error(("rule '%s' used outside a grammar"):format(tostring(valuetable[grammar.p[index].val])), 0)
+ end
+ elseif tag == TSeq or tag == TChoice then
+ correctassociativity(grammar, index)
+ end
+ local ns = numsiblings[tag + 1]
+ if ns == 0 then
+ elseif ns == 1 then
+ return finalfix(fix, postable, grammar, index + 1, valuetable)
+ elseif ns == 2 then
+ finalfix(fix, postable, grammar, index + 1, valuetable)
+ return finalfix(fix, postable, grammar, index + grammar.p[index].ps, valuetable)
+ else
+ assert(false)
+ end
+end
+
+
+-- {======================================================
+-- Tree generation
+-- =======================================================
+
+local function newcharset()
+ local tree = treepattern(1)
+ valuetable[tree.id] = { settype() }
+ tree.p[0].tag = TSet
+ tree.p[0].val = 1
+ return tree, valuetable[tree.id][1]
+end
+
+
+-- add to tree a sequence where first sibling is 'sib' (with size
+-- 'sibsize')
+
+local function seqaux(tree, sib, start, sibsize)
+ tree.p[start].tag = TSeq;
+ tree.p[start].ps = sibsize + 1
+ ffi.copy(tree.p + start + 1, sib.p, ffi.sizeof(treepatternelement) * sibsize)
+end
+
+
+-- Build a sequence of 'n' nodes, each with tag 'tag' and 'val' got
+-- from the array 's' (or 0 if array is NULL). (TSeq is binary, so it
+-- must build a sequence of sequence of sequence...)
+
+local function fillseq(tree, tag, start, n, s)
+ -- initial n-1 copies of Seq tag; Seq ...
+ for i = 1, n - 1 do
+ tree.p[start].tag = TSeq
+ tree.p[start].ps = 2
+ tree.p[start + 1].tag = tag
+ if s then
+ tree.p[start + 1].val = s:sub(i, i):byte()
+ end
+ start = start + tree.p[start].ps
+ end
+ tree.p[start].tag = tag -- last one does not need TSeq
+ if s then
+ tree.p[start].val = s:sub(n, n):byte()
+ end
+end
+
+
+-- Numbers as patterns:
+-- 0 == true (always match); n == TAny repeated 'n' times;
+-- -n == not (TAny repeated 'n' times)
+
+local function numtree(n)
+ if n == 0 then
+ local tree = treepattern(1)
+ tree.p[0].tag = TTrue
+ return tree
+ else
+ local tree, start
+ if n > 0 then
+ tree = treepattern(2 * n - 1)
+ start = 0
+ -- negative: code it as !(-n)
+ else
+ n = -n;
+ tree = treepattern(2 * n)
+ tree.p[0].tag = TNot
+ start = 1
+ end
+ fillseq(tree, TAny, start, n) -- sequence of 'n' any's
+ return tree;
+ end
+end
+
+
+-- Convert value to a pattern
+
+local function getpatt(val, name)
+ local typ = type(val)
+ if typ == 'string' then
+ -- empty?
+ if #val == 0 then
+ local pat = treepattern(1)
+ pat.p[0].tag = TTrue -- always match
+ return pat
+ else
+ local tree = treepattern(2 * (#val - 1) + 1)
+ fillseq(tree, TChar, 0, #val, val) -- sequence of '#val' chars
+ return tree
+ end
+ elseif typ == 'number' then
+ return numtree(val)
+ elseif typ == 'boolean' then
+ local pat = treepattern(1)
+ pat.p[0].tag = val and TTrue or TFalse
+ return pat
+ elseif typ == 'table' then
+ return newgrammar(val)
+ elseif typ == 'function' then
+ if name and type(name) == 'string' then
+ funcnames[val] = name
+ end
+ local pat = treepattern(2)
+ valuetable[pat.id] = { val }
+ pat.p[0].tag = TRunTime
+ pat.p[0].val = 1
+ pat.p[1].tag = TTrue
+ return pat
+ elseif ffi.istype(treepattern, val) then
+ assert(val.treesize > 0)
+ return val
+ end
+ assert(false)
+end
+
+local function copykeys(ktable1, ktable2)
+ local ktable, offset = {}, 0
+ if not ktable1 and not ktable2 then
+ return ktable, 0
+ elseif ktable1 then
+ for i = 1, #ktable1 do
+ ktable[#ktable + 1] = ktable1[i]
+ end
+ offset = #ktable1
+ if not ktable2 then
+ return ktable, 0
+ end
+ end
+ if ktable2 then
+ for i = 1, #ktable2 do
+ ktable[#ktable + 1] = ktable2[i]
+ end
+ end
+ assert(#ktable < 65536, "too many Lua values in pattern")
+ return ktable, offset
+end
+
+local function correctkeys(tree, index, offset)
+ local tag = tree.p[index].tag
+ if (tag == TSet or tag == TRule or tag == TCall or tag == TRunTime or tag == TOpenCall or tag == TCapture) and
+ tree.p[index].val ~= 0 then
+ tree.p[index].val = tree.p[index].val + offset
+ end
+ local ns = numsiblings[tag + 1]
+ if ns == 0 then
+ elseif ns == 1 then
+ return correctkeys(tree, index + 1, offset)
+ elseif ns == 2 then
+ correctkeys(tree, index + 1, offset)
+ return correctkeys(tree, index + tree.p[index].ps, offset)
+ else
+ assert(false)
+ end
+end
+
+
+
+-- create a new tree, with a new root and one sibling.
+
+local function newroot1sib(tag, pat)
+ local tree1 = getpatt(pat)
+ local tree = treepattern(1 + tree1.treesize) -- create new tree
+ valuetable[tree.id] = copykeys(valuetable[tree1.id])
+ tree.p[0].tag = tag
+ ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+ return tree
+end
+
+
+-- create a new tree, with a new root and 2 siblings.
+
+local function newroot2sib(tag, pat1, pat2)
+ local tree1 = getpatt(pat1)
+ local tree2 = getpatt(pat2)
+ local tree = treepattern(1 + tree1.treesize + tree2.treesize) -- create new tree
+ local ktable, offset = copykeys(valuetable[tree1.id], valuetable[tree2.id])
+ valuetable[tree.id] = ktable
+ tree.p[0].tag = tag
+ tree.p[0].ps = 1 + tree1.treesize
+ ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+ ffi.copy(tree.p + 1 + tree1.treesize, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize)
+ if offset > 0 then
+ correctkeys(tree, 1 + tree1.treesize, offset)
+ end
+ return tree;
+end
+
+
+local function lp_P(val, name)
+ assert(type(val) ~= 'nil')
+ return getpatt(val, name)
+end
+
+
+-- sequence operator; optimizations:
+-- false x => false, x true => x, true x => x
+-- (cannot do x . false => false because x may have runtime captures)
+
+local function lp_seq(pat1, pat2)
+ local tree1 = getpatt(pat1)
+ local tree2 = getpatt(pat2)
+ -- false . x == false, x . true = x
+ if tree1.p[0].tag == TFalse or tree2.p[0].tag == TTrue then
+ return tree1
+ -- true . x = x
+ elseif tree1.p[0].tag == TTrue then
+ return tree2
+ else
+ return newroot2sib(TSeq, tree1, tree2)
+ end
+end
+
+
+-- choice operator; optimizations:
+-- charset / charset => charset
+-- true / x => true, x / false => x, false / x => x
+-- (x / true is not equivalent to true)
+
+local function lp_choice(pat1, pat2)
+ local tree1 = getpatt(pat1)
+ local tree2 = getpatt(pat2)
+ local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id])
+ local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id])
+ if charset1 and charset2 then
+ local t, set = newcharset()
+ for i = 0, 7 do
+ set[i] = bor(charset1[i], charset2[i])
+ end
+ return t
+ elseif lpcode.checkaux(tree1, PEnofail, 0) or tree2.p[0].tag == TFalse then
+ return tree1 -- true / x => true, x / false => x
+ elseif tree1.p[0].tag == TFalse then
+ return tree2 -- false / x => x
+ else
+ return newroot2sib(TChoice, tree1, tree2)
+ end
+end
+
+
+-- p^n
+
+local function lp_star(tree1, n)
+ local tree
+ n = tonumber(n)
+ assert(type(n) == 'number')
+ -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1)))
+ if n >= 0 then
+ tree = treepattern((n + 1) * (tree1.treesize + 1))
+ if lpcode.checkaux(tree1, PEnullable, 0) then
+ error("loop body may accept empty string", 0)
+ end
+ valuetable[tree.id] = copykeys(valuetable[tree1.id])
+ local start = 0
+ -- repeat 'n' times
+ for i = 1, n do
+ seqaux(tree, tree1, start, tree1.treesize)
+ start = start + tree.p[start].ps
+ end
+ tree.p[start].tag = TRep
+ ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+ -- choice (seq tree1 ... choice tree1 true ...) true
+ else
+ n = -n;
+ -- size = (choice + seq + tree1 + true) * n, but the last has no seq
+ tree = treepattern(n * (tree1.treesize + 3) - 1)
+ valuetable[tree.id] = copykeys(valuetable[tree1.id])
+ local start = 0
+ -- repeat (n - 1) times
+ for i = n, 2, -1 do
+ tree.p[start].tag = TChoice;
+ tree.p[start].ps = i * (tree1.treesize + 3) - 2
+ tree.p[start + tree.p[start].ps].tag = TTrue;
+ start = start + 1
+ seqaux(tree, tree1, start, tree1.treesize)
+ start = start + tree.p[start].ps
+ end
+ tree.p[start].tag = TChoice;
+ tree.p[start].ps = tree1.treesize + 1
+ tree.p[start + tree.p[start].ps].tag = TTrue
+ ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+ end
+ return tree
+end
+
+
+-- #p == &p
+
+local function lp_and(pat)
+ return newroot1sib(TAnd, pat)
+end
+
+
+-- -p == !p
+
+local function lp_not(pat)
+ return newroot1sib(TNot, pat)
+end
+
+
+-- [t1 - t2] == Seq (Not t2) t1
+-- If t1 and t2 are charsets, make their difference.
+
+local function lp_sub(pat1, pat2)
+ local tree1 = getpatt(pat1)
+ local tree2 = getpatt(pat2)
+ local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id])
+ local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id])
+ if charset1 and charset2 then
+ local tree, set = newcharset()
+ for i = 0, 7 do
+ set[i] = band(charset1[i], bnot(charset2[i]))
+ end
+ return tree
+ else
+ local tree = treepattern(2 + tree1.treesize + tree2.treesize)
+ local ktable, offset = copykeys(valuetable[tree2.id], valuetable[tree1.id])
+ valuetable[tree.id] = ktable
+ tree.p[0].tag = TSeq; -- sequence of...
+ tree.p[0].ps = 2 + tree2.treesize
+ tree.p[1].tag = TNot; -- ...not...
+ ffi.copy(tree.p + 2, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize)
+ ffi.copy(tree.p + tree2.treesize + 2, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize)
+ if offset > 0 then
+ correctkeys(tree, 2 + tree2.treesize, offset)
+ end
+ return tree
+ end
+end
+
+
+local function lp_set(val)
+ assert(type(val) == 'string')
+ local tree, set = newcharset()
+ for i = 1, #val do
+ local b = val:sub(i, i):byte()
+ set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31)))
+ end
+ return tree
+end
+
+
+local function lp_range(...)
+ local args = { ... }
+ local top = #args
+ local tree, set = newcharset()
+ for i = 1, top do
+ assert(#args[i] == 2, args[i] .. " range must have two characters")
+ for b = args[i]:sub(1, 1):byte(), args[i]:sub(2, 2):byte() do
+ set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31)))
+ end
+ end
+ return tree
+end
+
+
+-- Look-behind predicate
+
+local function lp_behind(pat)
+ local tree1 = getpatt(pat)
+ local n = lpcode.fixedlenx(tree1, 0, 0, 0)
+ assert(not lpcode.hascaptures(tree1, 0), "pattern have captures")
+ assert(n >= 0, "pattern may not have fixed length")
+ assert(n <= MAXBEHIND, "pattern too long to look behind")
+ local tree = newroot1sib(TBehind, pat)
+ tree.p[0].val = n;
+ return tree
+end
+
+
+-- Create a non-terminal
+
+local function lp_V(val, p)
+ assert(val, "non-nil value expected")
+ local tree = treepattern(1)
+ valuetable[tree.id] = { val }
+ tree.p[0].tag = TOpenCall
+ tree.p[0].val = 1
+ tree.p[0].cap = p or 0
+ return tree
+end
+
+
+-- Create a tree for a non-empty capture, with a body and
+-- optionally with an associated value
+
+local function capture_aux(cap, pat, val)
+ local tree = newroot1sib(TCapture, pat)
+ tree.p[0].cap = cap
+ if val then
+ local ind = #valuetable[tree.id] + 1
+ assert(ind <= 65536, "too many Lua values in pattern" .. ind)
+ valuetable[tree.id][ind] = val
+ tree.p[0].val = ind
+ end
+ return tree
+end
+
+
+-- Fill a tree with an empty capture, using an empty (TTrue) sibling.
+
+local function auxemptycap(tree, cap, par, start)
+ tree.p[start].tag = TCapture;
+ tree.p[start].cap = cap
+ if type(par) ~= 'nil' then
+ local ind = #valuetable[tree.id] + 1
+ assert(ind <= 65536, "too many Lua values in pattern")
+ valuetable[tree.id][ind] = par
+ tree.p[start].val = ind
+ end
+ tree.p[start + 1].tag = TTrue;
+end
+
+
+-- Create a tree for an empty capture
+
+local function newemptycap(cap, par)
+ local tree = treepattern(2)
+ if type(par) ~= 'nil' then valuetable[tree.id] = {} end
+ auxemptycap(tree, cap, par, 0)
+ return tree
+end
+
+
+-- Captures with syntax p / v
+-- (function capture, query capture, string capture, or number capture)
+
+local function lp_divcapture(pat, par, xxx)
+ local typ = type(par)
+ if typ == "function" then
+ return capture_aux(Cfunction, pat, par)
+ elseif typ == "table" then
+ return capture_aux(Cquery, pat, par)
+ elseif typ == "string" then
+ return capture_aux(Cstring, pat, par)
+ elseif typ == "number" then
+ local tree = newroot1sib(TCapture, pat)
+ assert(0 <= par and par <= 0xffff, "invalid number")
+ tree.p[0].cap = Cnum;
+ local ind = #valuetable[tree.id] + 1
+ assert(ind <= 65536, "too many Lua values in pattern")
+ valuetable[tree.id][ind] = par
+ tree.p[0].val = ind
+ return tree
+ else
+ error("invalid replacement value", 0)
+ end
+end
+
+
+local function lp_substcapture(pat)
+ return capture_aux(Csubst, pat)
+end
+
+
+local function lp_tablecapture(pat)
+ return capture_aux(Ctable, pat, 0)
+end
+
+
+local function lp_groupcapture(pat, val)
+ if not val then
+ return capture_aux(Cgroup, pat)
+ else
+ return capture_aux(Cgroup, pat, val)
+ end
+end
+
+
+local function lp_foldcapture(pat, fce)
+ assert(type(fce) == 'function')
+ return capture_aux(Cfold, pat, fce)
+end
+
+
+local function lp_simplecapture(pat)
+ return capture_aux(Csimple, pat)
+end
+
+
+local function lp_poscapture()
+ return newemptycap(Cposition)
+end
+
+
+local function lp_argcapture(val)
+ assert(type(val) == 'number')
+ local tree = newemptycap(Carg, 0)
+ local ind = #valuetable[tree.id] + 1
+ assert(ind <= 65536, "too many Lua values in pattern")
+ valuetable[tree.id][ind] = val
+ tree.p[0].val = ind
+ assert(0 < val and val <= 0xffff, "invalid argument index")
+ return tree
+end
+
+
+local function lp_backref(val)
+ return newemptycap(Cbackref, val)
+end
+
+
+-- Constant capture
+
+local function lp_constcapture(...)
+ local tree
+ local args = { ... }
+ local n = select('#', ...) -- number of values
+ -- no values?
+ if n == 0 then
+ tree = treepattern(1) -- no capture
+ tree.p[0].tag = TTrue
+ elseif n == 1 then
+ tree = newemptycap(Cconst, args[1]) -- single constant capture
+ -- create a group capture with all values
+ else
+ tree = treepattern(3 + 3 * (n - 1))
+ valuetable[tree.id] = {}
+ tree.p[0].tag = TCapture
+ tree.p[0].cap = Cgroup
+ local start = 1
+ for i = 1, n - 1 do
+ tree.p[start].tag = TSeq
+ tree.p[start].ps = 3
+ auxemptycap(tree, Cconst, args[i], start + 1)
+ start = start + tree.p[start].ps
+ end
+ auxemptycap(tree, Cconst, args[n], start)
+ end
+ return tree
+end
+
+
+local function lp_matchtime(pat, fce, name)
+ assert(type(fce) == 'function')
+ if name and type(name) == 'string' then
+ funcnames[fce] = name
+ end
+ local tree = newroot1sib(TRunTime, pat)
+ local ind = #valuetable[tree.id] + 1
+ assert(ind <= 65536, "too many Lua values in pattern")
+ valuetable[tree.id][ind] = fce
+ tree.p[0].val = ind
+ return tree
+end
+
+-- ======================================================
+
+
+
+-- ======================================================
+-- Grammar - Tree generation
+-- =======================================================
+
+
+-- return index and the pattern for the
+-- initial rule of grammar;
+-- also add that index into position table.
+
+local function getfirstrule(pat, postab)
+ local key
+ -- access first element
+ if type(pat[1]) == 'string' then
+ key = pat[1]
+ else
+ key = 1
+ end
+ local rule = pat[key]
+ if not rule then
+ error("grammar has no initial rule", 0)
+ end
+ -- initial rule not a pattern?
+ if not ffi.istype(treepattern, rule) then
+ error(("initial rule '%s' is not a pattern"):format(tostring(key)), 0)
+ end
+ postab[key] = 1
+ return key, rule
+end
+
+
+-- traverse grammar, collect all its keys and patterns
+-- into rule table. Create a new table (before all pairs key-pattern) to
+-- collect all keys and their associated positions in the final tree
+-- (the "position table").
+-- Return the number of rules and the total size
+-- for the new tree.
+
+local function collectrules(pat)
+ local n = 1; -- to count number of rules
+ local postab = {}
+ local firstkeyrule, firstrule = getfirstrule(pat, postab)
+ local rules = { firstkeyrule, firstrule }
+ local size = 2 + firstrule.treesize -- TGrammar + TRule + rule
+ for key, val in pairs(pat) do
+ -- initial rule?
+ if key ~= 1 and tostring(val) ~= tostring(firstrule) then
+ -- value is not a pattern?
+ if not ffi.istype(treepattern, val) then
+ error(("rule '%s' is not a pattern"):format(tostring(key)), 0)
+ end
+ rules[#rules + 1] = key
+ rules[#rules + 1] = val
+ postab[key] = size
+ size = 1 + size + val.treesize
+ n = n + 1
+ end
+ end
+ size = size + 1; -- TTrue to finish list of rules
+ return n, size, rules, postab
+end
+
+
+local function buildgrammar(grammar, rules, n, index, valuetable)
+ local ktable, offset = {}, 0
+ -- add each rule into new tree
+ for i = 1, n do
+ local size = rules[i * 2].treesize
+ grammar.p[index].tag = TRule;
+ grammar.p[index].cap = i; -- rule number
+ grammar.p[index].ps = size + 1; -- point to next rule
+ local ind = #ktable + 1
+ ktable[ind] = rules[i * 2 - 1]
+ grammar.p[index].val = ind
+ ffi.copy(grammar.p + index + 1, rules[i * 2].p, ffi.sizeof(treepatternelement) * size) -- copy rule
+ ktable, offset = copykeys(ktable, valuetable[rules[i * 2].id])
+ if offset > 0 then
+ correctkeys(grammar, index + 1, offset)
+ end
+ index = index + grammar.p[index].ps; -- move to next rule
+ end
+ grammar.p[index].tag = TTrue; -- finish list of rules
+ return ktable
+end
+
+
+-- Check whether a tree has potential infinite loops
+
+local function checkloops(tree, index)
+ local tag = tree.p[index].tag
+ if tag == TRep and lpcode.checkaux(tree, PEnullable, index + 1) then
+ return true
+ elseif tag == TGrammar then
+ return -- sub-grammars already checked
+ else
+ local tag = numsiblings[tree.p[index].tag + 1]
+ if tag == 0 then
+ return
+ elseif tag == 1 then
+ return checkloops(tree, index + 1)
+ elseif tag == 2 then
+ if checkloops(tree, index + 1) then
+ return true
+ else
+ return checkloops(tree, index + tree.p[index].ps)
+ end
+ else
+ assert(false)
+ end
+ end
+end
+
+-- Check whether a rule can be left recursive; returns PEleftrecursion in that
+-- case; otherwise return 1 iff pattern is nullable.
+
+local function verifyrule(rulename, tree, passed, nullable, index, valuetable)
+ local tag = tree.p[index].tag
+ if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then
+ return nullable; -- cannot pass from here
+ elseif tag == TTrue or tag == TBehind then
+ return true;
+ elseif tag == TNot or tag == TAnd or tag == TRep then
+ return verifyrule(rulename, tree, passed, true, index + 1, valuetable)
+ elseif tag == TCapture or tag == TRunTime then
+ return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
+ elseif tag == TCall then
+ local rule = valuetable[tree.p[index].val]
+ if rule == rulename then return PEleftrecursion end
+ if passed[rule] and passed[rule] > MAXRULES then
+ return nullable
+ end
+ return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
+ -- only check 2nd child if first is nullable
+ elseif tag == TSeq then
+ local res = verifyrule(rulename, tree, passed, false, index + 1, valuetable)
+ if res == PEleftrecursion then
+ return res
+ elseif not res then
+ return nullable
+ else
+ return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
+ end
+ -- must check both children
+ elseif tag == TChoice then
+ nullable = verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
+ if nullable == PEleftrecursion then return nullable end
+ return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable)
+ elseif tag == TRule then
+ local rule = valuetable[tree.p[index].val]
+ passed[rule] = (passed[rule] or 0) + 1
+ return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable)
+ elseif tag == TGrammar then
+ return lpcode.checkaux(tree, PEnullable, index) -- sub-grammar cannot be left recursive
+ else
+ assert(false)
+ end
+end
+
+
+local function verifygrammar(rule, index, valuetable)
+ -- check left-recursive rules
+ local LR = {}
+ local ind = index + 1
+ while rule.p[ind].tag == TRule do
+ local rulename = valuetable[rule.p[ind].val]
+ -- used rule
+ if rulename then
+ if verifyrule(rulename, rule, {}, false, ind + 1, valuetable) == PEleftrecursion then
+ if not LREnable then
+ error(("rule '%s' may be left recursive"):format(rulename), 0)
+ end
+ LR[rulename] = true
+ end
+ end
+ ind = ind + rule.p[ind].ps
+ end
+ assert(rule.p[ind].tag == TTrue)
+
+ for i = 0, rule.treesize - 1 do
+ if rule.p[i].tag == TRule and LR[valuetable[rule.p[i].val]] then
+ rule.p[i].cap = bor(rule.p[i].cap, RuleLR) --TRule can be left recursive
+ end
+ if rule.p[i].tag == TCall and LR[valuetable[rule.p[i].val]] then
+ if rule.p[i].cap == 0 then
+ rule.p[i].cap = 1 --TCall can be left recursive
+ end
+ end
+ end
+
+ -- check infinite loops inside rules
+ ind = index + 1
+ while rule.p[ind].tag == TRule do
+ -- used rule
+ if rule.p[ind].val then
+ if checkloops(rule, ind + 1) then
+ error(("empty loop in rule '%s'"):format(tostring(valuetable[rule.p[ind].val])), 0)
+ end
+ end
+ ind = ind + rule.p[ind].ps
+ end
+ assert(rule.p[ind].tag == TTrue)
+end
+
+
+-- Give a name for the initial rule if it is not referenced
+
+local function initialrulename(grammar, val, valuetable)
+ grammar.p[1].cap = bit.bor(grammar.p[1].cap, Ruleused)
+ -- initial rule is not referenced?
+ if grammar.p[1].val == 0 then
+ local ind = #valuetable + 1
+ assert(ind <= 65536, "too many Lua values in pattern")
+ valuetable[ind] = val
+ grammar.p[1].val = ind
+ end
+end
+
+
+function newgrammar(pat)
+ -- traverse grammar. Create a new table (before all pairs key-pattern) to
+ -- collect all keys and their associated positions in the final tree
+ -- (the "position table").
+ -- Return new tree.
+
+ local n, size, rules, postab = collectrules(pat)
+ local grammar = treepattern(size)
+ local start = 0
+ grammar.p[start].tag = TGrammar
+ grammar.p[start].val = n
+ valuetable[grammar.id] = buildgrammar(grammar, rules, n, start + 1, valuetable)
+ finalfix(true, postab, grammar, start + 1, valuetable[grammar.id])
+ initialrulename(grammar, rules[1], valuetable[grammar.id])
+ verifygrammar(grammar, 0, valuetable[grammar.id])
+ return grammar
+end
+
+-- ======================================================
+
+-- remove duplicity from value table
+
+local function reducevaluetable(p)
+ local vtable = valuetable[p.id]
+ local value = {}
+ local newvaluetable = {}
+
+ local function check(v)
+ if v > 0 then
+ local ord = value[vtable[v]]
+ if not ord then
+ newvaluetable[#newvaluetable + 1] = vtable[v]
+ ord = #newvaluetable
+ value[vtable[v]] = ord
+ end
+ return ord
+ end
+ return 0
+ end
+
+ local function itertree(p, index)
+ local tag = p.p[index].tag
+ if tag == TSet or tag == TCall or tag == TOpenCall or
+ tag == TRule or tag == TCapture or tag == TRunTime then
+ p.p[index].val = check(p.p[index].val)
+ end
+ local ns = numsiblings[tag + 1]
+ if ns == 0 then
+ elseif ns == 1 then
+ return itertree(p, index + 1)
+ elseif ns == 2 then
+ itertree(p, index + 1)
+ return itertree(p, index + p.p[index].ps)
+ else
+ assert(false)
+ end
+ end
+
+ if p.treesize > 0 then
+ itertree(p, 0)
+ end
+ if p.code ~= nil then
+ for i = 0, p.code.size - 1 do
+ local code = p.code.p[i].code
+ if code == ICall or code == IJmp then
+ p.code.p[i].aux = check(p.code.p[i].aux)
+ elseif code == ISet or code == ITestSet or code == ISpan then
+ p.code.p[i].val = check(p.code.p[i].val)
+ elseif code == IOpenCapture or code == IFullCapture then
+ p.code.p[i].offset = check(p.code.p[i].offset)
+ end
+ end
+ end
+ valuetable[p.id] = newvaluetable
+end
+
+
+local function checkalt(tree)
+ local notchecked = {}
+ local notinalternativerules = {}
+
+ local function iter(tree, index, choice, rule)
+ local tag = tree[index].tag
+ if tag == TCapture and bit.band(tree[index].cap, 0xffff) == Cgroup then
+ if not choice then
+ if rule then
+ notchecked[rule] = index
+ end
+ else
+ tree[index].cap = bit.bor(tree[index].cap, BCapcandelete)
+ end
+ elseif tag == TChoice then
+ choice = true
+ elseif tag == TRule then
+ rule = tree[index].val
+ if bit.band(tree[index].cap, 0xffff) - 1 == 0 then
+ notinalternativerules[rule] = notinalternativerules[rule] or true
+ end
+ elseif tag == TCall then
+ local r = tree[index].val
+ if not choice then
+ notinalternativerules[r] = notinalternativerules[r] or true
+ end
+ end
+ local sibs = numsiblings[tree[index].tag + 1] or 0
+ if sibs >= 1 then
+ iter(tree, index + 1, choice, rule)
+ if sibs >= 2 then
+ return iter(tree, index + tree[index].ps, choice, rule)
+ end
+ end
+ end
+
+ iter(tree, 0)
+ for k, v in pairs(notchecked) do
+ if not notinalternativerules[k] then
+ tree[v].cap = bit.bor(tree[v].cap, BCapcandelete)
+ end
+ end
+end
+
+
+local function prepcompile(p, index)
+ finalfix(false, nil, p, index, valuetable[p.id])
+ checkalt(p.p)
+ lpcode.compile(p, index, valuetable[p.id])
+ reducevaluetable(p)
+ return p.code
+end
+
+
+local function lp_printtree(pat, c)
+ assert(pat.treesize > 0)
+ if c then
+ finalfix(false, nil, pat, 0, valuetable[pat.id])
+ end
+ lpprint.printtree(pat.p, 0, 0, valuetable[pat.id])
+end
+
+
+local function lp_printcode(pat)
+ -- not compiled yet?
+ if pat.code == nil then
+ prepcompile(pat, 0)
+ end
+ lpprint.printpatt(pat.code, valuetable[pat.id])
+end
+
+
+-- Main match function
+
+local function lp_match(pat, s, init, ...)
+ local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
+ p.code = p.code ~= nil and p.code or prepcompile(p, 0)
+ return lpvm.match(p, s, init, valuetable[p.id], ...)
+end
+
+local function lp_streammatch(pat, init, ...)
+ local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
+ p.code = p.code ~= nil and p.code or prepcompile(p, 0)
+ return lpvm.streammatch(p, init, valuetable[p.id], ...)
+end
+
+-- Only for testing purpose
+-- stream emulation (send all chars from string one char after char)
+local function lp_emulatestreammatch(pat, s, init, ...)
+ local p = ffi.istype(treepattern, pat) and pat or getpatt(pat)
+ p.code = p.code ~= nil and p.code or prepcompile(p, 0)
+ return lpvm.emulatestreammatch(p, s, init, valuetable[p.id], ...)
+end
+
+-- {======================================================
+-- Library creation and functions not related to matching
+-- =======================================================
+
+local function lp_setmax(val)
+ lpvm.setmax(val)
+end
+
+local function lp_setmaxbehind(val)
+ lpvm.setmaxbehind(val)
+end
+
+local function lp_enableleftrecursion(val)
+ LREnable = val
+end
+
+local function lp_version()
+ return VERSION
+end
+
+
+local function lp_type(pat)
+ if ffi.istype(treepattern, pat) then
+ return "pattern"
+ end
+end
+
+
+local function createcat(tab, catname, catfce)
+ local t, set = newcharset()
+ for i = 0, 255 do
+ if catfce(i) ~= 0 then
+ set[rshift(i, 5)] = bor(set[rshift(i, 5)], lshift(1, band(i, 31)))
+ end
+ end
+ tab[catname] = t
+end
+
+
+local function lp_locale(tab)
+ tab = tab or {}
+ createcat(tab, "alnum", function(c) return ffi.C.isalnum(c) end)
+ createcat(tab, "alpha", function(c) return ffi.C.isalpha(c) end)
+ createcat(tab, "cntrl", function(c) return ffi.C.iscntrl(c) end)
+ createcat(tab, "digit", function(c) return ffi.C.isdigit(c) end)
+ createcat(tab, "graph", function(c) return ffi.C.isgraph(c) end)
+ createcat(tab, "lower", function(c) return ffi.C.islower(c) end)
+ createcat(tab, "print", function(c) return ffi.C.isprint(c) end)
+ createcat(tab, "punct", function(c) return ffi.C.ispunct(c) end)
+ createcat(tab, "space", function(c) return ffi.C.isspace(c) end)
+ createcat(tab, "upper", function(c) return ffi.C.isupper(c) end)
+ createcat(tab, "xdigit", function(c) return ffi.C.isxdigit(c) end)
+ return tab
+end
+
+
+local function lp_new(ct, size)
+ local pat = ffi.new(ct, size)
+ pat.treesize = size
+ patternid = patternid + 1
+ pat.id = patternid
+ return pat
+end
+
+
+local function lp_gc(ct)
+ valuetable[ct.id] = nil
+ if ct.code ~= nil then
+ ffi.C.free(ct.code.p)
+ ffi.C.free(ct.code)
+ end
+end
+
+local function lp_eq(ct1, ct2)
+ return tostring(ct1) == tostring(ct2)
+end
+
+local function lp_load(str, fcetab)
+ local pat, t = lpvm.load(str, fcetab, true)
+ valuetable[pat.id] = t
+ return pat
+end
+
+local function lp_loadfile(fname, fcetab)
+ local pat, t = lpvm.loadfile(fname, fcetab, true)
+ valuetable[pat.id] = t
+ return pat
+end
+
+local function lp_dump(ct, tree)
+ local funccount = 0
+ -- not compiled yet?
+ if ct.code == nil then
+ prepcompile(ct, 0)
+ end
+ local out = {}
+ if tree then
+ out[#out + 1] = ffi.string(uint32(ct.treesize), 4)
+ out[#out + 1] = ffi.string(ct.p, ffi.sizeof(treepatternelement) * ct.treesize)
+ else
+ out[#out + 1] = ffi.string(uint32(0), 4)
+ end
+ out[#out + 1] = ffi.string(uint32(ct.code.size), 4)
+ out[#out + 1] = ffi.string(ct.code.p, ct.code.size * ffi.sizeof(patternelement))
+ local t = valuetable[ct.id]
+ local len = t and #t or 0
+ out[#out + 1] = ffi.string(uint32(len), 4)
+ if len > 0 then
+ for _, val in ipairs(t) do
+ local typ = type(val)
+ if typ == 'string' then
+ out[#out + 1] = 'str'
+ out[#out + 1] = ffi.string(uint32(#val), 4)
+ out[#out + 1] = val
+ elseif typ == 'number' then
+ local val = tostring(val)
+ out[#out + 1] = 'num'
+ out[#out + 1] = ffi.string(uint32(#val), 4)
+ out[#out + 1] = val
+ elseif typ == 'cdata' then
+ out[#out + 1] = 'cdt'
+ out[#out + 1] = ffi.string(val, ffi.sizeof(val))
+ elseif typ == 'function' then
+ out[#out + 1] = 'fnc'
+ funccount = funccount + 1
+ local name = funcnames[val] or ('FNAME%03d'):format(funccount)
+ out[#out + 1] = ffi.string(uint32(#name), 4)
+ out[#out + 1] = name
+ if not funcnames[val] and debug.getupvalue(val, 1) then
+ io.write(("Patterns function (%d) contains upvalue (%s) - use symbol name for function (%s).\n"):format(funccount, debug.getupvalue(val, 1), name), 0)
+ end
+ local data = string.dump(val, true)
+ out[#out + 1] = ffi.string(uint32(#data), 4)
+ out[#out + 1] = data
+ else
+ error(("Type '%s' NYI for dump"):format(typ), 0)
+ end
+ end
+ end
+ return table.concat(out)
+end
+
+local function lp_save(ct, fname, tree)
+ local file = assert(io.open(fname, 'wb'))
+ file:write(lp_dump(ct, tree))
+ file:close()
+end
+
+
+local pattreg = {
+ ["ptree"] = lp_printtree,
+ ["pcode"] = lp_printcode,
+ ["match"] = lp_match,
+ ["streammatch"] = lp_streammatch,
+ ["emulatestreammatch"] = lp_emulatestreammatch,
+ ["setmaxbehind"] = lp_setmaxbehind,
+ ["B"] = lp_behind,
+ ["V"] = lp_V,
+ ["C"] = lp_simplecapture,
+ ["Cc"] = lp_constcapture,
+ ["Cmt"] = lp_matchtime,
+ ["Cb"] = lp_backref,
+ ["Carg"] = lp_argcapture,
+ ["Cp"] = lp_poscapture,
+ ["Cs"] = lp_substcapture,
+ ["Ct"] = lp_tablecapture,
+ ["Cf"] = lp_foldcapture,
+ ["Cg"] = lp_groupcapture,
+ ["P"] = lp_P,
+ ["S"] = lp_set,
+ ["R"] = lp_range,
+ ["L"] = lp_and,
+ ["locale"] = lp_locale,
+ ["version"] = lp_version,
+ ["setmaxstack"] = lp_setmax,
+ ["type"] = lp_type,
+ ["enableleftrecursion"] = lp_enableleftrecursion,
+ ["enablememoization"] = lpvm.enablememoization,
+ ["enabletracing"] = lpvm.enabletracing,
+ ["save"] = lp_save,
+ ["dump"] = lp_dump,
+ ["load"] = lp_load,
+ ["loadfile"] = lp_loadfile,
+ ["__mul"] = lp_seq,
+ ["__add"] = lp_choice,
+ ["__pow"] = lp_star,
+ ["__len"] = lp_and,
+ ["__div"] = lp_divcapture,
+ ["__unm"] = lp_not,
+ ["__sub"] = lp_sub,
+}
+
+local metareg = {
+ ["__gc"] = lp_gc,
+ ["__new"] = lp_new,
+ ["__mul"] = lp_seq,
+ ["__add"] = lp_choice,
+ ["__pow"] = lp_star,
+ ["__len"] = lp_and,
+ ["__div"] = lp_divcapture,
+ ["__unm"] = lp_not,
+ ["__sub"] = lp_sub,
+ ["__eq"] = lp_eq,
+ ["__index"] = pattreg
+}
+
+ffi.metatype(treepattern, metareg)
+
+return pattreg