diff options
| author | unwox <me@unwox.com> | 2024-09-26 17:46:38 +0600 |
|---|---|---|
| committer | unwox <me@unwox.com> | 2024-09-26 17:46:38 +0600 |
| commit | 9b82db238f9e2e02a76f95c793f8d6ef2387ecfd (patch) | |
| tree | cdb2a16d01f09553b560ab1034d53392d07bae42 /vendor/lpeglj | |
init
Diffstat (limited to 'vendor/lpeglj')
| -rw-r--r-- | vendor/lpeglj/lpcap.lua | 625 | ||||
| -rw-r--r-- | vendor/lpeglj/lpcode.lua | 1057 | ||||
| -rw-r--r-- | vendor/lpeglj/lpeglj.lua | 1373 | ||||
| -rw-r--r-- | vendor/lpeglj/lpprint.lua | 356 | ||||
| -rw-r--r-- | vendor/lpeglj/lpvm.lua | 1041 | ||||
| -rw-r--r-- | vendor/lpeglj/re.lua | 286 |
6 files changed, 4738 insertions, 0 deletions
diff --git a/vendor/lpeglj/lpcap.lua b/vendor/lpeglj/lpcap.lua new file mode 100644 index 0000000..06fbee8 --- /dev/null +++ b/vendor/lpeglj/lpcap.lua @@ -0,0 +1,625 @@ +--[[ +LPEGLJ +lpcap.lua +Capture functions +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] +local ffi = require "ffi" + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + +local MAXSTRCAPS = 10 + +local pushcapture +local addonestring + + +-- Goes back in a list of captures looking for an open capture +-- corresponding to a close + +local function findopen(cs, index) + local n = 0; -- number of closes waiting an open + while true do + index = index - 1 + if cs.ocap[index].kind == Cclose then + n = n + 1 -- one more open to skip + elseif cs.ocap[index].siz == 0 then + if n == 0 then + return index + end + n = n - 1 + end + end +end + + +local function checknextcap(cs, captop) + local cap = cs.cap; + -- not a single capture? ((cap)->siz != 0) + if cs.ocap[cap].siz == 0 then + local n = 0; -- number of opens waiting a close + -- look for corresponding close + while true do + cap = cap + 1 + if cap > captop then return end + if cs.ocap[cap].kind == Cclose then + n = n - 1 + if n + 1 == 0 then + break; + end + elseif cs.ocap[cap].siz == 0 then + n = n + 1 + end + end + end + cap = cap + 1; -- + 1 to skip last close (or entire single capture) + if cap > captop then return end + return true +end + + +-- Go to the next capture + +local function nextcap(cs) + local cap = cs.cap; + -- not a single capture? ((cap)->siz != 0) + if cs.ocap[cap].siz == 0 then + local n = 0; -- number of opens waiting a close + -- look for corresponding close + while true do + cap = cap + 1 + if cs.ocap[cap].kind == Cclose then + n = n - 1 + if n + 1 == 0 then + break; + end + elseif cs.ocap[cap].siz == 0 then + n = n + 1 + end + end + end + cs.cap = cap + 1; -- + 1 to skip last close (or entire single capture) +end + + +-- Push on the Lua stack all values generated by nested captures inside +-- the current capture. Returns number of values pushed. 'addextra' +-- makes it push the entire match after all captured values. The +-- entire match is pushed also if there are no other nested values, +-- so the function never returns zero. + +local function pushnestedvalues(cs, addextra, out, valuetable) + local co = cs.cap + cs.cap = cs.cap + 1 + -- no nested captures? + if cs.ocap[cs.cap - 1].siz ~= 0 then + local st = cs.ocap[co].s + local l = cs.ocap[co].siz - 1 + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1) + return 1; -- that is it + else + local n = 0; + while cs.ocap[cs.cap].kind ~= Cclose do -- repeat for all nested patterns + n = n + pushcapture(cs, out, valuetable); + end + -- need extra? + if addextra or n == 0 then + local st = cs.ocap[co].s + local l = cs.ocap[cs.cap].s - cs.ocap[co].s + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.s and cs.s:sub(st, st + l - 1) or cs.stream(st, st + l - 1) + n = n + 1 + end + cs.cap = cs.cap + 1 -- skip close entry + return n; + end +end + + +-- Push only the first value generated by nested captures + +local function pushonenestedvalue(cs, out, valuetable) + local n = pushnestedvalues(cs, false, out, valuetable) + for i = n, 2, -1 do + out.out[out.outindex] = nil + out.outindex = out.outindex - 1 + end +end + + +-- Try to find a named group capture with the name given at the top of +-- the stack; goes backward from 'cap'. + +local function findback(cs, cap, name, valuetable) + -- repeat until end of list + while cap > 0 do + cap = cap - 1 + local continue + if cs.ocap[cap].kind == Cclose then + cap = findopen(cs, cap); -- skip nested captures + elseif cs.ocap[cap].siz == 0 then + continue = true -- opening an enclosing capture: skip and get previous + end + if not continue and cs.ocap[cap].kind == Cgroup and cs.ocap[cap].idx ~= 0 then + local gname = valuetable[cs.ocap[cap].idx] -- get group name + -- right group? + if name == gname then + return cap; + end + end + end + error(("back reference '%s' not found"):format(name), 0) +end + + +-- Back-reference capture. Return number of values pushed. + +local function backrefcap(cs, out, valuetable) + local curr = cs.cap; + local name = valuetable[cs.ocap[cs.cap].idx] -- reference name + cs.cap = findback(cs, curr, name, valuetable) -- find corresponding group + local n = pushnestedvalues(cs, false, out, valuetable); -- push group's values + cs.cap = curr + 1; + return n; +end + + +-- Table capture: creates a new table and populates it with nested +-- captures. + +local function tablecap(cs, out, valuetable) + local n = 0; + local t = {} + cs.cap = cs.cap + 1 + -- table is empty + if cs.ocap[cs.cap - 1].siz == 0 then + while cs.ocap[cs.cap].kind ~= Cclose do + local subout = { outindex = 0, out = {} } + -- named group? + if cs.ocap[cs.cap].kind == Cgroup and cs.ocap[cs.cap].idx ~= 0 then + local groupname = valuetable[cs.ocap[cs.cap].idx] -- push group name + pushonenestedvalue(cs, subout, valuetable) + t[groupname] = subout.out[1] + else + -- not a named group + local k = pushcapture(cs, subout, valuetable) + -- store all values into table + for i = 1, subout.outindex do + t[i + n] = subout.out[i] + end + n = n + k; + end + end + cs.cap = cs.cap + 1 -- skip close entry + end + out.outindex = out.outindex + 1 + out.out[out.outindex] = t + return 1; -- number of values pushed (only the table) +end + + +-- Table-query capture + +local function querycap(cs, out, valuetable) + local table = valuetable[cs.ocap[cs.cap].idx] + local subout = { outindex = 0, out = {} } + pushonenestedvalue(cs, subout, valuetable) -- get nested capture + -- query cap. value at table + if table[subout.out[1]] ~= nil then + out.outindex = out.outindex + 1 + out.out[out.outindex] = table[subout.out[1]] + return 1 + end + return 0 +end + + +-- Fold capture + +local function foldcap(cs, out, valuetable) + local fce = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1 + -- no nested captures? + -- or no nested captures (large subject)? + if cs.ocap[cs.cap - 1].siz ~= 0 or + cs.ocap[cs.cap].kind == Cclose then + error("no initial value for fold capture", 0); + end + local subout = { outindex = 0; out = {} } + local n = pushcapture(cs, subout, valuetable) -- nested captures with no values? + if n == 0 then + error("no initial value for fold capture", 0); + end + local acumulator = subout.out[1] -- leave only one result for accumulator + while cs.ocap[cs.cap].kind ~= Cclose do + local subout = { outindex = 0; out = {} } + n = pushcapture(cs, subout, valuetable); -- get next capture's values + acumulator = fce(acumulator, unpack(subout.out, 1, subout.outindex)) -- call folding function + end + cs.cap = cs.cap + 1; -- skip close entry + out.outindex = out.outindex + 1 + out.out[out.outindex] = acumulator + return 1; -- only accumulator left on the stack +end + + +local function retcount(...) + return select('#', ...), { ... } +end + + +-- Function capture + +local function functioncap(cs, out, valuetable) + local fce = valuetable[cs.ocap[cs.cap].idx] -- push function + local subout = { outindex = 0, out = {} } + local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures + local count, ret = retcount(fce(unpack(subout.out, 1, n))) -- call function + for i = 1, count do + out.outindex = out.outindex + 1 + out.out[out.outindex] = ret[i] + end + return count +end + + +-- Select capture + +local function numcap(cs, out, valuetable) + local idx = valuetable[cs.ocap[cs.cap].idx] -- value to select + -- no values? + if idx == 0 then + nextcap(cs); -- skip entire capture + return 0; -- no value produced + else + local subout = { outindex = 0, out = {} } + local n = pushnestedvalues(cs, false, subout, valuetable) + -- invalid index? + if n < idx then + error(("no capture '%d'"):format(idx), 0) + else + out.outindex = out.outindex + 1 + out.out[out.outindex] = subout.out[idx] -- get selected capture + return 1; + end + end +end + + +-- Calls a runtime capture. Returns number of captures removed by +-- the call, including the initial Cgroup. (Captures to be added are +-- on the Lua stack.) + +local function runtimecap(cs, close, s, out, valuetable) + local open = findopen(cs, close) + assert(cs.ocap[open].kind == Cgroup) + cs.ocap[close].kind = Cclose; -- closes the group + cs.ocap[close].s = s; + cs.cap = open; + local fce = valuetable[cs.ocap[cs.cap].idx] -- push function to be called + local subout = { outindex = 0, out = {} } + local n = pushnestedvalues(cs, false, subout, valuetable); -- push nested captures + local count, ret = retcount(fce(cs.s or cs.stream, s, unpack(subout.out, 1, n))) -- call dynamic function + for i = 1, count do + out.outindex = out.outindex + 1 + out.out[out.outindex] = ret[i] + end + return close - open -- number of captures of all kinds removed +end + +-- Collect values from current capture into array 'cps'. Current +-- capture must be Cstring (first call) or Csimple (recursive calls). +-- (In first call, fills %0 with whole match for Cstring.) +-- Returns number of elements in the array that were filled. + +local function getstrcaps(cs, cps, n) + local k = n + n = n + 1 + cps[k + 1].isstring = true; -- get string value + cps[k + 1].startstr = cs.ocap[cs.cap].s; -- starts here + cs.cap = cs.cap + 1 + -- nested captures? + if cs.ocap[cs.cap - 1].siz == 0 then + -- traverse them + while cs.ocap[cs.cap].kind ~= Cclose do + -- too many captures? + if n >= MAXSTRCAPS then + nextcap(cs); -- skip extra captures (will not need them) + elseif cs.ocap[cs.cap].kind == Csimple then + -- string? + n = getstrcaps(cs, cps, n); -- put info. into array + else + cps[n + 1].isstring = false; -- not a string + cps[n + 1].origcap = cs.cap; -- keep original capture + nextcap(cs); + n = n + 1; + end + end + cs.cap = cs.cap + 1 -- skip close + end + cps[k + 1].endstr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1 -- ends here + return n; +end + + +-- add next capture value (which should be a string) to buffer 'b' + +-- String capture: add result to buffer 'b' (instead of pushing +-- it into the stack) + +local function stringcap(cs, b, valuetable) + local cps = {} + for i = 1, MAXSTRCAPS do + cps[#cps + 1] = {} + end + local fmt = valuetable[cs.ocap[cs.cap].idx] + local n = getstrcaps(cs, cps, 0) - 1; -- collect nested captures + local i = 1 + -- traverse them + while i <= #fmt do + local c = fmt:sub(i, i) + -- not an escape? + if c ~= '%' then + b[#b + 1] = c -- add it to buffer + elseif fmt:sub(i + 1, i + 1) < '0' or fmt:sub(i + 1, i + 1) > '9' then + -- not followed by a digit? + i = i + 1 + b[#b + 1] = fmt:sub(i, i) + else + i = i + 1 + local l = fmt:sub(i, i) - '0'; -- capture index + if l > n then + error(("invalid capture index (%d)"):format(l), 0) + elseif cps[l + 1].isstring then + b[#b + 1] = cs.s and cs.s:sub(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1) or + cs.stream(cps[l + 1].startstr, cps[l + 1].endstr - cps[l + 1].startstr + cps[l + 1].startstr - 1) + else + local curr = cs.cap; + cs.cap = cps[l + 1].origcap; -- go back to evaluate that nested capture + if not addonestring(cs, b, "capture", valuetable) then + error(("no values in capture index %d"):format(l), 0) + end + cs.cap = curr; -- continue from where it stopped + end + end + i = i + 1 + end +end + + +-- Substitution capture: add result to buffer 'b' + +local function substcap(cs, b, valuetable) + local curr = cs.ocap[cs.cap].s; + -- no nested captures? + if cs.ocap[cs.cap].siz ~= 0 then + -- keep original text + b[#b + 1] = cs.s and cs.s:sub(curr, cs.ocap[cs.cap].siz - 1 + curr - 1) or + cs.stream(curr, cs.ocap[cs.cap].siz - 1 + curr - 1) + else + cs.cap = cs.cap + 1 -- skip open entry + -- traverse nested captures + while cs.ocap[cs.cap].kind ~= Cclose do + local next = cs.ocap[cs.cap].s; + b[#b + 1] = cs.s and cs.s:sub(curr, next - curr + curr - 1) or + cs.stream(curr, next - curr + curr - 1) -- add text up to capture + if addonestring(cs, b, "replacement", valuetable) then + curr = cs.ocap[cs.cap - 1].s + cs.ocap[cs.cap - 1].siz - 1; -- continue after match + else + -- no capture value + curr = next; -- keep original text in final result + end + end + b[#b + 1] = cs.s and cs.s:sub(curr, curr + cs.ocap[cs.cap].s - curr - 1) or + cs.stream(curr, curr + cs.ocap[cs.cap].s - curr - 1) -- add last piece of text + end + cs.cap = cs.cap + 1 -- go to next capture +end + + +-- Evaluates a capture and adds its first value to buffer 'b'; returns +-- whether there was a value + +function addonestring(cs, b, what, valuetable) + local tag = cs.ocap[cs.cap].kind + if tag == Cstring then + stringcap(cs, b, valuetable); -- add capture directly to buffer + return 1 + elseif tag == Csubst then + substcap(cs, b, valuetable); -- add capture directly to buffer + return 1 + else + local subout = { outindex = 0, out = {} } + local n = pushcapture(cs, subout, valuetable); + if n > 0 then + if type(subout.out[1]) ~= 'string' and type(subout.out[1]) ~= 'number' then + error(("invalid %s value (a %s)"):format(what, type(subout.out[1])), 0) + end + b[#b + 1] = subout.out[1] + return n + end + end +end + + +-- Push all values of the current capture into the stack; returns +-- number of values pushed + +function pushcapture(cs, out, valuetable) + local type = cs.ocap[cs.cap].kind + if type == Cposition then + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.ocap[cs.cap].s + cs.cap = cs.cap + 1; + return 1; + elseif type == Cconst then + out.outindex = out.outindex + 1 + out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1 + return 1; + elseif type == Carg then + local arg = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1 + if arg > cs.ptopcount then + error(("reference to absent extra argument #%d"):format(arg), 0) + end + out.outindex = out.outindex + 1 + out.out[out.outindex] = cs.ptop[arg] + return 1; + elseif type == Csimple then + local k = pushnestedvalues(cs, true, out, valuetable) + local index = out.outindex + table.insert(out.out, index - k + 1, out.out[index]) + out[index + 1] = nil + return k; + elseif type == Cruntime then + out.outindex = out.outindex + 1 + out.out[out.outindex] = valuetable[cs.ocap[cs.cap].idx] + cs.cap = cs.cap + 1; + return 1; + elseif type == Cstring then + local b = {} + stringcap(cs, b, valuetable) + out.outindex = out.outindex + 1 + out.out[out.outindex] = table.concat(b) + return 1; + elseif type == Csubst then + local b = {} + substcap(cs, b, valuetable); + out.outindex = out.outindex + 1 + out.out[out.outindex] = table.concat(b) + return 1; + elseif type == Cgroup then + -- anonymous group? + if cs.ocap[cs.cap].idx == 0 then + return pushnestedvalues(cs, false, out, valuetable); -- add all nested values + else + -- named group: add no values + nextcap(cs); -- skip capture + return 0 + end + elseif type == Cbackref then + return backrefcap(cs, out, valuetable) + elseif type == Ctable then + return tablecap(cs, out, valuetable) + elseif type == Cfunction then + return functioncap(cs, out, valuetable) + elseif type == Cnum then + return numcap(cs, out, valuetable) + elseif type == Cquery then + return querycap(cs, out, valuetable) + elseif type == Cfold then + return foldcap(cs, out, valuetable) + else + assert(false) + end +end + + +-- Prepare a CapState structure and traverse the entire list of +-- captures in the stack pushing its results. 's' is the subject +-- string, 'r' is the final position of the match, and 'ptop' +-- the index in the stack where some useful values were pushed. +-- Returns the number of results pushed. (If the list produces no +-- results, push the final position of the match.) + +local function getcaptures(capture, s, stream, r, valuetable, ...) + local n = 0; + local cs = { cap = 0 } + local out = { outindex = 0; out = {} } + -- is there any capture? + if capture[cs.cap].kind ~= Cclose then + cs.ocap = capture + cs.s = s; + cs.stream = stream + cs.ptopcount, cs.ptop = retcount(...) + repeat -- collect their values + n = n + pushcapture(cs, out, valuetable) + until cs.ocap[cs.cap].kind == Cclose + end + -- no capture values? + if n == 0 then + if not r then + return + else + return r + end + end + assert(out.outindex < 7998, "(too many captures)") + return unpack(out.out, 1, out.outindex) +end + +local function getcapturesruntime(capture, s, stream, notdelete, min, max, captop, valuetable, ...) + local n = 0; + local cs = { cap = min } + local out = { outindex = 0; out = {} } + cs.ocap = capture + cs.s = s + cs.stream = stream + cs.ptopcount, cs.ptop = retcount(...) + local start = 0 + repeat -- collect their values + if not checknextcap(cs, max) then break end + local notdelete = notdelete or capture[cs.cap].kind == Cgroup and capture[cs.cap].idx ~= 0 and capture[cs.cap].candelete == 0 + pushcapture(cs, out, valuetable) + if notdelete then + start = cs.cap + else + n = n + cs.cap - start + for i = 0, captop - cs.cap - 1 do + ffi.copy(capture + start + i, capture + cs.cap + i, ffi.sizeof('CAPTURE')) + end + max = max - (cs.cap - start) + captop = captop - (cs.cap - start) + cs.cap = start + end + until cs.cap == max + assert(out.outindex < 7998, "(too many captures)") + return n, out.out, out.outindex +end + +return { + getcaptures = getcaptures, + runtimecap = runtimecap, + getcapturesruntime = getcapturesruntime, +} + diff --git a/vendor/lpeglj/lpcode.lua b/vendor/lpeglj/lpcode.lua new file mode 100644 index 0000000..365d80b --- /dev/null +++ b/vendor/lpeglj/lpcode.lua @@ -0,0 +1,1057 @@ +--[[ +LPEGLJ +lpcode.lua +Generating code from tree +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] +local ffi = require "ffi" +require "lpvm" + +local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift + +local TChar = 0 +local TSet = 1 +local TAny = 2 -- standard PEG elements +local TTrue = 3 +local TFalse = 4 +local TRep = 5 +local TSeq = 6 +local TChoice = 7 +local TNot = 8 +local TAnd = 9 +local TCall = 10 +local TOpenCall = 11 +local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule +local TGrammar = 13 -- sib1 is initial (and first) rule +local TBehind = 14 -- match behind +local TCapture = 15 -- regular capture +local TRunTime = 16 -- run-time capture + + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != val, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != val, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'val' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice to current position and jump +local IBackCommit = 16 -- "fails" but jump to its own 'offset' +local IFailTwice = 17 -- pop one choice and then fail +local IFail = 18 -- go back to saved state on choice and jump to saved offset +local IGiveup = 19 -- internal use +local IFullCapture = 20 -- complete capture of last 'off' chars +local IOpenCapture = 21 -- start a capture +local ICloseCapture = 22 +local ICloseRunTime = 23 + + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + + +local PEnullable = 0 +local PEnofail = 1 +local RuleLR = 0x10000 +local NOINST = -2 + + +local MAXBEHINDPREDICATE = 255 +local MAXRULES = 200 +local MAXOFF = 0xF + +-- number of siblings for each tree +local numsiblings = { + 0, 0, 0, -- char, set, any + 0, 0, -- true, false + 1, -- rep + 2, 2, -- seq, choice + 1, 1, -- not, and + 0, 0, 2, 1, -- call, opencall, rule, grammar + 1, -- behind + 1, 1 -- capture, runtime capture +} + + +local patternelement = ffi.typeof('PATTERN_ELEMENT') +local pattern = ffi.typeof('PATTERN') +local settype = ffi.typeof('int32_t[8]') +local fullset = settype(-1, -1, -1, -1, -1, -1, -1, -1) + +-- {====================================================== +-- Analysis and some optimizations +-- ======================================================= + +local codegen + + +-- Check whether a charset is empty (IFail), singleton (IChar), +-- full (IAny), or none of those (ISet). + +local function charsettype(cs) + local count = 0; + local candidate = -1; -- candidate position for a char + for i = 0, 8 - 1 do + local b = cs[i]; + if b == 0 then + if count > 1 then + return ISet; -- else set is still empty + end + elseif b == -1 then + if count < (i * 32) then + return ISet; + else + count = count + 32; -- set is still full + end + -- byte has only one bit? + elseif band(b, (b - 1)) == 0 then + if count > 0 then + return ISet; -- set is neither full nor empty + -- set has only one char till now; track it + else + count = count + 1; + candidate = i; + end + else + return ISet; -- byte is neither empty, full, nor singleton + end + end + if count == 0 then + return IFail, 0 -- empty set + -- singleton; find character bit inside byte + elseif count == 1 then + local b = cs[candidate]; + local c = candidate * 32; + for i = 1, 32 do + if b == 1 then + c = c + i - 1 + break + end + b = rshift(b, 1) + end + return IChar, c + elseif count == 256 then + return IAny, 0 -- full set + else + assert(false) -- should have returned by now + end +end + + +-- A few basic operations on Charsets + +local function cs_complement(cs) + for i = 0, 8 - 1 do + cs[i] = bnot(cs[i]) + end +end + + +local function cs_equal(cs1, cs2) + for i = 0, 8 - 1 do + if cs1[i] ~= cs2[i] then + return + end + end + return true +end + + +-- computes whether sets st1 and st2 are disjoint + +local function cs_disjoint(st1, st2) + for i = 0, 8 - 1 do + if band(st1[i], st2[i]) ~= 0 then + return + end + end + return true +end + + +-- Convert a 'char' pattern (TSet, TChar, TAny) to a charset + +local function tocharset(tree, index, valuetable) + local val = settype() + if tree.p[index].tag == TSet then + ffi.copy(val, valuetable[tree.p[index].val], ffi.sizeof(val)) + return val + elseif tree.p[index].tag == TChar then + local b = tree.p[index].val + -- only one char + -- add that one + val[rshift(b, 5)] = lshift(1, band(b, 31)) + return val + elseif tree.p[index].tag == TAny then + ffi.fill(val, ffi.sizeof(val), 0xff) + return val + end +end + + +-- checks whether a pattern has captures + +local function hascaptures(tree, index) + if tree.p[index].tag == TCapture or tree.p[index].tag == TRunTime then + return true + elseif tree.p[index].tag == TCall then + return hascaptures(tree, index + tree.p[index].ps) + else + local ns = numsiblings[tree.p[index].tag + 1] + if ns == 0 then + return + elseif ns == 1 then + return hascaptures(tree, index + 1) + elseif ns == 2 then + if hascaptures(tree, index + 1) then + return true + elseif tree.p[index].tag ~= TRule then + return hascaptures(tree, index + tree.p[index].ps) + end + else + assert(false) + end + end +end + + +-- Checks how a pattern behaves regarding the empty string, +-- in one of two different ways: +-- A pattern is *nullable* if it can match without consuming any character; +-- A pattern is *nofail* if it never fails for any string +-- (including the empty string). +-- The difference is only for predicates; for patterns without +-- predicates, the two properties are equivalent. +-- (With predicates, &'a' is nullable but not nofail. Of course, +-- nofail => nullable.) +-- These functions are all convervative in the following way: +-- p is nullable => nullable(p) +-- nofail(p) => p cannot fail +-- (The function assumes that TOpenCall and TRunTime are not nullable: +-- TOpenCall must be checked again when the grammar is fixed; +-- TRunTime is an arbitrary choice.) + +local function checkaux(tree, pred, index, lrcall) + lrcall = lrcall or {} + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny or + tag == TFalse or tag == TOpenCall then + return -- not nullable + elseif tag == TRep or tag == TTrue then + return true -- no fail + elseif tag == TNot or tag == TBehind then + -- can match empty, but may fail + if pred == PEnofail then + return + else + return true -- PEnullable + end + elseif tag == TAnd then + -- can match empty; fail iff body does + if pred == PEnullable then + return true + else + return checkaux(tree, pred, index + 1, lrcall) + end + -- can fail; match empty iff body does + elseif tag == TRunTime then + if pred == PEnofail then + return + else + return checkaux(tree, pred, index + 1, lrcall) + end + elseif tag == TSeq then + if not checkaux(tree, pred, index + 1, lrcall) then + return + else + return checkaux(tree, pred, index + tree.p[index].ps, lrcall) + end + elseif tag == TChoice then + if checkaux(tree, pred, index + tree.p[index].ps, lrcall) then + return true + else + return checkaux(tree, pred, index + 1, lrcall) + end + elseif tag == TCapture or tag == TGrammar or tag == TRule then + return checkaux(tree, pred, index + 1, lrcall) + elseif tag == TCall then + --left recursive rule + if bit.band(tree.p[index].cap, 0xffff) ~= 0 then + local lr = index + tree.p[index].ps + if lrcall[lr] then + return + end + lrcall[lr] = true + end + return checkaux(tree, pred, index + tree.p[index].ps, lrcall) + else + assert(false) + end +end + + +-- number of characters to match a pattern (or -1 if variable) +-- ('count' avoids infinite loops for grammars) + +local function fixedlenx(tree, count, len, index) + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny then + return len + 1; + elseif tag == TFalse or tag == TTrue or tag == TNot or tag == TAnd or tag == TBehind then + return len; + elseif tag == TRep or tag == TRunTime or tag == TOpenCall then + return -1; + elseif tag == TCapture or tag == TRule or tag == TGrammar then + return fixedlenx(tree, count, len, index + 1) + elseif tag == TCall then + if count >= MAXRULES then + return -1; -- may be a loop + else + return fixedlenx(tree, count + 1, len, index + tree.p[index].ps) + end + elseif tag == TSeq then + len = fixedlenx(tree, count, len, index + 1) + if (len < 0) then + return -1; + else + return fixedlenx(tree, count, len, index + tree.p[index].ps) + end + elseif tag == TChoice then + local n1 = fixedlenx(tree, count, len, index + 1) + if n1 < 0 then return -1 end + local n2 = fixedlenx(tree, count, len, index + tree.p[index].ps) + if n1 == n2 then + return n1 + else + return -1 + end + else + assert(false) + end +end + + +-- Computes the 'first set' of a pattern. +-- The result is a conservative aproximation: +-- match p ax -> x' for some x ==> a in first(p). +-- match p '' -> '' ==> returns 1. +-- The set 'follow' is the first set of what follows the +-- pattern (full set if nothing follows it) + +local function getfirst(tree, follow, index, valuetable, lrcall) + lrcall = lrcall or {} + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny then + local firstset = tocharset(tree, index, valuetable) + return 0, firstset + elseif tag == TTrue then + local firstset = settype() + ffi.copy(firstset, follow, ffi.sizeof(firstset)) + return 1, firstset + elseif tag == TFalse then + local firstset = settype() + return 0, firstset + elseif tag == TChoice then + local e1, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + local e2, csaux = getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall) + for i = 0, 8 - 1 do + firstset[i] = bor(firstset[i], csaux[i]) + end + return bor(e1, e2), firstset + elseif tag == TSeq then + if not checkaux(tree, PEnullable, index + 1) then + return getfirst(tree, fullset, index + 1, valuetable, lrcall) + -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl)) + else + local e2, csaux = getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall) + local e1, firstset = getfirst(tree, csaux, index + 1, valuetable, lrcall) + if e1 == 0 then -- 'e1' ensures that first can be used + return 0, firstset + -- one of the children has a matchtime? + elseif band(bor(e1, e2), 2) == 2 then + return 2, firstset -- pattern has a matchtime capture + else + return e2, firstset -- else depends on 'e2' + end + end + elseif tag == TRep then + local _, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + for i = 0, 8 - 1 do + firstset[i] = bor(firstset[i], follow[i]) + end + return 1, firstset -- accept the empty string + elseif tag == TCapture or tag == TGrammar or tag == TRule then + return getfirst(tree, follow, index + 1, valuetable, lrcall) + -- function invalidates any follow info. + elseif tag == TRunTime then + local e, firstset = getfirst(tree, fullset, index + 1, valuetable, lrcall) + if e ~= 0 then + return 2, firstset -- function is not "protected"? + else + return 0, firstset -- pattern inside capture ensures first can be used + end + elseif tag == TCall then + -- left recursive rule + if bit.band(tree.p[index].cap, 0xffff) ~= 0 then + local lr = index + tree.p[index].ps + if lrcall[lr] then + return 0, settype() + else + lrcall[lr] = true + end + end + return getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall) + elseif tag == TAnd then + local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + for i = 0, 8 - 1 do + firstset[i] = band(firstset[i], follow[i]) + end + return e, firstset + elseif tag == TNot then + local firstset = tocharset(tree, index + 1, valuetable) + if firstset then + cs_complement(firstset) + return 1, firstset + end + local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + ffi.copy(firstset, follow, ffi.sizeof(firstset)) + return bor(e, 1), firstset -- always can accept the empty string + -- instruction gives no new information + elseif tag == TBehind then + -- call 'getfirst' to check for math-time captures + local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall) + ffi.copy(firstset, follow, ffi.sizeof(firstset)) + return bor(e, 1), firstset -- always can accept the empty string + else + assert(false) + end +end + + +-- If it returns true, then pattern can fail only depending on the next +-- character of the subject + +local function headfail(tree, index, lrcall) + lrcall = lrcall or {} + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then + return true + elseif tag == TTrue or tag == TRep or tag == TRunTime or tag == TNot or tag == TBehind then + return + elseif tag == TCapture or tag == TGrammar or tag == TRule or tag == TAnd then + return headfail(tree, index + 1, lrcall) + elseif tag == TCall then + -- left recursive rule + if bit.band(tree.p[index].cap, 0xffff) ~= 0 then + local lr = index + tree.p[index].ps + if lrcall[lr] then + return true + else + lrcall[lr] = true + end + end + return headfail(tree, index + tree.p[index].ps, lrcall) + elseif tag == TSeq then + if not checkaux(tree, PEnofail, index + tree.p[index].ps) then + return + else + return headfail(tree, index + 1, lrcall) + end + elseif tag == TChoice then + if not headfail(tree, index + 1, lrcall) then + return + else + return headfail(tree, index + tree.p[index].ps, lrcall) + end + else + assert(false) + end +end + + +-- Check whether the code generation for the given tree can benefit +-- from a follow set (to avoid computing the follow set when it is +-- not needed) + +local function needfollow(tree, index) + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny or tag == TFalse or tag == TTrue or tag == TAnd or tag == TNot or + tag == TRunTime or tag == TGrammar or tag == TCall or tag == TBehind then + return + elseif tag == TChoice or tag == TRep then + return true + elseif tag == TCapture then + return needfollow(tree, index + 1) + elseif tag == TSeq then + return needfollow(tree, index + tree.p[index].ps) + else + assert(false) + end +end + +-- ====================================================== + + +-- {====================================================== +-- Code generation +-- ======================================================= + + +-- code generation is recursive; 'opt' indicates that the code is +-- being generated under a 'IChoice' operator jumping to its end. +-- 'tt' points to a previous test protecting this code. 'fl' is +-- the follow set of the pattern. + + +local function addinstruction(code, op, val) + local size = code.size + if size >= code.allocsize then + code:doublesize() + end + code.p[size].code = op + code.p[size].val = val + code.size = size + 1 + return size +end + + +local function setoffset(code, instruction, offset) + code.p[instruction].offset = offset; +end + + +-- Add a capture instruction: +-- 'op' is the capture instruction; 'cap' the capture kind; +-- 'key' the key into ktable; 'aux' is optional offset + +local function addinstcap(code, op, cap, key, aux) + local i = addinstruction(code, op, bor(cap, lshift(aux, 4))) + setoffset(code, i, key) + return i +end + + +local function jumptothere(code, instruction, target) + if instruction >= 0 then + setoffset(code, instruction, target - instruction) + end +end + + +local function jumptohere(code, instruction) + jumptothere(code, instruction, code.size) +end + + +-- Code an IChar instruction, or IAny if there is an equivalent +-- test dominating it + +local function codechar(code, c, tt) + assert(tt ~= -1) + if tt >= 0 and code.p[tt].code == ITestChar and + code.p[tt].val == c then + addinstruction(code, IAny, 0) + else + addinstruction(code, IChar, c) + end +end + + +-- Code an ISet instruction + +local function coderealcharset(code, cs, valuetable) + local ind = #valuetable + 1 + valuetable[ind] = cs + return addinstruction(code, ISet, ind) +end + + +-- code a char set, optimizing unit sets for IChar, "complete" +-- sets for IAny, and empty sets for IFail; also use an IAny +-- when instruction is dominated by an equivalent test. + +local function codecharset(code, cs, tt, valuetable) + local op, c = charsettype(cs) + if op == IChar then + codechar(code, c, tt) + elseif op == ISet then + assert(tt ~= -1) + if tt >= 0 and code.p[tt].code == ITestSet and + cs_equal(cs, valuetable[code.p[tt].val]) then + addinstruction(code, IAny, 0) + else + coderealcharset(code, cs, valuetable) + end + else + addinstruction(code, op, c) + end +end + + +-- code a test set, optimizing unit sets for ITestChar, "complete" +-- sets for ITestAny, and empty sets for IJmp (always fails). +-- 'e' is true iff test should accept the empty string. (Test +-- instructions in the current VM never accept the empty string.) + +local function codetestset(code, cs, e, valuetable) + if e ~= 0 then + return NOINST -- no test + else + local pos = code.size + codecharset(code, cs, NOINST, valuetable) + local inst = code.p[pos] + local code = inst.code + if code == IFail then + inst.code = IJmp -- always jump + elseif code == IAny then + inst.code = ITestAny + elseif code == IChar then + inst.code = ITestChar + elseif code == ISet then + inst.code = ITestSet + else + assert(false) + end + return pos + end +end + + +-- Find the final destination of a sequence of jumps + +local function finaltarget(code, i) + while code.p[i].code == IJmp do + i = i + code.p[i].offset + end + return i +end + + +-- final label (after traversing any jumps) + +local function finallabel(code, i) + return finaltarget(code, i + code.p[i].offset) +end + +-- <behind(p)> == behind n; <p> (where n = fixedlen(p)) + +local function codebehind(code, tree, index, valuetable) + if tree.p[index].val > 0 then + addinstruction(code, IBehind, tree.p[index].val) + end + codegen(code, tree, fullset, false, NOINST, index + 1, valuetable) -- NOINST +end + + +-- Choice; optimizations: +-- - when p1 is headfail +-- - when first(p1) and first(p2) are disjoint; than +-- a character not in first(p1) cannot go to p1, and a character +-- in first(p1) cannot go to p2 (at it is not in first(p2)). +-- (The optimization is not valid if p1 accepts the empty string, +-- as then there is no character at all...) +-- - when p2 is empty and opt is true; a IPartialCommit can resuse +-- the Choice already active in the stack. + +local function codechoice(code, tree, fl, opt, p1, p2, valuetable) + local emptyp2 = tree.p[p2].tag == TTrue + local e1, st1 = getfirst(tree, fullset, p1, valuetable) + local _, st2 = getfirst(tree, fl, p2, valuetable) + if headfail(tree, p1) or (e1 == 0 and cs_disjoint(st1, st2)) then + -- <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2: + local test = codetestset(code, st1, 0, valuetable) + local jmp = NOINST; + codegen(code, tree, fl, false, test, p1, valuetable) + if not emptyp2 then + jmp = addinstruction(code, IJmp, 0) + end + jumptohere(code, test) + codegen(code, tree, fl, opt, NOINST, p2, valuetable) + jumptohere(code, jmp) + elseif opt and emptyp2 then + -- p1? == IPartialCommit; p1 + jumptohere(code, addinstruction(code, IPartialCommit, 0)) + codegen(code, tree, fullset, true, NOINST, p1, valuetable) + else + -- <p1 / p2> == + -- test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: + local test = codetestset(code, st1, e1, valuetable) + local pchoice = addinstruction(code, IChoice, 0) + codegen(code, tree, fullset, emptyp2, test, p1, valuetable) + local pcommit = addinstruction(code, ICommit, 0) + jumptohere(code, pchoice) + jumptohere(code, test) + codegen(code, tree, fl, opt, NOINST, p2, valuetable) + jumptohere(code, pcommit) + end +end + + +-- And predicate +-- optimization: fixedlen(p) = n ==> <&p> == <p>; behind n +-- (valid only when 'p' has no captures) + +local function codeand(code, tree, tt, index, valuetable) + local n = fixedlenx(tree, 0, 0, index) + if n >= 0 and n <= MAXBEHINDPREDICATE and not hascaptures(tree, index) then + codegen(code, tree, fullset, false, tt, index, valuetable) + if n > 0 then + addinstruction(code, IBehind, n) + end + else + -- default: Choice L1; p1; BackCommit L2; L1: Fail; L2: + local pchoice = addinstruction(code, IChoice, 0) + codegen(code, tree, fullset, false, tt, index, valuetable) + local pcommit = addinstruction(code, IBackCommit, 0) + jumptohere(code, pchoice) + addinstruction(code, IFail, 0) + jumptohere(code, pcommit) + end +end + + +-- Captures: if pattern has fixed (and not too big) length, use +-- a single IFullCapture instruction after the match; otherwise, +-- enclose the pattern with OpenCapture - CloseCapture. + +local function codecapture(code, tree, fl, tt, index, valuetable) + local len = fixedlenx(tree, 0, 0, index + 1) + if len >= 0 and len <= MAXOFF and not hascaptures(tree, index + 1) then + codegen(code, tree, fl, false, tt, index + 1, valuetable) + addinstcap(code, IFullCapture, tree.p[index].cap, tree.p[index].val, len) + else + addinstcap(code, IOpenCapture, tree.p[index].cap, tree.p[index].val, 0) + codegen(code, tree, fl, false, tt, index + 1, valuetable) + addinstcap(code, ICloseCapture, Cclose, 0, 0) + end +end + + +local function coderuntime(code, tree, tt, index, valuetable) + addinstcap(code, IOpenCapture, Cgroup, tree.p[index].val, 0) + codegen(code, tree, fullset, false, tt, index + 1, valuetable) + addinstcap(code, ICloseRunTime, Cclose, 0, 0) +end + + +-- Repetion; optimizations: +-- When pattern is a charset, can use special instruction ISpan. +-- When pattern is head fail, or if it starts with characters that +-- are disjoint from what follows the repetions, a simple test +-- is enough (a fail inside the repetition would backtrack to fail +-- again in the following pattern, so there is no need for a choice). +-- When 'opt' is true, the repetion can reuse the Choice already +-- active in the stack. + +local function coderep(code, tree, opt, fl, index, valuetable) + local st = tocharset(tree, index, valuetable) + if st then + local op = coderealcharset(code, st, valuetable) + code.p[op].code = ISpan; + else + local e1, st = getfirst(tree, fullset, index, valuetable) + if headfail(tree, index) or (e1 == 0 and cs_disjoint(st, fl)) then + -- L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: + local test = codetestset(code, st, 0, valuetable) + codegen(code, tree, fullset, false, test, index, valuetable) + local jmp = addinstruction(code, IJmp, 0) + jumptohere(code, test) + jumptothere(code, jmp, test) + else + -- test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2: + -- or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1; + local test = codetestset(code, st, e1, valuetable) + local pchoice = NOINST; + if opt then + jumptohere(code, addinstruction(code, IPartialCommit, 0)) + else + pchoice = addinstruction(code, IChoice, 0) + end + local l2 = code.size + codegen(code, tree, fullset, false, NOINST, index, valuetable) + local commit = addinstruction(code, IPartialCommit, 0) + jumptothere(code, commit, l2) + jumptohere(code, pchoice) + jumptohere(code, test) + end + end +end + + +-- Not predicate; optimizations: +-- In any case, if first test fails, 'not' succeeds, so it can jump to +-- the end. If pattern is headfail, that is all (it cannot fail +-- in other parts); this case includes 'not' of simple sets. Otherwise, +-- use the default code (a choice plus a failtwice). + +local function codenot(code, tree, index, valuetable) + local e, st = getfirst(tree, fullset, index, valuetable) + local test = codetestset(code, st, e, valuetable) + -- test (fail(p1)) -> L1; fail; L1: + if headfail(tree, index) then + addinstruction(code, IFail, 0) + else + -- test(fail(p))-> L1; choice L1; <p>; failtwice; L1: + local pchoice = addinstruction(code, IChoice, 0) + codegen(code, tree, fullset, false, NOINST, index, valuetable) + addinstruction(code, IFailTwice, 0) + jumptohere(code, pchoice) + end + jumptohere(code, test) +end + + +-- change open calls to calls, using list 'positions' to find +-- correct offsets; also optimize tail calls + +local function correctcalls(code, positions, from, to) + for i = from, to - 1 do + if code.p[i].code == IOpenCall then + local n = code.p[i].offset; -- rule number + local rule = positions[n]; -- rule position + assert(rule == from or code.p[rule - 1].code == IRet) + -- call; ret ? + if bit.band(code.p[i].val, 0xffff) == 0 and code.p[finaltarget(code, i + 1)].code == IRet then + code.p[i].code = IJmp; -- tail call + else + code.p[i].code = ICall; + end + jumptothere(code, i, rule) -- call jumps to respective rule + end + end +end + + +-- Code for a grammar: +-- call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2: + +local function codegrammar(code, tree, index, valuetable) + local positions = {} + local rulenumber = 1; + -- tree.p[rule].tag + local rule = index + 1 + assert(tree.p[rule].tag == TRule) + local LR = 0 + if band(RuleLR, tree.p[rule].cap) ~= 0 then LR = 1 end + local firstcall = addinstruction(code, ICall, LR) -- call initial rule + code.p[firstcall].aux = tree.p[rule].val + local jumptoend = addinstruction(code, IJmp, 0) -- jump to the end + jumptohere(code, firstcall) -- here starts the initial rule + while tree.p[rule].tag == TRule do + positions[rulenumber] = code.size -- save rule position + rulenumber = rulenumber + 1 + codegen(code, tree, fullset, false, NOINST, rule + 1, valuetable) -- code rule + addinstruction(code, IRet, 0) + rule = rule + tree.p[rule].ps + end + assert(tree.p[rule].tag == TTrue) + jumptohere(code, jumptoend) + correctcalls(code, positions, firstcall + 2, code.size) +end + + +local function codecall(code, tree, index, val) + local c = addinstruction(code, IOpenCall, tree.p[index].cap) -- to be corrected later + code.p[c].aux = val + assert(tree.p[index + tree.p[index].ps].tag == TRule) + setoffset(code, c, band(tree.p[index + tree.p[index].ps].cap, 0x7fff)) -- offset = rule number +end + + +local function codeseq(code, tree, fl, opt, tt, p1, p2, valuetable) + if needfollow(tree, p1) then + local _, fll = getfirst(tree, fl, p2, valuetable) -- p1 follow is p2 first + codegen(code, tree, fll, false, tt, p1, valuetable) + else + -- use 'fullset' as follow + codegen(code, tree, fullset, false, tt, p1, valuetable) + end + -- can p1 consume anything? + if (fixedlenx(tree, 0, 0, p1) ~= 0) then + tt = NOINST; -- invalidate test + end + return codegen(code, tree, fl, opt, tt, p2, valuetable) +end + + +-- Main code-generation function: dispatch to auxiliar functions +-- according to kind of tree + +-- code generation is recursive; 'opt' indicates that the code is being +-- generated as the last thing inside an optional pattern (so, if that +-- code is optional too, it can reuse the 'IChoice' already in place for +-- the outer pattern). 'tt' points to a previous test protecting this +-- code (or NOINST). 'fl' is the follow set of the pattern. + +function codegen(code, tree, fl, opt, tt, index, valuetable) + local tag = tree.p[index].tag + if tag == TChar then + return codechar(code, tree.p[index].val, tt) + elseif tag == TAny then + return addinstruction(code, IAny, 0) + elseif tag == TSet then + return codecharset(code, valuetable[tree.p[index].val], tt, valuetable) + elseif tag == TTrue then + elseif tag == TFalse then + return addinstruction(code, IFail, 0) + elseif tag == TSeq then + return codeseq(code, tree, fl, opt, tt, index + 1, index + tree.p[index].ps, valuetable) + elseif tag == TChoice then + return codechoice(code, tree, fl, opt, index + 1, index + tree.p[index].ps, valuetable) + elseif tag == TRep then + return coderep(code, tree, opt, fl, index + 1, valuetable) + elseif tag == TBehind then + return codebehind(code, tree, index, valuetable) + elseif tag == TNot then + return codenot(code, tree, index + 1, valuetable) + elseif tag == TAnd then + return codeand(code, tree, tt, index + 1, valuetable) + elseif tag == TCapture then + return codecapture(code, tree, fl, tt, index, valuetable) + elseif tag == TRunTime then + return coderuntime(code, tree, tt, index, valuetable) + elseif tag == TGrammar then + return codegrammar(code, tree, index, valuetable) + elseif tag == TCall then + return codecall(code, tree, index, tree.p[index].val) + else + assert(false) + end +end + + +-- Optimize jumps and other jump-like instructions. +-- * Update labels of instructions with labels to their final +-- destinations (e.g., choice L1; ... L1: jmp L2: becomes +-- choice L2) +-- * Jumps to other instructions that do jumps become those +-- instructions (e.g., jump to return becomes a return; jump +-- to commit becomes a commit) + +local function peephole(code) + local i = 0 + while i < code.size do + local tag = code.p[i].code + if tag == IChoice or tag == ICall or tag == ICommit or tag == IPartialCommit or + tag == IBackCommit or tag == ITestChar or tag == ITestSet or tag == ITestAny then + -- instructions with labels + jumptothere(code, i, finallabel(code, i)) -- optimize label + + elseif tag == IJmp then + local ft = finaltarget(code, i) + local tag = code.p[ft].code -- jumping to what? + -- instructions with unconditional implicit jumps + if tag == IRet or tag == IFail or tag == IFailTwice or tag == IEnd then + ffi.copy(code.p + i, code.p + ft, ffi.sizeof(patternelement)) -- jump becomes that instruction + elseif tag == ICommit or tag == IPartialCommit or tag == IBackCommit then + -- inst. with unconditional explicit jumps + local fft = finallabel(code, ft) + ffi.copy(code.p + i, code.p + ft, ffi.sizeof(patternelement)) -- jump becomes that instruction... + jumptothere(code, i, fft) -- but must correct its offset + i = i - 1 -- reoptimize its label + else + jumptothere(code, i, ft) -- optimize label + end + end + i = i + 1 + end +end + + +-- Compile a pattern + +local function compile(tree, index, valuetable) + local code = pattern() + codegen(code, tree, fullset, false, NOINST, index, valuetable) + addinstruction(code, IEnd, 0) + peephole(code) + ffi.C.free(tree.code) + tree.code = code +end + +local function pat_new(ct, size) + size = size or 0 + local allocsize = size + if allocsize < 10 then + allocsize = 10 + end + local pat = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern))) + assert(pat ~= nil) + pat.allocsize = allocsize + pat.size = size + pat.p = ffi.C.malloc(ffi.sizeof(patternelement) * allocsize) + assert(pat.p ~= nil) + ffi.fill(pat.p, ffi.sizeof(patternelement) * allocsize) + return pat +end + +local function doublesize(ct) + ct.p = ffi.C.realloc(ct.p, ffi.sizeof(patternelement) * ct.allocsize * 2) + assert(ct.p ~= nil) + ffi.fill(ct.p + ct.allocsize, ffi.sizeof(patternelement) * ct.allocsize) + ct.allocsize = ct.allocsize * 2 +end + +local pattreg = { + doublesize = doublesize, +} + +local metareg = { + ["__new"] = pat_new, + ["__index"] = pattreg +} + +ffi.metatype(pattern, metareg) + +return { + checkaux = checkaux, + tocharset = tocharset, + fixedlenx = fixedlenx, + hascaptures = hascaptures, + compile = compile, +} diff --git a/vendor/lpeglj/lpeglj.lua b/vendor/lpeglj/lpeglj.lua new file mode 100644 index 0000000..de4fca3 --- /dev/null +++ b/vendor/lpeglj/lpeglj.lua @@ -0,0 +1,1373 @@ +--[[ +LPEGLJ +lpeglj.lua +Main module and tree generation +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] + +assert(jit.version_num > 20000, "Use LuaJIT v2.0.1 or higher.") + +local ffi = require "ffi" +local lpcode = require "lpcode" +local lpprint = require "lpprint" +local lpvm = require "lpvm" + +local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift + +ffi.cdef [[ + int isalnum(int c); + int isalpha(int c); + int iscntrl(int c); + int isdigit(int c); + int isgraph(int c); + int islower(int c); + int isprint(int c); + int ispunct(int c); + int isspace(int c); + int isupper(int c); + int isxdigit(int c); +]] + +local MAXBEHIND = 255 +local MAXRULES = 200 +local VERSION = "1.0.0.0LJ" + +local TChar = 0 +local TSet = 1 +local TAny = 2 -- standard PEG elements +local TTrue = 3 +local TFalse = 4 +local TRep = 5 +local TSeq = 6 +local TChoice = 7 +local TNot = 8 +local TAnd = 9 +local TCall = 10 +local TOpenCall = 11 +local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule +local TGrammar = 13 -- sib1 is initial (and first) rule +local TBehind = 14 -- match behind +local TCapture = 15 -- regular capture +local TRunTime = 16 -- run-time capture + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != val, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != val, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'val' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice to current position and jump +local IBackCommit = 16 -- "fails" but jump to its own 'offset' +local IFailTwice = 17 -- pop one choice and then fail +local IFail = 18 -- go back to saved state on choice and jump to saved offset +local IGiveup = 19 -- internal use +local IFullCapture = 20 -- complete capture of last 'off' chars +local IOpenCapture = 21 -- start a capture +local ICloseCapture = 22 +local ICloseRunTime = 23 + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + +local PEnullable = 0 +local PEnofail = 1 +local PEleftrecursion = 2 + +local newgrammar + +local RuleLR = 0x10000 +local Ruleused = 0x20000 +local BCapcandelete = 0x30000 + +local LREnable = false + +-- number of siblings for each tree +local numsiblings = { + 0, 0, 0, -- char, set, any + 0, 0, -- true, false + 1, -- rep + 2, 2, -- seq, choice + 1, 1, -- not, and + 0, 0, 2, 1, -- call, opencall, rule, grammar + 1, -- behind + 1, 1 -- capture, runtime capture +} + + + +local patternid = 0 +local valuetable = {} + +local funcnames = setmetatable({}, { __mode = 'k' }) + +local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT') +local treepattern = ffi.typeof('TREEPATTERN') +local patternelement = ffi.typeof('PATTERN_ELEMENT') +local pattern = ffi.typeof('PATTERN') +local settype = ffi.typeof('int32_t[8]') +local uint32 = ffi.typeof('uint32_t[1]') + +-- Fix a TOpenCall into a TCall node, using table 'postable' to +-- translate a key to its rule address in the tree. Raises an +-- error if key does not exist. + +local function fixonecall(postable, grammar, index, valuetable) + local name = valuetable[grammar.p[index].val] -- get rule's name + local n = postable[name] -- query name in position table + -- no position? + if not n then + error(("rule '%s' undefined in given grammar"):format(type(name) == 'table' and '(a table)' or name), 0) + end + grammar.p[index].tag = TCall; + grammar.p[index].ps = n - index -- position relative to node + grammar.p[index + grammar.p[index].ps].cap = bit.bor(grammar.p[index + grammar.p[index].ps].cap, Ruleused) +end + + +-- Transform left associative constructions into right +-- associative ones, for sequence and choice; that is: +-- (t11 + t12) + t2 => t11 + (t12 + t2) +-- (t11 * t12) * t2 => t11 * (t12 * t2) +-- (that is, Op (Op t11 t12) t2 => Op t11 (Op t12 t2)) + +local function correctassociativity(tree, index) + local t1 = index + 1 + assert(tree.p[index].tag == TChoice or tree.p[index].tag == TSeq) + while tree.p[t1].tag == tree.p[index].tag do + local n1size = tree.p[index].ps - 1; -- t1 == Op t11 t12 + local n11size = tree.p[t1].ps - 1; + local n12size = n1size - n11size - 1 + for i = 1, n11size do + ffi.copy(tree.p + index + i, tree.p + t1 + i, ffi.sizeof(treepatternelement)) + end + tree.p[index].ps = n11size + 1 + tree.p[index + tree.p[index].ps].tag = tree.p[index].tag + tree.p[index + tree.p[index].ps].ps = n12size + 1 + end +end + + +-- Make final adjustments in a tree. Fix open calls in tree, +-- making them refer to their respective rules or raising appropriate +-- errors (if not inside a grammar). Correct associativity of associative +-- constructions (making them right associative). + +local function finalfix(fix, postable, grammar, index, valuetable) + + local tag = grammar.p[index].tag + --subgrammars were already fixed + if tag == TGrammar then + return + elseif tag == TOpenCall then + -- inside a grammar? + if fix then + fixonecall(postable, grammar, index, valuetable) + -- open call outside grammar + else + error(("rule '%s' used outside a grammar"):format(tostring(valuetable[grammar.p[index].val])), 0) + end + elseif tag == TSeq or tag == TChoice then + correctassociativity(grammar, index) + end + local ns = numsiblings[tag + 1] + if ns == 0 then + elseif ns == 1 then + return finalfix(fix, postable, grammar, index + 1, valuetable) + elseif ns == 2 then + finalfix(fix, postable, grammar, index + 1, valuetable) + return finalfix(fix, postable, grammar, index + grammar.p[index].ps, valuetable) + else + assert(false) + end +end + + +-- {====================================================== +-- Tree generation +-- ======================================================= + +local function newcharset() + local tree = treepattern(1) + valuetable[tree.id] = { settype() } + tree.p[0].tag = TSet + tree.p[0].val = 1 + return tree, valuetable[tree.id][1] +end + + +-- add to tree a sequence where first sibling is 'sib' (with size +-- 'sibsize') + +local function seqaux(tree, sib, start, sibsize) + tree.p[start].tag = TSeq; + tree.p[start].ps = sibsize + 1 + ffi.copy(tree.p + start + 1, sib.p, ffi.sizeof(treepatternelement) * sibsize) +end + + +-- Build a sequence of 'n' nodes, each with tag 'tag' and 'val' got +-- from the array 's' (or 0 if array is NULL). (TSeq is binary, so it +-- must build a sequence of sequence of sequence...) + +local function fillseq(tree, tag, start, n, s) + -- initial n-1 copies of Seq tag; Seq ... + for i = 1, n - 1 do + tree.p[start].tag = TSeq + tree.p[start].ps = 2 + tree.p[start + 1].tag = tag + if s then + tree.p[start + 1].val = s:sub(i, i):byte() + end + start = start + tree.p[start].ps + end + tree.p[start].tag = tag -- last one does not need TSeq + if s then + tree.p[start].val = s:sub(n, n):byte() + end +end + + +-- Numbers as patterns: +-- 0 == true (always match); n == TAny repeated 'n' times; +-- -n == not (TAny repeated 'n' times) + +local function numtree(n) + if n == 0 then + local tree = treepattern(1) + tree.p[0].tag = TTrue + return tree + else + local tree, start + if n > 0 then + tree = treepattern(2 * n - 1) + start = 0 + -- negative: code it as !(-n) + else + n = -n; + tree = treepattern(2 * n) + tree.p[0].tag = TNot + start = 1 + end + fillseq(tree, TAny, start, n) -- sequence of 'n' any's + return tree; + end +end + + +-- Convert value to a pattern + +local function getpatt(val, name) + local typ = type(val) + if typ == 'string' then + -- empty? + if #val == 0 then + local pat = treepattern(1) + pat.p[0].tag = TTrue -- always match + return pat + else + local tree = treepattern(2 * (#val - 1) + 1) + fillseq(tree, TChar, 0, #val, val) -- sequence of '#val' chars + return tree + end + elseif typ == 'number' then + return numtree(val) + elseif typ == 'boolean' then + local pat = treepattern(1) + pat.p[0].tag = val and TTrue or TFalse + return pat + elseif typ == 'table' then + return newgrammar(val) + elseif typ == 'function' then + if name and type(name) == 'string' then + funcnames[val] = name + end + local pat = treepattern(2) + valuetable[pat.id] = { val } + pat.p[0].tag = TRunTime + pat.p[0].val = 1 + pat.p[1].tag = TTrue + return pat + elseif ffi.istype(treepattern, val) then + assert(val.treesize > 0) + return val + end + assert(false) +end + +local function copykeys(ktable1, ktable2) + local ktable, offset = {}, 0 + if not ktable1 and not ktable2 then + return ktable, 0 + elseif ktable1 then + for i = 1, #ktable1 do + ktable[#ktable + 1] = ktable1[i] + end + offset = #ktable1 + if not ktable2 then + return ktable, 0 + end + end + if ktable2 then + for i = 1, #ktable2 do + ktable[#ktable + 1] = ktable2[i] + end + end + assert(#ktable < 65536, "too many Lua values in pattern") + return ktable, offset +end + +local function correctkeys(tree, index, offset) + local tag = tree.p[index].tag + if (tag == TSet or tag == TRule or tag == TCall or tag == TRunTime or tag == TOpenCall or tag == TCapture) and + tree.p[index].val ~= 0 then + tree.p[index].val = tree.p[index].val + offset + end + local ns = numsiblings[tag + 1] + if ns == 0 then + elseif ns == 1 then + return correctkeys(tree, index + 1, offset) + elseif ns == 2 then + correctkeys(tree, index + 1, offset) + return correctkeys(tree, index + tree.p[index].ps, offset) + else + assert(false) + end +end + + + +-- create a new tree, with a new root and one sibling. + +local function newroot1sib(tag, pat) + local tree1 = getpatt(pat) + local tree = treepattern(1 + tree1.treesize) -- create new tree + valuetable[tree.id] = copykeys(valuetable[tree1.id]) + tree.p[0].tag = tag + ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + return tree +end + + +-- create a new tree, with a new root and 2 siblings. + +local function newroot2sib(tag, pat1, pat2) + local tree1 = getpatt(pat1) + local tree2 = getpatt(pat2) + local tree = treepattern(1 + tree1.treesize + tree2.treesize) -- create new tree + local ktable, offset = copykeys(valuetable[tree1.id], valuetable[tree2.id]) + valuetable[tree.id] = ktable + tree.p[0].tag = tag + tree.p[0].ps = 1 + tree1.treesize + ffi.copy(tree.p + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + ffi.copy(tree.p + 1 + tree1.treesize, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize) + if offset > 0 then + correctkeys(tree, 1 + tree1.treesize, offset) + end + return tree; +end + + +local function lp_P(val, name) + assert(type(val) ~= 'nil') + return getpatt(val, name) +end + + +-- sequence operator; optimizations: +-- false x => false, x true => x, true x => x +-- (cannot do x . false => false because x may have runtime captures) + +local function lp_seq(pat1, pat2) + local tree1 = getpatt(pat1) + local tree2 = getpatt(pat2) + -- false . x == false, x . true = x + if tree1.p[0].tag == TFalse or tree2.p[0].tag == TTrue then + return tree1 + -- true . x = x + elseif tree1.p[0].tag == TTrue then + return tree2 + else + return newroot2sib(TSeq, tree1, tree2) + end +end + + +-- choice operator; optimizations: +-- charset / charset => charset +-- true / x => true, x / false => x, false / x => x +-- (x / true is not equivalent to true) + +local function lp_choice(pat1, pat2) + local tree1 = getpatt(pat1) + local tree2 = getpatt(pat2) + local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id]) + local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id]) + if charset1 and charset2 then + local t, set = newcharset() + for i = 0, 7 do + set[i] = bor(charset1[i], charset2[i]) + end + return t + elseif lpcode.checkaux(tree1, PEnofail, 0) or tree2.p[0].tag == TFalse then + return tree1 -- true / x => true, x / false => x + elseif tree1.p[0].tag == TFalse then + return tree2 -- false / x => x + else + return newroot2sib(TChoice, tree1, tree2) + end +end + + +-- p^n + +local function lp_star(tree1, n) + local tree + n = tonumber(n) + assert(type(n) == 'number') + -- seq tree1 (seq tree1 ... (seq tree1 (rep tree1))) + if n >= 0 then + tree = treepattern((n + 1) * (tree1.treesize + 1)) + if lpcode.checkaux(tree1, PEnullable, 0) then + error("loop body may accept empty string", 0) + end + valuetable[tree.id] = copykeys(valuetable[tree1.id]) + local start = 0 + -- repeat 'n' times + for i = 1, n do + seqaux(tree, tree1, start, tree1.treesize) + start = start + tree.p[start].ps + end + tree.p[start].tag = TRep + ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + -- choice (seq tree1 ... choice tree1 true ...) true + else + n = -n; + -- size = (choice + seq + tree1 + true) * n, but the last has no seq + tree = treepattern(n * (tree1.treesize + 3) - 1) + valuetable[tree.id] = copykeys(valuetable[tree1.id]) + local start = 0 + -- repeat (n - 1) times + for i = n, 2, -1 do + tree.p[start].tag = TChoice; + tree.p[start].ps = i * (tree1.treesize + 3) - 2 + tree.p[start + tree.p[start].ps].tag = TTrue; + start = start + 1 + seqaux(tree, tree1, start, tree1.treesize) + start = start + tree.p[start].ps + end + tree.p[start].tag = TChoice; + tree.p[start].ps = tree1.treesize + 1 + tree.p[start + tree.p[start].ps].tag = TTrue + ffi.copy(tree.p + start + 1, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + end + return tree +end + + +-- #p == &p + +local function lp_and(pat) + return newroot1sib(TAnd, pat) +end + + +-- -p == !p + +local function lp_not(pat) + return newroot1sib(TNot, pat) +end + + +-- [t1 - t2] == Seq (Not t2) t1 +-- If t1 and t2 are charsets, make their difference. + +local function lp_sub(pat1, pat2) + local tree1 = getpatt(pat1) + local tree2 = getpatt(pat2) + local charset1 = lpcode.tocharset(tree1, 0, valuetable[tree1.id]) + local charset2 = lpcode.tocharset(tree2, 0, valuetable[tree2.id]) + if charset1 and charset2 then + local tree, set = newcharset() + for i = 0, 7 do + set[i] = band(charset1[i], bnot(charset2[i])) + end + return tree + else + local tree = treepattern(2 + tree1.treesize + tree2.treesize) + local ktable, offset = copykeys(valuetable[tree2.id], valuetable[tree1.id]) + valuetable[tree.id] = ktable + tree.p[0].tag = TSeq; -- sequence of... + tree.p[0].ps = 2 + tree2.treesize + tree.p[1].tag = TNot; -- ...not... + ffi.copy(tree.p + 2, tree2.p, ffi.sizeof(treepatternelement) * tree2.treesize) + ffi.copy(tree.p + tree2.treesize + 2, tree1.p, ffi.sizeof(treepatternelement) * tree1.treesize) + if offset > 0 then + correctkeys(tree, 2 + tree2.treesize, offset) + end + return tree + end +end + + +local function lp_set(val) + assert(type(val) == 'string') + local tree, set = newcharset() + for i = 1, #val do + local b = val:sub(i, i):byte() + set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31))) + end + return tree +end + + +local function lp_range(...) + local args = { ... } + local top = #args + local tree, set = newcharset() + for i = 1, top do + assert(#args[i] == 2, args[i] .. " range must have two characters") + for b = args[i]:sub(1, 1):byte(), args[i]:sub(2, 2):byte() do + set[rshift(b, 5)] = bor(set[rshift(b, 5)], lshift(1, band(b, 31))) + end + end + return tree +end + + +-- Look-behind predicate + +local function lp_behind(pat) + local tree1 = getpatt(pat) + local n = lpcode.fixedlenx(tree1, 0, 0, 0) + assert(not lpcode.hascaptures(tree1, 0), "pattern have captures") + assert(n >= 0, "pattern may not have fixed length") + assert(n <= MAXBEHIND, "pattern too long to look behind") + local tree = newroot1sib(TBehind, pat) + tree.p[0].val = n; + return tree +end + + +-- Create a non-terminal + +local function lp_V(val, p) + assert(val, "non-nil value expected") + local tree = treepattern(1) + valuetable[tree.id] = { val } + tree.p[0].tag = TOpenCall + tree.p[0].val = 1 + tree.p[0].cap = p or 0 + return tree +end + + +-- Create a tree for a non-empty capture, with a body and +-- optionally with an associated value + +local function capture_aux(cap, pat, val) + local tree = newroot1sib(TCapture, pat) + tree.p[0].cap = cap + if val then + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern" .. ind) + valuetable[tree.id][ind] = val + tree.p[0].val = ind + end + return tree +end + + +-- Fill a tree with an empty capture, using an empty (TTrue) sibling. + +local function auxemptycap(tree, cap, par, start) + tree.p[start].tag = TCapture; + tree.p[start].cap = cap + if type(par) ~= 'nil' then + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[tree.id][ind] = par + tree.p[start].val = ind + end + tree.p[start + 1].tag = TTrue; +end + + +-- Create a tree for an empty capture + +local function newemptycap(cap, par) + local tree = treepattern(2) + if type(par) ~= 'nil' then valuetable[tree.id] = {} end + auxemptycap(tree, cap, par, 0) + return tree +end + + +-- Captures with syntax p / v +-- (function capture, query capture, string capture, or number capture) + +local function lp_divcapture(pat, par, xxx) + local typ = type(par) + if typ == "function" then + return capture_aux(Cfunction, pat, par) + elseif typ == "table" then + return capture_aux(Cquery, pat, par) + elseif typ == "string" then + return capture_aux(Cstring, pat, par) + elseif typ == "number" then + local tree = newroot1sib(TCapture, pat) + assert(0 <= par and par <= 0xffff, "invalid number") + tree.p[0].cap = Cnum; + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[tree.id][ind] = par + tree.p[0].val = ind + return tree + else + error("invalid replacement value", 0) + end +end + + +local function lp_substcapture(pat) + return capture_aux(Csubst, pat) +end + + +local function lp_tablecapture(pat) + return capture_aux(Ctable, pat, 0) +end + + +local function lp_groupcapture(pat, val) + if not val then + return capture_aux(Cgroup, pat) + else + return capture_aux(Cgroup, pat, val) + end +end + + +local function lp_foldcapture(pat, fce) + assert(type(fce) == 'function') + return capture_aux(Cfold, pat, fce) +end + + +local function lp_simplecapture(pat) + return capture_aux(Csimple, pat) +end + + +local function lp_poscapture() + return newemptycap(Cposition) +end + + +local function lp_argcapture(val) + assert(type(val) == 'number') + local tree = newemptycap(Carg, 0) + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[tree.id][ind] = val + tree.p[0].val = ind + assert(0 < val and val <= 0xffff, "invalid argument index") + return tree +end + + +local function lp_backref(val) + return newemptycap(Cbackref, val) +end + + +-- Constant capture + +local function lp_constcapture(...) + local tree + local args = { ... } + local n = select('#', ...) -- number of values + -- no values? + if n == 0 then + tree = treepattern(1) -- no capture + tree.p[0].tag = TTrue + elseif n == 1 then + tree = newemptycap(Cconst, args[1]) -- single constant capture + -- create a group capture with all values + else + tree = treepattern(3 + 3 * (n - 1)) + valuetable[tree.id] = {} + tree.p[0].tag = TCapture + tree.p[0].cap = Cgroup + local start = 1 + for i = 1, n - 1 do + tree.p[start].tag = TSeq + tree.p[start].ps = 3 + auxemptycap(tree, Cconst, args[i], start + 1) + start = start + tree.p[start].ps + end + auxemptycap(tree, Cconst, args[n], start) + end + return tree +end + + +local function lp_matchtime(pat, fce, name) + assert(type(fce) == 'function') + if name and type(name) == 'string' then + funcnames[fce] = name + end + local tree = newroot1sib(TRunTime, pat) + local ind = #valuetable[tree.id] + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[tree.id][ind] = fce + tree.p[0].val = ind + return tree +end + +-- ====================================================== + + + +-- ====================================================== +-- Grammar - Tree generation +-- ======================================================= + + +-- return index and the pattern for the +-- initial rule of grammar; +-- also add that index into position table. + +local function getfirstrule(pat, postab) + local key + -- access first element + if type(pat[1]) == 'string' then + key = pat[1] + else + key = 1 + end + local rule = pat[key] + if not rule then + error("grammar has no initial rule", 0) + end + -- initial rule not a pattern? + if not ffi.istype(treepattern, rule) then + error(("initial rule '%s' is not a pattern"):format(tostring(key)), 0) + end + postab[key] = 1 + return key, rule +end + + +-- traverse grammar, collect all its keys and patterns +-- into rule table. Create a new table (before all pairs key-pattern) to +-- collect all keys and their associated positions in the final tree +-- (the "position table"). +-- Return the number of rules and the total size +-- for the new tree. + +local function collectrules(pat) + local n = 1; -- to count number of rules + local postab = {} + local firstkeyrule, firstrule = getfirstrule(pat, postab) + local rules = { firstkeyrule, firstrule } + local size = 2 + firstrule.treesize -- TGrammar + TRule + rule + for key, val in pairs(pat) do + -- initial rule? + if key ~= 1 and tostring(val) ~= tostring(firstrule) then + -- value is not a pattern? + if not ffi.istype(treepattern, val) then + error(("rule '%s' is not a pattern"):format(tostring(key)), 0) + end + rules[#rules + 1] = key + rules[#rules + 1] = val + postab[key] = size + size = 1 + size + val.treesize + n = n + 1 + end + end + size = size + 1; -- TTrue to finish list of rules + return n, size, rules, postab +end + + +local function buildgrammar(grammar, rules, n, index, valuetable) + local ktable, offset = {}, 0 + -- add each rule into new tree + for i = 1, n do + local size = rules[i * 2].treesize + grammar.p[index].tag = TRule; + grammar.p[index].cap = i; -- rule number + grammar.p[index].ps = size + 1; -- point to next rule + local ind = #ktable + 1 + ktable[ind] = rules[i * 2 - 1] + grammar.p[index].val = ind + ffi.copy(grammar.p + index + 1, rules[i * 2].p, ffi.sizeof(treepatternelement) * size) -- copy rule + ktable, offset = copykeys(ktable, valuetable[rules[i * 2].id]) + if offset > 0 then + correctkeys(grammar, index + 1, offset) + end + index = index + grammar.p[index].ps; -- move to next rule + end + grammar.p[index].tag = TTrue; -- finish list of rules + return ktable +end + + +-- Check whether a tree has potential infinite loops + +local function checkloops(tree, index) + local tag = tree.p[index].tag + if tag == TRep and lpcode.checkaux(tree, PEnullable, index + 1) then + return true + elseif tag == TGrammar then + return -- sub-grammars already checked + else + local tag = numsiblings[tree.p[index].tag + 1] + if tag == 0 then + return + elseif tag == 1 then + return checkloops(tree, index + 1) + elseif tag == 2 then + if checkloops(tree, index + 1) then + return true + else + return checkloops(tree, index + tree.p[index].ps) + end + else + assert(false) + end + end +end + +-- Check whether a rule can be left recursive; returns PEleftrecursion in that +-- case; otherwise return 1 iff pattern is nullable. + +local function verifyrule(rulename, tree, passed, nullable, index, valuetable) + local tag = tree.p[index].tag + if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then + return nullable; -- cannot pass from here + elseif tag == TTrue or tag == TBehind then + return true; + elseif tag == TNot or tag == TAnd or tag == TRep then + return verifyrule(rulename, tree, passed, true, index + 1, valuetable) + elseif tag == TCapture or tag == TRunTime then + return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable) + elseif tag == TCall then + local rule = valuetable[tree.p[index].val] + if rule == rulename then return PEleftrecursion end + if passed[rule] and passed[rule] > MAXRULES then + return nullable + end + return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable) + -- only check 2nd child if first is nullable + elseif tag == TSeq then + local res = verifyrule(rulename, tree, passed, false, index + 1, valuetable) + if res == PEleftrecursion then + return res + elseif not res then + return nullable + else + return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable) + end + -- must check both children + elseif tag == TChoice then + nullable = verifyrule(rulename, tree, passed, nullable, index + 1, valuetable) + if nullable == PEleftrecursion then return nullable end + return verifyrule(rulename, tree, passed, nullable, index + tree.p[index].ps, valuetable) + elseif tag == TRule then + local rule = valuetable[tree.p[index].val] + passed[rule] = (passed[rule] or 0) + 1 + return verifyrule(rulename, tree, passed, nullable, index + 1, valuetable) + elseif tag == TGrammar then + return lpcode.checkaux(tree, PEnullable, index) -- sub-grammar cannot be left recursive + else + assert(false) + end +end + + +local function verifygrammar(rule, index, valuetable) + -- check left-recursive rules + local LR = {} + local ind = index + 1 + while rule.p[ind].tag == TRule do + local rulename = valuetable[rule.p[ind].val] + -- used rule + if rulename then + if verifyrule(rulename, rule, {}, false, ind + 1, valuetable) == PEleftrecursion then + if not LREnable then + error(("rule '%s' may be left recursive"):format(rulename), 0) + end + LR[rulename] = true + end + end + ind = ind + rule.p[ind].ps + end + assert(rule.p[ind].tag == TTrue) + + for i = 0, rule.treesize - 1 do + if rule.p[i].tag == TRule and LR[valuetable[rule.p[i].val]] then + rule.p[i].cap = bor(rule.p[i].cap, RuleLR) --TRule can be left recursive + end + if rule.p[i].tag == TCall and LR[valuetable[rule.p[i].val]] then + if rule.p[i].cap == 0 then + rule.p[i].cap = 1 --TCall can be left recursive + end + end + end + + -- check infinite loops inside rules + ind = index + 1 + while rule.p[ind].tag == TRule do + -- used rule + if rule.p[ind].val then + if checkloops(rule, ind + 1) then + error(("empty loop in rule '%s'"):format(tostring(valuetable[rule.p[ind].val])), 0) + end + end + ind = ind + rule.p[ind].ps + end + assert(rule.p[ind].tag == TTrue) +end + + +-- Give a name for the initial rule if it is not referenced + +local function initialrulename(grammar, val, valuetable) + grammar.p[1].cap = bit.bor(grammar.p[1].cap, Ruleused) + -- initial rule is not referenced? + if grammar.p[1].val == 0 then + local ind = #valuetable + 1 + assert(ind <= 65536, "too many Lua values in pattern") + valuetable[ind] = val + grammar.p[1].val = ind + end +end + + +function newgrammar(pat) + -- traverse grammar. Create a new table (before all pairs key-pattern) to + -- collect all keys and their associated positions in the final tree + -- (the "position table"). + -- Return new tree. + + local n, size, rules, postab = collectrules(pat) + local grammar = treepattern(size) + local start = 0 + grammar.p[start].tag = TGrammar + grammar.p[start].val = n + valuetable[grammar.id] = buildgrammar(grammar, rules, n, start + 1, valuetable) + finalfix(true, postab, grammar, start + 1, valuetable[grammar.id]) + initialrulename(grammar, rules[1], valuetable[grammar.id]) + verifygrammar(grammar, 0, valuetable[grammar.id]) + return grammar +end + +-- ====================================================== + +-- remove duplicity from value table + +local function reducevaluetable(p) + local vtable = valuetable[p.id] + local value = {} + local newvaluetable = {} + + local function check(v) + if v > 0 then + local ord = value[vtable[v]] + if not ord then + newvaluetable[#newvaluetable + 1] = vtable[v] + ord = #newvaluetable + value[vtable[v]] = ord + end + return ord + end + return 0 + end + + local function itertree(p, index) + local tag = p.p[index].tag + if tag == TSet or tag == TCall or tag == TOpenCall or + tag == TRule or tag == TCapture or tag == TRunTime then + p.p[index].val = check(p.p[index].val) + end + local ns = numsiblings[tag + 1] + if ns == 0 then + elseif ns == 1 then + return itertree(p, index + 1) + elseif ns == 2 then + itertree(p, index + 1) + return itertree(p, index + p.p[index].ps) + else + assert(false) + end + end + + if p.treesize > 0 then + itertree(p, 0) + end + if p.code ~= nil then + for i = 0, p.code.size - 1 do + local code = p.code.p[i].code + if code == ICall or code == IJmp then + p.code.p[i].aux = check(p.code.p[i].aux) + elseif code == ISet or code == ITestSet or code == ISpan then + p.code.p[i].val = check(p.code.p[i].val) + elseif code == IOpenCapture or code == IFullCapture then + p.code.p[i].offset = check(p.code.p[i].offset) + end + end + end + valuetable[p.id] = newvaluetable +end + + +local function checkalt(tree) + local notchecked = {} + local notinalternativerules = {} + + local function iter(tree, index, choice, rule) + local tag = tree[index].tag + if tag == TCapture and bit.band(tree[index].cap, 0xffff) == Cgroup then + if not choice then + if rule then + notchecked[rule] = index + end + else + tree[index].cap = bit.bor(tree[index].cap, BCapcandelete) + end + elseif tag == TChoice then + choice = true + elseif tag == TRule then + rule = tree[index].val + if bit.band(tree[index].cap, 0xffff) - 1 == 0 then + notinalternativerules[rule] = notinalternativerules[rule] or true + end + elseif tag == TCall then + local r = tree[index].val + if not choice then + notinalternativerules[r] = notinalternativerules[r] or true + end + end + local sibs = numsiblings[tree[index].tag + 1] or 0 + if sibs >= 1 then + iter(tree, index + 1, choice, rule) + if sibs >= 2 then + return iter(tree, index + tree[index].ps, choice, rule) + end + end + end + + iter(tree, 0) + for k, v in pairs(notchecked) do + if not notinalternativerules[k] then + tree[v].cap = bit.bor(tree[v].cap, BCapcandelete) + end + end +end + + +local function prepcompile(p, index) + finalfix(false, nil, p, index, valuetable[p.id]) + checkalt(p.p) + lpcode.compile(p, index, valuetable[p.id]) + reducevaluetable(p) + return p.code +end + + +local function lp_printtree(pat, c) + assert(pat.treesize > 0) + if c then + finalfix(false, nil, pat, 0, valuetable[pat.id]) + end + lpprint.printtree(pat.p, 0, 0, valuetable[pat.id]) +end + + +local function lp_printcode(pat) + -- not compiled yet? + if pat.code == nil then + prepcompile(pat, 0) + end + lpprint.printpatt(pat.code, valuetable[pat.id]) +end + + +-- Main match function + +local function lp_match(pat, s, init, ...) + local p = ffi.istype(treepattern, pat) and pat or getpatt(pat) + p.code = p.code ~= nil and p.code or prepcompile(p, 0) + return lpvm.match(p, s, init, valuetable[p.id], ...) +end + +local function lp_streammatch(pat, init, ...) + local p = ffi.istype(treepattern, pat) and pat or getpatt(pat) + p.code = p.code ~= nil and p.code or prepcompile(p, 0) + return lpvm.streammatch(p, init, valuetable[p.id], ...) +end + +-- Only for testing purpose +-- stream emulation (send all chars from string one char after char) +local function lp_emulatestreammatch(pat, s, init, ...) + local p = ffi.istype(treepattern, pat) and pat or getpatt(pat) + p.code = p.code ~= nil and p.code or prepcompile(p, 0) + return lpvm.emulatestreammatch(p, s, init, valuetable[p.id], ...) +end + +-- {====================================================== +-- Library creation and functions not related to matching +-- ======================================================= + +local function lp_setmax(val) + lpvm.setmax(val) +end + +local function lp_setmaxbehind(val) + lpvm.setmaxbehind(val) +end + +local function lp_enableleftrecursion(val) + LREnable = val +end + +local function lp_version() + return VERSION +end + + +local function lp_type(pat) + if ffi.istype(treepattern, pat) then + return "pattern" + end +end + + +local function createcat(tab, catname, catfce) + local t, set = newcharset() + for i = 0, 255 do + if catfce(i) ~= 0 then + set[rshift(i, 5)] = bor(set[rshift(i, 5)], lshift(1, band(i, 31))) + end + end + tab[catname] = t +end + + +local function lp_locale(tab) + tab = tab or {} + createcat(tab, "alnum", function(c) return ffi.C.isalnum(c) end) + createcat(tab, "alpha", function(c) return ffi.C.isalpha(c) end) + createcat(tab, "cntrl", function(c) return ffi.C.iscntrl(c) end) + createcat(tab, "digit", function(c) return ffi.C.isdigit(c) end) + createcat(tab, "graph", function(c) return ffi.C.isgraph(c) end) + createcat(tab, "lower", function(c) return ffi.C.islower(c) end) + createcat(tab, "print", function(c) return ffi.C.isprint(c) end) + createcat(tab, "punct", function(c) return ffi.C.ispunct(c) end) + createcat(tab, "space", function(c) return ffi.C.isspace(c) end) + createcat(tab, "upper", function(c) return ffi.C.isupper(c) end) + createcat(tab, "xdigit", function(c) return ffi.C.isxdigit(c) end) + return tab +end + + +local function lp_new(ct, size) + local pat = ffi.new(ct, size) + pat.treesize = size + patternid = patternid + 1 + pat.id = patternid + return pat +end + + +local function lp_gc(ct) + valuetable[ct.id] = nil + if ct.code ~= nil then + ffi.C.free(ct.code.p) + ffi.C.free(ct.code) + end +end + +local function lp_eq(ct1, ct2) + return tostring(ct1) == tostring(ct2) +end + +local function lp_load(str, fcetab) + local pat, t = lpvm.load(str, fcetab, true) + valuetable[pat.id] = t + return pat +end + +local function lp_loadfile(fname, fcetab) + local pat, t = lpvm.loadfile(fname, fcetab, true) + valuetable[pat.id] = t + return pat +end + +local function lp_dump(ct, tree) + local funccount = 0 + -- not compiled yet? + if ct.code == nil then + prepcompile(ct, 0) + end + local out = {} + if tree then + out[#out + 1] = ffi.string(uint32(ct.treesize), 4) + out[#out + 1] = ffi.string(ct.p, ffi.sizeof(treepatternelement) * ct.treesize) + else + out[#out + 1] = ffi.string(uint32(0), 4) + end + out[#out + 1] = ffi.string(uint32(ct.code.size), 4) + out[#out + 1] = ffi.string(ct.code.p, ct.code.size * ffi.sizeof(patternelement)) + local t = valuetable[ct.id] + local len = t and #t or 0 + out[#out + 1] = ffi.string(uint32(len), 4) + if len > 0 then + for _, val in ipairs(t) do + local typ = type(val) + if typ == 'string' then + out[#out + 1] = 'str' + out[#out + 1] = ffi.string(uint32(#val), 4) + out[#out + 1] = val + elseif typ == 'number' then + local val = tostring(val) + out[#out + 1] = 'num' + out[#out + 1] = ffi.string(uint32(#val), 4) + out[#out + 1] = val + elseif typ == 'cdata' then + out[#out + 1] = 'cdt' + out[#out + 1] = ffi.string(val, ffi.sizeof(val)) + elseif typ == 'function' then + out[#out + 1] = 'fnc' + funccount = funccount + 1 + local name = funcnames[val] or ('FNAME%03d'):format(funccount) + out[#out + 1] = ffi.string(uint32(#name), 4) + out[#out + 1] = name + if not funcnames[val] and debug.getupvalue(val, 1) then + io.write(("Patterns function (%d) contains upvalue (%s) - use symbol name for function (%s).\n"):format(funccount, debug.getupvalue(val, 1), name), 0) + end + local data = string.dump(val, true) + out[#out + 1] = ffi.string(uint32(#data), 4) + out[#out + 1] = data + else + error(("Type '%s' NYI for dump"):format(typ), 0) + end + end + end + return table.concat(out) +end + +local function lp_save(ct, fname, tree) + local file = assert(io.open(fname, 'wb')) + file:write(lp_dump(ct, tree)) + file:close() +end + + +local pattreg = { + ["ptree"] = lp_printtree, + ["pcode"] = lp_printcode, + ["match"] = lp_match, + ["streammatch"] = lp_streammatch, + ["emulatestreammatch"] = lp_emulatestreammatch, + ["setmaxbehind"] = lp_setmaxbehind, + ["B"] = lp_behind, + ["V"] = lp_V, + ["C"] = lp_simplecapture, + ["Cc"] = lp_constcapture, + ["Cmt"] = lp_matchtime, + ["Cb"] = lp_backref, + ["Carg"] = lp_argcapture, + ["Cp"] = lp_poscapture, + ["Cs"] = lp_substcapture, + ["Ct"] = lp_tablecapture, + ["Cf"] = lp_foldcapture, + ["Cg"] = lp_groupcapture, + ["P"] = lp_P, + ["S"] = lp_set, + ["R"] = lp_range, + ["L"] = lp_and, + ["locale"] = lp_locale, + ["version"] = lp_version, + ["setmaxstack"] = lp_setmax, + ["type"] = lp_type, + ["enableleftrecursion"] = lp_enableleftrecursion, + ["enablememoization"] = lpvm.enablememoization, + ["enabletracing"] = lpvm.enabletracing, + ["save"] = lp_save, + ["dump"] = lp_dump, + ["load"] = lp_load, + ["loadfile"] = lp_loadfile, + ["__mul"] = lp_seq, + ["__add"] = lp_choice, + ["__pow"] = lp_star, + ["__len"] = lp_and, + ["__div"] = lp_divcapture, + ["__unm"] = lp_not, + ["__sub"] = lp_sub, +} + +local metareg = { + ["__gc"] = lp_gc, + ["__new"] = lp_new, + ["__mul"] = lp_seq, + ["__add"] = lp_choice, + ["__pow"] = lp_star, + ["__len"] = lp_and, + ["__div"] = lp_divcapture, + ["__unm"] = lp_not, + ["__sub"] = lp_sub, + ["__eq"] = lp_eq, + ["__index"] = pattreg +} + +ffi.metatype(treepattern, metareg) + +return pattreg diff --git a/vendor/lpeglj/lpprint.lua b/vendor/lpeglj/lpprint.lua new file mode 100644 index 0000000..86f6897 --- /dev/null +++ b/vendor/lpeglj/lpprint.lua @@ -0,0 +1,356 @@ +--[[ +LPEGLJ +lpprint.lua +Tree, code and debug print function (only for debuging) +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] + +local ffi = require"ffi" +local band, rshift, lshift = bit.band, bit.rshift, bit.lshift + +ffi.cdef[[ + int isprint ( int c ); +]] + +local RuleLR = 0x10000 +local Ruleused = 0x20000 + +-- {====================================================== +-- Printing patterns (for debugging) +-- ======================================================= + +local TChar = 0 +local TSet = 1 +local TAny = 2 -- standard PEG elements +local TTrue = 3 +local TFalse = 4 +local TRep = 5 +local TSeq = 6 +local TChoice = 7 +local TNot = 8 +local TAnd = 9 +local TCall = 10 +local TOpenCall = 11 +local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule +local TGrammar = 13 -- sib1 is initial (and first) rule +local TBehind = 14 -- match behind +local TCapture = 15 -- regular capture +local TRunTime = 16 -- run-time capture + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != aux, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != aux, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'aux' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice to current position and jump +local IBackCommit = 16 -- "fails" but jump to its own 'offset' +local IFailTwice = 17 -- pop one choice and then fail +local IFail = 18 -- go back to saved state on choice and jump to saved offset +local IGiveup = 19 -- internal use +local IFullCapture = 20 -- complete capture of last 'off' chars +local IOpenCapture = 21 -- start a capture +local ICloseCapture = 22 +local ICloseRunTime = 23 + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + + +-- number of siblings for each tree +local numsiblings = { + [TRep] = 1, + [TSeq] = 2, + [TChoice] = 2, + [TNot] = 1, + [TAnd] = 1, + [TRule] = 2, + [TGrammar] = 1, + [TBehind] = 1, + [TCapture] = 1, + [TRunTime] = 1, +} +local names = { + [IAny] = "any", + [IChar] = "char", + [ISet] = "set", + [ITestAny] = "testany", + [ITestChar] = "testchar", + [ITestSet] = "testset", + [ISpan] = "span", + [IBehind] = "behind", + [IRet] = "ret", + [IEnd] = "end", + [IChoice] = "choice", + [IJmp] = "jmp", + [ICall] = "call", + [IOpenCall] = "open_call", + [ICommit] = "commit", + [IPartialCommit] = "partial_commit", + [IBackCommit] = "back_commit", + [IFailTwice] = "failtwice", + [IFail] = "fail", + [IGiveup] = "giveup", + [IFullCapture] = "fullcapture", + [IOpenCapture] = "opencapture", + [ICloseCapture] = "closecapture", + [ICloseRunTime] = "closeruntime" +} + +local function printcharset(st) + io.write("["); + local i = 0 + while i <= 255 do + local first = i; + while band(st[rshift(i, 5)], lshift(1, band(i, 31))) ~= 0 and i <= 255 do + i = i + 1 + end + if i - 1 == first then -- unary range? + io.write(("(%02x)"):format(first)) + elseif i - 1 > first then -- non-empty range? + io.write(("(%02x-%02x)"):format(first, i - 1)) + end + i = i + 1 + end + io.write("]") +end + +local modes = { + [Cclose] = "close", + [Cposition] = "position", + [Cconst] = "constant", + [Cbackref] = "backref", + [Carg] = "argument", + [Csimple] = "simple", + [Ctable] = "table", + [Cfunction] = "function", + [Cquery] = "query", + [Cstring] = "string", + [Cnum] = "num", + [Csubst] = "substitution", + [Cfold] = "fold", + [Cruntime] = "runtime", + [Cgroup] = "group" +} + +local function printcapkind(kind) + io.write(("%s"):format(modes[kind])) +end + +local function printjmp(p, index) + io.write(("-> %d"):format(index + p[index].offset)) +end + +local function printrulename(p, index, rulenames) + if rulenames and rulenames[index + p[index].offset] then + io.write(' ', rulenames[index + p[index].offset]) + end +end + +local function printinst(p, index, valuetable, rulenames) + local code = p[index].code + if rulenames and rulenames[index] then + io.write(rulenames[index], '\n') + end + io.write(("%04d: %s "):format(index, names[code])) + if code == IChar then + io.write(("'%s'"):format(string.char(p[index].val))) + elseif code == ITestChar then + io.write(("'%s'"):format(string.char(p[index].val))) + printjmp(p, index) + printrulename(p, index, rulenames) + elseif code == IFullCapture then + printcapkind(band(p[index].val, 0x0f)); + io.write((" (size = %d) (idx = %s)"):format(band(rshift(p[index].val, 4), 0xF), tostring(valuetable[p[index].offset]))) + elseif code == IOpenCapture then + printcapkind(band(p[index].val, 0x0f)) + io.write((" (idx = %s)"):format(tostring(valuetable[p[index].offset]))) + elseif code == ISet then + printcharset(valuetable[p[index].val]); + elseif code == ITestSet then + printcharset(valuetable[p[index].val]) + printjmp(p, index); + printrulename(p, index, rulenames) + elseif code == ISpan then + printcharset(valuetable[p[index].val]); + elseif code == IOpenCall then + io.write(("-> %d"):format(p[index].offset)) + elseif code == IBehind then + io.write(("%d"):format(p[index].val)) + elseif code == IJmp or code == ICall or code == ICommit or code == IChoice or + code == IPartialCommit or code == IBackCommit or code == ITestAny then + printjmp(p, index); + if (code == ICall or code == IJmp) and p[index].aux > 0 then + io.write(' ', valuetable[p[index].aux]) + else + printrulename(p, index, rulenames) + end + end + io.write("\n") +end + + +local function printpatt(p, valuetable) + local ruleNames = {} + for i = 0, p.size - 1 do + local code = p.p[i].code + if (code == ICall or code == IJmp) and p.p[i].aux > 0 then + local index = i + p.p[i].offset + ruleNames[index] = valuetable[p.p[i].aux] + end + end + for i = 0, p.size - 1 do + printinst(p.p, i, valuetable, ruleNames) + end +end + + +local function printcap(cap, index, valuetable) + printcapkind(cap[index].kind) + io.write((" (idx: %s - size: %d) -> %d\n"):format(valuetable[cap[index].idx], cap[index].siz, cap[index].s)) +end + + +local function printcaplist(cap, limit, valuetable) + io.write(">======\n") + local index = 0 + while cap[index].s and index < limit do + printcap(cap, index, valuetable) + index = index + 1 + end + io.write("=======\n") +end + +-- ====================================================== + + + +-- {====================================================== +-- Printing trees (for debugging) +-- ======================================================= + +local tagnames = { + [TChar] = "char", + [TSet] = "set", + [TAny] = "any", + [TTrue] = "true", + [TFalse] = "false", + [TRep] = "rep", + [TSeq] = "seq", + [TChoice] = "choice", + [TNot] = "not", + [TAnd] = "and", + [TCall] = "call", + [TOpenCall] = "opencall", + [TRule] = "rule", + [TGrammar] = "grammar", + [TBehind] = "behind", + [TCapture] = "capture", + [TRunTime] = "run-time" +} + + +local function printtree(tree, ident, index, valuetable) + for i = 1, ident do + io.write(" ") + end + local tag = tree[index].tag + io.write(("%s"):format(tagnames[tag])) + if tag == TChar then + local c = tree[index].val + if ffi.C.isprint(c) then + io.write((" '%c'\n"):format(c)) + else + io.write((" (%02X)\n"):format(c)) + end + elseif tag == TSet then + printcharset(valuetable[tree[index].val]); + io.write("\n") + elseif tag == TOpenCall or tag == TCall then + io.write((" key: %s\n"):format(tostring(valuetable[tree[index].val]))) + elseif tag == TBehind then + io.write((" %d\n"):format(tree[index].val)) + printtree(tree, ident + 2, index + 1, valuetable); + elseif tag == TCapture then + io.write((" cap: %s n: %s\n"):format(modes[bit.band(tree[index].cap, 0xffff)], valuetable[tree[index].val])) + printtree(tree, ident + 2, index + 1, valuetable); + elseif tag == TRule then + local extra = bit.band(tree[index].cap, RuleLR) == RuleLR and ' left recursive' or '' + extra = extra .. (bit.band(tree[index].cap, Ruleused) ~= Ruleused and ' not used' or '') + io.write((" n: %d key: %s%s\n"):format(bit.band(tree[index].cap, 0xffff) - 1, valuetable[tree[index].val], extra)) + printtree(tree, ident + 2, index + 1, valuetable); + -- do not print next rule as a sibling + elseif tag == TGrammar then + local ruleindex = index + 1 + io.write((" %d\n"):format(tree[index].val)) -- number of rules + for i = 1, tree[index].val do + printtree(tree, ident + 2, ruleindex, valuetable); + ruleindex = ruleindex + tree[ruleindex].ps + end + assert(tree[ruleindex].tag == TTrue); -- sentinel + else + local sibs = numsiblings[tree[index].tag] or 0 + io.write("\n") + if sibs >= 1 then + printtree(tree, ident + 2, index + 1, valuetable); + if sibs >= 2 then + printtree(tree, ident + 2, index + tree[index].ps, valuetable) + end + end + end +end + +-- }====================================================== */ + +return { + printtree = printtree, + printpatt = printpatt, + printcaplist = printcaplist, + printinst = printinst +}
\ No newline at end of file diff --git a/vendor/lpeglj/lpvm.lua b/vendor/lpeglj/lpvm.lua new file mode 100644 index 0000000..1a86dc4 --- /dev/null +++ b/vendor/lpeglj/lpvm.lua @@ -0,0 +1,1041 @@ +--[[ +LPEGLJ +lpvm.lua +Virtual machine +Copyright (C) 2014 Rostislav Sacek. +based on LPeg v1.0 - PEG pattern matching for Lua +Lua.org & PUC-Rio written by Roberto Ierusalimschy +http://www.inf.puc-rio.br/~roberto/lpeg/ + +** Permission is hereby granted, free of charge, to any person obtaining +** a copy of this software and associated documentation files (the +** "Software"), to deal in the Software without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Software, and to +** permit persons to whom the Software is furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be +** included in all copies or substantial portions of the Software. +** +** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +** +** [ MIT license: http://www.opensource.org/licenses/mit-license.php ] +--]] + +local ffi = require "ffi" +local lpcap = require "lpcap" +--[[ Only for debug purpose +local lpprint = require"lpprint" +--]] + +local band, rshift, lshift = bit.band, bit.rshift, bit.lshift + +-- {====================================================== +-- Virtual Machine +-- ======================================================= + +-- Interpret the result of a dynamic capture: false -> fail; +-- true -> keep current position; number -> next position. +-- Return new subject position. 'fr' is stack index where +-- is the result; 'curr' is current subject position; 'limit' +-- is subject's size. + +local MAXBEHINDPREDICATE = 255 -- max behind for Look-behind predicate +local MAXOFF = 0xF -- maximum for full capture +local MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF) -- maximum before current pos +local INITBACK = 400 -- default maximum size for call/backtrack stack + +local IAny = 0 -- if no char, fail +local IChar = 1 -- if char != val, fail +local ISet = 2 -- if char not in val, fail +local ITestAny = 3 -- in no char, jump to 'offset' +local ITestChar = 4 -- if char != val, jump to 'offset' +local ITestSet = 5 -- if char not in val, jump to 'offset' +local ISpan = 6 -- read a span of chars in val +local IBehind = 7 -- walk back 'val' characters (fail if not possible) +local IRet = 8 -- return from a rule +local IEnd = 9 -- end of pattern +local IChoice = 10 -- stack a choice; next fail will jump to 'offset' +local IJmp = 11 -- jump to 'offset' +local ICall = 12 -- call rule at 'offset' +local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall) +local ICommit = 14 -- pop choice and jump to 'offset' +local IPartialCommit = 15 -- update top choice to current position and jump +local IBackCommit = 16 -- "fails" but jump to its own 'offset' +local IFailTwice = 17 -- pop one choice and then fail +local IFail = 18 -- go back to saved state on choice and jump to saved offset +local IGiveup = 19 -- internal use +local IFullCapture = 20 -- complete capture of last 'off' chars +local IOpenCapture = 21 -- start a capture +local ICloseCapture = 22 +local ICloseRunTime = 23 + +local Cclose = 0 +local Cposition = 1 +local Cconst = 2 +local Cbackref = 3 +local Carg = 4 +local Csimple = 5 +local Ctable = 6 +local Cfunction = 7 +local Cquery = 8 +local Cstring = 9 +local Cnum = 10 +local Csubst = 11 +local Cfold = 12 +local Cruntime = 13 +local Cgroup = 14 + +local BCapcandelete = 0x30000 +local maxstack = INITBACK +local maxcapturedefault = 100 +local maxmemo = 1000 +local usememoization = false +local trace = false + +local FAIL = -1 +local LRFAIL = -1 +local VOID = -2 +local CHOICE = -3 +local CALL = -4 + +ffi.cdef [[ +typedef struct { + int code; + int val; + int offset; + int aux; + } PATTERN_ELEMENT; +typedef struct { + int allocsize; + int size; + PATTERN_ELEMENT *p; + } PATTERN; +typedef struct { + int tag; + int val; + int ps; + int cap; + } TREEPATTERN_ELEMENT; +typedef struct { + int id; + int treesize; + PATTERN *code; + TREEPATTERN_ELEMENT p[?]; + } TREEPATTERN; + +typedef struct { + double s; + double X; + double memos; + int p; + int caplevel; + int pA; + int valuetabletop; + } STACK; + +typedef struct { + double s; + int siz; + int idx; + int kind; + int candelete; + } CAPTURE; + +void *malloc( size_t size ); +void free( void *memblock ); +void *realloc( void *memblock, size_t size ); +]] + +local treepatternelement = ffi.typeof('TREEPATTERN_ELEMENT') +local treepattern = ffi.typeof('TREEPATTERN') +local patternelement = ffi.typeof('PATTERN_ELEMENT') +local pattern = ffi.typeof('PATTERN') +local settype = ffi.typeof('int32_t[8]') + +local function resdyncaptures(fr, curr, limit, checkstreamlen) + local typ = type(fr) + -- false value? + if not fr then + return FAIL -- and fail + elseif typ == 'boolean' then + -- true? + return curr -- keep current position + else + local res = fr -- new position + if res < curr or (limit and res > limit) or (not limit and checkstreamlen and not checkstreamlen(res - 2)) then + error("invalid position returned by match-time capture", 0) + end + return res + end + assert(false) +end + + +-- Add capture values returned by a dynamic capture to the capture list +-- 'base', nested inside a group capture. 'fd' indexes the first capture +-- value, 'n' is the number of values (at least 1). + +local function adddyncaptures(s, base, index, n, fd, valuetable) + -- Cgroup capture is already there + assert(base[index].kind == Cgroup and base[index].siz == 0) + base[index].idx = 0 -- make it an anonymous group + base[index + 1] = {} + -- add runtime captures + for i = 1, n do + base[index + i].kind = Cruntime + base[index + i].siz = 1 -- mark it as closed + local ind = #valuetable + 1 + valuetable[ind] = fd[i + 1] + base[index + i].idx = ind -- stack index of capture value + base[index + i].s = s + base[index + i + 1] = {} + end + base[index + n + 1].kind = Cclose -- close group + base[index + n + 1].siz = 1 + base[index + n + 1].s = s + base[index + n + 2] = {} +end + + +-- Opcode interpreter + +local function match(stream, last, o, s, op, valuetable, ...) + local arg = { ... } + local argcount = select('#', ...) + local len = #o + local ptr = ffi.cast('const unsigned char*', o) + s = s - 1 + local stackptr = 0 -- point to first empty slot in stack + local captop = 0 -- point to first empty slot in captures + local STACK = ffi.new("STACK[?]", INITBACK) + local CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault) + local CAPTURESTACK = { { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } } + local capturestackptr = #CAPTURESTACK + local maxcapture = maxcapturedefault + local stacklimit = INITBACK + local L = {} + local Memo1, Memo2 = {}, {} + local memoind = 0 + local maxpointer = 2 ^ math.ceil(math.log(op.size) / math.log(2)) + local nocapturereleased = true + + local p = 0 -- current instruction + local streambufsize = 2 ^ 8 + local streambufsizemask = streambufsize - 1 -- faster modulo + local streambufs = {} + local streambufoffset = 0 + local streamstartbuffer = 0 + local streambufferscount = 0 + local level = -1 + + local function deletestreambuffers() + local min = s + for i = stackptr - 1, 0, -1 do + local val = STACK[i].s + if val >= 0 then + min = math.min(val, min) + end + end + + for i = captop - 1, 0, -1 do + local val = CAPTURE[i].s + if val >= 0 then + min = math.min(val, min) + end + end + for i = streamstartbuffer + 1, streambufoffset - streambufsize, streambufsize do + -- max behind for full capture and max behind for Look-behind predicate + if i + streambufsize + MAXBEHIND < min then + streambufs[i] = nil + streambufferscount = streambufferscount - 1 + else + streamstartbuffer = i - 1 + break + end + end + end + + local function addstreamdata(s, last) + local len = #s + local srcoffset = 0 + if streambufferscount > 128 then + deletestreambuffers() + end + repeat + local offset = bit.band(streambufoffset, streambufsizemask) + if offset > 0 then + local index = streambufoffset - offset + 1 + local count = math.min(len, streambufsize - offset) + ffi.copy(streambufs[index] + offset, s:sub(srcoffset + 1, srcoffset + 1 + count), count) + len = len - count + srcoffset = srcoffset + count + streambufoffset = streambufoffset + count + end + if len > 0 then + local index = streambufoffset - (bit.band(streambufoffset, streambufsizemask)) + 1 + local buf = ffi.new('unsigned char[?]', streambufsize) + streambufferscount = streambufferscount + 1 + streambufs[index] = buf + local count = math.min(len, streambufsize) + ffi.copy(buf, s:sub(srcoffset + 1, srcoffset + 1 + count), count) + len = len - count + srcoffset = srcoffset + count + streambufoffset = streambufoffset + count + end + if streambufoffset >= 2 ^ 47 then + error("too big input stream", 0) + end + until len == 0 + end + + local function getstreamchar(s) + local offset = bit.band(s, streambufsizemask) + local index = s - offset + 1 + return streambufs[index][offset] + end + + local checkstreamlen + + local function getstreamstring(st, en) + -- TODO Optimalize access + local str = {} + local i = st >= 0 and st or 1 + local to = en >= 0 and en or math.huge + while true do + if i > to then break end + if not checkstreamlen(i - 1) then return end + if last and (st < 0 or en < 0) then + for j = i, streambufoffset do + str[#str + 1] = string.char(getstreamchar(j - 1)) + end + en = en < 0 and streambufoffset + en + 1 or en + en = st > 0 and en - st + 1 or en + st = st < 0 and streambufoffset + st + 1 or 1 + return table.concat(str):sub(st, en) + else + str[#str + 1] = string.char(getstreamchar(i - 1)) + i = i + 1 + end + end + return table.concat(str) + end + + function checkstreamlen(index) + local str + while true do + if index < streambufoffset then + return true + else + if last then + s = streambufoffset + return false + end + local max = captop + for i = stackptr - 1, 0, -1 do + local val = STACK[i].X == CHOICE and STACK[i].caplevel or -1 + if val >= 0 then + max = math.min(val, max) + end + end + local n, out, outindex = lpcap.getcapturesruntime(CAPTURE, nil, getstreamstring, false, 0, max, captop, valuetable, unpack(arg, 1, argcount)) + if n > 0 then + for i = stackptr - 1, 0, -1 do + local val = STACK[i].caplevel + if val > 0 then + STACK[i].caplevel = STACK[i].caplevel - n + end + end + captop = captop - n + end + if outindex > 0 then + nocapturereleased = false + end + str, last = coroutine.yield(1, unpack(out, 1, outindex)) + addstreamdata(str) + end + end + end + + local function doublecapture() + maxcapture = maxcapture * 2 + local NEWCAPTURE = ffi.new("CAPTURE[?]", maxcapture) + ffi.copy(NEWCAPTURE, CAPTURE, ffi.sizeof('CAPTURE') * captop) + CAPTURE = NEWCAPTURE + CAPTURESTACK[capturestackptr].capture = CAPTURE + CAPTURESTACK[capturestackptr].maxcapture = maxcapture + end + + local function pushcapture() + CAPTURE[captop].idx = op.p[p].offset + CAPTURE[captop].kind = band(op.p[p].val, 0x0f) + CAPTURE[captop].candelete = band(op.p[p].val, BCapcandelete) ~= 0 and 1 or 0 + captop = captop + 1 + p = p + 1 + if captop >= maxcapture then + doublecapture() + end + end + + local function traceenter(typ, par) + level = level + (par or 0) + io.write(('%s+%s %s\n'):format((' '):rep(level), typ, valuetable[op.p[p].aux])) + end + + local function traceleave(inst) + io.write(('%s- %s\n'):format((' '):rep(level), valuetable[op.p[inst].aux])) + level = level - 1 + end + + local function tracematch(typ, start, par, from, to, inst, extra, ...) + local n, caps, capscount = lpcap.getcapturesruntime(CAPTURE, o, getstreamstring, true, start, captop, captop, valuetable, ...) + local capstr = {} + for i = 1, capscount do capstr[i] = tostring(caps[i]) end + extra = extra and '(' .. extra .. ')' or '' + io.write(('%s=%s %s%s %s %s \n'):format((' '):rep(level), typ, valuetable[op.p[inst].aux], extra, + o and o:sub(from, to) or getstreamstring(from, to), table.concat(capstr, " "))) + level = level - par + end + + local function fail() + -- pattern failed: try to backtrack + local X + repeat -- remove pending calls + stackptr = stackptr - 1 + if stackptr == -1 then + p = FAIL + return + end + s = STACK[stackptr].s + X = STACK[stackptr].X + if usememoization and X == CALL and STACK[stackptr].memos ~= VOID then + Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL + Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = FAIL + end + -- lvar.2 rest + if X == LRFAIL then + CAPTURESTACK[capturestackptr] = nil + capturestackptr = capturestackptr - 1 + CAPTURE = CAPTURESTACK[capturestackptr].capture + maxcapture = CAPTURESTACK[capturestackptr].maxcapture + L[STACK[stackptr].pA + s * maxpointer] = nil + end + if trace and (X == CALL or X == LRFAIL) then traceleave(STACK[stackptr].p - 1) end + until X == CHOICE or X >= 0 + p = STACK[stackptr].p + for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do + table.remove(valuetable) + end + -- inc.2 + if X >= 0 then + s = X + capturestackptr = capturestackptr - 1 + CAPTURE = CAPTURESTACK[capturestackptr].capture + captop = CAPTURESTACK[capturestackptr].captop + maxcapture = CAPTURESTACK[capturestackptr].maxcapture + local capture = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].capturecommit + while captop + capture.captop >= maxcapture do + doublecapture() + end + ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE')) + captop = captop + capture.captop + if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, unpack(arg, 1, argcount)) end + CAPTURESTACK[capturestackptr + 1] = nil + L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil + else + captop = STACK[stackptr].caplevel + end + end + + local function doublestack() + if stackptr >= maxstack then + error(("backtrack stack overflow (current limit is %d)"):format(maxstack), 0) + end + stacklimit = stacklimit * 2 + stacklimit = (stacklimit > maxstack) and maxstack or stacklimit + local NEWSTACK = ffi.new("STACK[?]", stacklimit) + ffi.copy(NEWSTACK, STACK, ffi.sizeof('STACK') * stackptr) + STACK = NEWSTACK + end + + if stream then + addstreamdata(o) + len = nil + o = nil + ptr = nil + end + while true do + --[[ Only for debug + io.write(("s: |%s| stck:%d, caps:%d \n"):format(s + 1, stackptr, captop)) + if p ~= FAIL then + lpprint.printinst(op.p, p, valuetable) + lpprint.printcaplist(CAPTURE, captop, valuetable) + end + --]] + if p == FAIL then return -1 end + local code = op.p[p].code + if code == IEnd then + CAPTURE[captop].kind = Cclose + CAPTURE[captop].s = -1 + return 0, lpcap.getcaptures(CAPTURE, o, getstreamstring, nocapturereleased and s + 1, valuetable, ...) + elseif code == IRet then + if STACK[stackptr - 1].X == CALL then + stackptr = stackptr - 1 + if trace then tracematch('', STACK[stackptr].caplevel, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, nil, ...) end + p = STACK[stackptr].p + if usememoization and STACK[stackptr].memos ~= VOID then + local dif = captop - STACK[stackptr].caplevel + local caps + if dif > 0 then + caps = ffi.new("CAPTURE[?]", dif) + ffi.copy(caps, CAPTURE + captop - dif, dif * ffi.sizeof('CAPTURE')) + end + local val = { s, dif, caps } + Memo1[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val + Memo2[STACK[stackptr].pA + STACK[stackptr].memos * maxpointer] = val + end + else + local X = STACK[stackptr - 1].X + -- lvar.1 inc.1 + if X == LRFAIL or s > X then + if trace then tracematch('IB', 0, 0, STACK[stackptr - 1].s + 1, s, STACK[stackptr - 1].p - 1, L[STACK[stackptr - 1].pA + STACK[stackptr - 1].s * maxpointer].level + 1, ...) end + STACK[stackptr - 1].X = s + p = STACK[stackptr - 1].pA + s = STACK[stackptr - 1].s + local lambda = L[p + s * maxpointer] + lambda.level = lambda.level + 1 + lambda.X = STACK[stackptr - 1].X + STACK[stackptr - 1].caplevel = captop + STACK[stackptr - 1].valuetabletop = #valuetable + CAPTURESTACK[capturestackptr].captop = captop + lambda.capturecommit = CAPTURESTACK[capturestackptr] + captop = 0 + CAPTURE = ffi.new("CAPTURE[?]", maxcapturedefault) + CAPTURESTACK[capturestackptr] = { capture = CAPTURE, captop = captop, maxcapture = maxcapturedefault } + maxcapture = maxcapturedefault + else + -- inc.3 + stackptr = stackptr - 1 + p = STACK[stackptr].p + s = STACK[stackptr].X + for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do + table.remove(valuetable) + end + local lambda = L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] + capturestackptr = capturestackptr - 1 + CAPTURE = CAPTURESTACK[capturestackptr].capture + captop = CAPTURESTACK[capturestackptr].captop + maxcapture = CAPTURESTACK[capturestackptr].maxcapture + local capture = lambda.capturecommit + while captop + capture.captop >= maxcapture do + doublecapture() + end + ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE')) + captop = captop + capture.captop + if trace then tracematch('', captop - capture.captop, 1, STACK[stackptr].s + 1, s, STACK[stackptr].p - 1, L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer].level, ...) end + CAPTURESTACK[capturestackptr + 1] = nil + L[STACK[stackptr].pA + STACK[stackptr].s * maxpointer] = nil + end + end + elseif code == IBehind then + local n = op.p[p].val + if n > s then + fail() + else + s = s - n + p = p + 1 + end + elseif code == IJmp then + if trace and op.p[p].aux ~= 0 then traceenter('TC') end + p = p + op.p[p].offset + elseif code == IChoice then + if stackptr == stacklimit then + doublestack() + end + STACK[stackptr].X = CHOICE + STACK[stackptr].p = p + op.p[p].offset + STACK[stackptr].s = s + STACK[stackptr].caplevel = captop + STACK[stackptr].valuetabletop = #valuetable + stackptr = stackptr + 1 + p = p + 1 + elseif code == ICall then + if stackptr == stacklimit then + doublestack() + end + local k = bit.band(op.p[p].val, 0xffff) + if k == 0 then + local pA = p + op.p[p].offset + local memo = Memo1[pA + s * maxpointer] + if usememoization and memo then + if trace then traceenter('M', 1) end + if memo == FAIL then + if trace then traceleave(p) end + fail() + else + local dif = memo[2] + if dif > 0 then + while captop + dif >= maxcapture do + doublecapture() + end + local caps = memo[3] + ffi.copy(CAPTURE + captop, caps, dif * ffi.sizeof('CAPTURE')) + captop = captop + dif + end + if trace then tracematch('M', captop - dif, 1, s + 1, memo[1], p, nil, ...) end + s = memo[1] + p = p + 1 + end + else + if trace then traceenter('', 1) end + STACK[stackptr].X = CALL + STACK[stackptr].s = s + STACK[stackptr].p = p + 1 -- save return address + STACK[stackptr].pA = pA + STACK[stackptr].memos = s + STACK[stackptr].caplevel = captop + stackptr = stackptr + 1 + p = pA + if usememoization and not memo then + memoind = memoind + 1 + if memoind > maxmemo then + memoind = 0 + Memo1 = Memo2 + Memo2 = {} + end + end + end + else + local pA = p + op.p[p].offset + local X = L[pA + s * maxpointer] + -- lvar.1 lvar.2 + if not X then + if trace then traceenter('', 1) end + CAPTURESTACK[capturestackptr].captop = captop + local capture = ffi.new("CAPTURE[?]", maxcapturedefault) + capturestackptr = capturestackptr + 1 + CAPTURESTACK[capturestackptr] = { capture = capture, captop = captop, maxcapture = maxcapturedefault } + CAPTURE = capture + maxcapture = maxcapturedefault + captop = 0 + L[pA + s * maxpointer] = { X = LRFAIL, k = k, cs = capturestackptr, level = 0 } + STACK[stackptr].p = p + 1 + STACK[stackptr].pA = pA + STACK[stackptr].s = s + STACK[stackptr].X = LRFAIL + stackptr = stackptr + 1 + p = pA + elseif X.X == LRFAIL or k < X.k then + -- lvar.3 lvar.5 + fail() + else + -- lvar.4 + local capture = X.capturecommit + while captop + capture.captop >= maxcapture do + doublecapture() + end + ffi.copy(CAPTURE + captop, capture.capture, capture.captop * ffi.sizeof('CAPTURE')) + captop = captop + capture.captop + p = p + 1 + s = X.X + end + end + elseif code == ICommit then + stackptr = stackptr - 1 + p = p + op.p[p].offset + elseif code == IPartialCommit then + STACK[stackptr - 1].s = s + STACK[stackptr - 1].caplevel = captop + STACK[stackptr - 1].valuetabletop = #valuetable + p = p + op.p[p].offset + elseif code == IBackCommit then + stackptr = stackptr - 1 + s = STACK[stackptr].s + captop = STACK[stackptr].caplevel + for i = #valuetable, STACK[stackptr].valuetabletop + 1, -1 do + table.remove(valuetable) + end + p = p + op.p[p].offset + elseif code == IFailTwice then + stackptr = stackptr - 1 + fail() + elseif code == IFail then + fail() + elseif code == ICloseRunTime then + -- invalidate memo + for i = 0, stackptr - 1 do + STACK[i].memos = VOID + end + local cs = {} + cs.s = o + cs.stream = getstreamstring + cs.ocap = CAPTURE + cs.ptop = arg + cs.ptopcount = argcount + local out = { outindex = 0, out = {} } + local n = lpcap.runtimecap(cs, captop, s + 1, out, valuetable) -- call function + captop = captop - n + local res = resdyncaptures(out.out[1], s + 1, len and len + 1, checkstreamlen) -- get result + -- fail? + if res == FAIL then + fail() + else + s = res - 1 -- else update current position + n = out.outindex - 1 -- number of new captures + -- any new capture? + if n > 0 then + captop = captop + 1 + while captop + n + 1 >= maxcapture do + doublecapture() + end + captop = captop + n + 1 + -- add new captures to 'capture' list + adddyncaptures(s + 1, CAPTURE, captop - n - 2, n, out.out, valuetable) + end + p = p + 1 + end + elseif code == ICloseCapture then + local s1 = s + 1 + assert(captop > 0) + -- if possible, turn capture into a full capture + if CAPTURE[captop - 1].siz == 0 and + s1 - CAPTURE[captop - 1].s < 255 then + CAPTURE[captop - 1].siz = s1 - CAPTURE[captop - 1].s + 1 + p = p + 1 + else + CAPTURE[captop].siz = 1 + CAPTURE[captop].s = s + 1 + pushcapture() + end + elseif code == IOpenCapture then + CAPTURE[captop].siz = 0 + CAPTURE[captop].s = s + 1 + pushcapture() + elseif code == IFullCapture then + CAPTURE[captop].siz = band(rshift(op.p[p].val, 4), 0x0F) + 1 -- save capture size + CAPTURE[captop].s = s + 1 - band(rshift(op.p[p].val, 4), 0x0F) + pushcapture() + -- standard mode + elseif o then + if code == IAny then + if s < len then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestAny then + if s < len then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == IChar then + if s < len and ptr[s] == op.p[p].val then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestChar then + if s < len and ptr[s] == op.p[p].val then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == ISet then + local c = ptr[s] + local set = valuetable[op.p[p].val] + if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestSet then + local c = ptr[s] + local set = valuetable[op.p[p].val] + if s < len and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == ISpan then + while s < len do + local c = ptr[s] + local set = valuetable[op.p[p].val] + if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then + break + end + s = s + 1 + end + p = p + 1 + end + else + -- stream mode + if code == IAny then + if checkstreamlen(s) then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestAny then + if checkstreamlen(s) then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == IChar then + if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestChar then + if checkstreamlen(s) and getstreamchar(s) == op.p[p].val then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == ISet then + local c = checkstreamlen(s) and getstreamchar(s) + local set = valuetable[op.p[p].val] + if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then + p = p + 1 + s = s + 1 + else + fail() + end + elseif code == ITestSet then + local c = checkstreamlen(s) and getstreamchar(s) + local set = valuetable[op.p[p].val] + if c and band(set[rshift(c, 5)], lshift(1, band(c, 31))) ~= 0 then + p = p + 1 + else + p = p + op.p[p].offset + end + elseif code == ISpan then + while checkstreamlen(s) do + local c = getstreamchar(s) + local set = valuetable[op.p[p].val] + if band(set[rshift(c, 5)], lshift(1, band(c, 31))) == 0 then + break + end + s = s + 1 + end + p = p + 1 + end + end + end +end + +local function setmax(val) + maxstack = val + if maxstack < INITBACK then + maxstack = INITBACK + end +end + +local function setmaxbehind(val) + MAXBEHIND = math.max(MAXBEHINDPREDICATE, MAXOFF, val or 0) +end + +local function enablememoization(val) + usememoization = val +end + +local function enabletracing(val) + trace = val +end + +-- Get the initial position for the match, interpreting negative +-- values from the end of the subject + +local function initposition(len, pos) + local ii = pos or 1 + -- positive index? + if (ii > 0) then + -- inside the string? + if ii <= len then + return ii - 1; -- return it (corrected to 0-base) + else + return len; -- crop at the end + end + else + -- negative index + -- inside the string? + if -ii <= len then + return len + ii -- return position from the end + else + return 0; -- crop at the beginning + end + end +end + +local function lp_match(pat, s, init, valuetable, ...) + local i = initposition(s:len(), init) + 1 + return select(2, match(false, true, s, i, pat.code, valuetable, ...)) +end + +local function lp_streammatch(pat, init, valuetable, ...) + local params = { ... } + local paramslength = select('#', ...) + local fce = coroutine.wrap(function(s, last) + return match(true, last, s, init or 1, pat.code, valuetable, unpack(params, 1, paramslength)) + end) + return fce +end + +local function retcount(...) + return select('#', ...), { ... } +end + +-- Only for testing purpose +-- stream emulation (send all chars from string one char after char) +local function lp_emulatestreammatch(pat, s, init, valuetable, ...) + local init = initposition(s:len(), init) + 1 + local fce = lp_streammatch(pat, init, valuetable, ...) + local ret, count = {}, 0 + for j = 1, #s do + local pcount, pret = retcount(fce(s:sub(j, j), j == #s)) -- one char + if pret[1] == -1 then + return -- fail + elseif pret[1] == 0 then + -- parsing finished + -- collect result + for i = 2, pcount do + ret[count + i - 1] = pret[i] + end + count = count + pcount - 1 + return unpack(ret, 1, count) + end + for i = 2, pcount do + ret[count + i - 1] = pret[i] + end + count = count + pcount - 1 + end + return select(2, fce(s, true)) -- empty string +end + +local function lp_load(str, fcetab, usemeta) + local index = 0 + assert(str) + local ptr = ffi.cast('const char*', str) + local patsize = ffi.cast('uint32_t*', ptr + index)[0] + index = index + 4 + local len = ffi.sizeof(treepatternelement) * patsize + + local pat + if usemeta then + pat = treepattern(patsize) + else + pat = ffi.gc(ffi.cast('TREEPATTERN*', ffi.C.malloc(ffi.sizeof(treepattern, patsize))), + function(ct) + if ct.code ~= nil then + ffi.C.free(ct.code.p) + ffi.C.free(ct.code) + end + ffi.C.free(ct) + end) + ffi.fill(pat, ffi.sizeof(treepattern, patsize)) + pat.treesize = patsize + pat.id = 0 + end + ffi.copy(pat.p, ptr + index, len) + index = index + len + if usemeta then + pat.code = pattern() + else + pat.code = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern))) + assert(pat.code ~= nil) + pat.code.allocsize = 10 + pat.code.size = 0 + pat.code.p = ffi.C.malloc(ffi.sizeof(patternelement) * pat.code.allocsize) + assert(pat.code.p ~= nil) + ffi.fill(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize) + end + pat.code.size = ffi.cast('uint32_t*', ptr + index)[0] + index = index + 4 + local len = pat.code.size * ffi.sizeof(patternelement) + local data = ffi.string(ptr + index, len) + index = index + len + local count = ffi.cast('uint32_t*', ptr + index)[0] + index = index + 4 + local valuetable = {} + for i = 1, count do + local tag = ffi.string(ptr + index, 3) + index = index + 3 + --string + if tag == 'str' then + local len = ffi.cast('uint32_t*', ptr + index)[0] + index = index + 4 + local val = ffi.string(ptr + index, len) + index = index + len + valuetable[#valuetable + 1] = val + elseif tag == 'num' then + --number + local len = ffi.cast('uint32_t*', ptr + index)[0] + index = index + 4 + local val = ffi.string(ptr + index, len) + index = index + len + valuetable[#valuetable + 1] = tonumber(val) + elseif tag == 'cdt' then + --ctype + local val = settype() + ffi.copy(val, ptr + index, ffi.sizeof(settype)) + index = index + ffi.sizeof(settype) + valuetable[#valuetable + 1] = val + elseif tag == 'fnc' then + --function + local len = ffi.cast('uint32_t*', ptr + index)[0] + index = index + 4 + local fname = ffi.string(ptr + index, len) + index = index + len + len = ffi.cast('uint32_t*', ptr + index)[0] + index = index + 4 + local val = ffi.string(ptr + index, len) + index = index + len + if fcetab and fcetab[fname] then + assert(type(fcetab[fname]) == 'function', ('"%s" is not function'):format(fname)) + valuetable[#valuetable + 1] = fcetab[fname] + else + valuetable[#valuetable + 1] = loadstring(val) + end + end + end + pat.code.allocsize = pat.code.size + pat.code.p = ffi.C.realloc(pat.code.p, ffi.sizeof(patternelement) * pat.code.allocsize) + assert(pat.code.p ~= nil) + ffi.copy(pat.code.p, data, ffi.sizeof(patternelement) * pat.code.allocsize) + return pat, valuetable +end + +local function lp_loadfile(fname, fcetab, usemeta) + local file = assert(io.open(fname, 'rb')) + local pat, valuetable = lp_load(assert(file:read("*a")), fcetab, usemeta) + file:close() + return pat, valuetable +end + +-- ====================================================== + +return { + match = lp_match, + streammatch = lp_streammatch, + emulatestreammatch = lp_emulatestreammatch, + load = lp_load, + loadfile = lp_loadfile, + setmax = setmax, + setmaxbehind = setmaxbehind, + enablememoization = enablememoization, + enabletracing = enabletracing +} diff --git a/vendor/lpeglj/re.lua b/vendor/lpeglj/re.lua new file mode 100644 index 0000000..3d00232 --- /dev/null +++ b/vendor/lpeglj/re.lua @@ -0,0 +1,286 @@ +-- $Id: re.lua,v 1.44 2013/03/26 20:11:40 roberto Exp $ +-- 2014/08/15 changes rostislav + +-- imported functions and modules +local tonumber, print, error = tonumber, print, error +local setmetatable = setmetatable +local m = require"lpeglj" + +-- 'm' will be used to parse expressions, and 'mm' will be used to +-- create expressions; that is, 're' runs on 'm', creating patterns +-- on 'mm' +local mm = m + +-- pattern's metatable +local mt = getmetatable(mm.P(0)) +mt = m.version() == "1.0.0.0LJ" and m or mt + + + +-- No more global accesses after this point +local version = _VERSION +if version == "Lua 5.2" then _ENV = nil end + + +local any = m.P(1) + + +-- Pre-defined names +local Predef = { nl = m.P"\n" } + + +local mem +local fmem +local gmem + + +local function updatelocale () + mm.locale(Predef) + Predef.a = Predef.alpha + Predef.c = Predef.cntrl + Predef.d = Predef.digit + Predef.g = Predef.graph + Predef.l = Predef.lower + Predef.p = Predef.punct + Predef.s = Predef.space + Predef.u = Predef.upper + Predef.w = Predef.alnum + Predef.x = Predef.xdigit + Predef.A = any - Predef.a + Predef.C = any - Predef.c + Predef.D = any - Predef.d + Predef.G = any - Predef.g + Predef.L = any - Predef.l + Predef.P = any - Predef.p + Predef.S = any - Predef.s + Predef.U = any - Predef.u + Predef.W = any - Predef.w + Predef.X = any - Predef.x + mem = {} -- restart memoization + fmem = {} + gmem = {} + local mt = {__mode = "v"} + setmetatable(mem, mt) + setmetatable(fmem, mt) + setmetatable(gmem, mt) +end + + +updatelocale() + + + +local I = m.P(function (s,i) print(i, s:sub(1, i-1)); return i end) + + +local function getdef (id, defs) + local c = defs and defs[id] + if not c then error("undefined name: " .. id) end + return c +end + + +local function patt_error (s, i) + local msg = (#s < i + 20) and s:sub(i) + or s:sub(i,i+20) .. "..." + msg = ("pattern error near '%s'"):format(msg) + error(msg, 2) +end + +local function mult (p, n) + local np = mm.P(true) + while n >= 1 do + if n%2 >= 1 then np = np * p end + p = p * p + n = n/2 + end + return np +end + +local function equalcap (s, i, c) + if type(c) ~= "string" then return nil end + local e = #c + i + if type(s) == 'function' then -- stream mode + if s(i, e - 1) == c then return e else return nil end + else + if s:sub(i, e - 1) == c then return e else return nil end + end +end + + +local S = (Predef.space + "--" * (any - Predef.nl)^0)^0 + +local name = m.R("AZ", "az", "__") * m.R("AZ", "az", "__", "09")^0 + +local arrow = S * "<-" + +local seq_follow = m.P"/" + ")" + "}" + ":}" + "~}" + "|}" + (name * arrow) + -1 + +name = m.C(name) + + +-- a defined name only have meaning in a given environment +local Def = name * m.Carg(1) + +local num = m.C(m.R"09"^1) * S / tonumber + +local String = "'" * m.C((any - "'")^0) * "'" + + '"' * m.C((any - '"')^0) * '"' + + +local defined = "%" * Def / function (c,Defs) + local cat = Defs and Defs[c] or Predef[c] + if not cat then error ("name '" .. c .. "' undefined") end + return cat +end + +local Range = m.Cs(any * (m.P"-"/"") * (any - "]")) / mm.R + +local item = defined + Range + m.C(any) + +local Class = + "[" + * (m.C(m.P"^"^-1)) -- optional complement symbol + * m.Cf(item * (item - "]")^0, mt.__add) / + function (c, p) return c == "^" and any - p or p end + * "]" + +local function adddef (t, k, exp) + if t[k] then + error("'"..k.."' already defined as a rule") + else + t[k] = exp + end + return t +end + +local function firstdef (n, r) return adddef({n}, n, r) end + + +local function NT (n, b, p) + if not b then + error("rule '"..n.."' used outside a grammar") + else return mm.V(n, p or 0) + end +end + + +local exp = m.P{ "Exp", + Exp = S * ( m.V"Grammar" + + m.Cf(m.V"Seq" * ("/" * S * m.V"Seq")^0, mt.__add) ); + Seq = m.Cf(m.Cc(m.P"") * m.V"Prefix"^0 , mt.__mul) + * (#seq_follow + patt_error); + Prefix = "&" * S * m.V"Prefix" / mt.__len + + "!" * S * m.V"Prefix" / mt.__unm + + m.V"Suffix"; + Suffix = m.Cf(m.V"Primary" * S * + ( ( m.P"+" * m.Cc(1, mt.__pow) + + m.P"*" * m.Cc(0, mt.__pow) + + m.P"?" * m.Cc(-1, mt.__pow) + + "^" * ( m.Cg(num * m.Cc(mult)) + + m.Cg(m.C(m.S"+-" * m.R"09"^1) * m.Cc(mt.__pow)) + ) + + "->" * S * ( m.Cg((String + num) * m.Cc(mt.__div)) + + m.P"{}" * m.Cc(nil, m.Ct) + + m.Cg(Def / getdef * m.Cc(mt.__div)) + ) + + "=>" * S * m.Cg(Def / getdef * m.Cc(m.Cmt)) + ) * S + )^0, function (a,b,f) return f(a,b) end ); + Primary = "(" * m.V"Exp" * ")" + + String / mm.P + + Class + + defined + + "{:" * (name * ":" + m.Cc(nil)) * m.V"Exp" * ":}" / + function (n, p) return mm.Cg(p, n) end + + "=" * name / function (n) return mm.Cmt(mm.Cb(n), equalcap) end + + m.P"{}" / mm.Cp + + "{~" * m.V"Exp" * "~}" / mm.Cs + + "{|" * m.V"Exp" * "|}" / mm.Ct + + "{" * m.V"Exp" * "}" / mm.C + + m.P"." * m.Cc(any) + + (name * m.Cb("G") * (S * ":" * S * num)^-1 * -arrow + "<" * name * m.Cb("G") * (S * ":" * S * num)^-1 * ">") / NT; + Definition = name * arrow * m.V"Exp"; + Grammar = m.Cg(m.Cc(true), "G") * + m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0, + adddef) / mm.P +} + +local pattern = S * m.Cg(m.Cc(false), "G") * exp / mm.P * (-any + patt_error) + + +local function compile (p, defs) + if mm.type(p) == "pattern" then return p end -- already compiled + local cp = pattern:match(p, 1, defs) + if not cp then error("incorrect pattern", 3) end + return cp +end + +local function match (s, p, i) + local cp = mem[p] + if not cp then + cp = compile(p) + mem[p] = cp + end + return cp:match(s, i or 1) +end + +local function streammatch (p, i) + local cp = mem[p] + if not cp then + cp = compile(p) + mem[p] = cp + end + return cp:streammatch(i or 1) +end + +-- Only for testing purpose +local function emulatestreammatch(s, p, i) + local cp = mem[p] + if not cp then + cp = compile(p) + mem[p] = cp + end + return cp:emulatestreammatch(s, i or 1) +end + +local function find (s, p, i) + local cp = fmem[p] + if not cp then + cp = compile(p) / 0 + cp = mm.P{ mm.Cp() * cp * mm.Cp() + 1 * mm.V(1) } + fmem[p] = cp + end + local i, e = cp:match(s, i or 1) + if i then return i, e - 1 + else return i + end +end + +local function gsub (s, p, rep) + local g = gmem[p] or {} -- ensure gmem[p] is not collected while here + gmem[p] = g + local cp = g[rep] + if not cp then + cp = compile(p) + cp = mm.Cs((cp / rep + 1)^0) + g[rep] = cp + end + return cp:match(s) +end + + +-- exported names +local re = { + compile = compile, + match = match, + streammatch = streammatch, + emulatestreammatch = emulatestreammatch, + find = find, + gsub = gsub, + updatelocale = updatelocale, +} + +if version == "Lua 5.1" then _G.re = re end + +return re |
