summaryrefslogtreecommitdiff
path: root/vendor/lpeglj/lpcode.lua
diff options
context:
space:
mode:
authorunwox <me@unwox.com>2024-09-26 17:46:38 +0600
committerunwox <me@unwox.com>2024-09-26 17:46:38 +0600
commit9b82db238f9e2e02a76f95c793f8d6ef2387ecfd (patch)
treecdb2a16d01f09553b560ab1034d53392d07bae42 /vendor/lpeglj/lpcode.lua
init
Diffstat (limited to 'vendor/lpeglj/lpcode.lua')
-rw-r--r--vendor/lpeglj/lpcode.lua1057
1 files changed, 1057 insertions, 0 deletions
diff --git a/vendor/lpeglj/lpcode.lua b/vendor/lpeglj/lpcode.lua
new file mode 100644
index 0000000..365d80b
--- /dev/null
+++ b/vendor/lpeglj/lpcode.lua
@@ -0,0 +1,1057 @@
+--[[
+LPEGLJ
+lpcode.lua
+Generating code from tree
+Copyright (C) 2014 Rostislav Sacek.
+based on LPeg v1.0 - PEG pattern matching for Lua
+Lua.org & PUC-Rio written by Roberto Ierusalimschy
+http://www.inf.puc-rio.br/~roberto/lpeg/
+
+** Permission is hereby granted, free of charge, to any person obtaining
+** a copy of this software and associated documentation files (the
+** "Software"), to deal in the Software without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Software, and to
+** permit persons to whom the Software is furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be
+** included in all copies or substantial portions of the Software.
+**
+** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**
+** [ MIT license: http://www.opensource.org/licenses/mit-license.php ]
+--]]
+local ffi = require "ffi"
+require "lpvm"
+
+local band, bor, bnot, rshift, lshift = bit.band, bit.bor, bit.bnot, bit.rshift, bit.lshift
+
+local TChar = 0
+local TSet = 1
+local TAny = 2 -- standard PEG elements
+local TTrue = 3
+local TFalse = 4
+local TRep = 5
+local TSeq = 6
+local TChoice = 7
+local TNot = 8
+local TAnd = 9
+local TCall = 10
+local TOpenCall = 11
+local TRule = 12 -- sib1 is rule's pattern, sib2 is 'next' rule
+local TGrammar = 13 -- sib1 is initial (and first) rule
+local TBehind = 14 -- match behind
+local TCapture = 15 -- regular capture
+local TRunTime = 16 -- run-time capture
+
+
+local IAny = 0 -- if no char, fail
+local IChar = 1 -- if char != val, fail
+local ISet = 2 -- if char not in val, fail
+local ITestAny = 3 -- in no char, jump to 'offset'
+local ITestChar = 4 -- if char != val, jump to 'offset'
+local ITestSet = 5 -- if char not in val, jump to 'offset'
+local ISpan = 6 -- read a span of chars in val
+local IBehind = 7 -- walk back 'val' characters (fail if not possible)
+local IRet = 8 -- return from a rule
+local IEnd = 9 -- end of pattern
+local IChoice = 10 -- stack a choice; next fail will jump to 'offset'
+local IJmp = 11 -- jump to 'offset'
+local ICall = 12 -- call rule at 'offset'
+local IOpenCall = 13 -- call rule number 'offset' (must be closed to a ICall)
+local ICommit = 14 -- pop choice and jump to 'offset'
+local IPartialCommit = 15 -- update top choice to current position and jump
+local IBackCommit = 16 -- "fails" but jump to its own 'offset'
+local IFailTwice = 17 -- pop one choice and then fail
+local IFail = 18 -- go back to saved state on choice and jump to saved offset
+local IGiveup = 19 -- internal use
+local IFullCapture = 20 -- complete capture of last 'off' chars
+local IOpenCapture = 21 -- start a capture
+local ICloseCapture = 22
+local ICloseRunTime = 23
+
+
+local Cclose = 0
+local Cposition = 1
+local Cconst = 2
+local Cbackref = 3
+local Carg = 4
+local Csimple = 5
+local Ctable = 6
+local Cfunction = 7
+local Cquery = 8
+local Cstring = 9
+local Cnum = 10
+local Csubst = 11
+local Cfold = 12
+local Cruntime = 13
+local Cgroup = 14
+
+
+local PEnullable = 0
+local PEnofail = 1
+local RuleLR = 0x10000
+local NOINST = -2
+
+
+local MAXBEHINDPREDICATE = 255
+local MAXRULES = 200
+local MAXOFF = 0xF
+
+-- number of siblings for each tree
+local numsiblings = {
+ 0, 0, 0, -- char, set, any
+ 0, 0, -- true, false
+ 1, -- rep
+ 2, 2, -- seq, choice
+ 1, 1, -- not, and
+ 0, 0, 2, 1, -- call, opencall, rule, grammar
+ 1, -- behind
+ 1, 1 -- capture, runtime capture
+}
+
+
+local patternelement = ffi.typeof('PATTERN_ELEMENT')
+local pattern = ffi.typeof('PATTERN')
+local settype = ffi.typeof('int32_t[8]')
+local fullset = settype(-1, -1, -1, -1, -1, -1, -1, -1)
+
+-- {======================================================
+-- Analysis and some optimizations
+-- =======================================================
+
+local codegen
+
+
+-- Check whether a charset is empty (IFail), singleton (IChar),
+-- full (IAny), or none of those (ISet).
+
+local function charsettype(cs)
+ local count = 0;
+ local candidate = -1; -- candidate position for a char
+ for i = 0, 8 - 1 do
+ local b = cs[i];
+ if b == 0 then
+ if count > 1 then
+ return ISet; -- else set is still empty
+ end
+ elseif b == -1 then
+ if count < (i * 32) then
+ return ISet;
+ else
+ count = count + 32; -- set is still full
+ end
+ -- byte has only one bit?
+ elseif band(b, (b - 1)) == 0 then
+ if count > 0 then
+ return ISet; -- set is neither full nor empty
+ -- set has only one char till now; track it
+ else
+ count = count + 1;
+ candidate = i;
+ end
+ else
+ return ISet; -- byte is neither empty, full, nor singleton
+ end
+ end
+ if count == 0 then
+ return IFail, 0 -- empty set
+ -- singleton; find character bit inside byte
+ elseif count == 1 then
+ local b = cs[candidate];
+ local c = candidate * 32;
+ for i = 1, 32 do
+ if b == 1 then
+ c = c + i - 1
+ break
+ end
+ b = rshift(b, 1)
+ end
+ return IChar, c
+ elseif count == 256 then
+ return IAny, 0 -- full set
+ else
+ assert(false) -- should have returned by now
+ end
+end
+
+
+-- A few basic operations on Charsets
+
+local function cs_complement(cs)
+ for i = 0, 8 - 1 do
+ cs[i] = bnot(cs[i])
+ end
+end
+
+
+local function cs_equal(cs1, cs2)
+ for i = 0, 8 - 1 do
+ if cs1[i] ~= cs2[i] then
+ return
+ end
+ end
+ return true
+end
+
+
+-- computes whether sets st1 and st2 are disjoint
+
+local function cs_disjoint(st1, st2)
+ for i = 0, 8 - 1 do
+ if band(st1[i], st2[i]) ~= 0 then
+ return
+ end
+ end
+ return true
+end
+
+
+-- Convert a 'char' pattern (TSet, TChar, TAny) to a charset
+
+local function tocharset(tree, index, valuetable)
+ local val = settype()
+ if tree.p[index].tag == TSet then
+ ffi.copy(val, valuetable[tree.p[index].val], ffi.sizeof(val))
+ return val
+ elseif tree.p[index].tag == TChar then
+ local b = tree.p[index].val
+ -- only one char
+ -- add that one
+ val[rshift(b, 5)] = lshift(1, band(b, 31))
+ return val
+ elseif tree.p[index].tag == TAny then
+ ffi.fill(val, ffi.sizeof(val), 0xff)
+ return val
+ end
+end
+
+
+-- checks whether a pattern has captures
+
+local function hascaptures(tree, index)
+ if tree.p[index].tag == TCapture or tree.p[index].tag == TRunTime then
+ return true
+ elseif tree.p[index].tag == TCall then
+ return hascaptures(tree, index + tree.p[index].ps)
+ else
+ local ns = numsiblings[tree.p[index].tag + 1]
+ if ns == 0 then
+ return
+ elseif ns == 1 then
+ return hascaptures(tree, index + 1)
+ elseif ns == 2 then
+ if hascaptures(tree, index + 1) then
+ return true
+ elseif tree.p[index].tag ~= TRule then
+ return hascaptures(tree, index + tree.p[index].ps)
+ end
+ else
+ assert(false)
+ end
+ end
+end
+
+
+-- Checks how a pattern behaves regarding the empty string,
+-- in one of two different ways:
+-- A pattern is *nullable* if it can match without consuming any character;
+-- A pattern is *nofail* if it never fails for any string
+-- (including the empty string).
+-- The difference is only for predicates; for patterns without
+-- predicates, the two properties are equivalent.
+-- (With predicates, &'a' is nullable but not nofail. Of course,
+-- nofail => nullable.)
+-- These functions are all convervative in the following way:
+-- p is nullable => nullable(p)
+-- nofail(p) => p cannot fail
+-- (The function assumes that TOpenCall and TRunTime are not nullable:
+-- TOpenCall must be checked again when the grammar is fixed;
+-- TRunTime is an arbitrary choice.)
+
+local function checkaux(tree, pred, index, lrcall)
+ lrcall = lrcall or {}
+ local tag = tree.p[index].tag
+ if tag == TChar or tag == TSet or tag == TAny or
+ tag == TFalse or tag == TOpenCall then
+ return -- not nullable
+ elseif tag == TRep or tag == TTrue then
+ return true -- no fail
+ elseif tag == TNot or tag == TBehind then
+ -- can match empty, but may fail
+ if pred == PEnofail then
+ return
+ else
+ return true -- PEnullable
+ end
+ elseif tag == TAnd then
+ -- can match empty; fail iff body does
+ if pred == PEnullable then
+ return true
+ else
+ return checkaux(tree, pred, index + 1, lrcall)
+ end
+ -- can fail; match empty iff body does
+ elseif tag == TRunTime then
+ if pred == PEnofail then
+ return
+ else
+ return checkaux(tree, pred, index + 1, lrcall)
+ end
+ elseif tag == TSeq then
+ if not checkaux(tree, pred, index + 1, lrcall) then
+ return
+ else
+ return checkaux(tree, pred, index + tree.p[index].ps, lrcall)
+ end
+ elseif tag == TChoice then
+ if checkaux(tree, pred, index + tree.p[index].ps, lrcall) then
+ return true
+ else
+ return checkaux(tree, pred, index + 1, lrcall)
+ end
+ elseif tag == TCapture or tag == TGrammar or tag == TRule then
+ return checkaux(tree, pred, index + 1, lrcall)
+ elseif tag == TCall then
+ --left recursive rule
+ if bit.band(tree.p[index].cap, 0xffff) ~= 0 then
+ local lr = index + tree.p[index].ps
+ if lrcall[lr] then
+ return
+ end
+ lrcall[lr] = true
+ end
+ return checkaux(tree, pred, index + tree.p[index].ps, lrcall)
+ else
+ assert(false)
+ end
+end
+
+
+-- number of characters to match a pattern (or -1 if variable)
+-- ('count' avoids infinite loops for grammars)
+
+local function fixedlenx(tree, count, len, index)
+ local tag = tree.p[index].tag
+ if tag == TChar or tag == TSet or tag == TAny then
+ return len + 1;
+ elseif tag == TFalse or tag == TTrue or tag == TNot or tag == TAnd or tag == TBehind then
+ return len;
+ elseif tag == TRep or tag == TRunTime or tag == TOpenCall then
+ return -1;
+ elseif tag == TCapture or tag == TRule or tag == TGrammar then
+ return fixedlenx(tree, count, len, index + 1)
+ elseif tag == TCall then
+ if count >= MAXRULES then
+ return -1; -- may be a loop
+ else
+ return fixedlenx(tree, count + 1, len, index + tree.p[index].ps)
+ end
+ elseif tag == TSeq then
+ len = fixedlenx(tree, count, len, index + 1)
+ if (len < 0) then
+ return -1;
+ else
+ return fixedlenx(tree, count, len, index + tree.p[index].ps)
+ end
+ elseif tag == TChoice then
+ local n1 = fixedlenx(tree, count, len, index + 1)
+ if n1 < 0 then return -1 end
+ local n2 = fixedlenx(tree, count, len, index + tree.p[index].ps)
+ if n1 == n2 then
+ return n1
+ else
+ return -1
+ end
+ else
+ assert(false)
+ end
+end
+
+
+-- Computes the 'first set' of a pattern.
+-- The result is a conservative aproximation:
+-- match p ax -> x' for some x ==> a in first(p).
+-- match p '' -> '' ==> returns 1.
+-- The set 'follow' is the first set of what follows the
+-- pattern (full set if nothing follows it)
+
+local function getfirst(tree, follow, index, valuetable, lrcall)
+ lrcall = lrcall or {}
+ local tag = tree.p[index].tag
+ if tag == TChar or tag == TSet or tag == TAny then
+ local firstset = tocharset(tree, index, valuetable)
+ return 0, firstset
+ elseif tag == TTrue then
+ local firstset = settype()
+ ffi.copy(firstset, follow, ffi.sizeof(firstset))
+ return 1, firstset
+ elseif tag == TFalse then
+ local firstset = settype()
+ return 0, firstset
+ elseif tag == TChoice then
+ local e1, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
+ local e2, csaux = getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall)
+ for i = 0, 8 - 1 do
+ firstset[i] = bor(firstset[i], csaux[i])
+ end
+ return bor(e1, e2), firstset
+ elseif tag == TSeq then
+ if not checkaux(tree, PEnullable, index + 1) then
+ return getfirst(tree, fullset, index + 1, valuetable, lrcall)
+ -- FIRST(p1 p2, fl) = FIRST(p1, FIRST(p2, fl))
+ else
+ local e2, csaux = getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall)
+ local e1, firstset = getfirst(tree, csaux, index + 1, valuetable, lrcall)
+ if e1 == 0 then -- 'e1' ensures that first can be used
+ return 0, firstset
+ -- one of the children has a matchtime?
+ elseif band(bor(e1, e2), 2) == 2 then
+ return 2, firstset -- pattern has a matchtime capture
+ else
+ return e2, firstset -- else depends on 'e2'
+ end
+ end
+ elseif tag == TRep then
+ local _, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
+ for i = 0, 8 - 1 do
+ firstset[i] = bor(firstset[i], follow[i])
+ end
+ return 1, firstset -- accept the empty string
+ elseif tag == TCapture or tag == TGrammar or tag == TRule then
+ return getfirst(tree, follow, index + 1, valuetable, lrcall)
+ -- function invalidates any follow info.
+ elseif tag == TRunTime then
+ local e, firstset = getfirst(tree, fullset, index + 1, valuetable, lrcall)
+ if e ~= 0 then
+ return 2, firstset -- function is not "protected"?
+ else
+ return 0, firstset -- pattern inside capture ensures first can be used
+ end
+ elseif tag == TCall then
+ -- left recursive rule
+ if bit.band(tree.p[index].cap, 0xffff) ~= 0 then
+ local lr = index + tree.p[index].ps
+ if lrcall[lr] then
+ return 0, settype()
+ else
+ lrcall[lr] = true
+ end
+ end
+ return getfirst(tree, follow, index + tree.p[index].ps, valuetable, lrcall)
+ elseif tag == TAnd then
+ local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
+ for i = 0, 8 - 1 do
+ firstset[i] = band(firstset[i], follow[i])
+ end
+ return e, firstset
+ elseif tag == TNot then
+ local firstset = tocharset(tree, index + 1, valuetable)
+ if firstset then
+ cs_complement(firstset)
+ return 1, firstset
+ end
+ local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
+ ffi.copy(firstset, follow, ffi.sizeof(firstset))
+ return bor(e, 1), firstset -- always can accept the empty string
+ -- instruction gives no new information
+ elseif tag == TBehind then
+ -- call 'getfirst' to check for math-time captures
+ local e, firstset = getfirst(tree, follow, index + 1, valuetable, lrcall)
+ ffi.copy(firstset, follow, ffi.sizeof(firstset))
+ return bor(e, 1), firstset -- always can accept the empty string
+ else
+ assert(false)
+ end
+end
+
+
+-- If it returns true, then pattern can fail only depending on the next
+-- character of the subject
+
+local function headfail(tree, index, lrcall)
+ lrcall = lrcall or {}
+ local tag = tree.p[index].tag
+ if tag == TChar or tag == TSet or tag == TAny or tag == TFalse then
+ return true
+ elseif tag == TTrue or tag == TRep or tag == TRunTime or tag == TNot or tag == TBehind then
+ return
+ elseif tag == TCapture or tag == TGrammar or tag == TRule or tag == TAnd then
+ return headfail(tree, index + 1, lrcall)
+ elseif tag == TCall then
+ -- left recursive rule
+ if bit.band(tree.p[index].cap, 0xffff) ~= 0 then
+ local lr = index + tree.p[index].ps
+ if lrcall[lr] then
+ return true
+ else
+ lrcall[lr] = true
+ end
+ end
+ return headfail(tree, index + tree.p[index].ps, lrcall)
+ elseif tag == TSeq then
+ if not checkaux(tree, PEnofail, index + tree.p[index].ps) then
+ return
+ else
+ return headfail(tree, index + 1, lrcall)
+ end
+ elseif tag == TChoice then
+ if not headfail(tree, index + 1, lrcall) then
+ return
+ else
+ return headfail(tree, index + tree.p[index].ps, lrcall)
+ end
+ else
+ assert(false)
+ end
+end
+
+
+-- Check whether the code generation for the given tree can benefit
+-- from a follow set (to avoid computing the follow set when it is
+-- not needed)
+
+local function needfollow(tree, index)
+ local tag = tree.p[index].tag
+ if tag == TChar or tag == TSet or tag == TAny or tag == TFalse or tag == TTrue or tag == TAnd or tag == TNot or
+ tag == TRunTime or tag == TGrammar or tag == TCall or tag == TBehind then
+ return
+ elseif tag == TChoice or tag == TRep then
+ return true
+ elseif tag == TCapture then
+ return needfollow(tree, index + 1)
+ elseif tag == TSeq then
+ return needfollow(tree, index + tree.p[index].ps)
+ else
+ assert(false)
+ end
+end
+
+-- ======================================================
+
+
+-- {======================================================
+-- Code generation
+-- =======================================================
+
+
+-- code generation is recursive; 'opt' indicates that the code is
+-- being generated under a 'IChoice' operator jumping to its end.
+-- 'tt' points to a previous test protecting this code. 'fl' is
+-- the follow set of the pattern.
+
+
+local function addinstruction(code, op, val)
+ local size = code.size
+ if size >= code.allocsize then
+ code:doublesize()
+ end
+ code.p[size].code = op
+ code.p[size].val = val
+ code.size = size + 1
+ return size
+end
+
+
+local function setoffset(code, instruction, offset)
+ code.p[instruction].offset = offset;
+end
+
+
+-- Add a capture instruction:
+-- 'op' is the capture instruction; 'cap' the capture kind;
+-- 'key' the key into ktable; 'aux' is optional offset
+
+local function addinstcap(code, op, cap, key, aux)
+ local i = addinstruction(code, op, bor(cap, lshift(aux, 4)))
+ setoffset(code, i, key)
+ return i
+end
+
+
+local function jumptothere(code, instruction, target)
+ if instruction >= 0 then
+ setoffset(code, instruction, target - instruction)
+ end
+end
+
+
+local function jumptohere(code, instruction)
+ jumptothere(code, instruction, code.size)
+end
+
+
+-- Code an IChar instruction, or IAny if there is an equivalent
+-- test dominating it
+
+local function codechar(code, c, tt)
+ assert(tt ~= -1)
+ if tt >= 0 and code.p[tt].code == ITestChar and
+ code.p[tt].val == c then
+ addinstruction(code, IAny, 0)
+ else
+ addinstruction(code, IChar, c)
+ end
+end
+
+
+-- Code an ISet instruction
+
+local function coderealcharset(code, cs, valuetable)
+ local ind = #valuetable + 1
+ valuetable[ind] = cs
+ return addinstruction(code, ISet, ind)
+end
+
+
+-- code a char set, optimizing unit sets for IChar, "complete"
+-- sets for IAny, and empty sets for IFail; also use an IAny
+-- when instruction is dominated by an equivalent test.
+
+local function codecharset(code, cs, tt, valuetable)
+ local op, c = charsettype(cs)
+ if op == IChar then
+ codechar(code, c, tt)
+ elseif op == ISet then
+ assert(tt ~= -1)
+ if tt >= 0 and code.p[tt].code == ITestSet and
+ cs_equal(cs, valuetable[code.p[tt].val]) then
+ addinstruction(code, IAny, 0)
+ else
+ coderealcharset(code, cs, valuetable)
+ end
+ else
+ addinstruction(code, op, c)
+ end
+end
+
+
+-- code a test set, optimizing unit sets for ITestChar, "complete"
+-- sets for ITestAny, and empty sets for IJmp (always fails).
+-- 'e' is true iff test should accept the empty string. (Test
+-- instructions in the current VM never accept the empty string.)
+
+local function codetestset(code, cs, e, valuetable)
+ if e ~= 0 then
+ return NOINST -- no test
+ else
+ local pos = code.size
+ codecharset(code, cs, NOINST, valuetable)
+ local inst = code.p[pos]
+ local code = inst.code
+ if code == IFail then
+ inst.code = IJmp -- always jump
+ elseif code == IAny then
+ inst.code = ITestAny
+ elseif code == IChar then
+ inst.code = ITestChar
+ elseif code == ISet then
+ inst.code = ITestSet
+ else
+ assert(false)
+ end
+ return pos
+ end
+end
+
+
+-- Find the final destination of a sequence of jumps
+
+local function finaltarget(code, i)
+ while code.p[i].code == IJmp do
+ i = i + code.p[i].offset
+ end
+ return i
+end
+
+
+-- final label (after traversing any jumps)
+
+local function finallabel(code, i)
+ return finaltarget(code, i + code.p[i].offset)
+end
+
+-- <behind(p)> == behind n; <p> (where n = fixedlen(p))
+
+local function codebehind(code, tree, index, valuetable)
+ if tree.p[index].val > 0 then
+ addinstruction(code, IBehind, tree.p[index].val)
+ end
+ codegen(code, tree, fullset, false, NOINST, index + 1, valuetable) -- NOINST
+end
+
+
+-- Choice; optimizations:
+-- - when p1 is headfail
+-- - when first(p1) and first(p2) are disjoint; than
+-- a character not in first(p1) cannot go to p1, and a character
+-- in first(p1) cannot go to p2 (at it is not in first(p2)).
+-- (The optimization is not valid if p1 accepts the empty string,
+-- as then there is no character at all...)
+-- - when p2 is empty and opt is true; a IPartialCommit can resuse
+-- the Choice already active in the stack.
+
+local function codechoice(code, tree, fl, opt, p1, p2, valuetable)
+ local emptyp2 = tree.p[p2].tag == TTrue
+ local e1, st1 = getfirst(tree, fullset, p1, valuetable)
+ local _, st2 = getfirst(tree, fl, p2, valuetable)
+ if headfail(tree, p1) or (e1 == 0 and cs_disjoint(st1, st2)) then
+ -- <p1 / p2> == test (fail(p1)) -> L1 ; p1 ; jmp L2; L1: p2; L2:
+ local test = codetestset(code, st1, 0, valuetable)
+ local jmp = NOINST;
+ codegen(code, tree, fl, false, test, p1, valuetable)
+ if not emptyp2 then
+ jmp = addinstruction(code, IJmp, 0)
+ end
+ jumptohere(code, test)
+ codegen(code, tree, fl, opt, NOINST, p2, valuetable)
+ jumptohere(code, jmp)
+ elseif opt and emptyp2 then
+ -- p1? == IPartialCommit; p1
+ jumptohere(code, addinstruction(code, IPartialCommit, 0))
+ codegen(code, tree, fullset, true, NOINST, p1, valuetable)
+ else
+ -- <p1 / p2> ==
+ -- test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2:
+ local test = codetestset(code, st1, e1, valuetable)
+ local pchoice = addinstruction(code, IChoice, 0)
+ codegen(code, tree, fullset, emptyp2, test, p1, valuetable)
+ local pcommit = addinstruction(code, ICommit, 0)
+ jumptohere(code, pchoice)
+ jumptohere(code, test)
+ codegen(code, tree, fl, opt, NOINST, p2, valuetable)
+ jumptohere(code, pcommit)
+ end
+end
+
+
+-- And predicate
+-- optimization: fixedlen(p) = n ==> <&p> == <p>; behind n
+-- (valid only when 'p' has no captures)
+
+local function codeand(code, tree, tt, index, valuetable)
+ local n = fixedlenx(tree, 0, 0, index)
+ if n >= 0 and n <= MAXBEHINDPREDICATE and not hascaptures(tree, index) then
+ codegen(code, tree, fullset, false, tt, index, valuetable)
+ if n > 0 then
+ addinstruction(code, IBehind, n)
+ end
+ else
+ -- default: Choice L1; p1; BackCommit L2; L1: Fail; L2:
+ local pchoice = addinstruction(code, IChoice, 0)
+ codegen(code, tree, fullset, false, tt, index, valuetable)
+ local pcommit = addinstruction(code, IBackCommit, 0)
+ jumptohere(code, pchoice)
+ addinstruction(code, IFail, 0)
+ jumptohere(code, pcommit)
+ end
+end
+
+
+-- Captures: if pattern has fixed (and not too big) length, use
+-- a single IFullCapture instruction after the match; otherwise,
+-- enclose the pattern with OpenCapture - CloseCapture.
+
+local function codecapture(code, tree, fl, tt, index, valuetable)
+ local len = fixedlenx(tree, 0, 0, index + 1)
+ if len >= 0 and len <= MAXOFF and not hascaptures(tree, index + 1) then
+ codegen(code, tree, fl, false, tt, index + 1, valuetable)
+ addinstcap(code, IFullCapture, tree.p[index].cap, tree.p[index].val, len)
+ else
+ addinstcap(code, IOpenCapture, tree.p[index].cap, tree.p[index].val, 0)
+ codegen(code, tree, fl, false, tt, index + 1, valuetable)
+ addinstcap(code, ICloseCapture, Cclose, 0, 0)
+ end
+end
+
+
+local function coderuntime(code, tree, tt, index, valuetable)
+ addinstcap(code, IOpenCapture, Cgroup, tree.p[index].val, 0)
+ codegen(code, tree, fullset, false, tt, index + 1, valuetable)
+ addinstcap(code, ICloseRunTime, Cclose, 0, 0)
+end
+
+
+-- Repetion; optimizations:
+-- When pattern is a charset, can use special instruction ISpan.
+-- When pattern is head fail, or if it starts with characters that
+-- are disjoint from what follows the repetions, a simple test
+-- is enough (a fail inside the repetition would backtrack to fail
+-- again in the following pattern, so there is no need for a choice).
+-- When 'opt' is true, the repetion can reuse the Choice already
+-- active in the stack.
+
+local function coderep(code, tree, opt, fl, index, valuetable)
+ local st = tocharset(tree, index, valuetable)
+ if st then
+ local op = coderealcharset(code, st, valuetable)
+ code.p[op].code = ISpan;
+ else
+ local e1, st = getfirst(tree, fullset, index, valuetable)
+ if headfail(tree, index) or (e1 == 0 and cs_disjoint(st, fl)) then
+ -- L1: test (fail(p1)) -> L2; <p>; jmp L1; L2:
+ local test = codetestset(code, st, 0, valuetable)
+ codegen(code, tree, fullset, false, test, index, valuetable)
+ local jmp = addinstruction(code, IJmp, 0)
+ jumptohere(code, test)
+ jumptothere(code, jmp, test)
+ else
+ -- test(fail(p1)) -> L2; choice L2; L1: <p>; partialcommit L1; L2:
+ -- or (if 'opt'): partialcommit L1; L1: <p>; partialcommit L1;
+ local test = codetestset(code, st, e1, valuetable)
+ local pchoice = NOINST;
+ if opt then
+ jumptohere(code, addinstruction(code, IPartialCommit, 0))
+ else
+ pchoice = addinstruction(code, IChoice, 0)
+ end
+ local l2 = code.size
+ codegen(code, tree, fullset, false, NOINST, index, valuetable)
+ local commit = addinstruction(code, IPartialCommit, 0)
+ jumptothere(code, commit, l2)
+ jumptohere(code, pchoice)
+ jumptohere(code, test)
+ end
+ end
+end
+
+
+-- Not predicate; optimizations:
+-- In any case, if first test fails, 'not' succeeds, so it can jump to
+-- the end. If pattern is headfail, that is all (it cannot fail
+-- in other parts); this case includes 'not' of simple sets. Otherwise,
+-- use the default code (a choice plus a failtwice).
+
+local function codenot(code, tree, index, valuetable)
+ local e, st = getfirst(tree, fullset, index, valuetable)
+ local test = codetestset(code, st, e, valuetable)
+ -- test (fail(p1)) -> L1; fail; L1:
+ if headfail(tree, index) then
+ addinstruction(code, IFail, 0)
+ else
+ -- test(fail(p))-> L1; choice L1; <p>; failtwice; L1:
+ local pchoice = addinstruction(code, IChoice, 0)
+ codegen(code, tree, fullset, false, NOINST, index, valuetable)
+ addinstruction(code, IFailTwice, 0)
+ jumptohere(code, pchoice)
+ end
+ jumptohere(code, test)
+end
+
+
+-- change open calls to calls, using list 'positions' to find
+-- correct offsets; also optimize tail calls
+
+local function correctcalls(code, positions, from, to)
+ for i = from, to - 1 do
+ if code.p[i].code == IOpenCall then
+ local n = code.p[i].offset; -- rule number
+ local rule = positions[n]; -- rule position
+ assert(rule == from or code.p[rule - 1].code == IRet)
+ -- call; ret ?
+ if bit.band(code.p[i].val, 0xffff) == 0 and code.p[finaltarget(code, i + 1)].code == IRet then
+ code.p[i].code = IJmp; -- tail call
+ else
+ code.p[i].code = ICall;
+ end
+ jumptothere(code, i, rule) -- call jumps to respective rule
+ end
+ end
+end
+
+
+-- Code for a grammar:
+-- call L1; jmp L2; L1: rule 1; ret; rule 2; ret; ...; L2:
+
+local function codegrammar(code, tree, index, valuetable)
+ local positions = {}
+ local rulenumber = 1;
+ -- tree.p[rule].tag
+ local rule = index + 1
+ assert(tree.p[rule].tag == TRule)
+ local LR = 0
+ if band(RuleLR, tree.p[rule].cap) ~= 0 then LR = 1 end
+ local firstcall = addinstruction(code, ICall, LR) -- call initial rule
+ code.p[firstcall].aux = tree.p[rule].val
+ local jumptoend = addinstruction(code, IJmp, 0) -- jump to the end
+ jumptohere(code, firstcall) -- here starts the initial rule
+ while tree.p[rule].tag == TRule do
+ positions[rulenumber] = code.size -- save rule position
+ rulenumber = rulenumber + 1
+ codegen(code, tree, fullset, false, NOINST, rule + 1, valuetable) -- code rule
+ addinstruction(code, IRet, 0)
+ rule = rule + tree.p[rule].ps
+ end
+ assert(tree.p[rule].tag == TTrue)
+ jumptohere(code, jumptoend)
+ correctcalls(code, positions, firstcall + 2, code.size)
+end
+
+
+local function codecall(code, tree, index, val)
+ local c = addinstruction(code, IOpenCall, tree.p[index].cap) -- to be corrected later
+ code.p[c].aux = val
+ assert(tree.p[index + tree.p[index].ps].tag == TRule)
+ setoffset(code, c, band(tree.p[index + tree.p[index].ps].cap, 0x7fff)) -- offset = rule number
+end
+
+
+local function codeseq(code, tree, fl, opt, tt, p1, p2, valuetable)
+ if needfollow(tree, p1) then
+ local _, fll = getfirst(tree, fl, p2, valuetable) -- p1 follow is p2 first
+ codegen(code, tree, fll, false, tt, p1, valuetable)
+ else
+ -- use 'fullset' as follow
+ codegen(code, tree, fullset, false, tt, p1, valuetable)
+ end
+ -- can p1 consume anything?
+ if (fixedlenx(tree, 0, 0, p1) ~= 0) then
+ tt = NOINST; -- invalidate test
+ end
+ return codegen(code, tree, fl, opt, tt, p2, valuetable)
+end
+
+
+-- Main code-generation function: dispatch to auxiliar functions
+-- according to kind of tree
+
+-- code generation is recursive; 'opt' indicates that the code is being
+-- generated as the last thing inside an optional pattern (so, if that
+-- code is optional too, it can reuse the 'IChoice' already in place for
+-- the outer pattern). 'tt' points to a previous test protecting this
+-- code (or NOINST). 'fl' is the follow set of the pattern.
+
+function codegen(code, tree, fl, opt, tt, index, valuetable)
+ local tag = tree.p[index].tag
+ if tag == TChar then
+ return codechar(code, tree.p[index].val, tt)
+ elseif tag == TAny then
+ return addinstruction(code, IAny, 0)
+ elseif tag == TSet then
+ return codecharset(code, valuetable[tree.p[index].val], tt, valuetable)
+ elseif tag == TTrue then
+ elseif tag == TFalse then
+ return addinstruction(code, IFail, 0)
+ elseif tag == TSeq then
+ return codeseq(code, tree, fl, opt, tt, index + 1, index + tree.p[index].ps, valuetable)
+ elseif tag == TChoice then
+ return codechoice(code, tree, fl, opt, index + 1, index + tree.p[index].ps, valuetable)
+ elseif tag == TRep then
+ return coderep(code, tree, opt, fl, index + 1, valuetable)
+ elseif tag == TBehind then
+ return codebehind(code, tree, index, valuetable)
+ elseif tag == TNot then
+ return codenot(code, tree, index + 1, valuetable)
+ elseif tag == TAnd then
+ return codeand(code, tree, tt, index + 1, valuetable)
+ elseif tag == TCapture then
+ return codecapture(code, tree, fl, tt, index, valuetable)
+ elseif tag == TRunTime then
+ return coderuntime(code, tree, tt, index, valuetable)
+ elseif tag == TGrammar then
+ return codegrammar(code, tree, index, valuetable)
+ elseif tag == TCall then
+ return codecall(code, tree, index, tree.p[index].val)
+ else
+ assert(false)
+ end
+end
+
+
+-- Optimize jumps and other jump-like instructions.
+-- * Update labels of instructions with labels to their final
+-- destinations (e.g., choice L1; ... L1: jmp L2: becomes
+-- choice L2)
+-- * Jumps to other instructions that do jumps become those
+-- instructions (e.g., jump to return becomes a return; jump
+-- to commit becomes a commit)
+
+local function peephole(code)
+ local i = 0
+ while i < code.size do
+ local tag = code.p[i].code
+ if tag == IChoice or tag == ICall or tag == ICommit or tag == IPartialCommit or
+ tag == IBackCommit or tag == ITestChar or tag == ITestSet or tag == ITestAny then
+ -- instructions with labels
+ jumptothere(code, i, finallabel(code, i)) -- optimize label
+
+ elseif tag == IJmp then
+ local ft = finaltarget(code, i)
+ local tag = code.p[ft].code -- jumping to what?
+ -- instructions with unconditional implicit jumps
+ if tag == IRet or tag == IFail or tag == IFailTwice or tag == IEnd then
+ ffi.copy(code.p + i, code.p + ft, ffi.sizeof(patternelement)) -- jump becomes that instruction
+ elseif tag == ICommit or tag == IPartialCommit or tag == IBackCommit then
+ -- inst. with unconditional explicit jumps
+ local fft = finallabel(code, ft)
+ ffi.copy(code.p + i, code.p + ft, ffi.sizeof(patternelement)) -- jump becomes that instruction...
+ jumptothere(code, i, fft) -- but must correct its offset
+ i = i - 1 -- reoptimize its label
+ else
+ jumptothere(code, i, ft) -- optimize label
+ end
+ end
+ i = i + 1
+ end
+end
+
+
+-- Compile a pattern
+
+local function compile(tree, index, valuetable)
+ local code = pattern()
+ codegen(code, tree, fullset, false, NOINST, index, valuetable)
+ addinstruction(code, IEnd, 0)
+ peephole(code)
+ ffi.C.free(tree.code)
+ tree.code = code
+end
+
+local function pat_new(ct, size)
+ size = size or 0
+ local allocsize = size
+ if allocsize < 10 then
+ allocsize = 10
+ end
+ local pat = ffi.cast('PATTERN*', ffi.C.malloc(ffi.sizeof(pattern)))
+ assert(pat ~= nil)
+ pat.allocsize = allocsize
+ pat.size = size
+ pat.p = ffi.C.malloc(ffi.sizeof(patternelement) * allocsize)
+ assert(pat.p ~= nil)
+ ffi.fill(pat.p, ffi.sizeof(patternelement) * allocsize)
+ return pat
+end
+
+local function doublesize(ct)
+ ct.p = ffi.C.realloc(ct.p, ffi.sizeof(patternelement) * ct.allocsize * 2)
+ assert(ct.p ~= nil)
+ ffi.fill(ct.p + ct.allocsize, ffi.sizeof(patternelement) * ct.allocsize)
+ ct.allocsize = ct.allocsize * 2
+end
+
+local pattreg = {
+ doublesize = doublesize,
+}
+
+local metareg = {
+ ["__new"] = pat_new,
+ ["__index"] = pattreg
+}
+
+ffi.metatype(pattern, metareg)
+
+return {
+ checkaux = checkaux,
+ tocharset = tocharset,
+ fixedlenx = fixedlenx,
+ hascaptures = hascaptures,
+ compile = compile,
+}