(import-macros {: map} :lib.macro) (local number (require :lib.number)) (local {: must} (require :lib.utils)) (local peg (if (pick-values 1 (pcall require :lpeg)) (require :lpeg) (require :lpeglj))) ;; "not" is taken >:( (fn pnot [p] (- (peg.P 1) (peg.P p))) (fn till [p] (^ (pnot p) 1)) (fn maybe [p] (^ (peg.P p) 0)) (fn anywhere [p] (peg.P [(+ p (* 1 (peg.V 1)))])) (local pegs {}) (tset pegs :number (^ (peg.R "09") 1)) (tset pegs :letter (+ (peg.R "az") (peg.R "AZ") "а" "б" "в" "г" "д" "е" "ё" "ж" "з" "и" "й" "к" "л" "м" "н" "о" "п" "р" "с" "т" "у" "ф" "х" "ц" "ч" "ъ" "ы" "ь" "э" "ю" "я" "А" "Б" "В" "Г" "Д" "Е" "Ё" "Ж" "З" "И" "Й" "К" "Л" "М" "Н" "О" "П" "Р" "С" "Т" "У" "Ф" "Х" "Ц" "Ч" "Ъ" "Ы" "Ь" "Э" "Ю" "Я")) (tset pegs :lat-letters (^ (+ (peg.R "az") (peg.R "AZ")) 1)) (tset pegs :space (peg.S "\n\t ")) (tset pegs :spaces (^ (peg.S "\n\t ") 1)) (tset pegs :tag-name (+ pegs.lat-letters pegs.number)) (tset pegs :attr (peg.Ct (* (peg.Cg (^ (+ pegs.lat-letters "-") 1) :name) (maybe (* "=\"" (peg.Cg (till "\"") :value) "\""))))) (tset pegs :self-closing-tag (* "<" (peg.Cg (+ (peg.P "area") "base" "br" "col" "embed" "hr" "img" "input" "link" "meta" "param" "source" "track" "wbr") ;; should be case insensitive :tag) (peg.Cg (peg.Ct (^ (+ pegs.space (peg.Cg pegs.attr)) 0)) :attrs) (maybe "/") ">")) (tset pegs :opening-tag (* "<" (peg.Cg pegs.tag-name :tag) (peg.Cg (peg.Ct (^ (+ pegs.space (peg.Cg pegs.attr)) 0)) :attrs) ">")) (tset pegs :closing-tag (* "")) (tset pegs :doctype (* "")) ;; should be case insensitive (tset pegs :tag (peg.P [(peg.Ct (+ pegs.self-closing-tag (* pegs.opening-tag (peg.Cg (peg.Ct (^ (+ (+ pegs.space (peg.V 1)) (peg.Cg (till pegs.closing-tag))) 0)) :nodes) pegs.closing-tag)))])) (tset pegs :html (* pegs.doctype (peg.Ct (^ (+ pegs.space (peg.Cg pegs.tag)) 0)))) (fn tag [tag attrs contents] (local tag (peg.P tag)) (local attrs-count (accumulate [sum 0 _ _ (pairs attrs)] (+ 1 sum))) (local attr-peg (fn [name value] (* (^ (peg.P name) 1) (if (~= value "") (+ ;; attributes may be wrapped in both " and ' (* "=\"" ;; wildcard for any value (if (= value "*") (till "\"") (peg.P value)) "\"") (* "='" ;; wildcard for any value (if (= value "*") (till "'") (peg.P value)) "'")) (maybe (.. "=\" name \"")))))) (local attrs-peg (accumulate [sum pegs.spaces _ rule (pairs (icollect [k v (pairs attrs)] (attr-peg k v)))] (+ rule sum))) (if contents (peg.P (* (^ pegs.space 0) ;; opening tag (* "<" tag (^ pegs.space 0) (^ attrs-peg (- (* attrs-count 2) 1)) (^ pegs.space 0) ">") ;; tag contents (^ pegs.space 0) (if (= contents "*") (till (* "")) contents) (^ pegs.space 0) ;; closing tag (* ""))) (peg.P (* (^ pegs.space 0) ;; opening tag (* "<" tag (^ pegs.space 0) (^ attrs-peg (- (* attrs-count 2) 1)) (^ pegs.space 0) (maybe "/") ">"))))) (fn match-many [html tag] (: (peg.Ct (^ (peg.Ct tag) 1)) :match html)) (fn guess-tags [text] (local text (if text (must (luna.utf8.lower text)) "")) (if (: (anywhere (peg.P "зеленый чай")) :match text) ["Зеленый чай"] (: (anywhere (peg.P "улун")) :match text) ["Улун"] (: (anywhere (peg.P "белый чай")) :match text) ["Белый чай"] (: (anywhere (peg.P "желтый чай")) :match text) ["Желтый чай"] (: (anywhere (peg.P "красный чай")) :match text) ["Красный чай"] (: (anywhere (peg.P "хэй ча")) :match text) ["Хэй ча"] (: (anywhere (peg.P "шу пуэр")) :match text) ["Шу пуэр"] (: (anywhere (+ (peg.P "шен пуэр") "шэн пуэр")) :match text) ["Шен пуэр"] (: (anywhere (+ (peg.P "матча") "маття")) :match text) ["Матча"] (: (anywhere (peg.P "габа")) :match text) ["Габа"] [])) (fn guess-year [text] (number.string->number (: (anywhere (* (peg.C (^ (peg.R "09") 4)) (maybe " ") (- (+ (peg.P "г") "год") (peg.P "гр")))) :match text))) (fn guess-weight [text extra-metrics] (if text (let [peg (peg.Ct (anywhere (* (+ (* (peg.C pegs.number) (+ (peg.P "x") "X" "х" "Х") (peg.C pegs.number)) (peg.C pegs.number)) (maybe (+ (peg.P " ") " ")) (* (peg.C (if extra-metrics (+ (peg.P "гр") "кг" (table.unpack extra-metrics)) (+ (peg.P "гр") "кг"))) (+ (pnot pegs.letter) -1)))))] (let [result (peg:match text)] (if result (match result [multiplier weight metric] (* (tonumber multiplier) (tonumber weight) (if (= metric "кг") 1000 1)) [weight metric] (* (tonumber weight) (if (= metric "кг") 1000 1))) nil))) nil)) (fn test-guess-weight [] (assert (= nil (guess-weight "за 1")) "1") (assert (= 120 (guess-weight "6x20гр")) "2") (assert (= 140 (guess-weight "Знакомство с китайским чаем, 7х20 гр.")) "3") (assert (= 357 (guess-weight "за 1 шт 357 гр")) "4") (assert (= 1 (guess-weight "1гр")) "5") (assert (= 150 (guess-weight "150 гр")) "6") (assert (= 1000 (guess-weight "1кг")) "7") (assert (= 150 (guess-weight "150 г" ["г"])) "8")) (test-guess-weight) (fn guess-volume [text] (if text (let [peg (peg.Ct (anywhere (* (peg.C pegs.number) (maybe (+ (peg.P " ") " ")) (* (peg.C (+ (peg.P "мл") "л")) (+ (pnot pegs.letter) -1)))))] (let [result (peg:match text)] (if result (let [[number metric] result] (* number (if (= metric "л") 1000 1))) nil))) nil)) (fn test-guess-volume [] (assert (= nil (guess-volume "Сервиз Хуа Хэ Няо"))) (assert (= 255 (guess-volume "Бутылка для чая «Походная» 255 мл"))) (assert (= 255 (guess-volume "Бутылка для чая «Походная» 255 мл")))) (test-guess-volume) {: match-many : tag : anywhere : till : maybe : pegs :not pnot : guess-tags : guess-year : guess-weight : guess-volume}