From 20e9d84fe9481bd0b8db839127a60bf29ec88014 Mon Sep 17 00:00:00 2001
From: unwox
Date: Mon, 3 Feb 2025 12:33:16 +0600
Subject: improve volume/weight guessing

---
 parser/parser.fnl | 36 ++++++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 12 deletions(-)

(limited to 'parser/parser.fnl')

diff --git a/parser/parser.fnl b/parser/parser.fnl
index b2ff20c..7e9469e 100644
--- a/parser/parser.fnl
+++ b/parser/parser.fnl
@@ -23,12 +23,20 @@
 (local pegs {})
 
 (tset pegs :number (^ (peg.R "09") 1))
-(tset pegs :letters (^ (+ (peg.R "az") (peg.R "AZ")) 1))
+(tset pegs :letter (+ (peg.R "az")
+                       (peg.R "AZ")
+                       "а" "б" "в" "г" "д" "е" "ё" "ж" "з" "и" "й" "к" "л" "м"
+                       "н" "о" "п" "р" "с" "т" "у" "ф" "х" "ц" "ч" "ъ" "ы" "ь"
+                       "э" "ю" "я"
+                       "А" "Б" "В" "Г" "Д" "Е" "Ё" "Ж" "З" "И" "Й" "К" "Л" "М"
+                       "Н" "О" "П" "Р" "С" "Т" "У" "Ф" "Х" "Ц" "Ч" "Ъ" "Ы" "Ь"
+                       "Э" "Ю" "Я"))
+(tset pegs :lat-letters (^ (+ (peg.R "az") (peg.R "AZ")) 1))
 (tset pegs :space (peg.S "\n\t "))
 (tset pegs :spaces (^ (peg.S "\n\t ") 1))
-(tset pegs :tag-name (+ pegs.letters pegs.number))
+(tset pegs :tag-name (+ pegs.lat-letters pegs.number))
 (tset pegs :attr
-  (peg.Ct (* (peg.Cg (^ (+ pegs.letters "-") 1) :name)
+  (peg.Ct (* (peg.Cg (^ (+ pegs.lat-letters "-") 1) :name)
              (maybe (* "=\"" (peg.Cg (till "\"") :value) "\"")))))
 (tset pegs :self-closing-tag
   (* "<"
@@ -143,12 +151,11 @@
                 (anywhere
                   (* (peg.C pegs.number)
                      (maybe " ")
-                     (+ (* (peg.C (+ (peg.P "гр") "кг"
-                                     (if extra-metrics
-                                       (table.unpack extra-metrics)
-                                       "")))
-                           (maybe ".")
-                           (+ (peg.P " ") "\t" "." "\n" "<" -1))))))]
+                     (* (peg.C (+ (peg.P "гр") "кг"
+                                  (if extra-metrics
+                                    (table.unpack extra-metrics)
+                                    "")))
+                        (+ (pnot pegs.letter) -1)))))]
       (let [result (peg:match text)]
         (if result
           (let [[number metric] result]
@@ -156,15 +163,20 @@
           nil)))
     nil))
 
+(fn test-guess-weight []
+  (assert (= 1 (guess-weight "1гр")))
+  (assert (= 150 (guess-weight "150 гр")))
+  (assert (= 1000 (guess-weight "1кг")))
+  (assert (= 150 (guess-weight "150 г" ["г"]))))
+
 (fn guess-volume [text]
   (if text
     (let [peg (peg.Ct
                 (anywhere
                   (* (peg.C pegs.number)
                      (maybe " ")
-                     (+ (* (peg.C (+ (peg.P "мл") "л"))
-                           (maybe ".")
-                           (+ (peg.P " ") "\t" "." "\n" "<" -1))))))]
+                     (* (peg.C (+ (peg.P "мл") "л"))
+                        (+ (pnot pegs.letter) -1)))))]
       (let [result (peg:match text)]
         (if result
           (let [[number metric] result]
--
cgit v1.2.3