From 0413dc0cc0b75bb016dce603d5402bc470440142 Mon Sep 17 00:00:00 2001 From: unwox Date: Mon, 23 Dec 2024 22:25:53 +0600 Subject: improve artoftea parsing --- parser/artoftea.fnl | 7 ++++--- parser/parser.fnl | 11 +++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'parser') diff --git a/parser/artoftea.fnl b/parser/artoftea.fnl index 773f038..5c7efff 100644 --- a/parser/artoftea.fnl +++ b/parser/artoftea.fnl @@ -26,8 +26,7 @@ (* (parser.anywhere (parser.tag :option {:value "*" :selected "selected"} - (* (peg.Cg parser.pegs.number :weight) (+ (peg.P "г") " гр") - parser.pegs.spaces))) + (peg.Cg (parser.till "") :weight))) (parser.anywhere (parser.tag :p {:class "price"} (parser.tag :span {:id "*"} @@ -41,8 +40,10 @@ (fn normalize [product] (local year (parser.guess-year product.title)) - (local weight (number.string->number product.weight)) + (local weight (or (parser.guess-weight product.weight ["г"]) + (parser.guess-weight product.title))) (local price (number.string->number product.price)) + {:site "artoftea" :title product.title :url product.url diff --git a/parser/parser.fnl b/parser/parser.fnl index 78df882..e97351f 100644 --- a/parser/parser.fnl +++ b/parser/parser.fnl @@ -137,14 +137,17 @@ (- (+ (peg.P "г") "год") (peg.P "гр")))) :match text))) -(fn guess-weight [text] +(fn guess-weight [text extra-metrics] (if text (let [peg (peg.Ct (anywhere (* (peg.C pegs.number) (maybe " ") - (+ (* (peg.C (+ (peg.P "гр") "кг")) - (+ (peg.P " ") "." "\n" -1))))))] + (+ (* (peg.C (+ (peg.P "гр") "кг" + (if extra-metrics + (table.unpack extra-metrics) + ""))) + (+ (peg.P " ") "\t" "." "\n" -1))))))] (let [result (peg:match text)] (if result (let [[number metric] result] @@ -159,7 +162,7 @@ (* (peg.C pegs.number) (maybe " ") (+ (* (peg.C (+ (peg.P "мл") "л")) - (+ (peg.P " ") "." "\n" -1))))))] + (+ (peg.P " ") "\t" "." "\n" -1))))))] (let [result (peg:match text)] (if result (let [[number metric] result] -- cgit v1.2.3