From 4e4880fd6631f46138fabee5e20ddc22c84ac6bd Mon Sep 17 00:00:00 2001
From: unwox
Date: Wed, 29 Jan 2025 22:00:44 +0600
Subject: add suhexuan parser

---
Reviewer notes (free text between the three-dash line and the diffstat; it
is commentary and not part of the commit message):

* This copy was re-flowed from a rendering in which all line breaks and
  indentation had been collapsed. Added and removed lines follow the
  original diff markers. Context-line breaks, blank context lines and all
  indentation were reconstructed so that they satisfy the hunk counts; they
  may not match the repository byte for byte.
* bin/fetch.fnl: product_tags is emptied and afterwards only [suhexuan] is
  iterated, whereas the removed lines iterated every parser. This looks
  like a debugging leftover; please confirm it is intended before merging.
* parser/suhexuan.fnl: the `map` macro and the `array` and `http` modules
  are brought into scope but are not referenced in the 62 added lines.
* parser/suhexuan.fnl: format-url hard-codes recid=644733406 and
  c=1738164538594. Presumably a page-block id and a cache-busting
  timestamp; TODO confirm that fixed values keep working.
* parser/suhexuan.fnl: the category paths embed raw Cyrillic filter values
  and square brackets. Verify that the HTTP layer percent-encodes them.
* parser/suhexuan.fnl: normalize hands product.gallery to json.decode
  unconditionally. Confirm the API always returns that field.
* static/suhexuan.png: only the "Binary files ... differ" stub is present,
  so the image cannot be recreated from this text.

 bin/fetch.fnl       |   4 ++--
 bin/serve.fnl       |   4 ++--
 parser/parser.fnl   |   6 +++--
 parser/suhexuan.fnl |  62 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 static/suhexuan.png | Bin 0 -> 5478 bytes
 5 files changed, 70 insertions(+), 6 deletions(-)
 create mode 100644 parser/suhexuan.fnl
 create mode 100644 static/suhexuan.png

diff --git a/bin/fetch.fnl b/bin/fetch.fnl
index 26566f7..213475e 100644
--- a/bin/fetch.fnl
+++ b/bin/fetch.fnl
@@ -9,6 +9,7 @@
 (local gorkovchay (require :parser.gorkovchay))
 (local moychay (require :parser.moychay))
 (local ozchai (require :parser.ozchai))
+(local suhexuan (require :parser.suhexuan))
 (local ipuer (require :parser.ipuer))
 (local artoftea (require :parser.artoftea))
 (local clubcha (require :parser.clubcha))
@@ -163,8 +164,7 @@
 ;; replace with with-tx
 (local tx (must (luna.db.begin db)))
 (must (luna.db.exec-tx tx "DELETE FROM product_tags;" []))
-(each [_ parser (pairs [ozchai clubcha ipuer artoftea chaekshop moychay
-                        gorkovchay])]
+(each [_ parser (pairs [suhexuan])]
   (store-products tx (parser.products)))
 (cache.clear-tx tx "page:")
 (populate-search-table tx)
diff --git a/bin/serve.fnl b/bin/serve.fnl
index b1ec024..7385b53 100644
--- a/bin/serve.fnl
+++ b/bin/serve.fnl
@@ -491,7 +491,7 @@
                 [:option {:value val
                           :selected (if (= form.site val) "selected" nil)}
                  (. (require (.. "parser." val)) :title)])
-              [:ozchai :clubcha :ipuer :artoftea :chaekshop :moychay
+              [:ozchai :suhexuan :clubcha :ipuer :artoftea :chaekshop :moychay
                :gorkovchay]))]]
      [:div {}
       [:select {:name "sort"}
@@ -658,8 +658,8 @@
 
 (must (luna.router.route "GET /" root-handler))
 (must (luna.router.route "GET /robots.txt" robots-handler))
-(must (luna.router.route "GET /track" track-handler))
 (must (luna.router.static "GET /static/" "static/"))
+(must (luna.router.route "GET /track" track-handler))
 
 (when luna.debug
   (must (luna.on-eval (fn [code] (fennel.eval code {:env _G})))))
diff --git a/parser/parser.fnl b/parser/parser.fnl
index e97351f..b2ff20c 100644
--- a/parser/parser.fnl
+++ b/parser/parser.fnl
@@ -147,7 +147,8 @@
                                  (if extra-metrics
                                      (table.unpack extra-metrics)
                                      "")))
-                       (+ (peg.P " ") "\t" "." "\n" -1))))))]
+                       (maybe ".")
+                       (+ (peg.P " ") "\t" "." "\n" "<" -1))))))]
     (let [result (peg:match text)]
       (if result
           (let [[number metric] result]
@@ -162,7 +163,8 @@
                 (* (peg.C pegs.number)
                    (maybe " ")
                    (+ (* (peg.C (+ (peg.P "мл") "л"))
-                         (+ (peg.P " ") "\t" "." "\n" -1))))))]
+                         (maybe ".")
+                         (+ (peg.P " ") "\t" "." "\n" "<" -1))))))]
     (let [result (peg:match text)]
       (if result
           (let [[number metric] result]
diff --git a/parser/suhexuan.fnl b/parser/suhexuan.fnl
new file mode 100644
index 0000000..8592294
--- /dev/null
+++ b/parser/suhexuan.fnl
@@ -0,0 +1,62 @@
+(import-macros {: map} :lib.macro)
+
+(local parser (require :parser.parser))
+(local array (require :lib.array))
+(local http (require :lib.http))
+(local number (require :lib.number))
+(local fetcher (require :fetcher))
+(local json (require :vendor.json))
+
+(fn format-url [path page]
+  (.. "https://store.tildaapi.com/api/getproductslist/"
+      "?storepartuid=" path
+      "&slice=" page
+      "&recid=644733406"
+      "&c=1738164538594"
+      "&getparts=true"
+      "&getoptions=true"
+      "&size=36"))
+
+(fn destruct-response [response]
+  {:items (. (json.decode response) :products)})
+
+(fn normalize [product]
+  (local gallery (json.decode product.gallery))
+  (local weight (parser.guess-weight product.title ["г"]))
+  (local price (number.string->number product.price))
+
+  {:site "suhexuan"
+   :url product.url
+   :title product.title
+   :description product.text
+   ;; FIXME: parse all editions into different products
+   :image (if (< 0 (# gallery))
+              (. gallery 1 :img)
+              "")
+   :weight weight
+   :volume (or (parser.guess-volume product.text)
+               (parser.guess-volume product.title))
+   :price price
+   :price-per (if (and price weight (< 0 weight))
+                  (/ (math.ceil (* (/ price weight) 10)) 10)
+                  nil)
+   :characteristics product.characteristics})
+
+(fn products []
+  (fetcher.from-json
+    [{:path "896764703561" :tags ["Посуда"]}
+     {:path "343222834961" :tags ["Разное"]}
+     {:path "167733677091&filters[charact:6272182]=Зеленый" :tags ["Зеленый чай"]}
+     {:path "167733677091&filters[charact:6272182]=Белый" :tags ["Белый чай"]}
+     {:path "167733677091&filters[charact:6272182]=Желтый" :tags ["Желтый чай"]}
+     {:path "167733677091&filters[charact:6272182]=Красный" :tags ["Красный чай"]}
+     {:path "167733677091&filters[charact:6272182]=Черный" :tags ["Хэй ча"]}
+     {:path "167733677091&filters[charact:6272182]=Шу+пуэр" :tags ["Шу пуэр"]}
+     {:path "167733677091&filters[charact:6272182]=Шэн+пуэр" :tags ["Шэн пуэр"]}
+     {:path "167733677091&filters[charact:6272182]=С+ароматами" :tags []}
+     {:path "167733677091&filters[charact:6272182]=Улун" :tags ["Улун"]}]
+    format-url
+    destruct-response
+    normalize))
+
+{:products products :title "СуХэСюань" :url "https://suhexuan.ru"}
diff --git a/static/suhexuan.png b/static/suhexuan.png
new file mode 100644
index 0000000..78b151c
Binary files /dev/null and b/static/suhexuan.png differ
-- 
cgit v1.2.3