From 778ab54f2e97455e07b2247ec9ea79e3a39500e7 Mon Sep 17 00:00:00 2001
From: unwox
Date: Mon, 3 Feb 2025 15:43:58 +0600
Subject: add tea108 parser

---
Reviewer notes (free-form text between the "---" line and the diffstat is
not part of the change):

* The line structure below was rebuilt from a copy in which every newline
  had been collapsed to a space. Added and removed lines, including the
  blank "+" lines of parser/tea108.fnl, follow the +/- markers of that copy.
  Indentation everywhere, and the position of the blank context lines needed
  to satisfy the hunk line counts in bin/fetch.fnl, fetcher.fnl and
  parser/artoftea.fnl, are assumptions. TODO: confirm against the
  repository, or apply with whitespace differences ignored.
* Angle-bracketed text was also lost in that copy. The two content
  terminators in parser/tea108.fnl had become (parser.till ""), which
  presumably stops at once and captures nothing; they are restored here as
  "</div>" and "</span>" to match the enclosing parser.tag calls.
  TODO: confirm. The author's address in "From:" could not be recovered.

 bin/fetch.fnl       |  13 +++++-----
 bin/serve.fnl       |   4 ++--
 fetcher.fnl         |   4 ++--
 parser/artoftea.fnl |   3 ++-
 parser/tea108.fnl   |  67 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 static/tea108.webp  | Bin 0 -> 3762 bytes
 6 files changed, 80 insertions(+), 11 deletions(-)
 create mode 100644 parser/tea108.fnl
 create mode 100644 static/tea108.webp

diff --git a/bin/fetch.fnl b/bin/fetch.fnl
index 2edcdac..3b525d2 100644
--- a/bin/fetch.fnl
+++ b/bin/fetch.fnl
@@ -6,15 +6,16 @@
 
 (local array (require :lib.array))
 (local cache (require :lib.cache))
+(local artoftea (require :parser.artoftea))
+(local chaekshop (require :parser.chaekshop))
+(local clubcha (require :parser.clubcha))
 (local gorkovchay (require :parser.gorkovchay))
+(local ipuer (require :parser.ipuer))
+(local kolokolnikovchai (require :parser.kolokolnikovchai))
 (local moychay (require :parser.moychay))
 (local ozchai (require :parser.ozchai))
 (local suhexuan (require :parser.suhexuan))
-(local ipuer (require :parser.ipuer))
-(local artoftea (require :parser.artoftea))
-(local clubcha (require :parser.clubcha))
-(local chaekshop (require :parser.chaekshop))
-(local kolokolnikovchai (require :parser.kolokolnikovchai))
+(local tea108 (require :parser.tea108))
 (local {: must} (require :lib.utils))
 
 (when _G.unpack
@@ -164,7 +165,7 @@
     (must (luna.db.commit tx)))
 
 (each [_ parser (pairs [gorkovchay moychay ozchai suhexuan ipuer artoftea
-                        clubcha chaekshop kolokolnikovchai])]
+                        clubcha chaekshop kolokolnikovchai tea108])]
   (local products (parser.products))
   (when (< 0 (# products))
     ;; replace with with-tx
diff --git a/bin/serve.fnl b/bin/serve.fnl
index 2e74501..0dbee92 100644
--- a/bin/serve.fnl
+++ b/bin/serve.fnl
@@ -491,8 +491,8 @@
                  [:option {:value val
                            :selected (if (= form.site val) "selected" nil)}
                   (. (require (.. "parser." val)) :title)])
-               [:ozchai :suhexuan :clubcha :ipuer :artoftea :chaekshop :moychay
-                :kolokolnikovchai :gorkovchay]))]]
+               [:ozchai :suhexuan :kolokolnikovchai :tea108 :ipuer :clubcha
+                :artoftea :chaekshop :moychay :gorkovchay]))]]
        [:div {}
         [:select {:name "sort"}
          [:option {:value ""} "~ Порядок ~"]
diff --git a/fetcher.fnl b/fetcher.fnl
index 7b46266..5670af9 100644
--- a/fetcher.fnl
+++ b/fetcher.fnl
@@ -44,7 +44,7 @@
             (do
               (os.execute "sleep 1")
               (gather (+ page 1) (array.concat knil items)))))
-        (= 404 status)
+        (and (<= 300 status) (< status 500))
         knil
         (retry #(gather page knil) 3 1)))
 
@@ -92,7 +92,7 @@
             (do
               (os.execute "sleep 1")
               (gather (+ 1 page) (array.concat knil items)))))
-        (= status 404)
+        (and (<= 300 status) (< status 500))
         knil
         (retry #(gather page knil) 3 1)))
 
diff --git a/parser/artoftea.fnl b/parser/artoftea.fnl
index 5c7efff..1af4969 100644
--- a/parser/artoftea.fnl
+++ b/parser/artoftea.fnl
@@ -7,7 +7,8 @@
 (local fetcher (require :fetcher))
 
 (fn format-url [path page]
-  (.. "https://artoftea.ru/" path "/?page=" page))
+  (.. "https://artoftea.ru/" path
+      "/" (if (< 1 page) (.. "?page=" page) "")))
 
 (local product-peg
   (*
diff --git a/parser/tea108.fnl b/parser/tea108.fnl
new file mode 100644
index 0000000..d575716
--- /dev/null
+++ b/parser/tea108.fnl
@@ -0,0 +1,67 @@
+(import-macros {: map} :lib.macro)
+
+(local peg
+  (if (pick-values 1 (pcall require :lpeg))
+    (require :lpeg)
+    (require :lpeglj)))
+(local number (require :lib.number))
+(local parser (require :parser.parser))
+(local fetcher (require :fetcher))
+(local utils (require :lib.utils))
+
+(fn format-url [path page]
+  (.. "https://tea108.ru/shop/" path "/?page=" page))
+
+(local product-peg
+  (*
+    ;; a delimiter to clearly separate products
+    (parser.anywhere "data-productid=")
+    ;; url
+    (parser.anywhere
+      (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)
+                      :class "product js--hover-preview"}))
+    ;; image
+    (parser.anywhere (* "data-src=\""
+                        (peg.Cg (parser.till "\"") :image)
+                        "\""))
+    ;; name
+    (parser.anywhere
+      (parser.tag :div {:class "product-name"}
+        (peg.Cg (parser.till "</div>") :title)))
+    ;; price
+    (parser.anywhere
+      (parser.tag :span {:class (* "product-price-min" (parser.till "\""))}
+        (peg.Cg (parser.till "</span>") :price)))))
+
+(fn normalize [product]
+  (local weight (parser.guess-weight product.title [(* "г" (parser.not "."))]))
+  (local price (number.string->number product.price))
+
+  {:site "tea108"
+   :url product.url
+   :title product.title
+   :description product.description
+   :image (.. "https:" product.image)
+   :price price
+   :weight weight
+   :volume (parser.guess-volume product.title)
+   :category product.category
+   :price-per (if (and price weight (< 0 weight))
+                (/ (math.ceil (* (/ price weight) 10)) 10)
+                nil)})
+
+(fn products []
+  (fetcher.from-html
+    [{:path "wu-yi-yan-cha" :tags ["Улун" "Фуцзянь"]}
+     {:path "feng-huang-dancong" :tags ["Улун" "Дань Цун"]}
+     {:path "taiwan-oolong" :tags ["Улун" "Тайвань"]}
+     {:path "hong-cha" :tags ["Красный чай"]}
+     {:path "bai-cha" :tags ["Белый чай"]}
+     {:path "sheng" :tags ["Шен пуэр"]}
+     {:path "shu-puer" :tags ["Шу пуэр"]}
+     {:path "tea-ware" :tags ["Посуда"]}]
+    format-url
+    product-peg
+    normalize))
+
+{:products products :title "Tea108" :url "https://tea108.ru"}
diff --git a/static/tea108.webp b/static/tea108.webp
new file mode 100644
index 0000000..6f61913
Binary files /dev/null and b/static/tea108.webp differ
-- 
cgit v1.2.3