summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author unwox <me@unwox.com> 2025-02-03 15:43:58 +0600
committer unwox <me@unwox.com> 2025-02-03 15:43:58 +0600
commit 778ab54f2e97455e07b2247ec9ea79e3a39500e7 (patch)
tree 8ce6b98b0aaa28c9d61490177b18f6dbf832cd46
parent a44138f9110599d48a89cc307ebee1b9c9eae584 (diff)
add tea108 parser
-rw-r--r-- bin/fetch.fnl 13
-rw-r--r-- bin/serve.fnl 4
-rw-r--r-- fetcher.fnl 4
-rw-r--r-- parser/artoftea.fnl 3
-rw-r--r-- parser/tea108.fnl 67
-rw-r--r-- static/tea108.webp bin 0 -> 3762 bytes
6 files changed, 80 insertions, 11 deletions
diff --git a/bin/fetch.fnl b/bin/fetch.fnl
index 2edcdac..3b525d2 100644
--- a/bin/fetch.fnl
+++ b/bin/fetch.fnl
@@ -6,15 +6,16 @@
(local array (require :lib.array))
(local cache (require :lib.cache))
+(local artoftea (require :parser.artoftea))
+(local chaekshop (require :parser.chaekshop))
+(local clubcha (require :parser.clubcha))
(local gorkovchay (require :parser.gorkovchay))
+(local ipuer (require :parser.ipuer))
+(local kolokolnikovchai (require :parser.kolokolnikovchai))
(local moychay (require :parser.moychay))
(local ozchai (require :parser.ozchai))
(local suhexuan (require :parser.suhexuan))
-(local ipuer (require :parser.ipuer))
-(local artoftea (require :parser.artoftea))
-(local clubcha (require :parser.clubcha))
-(local chaekshop (require :parser.chaekshop))
-(local kolokolnikovchai (require :parser.kolokolnikovchai))
+(local tea108 (require :parser.tea108))
(local {: must} (require :lib.utils))
(when _G.unpack
@@ -164,7 +165,7 @@
(must (luna.db.commit tx)))
(each [_ parser (pairs [gorkovchay moychay ozchai suhexuan ipuer artoftea
- clubcha chaekshop kolokolnikovchai])]
+ clubcha chaekshop kolokolnikovchai tea108])]
(local products (parser.products))
(when (< 0 (# products))
;; replace with with-tx
diff --git a/bin/serve.fnl b/bin/serve.fnl
index 2e74501..0dbee92 100644
--- a/bin/serve.fnl
+++ b/bin/serve.fnl
@@ -491,8 +491,8 @@
[:option {:value val
:selected (if (= form.site val) "selected" nil)}
(. (require (.. "parser." val)) :title)])
- [:ozchai :suhexuan :clubcha :ipuer :artoftea :chaekshop :moychay
- :kolokolnikovchai :gorkovchay]))]]
+ [:ozchai :suhexuan :kolokolnikovchai :tea108 :ipuer :clubcha
+ :artoftea :chaekshop :moychay :gorkovchay]))]]
[:div {}
[:select {:name "sort"}
[:option {:value ""} "~ Порядок ~"]
diff --git a/fetcher.fnl b/fetcher.fnl
index 7b46266..5670af9 100644
--- a/fetcher.fnl
+++ b/fetcher.fnl
@@ -44,7 +44,7 @@
(do
(os.execute "sleep 1")
(gather (+ page 1) (array.concat knil items)))))
- (= 404 status)
+ (and (<= 300 status) (< status 500))
knil
(retry #(gather page knil) 3 1)))
@@ -92,7 +92,7 @@
(do
(os.execute "sleep 1")
(gather (+ 1 page) (array.concat knil items)))))
- (= status 404)
+ (and (<= 300 status) (< status 500))
knil
(retry #(gather page knil) 3 1)))
diff --git a/parser/artoftea.fnl b/parser/artoftea.fnl
index 5c7efff..1af4969 100644
--- a/parser/artoftea.fnl
+++ b/parser/artoftea.fnl
@@ -7,7 +7,8 @@
(local fetcher (require :fetcher))
(fn format-url [path page]
- (.. "https://artoftea.ru/" path "/?page=" page))
+ (.. "https://artoftea.ru/" path
+ "/" (if (< 1 page) (.. "?page=" page) "")))
(local product-peg
(*
diff --git a/parser/tea108.fnl b/parser/tea108.fnl
new file mode 100644
index 0000000..d575716
--- /dev/null
+++ b/parser/tea108.fnl
@@ -0,0 +1,67 @@
+(import-macros {: map} :lib.macro)
+
+(local peg
+ (if (pick-values 1 (pcall require :lpeg))
+ (require :lpeg)
+ (require :lpeglj)))
+(local number (require :lib.number))
+(local parser (require :parser.parser))
+(local fetcher (require :fetcher))
+(local utils (require :lib.utils))
+
+(fn format-url [path page] ; listing URL for page `page` of the shop section `path`
+  (let [shop-root "https://tea108.ru/shop/"] (.. shop-root path "/?page=" page)))
+
+(local product-peg ; pattern for one product entry; peg.Cg labels the groups url, image, title and price
+ (*
+ ;; a delimiter to clearly separate products. NOTE(review): anywhere/tag/till live in parser.parser (not shown); notes below assume anywhere skips ahead to its argument and till stops before its argument — confirm
+ (parser.anywhere "data-productid=")
+ ;; url: href attribute of the :a tag whose class is "product js--hover-preview", kept as group :url
+ (parser.anywhere
+ (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)
+ :class "product js--hover-preview"}))
+ ;; image: the text between data-src=" and the following double quote, kept as group :image
+ (parser.anywhere (* "data-src=\""
+ (peg.Cg (parser.till "\"") :image)
+ "\""))
+ ;; name: body of the :div tag with class "product-name", read up to </div>, kept as group :title
+ (parser.anywhere
+ (parser.tag :div {:class "product-name"}
+ (peg.Cg (parser.till "</div>") :title)))
+ ;; price: body of the :span tag whose class begins with "product-price-min", read up to </span>, kept as group :price
+ (parser.anywhere
+ (parser.tag :span {:class (* "product-price-min" (parser.till "\""))}
+ (peg.Cg (parser.till "</span>") :price)))))
+
+(fn normalize [product]
+  ;; Build the tea108 record from one product-peg match; price-per is price / weight rounded up to 0.1.
+  (let [{: url : title : description : image : category} product
+        guessed-weight (parser.guess-weight title [(* "г" (parser.not "."))])
+        cost (number.string->number product.price)]
+    {:site "tea108"
+     : url
+     : title
+     : description
+     :image (.. "https:" image) ; presumably the page gives scheme-less URLs — confirm
+     :price cost
+     :weight guessed-weight
+     :volume (parser.guess-volume title)
+     : category
+     :price-per (when (and cost guessed-weight (> guessed-weight 0))
+                  (/ (math.ceil (* (/ cost guessed-weight) 10)) 10))}))
+
+(fn products []
+  ;; Pass the tea108 shop sections, together with this module's URL builder,
+  ;; product pattern and normalizer, to fetcher.from-html and return its result.
+  (let [sections [{:path "wu-yi-yan-cha" :tags ["Улун" "Фуцзянь"]}
+                  {:path "feng-huang-dancong" :tags ["Улун" "Дань Цун"]}
+                  {:path "taiwan-oolong" :tags ["Улун" "Тайвань"]}
+                  {:path "hong-cha" :tags ["Красный чай"]}
+                  {:path "bai-cha" :tags ["Белый чай"]}
+                  {:path "sheng" :tags ["Шен пуэр"]}
+                  {:path "shu-puer" :tags ["Шу пуэр"]}
+                  {:path "tea-ware" :tags ["Посуда"]}]]
+    (fetcher.from-html sections
+                       format-url product-peg normalize)))
+
+{: products :url "https://tea108.ru" :title "Tea108"} ; module table: the products function, the site URL and the title
diff --git a/static/tea108.webp b/static/tea108.webp
new file mode 100644
index 0000000..6f61913
--- /dev/null
+++ b/static/tea108.webp
Binary files differ