summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunwox <me@unwox.com>2024-10-07 20:19:12 +0600
committerunwox <me@unwox.com>2024-10-08 14:06:42 +0600
commit47af967213c48565adadc68d43fc572159c31ed9 (patch)
tree4b123a8557754fda7b5e3532513acccff56b26b2
parent0f4599c4df2291fe982a36d5df07983c860b89f9 (diff)
add clubcha parser
-rw-r--r--bin/fetch.fnl2
-rw-r--r--bin/serve.fnl14
-rw-r--r--parser/clubcha.fnl123
-rw-r--r--static/clubcha.pngbin0 -> 13158 bytes
4 files changed, 136 insertions, 3 deletions
diff --git a/bin/fetch.fnl b/bin/fetch.fnl
index 5df2849..c51d0f9 100644
--- a/bin/fetch.fnl
+++ b/bin/fetch.fnl
@@ -6,6 +6,7 @@
(local ozchai (require :parser.ozchai))
(local ipuer (require :parser.ipuer))
(local artoftea (require :parser.artoftea))
+(local clubcha (require :parser.clubcha))
(local db (luna.db.open "file:var/db.sqlite?_journal=WAL&_sync=NORMAL"))
(luna.db.exec db "
@@ -71,6 +72,7 @@
FROM products;" [])
(luna.db.commit tx))
+(store-products (clubcha.products))
(store-products (artoftea.products))
(store-products (ipuer.products))
(store-products (ozchai.products))
diff --git a/bin/serve.fnl b/bin/serve.fnl
index 67c3d7d..f98a5ff 100644
--- a/bin/serve.fnl
+++ b/bin/serve.fnl
@@ -37,17 +37,25 @@
(fn site-name-template [name]
+  "Build the shop link (logo image followed by the shop's display name) for
+  the site identifier `name`. Returns an empty string for an unknown site.
+  The alternative text is set on the logo [:img], not on the enclosing [:a],
+  because `alt` is an attribute of <img> and is not defined for <a>."
(if
(= name "ipuer")
[:a {:class "site-icon" :href "https://ipuer.ru"}
- [:img {:src "/static/ipuer.jpg"}]
+     [:img {:src "/static/ipuer.jpg"
+            :alt "Логотип Институт чай пуэр"}]
"Институт чая пуэр"]
(= name "artoftea")
[:a {:class "site-icon" :href "https://artoftea.ru"}
- [:img {:src "/static/artoftea.png"}]
+     [:img {:src "/static/artoftea.png"
+            :alt "Логотип Art of tea"}]
"Art of tea"]
(= name "ozchai")
[:a {:class "site-icon" :href "https://ozchai.ru"}
- [:img {:src "/static/ozchai.ico"}]
+     [:img {:src "/static/ozchai.ico"
+            :alt "Логотип #OZCHAI"}]
"Чайная #OZCHAI"]
+    (= name "clubcha")
+    [:a {:class "site-icon" :href "https://clubcha.ru"}
+     [:img {:src "/static/clubcha.png"
+            :alt "Логотип Железный Феникс"}]
+     "Железный Феникс"]
""))
(fn item-template [product]
diff --git a/parser/clubcha.fnl b/parser/clubcha.fnl
new file mode 100644
index 0000000..c935236
--- /dev/null
+++ b/parser/clubcha.fnl
@@ -0,0 +1,123 @@
+(import-macros {: map} :lib.macro)
+
+(local peg
+ (if (pick-values 1 (pcall require :lpeg))
+ (require :lpeg)
+ (require :lpeglj)))
+(local parser (require :parser.parser))
+(local number (require :lib.number))
+(local fetcher (require :fetcher))
+
+(fn format-url [path page]
+  "Return the clubcha.ru catalog listing URL for the category slug `path`
+  at page `page`."
+  (let [base "https://clubcha.ru/catalog/"
+        page-query "/?page="]
+    (.. base path page-query page)))
+
+;; Pattern for a single product entry in a clubcha.ru catalog listing.
+;; It is a sequence of four parts: the product link (with the image and the
+;; title nested inside it), the price, an optional <option> element and the
+;; "Купить" submit button. Named group captures produced along the way:
+;; url, id, image, title, price and -- only when the <option> part matches --
+;; weight. The button part captures nothing; it only has to match.
+;; NOTE(review): the matching rules of parser.tag / parser.anywhere /
+;; parser.till / parser.maybe, including the meaning of the "*" attribute
+;; value, are defined in parser.parser and are not visible here -- confirm
+;; there before changing attribute lists.
+(local product-peg
+  (let [image (parser.anywhere
+                (parser.tag
+                  :img {:src (peg.Cg (parser.till "\"") :image)
+                        :alt "*"}))
+        title (parser.anywhere
+                (parser.tag
+                  :span {:class "item__name"}
+                  (peg.Cg (parser.till "</span>") :title)))
+        link (parser.anywhere
+               (parser.tag
+                 :a
+                 {:href (peg.Cg (parser.till "\"") :url)
+                  :class "*"
+                  :data-product (peg.Cg parser.pegs.number :id)}
+                 (* image title)))
+        ;; One or more repetitions of "number or spaces".
+        number-or-spaces (^ (+ parser.pegs.number parser.pegs.spaces) 1)
+        price (parser.anywhere
+                (parser.tag
+                  :span {:class "js-prod-price"}
+                  (peg.Cg number-or-spaces :price)))
+        weight (parser.maybe
+                 (parser.anywhere
+                   (parser.tag
+                     :option
+                     {:data-price "*"
+                      :data-stock "*"
+                      :data-units "*"
+                      :data-oldprice "*"
+                      :value "*"}
+                     (peg.Cg (parser.till "</option>") :weight))))
+        buy-button (parser.anywhere
+                     (parser.tag
+                       :button
+                       {:type "submit"
+                        :value "Купить"
+                        :data-added-text "В корзине"
+                        :class "*"}
+                       (parser.anywhere "Купить")))]
+    (* link price weight buy-button)))
+
+(fn normalize [product]
+  "Convert the raw captures of one matched product (`product.id`, `.title`,
+  `.url`, `.image`, `.price`, `.weight`) into the product record returned by
+  this parser. `:description` is always \"\" and `:year` is always 0."
+  (let [weight (parser.guess-weight product.weight)
+        price (number.string->number product.price)
+        ;; Price divided by weight, rounded up to one decimal place. Stays nil
+        ;; when price or weight is missing or the weight is not positive.
+        price-per (when (and price weight (> weight 0))
+                    (/ (math.ceil (* 10 (/ price weight))) 10))]
+    {:site "clubcha"
+     :id product.id
+     :title product.title
+     :url (.. "https://clubcha.ru/" product.url)
+     :description ""
+     :image product.image
+     :year 0
+     :price price
+     :weight weight
+     :price-per price-per}))
+
+(fn products []
+  "Return the result of fetcher.from-html for the clubcha.ru catalog.
+  The call receives the list of catalog paths below (some with a :category
+  label, some without), plus format-url, product-peg and normalize.
+  Each path must be listed only once; a repeated entry would hand the same
+  catalog path to the fetcher twice.
+  NOTE(review): how fetcher.from-html uses :category and handles paging is
+  defined in the fetcher module and is not visible here."
+  (fetcher.from-html
+    [{:path "shu-puer" :category "Шу пуэр"}
+     {:path "shen-puer" :category "Шен пуэр"}
+     {:path "bai-hao-puer"}
+     {:path "hei-cha" :category "Хэй ча"}
+     {:path "yuzhnofutszyanskij-ulun" :category "Улун"}
+     {:path "guandunskij-ulun" :category "Улун"}
+     {:path "severofujianskiy-ulun" :category "Улун"}
+     {:path "taiwanskiy-ulun" :category "Улун"}
+     {:path "gaba-ulun-gaba-alishan-i-dr" :category "Улун"}
+     {:path "zelenyi-chay" :category "Зеленый чай"}
+     {:path "belyi-chay" :category "Белый чай"}
+     {:path "zheltyi-chay" :category "Желтый чай"}
+     {:path "kitajskij-krasnyj-chaj" :category "Красный чай"}
+     {:path "tajvanskij-krasnyj-chaj" :category "Красный чай"}
+     {:path "Nechaynyye_chai_i_tsvetochnyye_dobavki"}
+     {:path "upakovka-dlya-puera"}
+     {:path "upakovka-dlya-posudy"}
+     {:path "iz-lichnoj-kollektsii"}
+     ;; FIXME: expand this category to subcategories because the main one
+     ;; does not contain all the products
+     {:path "gaivan" :category "Посуда"}
+     {:path "chashka" :category "Посуда"}
+     {:path "chaynyi-nabor" :category "Посуда"}
+     {:path "chaynik" :category "Посуда"}
+     {:path "glina-farfor" :category "Посуда"}
+     {:path "chaban-chajnaya-doska" :category "Посуда"}
+     {:path "chajnyj-podnos" :category "Посуда"}
+     {:path "chajnyj-prud" :category "Посуда"}
+     {:path "professionalnye" :category "Посуда"}
+     {:path "glinyanye" :category "Посуда"}
+     {:path "farforovye" :category "Посуда"}
+     {:path "zhestyanye-i-kartonnye" :category "Посуда"}
+     {:path "puernitsy" :category "Посуда"}
+     {:path "glina" :category "Посуда"}
+     {:path "farfor" :category "Посуда"}
+     {:path "steklo" :category "Посуда"}
+     {:path "kruzhka" :category "Посуда"}
+     {:path "Chaynyye_instrumenty" :category "Посуда"}
+     {:path "chahe" :category "Посуда"}
+     {:path "sito" :category "Посуда"}
+     {:path "Chaynyy_suvenir" :category "Посуда"}
+     {:path "Prochaya_utvar" :category "Посуда"}
+     {:path "farfor-eilong" :category "Посуда"}
+     {:path "butylka-dlya-zavarivaniya" :category "Посуда"}
+     {:path "Predmety_interyera_i_fenshuy"}
+     {:path "fo-chzhu-chetki" :category "Четки"}
+     {:path "nefrit-1" :category "Нефрит"}]
+    format-url
+    product-peg
+    normalize))
+
+{: products}
diff --git a/static/clubcha.png b/static/clubcha.png
new file mode 100644
index 0000000..30e35ea
--- /dev/null
+++ b/static/clubcha.png
Binary files differ