summaryrefslogtreecommitdiff
path: root/parser/chaibez.fnl
diff options
context:
space:
mode:
authorunwox <me@unwox.com>2025-05-25 22:38:09 +0600
committerunwox <me@unwox.com>2025-05-25 22:38:09 +0600
commitca04058da0cbc4835e02287b080feb176d5babf1 (patch)
tree0719bdfef0b719890c67d12e2287904691f2c3df /parser/chaibez.fnl
parent3a1b9d39ff109d9b7c92c0283e262e625a00a39a (diff)
add chaibez parser
Diffstat (limited to 'parser/chaibez.fnl')
-rw-r--r--parser/chaibez.fnl114
1 files changed, 114 insertions, 0 deletions
diff --git a/parser/chaibez.fnl b/parser/chaibez.fnl
new file mode 100644
index 0000000..d42a4ae
--- /dev/null
+++ b/parser/chaibez.fnl
@@ -0,0 +1,114 @@
+(local peg
+ (if (pick-values 1 (pcall require :lpeg))
+ (require :lpeg)
+ (require :lpeglj)))
+(local parser (require :parser.parser))
+(local number (require :lib.number))
+(local fetcher (require :fetcher))
+
+;; Build the listing URL for the category `path` on chaibez.ru.
+;; `page` is the page number: a "?page=N" query is appended only when it is
+;; greater than 1. A missing (nil) `page` is treated like the first page
+;; instead of raising on the numeric comparison.
+(fn format-url [path page]
+  (let [query (if (and page (< 1 page))
+                  (.. "?page=" page)
+                  "")]
+    (.. "https://chaibez.ru/" path "/" query)))
+
+;; PEG for a single product entry on a category listing page.
+;; Named captures it can produce: :url, :image, :title and :price, plus
+;; :prefix, :amount and :weight when a pre-selected <option> is present.
+;; The four parts below are matched in sequence.
+(local product-peg
+  (let [;; capture everything up to the next double quote under `name`
+        quoted (fn [name] (peg.Cg (parser.till "\"") name))
+        ;; image block: link URL and the first src="..." inside the link
+        image-block
+        (parser.anywhere
+          (parser.tag :div {:class "image"}
+            (parser.tag :a {:href (quoted :url) :class "*"}
+              (* (parser.anywhere (* "src=\"" (quoted :image) "\""))
+                 (parser.till "</a>")))))
+        ;; caption block: product link (URL again) and its text as :title
+        caption-block
+        (parser.anywhere
+          (* "<div class=\"caption\">"
+             parser.pegs.spaces
+             (parser.tag :h4 {}
+               (* (parser.tag :a {:href (quoted :url)}
+                    (peg.Cg (parser.till "</a>") :title))
+                  (parser.till "</h4>")))))
+        ;; <span data-price="..."> carrying the :price capture; built anew
+        ;; for each place it is used
+        price-span
+        (fn []
+          (parser.tag :span {:data-price (quoted :price) :class "*"}
+            (parser.till "</span>")))
+        ;; selected <option> whose text is captured as :weight
+        selected-option
+        (fn [attrs]
+          (parser.tag :option attrs
+            (peg.Cg (parser.till "</option>") :weight)))
+        ;; without discount
+        regular-option
+        (selected-option {:data-price-prefix (quoted :prefix)
+                          :data-price (quoted :amount)
+                          :value "*" :selected "selected"})
+        ;; with discount
+        discounted-option
+        (selected-option {:data-price-prefix (quoted :prefix)
+                          :data-price "*"
+                          :data-special (quoted :amount)
+                          :value "*" :selected "selected"})
+        ;; same engine as artoftea
+        price-block
+        (parser.anywhere
+          (+ (* (+ regular-option discounted-option)
+                (parser.anywhere (price-span)))
+             ;; without options
+             (price-span)))
+        cart-button
+        (parser.anywhere (parser.tag :span {:class "*"} "В корзину"))]
+    (* image-block caption-block price-block cart-button)))
+
+;; Turn the raw captures of one product (see `product-peg`) into the record
+;; returned by this parser.
+;;
+;; `product` fields read here: title, url, image, description, price,
+;; and the optional prefix / amount / weight taken from a selected option.
+;;
+;; Final price rules, by `product.prefix`:
+;;   "*"  -> price multiplied by amount
+;;   "="  -> amount is the price
+;;   else -> price as is
+;; `:price-per` is final price divided by weight, rounded up to one decimal,
+;; and is nil when either value is missing or the weight is not positive.
+(fn normalize [product]
+  (local year (parser.guess-year product.title))
+  ;; prefer the option text, fall back to the title
+  ;; NOTE(review): ["г"] presumably lists the unit suffixes to accept;
+  ;; confirm against parser.guess-weight.
+  (local weight (or (parser.guess-weight product.weight ["г"])
+                    (parser.guess-weight product.title)))
+  ;; no option captured -> neutral multiplier
+  (local amount (if product.amount
+                    (number.string->number product.amount)
+                    1))
+  (local price (number.string->number product.price))
+  (local final-price
+    (if (= product.prefix "*")
+        ;; Multiply only when both numbers were parsed. A missing value
+        ;; yields nil (as in the other branches) instead of raising on
+        ;; nil arithmetic; :price-per below already tolerates nil.
+        (and price amount (* price amount))
+        (= product.prefix "=")
+        amount
+        price))
+
+  {:site "chaibez"
+   :title product.title
+   :url product.url
+   :description product.description
+   :image product.image
+   :year year
+   :price final-price
+   :weight weight
+   :volume (parser.guess-volume product.title)
+   :price-per (if (and final-price weight (< 0 weight))
+                  (/ (math.ceil (* (/ final-price weight) 10)) 10)
+                  nil)})
+
+ (fn products []
+   ;; Hands every category (URL path plus the tags attached to it) to
+   ;; fetcher.from-html together with this module's URL builder, product
+   ;; PEG and normalizer, and returns whatever that call returns.
+   (let [categories
+         [{:path "red_tea" :tags ["Красный чай"]}
+          {:path "green_tea" :tags ["Зеленый чай"]}
+          {:path "puer/shu_puer" :tags ["Шу пуэр"]}
+          {:path "puer/rassypnoj-puer" :tags ["Шу пуэр"]}
+          {:path "puer/shen_puer" :tags ["Шен пуэр"]}
+          {:path "ulun/svetliy-ulun" :tags ["Улун"]}
+          {:path "ulun/temniy-ulun" :tags ["Улун" "Уишань"]}
+          {:path "ulun/ulun-guandun" :tags ["Улун" "Гуандун"]}
+          {:path "ulun/gaba" :tags ["Улун" "Габа"]}
+          {:path "ulun/taivan-ulun" :tags ["Улун" "Тайвань"]}
+          {:path "white_tea" :tags ["Белый чай"]}
+          {:path "yellow-tea" :tags ["Желтый чай"]}
+          {:path "xej-cha-chernyj-chaj" :tags ["Хэй ча"]}
+          {:path "posuda/gajvani" :tags ["Посуда" "Гайвань"]}
+          {:path "posuda/kolba-station" :tags ["Посуда" "Стекло"]}
+          {:path "posuda/chahai" :tags ["Посуда" "Чахай"]}
+          {:path "posuda/isinskie-chajniki" :tags ["Посуда" "Чайник" "Исин"]}
+          {:path "posuda/pials" :tags ["Посуда" "Пиала"]}
+          {:path "posuda/chajniki-s-knopkoi" :tags ["Посуда" "Типот"]}
+          {:path "aksessuary/chabani-chajnye-dosk" :tags ["Чабань"]}
+          {:path "aksessuary/figurki" :tags ["Фигурка"]}
+          {:path "aksessuary/instrumenty-kistochki-shhipcy-shilo" :tags ["Инструмент"]}
+          {:path "aksessuary/chaxe-sitechki" :tags ["Сито"]}
+          {:path "aksessuary/cases" :tags ["Чехол"]}
+          {:path "trav_tea" :tags ["Травяной чай"]}]]
+     (fetcher.from-html categories format-url product-peg normalize)))
+
+;; Module table: the `products` function plus the site's title and URL.
+{:products products :title "Чай без церемоний" :url "https://chaibez.ru"}