diff options
| author | unwox <me@unwox.com> | 2025-05-25 22:38:09 +0600 |
|---|---|---|
| committer | unwox <me@unwox.com> | 2025-05-25 22:38:09 +0600 |
| commit | ca04058da0cbc4835e02287b080feb176d5babf1 (patch) | |
| tree | 0719bdfef0b719890c67d12e2287904691f2c3df /parser/chaibez.fnl | |
| parent | 3a1b9d39ff109d9b7c92c0283e262e625a00a39a (diff) | |
add chaibez parser
Diffstat (limited to 'parser/chaibez.fnl')
| -rw-r--r-- | parser/chaibez.fnl | 114 |
1 files changed, 114 insertions, 0 deletions
;; parser/chaibez.fnl
;; Catalogue parser for https://chaibez.ru ("Чай без церемоний").

;; Use lpeg when it can be loaded, otherwise fall back to lpeglj.
(local peg
  (let [lpeg-loadable? (pcall require :lpeg)]
    (if lpeg-loadable?
        (require :lpeg)
        (require :lpeglj))))
(local parser (require :parser.parser))
(local number (require :lib.number))
(local fetcher (require :fetcher))

(fn format-url [path page]
  "Build the listing URL for category `path`; pages after the first get a
  `?page=N` query string."
  (let [query (if (> page 1) (.. "?page=" page) "")]
    (.. "https://chaibez.ru/" path "/" query)))

;; Group capture named `group` around (parser.till "\""); used for
;; attribute values.
(fn attr-value [group]
  (peg.Cg (parser.till "\"") group))

;; <span data-price="..." class="..."> pattern capturing data-price as :price.
;; Written as a function so that each use site builds its own pattern, exactly
;; as the two inline copies did.
(fn price-span []
  (parser.tag :span {:data-price (attr-value :price)
                     :class "*"}
    (parser.till "</span>")))

;; Pattern for a single product card. Captures :url, :image, :title and :price,
;; plus :prefix, :amount and :weight when a selected <option> is present.
;; NOTE(review): the exact semantics of parser.anywhere / parser.tag /
;; parser.till live in parser.parser and are not visible from this file.
(local product-peg
  (let [image-link
        (parser.anywhere
          (parser.tag :div {:class "image"}
            (parser.tag :a {:href (attr-value :url) :class "*"}
              (*
                (parser.anywhere (* "src=\"" (attr-value :image) "\""))
                (parser.till "</a>")))))

        caption
        (parser.anywhere
          (*
            "<div class=\"caption\">"
            parser.pegs.spaces
            (parser.tag :h4 {}
              (*
                (parser.tag :a {:href (attr-value :url)}
                  (peg.Cg (parser.till "</a>") :title))
                (parser.till "</h4>")))))

        ;; same engine as artoftea
        ;; selected option without discount: :amount comes from data-price
        plain-option
        (parser.tag :option {:data-price-prefix (attr-value :prefix)
                             :data-price (attr-value :amount)
                             :value "*" :selected "selected"}
          (peg.Cg (parser.till "</option>") :weight))

        ;; selected option with discount: :amount comes from data-special
        discount-option
        (parser.tag :option {:data-price-prefix (attr-value :prefix)
                             :data-price "*"
                             :data-special (attr-value :amount)
                             :value "*" :selected "selected"}
          (peg.Cg (parser.till "</option>") :weight))

        ;; an option followed, somewhere later, by the price span
        option-then-price
        (* (+ plain-option discount-option)
           (parser.anywhere (price-span)))

        ;; the second alternative covers products without options
        pricing
        (parser.anywhere (+ option-then-price (price-span)))

        cart-button
        (parser.anywhere (parser.tag :span {:class "*"} "В корзину"))]
    (* image-link caption pricing cart-button)))

(fn normalize [product]
  "Turn the raw captures of `product-peg` into a product record.

  The final price depends on the captured option prefix: `*` multiplies the
  base price by the option amount, `=` uses the option amount as the price,
  and anything else (including no prefix) keeps the base price."
  (let [{: title : prefix} product
        year (parser.guess-year title)
        weight (or (parser.guess-weight product.weight ["г"])
                   (parser.guess-weight title))
        ;; without a captured option amount the multiplier is neutral
        amount (if product.amount
                   (number.string->number product.amount)
                   1)
        price (number.string->number product.price)
        final-price (match prefix
                      "*" (* price amount)
                      "=" amount
                      _ price)]
    {:site "chaibez"
     :title title
     :url product.url
     :description product.description
     :image product.image
     :year year
     :price final-price
     :weight weight
     :volume (parser.guess-volume title)
     ;; price per unit of weight, rounded up to one decimal place
     :price-per (when (and final-price weight (> weight 0))
                  (let [per-unit (/ final-price weight)]
                    (/ (math.ceil (* per-unit 10)) 10)))}))

(fn products []
  "Hand every catalogue category (URL path plus tags) to fetcher.from-html
  together with the URL builder, the product pattern and the normalizer."
  (let [categories
        [{:path "red_tea" :tags ["Красный чай"]}
         {:path "green_tea" :tags ["Зеленый чай"]}
         {:path "puer/shu_puer" :tags ["Шу пуэр"]}
         {:path "puer/rassypnoj-puer" :tags ["Шу пуэр"]}
         {:path "puer/shen_puer" :tags ["Шен пуэр"]}
         {:path "ulun/svetliy-ulun" :tags ["Улун"]}
         {:path "ulun/temniy-ulun" :tags ["Улун" "Уишань"]}
         {:path "ulun/ulun-guandun" :tags ["Улун" "Гуандун"]}
         {:path "ulun/gaba" :tags ["Улун" "Габа"]}
         {:path "ulun/taivan-ulun" :tags ["Улун" "Тайвань"]}
         {:path "white_tea" :tags ["Белый чай"]}
         {:path "yellow-tea" :tags ["Желтый чай"]}
         {:path "xej-cha-chernyj-chaj" :tags ["Хэй ча"]}
         {:path "posuda/gajvani" :tags ["Посуда" "Гайвань"]}
         {:path "posuda/kolba-station" :tags ["Посуда" "Стекло"]}
         {:path "posuda/chahai" :tags ["Посуда" "Чахай"]}
         {:path "posuda/isinskie-chajniki" :tags ["Посуда" "Чайник" "Исин"]}
         {:path "posuda/pials" :tags ["Посуда" "Пиала"]}
         {:path "posuda/chajniki-s-knopkoi" :tags ["Посуда" "Типот"]}
         {:path "aksessuary/chabani-chajnye-dosk" :tags ["Чабань"]}
         {:path "aksessuary/figurki" :tags ["Фигурка"]}
         {:path "aksessuary/instrumenty-kistochki-shhipcy-shilo" :tags ["Инструмент"]}
         {:path "aksessuary/chaxe-sitechki" :tags ["Сито"]}
         {:path "aksessuary/cases" :tags ["Чехол"]}
         {:path "trav_tea" :tags ["Травяной чай"]}]]
    (fetcher.from-html categories format-url product-peg normalize)))

{: products
 :title "Чай без церемоний"
 :url "https://chaibez.ru"}
