diff options
| author | unwox <me@unwox.com> | 2024-12-12 13:45:02 +0600 |
|---|---|---|
| committer | unwox <me@unwox.com> | 2024-12-16 22:58:34 +0600 |
| commit | d3a445ad7270e788812dd16e19a2820fc70a91b6 (patch) | |
| tree | c63eccfcf6341f47860d3e74c6a6a83bfa0f73d2 /parser/gorkovchay.fnl | |
| parent | b018944e6066e766560282c0bcd83df05899a4bc (diff) | |
implement gorkovchay.ru parser
Diffstat (limited to 'parser/gorkovchay.fnl')
| -rw-r--r-- | parser/gorkovchay.fnl | 83 |
1 files changed, 83 insertions, 0 deletions
;; parser/gorkovchay.fnl -- product parser for gorkovchay.ru

;; Prefer the `lpeg` module; fall back to `lpeglj` when requiring `lpeg` fails.
(local peg
  (if (pick-values 1 (pcall require :lpeg))
    (require :lpeg)
    (require :lpeglj)))
(local parser (require :parser.parser))
(local number (require :lib.number))
(local fetcher (require :fetcher))
; (local utils (require :lib.utils))

(fn format-url [path page]
  "Build the listing URL for category `path` at page `page` by concatenating
  them onto the site root as <root>/<path>/page-<page>."
  (.. "https://gorkovchay.ru/" path "/page-" page))

;; Grammar for a single product card. The sub-patterns are matched in
;; sequence and fill these named captures:
;;   :category-id :url :image :title :archived :price :price-fraction
;;   :description
;; NOTE(review): `parser.anywhere`, `parser.tag` and `parser.till` are defined
;; in parser.parser; presumably they skip ahead to a match, match an HTML tag
;; with the given attributes, and consume up to a delimiter -- confirm there.
(local product-peg
  (*
    (parser.anywhere (parser.tag :div {:class "ut2-gl__item "})) ;; brittle
    ;; find a category ID for distinguishing teas from teaware later
    (parser.anywhere (* "category_id=" (peg.Cg parser.pegs.number :category-id)))
    (parser.anywhere (parser.tag :div {:class "ut2-gl__image"}))
    (parser.anywhere (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)}))
    ;; image URL: either a plain `src` followed by the lazy_load_disabled
    ;; marker, or a `data-src` attribute
    (+
      (* (parser.anywhere (* " src=\"" (peg.Cg (parser.till "\"") :image) "\""))
         (parser.anywhere " lazy_load_disabled "))
      (* (parser.anywhere (* " data-src=\"" (peg.Cg (parser.till "\"") :image) "\""))))
    (parser.anywhere
      (parser.tag
        :a {:href "*" :class "product-title"
            :title (peg.Cg (parser.till "\"") :title)}))
    ;; stock status: in stock -> :archived false, out of stock -> :archived true
    (parser.anywhere
      (+
        (* (parser.tag :span {:class (* "ty-qty-in-stock" (parser.till "\""))
                              :id "*"})
           (peg.Cg (peg.Cc false) :archived))
        (*
          (parser.tag :span {:class (* "ty-qty-out-of-stock" (parser.till "\""))
                             :id "*"})
          (peg.Cg (peg.Cc true) :archived))))
    ;; price: a run of numbers and spaces captured as :price, then the <sup>
    ;; contents captured as :price-fraction
    (parser.anywhere
      (parser.tag :span {:id "*" :class "ty-price-num"}
        (*
          (peg.Cg (^ (+ parser.pegs.number " ") 1) :price)
          (parser.tag :sup {} (peg.Cg parser.pegs.number :price-fraction)))
        (parser.till "</span>")))
    (parser.anywhere
      (parser.tag :div {:class "product-description"}
        (peg.Cg (parser.till "</div>") :description)))))

(fn normalize [product]
  "Convert the captures of `product-peg` into the product record returned by
  this parser.

  `weight` is 0 for the teaware (269) and incense (298) categories and 1 for
  everything else. `price` is built from the :price and :price-fraction
  captures joined with a dot and converted with `number.string->number`;
  `price-per` is set to that same value."
  (local weight
    ;; Both checks read :category-id, the only category field the grammar
    ;; captures. The incense check previously read `product.category`, which
    ;; product-peg never sets, so it could not match.
    (if (or (= product.category-id "269") ;; teaware
            (= product.category-id "298")) ;; incense
      0 1))
  (local price
    (number.string->number
      (.. product.price "." product.price-fraction)))
  {:site "gorkovchay"
   :title product.title
   :url product.url
   :description product.description
   :image product.image
   :price price
   :archived product.archived
   :weight weight
   :price-per price})

(fn products []
  "Return the result of `fetcher.from-html` called with the site's category
  list (each entry a listing `:path` plus `:tags`), `format-url`,
  `product-peg` and `normalize`."
  (fetcher.from-html
    [{:path "krasnyy-chay"      :tags ["Красный чай"]}
     {:path "belyy-chay"        :tags ["Белый чай"]}
     {:path "chernyy-chay"      :tags ["Хэй ча"]}
     {:path "zheltyy-chay"      :tags ["Желтый чай"]}
     {:path "zelenyy-chay"      :tags ["Зеленый чай"]}
     {:path "gaba-chay"         :tags ["Габа"]}
     {:path "puer/shen-puer"    :tags ["Шен пуэр"]}
     {:path "puer/shu-puer"     :tags ["Шу пуэр"]}
     {:path "ulun/temnyy-ulun"  :tags ["Улун"]}
     {:path "ulun/svetlyy-ulun" :tags ["Улун"]}
     {:path "ulun/fhdc"         :tags ["Улун" "Гуандун"]}
     {:path "posuda"            :tags ["Посуда"]}
     {:path "blagovoniya"       :tags ["Благовония"]}]
    format-url
    product-peg
    normalize))

;; Module export: the product fetcher plus the site's display title and URL.
{:products products
 :title "Горьков чай"
 :url "https://gorkovchay.ru"}
