diff options
| author | unwox <me@unwox.com> | 2024-12-12 13:45:02 +0600 |
|---|---|---|
| committer | unwox <me@unwox.com> | 2024-12-16 22:58:34 +0600 |
| commit | d3a445ad7270e788812dd16e19a2820fc70a91b6 (patch) | |
| tree | c63eccfcf6341f47860d3e74c6a6a83bfa0f73d2 | |
| parent | b018944e6066e766560282c0bcd83df05899a4bc (diff) | |
implement gorkovchay.ru parser
| -rw-r--r-- | bin/fetch.fnl | 4 | ||||
| -rw-r--r-- | bin/serve.fnl | 2 | ||||
| -rw-r--r-- | parser/gorkovchay.fnl | 83 | ||||
| -rw-r--r-- | static/gorkovchay.png | bin | 0 -> 13636 bytes |
4 files changed, 87 insertions, 2 deletions
diff --git a/bin/fetch.fnl b/bin/fetch.fnl index 006cbea..0551d76 100644 --- a/bin/fetch.fnl +++ b/bin/fetch.fnl @@ -6,6 +6,7 @@ (local array (require :lib.array)) (local cache (require :lib.cache)) +(local gorkovchay (require :parser.gorkovchay)) (local moychay (require :parser.moychay)) (local ozchai (require :parser.ozchai)) (local ipuer (require :parser.ipuer)) @@ -151,7 +152,8 @@ ;; replace with with-tx (local tx (must (luna.db.begin db))) (must (luna.db.exec-tx tx "DELETE FROM product_tags;" [])) -(each [_ parser (pairs [chaekshop clubcha artoftea ipuer ozchai moychay])] +(each [_ parser (pairs [ozchai gorkovchay chaekshop clubcha artoftea ipuer + moychay])] (store-products tx (parser.products))) (cache.clear-tx tx "page:") (must (luna.db.commit tx))
diff --git a/bin/serve.fnl b/bin/serve.fnl index ea75edd..68dea35 100644 --- a/bin/serve.fnl +++ b/bin/serve.fnl @@ -36,7 +36,7 @@ (tset package :loaded module old)))))) (local db - (must (luna.db.open "file:var/db.sqlite?_journal=WAL&_sync=NORMAL&mode=ro"))) + (must (luna.db.open "file:var/db.sqlite?_journal=WAL&_sync=NORMAL"))) (local query-synonyms { "шэн" "шен"
diff --git a/parser/gorkovchay.fnl b/parser/gorkovchay.fnl
new file mode 100644
index 0000000..d094b85
--- /dev/null
+++ b/parser/gorkovchay.fnl
@@ -0,0 +1,83 @@
+(local peg ;; use lpeg when it can be required, otherwise fall back to lpeglj
+  (if (pick-values 1 (pcall require :lpeg))
+    (require :lpeg)
+    (require :lpeglj)))
+(local parser (require :parser.parser))
+(local number (require :lib.number))
+(local fetcher (require :fetcher))
+; (local utils (require :lib.utils))
+
+(fn format-url [path page] ;; URL of listing page `page` under category `path`
+  (.. "https://gorkovchay.ru/" path "/page-" page))
+
+(local product-peg ;; pattern for one product card; its named captures are the fields normalize reads
+  (*
+    (parser.anywhere (parser.tag :div {:class "ut2-gl__item "})) ;; brittle
+    ;; find a category ID for distinguishing teas from teaware later
+    (parser.anywhere (* "category_id=" (peg.Cg parser.pegs.number :category-id)))
+    (parser.anywhere (parser.tag :div {:class "ut2-gl__image"}))
+    (parser.anywhere (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)}))
+    (+ ;; image: a src attribute followed by lazy_load_disabled, else a data-src attribute
+      (* (parser.anywhere (* " src=\"" (peg.Cg (parser.till "\"") :image) "\""))
+         (parser.anywhere " lazy_load_disabled "))
+      (* (parser.anywhere (* " data-src=\"" (peg.Cg (parser.till "\"") :image) "\""))))
+    (parser.anywhere
+      (parser.tag
+        :a {:href "*" :class "product-title"
+            :title (peg.Cg (parser.till "\"") :title)}))
+    (parser.anywhere
+      (+ ;; in-stock span => archived false, out-of-stock span => archived true
+        (* (parser.tag :span {:class (* "ty-qty-in-stock" (parser.till "\""))
+                              :id "*"})
+           (peg.Cg (peg.Cc false) :archived))
+        (*
+          (parser.tag :span {:class (* "ty-qty-out-of-stock" (parser.till "\""))
+                             :id "*"})
+          (peg.Cg (peg.Cc true) :archived))))
+    (parser.anywhere
+      (parser.tag :span {:id "*" :class "ty-price-num"}
+                  (* ;; whole part (numbers and spaces), then the fraction inside a sup tag
+                    (peg.Cg (^ (+ parser.pegs.number " ") 1) :price)
+                    (parser.tag :sup {} (peg.Cg parser.pegs.number :price-fraction)))
+                  (parser.till "</span>")))
+    (parser.anywhere
+      (parser.tag :div {:class "product-description"}
+        (peg.Cg (parser.till "</div>") :description)))))
+
+(fn normalize [product] ;; turn the captures of one match into a product record
+  (local weight ;; 0 for the teaware and incense categories, 1 for everything else
+    (if (or (= product.category-id "269") ;; teaware
+            (= product.category-id "298")) ;; incense
+      0 1))
+  (local price (number.string->number ;; join whole part and fraction with a dot before parsing
+                 (.. product.price "." product.price-fraction)))
+  {:site "gorkovchay"
+   :title product.title
+   :url product.url
+   :description product.description
+   :image product.image
+   :price price
+   :archived product.archived
+   :weight weight
+   :price-per price})
+
+(fn products [] ;; pass the category list, format-url, product-peg and normalize to fetcher.from-html
+  (fetcher.from-html
+    [{:path "krasnyy-chay" :tags ["Красный чай"]}
+     {:path "belyy-chay" :tags ["Белый чай"]}
+     {:path "chernyy-chay" :tags ["Хэй ча"]}
+     {:path "zheltyy-chay" :tags ["Желтый чай"]}
+     {:path "zelenyy-chay" :tags ["Зеленый чай"]}
+     {:path "gaba-chay" :tags ["Габа"]}
+     {:path "puer/shen-puer" :tags ["Шен пуэр"]}
+     {:path "puer/shu-puer" :tags ["Шу пуэр"]}
+     {:path "ulun/temnyy-ulun" :tags ["Улун"]}
+     {:path "ulun/svetlyy-ulun" :tags ["Улун"]}
+     {:path "ulun/fhdc" :tags ["Улун" "Гуандун"]}
+     {:path "posuda" :tags ["Посуда"]}
+     {:path "blagovoniya" :tags ["Благовония"]}]
+    format-url
+    product-peg
+    normalize))
+
+{:products products :title "Горьков чай" :url "https://gorkovchay.ru"}
diff --git a/static/gorkovchay.png b/static/gorkovchay.png Binary files differ new file mode 100644 index 0000000..f57f327 --- /dev/null +++ b/static/gorkovchay.png |
