summaryrefslogtreecommitdiff
path: root/parser/gorkovchay.fnl
diff options
context:
space:
mode:
Diffstat (limited to 'parser/gorkovchay.fnl')
-rw-r--r--parser/gorkovchay.fnl83
1 files changed, 83 insertions, 0 deletions
diff --git a/parser/gorkovchay.fnl b/parser/gorkovchay.fnl
new file mode 100644
index 0000000..d094b85
--- /dev/null
+++ b/parser/gorkovchay.fnl
@@ -0,0 +1,83 @@
+;; Pick the PEG library at load time: `pcall` reports whether `lpeg` can be
+;; required, and `pick-values 1` keeps only that success flag for the `if`.
+;; When `lpeg` is unavailable the module falls back to `lpeglj`
+;; (presumably the LuaJIT port of LPeg -- confirm).
+(local peg
+ (if (pick-values 1 (pcall require :lpeg))
+ (require :lpeg)
+ (require :lpeglj)))
+(local parser (require :parser.parser))
+(local number (require :lib.number))
+(local fetcher (require :fetcher))
+; (local utils (require :lib.utils))
+
+(fn format-url [path page]
+  "Build the listing URL for catalogue section `path` at page number `page`."
+  (let [site-root "https://gorkovchay.ru/"
+        page-suffix (.. "/page-" page)]
+    (.. site-root path page-suffix)))
+
+;; Pattern for a single product card on a listing page. It is one ordered
+;; sequence (`*`): every step below has to match, in this order. The named
+;; groups (`peg.Cg pattern :name`) label the pieces that `normalize` reads:
+;; category-id, url, image, title, archived, price, price-fraction and
+;; description.
+;; NOTE(review): `parser.anywhere`, `parser.tag` and `parser.till` live in
+;; parser.parser, not in this file. The comments below assume they mean
+;; "skip ahead until the pattern matches", "match an HTML tag with these
+;; attributes" and "consume input up to the given delimiter" -- confirm there.
+(local product-peg
+ (*
+ ;; opening div of a product card; note the trailing space inside the class
+ ;; literal, which is what makes this step fragile
+ (parser.anywhere (parser.tag :div {:class "ut2-gl__item "})) ;; brittle
+ ;; find a category ID for distinguishing teas from teaware later
+ (parser.anywhere (* "category_id=" (peg.Cg parser.pegs.number :category-id)))
+ ;; image container, then the first link after it, whose href becomes :url
+ (parser.anywhere (parser.tag :div {:class "ut2-gl__image"}))
+ (parser.anywhere (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)}))
+ ;; :image, as an ordered choice (`+`): first try a ` src="..."` attribute
+ ;; that is later followed by ` lazy_load_disabled `; if that alternative
+ ;; fails, take the value of a ` data-src="..."` attribute instead
+ (+
+ (* (parser.anywhere (* " src=\"" (peg.Cg (parser.till "\"") :image) "\""))
+ (parser.anywhere " lazy_load_disabled "))
+ (* (parser.anywhere (* " data-src=\"" (peg.Cg (parser.till "\"") :image) "\""))))
+ ;; product title link: the title attribute value becomes :title
+ ;; (the "*" attribute values here and below presumably act as wildcards
+ ;; in parser.tag -- confirm)
+ (parser.anywhere
+ (parser.tag
+ :a {:href "*" :class "product-title"
+ :title (peg.Cg (parser.till "\"") :title)}))
+ ;; stock status: a span whose class begins with "ty-qty-in-stock" yields
+ ;; the constant false for :archived, one beginning with
+ ;; "ty-qty-out-of-stock" yields the constant true
+ (parser.anywhere
+ (+
+ (* (parser.tag :span {:class (* "ty-qty-in-stock" (parser.till "\""))
+ :id "*"})
+ (peg.Cg (peg.Cc false) :archived))
+ (*
+ (parser.tag :span {:class (* "ty-qty-out-of-stock" (parser.till "\""))
+ :id "*"})
+ (peg.Cg (peg.Cc true) :archived))))
+ ;; price span: one or more numbers and spaces form :price (so the capture
+ ;; may itself contain spaces), followed by a <sup> tag whose number is
+ ;; :price-fraction. The `(parser.till "</span>")` pattern is handed to
+ ;; parser.tag as a further argument; its role depends on parser.tag's
+ ;; signature -- confirm.
+ (parser.anywhere
+ (parser.tag :span {:id "*" :class "ty-price-num"}
+ (*
+ (peg.Cg (^ (+ parser.pegs.number " ") 1) :price)
+ (parser.tag :sup {} (peg.Cg parser.pegs.number :price-fraction)))
+ (parser.till "</span>")))
+ ;; description div: everything up to "</div>" becomes :description
+ (parser.anywhere
+ (parser.tag :div {:class "product-description"}
+ (peg.Cg (parser.till "</div>") :description)))))
+
+(fn normalize [product]
+  "Turn one raw capture table produced by `product-peg` into a product record.
+
+  Reads `category-id`, `title`, `url`, `description`, `image`, `archived`,
+  `price` and `price-fraction` from `product`. Returns a table with `:site`,
+  `:title`, `:url`, `:description`, `:image`, `:price`, `:archived`,
+  `:weight` and `:price-per`. `:weight` is 0 for the teaware and incense
+  categories and 1 for everything else; `:price-per` is the same value as
+  `:price`."
+  ;; The grammar stores the category under `category-id` only, so both
+  ;; comparisons must read that field. The ids are compared as strings
+  ;; (assumes the capture is the matched text -- confirm against
+  ;; parser.pegs.number).
+  (let [category product.category-id
+        weightless? (or (= category "269")  ;; teaware
+                        (= category "298")) ;; incense
+        ;; the whole and fractional parts are captured separately; join them
+        ;; with a decimal point before converting to a number
+        price (number.string->number
+               (.. product.price "." product.price-fraction))]
+    {:site "gorkovchay"
+     :title product.title
+     :url product.url
+     :description product.description
+     :image product.image
+     :price price
+     :archived product.archived
+     :weight (if weightless? 0 1)
+     :price-per price}))
+
+(fn products []
+  "Hand the catalogue sections below to `fetcher.from-html`, together with
+  `format-url`, `product-peg` and `normalize`, and return whatever it returns."
+  ;; one entry per catalogue section: URL path plus the tags given for it
+  (let [sections
+        [{:tags ["Красный чай"] :path "krasnyy-chay"}
+         {:tags ["Белый чай"] :path "belyy-chay"}
+         {:tags ["Хэй ча"] :path "chernyy-chay"}
+         {:tags ["Желтый чай"] :path "zheltyy-chay"}
+         {:tags ["Зеленый чай"] :path "zelenyy-chay"}
+         {:tags ["Габа"] :path "gaba-chay"}
+         {:tags ["Шен пуэр"] :path "puer/shen-puer"}
+         {:tags ["Шу пуэр"] :path "puer/shu-puer"}
+         {:tags ["Улун"] :path "ulun/temnyy-ulun"}
+         {:tags ["Улун"] :path "ulun/svetlyy-ulun"}
+         {:tags ["Улун" "Гуандун"] :path "ulun/fhdc"}
+         {:tags ["Посуда"] :path "posuda"}
+         {:tags ["Благовония"] :path "blagovoniya"}]]
+    (fetcher.from-html sections format-url product-peg normalize)))
+
+;; Module value: the shop's display title, its base URL and the `products`
+;; entry point.
+{:title "Горьков чай"
+ :url "https://gorkovchay.ru"
+ :products products}