summaryrefslogtreecommitdiff
path: root/parser/gorkovchay.fnl
blob: d094b8515cef0f75db74180e5aa244d260c45609 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
(local peg
 (if (pick-values 1 (pcall require :lpeg))
   (require :lpeg)
   (require :lpeglj)))
(local parser (require :parser.parser))
(local number (require :lib.number))
(local fetcher (require :fetcher))
; (local utils (require :lib.utils))

(fn format-url [path page]
  "Return the gorkovchay.ru URL made of the site root, `path`, and a
trailing /page-<page> segment."
  (let [site-root "https://gorkovchay.ru/"
        page-suffix (.. "/page-" page)]
    (.. site-root path page-suffix)))

;; Pattern for one product entry in the fetched HTML. It is a plain sequence:
;; each step is wrapped in parser.anywhere (presumably "skip ahead until this
;; matches" -- confirm in parser.parser), so the steps are listed in the order
;; their markup has to be found. Named group captures produced along the way:
;; category-id, url, image, title, archived, price, price-fraction, description.
(local product-peg
  (let [seek parser.anywhere
        tag parser.tag
        quoted #(parser.till "\"")
        named (fn [patt name] (peg.Cg patt name))
        ;; <span> whose class starts with `class-prefix`; on a match the
        ;; constant `archived?` becomes the :archived capture
        stock-badge (fn [class-prefix archived?]
                      (* (tag :span {:class (* class-prefix (quoted))
                                     :id "*"})
                         (named (peg.Cc archived?) :archived)))
        card (seek (tag :div {:class "ut2-gl__item "})) ;; brittle
        ;; find a category ID for distinguishing teas from teaware later
        category (seek (* "category_id="
                          (named parser.pegs.number :category-id)))
        image-box (seek (tag :div {:class "ut2-gl__image"}))
        link (seek (tag :a {:href (named (quoted) :url)}))
        ;; image given directly in src=, followed by the lazy_load_disabled marker
        eager-image (* (seek (* " src=\"" (named (quoted) :image) "\""))
                       (seek " lazy_load_disabled "))
        ;; otherwise the image URL is taken from data-src=
        ;; NOTE(review): the single-operand (* ...) is kept exactly as in the
        ;; original; how it compiles may depend on the Fennel version --
        ;; confirm before simplifying it away.
        lazy-image (* (seek (* " data-src=\"" (named (quoted) :image) "\"")))
        title (seek (tag :a {:href "*" :class "product-title"
                             :title (named (quoted) :title)}))
        stock (seek (+ (stock-badge "ty-qty-in-stock" false)
                       (stock-badge "ty-qty-out-of-stock" true)))
        ;; :price is one or more numbers/spaces, :price-fraction is the
        ;; number inside the nested <sup>
        ;; NOTE(review): parser.tag receives an extra argument
        ;; (parser.till "</span>") after the body pattern here -- confirm
        ;; that parser.tag actually uses it.
        price (seek
               (tag :span {:id "*" :class "ty-price-num"}
                    (* (named (^ (+ parser.pegs.number " ") 1) :price)
                       (tag :sup {}
                            (named parser.pegs.number :price-fraction)))
                    (parser.till "</span>")))
        description (seek
                     (tag :div {:class "product-description"}
                          (named (parser.till "</div>") :description)))]
    (* card
       category
       image-box
       link
       (+ eager-image lazy-image)
       title
       stock
       price
       description)))

(fn normalize [product]
  "Turn the named captures matched by product-peg into a product record.

product is read for: category-id, title, url, description, image, archived,
price and price-fraction. price and price-fraction are joined with a dot and
converted with number.string->number.

Returns a table with site, title, url, description, image, price, archived,
weight and price-per. weight is 0 for teaware (category 269) and incense
(category 298) and 1 otherwise; price-per is the same value as price."
  (let [category product.category-id
        ;; Both checks must read category-id: it is the only category field
        ;; product-peg captures. The incense check used to read
        ;; product.category, which is never set, so incense got weight 1.
        weightless? (or (= category "269")  ;; teaware
                        (= category "298")) ;; incense
        price (number.string->number
                (.. product.price "." product.price-fraction))]
    {:site "gorkovchay"
     :title product.title
     :url product.url
     :description product.description
     :image product.image
     :price price
     :archived product.archived
     :weight (if weightless? 0 1)
     :price-per price}))

(fn products []
  "Call fetcher.from-html with the list of category sections (each a table
with a :path and a list of :tags), format-url, product-peg and normalize,
and return its result."
  (let [section (fn [path ...] {:path path :tags [...]})
        sections [(section "krasnyy-chay" "Красный чай")
                  (section "belyy-chay" "Белый чай")
                  (section "chernyy-chay" "Хэй ча")
                  (section "zheltyy-chay" "Желтый чай")
                  (section "zelenyy-chay" "Зеленый чай")
                  (section "gaba-chay" "Габа")
                  (section "puer/shen-puer" "Шен пуэр")
                  (section "puer/shu-puer" "Шу пуэр")
                  (section "ulun/temnyy-ulun" "Улун")
                  (section "ulun/svetlyy-ulun" "Улун")
                  (section "ulun/fhdc" "Улун" "Гуандун")
                  (section "posuda" "Посуда")
                  (section "blagovoniya" "Благовония")]]
    (fetcher.from-html sections format-url product-peg normalize)))

;; Table this file evaluates to: the products function together with the
;; shop's display title and base URL.
{:products products :title "Горьков чай" :url "https://gorkovchay.ru"}