blob: 890fffb85d48f1038307e1aad455d012b19d8d04 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
|
;; Load the PEG library: use the `lpeg` module when it can be required,
;; otherwise fall back to `lpeglj`.
;; `pcall` returns a success flag followed by the module (or the error message);
;; `pick-values 1` keeps only the flag so that it alone drives the `if`.
(local peg
(if (pick-values 1 (pcall require :lpeg))
(require :lpeg)
(require :lpeglj)))
(local parser (require :parser.parser))
(local number (require :lib.number))
(local fetcher (require :fetcher))
; (local utils (require :lib.utils))
;; Build the URL of one listing page of the shop.
;;   path - category path relative to the site root, e.g. "puer/shen-puer"
;;   page - page number (or string) appended after "/page-"
;; Returns the full URL as a string.
(fn format-url [path page]
  (let [site-root "https://gorkovchay.ru/"
        page-suffix (.. "/page-" page)]
    (.. site-root path page-suffix)))
;; LPeg pattern describing one product card on a listing page.
;; The top-level `*` is an LPeg sequence, so the pieces below must be found in
;; this order. Each `peg.Cg ... :name` is a named group capture; the names used
;; here are :category-id, :url, :image, :title, :archived, :price,
;; :price-fraction and :description.
;; NOTE(review): `parser.anywhere`, `parser.tag` and `parser.till` are project
;; helpers not visible here; presumably `anywhere` skips input until its
;; argument matches, `tag` matches an opening HTML tag with the given
;; attributes (plus optional body patterns), and `till` consumes text up to the
;; given delimiter -- confirm against parser.parser.
(local product-peg
(*
;; Start of a product card. The class literal includes a trailing space.
(parser.anywhere (parser.tag :div {:class "ut2-gl__item "})) ;; brittle
;; find a category ID for distinguishing teas from teaware later
(parser.anywhere (* "category_id=" (peg.Cg parser.pegs.number :category-id)))
;; Image container, then the first link after it; its href is captured as :url.
(parser.anywhere (parser.tag :div {:class "ut2-gl__image"}))
(parser.anywhere (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)}))
;; Image URL, ordered choice: first try a ` src="..."` attribute that is later
;; followed by the text " lazy_load_disabled "; otherwise take ` data-src="..."`.
(+
(* (parser.anywhere (* " src=\"" (peg.Cg (parser.till "\"") :image) "\""))
(parser.anywhere " lazy_load_disabled "))
(* (parser.anywhere (* " data-src=\"" (peg.Cg (parser.till "\"") :image) "\""))))
;; Product title, captured from the `title` attribute of the product-title link.
(parser.anywhere
(parser.tag
:a {:href "*" :class "product-title"
:title (peg.Cg (parser.till "\"") :title)}))
;; Stock status: an in-stock span yields :archived false, an out-of-stock span
;; yields :archived true (`peg.Cc` produces the constant without consuming input).
(parser.anywhere
(+
(* (parser.tag :span {:class (* "ty-qty-in-stock" (parser.till "\""))
:id "*"})
(peg.Cg (peg.Cc false) :archived))
(*
(parser.tag :span {:class (* "ty-qty-out-of-stock" (parser.till "\""))
:id "*"})
(peg.Cg (peg.Cc true) :archived))))
;; Price: :price is one or more repetitions of a number or a single space
;; (so a value with space-separated digit groups is captured with its spaces);
;; :price-fraction is the number inside the following <sup> tag.
(parser.anywhere
(parser.tag :span {:id "*" :class "ty-price-num"}
(*
(peg.Cg (^ (+ parser.pegs.number " ") 1) :price)
(parser.tag :sup {} (peg.Cg parser.pegs.number :price-fraction)))
(parser.till "</span>")))
;; Description: text of the product-description div up to the first "</div>".
(parser.anywhere
(parser.tag :div {:class "product-description"}
(peg.Cg (parser.till "</div>") :description)))))
;; Convert the raw captures of one `product-peg` match into a product record.
;;   product - table with the fields captured by `product-peg`: category-id,
;;             url, image, title, archived, price, price-fraction, description
;; Returns a table with :site, :title, :url, :description, :image, :price,
;; :archived, :weight, :volume and :price-per.
(fn normalize [product]
  ;; Teaware and incense get weight 0, everything else weight 1.
  ;; Both checks must read `category-id`: it is the only category field that
  ;; `product-peg` captures, so testing any other field never matches.
  (local weight
    (if (or (= product.category-id "269")   ;; teaware
            (= product.category-id "298"))  ;; incense
        0
        1))
  ;; The integer and fractional parts are captured separately; join them with
  ;; a decimal point before converting to a number.
  (local price (number.string->number
                 (.. product.price "." product.price-fraction)))
  {:site "gorkovchay"
   :title product.title
   :url product.url
   :description product.description
   :image product.image
   :price price
   :archived product.archived
   :weight weight
   :volume (parser.guess-volume product.title)
   ;; price-per is set to the same value as price
   :price-per price})
;; Entry point of the scraper: hands the list of shop categories to
;; `fetcher.from-html` together with the URL builder, the product pattern and
;; the normalizer, and returns whatever `fetcher.from-html` returns.
;; Each category entry carries the listing path on the site and the tags
;; associated with that category.
(fn products []
  (let [categories
        [{:path "krasnyy-chay" :tags ["Красный чай"]}
         {:path "belyy-chay" :tags ["Белый чай"]}
         {:path "chernyy-chay" :tags ["Хэй ча"]}
         {:path "zheltyy-chay" :tags ["Желтый чай"]}
         {:path "zelenyy-chay" :tags ["Зеленый чай"]}
         {:path "gaba-chay" :tags ["Габа"]}
         {:path "puer/shen-puer" :tags ["Шен пуэр"]}
         {:path "puer/shu-puer" :tags ["Шу пуэр"]}
         {:path "ulun/temnyy-ulun" :tags ["Улун"]}
         {:path "ulun/svetlyy-ulun" :tags ["Улун"]}
         {:path "ulun/fhdc" :tags ["Улун" "Гуандун"]}
         {:path "posuda" :tags ["Посуда"]}
         {:path "blagovoniya" :tags ["Благовония"]}]]
    (fetcher.from-html categories format-url product-peg normalize)))
;; Module value: the `products` function together with the shop's title and
;; base URL.
{:products products :title "Горьков чай" :url "https://gorkovchay.ru"}
|