summaryrefslogtreecommitdiff
path: root/parser/ipuer.fnl
diff options
context:
space:
mode:
author unwox <me@unwox.com> 2024-09-27 15:26:33 +0600
committer unwox <me@unwox.com> 2024-09-27 15:44:16 +0600
commit dd449357f502dbe9ca4487d4b06a06ee4e597146 (patch)
tree 9847488a6cc2c1aaf1fc80578e1a7a5d4af99ff5 /parser/ipuer.fnl
parent 9b82db238f9e2e02a76f95c793f8d6ef2387ecfd (diff)
new structure
Diffstat (limited to 'parser/ipuer.fnl')
-rw-r--r--parser/ipuer.fnl70
1 files changed, 70 insertions, 0 deletions
diff --git a/parser/ipuer.fnl b/parser/ipuer.fnl
new file mode 100644
index 0000000..7fefd1b
--- /dev/null
+++ b/parser/ipuer.fnl
@@ -0,0 +1,70 @@
+(import-macros {: map} :lib.macro)
+
+;; Pick the PEG library: prefer `lpeg`; when requiring it fails, fall back to
+;; `lpeglj`. The module handed back by the protected call is used directly.
+(local peg
+  (let [(loaded? lib) (pcall require :lpeg)]
+    (if loaded?
+        lib
+        (require :lpeglj))))
+(local number (require :lib.number))
+(local parser (require :parser.parser))
+(local fetcher (require :fetcher))
+
+;; Builds the listing URL for one page of a catalog section: `path` is placed
+;; under /catalog/ and `page` becomes the value of the `p` query parameter.
+(fn url-formatter [path page]
+  (let [section-url (.. "https://ipuer.ru/catalog/" path)]
+    (.. section-url "/?p=" page)))
+
+;; Pattern describing one product card of a catalog listing page. It is a
+;; sequence (`*` on LPeg patterns) of five sub-patterns, four of which store
+;; named group captures (`peg.Cg`): id, url + image, title and price.
+;; NOTE(review): `parser.anywhere`, `parser.tag`, `parser.till` and
+;; `parser.maybe` live in `parser.parser` and are not visible here; judging by
+;; their names they presumably skip ahead to the next match, match an HTML
+;; tag with the given attributes/content, consume text up to a delimiter, and
+;; make a pattern optional -- confirm against that module. The "*" attribute
+;; values presumably act as "any value" wildcards -- confirm as well.
+(local product-peg
+ (* ;; id: numeric `data-id` attribute of a <div>, captured as "id"
+ (parser.anywhere
+ (parser.tag :div
+ {:data-id (peg.Cg parser.pegs.number :id) :class "*"}))
+ ;; url and image: `href` of an <a> wrapping an <img>, and that image's
+ ;; `src`; both are captured up to the closing double quote
+ (parser.anywhere
+ (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)}
+ (parser.tag :img {:src (peg.Cg (parser.till "\"") :image) :alt "*"})))
+ ;; title: text of the <span> inside the "card-product_title" link,
+ ;; captured up to "</span>"
+ (parser.anywhere
+ (parser.tag :div {:class "card-product_title"}
+ (parser.tag :a {:href "*"}
+ (parser.tag :span {} (peg.Cg (parser.till "</span>") :title)))))
+ ;; price: a number, optionally followed by a space and a second number
+ ;; (presumably a thousands group such as "1 200"), captured as "price";
+ ;; the trailing " р." must follow but is not part of the capture
+ (parser.anywhere
+ (parser.tag :span {:class "card-price"}
+ (* (peg.Cg
+ (* parser.pegs.number
+ (parser.maybe (* " " parser.pegs.number)))
+ :price)
+ " р.")))
+ ;; "В корзину" link: no capture, but it has to be present for the whole
+ ;; pattern to match. Two attribute layouts are tried in order (`+` is LPeg
+ ;; ordered choice): with and without `data-add-text`.
+ ;; NOTE(review): presumably this restricts matches to cards that can be
+ ;; added to the cart -- confirm.
+ (parser.anywhere
+ (+ (parser.tag :a {:data-url "*" :class "*" :data-add-text "*"} "В корзину")
+ (parser.tag :a {:data-url "*" :class "*"} "В корзину")))))
+
+;; Turns one raw product capture into the record emitted for the "ipuer" site.
+;; Weight and year are guessed from the title by `parser.guess-weight` and
+;; `parser.guess-year`; the price string is converted with
+;; `number.string->number`. Relative url/image paths are prefixed with the
+;; site origin. `price-per` is price divided by weight, rounded up to one
+;; decimal place; it is nil unless price and weight are both present and the
+;; weight is greater than zero.
+(fn normalize [product]
+  (let [origin "https://ipuer.ru"
+        title product.title
+        weight (parser.guess-weight title)
+        price (number.string->number product.price)]
+    {:site "ipuer"
+     :id product.id
+     :url (.. origin product.url)
+     :title title
+     :description ""
+     ;; FIXME: parse all editions into different projects
+     :image (.. origin product.image)
+     :year (parser.guess-year title)
+     :price price
+     :weight weight
+     :category product.category
+     :price-per (when (and price weight (> weight 0))
+                  (/ (math.ceil (* (/ price weight) 10)) 10))}))
+
+;; Entry point of the module: hands the list of catalog sections to
+;; `fetcher.from-html` together with the URL builder, the product pattern and
+;; the normalizer, and returns whatever that call returns.
+(fn products []
+  (let [sections [{:path "shen-puer" :category "Шен пуэр"}
+                  {:path "shu-puer" :category "Шу пуэр"}
+                  ;; no :category is given for this section
+                  {:path "drugoy-chay"}
+                  {:path "blagovoniya" :category "Благовония"}
+                  {:path "posuda" :category "Посуда"}
+                  {:path "282" :category "Посуда"}]]
+    (fetcher.from-html sections url-formatter product-peg normalize)))
+
+{: products}