From 0ea9ddc67a4e771c57189b9bec721dd30df5f315 Mon Sep 17 00:00:00 2001 From: unwox Date: Fri, 27 Sep 2024 16:48:20 +0600 Subject: refactor json fetcher a bit --- fetcher.fnl | 8 +++---- parser/artoftea.fnl | 5 ++-- parser/ipuer.fnl | 4 ++-- parser/ozchai.fnl | 68 +++++++++++++++++++---------------------------------- 4 files changed, 33 insertions(+), 52 deletions(-) diff --git a/fetcher.fnl b/fetcher.fnl index 6d6d633..bf22abf 100644 --- a/fetcher.fnl +++ b/fetcher.fnl @@ -71,7 +71,7 @@ categories [])) -(fn walk-json-pages [url-formatter path] +(fn walk-json-pages [url-formatter path response-destructor] (fn gather [page knil] (local url (url-formatter path page)) (print (.. "requesting " url)) @@ -84,12 +84,12 @@ "")) (if (= status 200) - (let [items (json.decode content)] + (let [{: items} (response-destructor (json.decode content))] (if (or (= items nil) (= 0 (# items))) knil (do (os.execute "sleep 1") - (gather (+ page 1) (array.concat knil items))))) + (gather (+ 1 page) (array.concat knil items))))) (= status 404) knil (retry #(gather page knil) 3 1))) @@ -103,7 +103,7 @@ result (categorize-many (map #(normalizer $2) - (walk-json-pages url-formatter path)) + (walk-json-pages url-formatter path response-destructor)) category))) categories [])) diff --git a/parser/artoftea.fnl b/parser/artoftea.fnl index 1f03ed1..205d5d0 100644 --- a/parser/artoftea.fnl +++ b/parser/artoftea.fnl @@ -8,7 +8,7 @@ (local number (require :lib.number)) (local fetcher (require :fetcher)) -(fn url-formatter [path page] +(fn format-url [path page] (.. "https://artoftea.ru/" path "/?page=" page)) (local product-peg @@ -50,6 +50,7 @@ (local price (number.string->number product.price)) {:site "artoftea" :id product.id + :title product.title :url product.url :description product.description :image product.image @@ -64,7 +65,7 @@ (fetcher.from-html [{:path "redtea" :category "Красный чай"} {:path "greentea" :category "Зеленый чай"}] - url-formatter + format-url product-peg normalize)) diff --git a/parser/ipuer.fnl b/parser/ipuer.fnl index 7fefd1b..11e63c0 100644 --- a/parser/ipuer.fnl +++ b/parser/ipuer.fnl @@ -8,7 +8,7 @@ (local parser (require :parser.parser)) (local fetcher (require :fetcher)) -(fn url-formatter [path page] +(fn format-url [path page] (.. "https://ipuer.ru/catalog/" path "/?p=" page)) (local product-peg @@ -63,7 +63,7 @@ {:path "blagovoniya" :category "Благовония"} {:path "posuda" :category "Посуда"} {:path "282" :category "Посуда"}] - url-formatter + format-url product-peg normalize)) diff --git a/parser/ozchai.fnl b/parser/ozchai.fnl index 6bf6286..50bfd6a 100644 --- a/parser/ozchai.fnl +++ b/parser/ozchai.fnl @@ -1,52 +1,28 @@ (import-macros {: map} :lib.macro) -(local http (require :lib.http)) (local array (require :lib.array)) +(local http (require :lib.http)) +(local number (require :lib.number)) +(local fetcher (require :fetcher)) (local json (require :vendor.json)) -(local %all-products-partuid 176163172341) - -(fn string->number [str] - (if str - (tonumber (pick-values 1 (str:gsub "[^0-9.]" ""))) - nil)) - -(fn request [partuid slice] - (print (.. "https://store.tildaapi.com/api/getproductslist/" - "?storepartuid=" - partuid - "&recid=280779251&c=1723216515077" - "&getparts=true&getoptions=true&slice=%d&size=36")) - (let [(status headers body) - (luna.http.request - "GET" - (string.format - (.. "https://store.tildaapi.com/api/getproductslist/" - "?storepartuid=" - partuid - "&recid=280779251&c=1723216515077" - "&getparts=true&getoptions=true&slice=%d&size=36") - slice) - {:Content-Type "application/json" - :User-Agent (http.random-user-agent)} - "")] - (json.decode body))) +(fn format-url [path page] + (.. "https://store.tildaapi.com/api/getproductslist/" + "?storepartuid=" path + "&slice=" page + "&recid=280779251" + "&c=1723216515077" + "&getparts=true" + "&getoptions=true" + "&size=36")) -(fn walk-slices [partuid] - (fn gather [slice knil] - (let [{: nextslice : products} (request partuid slice) - res (array.concat knil products)] - (if (= 0 (# products)) - knil - (do - (os.execute "sleep 1") - (gather (+ slice 1) res))))) - (gather 1 [])) +(fn destruct-response [response] + {:items response.products}) -(fn normalize [_ product] +(fn normalize [product] (local gallery (json.decode product.gallery)) - (local weight (string->number (. (. product.editions 1) :Вес))) - (local price (string->number (. (. product.editions 1) :price))) + (local weight (number.string->number (. product.editions 1 :Вес))) + (local price (number.string->number (. product.editions 1 :price))) {:site "ozchai" :id product.url :url product.url @@ -54,8 +30,8 @@ :description product.descr ;; FIXME: parse all editions into different projects :image (if (< 0 (# gallery)) - (. (. gallery 1) :img) - "") + (. gallery 1 :img) + "") :weight weight :price price :price-per (if (and price weight (< 0 weight)) @@ -64,6 +40,10 @@ :characteristics product.characteristics}) (fn products [] - (map normalize (walk-slices %all-products-partuid))) + (fetcher.from-json + [{:path "176163172341"}] + format-url + destruct-response + normalize)) {: products} -- cgit v1.2.3