From dd449357f502dbe9ca4487d4b06a06ee4e597146 Mon Sep 17 00:00:00 2001 From: unwox Date: Fri, 27 Sep 2024 15:26:33 +0600 Subject: new structure --- fetcher.fnl | 68 +++++++++++++++++++++++++------------------------------------ 1 file changed, 28 insertions(+), 40 deletions(-) (limited to 'fetcher.fnl') diff --git a/fetcher.fnl b/fetcher.fnl index d31f858..6d6d633 100644 --- a/fetcher.fnl +++ b/fetcher.fnl @@ -3,11 +3,11 @@ (local peg (if (pick-values 1 (pcall require :lpeg)) (require :lpeg) - (require :vendor.lpeglj))) + (require :lpeglj))) (local array (require :lib.array)) (local json (require :vendor.json)) -(local parser (require :parser)) -(local http (require :http)) +(local parser (require :parser.parser)) +(local http (require :lib.http)) (fn retry [what times sleep] (var result nil) @@ -37,48 +37,37 @@ (luna.http.request "GET" url {:User-Agent (http.random-user-agent)} "")) (if (= status 200) - (let [products (parser.match-many html item-peg)] - (if (or (= products nil) (= 0 (# products))) + (let [items (parser.match-many html item-peg)] + (if (or (= items nil) (= 0 (# items))) knil (do (os.execute "sleep 1") - (gather (+ page 1) (array.concat knil products))))) + (gather (+ page 1) (array.concat knil items))))) (= status 404) knil (retry #(gather page knil) 3 1))) (gather 1 [])) -(fn guess-category [title] - (if (: (parser.anywhere (+ (peg.P "зеленый") "Зеленый")) :match title) - "Зеленый чай" - (: (parser.anywhere (+ (peg.P "Улун") "улун")) :match title) - "Улун" - (: (parser.anywhere (+ (peg.P "Белый") "белый")) :match title) - "Белый чай" - (: (parser.anywhere (+ (peg.P "Желтый") "желтый")) :match title) - "Желтый чай" - (: (parser.anywhere (+ (peg.P "Красный") "красный")) :match title) - "Красный чай" - "Неизвестная категория")) - (fn categorize-many [items category] (map (fn [_ item] - (tset item :category - (if category category (guess-category item.title))) - item) + (tset item :category + (if category + category + (parser.guess-category item.title))) + item) items)) -(fn from-html [url-formatter categories normalizer item-peg] +(fn from-html [categories url-formatter item-peg normalizer] (reduce (fn [_ {: category : path} result] (array.concat result (categorize-many - (map #(normalizer $2) - (walk-html-pages url-formatter path item-peg)) - category))) + (map #(normalizer $2) + (walk-html-pages url-formatter path item-peg)) + category))) categories [])) @@ -88,35 +77,34 @@ (print (.. "requesting " url)) (local (status _ content) (luna.http.request - "GET" - url + "GET" url {:User-Agent (http.random-user-agent) :Content-Type "application/json" :Accept "application/json"} "")) (if (= status 200) - (let [products (json.decode content)] - (if (or (= products nil) (= 0 (# products))) - knil - (do - (os.execute "sleep 1") - (gather (+ page 1) (array.concat knil products))))) + (let [items (json.decode content)] + (if (or (= items nil) (= 0 (# items))) + knil + (do + (os.execute "sleep 1") + (gather (+ page 1) (array.concat knil items))))) (= status 404) knil (retry #(gather page knil) 3 1))) (gather 1 [])) -(fn from-json [url-formatter categories normalizer] +(fn from-json [categories url-formatter response-destructor normalizer] (reduce (fn [_ {: category : path} result] (array.concat - result - (categorize-many - (map #(normalizer $2) - (walk-json-pages url-formatter path)) - category))) + result + (categorize-many + (map #(normalizer $2) + (walk-json-pages url-formatter path)) + category))) categories [])) -- cgit v1.2.3