diff options
Diffstat (limited to 'fetcher.fnl')
| -rw-r--r-- | fetcher.fnl | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/fetcher.fnl b/fetcher.fnl new file mode 100644 index 0000000..d31f858 --- /dev/null +++ b/fetcher.fnl @@ -0,0 +1,124 @@ +(import-macros {: reduce : map} :lib.macro) + +(local peg + (if (pick-values 1 (pcall require :lpeg)) + (require :lpeg) + (require :vendor.lpeglj))) +(local array (require :lib.array)) +(local json (require :vendor.json)) +(local parser (require :parser)) +(local http (require :http)) + +(fn retry [what times sleep] + (var result nil) + (var stop? false) + (var err nil) + + (for [i 1 times &until stop?] + (local (ok? value) (pcall what)) + (if ok? + (do + (set result value) + (set stop? true)) + (do + (set err value) + (os.execute (.. "sleep " sleep))))) + + (when (not stop?) + (error (.. "failed after " times " retries:\n" err))) + + result) + +(fn walk-html-pages [url-formatter path item-peg] + (fn gather [page knil] + (local url (url-formatter path page)) + (print (.. "requesting " url)) + (local (status _ html) + (luna.http.request "GET" url {:User-Agent (http.random-user-agent)} "")) + + (if (= status 200) + (let [products (parser.match-many html item-peg)] + (if (or (= products nil) (= 0 (# products))) + knil + (do + (os.execute "sleep 1") + (gather (+ page 1) (array.concat knil products))))) + (= status 404) + knil + (retry #(gather page knil) 3 1))) + + (gather 1 [])) + +(fn guess-category [title] + (if (: (parser.anywhere (+ (peg.P "зеленый") "Зеленый")) :match title) + "Зеленый чай" + (: (parser.anywhere (+ (peg.P "Улун") "улун")) :match title) + "Улун" + (: (parser.anywhere (+ (peg.P "Белый") "белый")) :match title) + "Белый чай" + (: (parser.anywhere (+ (peg.P "Желтый") "желтый")) :match title) + "Желтый чай" + (: (parser.anywhere (+ (peg.P "Красный") "красный")) :match title) + "Красный чай" + "Неизвестная категория")) + +(fn categorize-many [items category] + (map + (fn [_ item] + (tset item :category + (if category category (guess-category item.title))) + item) + items)) + +(fn from-html [url-formatter categories normalizer item-peg] + (reduce + (fn [_ {: category : path} result] + (array.concat + result + (categorize-many + (map #(normalizer $2) + (walk-html-pages url-formatter path item-peg)) + category))) + categories + [])) + +(fn walk-json-pages [url-formatter path] + (fn gather [page knil] + (local url (url-formatter path page)) + (print (.. "requesting " url)) + (local (status _ content) + (luna.http.request + "GET" + url + {:User-Agent (http.random-user-agent) + :Content-Type "application/json" + :Accept "application/json"} + "")) + + (if (= status 200) + (let [products (json.decode content)] + (if (or (= products nil) (= 0 (# products))) + knil + (do + (os.execute "sleep 1") + (gather (+ page 1) (array.concat knil products))))) + (= status 404) + knil + (retry #(gather page knil) 3 1))) + + (gather 1 [])) + +(fn from-json [url-formatter categories normalizer] + (reduce + (fn [_ {: category : path} result] + (array.concat + result + (categorize-many + (map #(normalizer $2) + (walk-json-pages url-formatter path)) + category))) + categories + [])) + +{: from-html + : from-json} |
