(import-macros {: reduce : map} :lib.macro)

;; Prefer lpeg; fall back to lpeglj when lpeg cannot be required.
(local peg (if (pick-values 1 (pcall require :lpeg))
               (require :lpeg)
               (require :lpeglj)))
(local array (require :lib.array))
(local parser (require :parser.parser))
(local http (require :lib.http))
(local {: must} (require :lib.utils))

(fn retry [what times sleep]
  "Call `what` (a zero-argument function) under pcall up to `times` times.
Returns the first value of the first successful call. After a failed attempt
that is not the last one, waits `sleep` seconds via the shell `sleep` command.
Raises an error carrying the last failure when every attempt fails."
  (var result nil)
  (var done? false)
  (var err nil)
  (for [attempt 1 times &until done?]
    (local (ok? value) (pcall what))
    (if ok?
        (do
          (set result value)
          (set done? true))
        (do
          (set err value)
          ;; Sleeping after the final attempt would only delay the error.
          (when (< attempt times)
            (os.execute (.. "sleep " sleep))))))
  (when (not done?)
    ;; tostring: the caught error value is not guaranteed to be a string.
    (error (.. "failed after " times " retries:\n" (tostring err))))
  result)

(fn fetch-page [url headers]
  "Perform one GET request for `url` and return {:status :body}.
Only statuses 200 and 404 are returned to the caller; any other status raises
an error so that a surrounding `retry` counts it as a failed attempt."
  (print (.. "requesting " url))
  ;; NOTE(review): `must` presumably raises when the request itself fails;
  ;; confirm against lib.utils.
  (let [{: status : body} (must (luna.http.request "GET" url headers ""))]
    (if (or (= 200 status) (= 404 status))
        {: status : body}
        (error (.. "unexpected HTTP status " (tostring status) " for " url)))))

(fn walk-pages [url-formatter path make-headers extract-items]
  "Request consecutive pages, starting at page 1, and accumulate their items.
`url-formatter` is called with (path page) to build each URL, `make-headers`
is called with no arguments before every attempt, and `extract-items` is
called with the response body. Walking stops at the first page that answers
404 or yields nil / zero items. Each page is attempted at most 3 times."
  (fn gather [page knil]
    (let [url (url-formatter path page)
          {: status : body} (retry #(fetch-page url (make-headers)) 3 1)]
      (if (= 200 status)
          (let [items (extract-items body)]
            (if (or (= items nil) (= 0 (length items)))
                knil
                (do
                  ;; Pause one second between successive page requests.
                  (os.execute "sleep 1")
                  (gather (+ page 1) (array.concat knil items)))))
          ;; fetch-page only lets 200 and 404 through, so this is the 404 case.
          knil)))
  (gather 1 []))

(fn walk-html-pages [url-formatter path item-peg]
  "Walk the pages under `path`, extracting items from each body with
`parser.match-many` and `item-peg`."
  (walk-pages url-formatter
              path
              (fn []
                {:User-Agent (http.random-user-agent)})
              (fn [body]
                (parser.match-many body item-peg))))

(fn walk-json-pages [url-formatter path response-destructor]
  "Walk the pages under `path`, taking the `items` field of whatever
`response-destructor` returns for each body."
  (walk-pages url-formatter
              path
              (fn []
                {:User-Agent (http.random-user-agent)
                 :Content-Type "application/json"
                 :Accept "application/json"})
              (fn [body]
                (let [{: items} (response-destructor body)]
                  items))))

(fn categorize-many [items tags]
  "Set the :tags field of every item (mutating it) to `tags` (or an empty
list) concatenated with `parser.guess-tags` of the item's title, passed
through `array.unique`. Returns the result of mapping over `items`."
  (map (fn [_ item]
         (tset item :tags
               (-> (or tags [])
                   (array.concat (parser.guess-tags item.title))
                   (array.unique)))
         item)
       items))

(fn gather-categories [categories normalizer walk]
  "For every {:tags :path} entry in `categories`, call `walk` with the path,
apply `normalizer` to each raw item, tag the results with `categorize-many`,
and concatenate everything into a single list."
  (reduce (fn [_ {: tags : path} result]
            (array.concat result
                          (categorize-many (map #(normalizer $2) (walk path))
                                           tags)))
          categories
          []))

(fn from-html [categories url-formatter item-peg normalizer]
  "Collect normalized, tagged items from the paginated HTML pages of every
category."
  (gather-categories categories
                     normalizer
                     (fn [path]
                       (walk-html-pages url-formatter path item-peg))))

(fn from-json [categories url-formatter response-destructor normalizer]
  "Collect normalized, tagged items from the paginated JSON pages of every
category."
  (gather-categories categories
                     normalizer
                     (fn [path]
                       (walk-json-pages url-formatter path response-destructor))))

{: from-html : from-json}