(import-macros {: reduce : map} :lib.macro)

;; Prefer the regular lpeg module; fall back to the vendored lpeglj module
;; when requiring lpeg fails.
(local peg (if (pick-values 1 (pcall require :lpeg))
               (require :lpeg)
               (require :vendor.lpeglj)))
(local array (require :lib.array))
(local json (require :vendor.json))
(local parser (require :parser))
(local http (require :http))

;; Total number of request attempts per page (the first try plus three
;; retries) and the delay, in seconds, between consecutive attempts.
(local fetch-attempts 4)
(local fetch-retry-delay 1)

(fn retry [what times sleep]
  "Call `what` (a zero-argument function) up to `times` times until it
returns without raising. Returns the first value of the successful call.
Between failed attempts the process sleeps `sleep` seconds via the shell
`sleep` command. Raises an error carrying the last failure when every
attempt failed."
  (var result nil)
  (var stop? false)
  (var err nil)
  (for [i 1 times &until stop?]
    (local (ok? value) (pcall what))
    (if ok?
        (do
          (set result value)
          (set stop? true))
        (do
          (set err value)
          ;; No point in sleeping after the final attempt: we are about
          ;; to give up anyway.
          (when (< i times)
            (os.execute (.. "sleep " sleep))))))
  (when (not stop?)
    ;; tostring: the raised value is not guaranteed to be a string.
    (error (.. "failed after " times " retries:\n" (tostring err))))
  result)

(fn fetch-page [url headers]
  "Perform a single GET request for `url` with `headers`.
Returns {:status :body} when the status is 200 or 404. Any other status
raises an error so that `retry` can count the attempt as a failure."
  (print (.. "requesting " url))
  (local (status _ body) (luna.http.request "GET" url headers ""))
  (if (or (= status 200) (= status 404))
      {: status : body}
      (error (.. "unexpected HTTP status " (tostring status) " for " url))))

(fn walk-pages [url-formatter path make-headers extract]
  "Request pages 1, 2, 3, ... of `path` and accumulate their items.
`url-formatter` is called as (url-formatter path page) to build each URL,
`make-headers` is called before every request attempt to build the header
table, and `extract` turns a response body into a sequence of items.
Walking stops at the first 404 or at the first page that yields nil or an
empty sequence. Only the HTTP request itself is retried; a status other
than 200/404 on every attempt raises an error."
  (fn gather [page knil]
    (local url (url-formatter path page))
    (local {: status : body}
           (retry #(fetch-page url (make-headers))
                  fetch-attempts
                  fetch-retry-delay))
    (if (= status 404)
        knil
        (let [products (extract body)]
          (if (or (= products nil) (= 0 (length products)))
              knil
              (do
                ;; Pause between successive page requests.
                (os.execute "sleep 1")
                (gather (+ page 1) (array.concat knil products)))))))
  (gather 1 []))

(fn walk-html-pages [url-formatter path item-peg]
  "Walk the HTML pages of `path`, extracting items from each page with
`parser.match-many` and `item-peg`."
  (walk-pages url-formatter
              path
              (fn [] {:User-Agent (http.random-user-agent)})
              #(parser.match-many $ item-peg)))

;; Title patterns paired with the category they indicate, in priority
;; order. Built once here instead of on every guess-category call.
(local category-patterns
       [[(parser.anywhere (+ (peg.P "зеленый") "Зеленый")) "Зеленый чай"]
        [(parser.anywhere (+ (peg.P "Улун") "улун")) "Улун"]
        [(parser.anywhere (+ (peg.P "Белый") "белый")) "Белый чай"]
        [(parser.anywhere (+ (peg.P "Желтый") "желтый")) "Желтый чай"]
        [(parser.anywhere (+ (peg.P "Красный") "красный")) "Красный чай"]])

(fn guess-category [title]
  "Return the category of the first pattern in `category-patterns` that
matches `title`, or \"Неизвестная категория\" when none matches."
  (var found nil)
  (each [_ [pattern name] (ipairs category-patterns) &until found]
    (when (pattern:match title)
      (set found name)))
  (or found "Неизвестная категория"))

(fn categorize-many [items category]
  "Set :category on every item (mutating it in place) and return the
mapped items. Uses `category` when it is truthy, otherwise guesses the
category from item.title."
  (map (fn [_ item]
         (tset item :category (or category (guess-category item.title)))
         item)
       items))

(fn from-html [url-formatter categories normalizer item-peg]
  "For each {:category :path} entry of `categories`, walk its HTML pages,
pass every extracted item through `normalizer`, categorize the results and
concatenate everything into one sequence."
  (reduce (fn [_ {: category : path} result]
            (array.concat result
                          (categorize-many
                            (map #(normalizer $2)
                                 (walk-html-pages url-formatter path item-peg))
                            category)))
          categories
          []))

(fn walk-json-pages [url-formatter path]
  "Walk the JSON pages of `path`, decoding each response body with
`json.decode`."
  (walk-pages url-formatter
              path
              (fn [] {:User-Agent (http.random-user-agent)
                      :Content-Type "application/json"
                      :Accept "application/json"})
              json.decode))

(fn from-json [url-formatter categories normalizer]
  "For each {:category :path} entry of `categories`, walk its JSON pages,
pass every decoded item through `normalizer`, categorize the results and
concatenate everything into one sequence."
  (reduce (fn [_ {: category : path} result]
            (array.concat result
                          (categorize-many
                            (map #(normalizer $2)
                                 (walk-json-pages url-formatter path))
                            category)))
          categories
          []))

{: from-html : from-json}