summaryrefslogtreecommitdiff
path: root/fetcher.fnl
diff options
context:
space:
mode:
Diffstat (limited to 'fetcher.fnl')
-rw-r--r--fetcher.fnl124
1 files changed, 124 insertions, 0 deletions
diff --git a/fetcher.fnl b/fetcher.fnl
new file mode 100644
index 0000000..d31f858
--- /dev/null
+++ b/fetcher.fnl
@@ -0,0 +1,124 @@
+(import-macros {: reduce : map} :lib.macro)
+
+(local peg
+ (if (pick-values 1 (pcall require :lpeg))
+ (require :lpeg)
+ (require :vendor.lpeglj)))
+(local array (require :lib.array))
+(local json (require :vendor.json))
+(local parser (require :parser))
+(local http (require :http))
+
+(fn retry [what times sleep]
+ (var result nil)
+ (var stop? false)
+ (var err nil)
+
+ (for [i 1 times &until stop?]
+ (local (ok? value) (pcall what))
+ (if ok?
+ (do
+ (set result value)
+ (set stop? true))
+ (do
+ (set err value)
+ (os.execute (.. "sleep " sleep)))))
+
+ (when (not stop?)
+ (error (.. "failed after " times " retries:\n" err)))
+
+ result)
+
+(fn walk-html-pages [url-formatter path item-peg]
+ (fn gather [page knil]
+ (local url (url-formatter path page))
+ (print (.. "requesting " url))
+ (local (status _ html)
+ (luna.http.request "GET" url {:User-Agent (http.random-user-agent)} ""))
+
+ (if (= status 200)
+ (let [products (parser.match-many html item-peg)]
+ (if (or (= products nil) (= 0 (# products)))
+ knil
+ (do
+ (os.execute "sleep 1")
+ (gather (+ page 1) (array.concat knil products)))))
+ (= status 404)
+ knil
+ (retry #(gather page knil) 3 1)))
+
+ (gather 1 []))
+
+(fn guess-category [title]
+ (if (: (parser.anywhere (+ (peg.P "зеленый") "Зеленый")) :match title)
+ "Зеленый чай"
+ (: (parser.anywhere (+ (peg.P "Улун") "улун")) :match title)
+ "Улун"
+ (: (parser.anywhere (+ (peg.P "Белый") "белый")) :match title)
+ "Белый чай"
+ (: (parser.anywhere (+ (peg.P "Желтый") "желтый")) :match title)
+ "Желтый чай"
+ (: (parser.anywhere (+ (peg.P "Красный") "красный")) :match title)
+ "Красный чай"
+ "Неизвестная категория"))
+
+(fn categorize-many [items category]
+ (map
+ (fn [_ item]
+ (tset item :category
+ (if category category (guess-category item.title)))
+ item)
+ items))
+
+(fn from-html [url-formatter categories normalizer item-peg]
+ (reduce
+ (fn [_ {: category : path} result]
+ (array.concat
+ result
+ (categorize-many
+ (map #(normalizer $2)
+ (walk-html-pages url-formatter path item-peg))
+ category)))
+ categories
+ []))
+
+(fn walk-json-pages [url-formatter path]
+ (fn gather [page knil]
+ (local url (url-formatter path page))
+ (print (.. "requesting " url))
+ (local (status _ content)
+ (luna.http.request
+ "GET"
+ url
+ {:User-Agent (http.random-user-agent)
+ :Content-Type "application/json"
+ :Accept "application/json"}
+ ""))
+
+ (if (= status 200)
+ (let [products (json.decode content)]
+ (if (or (= products nil) (= 0 (# products)))
+ knil
+ (do
+ (os.execute "sleep 1")
+ (gather (+ page 1) (array.concat knil products)))))
+ (= status 404)
+ knil
+ (retry #(gather page knil) 3 1)))
+
+ (gather 1 []))
+
+(fn from-json [url-formatter categories normalizer]
+ (reduce
+ (fn [_ {: category : path} result]
+ (array.concat
+ result
+ (categorize-many
+ (map #(normalizer $2)
+ (walk-json-pages url-formatter path))
+ category)))
+ categories
+ []))
+
+{: from-html
+ : from-json}