From dd449357f502dbe9ca4487d4b06a06ee4e597146 Mon Sep 17 00:00:00 2001 From: unwox Date: Fri, 27 Sep 2024 15:26:33 +0600 Subject: new structure --- .gitignore | 1 + bin/fetch.fnl | 76 +++++++++++++++++ bin/serve.fnl | 166 +++++++++++++++++++++++++++++++++++++ fetcher.fnl | 68 +++++++-------- http.fnl | 11 --- lib/http.fnl | 11 +++ main.fnl | 234 ---------------------------------------------------- main.lua | 3 +- parser.fnl | 108 ------------------------ parser/artoftea.fnl | 71 ++++++++++++++++ parser/ipuer.fnl | 70 ++++++++++++++++ parser/ozchai.fnl | 69 ++++++++++++++++ parser/parser.fnl | 143 ++++++++++++++++++++++++++++++++ run.sh | 36 +++++++- runjit.sh | 2 - site/artoftea.fnl | 78 ------------------ site/ipuer.fnl | 81 ------------------ site/ozchai.fnl | 70 ---------------- var/.gitkeep | 0 19 files changed, 671 insertions(+), 627 deletions(-) create mode 100644 .gitignore create mode 100644 bin/fetch.fnl create mode 100644 bin/serve.fnl delete mode 100644 http.fnl create mode 100644 lib/http.fnl delete mode 100644 main.fnl delete mode 100644 parser.fnl create mode 100644 parser/artoftea.fnl create mode 100644 parser/ipuer.fnl create mode 100644 parser/ozchai.fnl create mode 100644 parser/parser.fnl delete mode 100755 runjit.sh delete mode 100644 site/artoftea.fnl delete mode 100644 site/ipuer.fnl delete mode 100644 site/ozchai.fnl create mode 100644 var/.gitkeep diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..33a56a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +var/db.sqlite* diff --git a/bin/fetch.fnl b/bin/fetch.fnl new file mode 100644 index 0000000..fe1a1a5 --- /dev/null +++ b/bin/fetch.fnl @@ -0,0 +1,76 @@ +(import-macros {: map : reduce} :lib.macro) + +(tset package :path (.. 
package.path ";./vendor/lpeglj/?.lua")) + +(local array (require :lib.array)) +(local ozchai (require :parser.ozchai)) +(local ipuer (require :parser.ipuer)) +(local artoftea (require :parser.artoftea)) + +(local db (luna.db.open "file:var/db.sqlite?_journal=WAL&_sync=NORMAL")) +(luna.db.exec db " + PRAGMA foreign_keys=ON; + PRAGMA journal_mode=WAL; + PRAGMA synchronous=NORMAL; + + CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5(name, fid, `table`); + + CREATE TABLE IF NOT EXISTS products ( + id TEXT NOT NULL PRIMARY KEY, + site TEXT NOT NULL, + category TEXT NOT NULL, + title TEXT NOT NULL, + description TEXT NOT NULL, + year INT NOT NULL, + image TEXT NOT NULL, + url TEXT NOT NULL, + price REAL NOT NULL, + weight REAL NOT NULL, + price_per REAL NOT NULL, + misc TEXT NOT NULL, + creation_time DATETIME NOT NULL + );" []) + +(fn now [] + (os.date "%Y-%m-%d %H:%M:%S")) + +(fn store-products [products] + (local sql + (.. "INSERT OR REPLACE INTO products VALUES " + (table.concat + (map (fn [_ _] + "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + products) + ","))) + (local vars + (reduce + (fn [_ product rest] + (array.concat rest + [product.id + product.site + product.category + product.title + (or product.description "") + (or product.year 0) + (or product.image "") + (or product.url "") + (or product.price 0) + (or product.weight 0) + (or product.price-per 0) + (or product.misc "") + (now)])) + products [])) + (luna.db.exec db sql vars)) + +(fn populate-search-table [] + (local tx (luna.db.begin db)) + (luna.db.exec-tx tx "DELETE FROM search" []) + (luna.db.exec-tx tx "INSERT INTO search + SELECT title, id, 'products' + FROM products;" []) + (luna.db.commit tx)) + +(store-products (artoftea.products)) +(store-products (ipuer.products)) +(store-products (ozchai.products)) +(populate-search-table) diff --git a/bin/serve.fnl b/bin/serve.fnl new file mode 100644 index 0000000..f4ef3c7 --- /dev/null +++ b/bin/serve.fnl @@ -0,0 +1,166 @@ +(import-macros {: map : reduce} 
:lib.macro) + +(tset package :path (.. package.path ";./vendor/lpeglj/?.lua")) + +(local io (require :io)) +(local math (require :math)) +(local fennel (require :vendor.fennel)) +(local html (require :vendor.html)) +(local json (require :vendor.json)) +(local array (require :lib.array)) +(local str (require :lib.string)) + +(local ozchai (require :parser.ozchai)) +(local ipuer (require :parser.ipuer)) +(local artoftea (require :parser.artoftea)) + +(when _G.unpack + (tset table :unpack _G.unpack)) + +(local db (luna.db.open "file:var/db.sqlite?_journal=WAL&_sync=NORMAL")) + +(local query-synonyms { + "шэн" "шен" + "шен" "шэн" + "доска" "чабань" + "чабань" "доска"}) + +(fn unescape [s] + (assert (= (type s) :string)) + (pick-values 1 + (-> s + (string.gsub "<" "<") + (string.gsub ">" ">") + (string.gsub """ "\"") + (string.gsub "&" "&")))) + +(fn site-name-template [name] + (if + (= name "ipuer.ru") + [:a {:class "site-icon" :href "https://ipuer.ru"} + [:img {:src "/static/ipuer.jpg"}] + "Институт чая пуэр"] + "")) + +(fn item-template [product] + [:div {:class "tile"} + [:a {:href product.url :style "display: block;"} + [:img {:src product.image} ""]] + (site-name-template product.site) + [:a {:href product.url :style "text-decoration: none;"} + [:NO-ESCAPE (.. "

" (unescape product.title) "

")]] + [:div {:class "price"} + (if product.price (.. product.price "₽") "") + (if product.quantity (.. " за " product.quantity "г") "") + (if (and product.price-per + (< 0 product.price-per)) + [:NO-ESCAPE (.. " (" product.price-per "₽ за 1г)")] + "")] + [:small {} (or product.description "")]]) + +(fn paginator-template [query page limit total] + (local last-page (math.ceil (/ total limit))) + + (if (< limit total) + [:div {:class "paginator"} + [:div {:class "paginator-numbers"} + (if (< 1 page) + [:a {:href (.. "?page=" (- page 1) "&query=" query)} "<"] + "") + (faccumulate [res [:span {}] i 1 last-page] + (do + (table.insert + res [:a {:href (.. "?page=" i "&query=" query) + :class (if (= page i) "paginator-active" "")} + (tostring i)]) + res)) + (if (< page last-page) + [:a {:href (.. "?page=" (+ page 1) "&query=" query)} ">"] + "")] + [:div {} "Всего результатов: " [:strong {} (string.format "%d" total)]]] + "")) + +(fn base-template [query sort page total ...] + (local paginator (paginator-template query page 32 total)) + + [:html {:lang "en"} + [:head {} + [:meta {:charset "UTF-8"}] + [:link {:rel :stylesheet :href "static/style.css"}] + [:title {} "A new cool web server for lua"]] + [:body {} + [:div {:class "container"} + [:div {:class "content"} + [:aside {:class "aside"} + [:div {:class "aside-content"} + [:a {:href "/" :style "display: block;"} + [:img {:class "logo" :src "static/logo.svg" :alt "Логотип meicha.ru"}]] + [:form {:class "form"} + [:input {:type :search :name :query :value query + :autofocus true :placeholder "enter search query"}] + [:button {:type :submit} "Искать"]] + paginator]] + [:section {} + [:div {:class "list"} ...] + [:footer {} paginator]]]]]]) + +(fn query-products [page query sorters] + (local query + (table.concat + (map (fn [_ q] + (if (. query-synonyms q) + (.. "(" q "* OR " (. query-synonyms q) "*)") + (.. 
q "*"))) + (str.split query)) + " ")) + (local total + (luna.db.query + db + "SELECT count(*) + FROM search + WHERE search.`table` = 'products' + AND search.name MATCH ?" + [query])) + + {:results + (luna.db.query* + db + "SELECT products.id, + highlight(search, 0, '', '') AS \"title\", + products.site, + products.description, + products.image, + products.url, + products.price, + products.weight, + products.price_per AS \"price-per\", + products.year + FROM search + INNER JOIN products ON search.fid = products.id + WHERE search.`table` = 'products' + AND search.name MATCH ? + ORDER BY rank + LIMIT 32 OFFSET ?" + [query (* (- page 1) 32)]) + :total (if (< 0 (# total)) + (. total 1 1) + 0)}) + +(fn root-handler [{: path : query}] + (if (= path "/") + (let [headers {:content-type "text/html"} + page (or (tonumber query.page) 1) + search (or query.query "") + sort "ASC" + {: results : total} (query-products page search sort)] + (values + 200 headers + (html.render + (base-template + search sort page total + (table.unpack (map #(item-template $2) results))) + true))) + (values 404 {} "not found"))) + +(luna.router.route "GET /" root-handler) +(luna.router.static "GET /static/" "static/") diff --git a/fetcher.fnl b/fetcher.fnl index d31f858..6d6d633 100644 --- a/fetcher.fnl +++ b/fetcher.fnl @@ -3,11 +3,11 @@ (local peg (if (pick-values 1 (pcall require :lpeg)) (require :lpeg) - (require :vendor.lpeglj))) + (require :lpeglj))) (local array (require :lib.array)) (local json (require :vendor.json)) -(local parser (require :parser)) -(local http (require :http)) +(local parser (require :parser.parser)) +(local http (require :lib.http)) (fn retry [what times sleep] (var result nil) @@ -37,48 +37,37 @@ (luna.http.request "GET" url {:User-Agent (http.random-user-agent)} "")) (if (= status 200) - (let [products (parser.match-many html item-peg)] - (if (or (= products nil) (= 0 (# products))) + (let [items (parser.match-many html item-peg)] + (if (or (= items nil) (= 0 (# 
items))) knil (do (os.execute "sleep 1") - (gather (+ page 1) (array.concat knil products))))) + (gather (+ page 1) (array.concat knil items))))) (= status 404) knil (retry #(gather page knil) 3 1))) (gather 1 [])) -(fn guess-category [title] - (if (: (parser.anywhere (+ (peg.P "зеленый") "Зеленый")) :match title) - "Зеленый чай" - (: (parser.anywhere (+ (peg.P "Улун") "улун")) :match title) - "Улун" - (: (parser.anywhere (+ (peg.P "Белый") "белый")) :match title) - "Белый чай" - (: (parser.anywhere (+ (peg.P "Желтый") "желтый")) :match title) - "Желтый чай" - (: (parser.anywhere (+ (peg.P "Красный") "красный")) :match title) - "Красный чай" - "Неизвестная категория")) - (fn categorize-many [items category] (map (fn [_ item] - (tset item :category - (if category category (guess-category item.title))) - item) + (tset item :category + (if category + category + (parser.guess-category item.title))) + item) items)) -(fn from-html [url-formatter categories normalizer item-peg] +(fn from-html [categories url-formatter item-peg normalizer] (reduce (fn [_ {: category : path} result] (array.concat result (categorize-many - (map #(normalizer $2) - (walk-html-pages url-formatter path item-peg)) - category))) + (map #(normalizer $2) + (walk-html-pages url-formatter path item-peg)) + category))) categories [])) @@ -88,35 +77,34 @@ (print (.. 
"requesting " url)) (local (status _ content) (luna.http.request - "GET" - url + "GET" url {:User-Agent (http.random-user-agent) :Content-Type "application/json" :Accept "application/json"} "")) (if (= status 200) - (let [products (json.decode content)] - (if (or (= products nil) (= 0 (# products))) - knil - (do - (os.execute "sleep 1") - (gather (+ page 1) (array.concat knil products))))) + (let [items (json.decode content)] + (if (or (= items nil) (= 0 (# items))) + knil + (do + (os.execute "sleep 1") + (gather (+ page 1) (array.concat knil items))))) (= status 404) knil (retry #(gather page knil) 3 1))) (gather 1 [])) -(fn from-json [url-formatter categories normalizer] +(fn from-json [categories url-formatter response-destructor normalizer] (reduce (fn [_ {: category : path} result] (array.concat - result - (categorize-many - (map #(normalizer $2) - (walk-json-pages url-formatter path)) - category))) + result + (categorize-many + (map #(normalizer $2) + (walk-json-pages url-formatter path)) + category))) categories [])) diff --git a/http.fnl b/http.fnl deleted file mode 100644 index fb208fc..0000000 --- a/http.fnl +++ /dev/null @@ -1,11 +0,0 @@ -(fn random-user-agent [] - (let [agents ["Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.1" - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.3" - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.3" - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/25.0 Chrome/121.0.0.0 Safari/537.3" - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.3" - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.3"] - idx (math.random 1 (# agents))] - (. 
agents idx))) - -{: random-user-agent} diff --git a/lib/http.fnl b/lib/http.fnl new file mode 100644 index 0000000..fb208fc --- /dev/null +++ b/lib/http.fnl @@ -0,0 +1,11 @@ +(fn random-user-agent [] + (let [agents ["Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.1" + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.3" + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.3" + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/25.0 Chrome/121.0.0.0 Safari/537.3" + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.3" + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.3"] + idx (math.random 1 (# agents))] + (. agents idx))) + +{: random-user-agent} diff --git a/main.fnl b/main.fnl deleted file mode 100644 index 9282517..0000000 --- a/main.fnl +++ /dev/null @@ -1,234 +0,0 @@ -(import-macros {: map : reduce} :lib.macro) - -(tset package :path (.. 
package.path ";./lib/lpeglj/?.lua")) - -(local io (require :io)) -(local math (require :math)) -(local fennel (require :vendor.fennel)) -(local html (require :vendor.html)) -(local json (require :vendor.json)) -(local array (require :lib.array)) -(local str (require :lib.string)) - -(local ozchai (require :site.ozchai)) -(local ipuer (require :site.ipuer)) -(local artoftea (require :site.artoftea)) - -(print (fennel.view (ipuer.products))) -(os.exit 1) - -(when _G.unpack - (tset table :unpack _G.unpack)) - -(local query-synonyms { - "шэн" "шен" - "шен" "шэн" - "доска" "чабань" - "чабань" "доска" -}) - -(local db (luna.db.open "file:db.sqlite?_journal=WAL&_sync=NORMAL")) -(luna.db.exec db " - PRAGMA foreign_keys=ON; - PRAGMA journal_mode=WAL; - PRAGMA synchronous=NORMAL; - - CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5(name, fid, `table`); - - CREATE TABLE IF NOT EXISTS products ( - id TEXT NOT NULL PRIMARY KEY, - site TEXT NOT NULL, - category TEXT NOT NULL, - title TEXT NOT NULL, - description TEXT NOT NULL, - year INT NOT NULL, - image TEXT NOT NULL, - url TEXT NOT NULL, - price REAL NOT NULL, - weight REAL NOT NULL, - price_per REAL NOT NULL, - misc TEXT NOT NULL, - creation_time DATETIME NOT NULL - );" []) - -(fn now [] - (os.date "%Y-%m-%d %H:%M:%S")) - -(fn unescape [s] - (assert (= (type s) :string)) - (pick-values 1 - (-> s - (string.gsub "<" "<") - (string.gsub ">" ">") - (string.gsub """ "\"") - (string.gsub "&" "&")))) - -(fn site-name-template [name] - (if - (= name "ipuer.ru") - [:a {:class "site-icon" :href "https://ipuer.ru"} - [:img {:src "/static/ipuer.jpg"}] - "Институт чая пуэр"] - "")) - -(fn item-template [product] - [:div {:class "tile"} - [:a {:href product.url :style "display: block;"} - [:img {:src product.image} ""]] - (site-name-template product.site) - [:a {:href product.url :style "text-decoration: none;"} - [:NO-ESCAPE (.. "

" (unescape product.title) "

")]] - [:div {:class "price"} - (if product.price (.. product.price "₽") "") - (if product.quantity (.. " за " product.quantity "г") "") - (if (and product.price-per - (< 0 product.price-per)) - [:NO-ESCAPE (.. " (" product.price-per "₽ за 1г)")] - "")] - [:small {} (or product.description "")]]) - -(fn paginator-template [query page limit total] - (local last-page (math.ceil (/ total limit))) - - (if (< limit total) - [:div {:class "paginator"} - [:div {:class "paginator-numbers"} - (if (< 1 page) - [:a {:href (.. "?page=" (- page 1) "&query=" query)} "<"] - "") - (faccumulate [res [:span {}] i 1 last-page] - (do - (table.insert - res [:a {:href (.. "?page=" i "&query=" query) - :class (if (= page i) "paginator-active" "")} - (tostring i)]) - res)) - (if (< page last-page) - [:a {:href (.. "?page=" (+ page 1) "&query=" query)} ">"] - "")] - [:div {} "Всего результатов: " [:strong {} (string.format "%d" total)]]] - "")) - -(fn base-template [query sort page total ...] - (local paginator (paginator-template query page 32 total)) - - [:html {:lang "en"} - [:head {} - [:meta {:charset "UTF-8"}] - [:link {:rel :stylesheet :href "static/style.css"}] - [:title {} "A new cool web server for lua"]] - [:body {} - [:div {:class "container"} - [:div {:class "content"} - [:aside {:class "aside"} - [:div {:class "aside-content"} - [:a {:href "/" :style "display: block;"} - [:img {:class "logo" :src "static/logo.svg" :alt "Логотип meicha.ru"}]] - [:form {:class "form"} - [:input {:type :search :name :query :value query - :autofocus true :placeholder "enter search query"}] - [:button {:type :submit} "Искать"]] - paginator]] - [:section {} - [:div {:class "list"} ...] - [:footer {} paginator]]]]]]) - -(fn query-products [page query sorters] - (local query - (table.concat - (map (fn [_ q] - (if (. query-synonyms q) - (.. "(" q "* OR " (. query-synonyms q) "*)") - (.. 
q "*"))) - (str.split query)) - " ")) - (local total - (luna.db.query - db - "SELECT count(*) - FROM search - WHERE search.`table` = 'products' - AND search.name MATCH ?" - [query])) - - {:results - (luna.db.query* - db - "SELECT products.id, - highlight(search, 0, '', '') AS \"title\", - products.site, - products.description, - products.image, - products.url, - products.price, - products.weight, - products.price_per AS \"price-per\", - products.year - FROM search - INNER JOIN products ON search.fid = products.id - WHERE search.`table` = 'products' - AND search.name MATCH ? - ORDER BY rank - LIMIT 32 OFFSET ?" - [query (* (- page 1) 32)]) - :total (if (< 0 (# total)) - (. total 1 1) - 0)}) - -(fn store-products [products] - (local sql - (.. "INSERT OR REPLACE INTO products VALUES " - (table.concat - (map (fn [_ _] - "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") - products) - ","))) - (local vars - (reduce - (fn [_ product rest] - (array.concat rest - [product.id - product.site - product.category - product.title - (or product.description "") - (or product.year 0) - (or product.image "") - (or product.url "") - (or product.price 0) - (or product.weight 0) - (or product.price-per 0) - (or product.misc "") - (now)])) - products [])) - (luna.db.exec db sql vars)) - -(fn populate-search-table [] - (local tx (luna.db.begin db)) - (luna.db.exec-tx tx "DELETE FROM search" []) - (luna.db.exec-tx tx "INSERT INTO search - SELECT title, id, 'products' FROM products;" []) - (luna.db.commit tx)) - -; (store-products (ipuer.products)) -; (store-products (ozchai.products)) -; (populate-search-table) - -(fn root-handler [{: path : query}] - (if (= path "/") - (let [headers {:content-type "text/html"} - page (or (tonumber query.page) 1) - search (or query.query "") - sort "ASC" - {: results : total} (query-products page search sort)] - (values - 200 headers - (html.render - (base-template - search sort page total - (table.unpack (map #(item-template $2) results))) - true))) - (values 
404 {} "not found"))) - -(luna.router.route "GET /" root-handler) -(luna.router.static "GET /static/" "static/") diff --git a/main.lua b/main.lua index 5d9abad..6084c9a 100644 --- a/main.lua +++ b/main.lua @@ -1 +1,2 @@ -return require("vendor.fennel").install().dofile("main.fnl") +assert(arg[1], "lua: file name must be specified") +return require("vendor.fennel").install().dofile(arg[1]) diff --git a/parser.fnl b/parser.fnl deleted file mode 100644 index 314476c..0000000 --- a/parser.fnl +++ /dev/null @@ -1,108 +0,0 @@ -(import-macros {: map} :lib.macro) -(local peg - (if (pick-values 1 (pcall require :lpeg)) - (require :lpeg) - (require :vendor.lpeglj))) - -(fn pnot [p] - (- (peg.P 1) (peg.P p))) - -(fn till [p] - (^ (pnot p) 1)) - -(fn maybe [p] - (^ (peg.P p) 0)) - -(fn anywhere [p] - (peg.P [(+ p (* 1 (peg.V 1)))])) - -(local pegs {}) -(tset pegs :number (^ (peg.R "09") 1)) -(tset pegs :letters (^ (+ (peg.R "az") (peg.R "AZ")) 1)) -(tset pegs :space (peg.S "\n\t ")) -(tset pegs :spaces (^ (peg.S "\n\t ") 1)) -(tset pegs :tag-name (+ pegs.letters pegs.number)) -(tset pegs :attr - (peg.Ct (* (peg.Cg (^ (+ pegs.letters "-") 1) :name) - (maybe (* "=\"" (peg.Cg (till "\"") :value) "\""))))) -(tset pegs :self-closing-tag - (* "<" - (peg.Cg - (+ (peg.P "area") "base" "br" "col" "embed" "hr" - "img" "input" "link" "meta" "param" "source" - "track" "wbr") ;; should be case insensitive - :tag) - (peg.Cg (peg.Ct (^ (+ pegs.space (peg.Cg pegs.attr)) 0)) :attrs) - (maybe "/") ">")) -(tset pegs :opening-tag - (* "<" (peg.Cg pegs.tag-name :tag) - (peg.Cg (peg.Ct (^ (+ pegs.space (peg.Cg pegs.attr)) 0)) :attrs) - ">")) -(tset pegs :closing-tag (* "")) -(tset pegs :doctype - (* "")) ;; should be case insensitive -(tset pegs :tag - (peg.P [(peg.Ct (+ pegs.self-closing-tag - (* pegs.opening-tag - (peg.Cg - (peg.Ct - (^ (+ (+ pegs.space (peg.V 1)) - (peg.Cg (till pegs.closing-tag))) - 0)) - :nodes) - pegs.closing-tag)))])) -(tset pegs :html - (* pegs.doctype (peg.Ct (^ (+ 
pegs.space (peg.Cg pegs.tag)) 0)))) - -(fn tag [tag attrs contents] - (local tag (peg.P tag)) - (local attrs-count (accumulate [sum 0 _ _ (pairs attrs)] (+ 1 sum))) - (local attr-peg - (fn [name value] (* (^ (peg.P name) 1) - (if (~= value "") - (* "=\"" - ;; wildcard for any value - (if (= value "*") - (till "\"") - (peg.P value)) - "\"") - (maybe (.. "=\" name \"")))))) - (local attrs-peg - (accumulate [sum pegs.spaces - _ rule - (pairs (icollect [k v (pairs attrs)] - (attr-peg k v)))] - (+ rule sum))) - (if contents - (peg.P (* - (^ pegs.space 0) - ;; opening tag - (* "<" tag (^ pegs.space 0) - (^ attrs-peg (- (* attrs-count 2) 1)) - (^ pegs.space 0) ">") - ;; tag contents - (^ pegs.space 0) - (if (= contents "*") - (till (* "")) - contents) - (^ pegs.space 0) - ;; closing tag - (* ""))) - (peg.P (* - (^ pegs.space 0) - ;; opening tag - (* "<" tag (^ pegs.space 0) - (^ attrs-peg (- (* attrs-count 2) 1)) - (^ pegs.space 0) (maybe "/") ">"))))) - -(fn match-many [html tag] - (: (peg.Ct (^ (peg.Ct tag) 1)) - :match html)) - -{: match-many - : tag - : anywhere - : till - : maybe - : pegs - :not pnot} diff --git a/parser/artoftea.fnl b/parser/artoftea.fnl new file mode 100644 index 0000000..1f03ed1 --- /dev/null +++ b/parser/artoftea.fnl @@ -0,0 +1,71 @@ +(import-macros {: map} :lib.macro) + +(local peg + (if (pick-values 1 (pcall require :lpeg)) + (require :lpeg) + (require :lpeglj))) +(local parser (require :parser.parser)) +(local number (require :lib.number)) +(local fetcher (require :fetcher)) + +(fn url-formatter [path page] + (.. 
"https://artoftea.ru/" path "/?page=" page)) + +(local product-peg + (* ;; id + (parser.anywhere + (parser.tag :div {:class "front-image"} + (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)} + (parser.tag :img {:src (peg.Cg (parser.till "\"") :image) + :title "*" :class "*" :alt "*"})))) + (parser.anywhere + (parser.tag :div {:class "name"} + (parser.tag :a {:href "*"} (peg.Cg (parser.till "") :title)))) + (parser.anywhere + (parser.tag :p {:class "description"} + (peg.Cg (parser.till "

") :description))) + (+ + (* + (parser.anywhere + (parser.tag :option {:value "*" :selected "selected"} + (* (peg.Cg parser.pegs.number :weight) " гр" parser.pegs.spaces))) + (parser.anywhere + (parser.tag :p {:class "price"} + (parser.tag :span {:id "*"} + (peg.Cg (parser.till "") :price))))) + (parser.anywhere + (parser.tag :p {:class "price"} + (parser.tag :span {:id "*"} + (peg.Cg (parser.till "") :price))))) + (parser.anywhere + (parser.tag :input {:type "hidden" + :name "product_id" + :value (peg.Cg parser.pegs.number :id)})) + (parser.anywhere + (parser.tag :button {:type "*" :onclick "*" :class "*"} "Купить")))) + +(fn normalize [product] + (local year (parser.guess-year product.title)) + (local weight (number.string->number product.weight)) + (local price (number.string->number product.price)) + {:site "artoftea" + :id product.id + :url product.url + :description product.description + :image product.image + :year year + :price price + :weight weight + :price-per (if (and price weight (< 0 weight)) + (/ (math.ceil (* (/ price weight) 10)) 10) + nil)}) + +(fn products [] + (fetcher.from-html + [{:path "redtea" :category "Красный чай"} + {:path "greentea" :category "Зеленый чай"}] + url-formatter + product-peg + normalize)) + +{: products} diff --git a/parser/ipuer.fnl b/parser/ipuer.fnl new file mode 100644 index 0000000..7fefd1b --- /dev/null +++ b/parser/ipuer.fnl @@ -0,0 +1,70 @@ +(import-macros {: map} :lib.macro) + +(local peg + (if (pick-values 1 (pcall require :lpeg)) + (require :lpeg) + (require :lpeglj))) +(local number (require :lib.number)) +(local parser (require :parser.parser)) +(local fetcher (require :fetcher)) + +(fn url-formatter [path page] + (.. 
"https://ipuer.ru/catalog/" path "/?p=" page)) + +(local product-peg + (* ;; id + (parser.anywhere + (parser.tag :div + {:data-id (peg.Cg parser.pegs.number :id) :class "*"})) + ;; url and image + (parser.anywhere + (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)} + (parser.tag :img {:src (peg.Cg (parser.till "\"") :image) :alt "*"}))) + ;; title + (parser.anywhere + (parser.tag :div {:class "card-product_title"} + (parser.tag :a {:href "*"} + (parser.tag :span {} (peg.Cg (parser.till "") :title))))) + ;; price + (parser.anywhere + (parser.tag :span {:class "card-price"} + (* (peg.Cg + (* parser.pegs.number + (parser.maybe (* " " parser.pegs.number))) + :price) + " р."))) + (parser.anywhere + (+ (parser.tag :a {:data-url "*" :class "*" :data-add-text "*"} "В корзину") + (parser.tag :a {:data-url "*" :class "*"} "В корзину"))))) + +(fn normalize [product] + (local weight (parser.guess-weight product.title)) + (local price (number.string->number product.price)) + {:site "ipuer" + :id product.id + :url (.. "https://ipuer.ru" product.url) + :title product.title + :description "" + ;; FIXME: parse all editions into different projects + :image (.. 
"https://ipuer.ru" product.image) + :year (parser.guess-year product.title) + :price price + :weight weight + :category product.category + :price-per (if (and price weight (< 0 weight)) + (/ (math.ceil (* (/ price weight) 10)) 10) + nil)}) + +(fn products [] + (fetcher.from-html + [{:path "shen-puer" :category "Шен пуэр"} + {:path "shu-puer" :category "Шу пуэр"} + {:path "drugoy-chay"} + {:path "blagovoniya" :category "Благовония"} + {:path "posuda" :category "Посуда"} + {:path "282" :category "Посуда"}] + url-formatter + product-peg + normalize)) + +{: products} diff --git a/parser/ozchai.fnl b/parser/ozchai.fnl new file mode 100644 index 0000000..6bf6286 --- /dev/null +++ b/parser/ozchai.fnl @@ -0,0 +1,69 @@ +(import-macros {: map} :lib.macro) + +(local http (require :lib.http)) +(local array (require :lib.array)) +(local json (require :vendor.json)) + +(local %all-products-partuid 176163172341) + +(fn string->number [str] + (if str + (tonumber (pick-values 1 (str:gsub "[^0-9.]" ""))) + nil)) + +(fn request [partuid slice] + (print (.. "https://store.tildaapi.com/api/getproductslist/" + "?storepartuid=" + partuid + "&recid=280779251&c=1723216515077" + "&getparts=true&getoptions=true&slice=%d&size=36")) + (let [(status headers body) + (luna.http.request + "GET" + (string.format + (.. "https://store.tildaapi.com/api/getproductslist/" + "?storepartuid=" + partuid + "&recid=280779251&c=1723216515077" + "&getparts=true&getoptions=true&slice=%d&size=36") + slice) + {:Content-Type "application/json" + :User-Agent (http.random-user-agent)} + "")] + (json.decode body))) + +(fn walk-slices [partuid] + (fn gather [slice knil] + (let [{: nextslice : products} (request partuid slice) + res (array.concat knil products)] + (if (= 0 (# products)) + knil + (do + (os.execute "sleep 1") + (gather (+ slice 1) res))))) + (gather 1 [])) + +(fn normalize [_ product] + (local gallery (json.decode product.gallery)) + (local weight (string->number (. (. 
product.editions 1) :Вес))) + (local price (string->number (. (. product.editions 1) :price))) + {:site "ozchai" + :id product.url + :url product.url + :title product.title + :description product.descr + ;; FIXME: parse all editions into different projects + :image (if (< 0 (# gallery)) + (. (. gallery 1) :img) + "") + :weight weight + :price price + :price-per (if (and price weight (< 0 weight)) + (/ (math.ceil (* (/ price weight) 10)) 10) + nil) + :characteristics product.characteristics}) + +(fn products [] + (map normalize (walk-slices %all-products-partuid))) + +{: products} diff --git a/parser/parser.fnl b/parser/parser.fnl new file mode 100644 index 0000000..b52f881 --- /dev/null +++ b/parser/parser.fnl @@ -0,0 +1,143 @@ +(import-macros {: map} :lib.macro) + +(local number (require :lib.number)) + +(local peg + (if (pick-values 1 (pcall require :lpeg)) + (require :lpeg) + (require :lpeglj))) + +;; "not" is taken >:( +(fn pnot [p] + (- (peg.P 1) (peg.P p))) + +(fn till [p] + (^ (pnot p) 1)) + +(fn maybe [p] + (^ (peg.P p) 0)) + +(fn anywhere [p] + (peg.P [(+ p (* 1 (peg.V 1)))])) + +(local pegs {}) +(tset pegs :number (^ (peg.R "09") 1)) +(tset pegs :letters (^ (+ (peg.R "az") (peg.R "AZ")) 1)) +(tset pegs :space (peg.S "\n\t ")) +(tset pegs :spaces (^ (peg.S "\n\t ") 1)) +(tset pegs :tag-name (+ pegs.letters pegs.number)) +(tset pegs :attr + (peg.Ct (* (peg.Cg (^ (+ pegs.letters "-") 1) :name) + (maybe (* "=\"" (peg.Cg (till "\"") :value) "\""))))) +(tset pegs :self-closing-tag + (* "<" + (peg.Cg + (+ (peg.P "area") "base" "br" "col" "embed" "hr" + "img" "input" "link" "meta" "param" "source" + "track" "wbr") ;; should be case insensitive + :tag) + (peg.Cg (peg.Ct (^ (+ pegs.space (peg.Cg pegs.attr)) 0)) :attrs) + (maybe "/") ">")) +(tset pegs :opening-tag + (* "<" (peg.Cg pegs.tag-name :tag) + (peg.Cg (peg.Ct (^ (+ pegs.space (peg.Cg pegs.attr)) 0)) :attrs) + ">")) +(tset pegs :closing-tag (* "")) +(tset pegs :doctype + (* "")) ;; should be case insensitive 
+(tset pegs :tag + (peg.P [(peg.Ct (+ pegs.self-closing-tag + (* pegs.opening-tag + (peg.Cg + (peg.Ct + (^ (+ (+ pegs.space (peg.V 1)) + (peg.Cg (till pegs.closing-tag))) + 0)) + :nodes) + pegs.closing-tag)))])) +(tset pegs :html + (* pegs.doctype (peg.Ct (^ (+ pegs.space (peg.Cg pegs.tag)) 0)))) + +(fn tag [tag attrs contents] + (local tag (peg.P tag)) + (local attrs-count (accumulate [sum 0 _ _ (pairs attrs)] (+ 1 sum))) + (local attr-peg + (fn [name value] (* + (^ (peg.P name) 1) + (if (~= value "") + (* "=\"" + ;; wildcard for any value + (if (= value "*") + (till "\"") + (peg.P value)) + "\"") + (maybe (.. "=\" name \"")))))) + (local attrs-peg + (accumulate [sum pegs.spaces + _ rule + (pairs (icollect [k v (pairs attrs)] + (attr-peg k v)))] + (+ rule sum))) + (if contents + (peg.P (* + (^ pegs.space 0) + ;; opening tag + (* "<" tag (^ pegs.space 0) + (^ attrs-peg (- (* attrs-count 2) 1)) + (^ pegs.space 0) ">") + ;; tag contents + (^ pegs.space 0) + (if (= contents "*") + (till (* "")) + contents) + (^ pegs.space 0) + ;; closing tag + (* ""))) + (peg.P (* + (^ pegs.space 0) + ;; opening tag + (* "<" tag (^ pegs.space 0) + (^ attrs-peg (- (* attrs-count 2) 1)) + (^ pegs.space 0) (maybe "/") ">"))))) + +(fn match-many [html tag] + (: (peg.Ct (^ (peg.Ct tag) 1)) + :match html)) + +(fn guess-category [title] + (if (: (anywhere (+ (peg.P "зеленый") "Зеленый")) :match title) + "Зеленый чай" + (: (anywhere (+ (peg.P "Улун") "улун")) :match title) + "Улун" + (: (anywhere (+ (peg.P "Белый") "белый")) :match title) + "Белый чай" + (: (anywhere (+ (peg.P "Желтый") "желтый")) :match title) + "Желтый чай" + (: (anywhere (+ (peg.P "Красный") "красный")) :match title) + "Красный чай" + "Неизвестная категория")) + +(fn guess-year [title] + (number.string->number + (: (anywhere + (* (peg.C (^ (peg.R "09") 4)) + (maybe " ") + (- "г" (peg.P "гр")))) + :match title))) + +(fn guess-weight [title] + (number.string->number + (: (anywhere + (* (peg.C pegs.number) (maybe " ") "гр")) 
+       :match title)))
+
+{: match-many
+ : tag
+ : anywhere
+ : till
+ : maybe
+ : pegs
+ :not pnot
+ : guess-category
+ : guess-year
+ : guess-weight}
diff --git a/run.sh b/run.sh
index f850f8d..89af2b3 100755
--- a/run.sh
+++ b/run.sh
@@ -1,3 +1,42 @@
 #!/bin/sh
-LUA_CPATH="/usr/local/lib/lua/5.4/?.so;/usr/local/lib/lua/5.4/loadall.so;./?.so;$(guix build lua-lpeg)/lib/lua/5.3/?.so" \
-  go run -tags fts5,puc ../. -n 1 main.lua
+set -e
+
+# Print the list of supported subcommands.
+usage () {
+  echo "Usage:
+    serve [--jit] Serve the site pages
+    fetch [--jit] Populate the database with records"
+}
+
+# Run bin/serve.fnl; --jit selects the jit build tag instead of puc.
+serve () {
+  variant="$1"
+  if [ "$variant" = "--jit" ]; then
+    echo "running jit"
+    go run -tags fts5,jit ../. -n 1 main.lua bin/serve.fnl
+  else
+    echo "running puc"
+    LUA_CPATH="/usr/local/lib/lua/5.4/?.so;/usr/local/lib/lua/5.4/loadall.so;./?.so;$(guix build lua-lpeg)/lib/lua/5.3/?.so" \
+      go run -tags fts5,puc ../. -n 1 main.lua bin/serve.fnl
+  fi
+}
+
+# Run bin/fetch.fnl; --jit selects the jit build tag instead of puc.
+fetch () {
+  # Read the flag from this function's own argument: fetch is dispatched
+  # directly, so $variant would otherwise be unset and --jit ignored.
+  variant="$1"
+  if [ "$variant" = "--jit" ]; then
+    echo "running jit"
+    go run -tags fts5,jit ../. -n 1 main.lua bin/fetch.fnl
+  else
+    LUA_CPATH="/usr/local/lib/lua/5.4/?.so;/usr/local/lib/lua/5.4/loadall.so;./?.so;$(guix build lua-lpeg)/lib/lua/5.3/?.so" \
+      go run -tags fts5,puc ../. -n 1 main.lua bin/fetch.fnl
+  fi
+}
+
+# Dispatch: the first argument names the function to run, the rest are passed to it.
+cmd="$1"
+[ -z "$cmd" ] || [ "$cmd" = "-h" ] || [ "$cmd" = "--help" ] && usage && exit 1
+shift
+"$cmd" "$@" || usage
diff --git a/runjit.sh b/runjit.sh
deleted file mode 100755
index dda6233..0000000
--- a/runjit.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-go run -tags fts5,jit ../. -n 1 main.lua
diff --git a/site/artoftea.fnl b/site/artoftea.fnl
deleted file mode 100644
index 382b0ef..0000000
--- a/site/artoftea.fnl
+++ /dev/null
@@ -1,78 +0,0 @@
-(import-macros {: map} :lib.macro)
-
-(local peg
-  (if (pick-values 1 (pcall require :lpeg))
-    (require :lpeg)
-    (require :vendor.lpeglj)))
-(local parser (require :parser))
-(local number (require :lib.number))
-(local fetcher (require :fetcher))
-
-(fn url-formatter [path page]
-  (.. 
"https://artoftea.ru/" path "/?page=" page)) - -(local product-peg - (* ;; id - (parser.anywhere - (parser.tag :div {:class "front-image"} - (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)} - (parser.tag :img {:src (peg.Cg (parser.till "\"") :image) - :title "*" :class "*" :alt "*"})))) - (parser.anywhere - (parser.tag :div {:class "name"} - (parser.tag :a {:href "*"} (peg.Cg (parser.till "") :title)))) - (parser.anywhere - (parser.tag :p {:class "description"} - (peg.Cg (parser.till "

") :description))) - (+ - (* - (parser.anywhere - (parser.tag :option {:value "*" :selected "selected"} - (* (peg.Cg parser.pegs.number :weight) " гр" parser.pegs.spaces))) - (parser.anywhere - (parser.tag :p {:class "price"} - (parser.tag :span {:id "*"} - (peg.Cg (parser.till "") :price))))) - (parser.anywhere - (parser.tag :p {:class "price"} - (parser.tag :span {:id "*"} - (peg.Cg (parser.till "") :price))))) - (parser.anywhere - (parser.tag :input {:type "hidden" - :name "product_id" - :value (peg.Cg parser.pegs.number :id)})) - (parser.anywhere - (parser.tag :button {:type "*" :onclick "*" :class "*"} "Купить")))) - -(fn normalize [product] - (local year - (number.string->number - (: (parser.anywhere - (* (peg.C (^ (peg.R "09") 4)) - (parser.maybe " ") - (- "г" (peg.P "гр")))) - :match product.title))) - (local weight (number.string->number product.weight)) - (local price (number.string->number product.price)) - - {:site "artoftea" - :id product.id - :url product.url - :description product.description - :image product.image - :year year - :price price - :weight weight - :price-per (if (and price weight (< 0 weight)) - (/ (math.ceil (* (/ price weight) 10)) 10) - nil)}) - -(fn products [] - (fetcher.from-html - url-formatter - [{:path "redtea" :category "Красный чай"} - {:path "greentea" :category "Зеленый чай"}] - normalize - product-peg)) - -{: products} diff --git a/site/ipuer.fnl b/site/ipuer.fnl deleted file mode 100644 index f878912..0000000 --- a/site/ipuer.fnl +++ /dev/null @@ -1,81 +0,0 @@ -(import-macros {: map} :lib.macro) - -(local peg - (if (pick-values 1 (pcall require :lpeg)) - (require :lpeg) - (require :vendor.lpeglj))) -(local number (require :lib.number)) -(local parser (require :parser)) -(local fetcher (require :fetcher)) - -(fn url-formatter [path page] - (.. 
"https://ipuer.ru/catalog/" path "/?p=" page)) - -(local product-peg - (* ;; id - (parser.anywhere - (parser.tag :div - {:data-id (peg.Cg parser.pegs.number :id) :class "*"})) - ;; url and image - (parser.anywhere - (parser.tag :a {:href (peg.Cg (parser.till "\"") :url)} - (parser.tag :img {:src (peg.Cg (parser.till "\"") :image) :alt "*"}))) - ;; title - (parser.anywhere - (parser.tag :div {:class "card-product_title"} - (parser.tag :a {:href "*"} - (parser.tag :span {} (peg.Cg (parser.till "") :title))))) - ;; price - (parser.anywhere - (parser.tag :span {:class "card-price"} - (* (peg.Cg - (* parser.pegs.number - (parser.maybe (* " " parser.pegs.number))) - :price) - " р."))) - (parser.anywhere - (+ (parser.tag :a {:data-url "*" :class "*" :data-add-text "*"} "В корзину") - (parser.tag :a {:data-url "*" :class "*"} "В корзину"))))) - -(fn normalize [product] - (local year - (number.string->number - (: (parser.anywhere - (* (peg.C (^ (peg.R "09") 4)) - (parser.maybe " ") - (- "г" (peg.P "гр")))) - :match product.title))) - (local weight - (number.string->number - (: (parser.anywhere (* (peg.C parser.pegs.number) (parser.maybe " ") "гр")) - :match product.title))) - (local price (number.string->number product.price)) - - {:site "ipuer" - :id product.id - :url (.. "https://ipuer.ru" product.url) - :title product.title - :description "" - ;; FIXME: parse all editions into different projects - :image (.. 
"https://ipuer.ru" product.image) - :year year - :price price - :weight weight - :category product.category - :price-per (if (and price weight (< 0 weight)) - (/ (math.ceil (* (/ price weight) 10)) 10) - nil)}) - -(fn products [] - (fetcher.from-html - url-formatter - [{:path "shen-puer" :category "Шен пуэр"} - {:path "shu-puer" :category "Шу пуэр"} - {:path "drugoy-chay"} - {:path "blagovoniya" :category "Благовония"} - {:path "posuda" :category "Посуда"} - {:path "282" :category "Посуда"}] - normalize - product-peg)) - -{: products} diff --git a/site/ozchai.fnl b/site/ozchai.fnl deleted file mode 100644 index 90c4edc..0000000 --- a/site/ozchai.fnl +++ /dev/null @@ -1,70 +0,0 @@ -(import-macros {: map} :lib.macro) - -(local http (require :http)) -(local array (require :lib.array)) -(local json (require :vendor.json)) - -(local %all-products-partuid 176163172341) - -(fn string->number [str] - (if str - (tonumber (pick-values 1 (str:gsub "[^0-9.]" ""))) - nil)) - -(fn request [partuid slice] - (print (.. "https://store.tildaapi.com/api/getproductslist/" - "?storepartuid=" - partuid - "&recid=280779251&c=1723216515077" - "&getparts=true&getoptions=true&slice=%d&size=36")) - (let [(status headers body) - (luna.http.request - "GET" - (string.format - (.. "https://store.tildaapi.com/api/getproductslist/" - "?storepartuid=" - partuid - "&recid=280779251&c=1723216515077" - "&getparts=true&getoptions=true&slice=%d&size=36") - slice) - {:Content-Type "application/json" - :User-Agent (http.random-user-agent)} - "")] - (json.decode body))) - -(fn walk-slices [partuid] - (fn gather [slice knil] - (let [{: nextslice : products} (request partuid slice) - res (array.concat knil products)] - (if (= 0 (# products)) - knil - (do - (os.execute "sleep 1") - (gather (+ slice 1) res))))) - (gather 1 [])) - -(fn normalize [_ product] - (local gallery (json.decode product.gallery)) - (local weight (string->number (. (. product.editions 1) :Вес))) - (local price (string->number (. (. 
product.editions 1) :price))) - - {:site "ozchai" - :id product.url - :url product.url - :title product.title - :description product.descr - ;; FIXME: parse all editions into different projects - :image (if (< 0 (# gallery)) - (. (. gallery 1) :img) - "") - :weight weight - :price price - :price-per (if (and price weight (< 0 weight)) - (/ (math.ceil (* (/ price weight) 10)) 10) - nil) - :characteristics product.characteristics}) - -(fn products [] - (map normalize (walk-slices %all-products-partuid))) - -{: products} diff --git a/var/.gitkeep b/var/.gitkeep new file mode 100644 index 0000000..e69de29 -- cgit v1.2.3