diff options
| author | unwox <me@unwox.com> | 2024-09-27 15:26:33 +0600 |
|---|---|---|
| committer | unwox <me@unwox.com> | 2024-09-27 15:44:16 +0600 |
| commit | dd449357f502dbe9ca4487d4b06a06ee4e597146 (patch) | |
| tree | 9847488a6cc2c1aaf1fc80578e1a7a5d4af99ff5 | |
| parent | 9b82db238f9e2e02a76f95c793f8d6ef2387ecfd (diff) | |
new structure
| -rw-r--r-- | .gitignore | 1 | ||||
| -rw-r--r-- | bin/fetch.fnl | 76 | ||||
| -rw-r--r-- | bin/serve.fnl (renamed from main.fnl) | 84 | ||||
| -rw-r--r-- | fetcher.fnl | 68 | ||||
| -rw-r--r-- | lib/http.fnl (renamed from http.fnl) | 0 | ||||
| -rw-r--r-- | main.lua | 3 | ||||
| -rw-r--r-- | parser/artoftea.fnl (renamed from site/artoftea.fnl) | 19 | ||||
| -rw-r--r-- | parser/ipuer.fnl (renamed from site/ipuer.fnl) | 25 | ||||
| -rw-r--r-- | parser/ozchai.fnl (renamed from site/ozchai.fnl) | 3 | ||||
| -rw-r--r-- | parser/parser.fnl (renamed from parser.fnl) | 71 | ||||
| -rwxr-xr-x | run.sh | 36 | ||||
| -rwxr-xr-x | runjit.sh | 2 | ||||
| -rw-r--r-- | var/.gitkeep | 0 |
13 files changed, 216 insertions, 172 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..33a56a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +var/db.sqlite* diff --git a/bin/fetch.fnl b/bin/fetch.fnl new file mode 100644 index 0000000..fe1a1a5 --- /dev/null +++ b/bin/fetch.fnl @@ -0,0 +1,76 @@ +(import-macros {: map : reduce} :lib.macro) + +(tset package :path (.. package.path ";./vendor/lpeglj/?.lua")) + +(local array (require :lib.array)) +(local ozchai (require :parser.ozchai)) +(local ipuer (require :parser.ipuer)) +(local artoftea (require :parser.artoftea)) + +(local db (luna.db.open "file:var/db.sqlite?_journal=WAL&_sync=NORMAL")) +(luna.db.exec db " + PRAGMA foreign_keys=ON; + PRAGMA journal_mode=WAL; + PRAGMA synchronous=NORMAL; + + CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5(name, fid, `table`); + + CREATE TABLE IF NOT EXISTS products ( + id TEXT NOT NULL PRIMARY KEY, + site TEXT NOT NULL, + category TEXT NOT NULL, + title TEXT NOT NULL, + description TEXT NOT NULL, + year INT NOT NULL, + image TEXT NOT NULL, + url TEXT NOT NULL, + price REAL NOT NULL, + weight REAL NOT NULL, + price_per REAL NOT NULL, + misc TEXT NOT NULL, + creation_time DATETIME NOT NULL + );" []) + +(fn now [] + (os.date "%Y-%m-%d %H:%M:%S")) + +(fn store-products [products] + (local sql + (.. "INSERT OR REPLACE INTO products VALUES " + (table.concat + (map (fn [_ _] + "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") + products) + ","))) + (local vars + (reduce + (fn [_ product rest] + (array.concat rest + [product.id + product.site + product.category + product.title + (or product.description "") + (or product.year 0) + (or product.image "") + (or product.url "") + (or product.price 0) + (or product.weight 0) + (or product.price-per 0) + (or product.misc "") + (now)])) + products [])) + (luna.db.exec db sql vars)) + +(fn populate-search-table [] + (local tx (luna.db.begin db)) + (luna.db.exec-tx tx "DELETE FROM search" []) + (luna.db.exec-tx tx "INSERT INTO search + SELECT title, id, 'products' + FROM products;" []) + (luna.db.commit tx)) + +(store-products (artoftea.products)) +(store-products (ipuer.products)) +(store-products (ozchai.products)) +(populate-search-table) @@ -1,6 +1,6 @@ (import-macros {: map : reduce} :lib.macro) -(tset package :path (.. package.path ";./lib/lpeglj/?.lua")) +(tset package :path (.. package.path ";./vendor/lpeglj/?.lua")) (local io (require :io)) (local math (require :math)) @@ -10,49 +10,20 @@ (local array (require :lib.array)) (local str (require :lib.string)) -(local ozchai (require :site.ozchai)) -(local ipuer (require :site.ipuer)) -(local artoftea (require :site.artoftea)) - -(print (fennel.view (ipuer.products))) -(os.exit 1) +(local ozchai (require :parser.ozchai)) +(local ipuer (require :parser.ipuer)) +(local artoftea (require :parser.artoftea)) (when _G.unpack - (tset table :unpack _G.unpack)) + (tset table :unpack _G.unpack)) + +(local db (luna.db.open "file:var/db.sqlite?_journal=WAL&_sync=NORMAL")) (local query-synonyms { "шэн" "шен" "шен" "шэн" "доска" "чабань" - "чабань" "доска" -}) - -(local db (luna.db.open "file:db.sqlite?_journal=WAL&_sync=NORMAL")) -(luna.db.exec db " - PRAGMA foreign_keys=ON; - PRAGMA journal_mode=WAL; - PRAGMA synchronous=NORMAL; - - CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5(name, fid, `table`); - - CREATE TABLE IF NOT EXISTS products ( - id TEXT NOT NULL PRIMARY KEY, - site TEXT NOT NULL, - category TEXT NOT NULL, - title TEXT NOT NULL, - description TEXT NOT NULL, - year INT NOT NULL, - image TEXT NOT NULL, - url TEXT NOT NULL, - price REAL NOT NULL, - weight REAL NOT NULL, - price_per REAL NOT NULL, - misc TEXT NOT NULL, - creation_time DATETIME NOT NULL - );" []) - -(fn now [] - (os.date "%Y-%m-%d %H:%M:%S")) + "чабань" "доска"}) (fn unescape [s] (assert (= (type s) :string)) @@ -175,45 +146,6 @@ (. total 1 1) 0)}) -(fn store-products [products] - (local sql - (.. "INSERT OR REPLACE INTO products VALUES " - (table.concat - (map (fn [_ _] - "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") - products) - ","))) - (local vars - (reduce - (fn [_ product rest] - (array.concat rest - [product.id - product.site - product.category - product.title - (or product.description "") - (or product.year 0) - (or product.image "") - (or product.url "") - (or product.price 0) - (or product.weight 0) - (or product.price-per 0) - (or product.misc "") - (now)])) - products [])) - (luna.db.exec db sql vars)) - -(fn populate-search-table [] - (local tx (luna.db.begin db)) - (luna.db.exec-tx tx "DELETE FROM search" []) - (luna.db.exec-tx tx "INSERT INTO search - SELECT title, id, 'products' FROM products;" []) - (luna.db.commit tx)) - -; (store-products (ipuer.products)) -; (store-products (ozchai.products)) -; (populate-search-table) - (fn root-handler [{: path : query}] (if (= path "/") (let [headers {:content-type "text/html"} diff --git a/fetcher.fnl b/fetcher.fnl index d31f858..6d6d633 100644 --- a/fetcher.fnl +++ b/fetcher.fnl @@ -3,11 +3,11 @@ (local peg (if (pick-values 1 (pcall require :lpeg)) (require :lpeg) - (require :vendor.lpeglj))) + (require :lpeglj))) (local array (require :lib.array)) (local json (require :vendor.json)) -(local parser (require :parser)) -(local http (require :http)) +(local parser (require :parser.parser)) +(local http (require :lib.http)) (fn retry [what times sleep] (var result nil) @@ -37,48 +37,37 @@ (luna.http.request "GET" url {:User-Agent (http.random-user-agent)} "")) (if (= status 200) - (let [products (parser.match-many html item-peg)] - (if (or (= products nil) (= 0 (# products))) + (let [items (parser.match-many html item-peg)] + (if (or (= items nil) (= 0 (# items))) knil (do (os.execute "sleep 1") - (gather (+ page 1) (array.concat knil products))))) + (gather (+ page 1) (array.concat knil items))))) (= status 404) knil (retry #(gather page knil) 3 1))) (gather 1 [])) -(fn guess-category [title] - (if (: (parser.anywhere (+ (peg.P "зеленый") "Зеленый")) :match title) - "Зеленый чай" - (: (parser.anywhere (+ (peg.P "Улун") "улун")) :match title) - "Улун" - (: (parser.anywhere (+ (peg.P "Белый") "белый")) :match title) - "Белый чай" - (: (parser.anywhere (+ (peg.P "Желтый") "желтый")) :match title) - "Желтый чай" - (: (parser.anywhere (+ (peg.P "Красный") "красный")) :match title) - "Красный чай" - "Неизвестная категория")) - (fn categorize-many [items category] (map (fn [_ item] - (tset item :category - (if category category (guess-category item.title))) - item) + (tset item :category + (if category + category + (parser.guess-category item.title))) + item) items)) -(fn from-html [url-formatter categories normalizer item-peg] +(fn from-html [categories url-formatter item-peg normalizer] (reduce (fn [_ {: category : path} result] (array.concat result (categorize-many - (map #(normalizer $2) - (walk-html-pages url-formatter path item-peg)) - category))) + (map #(normalizer $2) + (walk-html-pages url-formatter path item-peg)) + category))) categories [])) @@ -88,35 +77,34 @@ (print (.. "requesting " url)) (local (status _ content) (luna.http.request - "GET" - url + "GET" url {:User-Agent (http.random-user-agent) :Content-Type "application/json" :Accept "application/json"} "")) (if (= status 200) - (let [products (json.decode content)] - (if (or (= products nil) (= 0 (# products))) - knil - (do - (os.execute "sleep 1") - (gather (+ page 1) (array.concat knil products))))) + (let [items (json.decode content)] + (if (or (= items nil) (= 0 (# items))) + knil + (do + (os.execute "sleep 1") + (gather (+ page 1) (array.concat knil items))))) (= status 404) knil (retry #(gather page knil) 3 1))) (gather 1 [])) -(fn from-json [url-formatter categories normalizer] +(fn from-json [categories url-formatter response-destructor normalizer] (reduce (fn [_ {: category : path} result] (array.concat - result - (categorize-many - (map #(normalizer $2) - (walk-json-pages url-formatter path)) - category))) + result + (categorize-many + (map #(normalizer $2) + (walk-json-pages url-formatter path)) + category))) categories [])) @@ -1 +1,2 @@ -return require("vendor.fennel").install().dofile("main.fnl") +assert(arg[1], "lua: file name must be specified") +return require("vendor.fennel").install().dofile(arg[1]) diff --git a/site/artoftea.fnl b/parser/artoftea.fnl index 382b0ef..1f03ed1 100644 --- a/site/artoftea.fnl +++ b/parser/artoftea.fnl @@ -3,8 +3,8 @@ (local peg (if (pick-values 1 (pcall require :lpeg)) (require :lpeg) - (require :vendor.lpeglj))) -(local parser (require :parser)) + (require :lpeglj))) +(local parser (require :parser.parser)) (local number (require :lib.number)) (local fetcher (require :fetcher)) @@ -45,16 +45,9 @@ (parser.tag :button {:type "*" :onclick "*" :class "*"} "Купить")))) (fn normalize [product] - (local year - (number.string->number - (: (parser.anywhere - (* (peg.C (^ (peg.R "09") 4)) - (parser.maybe " ") - (- "г" (peg.P "гр")))) - :match product.title))) + (local year (parser.guess-year product.title)) (local weight (number.string->number product.weight)) (local price (number.string->number product.price)) - {:site "artoftea" :id product.id :url product.url @@ -69,10 +62,10 @@ (fn products [] (fetcher.from-html - url-formatter [{:path "redtea" :category "Красный чай"} {:path "greentea" :category "Зеленый чай"}] - normalize - product-peg)) + url-formatter + product-peg + normalize)) {: products} diff --git a/site/ipuer.fnl b/parser/ipuer.fnl index f878912..7fefd1b 100644 --- a/site/ipuer.fnl +++ b/parser/ipuer.fnl @@ -3,9 +3,9 @@ (local peg (if (pick-values 1 (pcall require :lpeg)) (require :lpeg) - (require :vendor.lpeglj))) + (require :lpeglj))) (local number (require :lib.number)) -(local parser (require :parser)) +(local parser (require :parser.parser)) (local fetcher (require :fetcher)) (fn url-formatter [path page] @@ -38,19 +38,8 @@ (parser.tag :a {:data-url "*" :class "*"} "В корзину"))))) (fn normalize [product] - (local year - (number.string->number - (: (parser.anywhere - (* (peg.C (^ (peg.R "09") 4)) - (parser.maybe " ") - (- "г" (peg.P "гр")))) - :match product.title))) - (local weight - (number.string->number - (: (parser.anywhere (* (peg.C parser.pegs.number) (parser.maybe " ") "гр")) - :match product.title))) + (local weight (parser.guess-weight product.title)) (local price (number.string->number product.price)) - {:site "ipuer" :id product.id :url (.. "https://ipuer.ru" product.url) @@ -58,7 +47,7 @@ :description "" ;; FIXME: parse all editions into different projects :image (.. "https://ipuer.ru" product.image) - :year year + :year (parser.guess-year product.title) :price price :weight weight :category product.category @@ -68,14 +57,14 @@ (fn products [] (fetcher.from-html - url-formatter [{:path "shen-puer" :category "Шен пуэр"} {:path "shu-puer" :category "Шу пуэр"} {:path "drugoy-chay"} {:path "blagovoniya" :category "Благовония"} {:path "posuda" :category "Посуда"} {:path "282" :category "Посуда"}] - normalize - product-peg)) + url-formatter + product-peg + normalize)) {: products} diff --git a/site/ozchai.fnl b/parser/ozchai.fnl index 90c4edc..6bf6286 100644 --- a/site/ozchai.fnl +++ b/parser/ozchai.fnl @@ -1,6 +1,6 @@ (import-macros {: map} :lib.macro) -(local http (require :http)) +(local http (require :lib.http)) (local array (require :lib.array)) (local json (require :vendor.json)) @@ -47,7 +47,6 @@ (local gallery (json.decode product.gallery)) (local weight (string->number (. (. product.editions 1) :Вес))) (local price (string->number (. (. product.editions 1) :price))) - {:site "ozchai" :id product.url :url product.url diff --git a/parser.fnl b/parser/parser.fnl index 314476c..b52f881 100644 --- a/parser.fnl +++ b/parser/parser.fnl @@ -1,9 +1,13 @@ (import-macros {: map} :lib.macro) + +(local number (require :lib.number)) + (local peg (if (pick-values 1 (pcall require :lpeg)) (require :lpeg) - (require :vendor.lpeglj))) + (require :lpeglj))) +;; "not" is taken >:( (fn pnot [p] (- (peg.P 1) (peg.P p))) @@ -58,15 +62,16 @@ (local tag (peg.P tag)) (local attrs-count (accumulate [sum 0 _ _ (pairs attrs)] (+ 1 sum))) (local attr-peg - (fn [name value] (* (^ (peg.P name) 1) - (if (~= value "") - (* "=\"" - ;; wildcard for any value - (if (= value "*") - (till "\"") - (peg.P value)) - "\"") - (maybe (.. "=\" name \"")))))) + (fn [name value] (* + (^ (peg.P name) 1) + (if (~= value "") + (* "=\"" + ;; wildcard for any value + (if (= value "*") + (till "\"") + (peg.P value)) + "\"") + (maybe (.. "=\" name \"")))))) (local attrs-peg (accumulate [sum pegs.spaces _ rule @@ -78,8 +83,8 @@ (^ pegs.space 0) ;; opening tag (* "<" tag (^ pegs.space 0) - (^ attrs-peg (- (* attrs-count 2) 1)) - (^ pegs.space 0) ">") + (^ attrs-peg (- (* attrs-count 2) 1)) + (^ pegs.space 0) ">") ;; tag contents (^ pegs.space 0) (if (= contents "*") @@ -89,20 +94,50 @@ ;; closing tag (* "</" tag ">"))) (peg.P (* - (^ pegs.space 0) - ;; opening tag - (* "<" tag (^ pegs.space 0) - (^ attrs-peg (- (* attrs-count 2) 1)) - (^ pegs.space 0) (maybe "/") ">"))))) + (^ pegs.space 0) + ;; opening tag + (* "<" tag (^ pegs.space 0) + (^ attrs-peg (- (* attrs-count 2) 1)) + (^ pegs.space 0) (maybe "/") ">"))))) (fn match-many [html tag] (: (peg.Ct (^ (peg.Ct tag) 1)) :match html)) +(fn guess-category [title] + (if (: (anywhere (+ (peg.P "зеленый") "Зеленый")) :match title) + "Зеленый чай" + (: (anywhere (+ (peg.P "Улун") "улун")) :match title) + "Улун" + (: (anywhere (+ (peg.P "Белый") "белый")) :match title) + "Белый чай" + (: (anywhere (+ (peg.P "Желтый") "желтый")) :match title) + "Желтый чай" + (: (anywhere (+ (peg.P "Красный") "красный")) :match title) + "Красный чай" + "Неизвестная категория")) + +(fn guess-year [title] + (number.string->number + (: (anywhere + (* (peg.C (^ (peg.R "09") 4)) + (maybe " ") + (- "г" (peg.P "гр")))) + :match title))) + +(fn guess-weight [title] + (number.string->number + (: (anywhere + (* (peg.C pegs.number) (maybe " ") "гр")) + :match title))) + {: match-many : tag : anywhere : till : maybe : pegs - :not pnot} + :not pnot + : guess-category + : guess-year + : guess-weight} @@ -1,3 +1,35 @@ #!/bin/sh -LUA_CPATH="/usr/local/lib/lua/5.4/?.so;/usr/local/lib/lua/5.4/loadall.so;./?.so;$(guix build lua-lpeg)/lib/lua/5.3/?.so" \ - go run -tags fts5,puc ../. -n 1 main.lua +set -e + +usage () { + echo "Usage: + serve [--jit] Serve the site pages + fetch [--jit] Populate the database with records" +} + +serve () { + variant="$1" + if [ "$variant" = "--jit" ]; then + echo "running jit" + go run -tags fts5,jit ../. -n 1 main.lua bin/serve.fnl + else + echo "running puc" + LUA_CPATH="/usr/local/lib/lua/5.4/?.so;/usr/local/lib/lua/5.4/loadall.so;./?.so;$(guix build lua-lpeg)/lib/lua/5.3/?.so" \ + go run -tags fts5,puc ../. -n 1 main.lua bin/serve.fnl + fi +} + +fetch () { + if [ "$variant" = "--jit" ]; then + echo "running jit" + go run -tags fts5,jit ../. -n 1 main.lua bin/fetch.fnl + else + LUA_CPATH="/usr/local/lib/lua/5.4/?.so;/usr/local/lib/lua/5.4/loadall.so;./?.so;$(guix build lua-lpeg)/lib/lua/5.3/?.so" \ + go run -tags fts5,puc ../. -n 1 main.lua bin/fetch.fnl + fi +} + +cmd="$1" +[ -z "$cmd" ] || [ "$cmd" = "-h" ] || [ "$cmd" = "--help" ] && usage && exit 1 +shift +"$cmd" "$@" || usage diff --git a/runjit.sh b/runjit.sh deleted file mode 100755 index dda6233..0000000 --- a/runjit.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -go run -tags fts5,jit ../. -n 1 main.lua diff --git a/var/.gitkeep b/var/.gitkeep new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/var/.gitkeep |
