summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunwox <me@unwox.com>2024-09-27 15:26:33 +0600
committerunwox <me@unwox.com>2024-09-27 15:44:16 +0600
commitdd449357f502dbe9ca4487d4b06a06ee4e597146 (patch)
tree9847488a6cc2c1aaf1fc80578e1a7a5d4af99ff5
parent9b82db238f9e2e02a76f95c793f8d6ef2387ecfd (diff)
new structure
-rw-r--r--.gitignore1
-rw-r--r--bin/fetch.fnl76
-rw-r--r--bin/serve.fnl (renamed from main.fnl)84
-rw-r--r--fetcher.fnl68
-rw-r--r--lib/http.fnl (renamed from http.fnl)0
-rw-r--r--main.lua3
-rw-r--r--parser/artoftea.fnl (renamed from site/artoftea.fnl)19
-rw-r--r--parser/ipuer.fnl (renamed from site/ipuer.fnl)25
-rw-r--r--parser/ozchai.fnl (renamed from site/ozchai.fnl)3
-rw-r--r--parser/parser.fnl (renamed from parser.fnl)71
-rwxr-xr-xrun.sh36
-rwxr-xr-xrunjit.sh2
-rw-r--r--var/.gitkeep0
13 files changed, 216 insertions, 172 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..33a56a3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+var/db.sqlite*
diff --git a/bin/fetch.fnl b/bin/fetch.fnl
new file mode 100644
index 0000000..fe1a1a5
--- /dev/null
+++ b/bin/fetch.fnl
@@ -0,0 +1,76 @@
+(import-macros {: map : reduce} :lib.macro)
+
+(tset package :path (.. package.path ";./vendor/lpeglj/?.lua"))
+
+(local array (require :lib.array))
+(local ozchai (require :parser.ozchai))
+(local ipuer (require :parser.ipuer))
+(local artoftea (require :parser.artoftea))
+
+;; Open the SQLite database file var/db.sqlite. The DSN query parameters
+;; request WAL journaling and NORMAL sync (presumably go-sqlite3 style
+;; options -- confirm against the luna host).
+(local db (luna.db.open "file:var/db.sqlite?_journal=WAL&_sync=NORMAL"))
+;; Set the connection pragmas again via SQL and create the schema when it
+;; does not exist yet: an FTS5 virtual table `search` (name, fid, table) and
+;; the `products` table. Every products column is declared NOT NULL, so any
+;; writer has to supply a value for all 13 columns.
+(luna.db.exec db "
+ PRAGMA foreign_keys=ON;
+ PRAGMA journal_mode=WAL;
+ PRAGMA synchronous=NORMAL;
+
+ CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5(name, fid, `table`);
+
+ CREATE TABLE IF NOT EXISTS products (
+ id TEXT NOT NULL PRIMARY KEY,
+ site TEXT NOT NULL,
+ category TEXT NOT NULL,
+ title TEXT NOT NULL,
+ description TEXT NOT NULL,
+ year INT NOT NULL,
+ image TEXT NOT NULL,
+ url TEXT NOT NULL,
+ price REAL NOT NULL,
+ weight REAL NOT NULL,
+ price_per REAL NOT NULL,
+ misc TEXT NOT NULL,
+ creation_time DATETIME NOT NULL
+ );" [])
+
+;; Current local time rendered as "YYYY-MM-DD HH:MM:SS"; used as the
+;; creation_time value when rows are written.
+(fn now []
+  (let [timestamp-format "%Y-%m-%d %H:%M:%S"]
+    (os.date timestamp-format)))
+
+;; Insert or replace `products` (a sequential table of product records) in
+;; the products table using one multi-row statement.
+;;
+;; id, site, category and title are passed through without a fallback; the
+;; remaining fields default to "" or 0 so the NOT NULL columns always get a
+;; value. Values are bound positionally, so the order below has to match the
+;; column order of the products table.
+;;
+;; A nil or empty list is a no-op: with zero rows the statement would end
+;; right after VALUES, which is not valid SQL and would abort the whole
+;; fetch run when a single shop yields nothing.
+(fn store-products [products]
+  (when (and products (next products))
+    (let [row-placeholder "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
+          sql (.. "INSERT OR REPLACE INTO products VALUES "
+                  (table.concat
+                    (map (fn [_ _] row-placeholder) products)
+                    ","))
+          ;; one timestamp for the whole batch instead of one per row
+          created (now)
+          vars (reduce
+                 (fn [_ product rest]
+                   (array.concat rest
+                                 [product.id
+                                  product.site
+                                  product.category
+                                  product.title
+                                  (or product.description "")
+                                  (or product.year 0)
+                                  (or product.image "")
+                                  (or product.url "")
+                                  (or product.price 0)
+                                  (or product.weight 0)
+                                  (or product.price-per 0)
+                                  (or product.misc "")
+                                  created]))
+                 products [])]
+      (luna.db.exec db sql vars))))
+
+;; Rebuild the full-text `search` table inside one transaction: drop every
+;; existing row, then re-insert (title, id, 'products') for each row of the
+;; products table, and commit.
+(fn populate-search-table []
+  (let [tx (luna.db.begin db)]
+    (luna.db.exec-tx tx "DELETE FROM search" [])
+    (luna.db.exec-tx tx "INSERT INTO search
+ SELECT title, id, 'products'
+ FROM products;" [])
+    (luna.db.commit tx)))
+
+;; Script body: ask each parser module for its products and store them, in
+;; the order artoftea, ipuer, ozchai, then rebuild the search table from the
+;; refreshed products.
+(each [_ shop (ipairs [artoftea ipuer ozchai])]
+  (store-products (shop.products)))
+(populate-search-table)
diff --git a/main.fnl b/bin/serve.fnl
index 9282517..f4ef3c7 100644
--- a/main.fnl
+++ b/bin/serve.fnl
@@ -1,6 +1,6 @@
(import-macros {: map : reduce} :lib.macro)
-(tset package :path (.. package.path ";./lib/lpeglj/?.lua"))
+(tset package :path (.. package.path ";./vendor/lpeglj/?.lua"))
(local io (require :io))
(local math (require :math))
@@ -10,49 +10,20 @@
(local array (require :lib.array))
(local str (require :lib.string))
-(local ozchai (require :site.ozchai))
-(local ipuer (require :site.ipuer))
-(local artoftea (require :site.artoftea))
-
-(print (fennel.view (ipuer.products)))
-(os.exit 1)
+(local ozchai (require :parser.ozchai))
+(local ipuer (require :parser.ipuer))
+(local artoftea (require :parser.artoftea))
(when _G.unpack
- (tset table :unpack _G.unpack))
+ (tset table :unpack _G.unpack))
+
+(local db (luna.db.open "file:var/db.sqlite?_journal=WAL&_sync=NORMAL"))
(local query-synonyms {
"шэн" "шен"
"шен" "шэн"
"доска" "чабань"
- "чабань" "доска"
-})
-
-(local db (luna.db.open "file:db.sqlite?_journal=WAL&_sync=NORMAL"))
-(luna.db.exec db "
- PRAGMA foreign_keys=ON;
- PRAGMA journal_mode=WAL;
- PRAGMA synchronous=NORMAL;
-
- CREATE VIRTUAL TABLE IF NOT EXISTS search USING fts5(name, fid, `table`);
-
- CREATE TABLE IF NOT EXISTS products (
- id TEXT NOT NULL PRIMARY KEY,
- site TEXT NOT NULL,
- category TEXT NOT NULL,
- title TEXT NOT NULL,
- description TEXT NOT NULL,
- year INT NOT NULL,
- image TEXT NOT NULL,
- url TEXT NOT NULL,
- price REAL NOT NULL,
- weight REAL NOT NULL,
- price_per REAL NOT NULL,
- misc TEXT NOT NULL,
- creation_time DATETIME NOT NULL
- );" [])
-
-(fn now []
- (os.date "%Y-%m-%d %H:%M:%S"))
+ "чабань" "доска"})
(fn unescape [s]
(assert (= (type s) :string))
@@ -175,45 +146,6 @@
(. total 1 1)
0)})
-(fn store-products [products]
- (local sql
- (.. "INSERT OR REPLACE INTO products VALUES "
- (table.concat
- (map (fn [_ _]
- "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
- products)
- ",")))
- (local vars
- (reduce
- (fn [_ product rest]
- (array.concat rest
- [product.id
- product.site
- product.category
- product.title
- (or product.description "")
- (or product.year 0)
- (or product.image "")
- (or product.url "")
- (or product.price 0)
- (or product.weight 0)
- (or product.price-per 0)
- (or product.misc "")
- (now)]))
- products []))
- (luna.db.exec db sql vars))
-
-(fn populate-search-table []
- (local tx (luna.db.begin db))
- (luna.db.exec-tx tx "DELETE FROM search" [])
- (luna.db.exec-tx tx "INSERT INTO search
- SELECT title, id, 'products' FROM products;" [])
- (luna.db.commit tx))
-
-; (store-products (ipuer.products))
-; (store-products (ozchai.products))
-; (populate-search-table)
-
(fn root-handler [{: path : query}]
(if (= path "/")
(let [headers {:content-type "text/html"}
diff --git a/fetcher.fnl b/fetcher.fnl
index d31f858..6d6d633 100644
--- a/fetcher.fnl
+++ b/fetcher.fnl
@@ -3,11 +3,11 @@
(local peg
(if (pick-values 1 (pcall require :lpeg))
(require :lpeg)
- (require :vendor.lpeglj)))
+ (require :lpeglj)))
(local array (require :lib.array))
(local json (require :vendor.json))
-(local parser (require :parser))
-(local http (require :http))
+(local parser (require :parser.parser))
+(local http (require :lib.http))
(fn retry [what times sleep]
(var result nil)
@@ -37,48 +37,37 @@
(luna.http.request "GET" url {:User-Agent (http.random-user-agent)} ""))
(if (= status 200)
- (let [products (parser.match-many html item-peg)]
- (if (or (= products nil) (= 0 (# products)))
+ (let [items (parser.match-many html item-peg)]
+ (if (or (= items nil) (= 0 (# items)))
knil
(do
(os.execute "sleep 1")
- (gather (+ page 1) (array.concat knil products)))))
+ (gather (+ page 1) (array.concat knil items)))))
(= status 404)
knil
(retry #(gather page knil) 3 1)))
(gather 1 []))
-(fn guess-category [title]
- (if (: (parser.anywhere (+ (peg.P "зеленый") "Зеленый")) :match title)
- "Зеленый чай"
- (: (parser.anywhere (+ (peg.P "Улун") "улун")) :match title)
- "Улун"
- (: (parser.anywhere (+ (peg.P "Белый") "белый")) :match title)
- "Белый чай"
- (: (parser.anywhere (+ (peg.P "Желтый") "желтый")) :match title)
- "Желтый чай"
- (: (parser.anywhere (+ (peg.P "Красный") "красный")) :match title)
- "Красный чай"
- "Неизвестная категория"))
-
(fn categorize-many [items category]
(map
(fn [_ item]
- (tset item :category
- (if category category (guess-category item.title)))
- item)
+ (tset item :category
+ (if category
+ category
+ (parser.guess-category item.title)))
+ item)
items))
-(fn from-html [url-formatter categories normalizer item-peg]
+(fn from-html [categories url-formatter item-peg normalizer]
(reduce
(fn [_ {: category : path} result]
(array.concat
result
(categorize-many
- (map #(normalizer $2)
- (walk-html-pages url-formatter path item-peg))
- category)))
+ (map #(normalizer $2)
+ (walk-html-pages url-formatter path item-peg))
+ category)))
categories
[]))
@@ -88,35 +77,34 @@
(print (.. "requesting " url))
(local (status _ content)
(luna.http.request
- "GET"
- url
+ "GET" url
{:User-Agent (http.random-user-agent)
:Content-Type "application/json"
:Accept "application/json"}
""))
(if (= status 200)
- (let [products (json.decode content)]
- (if (or (= products nil) (= 0 (# products)))
- knil
- (do
- (os.execute "sleep 1")
- (gather (+ page 1) (array.concat knil products)))))
+ (let [items (json.decode content)]
+ (if (or (= items nil) (= 0 (# items)))
+ knil
+ (do
+ (os.execute "sleep 1")
+ (gather (+ page 1) (array.concat knil items)))))
(= status 404)
knil
(retry #(gather page knil) 3 1)))
(gather 1 []))
-(fn from-json [url-formatter categories normalizer]
+(fn from-json [categories url-formatter response-destructor normalizer]
(reduce
(fn [_ {: category : path} result]
(array.concat
- result
- (categorize-many
- (map #(normalizer $2)
- (walk-json-pages url-formatter path))
- category)))
+ result
+ (categorize-many
+ (map #(normalizer $2)
+ (walk-json-pages url-formatter path))
+ category)))
categories
[]))
diff --git a/http.fnl b/lib/http.fnl
index fb208fc..fb208fc 100644
--- a/http.fnl
+++ b/lib/http.fnl
diff --git a/main.lua b/main.lua
index 5d9abad..6084c9a 100644
--- a/main.lua
+++ b/main.lua
@@ -1 +1,2 @@
-return require("vendor.fennel").install().dofile("main.fnl")
+-- Bootstrap: install the vendored Fennel loader and run the Fennel script
+-- whose path is given as the first command-line argument.
+local script = arg[1]
+assert(script, "lua: file name must be specified")
+local fennel = require("vendor.fennel").install()
+return fennel.dofile(script)
diff --git a/site/artoftea.fnl b/parser/artoftea.fnl
index 382b0ef..1f03ed1 100644
--- a/site/artoftea.fnl
+++ b/parser/artoftea.fnl
@@ -3,8 +3,8 @@
(local peg
(if (pick-values 1 (pcall require :lpeg))
(require :lpeg)
- (require :vendor.lpeglj)))
-(local parser (require :parser))
+ (require :lpeglj)))
+(local parser (require :parser.parser))
(local number (require :lib.number))
(local fetcher (require :fetcher))
@@ -45,16 +45,9 @@
(parser.tag :button {:type "*" :onclick "*" :class "*"} "Купить"))))
(fn normalize [product]
- (local year
- (number.string->number
- (: (parser.anywhere
- (* (peg.C (^ (peg.R "09") 4))
- (parser.maybe " ")
- (- "г" (peg.P "гр"))))
- :match product.title)))
+ (local year (parser.guess-year product.title))
(local weight (number.string->number product.weight))
(local price (number.string->number product.price))
-
{:site "artoftea"
:id product.id
:url product.url
@@ -69,10 +62,10 @@
(fn products []
(fetcher.from-html
- url-formatter
[{:path "redtea" :category "Красный чай"}
{:path "greentea" :category "Зеленый чай"}]
- normalize
- product-peg))
+ url-formatter
+ product-peg
+ normalize))
{: products}
diff --git a/site/ipuer.fnl b/parser/ipuer.fnl
index f878912..7fefd1b 100644
--- a/site/ipuer.fnl
+++ b/parser/ipuer.fnl
@@ -3,9 +3,9 @@
(local peg
(if (pick-values 1 (pcall require :lpeg))
(require :lpeg)
- (require :vendor.lpeglj)))
+ (require :lpeglj)))
(local number (require :lib.number))
-(local parser (require :parser))
+(local parser (require :parser.parser))
(local fetcher (require :fetcher))
(fn url-formatter [path page]
@@ -38,19 +38,8 @@
(parser.tag :a {:data-url "*" :class "*"} "В корзину")))))
(fn normalize [product]
- (local year
- (number.string->number
- (: (parser.anywhere
- (* (peg.C (^ (peg.R "09") 4))
- (parser.maybe " ")
- (- "г" (peg.P "гр"))))
- :match product.title)))
- (local weight
- (number.string->number
- (: (parser.anywhere (* (peg.C parser.pegs.number) (parser.maybe " ") "гр"))
- :match product.title)))
+ (local weight (parser.guess-weight product.title))
(local price (number.string->number product.price))
-
{:site "ipuer"
:id product.id
:url (.. "https://ipuer.ru" product.url)
@@ -58,7 +47,7 @@
:description ""
;; FIXME: parse all editions into different projects
:image (.. "https://ipuer.ru" product.image)
- :year year
+ :year (parser.guess-year product.title)
:price price
:weight weight
:category product.category
@@ -68,14 +57,14 @@
(fn products []
(fetcher.from-html
- url-formatter
[{:path "shen-puer" :category "Шен пуэр"}
{:path "shu-puer" :category "Шу пуэр"}
{:path "drugoy-chay"}
{:path "blagovoniya" :category "Благовония"}
{:path "posuda" :category "Посуда"}
{:path "282" :category "Посуда"}]
- normalize
- product-peg))
+ url-formatter
+ product-peg
+ normalize))
{: products}
diff --git a/site/ozchai.fnl b/parser/ozchai.fnl
index 90c4edc..6bf6286 100644
--- a/site/ozchai.fnl
+++ b/parser/ozchai.fnl
@@ -1,6 +1,6 @@
(import-macros {: map} :lib.macro)
-(local http (require :http))
+(local http (require :lib.http))
(local array (require :lib.array))
(local json (require :vendor.json))
@@ -47,7 +47,6 @@
(local gallery (json.decode product.gallery))
(local weight (string->number (. (. product.editions 1) :Вес)))
(local price (string->number (. (. product.editions 1) :price)))
-
{:site "ozchai"
:id product.url
:url product.url
diff --git a/parser.fnl b/parser/parser.fnl
index 314476c..b52f881 100644
--- a/parser.fnl
+++ b/parser/parser.fnl
@@ -1,9 +1,13 @@
(import-macros {: map} :lib.macro)
+
+(local number (require :lib.number))
+
(local peg
(if (pick-values 1 (pcall require :lpeg))
(require :lpeg)
- (require :vendor.lpeglj)))
+ (require :lpeglj)))
+;; "not" is taken >:(
(fn pnot [p]
(- (peg.P 1) (peg.P p)))
@@ -58,15 +62,16 @@
(local tag (peg.P tag))
(local attrs-count (accumulate [sum 0 _ _ (pairs attrs)] (+ 1 sum)))
(local attr-peg
- (fn [name value] (* (^ (peg.P name) 1)
- (if (~= value "")
- (* "=\""
- ;; wildcard for any value
- (if (= value "*")
- (till "\"")
- (peg.P value))
- "\"")
- (maybe (.. "=\" name \""))))))
+ ;; Pattern for a single attribute: one or more repetitions of `name`,
+ ;; followed by a quoted value. A value of "*" accepts anything up to the
+ ;; closing quote; any other non-empty value must match literally. An empty
+ ;; value means a value-less attribute, which may optionally be spelled
+ ;; name="name" (presumably the HTML boolean-attribute form).
+ (fn [name value]
+   (* (^ (peg.P name) 1)
+      (if (~= value "")
+          (* "=\""
+             ;; wildcard for any value
+             (if (= value "*")
+                 (till "\"")
+                 (peg.P value))
+             "\"")
+          ;; build ="<name>" from the attribute name; the previous single
+          ;; literal only ever matched the text =" name "
+          (maybe (.. "=\"" name "\""))))))
(local attrs-peg
(accumulate [sum pegs.spaces
_ rule
@@ -78,8 +83,8 @@
(^ pegs.space 0)
;; opening tag
(* "<" tag (^ pegs.space 0)
- (^ attrs-peg (- (* attrs-count 2) 1))
- (^ pegs.space 0) ">")
+ (^ attrs-peg (- (* attrs-count 2) 1))
+ (^ pegs.space 0) ">")
;; tag contents
(^ pegs.space 0)
(if (= contents "*")
@@ -89,20 +94,50 @@
;; closing tag
(* "</" tag ">")))
(peg.P (*
- (^ pegs.space 0)
- ;; opening tag
- (* "<" tag (^ pegs.space 0)
- (^ attrs-peg (- (* attrs-count 2) 1))
- (^ pegs.space 0) (maybe "/") ">")))))
+ (^ pegs.space 0)
+ ;; opening tag
+ (* "<" tag (^ pegs.space 0)
+ (^ attrs-peg (- (* attrs-count 2) 1))
+ (^ pegs.space 0) (maybe "/") ">")))))
(fn match-many [html tag]
(: (peg.Ct (^ (peg.Ct tag) 1))
:match html))
+;; Derive a category label from a product title by searching it for a known
+;; tea-type word in either of two spellings (capitalised or lower case).
+;; Rules are tried from top to bottom and the first hit wins; when nothing
+;; matches the "unknown category" label is returned.
+(fn guess-category [title]
+  (let [rules [["зеленый" "Зеленый" "Зеленый чай"]
+               ["Улун" "улун" "Улун"]
+               ["Белый" "белый" "Белый чай"]
+               ["Желтый" "желтый" "Желтый чай"]
+               ["Красный" "красный" "Красный чай"]]
+        found (accumulate [category nil
+                           _ [word alt label] (ipairs rules)]
+                ;; once a rule has matched, later rules are not evaluated
+                (or category
+                    (when (: (anywhere (+ (peg.P word) alt)) :match title)
+                      label)))]
+    (or found "Неизвестная категория")))
+
+;; Pull a year out of a product title. The pattern captures a run of at
+;; least four digits that is followed by an optional space (assuming `maybe`
+;; makes its argument optional) and the letter "г", but not when that "г"
+;; starts "гр" (the gram suffix). The capture is handed to
+;; number.string->number; what comes back when nothing matches depends on
+;; that helper.
+(fn guess-year [title]
+  (let [digits (peg.C (^ (peg.R "09") 4))
+        year-mark (- "г" (peg.P "гр"))
+        year-peg (anywhere (* digits (maybe " ") year-mark))]
+    (number.string->number (: year-peg :match title))))
+
+;; Pull a weight out of a product title: captures what pegs.number matches
+;; when it is followed by an optional space (assuming `maybe` makes its
+;; argument optional) and the "гр" suffix, then converts the capture with
+;; number.string->number.
+(fn guess-weight [title]
+  (let [amount (peg.C pegs.number)
+        weight-peg (anywhere (* amount (maybe " ") "гр"))
+        captured (: weight-peg :match title)]
+    (number.string->number captured)))
+
{: match-many
: tag
: anywhere
: till
: maybe
: pegs
- :not pnot}
+ :not pnot
+ : guess-category
+ : guess-year
+ : guess-weight}
diff --git a/run.sh b/run.sh
index f850f8d..89af2b3 100755
--- a/run.sh
+++ b/run.sh
@@ -1,3 +1,35 @@
#!/bin/sh
-LUA_CPATH="/usr/local/lib/lua/5.4/?.so;/usr/local/lib/lua/5.4/loadall.so;./?.so;$(guix build lua-lpeg)/lib/lua/5.3/?.so" \
- go run -tags fts5,puc ../. -n 1 main.lua
+set -e
+
+# Print the list of supported sub-commands (serve, fetch) and their
+# optional --jit flag to standard output.
+usage () {
+ echo "Usage:
+ serve [--jit] Serve the site pages
+ fetch [--jit] Populate the database with records"
+}
+
+serve () {
+ variant="$1"
+ if [ "$variant" = "--jit" ]; then
+ echo "running jit"
+ go run -tags fts5,jit ../. -n 1 main.lua bin/serve.fnl
+ else
+ echo "running puc"
+ LUA_CPATH="/usr/local/lib/lua/5.4/?.so;/usr/local/lib/lua/5.4/loadall.so;./?.so;$(guix build lua-lpeg)/lib/lua/5.3/?.so" \
+ go run -tags fts5,puc ../. -n 1 main.lua bin/serve.fnl
+ fi
+}
+
+# Run bin/fetch.fnl through main.lua on the Go host found in the parent
+# directory to populate the database. "--jit" as the first argument builds
+# with the jit tag; any other value builds with the puc tag and sets
+# LUA_CPATH so that the lpeg shared library from `guix build lua-lpeg` is on
+# the search path.
+fetch () {
+    # Read the flag from this function's own arguments: $variant is
+    # otherwise only assigned inside serve, so "fetch --jit" would silently
+    # fall through to the puc build.
+    variant="$1"
+    if [ "$variant" = "--jit" ]; then
+        echo "running jit"
+        go run -tags fts5,jit ../. -n 1 main.lua bin/fetch.fnl
+    else
+        echo "running puc"
+        LUA_CPATH="/usr/local/lib/lua/5.4/?.so;/usr/local/lib/lua/5.4/loadall.so;./?.so;$(guix build lua-lpeg)/lib/lua/5.3/?.so" \
+            go run -tags fts5,puc ../. -n 1 main.lua bin/fetch.fnl
+    fi
+}
+
+cmd="$1"
+[ -z "$cmd" ] || [ "$cmd" = "-h" ] || [ "$cmd" = "--help" ] && usage && exit 1
+shift
+"$cmd" "$@" || usage
diff --git a/runjit.sh b/runjit.sh
deleted file mode 100755
index dda6233..0000000
--- a/runjit.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-go run -tags fts5,jit ../. -n 1 main.lua
diff --git a/var/.gitkeep b/var/.gitkeep
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/var/.gitkeep