summary refs log tree commit diff
diff options
context:
space:
mode:
author unwox <me@unwox.com> 2025-02-17 19:57:58 +0600
committer unwox <me@unwox.com> 2025-02-17 19:57:58 +0600
commit fd807bf1952073aff866bd0961ad6929e07da80d (patch)
tree 43c653276b18040da2faee088e7734a345f39df6
parent 35541ad02fddd7cb1d8840f47b2a203e125b0acf (diff)
add daochai parser
-rw-r--r-- bin/fetch.fnl 8
-rw-r--r-- bin/serve.fnl 3
-rw-r--r-- parser/daochai.fnl 187
-rw-r--r-- parser/parser.fnl 20
-rw-r--r-- static/daochai.webp bin 0 -> 2344 bytes
5 files changed, 208 insertions, 10 deletions
diff --git a/bin/fetch.fnl b/bin/fetch.fnl
index 3b525d2..b632249 100644
--- a/bin/fetch.fnl
+++ b/bin/fetch.fnl
@@ -6,9 +6,12 @@
(local array (require :lib.array))
(local cache (require :lib.cache))
+(local {: must} (require :lib.utils))
+
(local artoftea (require :parser.artoftea))
(local chaekshop (require :parser.chaekshop))
(local clubcha (require :parser.clubcha))
+(local daochai (require :parser.daochai))
(local gorkovchay (require :parser.gorkovchay))
(local ipuer (require :parser.ipuer))
(local kolokolnikovchai (require :parser.kolokolnikovchai))
@@ -16,7 +19,6 @@
(local ozchai (require :parser.ozchai))
(local suhexuan (require :parser.suhexuan))
(local tea108 (require :parser.tea108))
-(local {: must} (require :lib.utils))
(when _G.unpack
(tset table :unpack _G.unpack))
@@ -164,8 +166,8 @@
FROM products;" []))
(must (luna.db.commit tx)))
-(each [_ parser (pairs [gorkovchay moychay ozchai suhexuan ipuer artoftea
- clubcha chaekshop kolokolnikovchai tea108])]
+ (each [_ parser (pairs [daochai gorkovchay moychay ozchai suhexuan ipuer
+ artoftea clubcha chaekshop kolokolnikovchai tea108])]
(local products (parser.products))
(when (< 0 (# products))
;; replace with with-tx
diff --git a/bin/serve.fnl b/bin/serve.fnl
index c442801..55f1ff0 100644
--- a/bin/serve.fnl
+++ b/bin/serve.fnl
@@ -492,7 +492,8 @@
:selected (if (= form.site val) "selected" nil)}
(. (require (.. "parser." val)) :title)])
[:ozchai :suhexuan :kolokolnikovchai :tea108 :ipuer :clubcha
- :artoftea :chaekshop :moychay :gorkovchay]))]]
+ :daochai :ozchai :chaekshop :artoftea :moychay
+ :gorkovchay]))]]
[:div {}
[:select {:name "sort"}
[:option {:value ""} "~ Порядок ~"]
diff --git a/parser/daochai.fnl b/parser/daochai.fnl
new file mode 100644
index 0000000..50ec508
--- /dev/null
+++ b/parser/daochai.fnl
@@ -0,0 +1,187 @@
+(import-macros {: reduce} :lib.macro)
+
+(local peg
+ (if (pick-values 1 (pcall require :lpeg))
+ (require :lpeg)
+ (require :lpeglj)))
+(local parser (require :parser.parser))
+(local number (require :lib.number))
+(local fetcher (require :fetcher))
+
+;; Lookup table from decimal HTML entities to the UTF-8 encoded Russian
+;; letter: "&#1040;".."&#1103;" (А..я) plus "&#1025;" (Ё) and "&#1105;" (ё).
+;; Built once at load time instead of on every call.
+(local cyrillic-entities
+  (let [entities {}]
+    (fn add-entity [codepoint]
+      ;; Every code point added here lies in U+0080..U+07FF, which UTF-8
+      ;; encodes as two bytes: 110xxxxx 10xxxxxx.
+      (tset entities
+            (.. "&#" codepoint ";")
+            (string.char (+ 0xC0 (math.floor (/ codepoint 64)))
+                         (+ 0x80 (% codepoint 64)))))
+    ;; А..Я followed by а..я
+    (for [codepoint 1040 1103]
+      (add-entity codepoint))
+    ;; Ё and ё sit outside the contiguous А..я range
+    (add-entity 1025)
+    (add-entity 1105)
+    entities))
+
+;; Replaces decimal HTML entities of Russian letters in `str` with the
+;; letters themselves and returns the resulting string (a single value).
+;; Entities that are not in the table (e.g. "&#39;") and named entities
+;; (e.g. "&amp;") are left untouched.
+(fn html-cyrillic->utf [str]
+  ;; One pass over the string: gsub looks every "&#NNN;" match up in the
+  ;; table and keeps the original text when there is no entry for it.
+  ;; pick-values drops the substitution count gsub returns as second value.
+  (pick-values 1 (: str :gsub "&#%d+;" cyrillic-entities)))
+
+;; Builds the listing URL for category `path`. The first page is the bare
+;; category address; pages after the first get a "page-N/" suffix.
+(fn format-url [path page]
+  (let [category-url (.. "https://daochai.ru/" path "/")]
+    (if (> page 1)
+        (.. category-url "page-" page "/")
+        category-url)))
+
+;; PEG for a single product entry of a listing page. The capture group names
+;; (image, url, title, price, weight) match the fields `normalize` reads.
+;; An attribute value of "*" is the wildcard understood by parser.tag.
+(local product-peg
+  (let [;; named capture of everything up to the next double quote
+        quoted (fn [group] (peg.Cg (parser.till "\"") group))
+        ;; eagerly loaded picture: the address is in `src`
+        eager-img (parser.tag :img {:class "ty-pict cm-image" ;; FRAGILE
+                                    :src (quoted :image)
+                                    :id "*"
+                                    :title "*"
+                                    :alt "*"
+                                    :srcset "*"
+                                    :width "*"
+                                    :height "*"})
+        ;; lazily loaded picture: the address is in `data-src`
+        lazy-img (parser.tag :img {:class "ty-pict cm-image" ;; FRAGILE
+                                   :src "*"
+                                   :data-src (quoted :image)
+                                   :id "*"
+                                   :title "*"
+                                   :alt "*"
+                                   :data-srcset "*"
+                                   :width "*"
+                                   :height "*"})
+        title-link (parser.tag :a {:class "product-title"
+                                   :href (quoted :url)
+                                   :title "*"}
+                               (peg.Cg (parser.till "</a>") :title))
+        price (parser.anywhere
+                (parser.tag :span {:class "ty-price-num" :id "*"}
+                            (peg.Cg (parser.till "</span>") :price)))
+        ;; "за" and "гр" words are html-encoded for some reason
+        weight (parser.anywhere
+                 (* "&#1079;&#1072; "
+                    (peg.Cg parser.pegs.number :weight)
+                    " &#1075;&#1088;."))
+        ;; "Купить"
+        buy-label (parser.tag :span {}
+                              "&#1050;&#1091;&#1087;&#1080;&#1090;&#1100;")]
+    (*
+      (parser.anywhere (+ eager-img lazy-img))
+      (parser.anywhere title-link)
+      ;; price followed by a weight when one is found, otherwise price alone
+      (+ (* price weight) price)
+      (parser.anywhere buy-label))))
+
+;; Turns the raw captures of one matched product into the product record:
+;; decodes the title, converts price and weight with number.string->number
+;; and derives year, volume and the price per weight unit.
+(fn normalize [product]
+  (let [decoded-title (html-cyrillic->utf product.title)
+        guessed-year (parser.guess-year decoded-title)
+        parsed-weight (number.string->number product.weight)
+        parsed-price (number.string->number product.price)]
+    {:site "daochai"
+     :title decoded-title
+     :url product.url
+     :description nil
+     :image product.image
+     :year guessed-year
+     :price parsed-price
+     :weight parsed-weight
+     :volume (parser.guess-volume decoded-title)
+     ;; rounded up to one decimal place; nil when price or weight is
+     ;; missing or the weight is not positive
+     :price-per (when (and parsed-price parsed-weight (> parsed-weight 0))
+                  (/ (math.ceil (* (/ parsed-price parsed-weight) 10)) 10))}))
+
+;; Hands every known daochai.ru category (path plus the tags listed for it),
+;; together with format-url, product-peg and normalize, to fetcher.from-html
+;; and returns whatever that call returns.
+(fn products []
+  ;; builds one category entry: the path followed by any number of tags
+  (fn category [path ...]
+    {:path path :tags [...]})
+
+  (fetcher.from-html
+    [(category "vid-chaya/pu-erh/shu-puer" "Шу пуэр")
+     (category "vid-chaya/pu-erh/shen" "Шен пуэр")
+     (category "vid-chaya/ulun" "Улун")
+     (category "vid-chaya/ulun/fudzjanskie-uluny" "Улун" "Фудзянь")
+     (category "vid-chaya/ulun/guandunskie-uluny" "Улун" "Гуандун")
+     (category "vid-chaya/ulun/uishanskie-uluny" "Улун" "Уишань")
+     (category "vid-chaya/ulun/taiwan-ulun" "Улун" "Тайвань")
+     (category "vid-chaya/ulun/yunnanskiy-uluny" "Улун" "Юннань")
+     (category "vid-chaya/krasnyj-chaj" "Красный чай")
+     (category "vid-chaya/zeljonyj-chaj" "Зеленый чай")
+     (category "vid-chaya/white" "Белый чай")
+     (category "vid-chaya/zheltyy-chay" "Желтый чай")
+     (category "vid-chaya/heicha" "Хэй ча")
+     (category "posuda/jianshuizitao" "Посуда" "Чайник")
+     (category "posuda/nisintao" "Посуда" "Чайник")
+     (category "posuda/chahu-taozi" "Посуда" "Чайник")
+     (category "posuda/chayniki-iz-chaochzhou" "Посуда" "Чайник")
+     (category "posuda/jingdezhen" "Посуда")
+     (category "posuda/chahai" "Посуда")
+     (category "posuda/gajvan" "Посуда")
+     (category "posuda/chahaj" "Посуда")
+     (category "posuda/chaban" "Посуда")
+     (category "posuda/chajnye-prudy" "Посуда")
+     (category "posuda/sito" "Посуда")
+     (category "posuda/posuda-chajnoj-ceremonii" "Посуда")
+     (category "posuda/termosy" "Посуда")
+     (category "posuda/alternativa" "Посуда")
+     (category "tea-accessorize" "Посуда")
+     (category "chay-i-chan/aroma" "Благовония")
+     (category "chay-i-chan/kurilnicy-i-podstavki-pod-blagovoniya" "Благовония")
+     (category "chay-i-chan/chetki" "Четки")
+     (category "chay-i-chan/dekorirovanie-prostranstva" "Декор")
+     (category "chay-i-chan/figurki-iz-dereva" "Фигурки")]
+    format-url
+    product-peg
+    normalize))
+
+;; Module table: the product fetcher plus the site's display title and URL.
+{: products
+ :title "DaoChai"
+ :url "https://daochai.ru"}
diff --git a/parser/parser.fnl b/parser/parser.fnl
index 7e9469e..3a4d563 100644
--- a/parser/parser.fnl
+++ b/parser/parser.fnl
@@ -74,12 +74,20 @@
(fn [name value] (*
(^ (peg.P name) 1)
(if (~= value "")
- (* "=\""
- ;; wildcard for any value
- (if (= value "*")
- (till "\"")
- (peg.P value))
- "\"")
+ (+
+ ;; attributes may be wrapped in both " and '
+ (* "=\""
+ ;; wildcard for any value
+ (if (= value "*")
+ (till "\"")
+ (peg.P value))
+ "\"")
+ (* "='"
+ ;; wildcard for any value
+ (if (= value "*")
+ (till "'")
+ (peg.P value))
+ "'"))
(maybe (.. "=\" name \""))))))
(local attrs-peg
(accumulate [sum pegs.spaces
diff --git a/static/daochai.webp b/static/daochai.webp
new file mode 100644
index 0000000..d77e881
--- /dev/null
+++ b/static/daochai.webp
Binary files differ