diff options
| -rw-r--r-- | bin/fetch.fnl | 8 | ||||
| -rw-r--r-- | bin/serve.fnl | 3 | ||||
| -rw-r--r-- | parser/daochai.fnl | 187 | ||||
| -rw-r--r-- | parser/parser.fnl | 20 | ||||
| -rw-r--r-- | static/daochai.webp | bin | 0 -> 2344 bytes |
5 files changed, 208 insertions, 10 deletions
diff --git a/bin/fetch.fnl b/bin/fetch.fnl index 3b525d2..b632249 100644 --- a/bin/fetch.fnl +++ b/bin/fetch.fnl @@ -6,9 +6,12 @@ (local array (require :lib.array)) (local cache (require :lib.cache)) +(local {: must} (require :lib.utils)) + (local artoftea (require :parser.artoftea)) (local chaekshop (require :parser.chaekshop)) (local clubcha (require :parser.clubcha)) +(local daochai (require :parser.daochai)) (local gorkovchay (require :parser.gorkovchay)) (local ipuer (require :parser.ipuer)) (local kolokolnikovchai (require :parser.kolokolnikovchai)) @@ -16,7 +19,6 @@ (local ozchai (require :parser.ozchai)) (local suhexuan (require :parser.suhexuan)) (local tea108 (require :parser.tea108)) -(local {: must} (require :lib.utils)) (when _G.unpack (tset table :unpack _G.unpack)) @@ -164,8 +166,8 @@ FROM products;" [])) (must (luna.db.commit tx))) -(each [_ parser (pairs [gorkovchay moychay ozchai suhexuan ipuer artoftea - clubcha chaekshop kolokolnikovchai tea108])] + (each [_ parser (pairs [daochai gorkovchay moychay ozchai suhexuan ipuer + artoftea clubcha chaekshop kolokolnikovchai tea108])] (local products (parser.products)) (when (< 0 (# products)) ;; replace with with-tx diff --git a/bin/serve.fnl b/bin/serve.fnl index c442801..55f1ff0 100644 --- a/bin/serve.fnl +++ b/bin/serve.fnl @@ -492,7 +492,8 @@ :selected (if (= form.site val) "selected" nil)} (. (require (.. "parser." val)) :title)]) [:ozchai :suhexuan :kolokolnikovchai :tea108 :ipuer :clubcha - :artoftea :chaekshop :moychay :gorkovchay]))]] + :daochai :ozchai :chaekshop :artoftea :moychay + :gorkovchay]))]] [:div {} [:select {:name "sort"} [:option {:value ""} "~ Порядок ~"] diff --git a/parser/daochai.fnl b/parser/daochai.fnl new file mode 100644 index 0000000..50ec508 --- /dev/null +++ b/parser/daochai.fnl @@ -0,0 +1,187 @@ +(import-macros {: reduce} :lib.macro) + +(local peg + (if (pick-values 1 (pcall require :lpeg)) + (require :lpeg) + (require :lpeglj))) +(local parser (require :parser.parser)) +(local number (require :lib.number)) +(local fetcher (require :fetcher)) + +(fn html-cyrillic->utf [str] + (local replacement-map + {"А" "А" + "Б" "Б" + "В" "В" + "Г" "Г" + "Д" "Д" + "Е" "Е" + "Ж" "Ж" + "З" "З" + "И" "И" + "Й" "Й" + "К" "К" + "Л" "Л" + "М" "М" + "Н" "Н" + "О" "О" + "П" "П" + "Р" "Р" + "С" "С" + "Т" "Т" + "У" "У" + "Ф" "Ф" + "Х" "Х" + "Ц" "Ц" + "Ч" "Ч" + "Ш" "Ш" + "Щ" "Щ" + "Ъ" "Ъ" + "Ы" "Ы" + "Ь" "Ь" + "Э" "Э" + "Ю" "Ю" + "Я" "Я" + "а" "а" + "б" "б" + "в" "в" + "г" "г" + "д" "д" + "е" "е" + "ж" "ж" + "з" "з" + "и" "и" + "й" "й" + "к" "к" + "л" "л" + "м" "м" + "н" "н" + "о" "о" + "п" "п" + "р" "р" + "с" "с" + "т" "т" + "у" "у" + "ф" "ф" + "х" "х" + "ц" "ц" + "ч" "ч" + "ш" "ш" + "щ" "щ" + "ъ" "ъ" + "ы" "ы" + "ь" "ь" + "э" "э" + "ю" "ю" + "я" "я"}) + + (var result str) + (each [code letter (pairs replacement-map)] + (set result (: result :gsub code letter))) + result) + +(fn format-url [path page] + (.. "https://daochai.ru/" path + "/" (if (< 1 page) (.. "page-" page "/") ""))) + +(local product-peg + (* + (parser.anywhere + (+ + ;; eager and lazy loaded versions of img + (parser.tag :img {:class "ty-pict cm-image" ;; FRAGILE + :src (peg.Cg (parser.till "\"") :image) + :id "*" + :title "*" + :alt "*" + :srcset "*" + :width "*" + :height "*"}) + (parser.tag :img {:class "ty-pict cm-image" ;; FRAGILE + :src "*" + :data-src (peg.Cg (parser.till "\"") :image) + :id "*" + :title "*" + :alt "*" + :data-srcset "*" + :width "*" + :height "*"}))) + (parser.anywhere + (parser.tag :a {:class "product-title" + :href (peg.Cg (parser.till "\"") :url) + :title "*"} + (peg.Cg (parser.till "</a>") :title))) + (+ + (* + (parser.anywhere + (parser.tag :span {:class "ty-price-num" :id "*"} + (peg.Cg (parser.till "</span>") :price))) + (parser.anywhere + ;; "за" and "гр" words are html-encoded for some reason + (* "за " (peg.Cg parser.pegs.number :weight) " гр."))) + (parser.anywhere + (parser.tag :span {:class "ty-price-num" :id "*"} + (peg.Cg (parser.till "</span>") :price)))) + (parser.anywhere + ;; "Купить" + (parser.tag :span {} "Купить")))) + +(fn normalize [product] + (local title (html-cyrillic->utf product.title)) + (local year (parser.guess-year title)) + (local weight (number.string->number product.weight)) + (local price (number.string->number product.price)) + + {:site "daochai" + :title title + :url product.url + :description nil + :image product.image + :year year + :price price + :weight weight + :volume (parser.guess-volume title) + :price-per (if (and price weight (< 0 weight)) + (/ (math.ceil (* (/ price weight) 10)) 10) + nil)}) + +(fn products [] + (fetcher.from-html + [{:path "vid-chaya/pu-erh/shu-puer" :tags ["Шу пуэр"]} + {:path "vid-chaya/pu-erh/shen" :tags ["Шен пуэр"]} + {:path "vid-chaya/ulun" :tags ["Улун"]} + {:path "vid-chaya/ulun/fudzjanskie-uluny" :tags ["Улун" "Фудзянь"]} + {:path "vid-chaya/ulun/guandunskie-uluny" :tags ["Улун" "Гуандун"]} + {:path "vid-chaya/ulun/uishanskie-uluny" :tags ["Улун" "Уишань"]} + {:path "vid-chaya/ulun/taiwan-ulun" :tags ["Улун" "Тайвань"]} + {:path "vid-chaya/ulun/yunnanskiy-uluny" :tags ["Улун" "Юннань"]} + {:path "vid-chaya/krasnyj-chaj" :tags ["Красный чай"]} + {:path "vid-chaya/zeljonyj-chaj" :tags ["Зеленый чай"]} + {:path "vid-chaya/white" :tags ["Белый чай"]} + {:path "vid-chaya/zheltyy-chay" :tags ["Желтый чай"]} + {:path "vid-chaya/heicha" :tags ["Хэй ча"]} + {:path "posuda/jianshuizitao" :tags ["Посуда" "Чайник"]} + {:path "posuda/nisintao" :tags ["Посуда" "Чайник"]} + {:path "posuda/chahu-taozi" :tags ["Посуда" "Чайник"]} + {:path "posuda/chayniki-iz-chaochzhou" :tags ["Посуда" "Чайник"]} + {:path "posuda/jingdezhen" :tags ["Посуда"]} + {:path "posuda/chahai" :tags ["Посуда"]} + {:path "posuda/gajvan" :tags ["Посуда"]} + {:path "posuda/chahaj" :tags ["Посуда"]} + {:path "posuda/chaban" :tags ["Посуда"]} + {:path "posuda/chajnye-prudy" :tags ["Посуда"]} + {:path "posuda/sito" :tags ["Посуда"]} + {:path "posuda/posuda-chajnoj-ceremonii" :tags ["Посуда"]} + {:path "posuda/termosy" :tags ["Посуда"]} + {:path "posuda/alternativa" :tags ["Посуда"]} + {:path "tea-accessorize" :tags ["Посуда"]} + {:path "chay-i-chan/aroma" :tags ["Благовония"]} + {:path "chay-i-chan/kurilnicy-i-podstavki-pod-blagovoniya" :tags ["Благовония"]} + {:path "chay-i-chan/chetki" :tags ["Четки"]} + {:path "chay-i-chan/dekorirovanie-prostranstva" :tags ["Декор"]} + {:path "chay-i-chan/figurki-iz-dereva" :tags ["Фигурки"]}] + format-url + product-peg + normalize)) + +{:products products :title "DaoChai" :url "https://daochai.ru"} diff --git a/parser/parser.fnl b/parser/parser.fnl index 7e9469e..3a4d563 100644 --- a/parser/parser.fnl +++ b/parser/parser.fnl @@ -74,12 +74,20 @@ (fn [name value] (* (^ (peg.P name) 1) (if (~= value "") - (* "=\"" - ;; wildcard for any value - (if (= value "*") - (till "\"") - (peg.P value)) - "\"") + (+ + ;; attributes may be wrapped in both " and ' + (* "=\"" + ;; wildcard for any value + (if (= value "*") + (till "\"") + (peg.P value)) + "\"") + (* "='" + ;; wildcard for any value + (if (= value "*") + (till "'") + (peg.P value)) + "'")) (maybe (.. "=\" name \"")))))) (local attrs-peg (accumulate [sum pegs.spaces diff --git a/static/daochai.webp b/static/daochai.webp Binary files differnew file mode 100644 index 0000000..d77e881 --- /dev/null +++ b/static/daochai.webp |
