diff options
| author | unwox <me@unwox.com> | 2025-02-17 19:57:58 +0600 |
|---|---|---|
| committer | unwox <me@unwox.com> | 2025-02-17 19:57:58 +0600 |
| commit | fd807bf1952073aff866bd0961ad6929e07da80d (patch) | |
| tree | 43c653276b18040da2faee088e7734a345f39df6 /parser/daochai.fnl | |
| parent | 35541ad02fddd7cb1d8840f47b2a203e125b0acf (diff) | |
add daochai parser
Diffstat (limited to 'parser/daochai.fnl')
| -rw-r--r-- | parser/daochai.fnl | 187 |
1 files changed, 187 insertions, 0 deletions
;; Recovered from the collapsed diff:
;;   diff --git a/parser/daochai.fnl b/parser/daochai.fnl
;;   new file mode 100644, index 0000000..50ec508 (--- /dev/null, +++ b/parser/daochai.fnl)
;; The leading "+" diff markers were dropped and the line structure restored
;; so that the module reads (and parses) as Fennel again.

(import-macros {: reduce} :lib.macro)

;; Use lpeg when it can be required, otherwise fall back to lpeglj.
;; `pcall` returns the success flag first; `pick-values 1` keeps only that flag.
(local peg
  (if (pick-values 1 (pcall require :lpeg))
    (require :lpeg)
    (require :lpeglj)))
(local parser (require :parser.parser))
(local number (require :lib.number))
(local fetcher (require :fetcher))

(fn cyrillic-codepoint->utf8 [codepoint]
  "Encode `codepoint` as a two-byte UTF-8 sequence when it lies in the
Cyrillic block U+0400..U+04FF (1024..1279). Returns nil for anything else."
  (when (and codepoint (<= 1024 codepoint) (<= codepoint 1279))
    ;; Two-byte UTF-8 layout: 110xxxxx 10xxxxxx.
    (string.char (+ 192 (math.floor (/ codepoint 64)))
                 (+ 128 (% codepoint 64)))))

(fn html-cyrillic->utf [str]
  "Decode HTML numeric character references for Cyrillic letters in `str`.

Both decimal and hexadecimal references are handled when the code point is
in U+0400..U+04FF, which also covers the letters Ё and ё. Every other
reference, and all other text, is left untouched. Returns the decoded string.

NOTE(review): the lookup table this replaces appeared on the source page with
keys identical to their values (the entities were decoded by the page
rendering), which made the function a no-op. This assumes the original keys
were numeric references - confirm against the repository."
  (let [;; A nil result from the callback makes gsub keep the original match,
        ;; so references outside the Cyrillic block survive unchanged.
        ;; Binding in `let` keeps only gsub's first return value (the string).
        decimal-decoded (str:gsub "&#(%d+);"
                                  (fn [digits]
                                    (cyrillic-codepoint->utf8 (tonumber digits 10))))
        fully-decoded (decimal-decoded:gsub "&#[xX](%x+);"
                                            (fn [digits]
                                              (cyrillic-codepoint->utf8 (tonumber digits 16))))]
    fully-decoded))

(fn format-url [path page]
  "Build the listing URL for category `path`.

Pages after the first get a trailing page segment; page 1 gets none."
  (.. "https://daochai.ru/" path
      "/" (if (< 1 page) (.. "page-" page "/") "")))

;; Pattern for one product card. Named captures: image, url, title, price and,
;; when the first price alternative matches, weight.
;; NOTE(review): the Cyrillic literals below are plain text here, while the
;; original comments say the site serves these words HTML-encoded. The page
;; this module was recovered from decodes entities, so confirm against the
;; repository that the literals match the bytes actually present in the HTML.
(local product-peg
  (*
    (parser.anywhere
      (+
        ;; eager and lazy loaded versions of img
        (parser.tag :img {:class "ty-pict cm-image" ;; FRAGILE
                          :src (peg.Cg (parser.till "\"") :image)
                          :id "*"
                          :title "*"
                          :alt "*"
                          :srcset "*"
                          :width "*"
                          :height "*"})
        (parser.tag :img {:class "ty-pict cm-image" ;; FRAGILE
                          :src "*"
                          :data-src (peg.Cg (parser.till "\"") :image)
                          :id "*"
                          :title "*"
                          :alt "*"
                          :data-srcset "*"
                          :width "*"
                          :height "*"})))
    (parser.anywhere
      (parser.tag :a {:class "product-title"
                      :href (peg.Cg (parser.till "\"") :url)
                      :title "*"}
        (peg.Cg (parser.till "</a>") :title)))
    ;; Ordered choice: price followed by a weight first, price alone second.
    (+
      (*
        (parser.anywhere
          (parser.tag :span {:class "ty-price-num" :id "*"}
            (peg.Cg (parser.till "</span>") :price)))
        (parser.anywhere
          ;; "за" and "гр" words are html-encoded for some reason
          (* "за " (peg.Cg parser.pegs.number :weight) " гр.")))
      (parser.anywhere
        (parser.tag :span {:class "ty-price-num" :id "*"}
          (peg.Cg (parser.till "</span>") :price))))
    (parser.anywhere
      ;; "Купить"
      (parser.tag :span {} "Купить"))))

(fn normalize [product]
  "Turn the raw captures in `product` into a product record.

Reads title, url, image, price and weight from `product`. The title is run
through html-cyrillic->utf; year and volume are guessed from the decoded
title. price-per is price divided by weight, rounded up to one decimal
place, and is nil unless both values are present and weight is positive."
  (local title (html-cyrillic->utf product.title))
  (local year (parser.guess-year title))
  ;; weight is absent when only the price-only alternative of product-peg
  ;; matched. presumably number.string->number tolerates nil - verify in lib.number
  (local weight (number.string->number product.weight))
  (local price (number.string->number product.price))

  {:site "daochai"
   :title title
   :url product.url
   :description nil
   :image product.image
   :year year
   :price price
   :weight weight
   :volume (parser.guess-volume title)
   :price-per (when (and price weight (< 0 weight))
                (/ (math.ceil (* (/ price weight) 10)) 10))})

(fn products []
  "Hand the category list, format-url, product-peg and normalize to
fetcher.from-html and return whatever it returns."
  (fetcher.from-html
    [{:path "vid-chaya/pu-erh/shu-puer" :tags ["Шу пуэр"]}
     {:path "vid-chaya/pu-erh/shen" :tags ["Шен пуэр"]}
     {:path "vid-chaya/ulun" :tags ["Улун"]}
     {:path "vid-chaya/ulun/fudzjanskie-uluny" :tags ["Улун" "Фудзянь"]}
     {:path "vid-chaya/ulun/guandunskie-uluny" :tags ["Улун" "Гуандун"]}
     {:path "vid-chaya/ulun/uishanskie-uluny" :tags ["Улун" "Уишань"]}
     {:path "vid-chaya/ulun/taiwan-ulun" :tags ["Улун" "Тайвань"]}
     {:path "vid-chaya/ulun/yunnanskiy-uluny" :tags ["Улун" "Юннань"]}
     {:path "vid-chaya/krasnyj-chaj" :tags ["Красный чай"]}
     {:path "vid-chaya/zeljonyj-chaj" :tags ["Зеленый чай"]}
     {:path "vid-chaya/white" :tags ["Белый чай"]}
     {:path "vid-chaya/zheltyy-chay" :tags ["Желтый чай"]}
     {:path "vid-chaya/heicha" :tags ["Хэй ча"]}
     {:path "posuda/jianshuizitao" :tags ["Посуда" "Чайник"]}
     {:path "posuda/nisintao" :tags ["Посуда" "Чайник"]}
     {:path "posuda/chahu-taozi" :tags ["Посуда" "Чайник"]}
     {:path "posuda/chayniki-iz-chaochzhou" :tags ["Посуда" "Чайник"]}
     {:path "posuda/jingdezhen" :tags ["Посуда"]}
     {:path "posuda/chahai" :tags ["Посуда"]}
     {:path "posuda/gajvan" :tags ["Посуда"]}
     {:path "posuda/chahaj" :tags ["Посуда"]}
     {:path "posuda/chaban" :tags ["Посуда"]}
     {:path "posuda/chajnye-prudy" :tags ["Посуда"]}
     {:path "posuda/sito" :tags ["Посуда"]}
     {:path "posuda/posuda-chajnoj-ceremonii" :tags ["Посуда"]}
     {:path "posuda/termosy" :tags ["Посуда"]}
     {:path "posuda/alternativa" :tags ["Посуда"]}
     {:path "tea-accessorize" :tags ["Посуда"]}
     {:path "chay-i-chan/aroma" :tags ["Благовония"]}
     {:path "chay-i-chan/kurilnicy-i-podstavki-pod-blagovoniya" :tags ["Благовония"]}
     {:path "chay-i-chan/chetki" :tags ["Четки"]}
     {:path "chay-i-chan/dekorirovanie-prostranstva" :tags ["Декор"]}
     {:path "chay-i-chan/figurki-iz-dereva" :tags ["Фигурки"]}]
    format-url
    product-peg
    normalize))

{:products products :title "DaoChai" :url "https://daochai.ru"}
