;; Site module for daochai.ru. Exposes the shop title, its base URL and a
;; `products` function that hands the category list, the page-URL builder,
;; the product pattern and the normalizer to `fetcher.from-html`.
(import-macros {: reduce} :lib.macro)

;; Use `lpeg` when it can be required, otherwise fall back to `lpeglj`.
(local peg
  (let [(ok? lib) (pcall require :lpeg)]
    (if ok? lib (require :lpeglj))))
(local parser (require :parser.parser))
(local number (require :lib.number))
(local fetcher (require :fetcher))

;; Decodes HTML character references for the 64 basic Russian letters
;; (code points 1040..1103, i.e. "А".."я") into UTF-8, in a single pass.
;;
;; str: text that may contain references such as "&#1063;".
;; Returns the decoded string; every other character, including references
;; outside the 1040..1103 range, is left untouched.
;;
;; The previous implementation looped over a 64-entry table whose keys and
;; values were the same letters, so it rewrote nothing.
;; NOTE(review): assumes the site emits decimal references (&#NNNN;);
;; confirm against live markup.
(fn html-cyrillic->utf [str]
  ;; gsub also returns the substitution count; callers expect the string only.
  (pick-values 1
    (str:gsub "&#(%d+);"
              (fn [digits]
                (let [code (tonumber digits)]
                  ;; Returning nil keeps the original match in place.
                  (when (<= 1040 code 1103)
                    ;; Two-byte UTF-8 sequence: 110xxxxx 10xxxxxx.
                    (string.char (+ 192 (math.floor (/ code 64)))
                                 (+ 128 (% code 64)))))))))

;; Builds the listing URL for a category path. Pages after the first get a
;; "page-N/" suffix; page 1 is the bare category URL.
(fn format-url [path page]
  (.. "https://daochai.ru/"
      path
      "/"
      (if (< 1 page)
          (.. "page-" page "/")
          "")))

;; Pattern for one product card. Captures the named groups :image, :url,
;; :title, :price and, when the "за N гр." fragment follows the price,
;; :weight. The card ends at the "Купить" span.
(local product-peg
  (let [;; The price span appears in both price alternatives below.
        ;; NOTE(review): the terminator passed to parser.till here and for
        ;; the title is an empty string; confirm that is intended.
        price-span (parser.anywhere
                     (parser.tag :span
                                 {:class "ty-price-num" :id "*"}
                                 (peg.Cg (parser.till "") :price)))]
    (* (parser.anywhere
         (+ ;; eager and lazy loaded versions of img
            (parser.tag :img
                        {:class "ty-pict cm-image" ;; FRAGILE
                         :src (peg.Cg (parser.till "\"") :image)
                         :id "*"
                         :title "*"
                         :alt "*"
                         :srcset "*"
                         :width "*"
                         :height "*"})
            (parser.tag :img
                        {:class "ty-pict cm-image" ;; FRAGILE
                         :src "*"
                         :data-src (peg.Cg (parser.till "\"") :image)
                         :id "*"
                         :title "*"
                         :alt "*"
                         :data-srcset "*"
                         :width "*"
                         :height "*"})))
       (parser.anywhere
         (parser.tag :a
                     {:class "product-title"
                      :href (peg.Cg (parser.till "\"") :url)
                      :title "*"}
                     (peg.Cg (parser.till "") :title)))
       ;; Prefer price followed by weight; fall back to price alone.
       (+ (* price-span
             (parser.anywhere
               ;; "за" and "гр" words are html-encoded for some reason
               ;; NOTE(review): the literals below are plain UTF-8, which
               ;; does not agree with the remark above; confirm which form
               ;; the page actually contains.
               (* "за " (peg.Cg parser.pegs.number :weight) " гр.")))
          price-span)
       (parser.anywhere
         ;; "Купить"
         (parser.tag :span {} "Купить")))))

;; Turns the raw captures of `product-peg` into the product record.
;;
;; product: table with the captured strings :title, :url, :image, :price and
;;          optionally :weight.
;; Returns a table with :site, :title, :url, :image, :year, :price, :weight,
;; :volume and :price-per. :price-per is price divided by weight, rounded up
;; to one decimal place; it is nil unless both values are present and the
;; weight is positive.
(fn normalize [product]
  (let [title (html-cyrillic->utf product.title)
        year (parser.guess-year title)
        weight (number.string->number product.weight)
        price (number.string->number product.price)]
    {:site "daochai"
     :title title
     :url product.url
     :description nil
     :image product.image
     :year year
     :price price
     :weight weight
     :volume (parser.guess-volume title)
     :price-per (when (and price weight (< 0 weight))
                  (/ (math.ceil (* (/ price weight) 10)) 10))}))

;; Passes every catalogue category (path plus the tags for that category)
;; to `fetcher.from-html` together with the URL builder, the product pattern
;; and the normalizer, and returns whatever the fetcher returns.
(fn products []
  (fetcher.from-html
    [{:path "vid-chaya/pu-erh/shu-puer" :tags ["Шу пуэр"]}
     {:path "vid-chaya/pu-erh/shen" :tags ["Шен пуэр"]}
     {:path "vid-chaya/ulun" :tags ["Улун"]}
     {:path "vid-chaya/ulun/fudzjanskie-uluny" :tags ["Улун" "Фудзянь"]}
     {:path "vid-chaya/ulun/guandunskie-uluny" :tags ["Улун" "Гуандун"]}
     {:path "vid-chaya/ulun/uishanskie-uluny" :tags ["Улун" "Уишань"]}
     {:path "vid-chaya/ulun/taiwan-ulun" :tags ["Улун" "Тайвань"]}
     {:path "vid-chaya/ulun/yunnanskiy-uluny" :tags ["Улун" "Юннань"]}
     {:path "vid-chaya/krasnyj-chaj" :tags ["Красный чай"]}
     {:path "vid-chaya/zeljonyj-chaj" :tags ["Зеленый чай"]}
     {:path "vid-chaya/white" :tags ["Белый чай"]}
     {:path "vid-chaya/zheltyy-chay" :tags ["Желтый чай"]}
     {:path "vid-chaya/heicha" :tags ["Хэй ча"]}
     {:path "posuda/jianshuizitao" :tags ["Посуда" "Чайник"]}
     {:path "posuda/nisintao" :tags ["Посуда" "Чайник"]}
     {:path "posuda/chahu-taozi" :tags ["Посуда" "Чайник"]}
     {:path "posuda/chayniki-iz-chaochzhou" :tags ["Посуда" "Чайник"]}
     {:path "posuda/jingdezhen" :tags ["Посуда"]}
     {:path "posuda/chahai" :tags ["Посуда"]}
     {:path "posuda/gajvan" :tags ["Посуда"]}
     {:path "posuda/chahaj" :tags ["Посуда"]}
     {:path "posuda/chaban" :tags ["Посуда"]}
     {:path "posuda/chajnye-prudy" :tags ["Посуда"]}
     {:path "posuda/sito" :tags ["Посуда"]}
     {:path "posuda/posuda-chajnoj-ceremonii" :tags ["Посуда"]}
     {:path "posuda/termosy" :tags ["Посуда"]}
     {:path "posuda/alternativa" :tags ["Посуда"]}
     {:path "tea-accessorize" :tags ["Посуда"]}
     {:path "chay-i-chan/aroma" :tags ["Благовония"]}
     {:path "chay-i-chan/kurilnicy-i-podstavki-pod-blagovoniya"
      :tags ["Благовония"]}
     {:path "chay-i-chan/chetki" :tags ["Четки"]}
     {:path "chay-i-chan/dekorirovanie-prostranstva" :tags ["Декор"]}
     {:path "chay-i-chan/figurki-iz-dereva" :tags ["Фигурки"]}]
    format-url
    product-peg
    normalize))

{:products products
 :title "DaoChai"
 :url "https://daochai.ru"}