summary | refs | log | tree | commit | diff
path: root/parser/daochai.fnl
diff options
context:
space:
mode:
Diffstat (limited to 'parser/daochai.fnl')
-rw-r--r-- parser/daochai.fnl 187
1 files changed, 187 insertions, 0 deletions
diff --git a/parser/daochai.fnl b/parser/daochai.fnl
new file mode 100644
index 0000000..50ec508
--- /dev/null
+++ b/parser/daochai.fnl
@@ -0,0 +1,187 @@
+(import-macros {: reduce} :lib.macro)
+
+(local peg
+ (if (pick-values 1 (pcall require :lpeg))
+ (require :lpeg)
+ (require :lpeglj)))
+(local parser (require :parser.parser))
+(local number (require :lib.number))
+(local fetcher (require :fetcher))
+
+;; Decode decimal HTML entities for Cyrillic letters into UTF-8.
+;;
+;; The shop emits Cyrillic text as numeric character references
+;; (e.g. "&#1050;&#1091;&#1087;&#1080;&#1090;&#1100;" for "Купить"), so product
+;; titles have to be decoded before they are stored or inspected.
+;;
+;; str - string that may contain "&#NNNN;" references.
+;; Returns a single string where every reference whose code point lies in
+;; the Unicode Cyrillic block (U+0400..U+04FF, decimal 1024..1279) is
+;; replaced by its UTF-8 encoding. All other text, including references
+;; outside that range, is left untouched.
+(fn html-cyrillic->utf [str]
+  (fn decode [digits]
+    (let [code (tonumber digits)]
+      ;; Returning nil from a gsub callback keeps the original match, so
+      ;; entities outside the Cyrillic block pass through unchanged.
+      (when (and code (<= 1024 code 1279))
+        ;; Code points below U+0800 are always two UTF-8 bytes:
+        ;; 110xxxxx 10xxxxxx. Done arithmetically because the `utf8`
+        ;; library is not available on Lua 5.1 / LuaJIT.
+        (string.char (+ 192 (math.floor (/ code 64)))
+                     (+ 128 (% code 64))))))
+
+  ;; gsub also returns the substitution count; keep only the string.
+  (pick-values 1 (str:gsub "&#(%d+);" decode)))
+
+;; Build the listing URL for a category.
+;;
+;; path - category path relative to the site root, without slashes at
+;;        either end.
+;; page - page number; pages after the first get a "page-N/" suffix.
+(fn format-url [path page]
+  (let [base (.. "https://daochai.ru/" path "/")]
+    (if (> page 1)
+        (.. base "page-" page "/")
+        base)))
+
+;; LPeg pattern describing one product card on a listing page.
+;;
+;; On LPeg patterns `*` is sequencing and `+` is ordered choice, so the
+;; pattern reads as: image, then title link, then price (optionally with a
+;; weight), then the "buy" button label.
+;;
+;; Named group captures created with peg.Cg: image, url, title, price and,
+;; when the weight note is present, weight.
+;; NOTE(review): parser.anywhere / parser.tag / parser.till come from
+;; parser.parser and are not visible here; presumably `anywhere` skips ahead
+;; to the first match, `tag` matches an HTML tag with the listed attributes
+;; ("*" looking like a wildcard value) and `till` consumes text up to the
+;; given delimiter -- confirm against that module.
+(local product-peg
+ (*
+ (parser.anywhere
+ (+
+ ;; eager and lazy loaded versions of img
+ ;; Eager variant: the image URL is captured from `src`.
+ (parser.tag :img {:class "ty-pict cm-image" ;; FRAGILE
+ :src (peg.Cg (parser.till "\"") :image)
+ :id "*"
+ :title "*"
+ :alt "*"
+ :srcset "*"
+ :width "*"
+ :height "*"})
+ ;; Lazy variant: `src` is ignored and the URL is captured from `data-src`.
+ (parser.tag :img {:class "ty-pict cm-image" ;; FRAGILE
+ :src "*"
+ :data-src (peg.Cg (parser.till "\"") :image)
+ :id "*"
+ :title "*"
+ :alt "*"
+ :data-srcset "*"
+ :width "*"
+ :height "*"})))
+ ;; Product link: href is captured as `url`, the link text as `title`.
+ (parser.anywhere
+ (parser.tag :a {:class "product-title"
+ :href (peg.Cg (parser.till "\"") :url)
+ :title "*"}
+ (peg.Cg (parser.till "</a>") :title)))
+ ;; Ordered choice: try price followed by a weight note first, and fall
+ ;; back to the price alone when no weight note can be matched.
+ (+
+ (*
+ (parser.anywhere
+ (parser.tag :span {:class "ty-price-num" :id "*"}
+ (peg.Cg (parser.till "</span>") :price)))
+ (parser.anywhere
+ ;; "за" and "гр" words are html-encoded for some reason
+ (* "&#1079;&#1072; " (peg.Cg parser.pegs.number :weight) " &#1075;&#1088;.")))
+ (parser.anywhere
+ (parser.tag :span {:class "ty-price-num" :id "*"}
+ (peg.Cg (parser.till "</span>") :price))))
+ ;; The html-encoded button label closes the pattern for one product.
+ (parser.anywhere
+ ;; "Купить"
+ (parser.tag :span {} "&#1050;&#1091;&#1087;&#1080;&#1090;&#1100;"))))
+
+;; Turn the raw captures of one product into the record returned to callers.
+;;
+;; product - table with the string fields title, url, image, price and
+;;           (optionally) weight, as captured by product-peg.
+;; Returns a table with the decoded title, numeric price and weight, the
+;; year and volume guessed from the title, and :price-per -- price divided
+;; by weight, rounded up to one decimal place -- or nil when price or a
+;; positive weight is unavailable.
+(fn normalize [product]
+  (let [name (html-cyrillic->utf product.title)
+        year-guess (parser.guess-year name)
+        weight-value (number.string->number product.weight)
+        price-value (number.string->number product.price)
+        ;; Only meaningful when both numbers parsed and weight is positive.
+        per-unit (when (and price-value weight-value (> weight-value 0))
+                   (/ (math.ceil (* 10 (/ price-value weight-value))) 10))]
+    {:site "daochai"
+     :title name
+     :url product.url
+     :description nil
+     :image product.image
+     :year year-guess
+     :price price-value
+     :weight weight-value
+     :volume (parser.guess-volume name)
+     :price-per per-unit}))
+
+;; Fetch every known category of the shop.
+;;
+;; Each category entry pairs a site path with the tags attached to it. The
+;; list is handed to fetcher.from-html together with the URL builder, the
+;; product pattern and the normalizer defined in this module; whatever
+;; fetcher.from-html returns is returned unchanged.
+(fn products []
+  (let [categories
+        [{:path "vid-chaya/pu-erh/shu-puer" :tags ["Шу пуэр"]}
+         {:path "vid-chaya/pu-erh/shen" :tags ["Шен пуэр"]}
+         {:path "vid-chaya/ulun" :tags ["Улун"]}
+         {:path "vid-chaya/ulun/fudzjanskie-uluny" :tags ["Улун" "Фудзянь"]}
+         {:path "vid-chaya/ulun/guandunskie-uluny" :tags ["Улун" "Гуандун"]}
+         {:path "vid-chaya/ulun/uishanskie-uluny" :tags ["Улун" "Уишань"]}
+         {:path "vid-chaya/ulun/taiwan-ulun" :tags ["Улун" "Тайвань"]}
+         {:path "vid-chaya/ulun/yunnanskiy-uluny" :tags ["Улун" "Юннань"]}
+         {:path "vid-chaya/krasnyj-chaj" :tags ["Красный чай"]}
+         {:path "vid-chaya/zeljonyj-chaj" :tags ["Зеленый чай"]}
+         {:path "vid-chaya/white" :tags ["Белый чай"]}
+         {:path "vid-chaya/zheltyy-chay" :tags ["Желтый чай"]}
+         {:path "vid-chaya/heicha" :tags ["Хэй ча"]}
+         {:path "posuda/jianshuizitao" :tags ["Посуда" "Чайник"]}
+         {:path "posuda/nisintao" :tags ["Посуда" "Чайник"]}
+         {:path "posuda/chahu-taozi" :tags ["Посуда" "Чайник"]}
+         {:path "posuda/chayniki-iz-chaochzhou" :tags ["Посуда" "Чайник"]}
+         {:path "posuda/jingdezhen" :tags ["Посуда"]}
+         {:path "posuda/chahai" :tags ["Посуда"]}
+         {:path "posuda/gajvan" :tags ["Посуда"]}
+         {:path "posuda/chahaj" :tags ["Посуда"]}
+         {:path "posuda/chaban" :tags ["Посуда"]}
+         {:path "posuda/chajnye-prudy" :tags ["Посуда"]}
+         {:path "posuda/sito" :tags ["Посуда"]}
+         {:path "posuda/posuda-chajnoj-ceremonii" :tags ["Посуда"]}
+         {:path "posuda/termosy" :tags ["Посуда"]}
+         {:path "posuda/alternativa" :tags ["Посуда"]}
+         {:path "tea-accessorize" :tags ["Посуда"]}
+         {:path "chay-i-chan/aroma" :tags ["Благовония"]}
+         {:path "chay-i-chan/kurilnicy-i-podstavki-pod-blagovoniya" :tags ["Благовония"]}
+         {:path "chay-i-chan/chetki" :tags ["Четки"]}
+         {:path "chay-i-chan/dekorirovanie-prostranstva" :tags ["Декор"]}
+         {:path "chay-i-chan/figurki-iz-dereva" :tags ["Фигурки"]}]]
+    (fetcher.from-html categories format-url product-peg normalize)))
+
+;; Module value: the `products` fetch function plus the shop's display title
+;; and base URL.
+{:products products :title "DaoChai" :url "https://daochai.ru"}