diff options
| author | unwox <me@unwox.com> | 2024-10-10 11:51:21 +0600 |
|---|---|---|
| committer | unwox <me@unwox.com> | 2024-10-10 11:51:38 +0600 |
| commit | dfcdaab3a84389e610a57fa82ee0d3a216f4821d (patch) | |
| tree | e0318abc43baaf83987381312a02c97ffe8b6f9b /parser | |
| parent | ddc11ff1cc31f17ff46523a649917940357825b5 (diff) | |
allow to specify several tags for a product (instead of one category)
Diffstat (limited to 'parser')
| -rw-r--r-- | parser/artoftea.fnl | 22 | ||||
| -rw-r--r-- | parser/chaekshop.fnl | 52 | ||||
| -rw-r--r-- | parser/clubcha.fnl | 80 | ||||
| -rw-r--r-- | parser/ipuer.fnl | 10 | ||||
| -rw-r--r-- | parser/parser.fnl | 50 |
5 files changed, 109 insertions, 105 deletions
diff --git a/parser/artoftea.fnl b/parser/artoftea.fnl index 1b8b6e1..f053833 100644 --- a/parser/artoftea.fnl +++ b/parser/artoftea.fnl @@ -64,18 +64,18 @@ (fn products [] (fetcher.from-html - [{:path "redtea" :category "Красный чай"} - {:path "greentea" :category "Зеленый чай"} - {:path "puer/shu-puer" :category "Шу пуэр"} - {:path "puer/sheng-puer" :category "Шен пуэр"} + [{:path "redtea" :tags ["Красный чай"]} + {:path "greentea" :tags ["Зеленый чай"]} + {:path "puer/shu-puer" :tags ["Шу пуэр"]} + {:path "puer/sheng-puer" :tags ["Шен пуэр"]} {:path "rassypnoi-puer"} - {:path "oolong" :category "Улун"} - {:path "whitetea" :category "Белый чай"} - {:path "yellowtea" :category "Желтый чай"} - {:path "xej-cha-chernyj-chaj" :category "Хэй ча"} - {:path "posuda" :category "Посуда"} - {:path "tipoty-lightking" :category "Посуда"} - {:path "accesories" :category "Аксессуары"} + {:path "oolong" :tags ["Улун"]} + {:path "whitetea" :tags ["Белый чай"]} + {:path "yellowtea" :tags ["Желтый чай"]} + {:path "xej-cha-chernyj-chaj" :tags ["Хэй ча"]} + {:path "posuda" :tags ["Посуда"]} + {:path "tipoty-lightking" :tags ["Посуда"]} + {:path "accesories" :tags ["Аксессуары"]} {:path "matcha-i-aksessuary"} {:path "upakovka"} {:path "eksklyuzivny-chay"} diff --git a/parser/chaekshop.fnl b/parser/chaekshop.fnl index b6e012a..13be8f1 100644 --- a/parser/chaekshop.fnl +++ b/parser/chaekshop.fnl @@ -52,35 +52,35 @@ (fn products [] (fetcher.from-html - [{:path "chay/belyy_chay" :category "Белый чай"} - {:path "chay/guandunskie_uluny" :category "Улун"} - {:path "chay/zhyeltyy_chay" :category "Желтый чай"} - {:path "chay/zelyenyy_chay" :category "Зеленый чай"} - {:path "chay/krasnyy_chay" :category "Красный чай"} - {:path "chay/severo_futszyanskie_uluny" :category "Улун"} - {:path "chay/tayvanskie_uluny" :category "Улун"} - {:path "chay/khey_cha" :category "Хэй ча"} - {:path "chay/shen_puer" :category "Шэн пуэр"} - {:path "chay/shu_puery" :category "Шу пуэр"} - {:path 
"chay/yuzhno_futszyanskie_uluny_" :category "Улун"} - {:path "chay/yunnanskie_uluny" :category "Улун"} - {:path "chay/gaba_chay" :category "Улун"} + [{:path "chay/belyy_chay" :tags ["Белый чай"]} + {:path "chay/guandunskie_uluny" :tags ["Улун"]} + {:path "chay/zhyeltyy_chay" :tags ["Желтый чай"]} + {:path "chay/zelyenyy_chay" :tags ["Зеленый чай"]} + {:path "chay/krasnyy_chay" :tags ["Красный чай"]} + {:path "chay/severo_futszyanskie_uluny" :tags ["Улун"]} + {:path "chay/tayvanskie_uluny" :tags ["Улун"]} + {:path "chay/khey_cha" :tags ["Хэй ча"]} + {:path "chay/shen_puer" :tags ["Шен пуэр"]} + {:path "chay/shu_puery" :tags ["Шу пуэр"]} + {:path "chay/yuzhno_futszyanskie_uluny_" :tags ["Улун"]} + {:path "chay/yunnanskie_uluny" :tags ["Улун"]} + {:path "chay/gaba_chay" :tags ["Улун"]} {:path "chay/na_kazhdyy_den"} {:path "chay/eksklyuziv"} - {:path "posuda_i_aksessuary/pialy" :category "Посуда"} - {:path "posuda_i_aksessuary/isinskaya_glina" :category "Посуда"} - {:path "posuda_i_aksessuary/keramika_iz_tszindechzhen" :category "Посуда"} - {:path "posuda_i_aksessuary/keramika_iz_tszyanshuy" :category "Посуда"} - {:path "posuda_i_aksessuary/kolby_termosy_nabory" :category "Посуда"} - {:path "posuda_i_aksessuary/chabani" :category "Посуда"} - {:path "posuda_i_aksessuary/chaynye_figurki" :category "Посуда"} - {:path "posuda_i_aksessuary/slivniki" :category "Посуда"} - {:path "posuda_i_aksessuary/gayvani" :category "Посуда"} - {:path "posuda_i_aksessuary/keramika_iz_tsinchzhou" :category "Посуда"} + {:path "posuda_i_aksessuary/pialy" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/isinskaya_glina" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/keramika_iz_tszindechzhen" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/keramika_iz_tszyanshuy" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/kolby_termosy_nabory" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/chabani" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/chaynye_figurki" :tags ["Посуда"]} + {:path 
"posuda_i_aksessuary/slivniki" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/gayvani" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/keramika_iz_tsinchzhou" :tags ["Посуда"]} {:path "posuda_i_aksessuary/aksessuary"} - {:path "posuda_i_aksessuary/pialy_tszindechzhen" :category "Посуда"} - {:path "posuda_i_aksessuary/keramika_dekhua" :category "Посуда"} - {:path "posuda_i_aksessuary/chayniki_iz_farfora" :category "Посуда"}] + {:path "posuda_i_aksessuary/pialy_tszindechzhen" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/keramika_dekhua" :tags ["Посуда"]} + {:path "posuda_i_aksessuary/chayniki_iz_farfora" :tags ["Посуда"]}] format-url product-peg normalize)) diff --git a/parser/clubcha.fnl b/parser/clubcha.fnl index e260fd1..200be94 100644 --- a/parser/clubcha.fnl +++ b/parser/clubcha.fnl @@ -68,53 +68,53 @@ (fn products [] (fetcher.from-html - [{:path "shu-puer" :category "Шу пуэр"} - {:path "shen-puer" :category "Шен пуэр"} + [{:path "shu-puer" :tags ["Шу пуэр"]} + {:path "shen-puer" :tags ["Шен пуэр"]} {:path "bai-hao-puer"} - {:path "hei-cha" :category "Хэй ча"} - {:path "yuzhnofutszyanskij-ulun" :category "Улун"} - {:path "guandunskij-ulun" :category "Улун"} - {:path "severofujianskiy-ulun" :category "Улун"} - {:path "taiwanskiy-ulun" :category "Улун"} - {:path "gaba-ulun-gaba-alishan-i-dr" :category "Улун"} - {:path "zelenyi-chay" :category "Зеленый чай"} - {:path "belyi-chay" :category "Белый чай"} - {:path "zheltyi-chay" :category "Желтый чай"} - {:path "kitajskij-krasnyj-chaj" :category "Красный чай"} - {:path "tajvanskij-krasnyj-chaj" :category "Красный чай"} + {:path "hei-cha" :tags ["Хэй ча"]} + {:path "yuzhnofutszyanskij-ulun" :tags ["Улун"]} + {:path "guandunskij-ulun" :tags ["Улун"]} + {:path "severofujianskiy-ulun" :tags ["Улун"]} + {:path "taiwanskiy-ulun" :tags ["Улун"]} + {:path "gaba-ulun-gaba-alishan-i-dr" :tags ["Улун"]} + {:path "zelenyi-chay" :tags ["Зеленый чай"]} + {:path "belyi-chay" :tags ["Белый чай"]} + {:path "zheltyi-chay" :tags 
["Желтый чай"]} + {:path "kitajskij-krasnyj-chaj" :tags ["Красный чай"]} + {:path "tajvanskij-krasnyj-chaj" :tags ["Красный чай"]} {:path "Nechaynyye_chai_i_tsvetochnyye_dobavki"} {:path "upakovka-dlya-puera"} {:path "upakovka-dlya-posudy"} {:path "iz-lichnoj-kollektsii"} - ;; FIXME: expand this category to subcategories because the main one + ;; FIXME: expand this tag to subcategories because the main one ;; does not contain all the products - {:path "gaivan" :category "Посуда"} - {:path "chashka" :category "Посуда"} - {:path "chaynyi-nabor" :category "Посуда"} - {:path "chaynik" :category "Посуда"} - {:path "glina-farfor" :category "Посуда"} - {:path "chaban-chajnaya-doska" :category "Посуда"} - {:path "chajnyj-podnos" :category "Посуда"} - {:path "chajnyj-prud" :category "Посуда"} - {:path "professionalnye" :category "Посуда"} - {:path "glinyanye" :category "Посуда"} - {:path "farforovye" :category "Посуда"} - {:path "zhestyanye-i-kartonnye" :category "Посуда"} - {:path "puernitsy" :category "Посуда"} - {:path "glina" :category "Посуда"} - {:path "farfor" :category "Посуда"} - {:path "steklo" :category "Посуда"} - {:path "kruzhka" :category "Посуда"} - {:path "Chaynyye_instrumenty" :category "Посуда"} - {:path "chahe" :category "Посуда"} - {:path "sito" :category "Посуда"} - {:path "Chaynyy_suvenir" :category "Посуда"} - {:path "Prochaya_utvar" :category "Посуда"} - {:path "farfor-eilong" :category "Посуда"} - {:path "butylka-dlya-zavarivaniya" :category "Посуда"} + {:path "gaivan" :tags ["Посуда"]} + {:path "chashka" :tags ["Посуда"]} + {:path "chaynyi-nabor" :tags ["Посуда"]} + {:path "chaynik" :tags ["Посуда"]} + {:path "glina-farfor" :tags ["Посуда"]} + {:path "chaban-chajnaya-doska" :tags ["Посуда"]} + {:path "chajnyj-podnos" :tags ["Посуда"]} + {:path "chajnyj-prud" :tags ["Посуда"]} + {:path "professionalnye" :tags ["Посуда"]} + {:path "glinyanye" :tags ["Посуда"]} + {:path "farforovye" :tags ["Посуда"]} + {:path "zhestyanye-i-kartonnye" :tags 
["Посуда"]} + {:path "puernitsy" :tags ["Посуда"]} + {:path "glina" :tags ["Посуда"]} + {:path "farfor" :tags ["Посуда"]} + {:path "steklo" :tags ["Посуда"]} + {:path "kruzhka" :tags ["Посуда"]} + {:path "Chaynyye_instrumenty" :tags ["Посуда"]} + {:path "chahe" :tags ["Посуда"]} + {:path "sito" :tags ["Посуда"]} + {:path "Chaynyy_suvenir" :tags ["Посуда"]} + {:path "Prochaya_utvar" :tags ["Посуда"]} + {:path "farfor-eilong" :tags ["Посуда"]} + {:path "butylka-dlya-zavarivaniya" :tags ["Посуда"]} {:path "Predmety_interyera_i_fenshuy"} - {:path "fo-chzhu-chetki" :category "Четки"} - {:path "nefrit-1" :category "Нефрит"}] + {:path "fo-chzhu-chetki" :tags ["Четки"]} + {:path "nefrit-1" :tags ["Нефрит"]}] format-url product-peg normalize)) diff --git a/parser/ipuer.fnl b/parser/ipuer.fnl index 8b0f6be..b16ead5 100644 --- a/parser/ipuer.fnl +++ b/parser/ipuer.fnl @@ -59,12 +59,12 @@ (fn products [] (fetcher.from-html - [{:path "shen-puer" :category "Шен пуэр"} - {:path "shu-puer" :category "Шу пуэр"} + [{:path "shen-puer" :tags ["Шен пуэр"]} + {:path "shu-puer" :tags ["Шу пуэр"]} {:path "drugoy-chay"} - {:path "blagovoniya" :category "Благовония"} - {:path "posuda" :category "Посуда"} - {:path "282" :category "Посуда"}] + {:path "blagovoniya" :tags ["Благовония"]} + {:path "posuda" :tags ["Посуда"]} + {:path "282" :tags ["Посуда"]}] format-url product-peg normalize)) diff --git a/parser/parser.fnl b/parser/parser.fnl index d5baafa..ca74847 100644 --- a/parser/parser.fnl +++ b/parser/parser.fnl @@ -105,39 +105,43 @@ :match html)) ;; FIXME: make guessing case insensitive -(fn guess-category [title] - (local title (if title (luna.utf.lower title) "")) +(fn guess-tags [text] + (local text (if text (luna.utf.lower text) "")) - (if (: (anywhere (peg.P "зеленый")) :match title) - "Зеленый чай" - (: (anywhere (peg.P "улун")) :match title) - "Улун" - (: (anywhere (peg.P "белый")) :match title) - "Белый чай" - (: (anywhere (peg.P "желтый")) :match title) - "Желтый чай" - (: 
(anywhere (peg.P "красный")) :match title) - "Красный чай" - (: (anywhere (peg.P "хэй ча")) :match title) - "Хэй ча" - (: (anywhere (+ (peg.P "матча") "маття")) :match title) - "Матча" - "Неизвестная категория")) + (if (: (anywhere (peg.P "зеленый")) :match text) + ["Зеленый чай"] + (: (anywhere (peg.P "улун")) :match text) + ["Улун"] + (: (anywhere (peg.P "белый")) :match text) + ["Белый чай"] + (: (anywhere (peg.P "желтый")) :match text) + ["Желтый чай"] + (: (anywhere (peg.P "красный")) :match text) + ["Красный чай"] + (: (anywhere (peg.P "хэй ча")) :match text) + ["Хэй ча"] + (: (anywhere (peg.P "шу пуэр")) :match text) + ["Шу пуэр"] + (: (anywhere (+ (peg.P "шен пуэр") "шэн пуэр")) :match text) + ["Шен пуэр"] + (: (anywhere (+ (peg.P "матча") "маття")) :match text) + ["Матча"] + [])) -(fn guess-year [title] +(fn guess-year [text] (number.string->number (: (anywhere (* (peg.C (^ (peg.R "09") 4)) (maybe " ") (- (+ (peg.P "г") "год") (peg.P "гр")))) - :match title))) + :match text))) -(fn guess-weight [title] - (if title +(fn guess-weight [text] + (if text (number.string->number (: (anywhere (* (peg.C pegs.number) (maybe " ") "гр")) - :match title)) + :match text)) nil)) {: match-many @@ -147,6 +151,6 @@ : maybe : pegs :not pnot - : guess-category + : guess-tags : guess-year : guess-weight} |
