summary | refs | log | tree | commit | diff
path: root/parser
diff options
context:
space:
mode:
author: unwox <me@unwox.com> 2024-10-10 11:51:21 +0600
committer: unwox <me@unwox.com> 2024-10-10 11:51:38 +0600
commit: dfcdaab3a84389e610a57fa82ee0d3a216f4821d (patch)
tree: e0318abc43baaf83987381312a02c97ffe8b6f9b /parser
parent: ddc11ff1cc31f17ff46523a649917940357825b5 (diff)
allow to specify several tags for a product (instead of one category)
Diffstat (limited to 'parser')
-rw-r--r-- parser/artoftea.fnl 22
-rw-r--r-- parser/chaekshop.fnl 52
-rw-r--r-- parser/clubcha.fnl 80
-rw-r--r-- parser/ipuer.fnl 10
-rw-r--r-- parser/parser.fnl 50
5 files changed, 109 insertions, 105 deletions
diff --git a/parser/artoftea.fnl b/parser/artoftea.fnl
index 1b8b6e1..f053833 100644
--- a/parser/artoftea.fnl
+++ b/parser/artoftea.fnl
@@ -64,18 +64,18 @@
(fn products []
(fetcher.from-html
- [{:path "redtea" :category "Красный чай"}
- {:path "greentea" :category "Зеленый чай"}
- {:path "puer/shu-puer" :category "Шу пуэр"}
- {:path "puer/sheng-puer" :category "Шен пуэр"}
+ [{:path "redtea" :tags ["Красный чай"]}
+ {:path "greentea" :tags ["Зеленый чай"]}
+ {:path "puer/shu-puer" :tags ["Шу пуэр"]}
+ {:path "puer/sheng-puer" :tags ["Шен пуэр"]}
{:path "rassypnoi-puer"}
- {:path "oolong" :category "Улун"}
- {:path "whitetea" :category "Белый чай"}
- {:path "yellowtea" :category "Желтый чай"}
- {:path "xej-cha-chernyj-chaj" :category "Хэй ча"}
- {:path "posuda" :category "Посуда"}
- {:path "tipoty-lightking" :category "Посуда"}
- {:path "accesories" :category "Аксессуары"}
+ {:path "oolong" :tags ["Улун"]}
+ {:path "whitetea" :tags ["Белый чай"]}
+ {:path "yellowtea" :tags ["Желтый чай"]}
+ {:path "xej-cha-chernyj-chaj" :tags ["Хэй ча"]}
+ {:path "posuda" :tags ["Посуда"]}
+ {:path "tipoty-lightking" :tags ["Посуда"]}
+ {:path "accesories" :tags ["Аксессуары"]}
{:path "matcha-i-aksessuary"}
{:path "upakovka"}
{:path "eksklyuzivny-chay"}
diff --git a/parser/chaekshop.fnl b/parser/chaekshop.fnl
index b6e012a..13be8f1 100644
--- a/parser/chaekshop.fnl
+++ b/parser/chaekshop.fnl
@@ -52,35 +52,35 @@
(fn products []
(fetcher.from-html
- [{:path "chay/belyy_chay" :category "Белый чай"}
- {:path "chay/guandunskie_uluny" :category "Улун"}
- {:path "chay/zhyeltyy_chay" :category "Желтый чай"}
- {:path "chay/zelyenyy_chay" :category "Зеленый чай"}
- {:path "chay/krasnyy_chay" :category "Красный чай"}
- {:path "chay/severo_futszyanskie_uluny" :category "Улун"}
- {:path "chay/tayvanskie_uluny" :category "Улун"}
- {:path "chay/khey_cha" :category "Хэй ча"}
- {:path "chay/shen_puer" :category "Шэн пуэр"}
- {:path "chay/shu_puery" :category "Шу пуэр"}
- {:path "chay/yuzhno_futszyanskie_uluny_" :category "Улун"}
- {:path "chay/yunnanskie_uluny" :category "Улун"}
- {:path "chay/gaba_chay" :category "Улун"}
+ [{:path "chay/belyy_chay" :tags ["Белый чай"]}
+ {:path "chay/guandunskie_uluny" :tags ["Улун"]}
+ {:path "chay/zhyeltyy_chay" :tags ["Желтый чай"]}
+ {:path "chay/zelyenyy_chay" :tags ["Зеленый чай"]}
+ {:path "chay/krasnyy_chay" :tags ["Красный чай"]}
+ {:path "chay/severo_futszyanskie_uluny" :tags ["Улун"]}
+ {:path "chay/tayvanskie_uluny" :tags ["Улун"]}
+ {:path "chay/khey_cha" :tags ["Хэй ча"]}
+ {:path "chay/shen_puer" :tags ["Шен пуэр"]}
+ {:path "chay/shu_puery" :tags ["Шу пуэр"]}
+ {:path "chay/yuzhno_futszyanskie_uluny_" :tags ["Улун"]}
+ {:path "chay/yunnanskie_uluny" :tags ["Улун"]}
+ {:path "chay/gaba_chay" :tags ["Улун"]}
{:path "chay/na_kazhdyy_den"}
{:path "chay/eksklyuziv"}
- {:path "posuda_i_aksessuary/pialy" :category "Посуда"}
- {:path "posuda_i_aksessuary/isinskaya_glina" :category "Посуда"}
- {:path "posuda_i_aksessuary/keramika_iz_tszindechzhen" :category "Посуда"}
- {:path "posuda_i_aksessuary/keramika_iz_tszyanshuy" :category "Посуда"}
- {:path "posuda_i_aksessuary/kolby_termosy_nabory" :category "Посуда"}
- {:path "posuda_i_aksessuary/chabani" :category "Посуда"}
- {:path "posuda_i_aksessuary/chaynye_figurki" :category "Посуда"}
- {:path "posuda_i_aksessuary/slivniki" :category "Посуда"}
- {:path "posuda_i_aksessuary/gayvani" :category "Посуда"}
- {:path "posuda_i_aksessuary/keramika_iz_tsinchzhou" :category "Посуда"}
+ {:path "posuda_i_aksessuary/pialy" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/isinskaya_glina" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/keramika_iz_tszindechzhen" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/keramika_iz_tszyanshuy" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/kolby_termosy_nabory" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/chabani" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/chaynye_figurki" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/slivniki" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/gayvani" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/keramika_iz_tsinchzhou" :tags ["Посуда"]}
{:path "posuda_i_aksessuary/aksessuary"}
- {:path "posuda_i_aksessuary/pialy_tszindechzhen" :category "Посуда"}
- {:path "posuda_i_aksessuary/keramika_dekhua" :category "Посуда"}
- {:path "posuda_i_aksessuary/chayniki_iz_farfora" :category "Посуда"}]
+ {:path "posuda_i_aksessuary/pialy_tszindechzhen" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/keramika_dekhua" :tags ["Посуда"]}
+ {:path "posuda_i_aksessuary/chayniki_iz_farfora" :tags ["Посуда"]}]
format-url
product-peg
normalize))
diff --git a/parser/clubcha.fnl b/parser/clubcha.fnl
index e260fd1..200be94 100644
--- a/parser/clubcha.fnl
+++ b/parser/clubcha.fnl
@@ -68,53 +68,53 @@
(fn products []
(fetcher.from-html
- [{:path "shu-puer" :category "Шу пуэр"}
- {:path "shen-puer" :category "Шен пуэр"}
+ [{:path "shu-puer" :tags ["Шу пуэр"]}
+ {:path "shen-puer" :tags ["Шен пуэр"]}
{:path "bai-hao-puer"}
- {:path "hei-cha" :category "Хэй ча"}
- {:path "yuzhnofutszyanskij-ulun" :category "Улун"}
- {:path "guandunskij-ulun" :category "Улун"}
- {:path "severofujianskiy-ulun" :category "Улун"}
- {:path "taiwanskiy-ulun" :category "Улун"}
- {:path "gaba-ulun-gaba-alishan-i-dr" :category "Улун"}
- {:path "zelenyi-chay" :category "Зеленый чай"}
- {:path "belyi-chay" :category "Белый чай"}
- {:path "zheltyi-chay" :category "Желтый чай"}
- {:path "kitajskij-krasnyj-chaj" :category "Красный чай"}
- {:path "tajvanskij-krasnyj-chaj" :category "Красный чай"}
+ {:path "hei-cha" :tags ["Хэй ча"]}
+ {:path "yuzhnofutszyanskij-ulun" :tags ["Улун"]}
+ {:path "guandunskij-ulun" :tags ["Улун"]}
+ {:path "severofujianskiy-ulun" :tags ["Улун"]}
+ {:path "taiwanskiy-ulun" :tags ["Улун"]}
+ {:path "gaba-ulun-gaba-alishan-i-dr" :tags ["Улун"]}
+ {:path "zelenyi-chay" :tags ["Зеленый чай"]}
+ {:path "belyi-chay" :tags ["Белый чай"]}
+ {:path "zheltyi-chay" :tags ["Желтый чай"]}
+ {:path "kitajskij-krasnyj-chaj" :tags ["Красный чай"]}
+ {:path "tajvanskij-krasnyj-chaj" :tags ["Красный чай"]}
{:path "Nechaynyye_chai_i_tsvetochnyye_dobavki"}
{:path "upakovka-dlya-puera"}
{:path "upakovka-dlya-posudy"}
{:path "iz-lichnoj-kollektsii"}
- ;; FIXME: expand this category to subcategories because the main one
+ ;; FIXME: expand this category into subcategories because the main one
;; does not contain all the products
- {:path "gaivan" :category "Посуда"}
- {:path "chashka" :category "Посуда"}
- {:path "chaynyi-nabor" :category "Посуда"}
- {:path "chaynik" :category "Посуда"}
- {:path "glina-farfor" :category "Посуда"}
- {:path "chaban-chajnaya-doska" :category "Посуда"}
- {:path "chajnyj-podnos" :category "Посуда"}
- {:path "chajnyj-prud" :category "Посуда"}
- {:path "professionalnye" :category "Посуда"}
- {:path "glinyanye" :category "Посуда"}
- {:path "farforovye" :category "Посуда"}
- {:path "zhestyanye-i-kartonnye" :category "Посуда"}
- {:path "puernitsy" :category "Посуда"}
- {:path "glina" :category "Посуда"}
- {:path "farfor" :category "Посуда"}
- {:path "steklo" :category "Посуда"}
- {:path "kruzhka" :category "Посуда"}
- {:path "Chaynyye_instrumenty" :category "Посуда"}
- {:path "chahe" :category "Посуда"}
- {:path "sito" :category "Посуда"}
- {:path "Chaynyy_suvenir" :category "Посуда"}
- {:path "Prochaya_utvar" :category "Посуда"}
- {:path "farfor-eilong" :category "Посуда"}
- {:path "butylka-dlya-zavarivaniya" :category "Посуда"}
+ {:path "gaivan" :tags ["Посуда"]}
+ {:path "chashka" :tags ["Посуда"]}
+ {:path "chaynyi-nabor" :tags ["Посуда"]}
+ {:path "chaynik" :tags ["Посуда"]}
+ {:path "glina-farfor" :tags ["Посуда"]}
+ {:path "chaban-chajnaya-doska" :tags ["Посуда"]}
+ {:path "chajnyj-podnos" :tags ["Посуда"]}
+ {:path "chajnyj-prud" :tags ["Посуда"]}
+ {:path "professionalnye" :tags ["Посуда"]}
+ {:path "glinyanye" :tags ["Посуда"]}
+ {:path "farforovye" :tags ["Посуда"]}
+ {:path "zhestyanye-i-kartonnye" :tags ["Посуда"]}
+ {:path "puernitsy" :tags ["Посуда"]}
+ {:path "glina" :tags ["Посуда"]}
+ {:path "farfor" :tags ["Посуда"]}
+ {:path "steklo" :tags ["Посуда"]}
+ {:path "kruzhka" :tags ["Посуда"]}
+ {:path "Chaynyye_instrumenty" :tags ["Посуда"]}
+ {:path "chahe" :tags ["Посуда"]}
+ {:path "sito" :tags ["Посуда"]}
+ {:path "Chaynyy_suvenir" :tags ["Посуда"]}
+ {:path "Prochaya_utvar" :tags ["Посуда"]}
+ {:path "farfor-eilong" :tags ["Посуда"]}
+ {:path "butylka-dlya-zavarivaniya" :tags ["Посуда"]}
{:path "Predmety_interyera_i_fenshuy"}
- {:path "fo-chzhu-chetki" :category "Четки"}
- {:path "nefrit-1" :category "Нефрит"}]
+ {:path "fo-chzhu-chetki" :tags ["Четки"]}
+ {:path "nefrit-1" :tags ["Нефрит"]}]
format-url
product-peg
normalize))
diff --git a/parser/ipuer.fnl b/parser/ipuer.fnl
index 8b0f6be..b16ead5 100644
--- a/parser/ipuer.fnl
+++ b/parser/ipuer.fnl
@@ -59,12 +59,12 @@
(fn products []
(fetcher.from-html
- [{:path "shen-puer" :category "Шен пуэр"}
- {:path "shu-puer" :category "Шу пуэр"}
+ [{:path "shen-puer" :tags ["Шен пуэр"]}
+ {:path "shu-puer" :tags ["Шу пуэр"]}
{:path "drugoy-chay"}
- {:path "blagovoniya" :category "Благовония"}
- {:path "posuda" :category "Посуда"}
- {:path "282" :category "Посуда"}]
+ {:path "blagovoniya" :tags ["Благовония"]}
+ {:path "posuda" :tags ["Посуда"]}
+ {:path "282" :tags ["Посуда"]}]
format-url
product-peg
normalize))
diff --git a/parser/parser.fnl b/parser/parser.fnl
index d5baafa..ca74847 100644
--- a/parser/parser.fnl
+++ b/parser/parser.fnl
@@ -105,39 +105,43 @@
:match html))
;; FIXME: make guessing case insensitive
-(fn guess-category [title]
- (local title (if title (luna.utf.lower title) ""))
+(fn guess-tags [text]
+ (local text (if text (luna.utf.lower text) ""))
- (if (: (anywhere (peg.P "зеленый")) :match title)
- "Зеленый чай"
- (: (anywhere (peg.P "улун")) :match title)
- "Улун"
- (: (anywhere (peg.P "белый")) :match title)
- "Белый чай"
- (: (anywhere (peg.P "желтый")) :match title)
- "Желтый чай"
- (: (anywhere (peg.P "красный")) :match title)
- "Красный чай"
- (: (anywhere (peg.P "хэй ча")) :match title)
- "Хэй ча"
- (: (anywhere (+ (peg.P "матча") "маття")) :match title)
- "Матча"
- "Неизвестная категория"))
+ (if (: (anywhere (peg.P "зеленый")) :match text)
+ ["Зеленый чай"]
+ (: (anywhere (peg.P "улун")) :match text)
+ ["Улун"]
+ (: (anywhere (peg.P "белый")) :match text)
+ ["Белый чай"]
+ (: (anywhere (peg.P "желтый")) :match text)
+ ["Желтый чай"]
+ (: (anywhere (peg.P "красный")) :match text)
+ ["Красный чай"]
+ (: (anywhere (peg.P "хэй ча")) :match text)
+ ["Хэй ча"]
+ (: (anywhere (peg.P "шу пуэр")) :match text)
+ ["Шу пуэр"]
+ (: (anywhere (+ (peg.P "шен пуэр") "шэн пуэр")) :match text)
+ ["Шен пуэр"]
+ (: (anywhere (+ (peg.P "матча") "маття")) :match text)
+ ["Матча"]
+ []))
-(fn guess-year [title]
+(fn guess-year [text]
(number.string->number
(: (anywhere
(* (peg.C (^ (peg.R "09") 4))
(maybe " ")
(- (+ (peg.P "г") "год") (peg.P "гр"))))
- :match title)))
+ :match text)))
-(fn guess-weight [title]
- (if title
+(fn guess-weight [text]
+ (if text
(number.string->number
(: (anywhere
(* (peg.C pegs.number) (maybe " ") "гр"))
- :match title))
+ :match text))
nil))
{: match-many
@@ -147,6 +151,6 @@
: maybe
: pegs
:not pnot
- : guess-category
+ : guess-tags
: guess-year
: guess-weight}