summaryrefslogtreecommitdiff
path: root/parser.fnl
blob: 314476ce45877b5566082f3c840900399998e2aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
(import-macros {: map} :lib.macro)
(local peg
 (if (pick-values 1 (pcall require :lpeg))
   (require :lpeg)
   (require :vendor.lpeglj)))

(fn pnot [p]
  (- (peg.P 1) (peg.P p)))

(fn till [p]
  (^ (pnot p) 1))

(fn maybe [p]
  (^ (peg.P p) 0))

(fn anywhere [p]
  (peg.P [(+ p (* 1 (peg.V 1)))]))

(local pegs {})
(tset pegs :number (^ (peg.R "09") 1))
(tset pegs :letters (^ (+ (peg.R "az") (peg.R "AZ")) 1))
(tset pegs :space (peg.S "\n\t "))
(tset pegs :spaces (^ (peg.S "\n\t ") 1))
(tset pegs :tag-name (+ pegs.letters pegs.number))
(tset pegs :attr
      (peg.Ct (* (peg.Cg (^ (+ pegs.letters "-") 1) :name)
                 (maybe (* "=\"" (peg.Cg (till "\"") :value) "\"")))))
(tset pegs :self-closing-tag
      (* "<"
         (peg.Cg
           (+ (peg.P "area") "base" "br" "col" "embed" "hr"
              "img" "input" "link" "meta" "param" "source"
              "track" "wbr") ;; should be case insensitive
           :tag)
         (peg.Cg (peg.Ct (^ (+ pegs.space (peg.Cg pegs.attr)) 0)) :attrs)
         (maybe "/") ">"))
(tset pegs :opening-tag
      (* "<" (peg.Cg pegs.tag-name :tag)
         (peg.Cg (peg.Ct (^ (+ pegs.space (peg.Cg pegs.attr)) 0)) :attrs)
         ">"))
(tset pegs :closing-tag (* "</" pegs.tag-name ">"))
(tset pegs :doctype
      (* "<!DOCTYPE HTML" (^ pegs.attr 0) ">")) ;; should be case insensitive
(tset pegs :tag
      (peg.P [(peg.Ct (+ pegs.self-closing-tag
                         (* pegs.opening-tag
                            (peg.Cg
                              (peg.Ct
                                (^ (+ (+ pegs.space (peg.V 1))
                                      (peg.Cg (till pegs.closing-tag)))
                                   0))
                              :nodes)
                            pegs.closing-tag)))]))
(tset pegs :html
      (* pegs.doctype (peg.Ct (^ (+ pegs.space (peg.Cg pegs.tag)) 0))))

(fn tag [tag attrs contents]
  (local tag (peg.P tag))
  (local attrs-count (accumulate [sum 0 _ _ (pairs attrs)] (+ 1 sum)))
  (local attr-peg
    (fn [name value] (* (^ (peg.P name) 1)
                        (if (~= value "")
                          (* "=\""
                             ;; wildcard for any value
                             (if (= value "*")
                                 (till "\"")
                                 (peg.P value))
                             "\"")
                          (maybe (.. "=\" name \""))))))
  (local attrs-peg
    (accumulate [sum pegs.spaces 
                 _ rule
                 (pairs (icollect [k v (pairs attrs)]
                          (attr-peg k v)))]
      (+ rule sum)))
  (if contents
    (peg.P (*
            (^ pegs.space 0)
            ;; opening tag
            (* "<" tag (^ pegs.space 0)
                (^ attrs-peg (- (* attrs-count 2) 1))
                (^ pegs.space 0) ">")
            ;; tag contents
            (^ pegs.space 0)
            (if (= contents "*")
              (till (* "</" tag ">"))
              contents)
            (^ pegs.space 0)
            ;; closing tag
            (* "</" tag ">")))
    (peg.P (*
             (^ pegs.space 0)
             ;; opening tag
             (* "<" tag (^ pegs.space 0)
                (^ attrs-peg (- (* attrs-count 2) 1))
                (^ pegs.space 0) (maybe "/") ">")))))

(fn match-many [html tag]
  (: (peg.Ct (^ (peg.Ct tag) 1))
     :match html))

{: match-many
 : tag
 : anywhere
 : till
 : maybe
 : pegs
 :not pnot}