Class: HTML5::Filters::OptionalTagFilter

Inherits:
Base
  • Object
show all
Defined in:
lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb

Instance Method Summary collapse

Instance Method Details

#each ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb', line 18

# Passes every token of the wrapped stream to the caller's block, except
# start/end tags that the optional-tag rules allow to be left out.
#
# A start tag is only a candidate for omission when it carries no
# attributes (token[:data] is empty); is_optional_start is not consulted
# otherwise. End tags are always checked with is_optional_end. Every other
# token type is passed through untouched.
def each
  slider do |previous, token, nexttok|
    omit =
      case token[:type]
      when :StartTag
        # && short-circuits: tags with attributes are never dropped.
        token[:data].empty? && is_optional_start(token[:name], previous, nexttok)
      when :EndTag
        is_optional_end(token[:name], nexttok)
      else
        false
      end

    yield token unless omit
  end
end

#is_optional_end(tagname, nexttok) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb', line 90

# Decides whether the end tag named +tagname+ may be left out of the
# output, judging only by the token that follows it.
#
# tagname - element name of the end tag (String).
# nexttok - the following token (responds to [:type] and, for start tags,
#           [:name]), or nil when the end tag is the last token.
#
# Returns true when the end tag can be dropped, false otherwise. Tag names
# without an optional end tag always give false.
def is_optional_end(tagname, nexttok)
  next_type = nexttok[:type] if nexttok
  opens = next_type == :StartTag
  next_name = nexttok[:name] if opens
  # An end tag or the end of the stream means "no more content in the
  # parent element".
  parent_done = next_type == :EndTag || next_type.nil?
  # A following comment or whitespace would end up inside/outside the wrong
  # element if the tag were dropped.
  blocked = [:Comment, :SpaceCharacters].include?(next_type)

  case tagname
  when 'html', 'head', 'body'
    # Omissible unless immediately followed by a space character or a
    # comment.
    !blocked
  when 'li', 'optgroup', 'option', 'tr'
    # Omissible when followed by a sibling of the same kind, or when there
    # is no more content in the parent element.
    opens ? next_name == tagname : parent_done
  when 'dt', 'dd'
    # dt: omissible when followed by another dt or a dd.
    # dd: the same, and also when there is no more content in the parent.
    if opens
      %w[dt dd].include?(next_name)
    else
      tagname == 'dd' && parent_done
    end
  when 'p'
    # Omissible when followed by one of the block-level elements listed
    # below, or when there is no more content in the parent element.
    if opens
      %w[address blockquote dl fieldset form h1 h2 h3 h4 h5
         h6 hr menu ol p pre table ul].include?(next_name)
    else
      parent_done
    end
  when 'colgroup'
    # Omissible unless followed by a space character or a comment. The end
    # tag is additionally kept when another colgroup start tag follows;
    # is_optional_start relies on that because its colgroup branch does not
    # look at the preceding token.
    if blocked
      false
    elsif opens
      next_name != 'colgroup'
    else
      true
    end
  when 'thead', 'tbody'
    # thead: omissible when followed by a tbody or tfoot.
    # tbody: the same, and also when there is no more content in the
    # parent element.
    # Note the end tag IS dropped in front of a tbody here; the tbody
    # branch of is_optional_start compensates by keeping the tbody start
    # tag when the preceding token is one of these end tags.
    if opens
      %w[tbody tfoot].include?(next_name)
    else
      tagname == 'tbody' && parent_done
    end
  when 'tfoot'
    # Omissible when followed by a tbody, or when there is no more content
    # in the parent element.
    opens ? next_name == 'tbody' : parent_done
  when 'td', 'th'
    # Omissible when followed by a td or th, or when there is no more
    # content in the parent element.
    opens ? %w[td th].include?(next_name) : parent_done
  else
    false
  end
end

#is_optional_start(tagname, previous, nexttok) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb', line 31

# Decides whether the start tag named +tagname+ may be left out of the
# output. The caller (each) only asks this for start tags without
# attributes.
#
# tagname  - element name of the start tag (String).
# previous - the token preceding the start tag, or nil; only consulted
#            for tbody.
# nexttok  - the token following the start tag (responds to [:type] and,
#            for start tags, [:name]), or nil when nothing follows.
#
# Returns true when the start tag can be dropped, false otherwise. Tag
# names without an optional start tag always give false.
def is_optional_start(tagname, previous, nexttok)
  next_type = nexttok[:type] if nexttok
  opens = next_type == :StartTag

  case tagname
  when 'html'
    # Omissible unless the first thing inside the element is a space
    # character or a comment.
    ![:Comment, :SpaceCharacters].include?(next_type)
  when 'head'
    # Omissible only when the first thing inside the element is an element.
    opens
  when 'body'
    # Omissible unless the first thing inside is a space character or a
    # comment. The preceding token is not examined here, so the tag is
    # always kept when a script or style element comes first.
    if opens
      !%w[script style].include?(nexttok[:name])
    else
      ![:Comment, :SpaceCharacters].include?(next_type)
    end
  when 'colgroup'
    # Omissible when the first thing inside is a col element. The preceding
    # token is not examined; instead is_optional_end keeps a colgroup end
    # tag that is directly followed by another colgroup.
    opens && nexttok[:name] == 'col'
  when 'tbody'
    # Omissible when the first thing inside is a tr element, unless the
    # preceding token is the end tag of a tbody, thead or tfoot (which
    # is_optional_end may drop, so this start tag has to stay).
    return false unless opens

    follows_section_end = previous &&
                          previous[:type] == :EndTag &&
                          %w[tbody thead tfoot].include?(previous[:name])
    !follows_section_end && nexttok[:name] == 'tr'
  else
    false
  end
end

#slider {|previous2, previous1, nil| ... } ⇒ Object

Yields:

  • (previous2, previous1, nil)


8
9
10
11
12
13
14
15
16
# File 'lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb', line 8

# Walks the wrapped token stream (__getobj__) with a three-token sliding
# window and yields (previous, current, next) once for every token.
#
# The token before the first one and the token after the last one are
# reported as nil. Yielding lags one step behind the underlying iteration
# because the "next" token has to be known before "current" can be yielded.
#
# Nothing is yielded for an empty stream. Previously the trailing yield ran
# unconditionally and handed (nil, nil, nil) to the block, which made
# #each fail on nil[:type].
def slider
  previous1 = previous2 = nil
  __getobj__.each do |token|
    # previous1 is still nil on the first iteration: no current token yet.
    yield previous2, previous1, token unless previous1.nil?
    previous2 = previous1
    previous1 = token
  end
  # Flush the last token, which has no successor; skipped when the stream
  # produced no tokens at all.
  yield previous2, previous1, nil unless previous1.nil?
end