Class: XMLScan::XMLParser

Inherits:
XMLScanner show all
Defined in:
lib/xmlscan/parser.rb

Direct Known Subclasses

XMLParserNS

Defined Under Namespace

Classes: AttributeChecker

Constant Summary

Constants included from XMLScan::XMLScanner::OptRegexp

XMLScan::XMLScanner::OptRegexp::RE_ENCODINGS, XMLScan::XMLScanner::OptRegexp::RE_ENCODING_OPTIONS, XMLScan::XMLScanner::OptRegexp::UTFSTR

Instance Attribute Summary

Attributes inherited from XMLScanner

#optkey

Instance Method Summary collapse

Methods inherited from XMLScanner

apply_option, apply_options, #initialize, #lineno, new, #opt_encoding, #parse_document, #path, provided_options, #source

Constructor Details

This class inherits a constructor from XMLScan::XMLScanner

Instance Method Details

#parse(*) ⇒ Object



36
37
38
39
40
41
# File 'lib/xmlscan/parser.rb', line 36

# Resets the state this parser keeps between runs and then hands off to
# the inherited +parse+.
#
# Before delegating, @standalone is set back to nil, @attr receives a
# fresh AttributeChecker and @elem becomes a new empty array (scan_content
# uses @elem as the stack of currently open element names).
#
# Accepts any arguments; they are not inspected here.
# Returns whatever the inherited +parse+ returns.
def parse(*)
  @standalone = nil
  @attr = AttributeChecker.new
  @elem = []
  super  # bare super: re-sends the arguments this method was called with
end

#scan_content(s) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
# File 'lib/xmlscan/parser.rb', line 163

# Drives scanning of the document body, one input chunk at a time.
#
# +s+ is the first chunk to examine; every following chunk is fetched with
# @src.get, and a nil chunk means the input is exhausted. Chunks are
# classified by their leading characters and passed to the matching
# scan_* method.
# NOTE(review): presumably each chunk is a single piece of markup or a run
# of character data -- confirm against the @src implementation.
#
# Each pass of the outer loop goes through three phases:
#
# 1. Everything up to and including the first start tag. Character data
#    and CDATA sections seen here are reported through parse_error (and
#    still scanned).
# 2. Element content, for as long as @elem (the stack of open element
#    names) is non-empty. Names still on the stack when input runs out are
#    reported and closed via @visitor.on_etag.
# 3. The epilogue, in which only comments, processing instructions and
#    whitespace-only chunks are consumed. Anything else sends control back
#    to phase 1 with the offending chunk still pending.
#
# Always returns nil.
def scan_content(s)
  open_names = @elem  # local alias, for speed
  input = @src        # local alias, for speed
  root_seen = false
  finish = false      # forces at least one pass through the loop below

  until finish

    # -- first start tag --
    open_names.clear
    stag_seen = false

    while s && !stag_seen
      if s[0] == ?<
        case s[1]
        when ?/
          # should be a parse error
          scan_etag s
        when ?!
          if s[2] == ?- && s[3] == ?-
            scan_comment s
          elsif /\A<!\[CDATA\[/n =~ s
            parse_error "CDATA section is found outside of root element"
            # hand over only the text that follows the "<![CDATA[" opener
            scan_cdata Regexp.last_match.post_match
          else
            scan_bang_tag s
          end
        when ??
          scan_pi s
        else
          root_seen = true
          stag_seen = true
          scan_stag s
        end
      else
        parse_error "content of element is found outside of root element"
        scan_chardata s
      end
      s = input.get
    end

    # root_seen survives across passes, so this is reported only when no
    # start tag has ever been found.
    parse_error "no root element was found" if !root_seen && !stag_seen

    # -- contents --
    while s && !open_names.empty?
      if s[0] == ?<
        case s[1]
        when ?/
          scan_etag s
        when ?!
          if s[2] == ?- && s[3] == ?-
            scan_comment s
          elsif /\A<!\[CDATA\[/n =~ s
            scan_cdata Regexp.last_match.post_match
          else
            scan_bang_tag s
          end
        when ??
          scan_pi s
        else
          scan_stag s
        end
      else
        scan_chardata s
      end
      s = input.get
    end

    # Input ended with elements still open: report and close each one,
    # innermost first. pop yields nil once the stack is empty.
    while (name = open_names.pop)
      parse_error "unclosed element `#{name}' meets EOF"
      @visitor.on_etag name
    end

    # -- epilogue --
    finish = true

    while s
      if s[0] == ?<
        case s[1]
        when ?/
          finish = false    # content out of root element
          break
        when ?!
          if s[2] == ?- && s[3] == ?-
            scan_comment s
          else
            finish = false  # content out of root element
            break
          end
        when ??
          scan_pi s
        else
          parse_error "another root element is found"  # stag
          finish = false
          break
        end
      elsif s.strip.empty?
        on_prolog_space s
      else
        finish = false      # content out of root element
        break
      end
      s = input.get
    end

  end

end