Class: HTMLTree::XMLParser

Inherits:
HTML::StackingParser show all
Defined in:
lib/web/htmltools/xmltree.rb

Overview

:nodoc: all

Constant Summary

Constants inherited from SGMLParser

SGMLParser::Attrfind, SGMLParser::Charref, SGMLParser::Commentclose, SGMLParser::Commentopen, SGMLParser::Endbracket, SGMLParser::Endtagopen, SGMLParser::Entitydefs, SGMLParser::Entityref, SGMLParser::Incomplete, SGMLParser::Interesting, SGMLParser::Special, SGMLParser::Starttagopen, SGMLParser::Tagfind

Instance Method Summary collapse

Methods inherited from HTML::StackingParser

#feed, #last_tag, #parent_tag, #parse_file_named, #stack, #strip_whitespace=

Methods inherited from SGMLParser

#close, #feed, #finish_endtag, #finish_starttag, #goahead, #handle_charref, #handle_data, #handle_endtag, #handle_entityref, #handle_starttag, #has_context, #parse_comment, #parse_endtag, #parse_special, #parse_starttag, #report_unbalanced, #setliteral, #setnomoretags, #unknown_charref, #unknown_endtag, #unknown_entityref, #unknown_starttag

Constructor Details

#initialize(verbose = false, strip_white = true) ⇒ XMLParser

verbose

if true, will warn to $stderr on unknown tags/entities/characters, as well as missing end tags and extra end tags.

strip_white

if true, remove all non-essential whitespace. Note that there are browser bugs that may cause this to change the appearance of HTML (even though it shouldn’t by the standard).



42
43
44
45
# File 'lib/web/htmltools/xmltree.rb', line 42

# Build a parser and start it off with an empty document.
#
# verbose::     forwarded unchanged to the superclass constructor; per the
#               class documentation, true enables warnings on $stderr.
# strip_white:: forwarded unchanged to the superclass constructor; per the
#               class documentation, true removes non-essential whitespace.
def initialize(verbose=false, strip_white=true)
  super(verbose, strip_white)
  # Start with a fresh, empty document tree.
  reset
end

Instance Method Details

#document ⇒ Object

Return the document that was built. This will be an REXML::Document that represents the whole document. The <html> node is a child of this.



56
57
58
# File 'lib/web/htmltools/xmltree.rb', line 56

# Return the document that was built.
#
# This is the object held in @rootNode, which #reset initialises to a new
# REXML::Document.
def document
  @rootNode
end

#html ⇒ Object

Return the <html> node, if any.



70
71
72
# File 'lib/web/htmltools/xmltree.rb', line 70

# Return the <html> node, if any.
#
# Looks up the 'html' entry in the +elements+ of the document's root.
# Returns nil when the document has no root (e.g. a still-empty document)
# rather than raising NoMethodError on nil, and nil when the root has no
# 'html' entry.
def html
  top = @rootNode.root
  # No root means there is nothing to search; honour the "if any" contract.
  return nil if top.nil?
  top.elements['html']
end

#reset ⇒ Object

Reset this parser so that it can parse a new document.



48
49
50
51
# File 'lib/web/htmltools/xmltree.rb', line 48

# Reset this parser so that it can parse a new document.
#
# Runs the superclass reset first, then points both @currentNode and
# @rootNode at one newly created, empty REXML::Document.
def reset
  super
  fresh_document = REXML::Document.new
  @currentNode = fresh_document
  @rootNode = fresh_document
end

#root ⇒ Object

Return the root of the document, if any.



65
66
67
# File 'lib/web/htmltools/xmltree.rb', line 65

# Return the root of the document, if any.
#
# Delegates to +root+ on the object stored in @rootNode and returns whatever
# that call yields.
def root
  @rootNode.root
end

#tree ⇒ Object



60
61
62
# File 'lib/web/htmltools/xmltree.rb', line 60

# Return the tree that was built; this is simply the result of #document.
def tree
  document
end