Class: LibXML::XML::HTMLParser

Inherits:
Object
  • Object
show all
Defined in:
ext/libxml/ruby_xml_html_parser.c,
lib/libxml/html_parser.rb,
ext/libxml/ruby_xml_html_parser.c

Overview

The HTML parser implements an HTML 4.0 non-verifying parser with an API compatible with the XML::Parser. In contrast with the XML::Parser, it can parse “real world” HTML, even if it is severely broken from a specification point of view.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#XML::HTMLParser.initialize ⇒ Object

Initializes a new parser instance with no pre-determined source.



24
25
26
27
28
29
# File 'ext/libxml/ruby_xml_html_parser.c', line 24

/*
 * Initializes a new parser instance with no pre-determined source.
 *
 * Builds a fresh cXMLInput instance (no constructor arguments), stores it in
 * the parser's @input instance variable, and returns the parser itself.
 */
static VALUE rxml_html_parser_initialize(VALUE self)
{
  rb_iv_set(self, "@input", rb_class_new_instance(0, NULL, cXMLInput));
  return self;
}

Instance Attribute Details

#input ⇒ Object (readonly)

Attributes

Class Method Details

.document(value) ⇒ Object



16
17
18
19
20
# File 'lib/libxml/html_parser.rb', line 16

# Builds a new parser whose input document is set to +value+.
#
# @param value the object assigned to the new parser's +input.document+
# @return the newly created parser
def self.document(value)
  new.tap { |parser| parser.input.document = value }
end

.file(value) ⇒ Object



4
5
6
7
8
# File 'lib/libxml/html_parser.rb', line 4

# Builds a new parser whose input file is set to +value+.
#
# @param value the object assigned to the new parser's +input.file+
# @return the newly created parser
def self.file(value)
  new.tap { |parser| parser.input.file = value }
end

.io(value) ⇒ Object



22
23
24
25
26
# File 'lib/libxml/html_parser.rb', line 22

# Builds a new parser whose input IO is set to +value+.
#
# @param value the object assigned to the new parser's +input.io+
# @return the newly created parser
def self.io(value)
  new.tap { |parser| parser.input.io = value }
end

.string(value) ⇒ Object



10
11
12
13
14
# File 'lib/libxml/html_parser.rb', line 10

# Builds a new parser whose input string is set to +value+.
#
# @param value the object assigned to the new parser's +input.string+
# @return the newly created parser
def self.string(value)
  new.tap { |parser| parser.input.string = value }
end

Instance Method Details

#document ⇒ Object



44
45
46
# File 'lib/libxml/html_parser.rb', line 44

# Reads the document from this parser's input object.
#
# @return whatever +input.document+ returns
def document
  source = input
  source.document
end

#document=(value) ⇒ Object



48
49
50
# File 'lib/libxml/html_parser.rb', line 48

# Assigns +value+ as the document on this parser's input object.
#
# @param value the object forwarded to +input.document=+
def document=(value)
  source = input
  source.document = value
end

#file ⇒ Object



28
29
30
# File 'lib/libxml/html_parser.rb', line 28

# Reads the file from this parser's input object.
#
# @return whatever +input.file+ returns
def file
  source = input
  source.file
end

#file=(value) ⇒ Object



32
33
34
# File 'lib/libxml/html_parser.rb', line 32

# Assigns +value+ as the file on this parser's input object.
#
# @param value the object forwarded to +input.file=+
def file=(value)
  source = input
  source.file = value
end

#io ⇒ Object



52
53
54
# File 'lib/libxml/html_parser.rb', line 52

# Reads the IO from this parser's input object.
#
# @return whatever +input.io+ returns
def io
  source = input
  source.io
end

#io=(value) ⇒ Object



56
57
58
# File 'lib/libxml/html_parser.rb', line 56

# Assigns +value+ as the IO on this parser's input object.
#
# @param value the object forwarded to +input.io=+
def io=(value)
  source = input
  source.io = value
end

#parse ⇒ Object

Parse the input HTML and create an XML::Document with its content. If an error occurs, XML::Parser::ParseError is raised.



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'ext/libxml/ruby_xml_html_parser.c', line 81

/*
 * Parses the parser's configured input and returns the wrapped document.
 *
 * The input object's attributes are inspected in a fixed order -- file, then
 * string, then IO -- and the first one that is not nil selects the reader.
 * Raises ArgumentError when none of the three is set. When the selected
 * reader yields no document, rxml_raise is called with xmlLastError.
 */
static VALUE rxml_html_parser_parse(VALUE self)
{
  htmlDocPtr xdoc;
  VALUE input = rb_ivar_get(self, INPUT_ATTR);

  if (rb_ivar_get(input, FILE_ATTR) != Qnil)
  {
    xdoc = rxml_html_parser_read_file(input);
  }
  else if (rb_ivar_get(input, STRING_ATTR) != Qnil)
  {
    xdoc = rxml_html_parser_read_string(input);
  }
  else if (rb_ivar_get(input, IO_ATTR) != Qnil)
  {
    xdoc = rxml_html_parser_read_io(input);
  }
  else
  {
    /* No source configured; rb_raise does not return. */
    rb_raise(rb_eArgError, "You must specify a parser data source");
  }

  if (xdoc == NULL)
  {
    rxml_raise(&xmlLastError);
  }

  return rxml_document_wrap(xdoc);
}

#string ⇒ Object



36
37
38
# File 'lib/libxml/html_parser.rb', line 36

# Reads the string from this parser's input object.
#
# @return whatever +input.string+ returns
def string
  source = input
  source.string
end

#string=(value) ⇒ Object



40
41
42
# File 'lib/libxml/html_parser.rb', line 40

# Assigns +value+ as the string on this parser's input object.
#
# @param value the object forwarded to +input.string=+
def string=(value)
  source = input
  source.string = value
end