Class: LibXML::XML::HTMLParser::Context

Inherits:
Parser::Context show all
Defined in:
ext/libxml/ruby_xml_html_parser_context.c,
ext/libxml/ruby_xml_html_parser_context.c

Overview

The XML::HTMLParser::Context class provides in-depth control over how a document is parsed.

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Parser::Context

#base_uri, #base_uri=, #data_directory, #depth, #disable_sax?, #docbook?, document, #encoding, #encoding=, #errno, #html?, #io_max_num_streams, #io_num_streams, #keep_blanks?, #name_depth, #name_depth_max, #name_node, #name_tab, #node, #node_depth, #node_depth_max, #num_chars, #options, #recovery=, #recovery?, #replace_entities=, #replace_entities?, #space_depth, #space_depth_max, #standalone?, #stats?, #subset_external?, #subset_external_system_id, #subset_external_uri, #subset_internal?, #subset_internal_name, #valid, #validate?, #version, #well_formed?

Class Method Details

.XML::HTMLParser::Context.file(file) ⇒ XML::HTMLParser::Context

Creates a new parser context based on the specified file or URI.

Parameters:

file - A filename or URI.


36
37
38
39
40
# File 'ext/libxml/ruby_xml_html_parser_context.c', line 36

/*
 * Creates an HTML parser context that reads from the given file name or URI.
 *
 * klass - receiver of the singleton method (not used).
 * file  - Ruby value converted to a C string with StringValuePtr.
 *
 * Returns the context wrapped by rxml_html_parser_context_wrap.
 * Reports a failure to create the context through rxml_raise, in the same
 * way the other constructors in this file do.
 */
static VALUE rxml_html_parser_context_file(VALUE klass, VALUE file)
{
  xmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(StringValuePtr(file), NULL);

  /* The creation call yields NULL when the context cannot be built;
     never hand a NULL context to the wrapper. */
  if (!ctxt)
    rxml_raise(&xmlLastError);

  return rxml_html_parser_context_wrap(ctxt);
}

.XML::HTMLParser::Context.io(io) ⇒ XML::HTMLParser::Context

Creates a new parser context based on the specified IO object.

Parameters:

io - A Ruby IO object.


51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'ext/libxml/ruby_xml_html_parser_context.c', line 51

/*
 * Creates an HTML parser context that pulls its data from a Ruby IO object.
 *
 * klass - receiver of the singleton method (not used).
 * io    - Ruby object handed to rxml_read_callback as its context pointer.
 *
 * Returns the context wrapped by rxml_html_parser_context_wrap.
 * Every failure path releases what was already allocated and then reports
 * the error through rxml_raise.
 */
static VALUE rxml_html_parser_context_io(VALUE klass, VALUE io)
{
  VALUE result;
  htmlParserCtxtPtr ctxt;
  xmlParserInputBufferPtr input;
  xmlParserInputPtr stream;

  input = xmlParserInputBufferCreateIO((xmlInputReadCallback) rxml_read_callback, NULL,
                                     (void*)io, XML_CHAR_ENCODING_NONE);
  /* Bail out before allocating anything else if the buffer was not created. */
  if (!input)
    rxml_raise(&xmlLastError);

  ctxt = htmlNewParserCtxt();
  if (!ctxt)
  {
    xmlFreeParserInputBuffer(input);
    rxml_raise(&xmlLastError);
  }

  stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);

  if (!stream)
  {
    xmlFreeParserInputBuffer(input);
    xmlFreeParserCtxt(ctxt);
    rxml_raise(&xmlLastError);
  }
  inputPush(ctxt, stream);

  result = rxml_html_parser_context_wrap(ctxt);

  /* libxml2 only holds io as a raw pointer, which the Ruby GC cannot see.
     Store it on the wrapper so it stays alive as long as the context does. */
  rb_ivar_set(result, rb_intern("@io"), io);

  return result;
}

.XML::HTMLParser::Context.string(string) ⇒ XML::HTMLParser::Context

Creates a new parser context based on the specified string.

Parameters:

string - A string that contains the data to parse.


89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'ext/libxml/ruby_xml_html_parser_context.c', line 89

/*
 * Creates an HTML parser context that parses an in-memory string.
 *
 * klass  - receiver of the singleton method (not used).
 * string - Ruby String holding the data to parse; must not be empty.
 *
 * Returns the context wrapped by rxml_html_parser_context_wrap.
 * Raises ArgumentError for an empty string or for one whose length does not
 * fit in the int size parameter of xmlCreateMemoryParserCtxt; reports a
 * failed context creation through rxml_raise.
 */
static VALUE rxml_html_parser_context_string(VALUE klass, VALUE string)
{
  xmlParserCtxtPtr ctxt;
  long length;
  int size;

  Check_Type(string, T_STRING);

  length = RSTRING_LEN(string);
  if (length == 0)
    rb_raise(rb_eArgError, "Must specify a string with one or more characters");

  /* The libxml2 API takes an int length. Reject strings whose length does
     not survive the narrowing instead of passing a truncated size. */
  size = (int)length;
  if ((long)size != length)
    rb_raise(rb_eArgError, "String is too long to be parsed");

  ctxt = xmlCreateMemoryParserCtxt(StringValuePtr(string), size);
  if (!ctxt)
    rxml_raise(&xmlLastError);

  /* The context was created by the XML constructor, so overwrite its SAX
     callbacks with the HTML defaults (only the V1-sized part is copied). */
  htmlDefaultSAXHandlerInit();
  if (ctxt->sax != NULL)
    memcpy(ctxt->sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));

  return rxml_html_parser_context_wrap(ctxt);
}

Instance Method Details

#options=(options) ⇒ Object

Provides control over the execution of a parser. Valid values are the constants defined on XML::Parser::Options. Multiple options can be combined with the bitwise OR operator (|).



118
119
120
121
122
123
124
125
126
127
128
# File 'ext/libxml/ruby_xml_html_parser_context.c', line 118

/*
 * Applies a set of parser options to this context.
 *
 * self    - wrapped parser context.
 * options - Fixnum bit mask of options; any other type is rejected by
 *           Check_Type.
 *
 * Returns self.
 */
static VALUE rxml_html_parser_context_options_set(VALUE self, VALUE options)
{
  xmlParserCtxtPtr ctxt;
  Check_Type(options, T_FIXNUM);

  Data_Get_Struct(self, xmlParserCtxt, ctxt);

  /* The status returned by htmlCtxtUseOptions is deliberately ignored, as it
     was before (it used to be stored in a local that was never read). */
  (void)htmlCtxtUseOptions(ctxt, NUM2INT(options));

  return self;
}