Class: LibXML::XML::HTMLParser
- Inherits:
-
Object
- Object
- LibXML::XML::HTMLParser
- Defined in:
- ext/libxml/ruby_xml_html_parser.c
Class Method Summary collapse
-
.XML::HTMLParser.new ⇒ Object
Create a new parser instance with no pre-determined source.
-
.XML::HTMLParser.string ⇒ Object
Create a new parser instance that will parse the given string.
Instance Method Summary collapse
-
#context ⇒ Object
Obtain the XML::Parser::Context associated with this parser.
-
#parse ⇒ Object
Parse the input XML and create an XML::Document with its content.
-
#string ⇒ Object
Obtain the string this parser works with.
-
#string=(str) ⇒ Object
Set the string this parser works with.
Class Method Details
.XML::HTMLParser.new ⇒ Object
Create a new parser instance with no pre-determined source.
216 217 218 219 220 221 222 223 224 225 226 227 228 |
# File 'ext/libxml/ruby_xml_html_parser.c', line 216
/*
 * Allocate an HTML parser with no source attached and wrap it in a Ruby
 * object of the given class.
 *
 * The new parser has a nil context, a NULL source type, no source data and
 * its "parsed" flag cleared.
 */
VALUE
ruby_xml_html_parser_new(VALUE class) {
  ruby_xml_html_parser *parser = ALLOC(ruby_xml_html_parser);
  VALUE wrapped;

  /* Put every field into its "empty" state before the struct is wrapped. */
  parser->parsed = 0;
  parser->data = NULL;
  parser->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
  parser->ctxt = Qnil;

  wrapped = Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
                             ruby_xml_html_parser_free, parser);
  return wrapped;
}
|
.XML::HTMLParser.string ⇒ Object
Create a new parser instance that will parse the given string.
291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 |
# File 'ext/libxml/ruby_xml_html_parser.c', line 291
/*
 * Create a parser of the given class whose source is the string `str`.
 *
 * A fresh parser is built with ruby_xml_html_parser_new(), given string
 * source storage, and then handed to ruby_xml_html_parser_str_set() which
 * records the string and builds the parser context.
 *
 * Returns the new parser object.
 */
VALUE
ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
  VALUE obj;
  ruby_xml_html_parser *rxp;
  rx_string_data *data;

  obj = ruby_xml_html_parser_new(class);
  Data_Get_Struct(obj, ruby_xml_html_parser, rxp);

  data = ALLOC(rx_string_data);
  /* Start from a well-defined value: ruby_xml_html_parser_str_set() may
   * raise (Check_Type) or allocate Ruby objects before it stores the
   * string, and the wrapped object must never expose an uninitialized
   * VALUE in the meantime.
   * NOTE(review): assumes the mark callback reads data->str -- confirm. */
  data->str = Qnil;
  rxp->data = data;
  rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;

  ruby_xml_html_parser_str_set(obj, str);

  return(obj);
}
|
Instance Method Details
#context ⇒ Object
Obtain the XML::Parser::Context associated with this parser.
359 360 361 362 363 364 365 366 367 368 |
# File 'ext/libxml/ruby_xml_html_parser.c', line 359
/*
 * Return the context object stored on this parser.
 *
 * The stored value is handed back unchanged; it is Qnil when no context
 * has been created yet.
 */
VALUE
ruby_xml_html_parser_context_get(VALUE self) {
  ruby_xml_html_parser *parser;

  Data_Get_Struct(self, ruby_xml_html_parser, parser);

  /* A nil context is returned as nil, so no separate branch is needed. */
  return parser->ctxt;
}
|
#parse ⇒ Object
Parse the input XML and create an XML::Document with its content. If an error occurs, XML::Parser::ParseError is thrown.
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 |
# File 'ext/libxml/ruby_xml_html_parser.c', line 318
/*
 * Run the HTML parser over this parser's source and wrap the resulting
 * document.
 *
 * Returns Qnil when the parser has no source, when no libxml parser context
 * is available (htmlCreateMemoryParserCtxt() yields NULL for an empty
 * buffer), or when parsing produced no document. Otherwise returns the value
 * produced by ruby_xml_document_wrap() for the parsed tree.
 *
 * An unknown source type is treated as a fatal internal error.
 */
VALUE
ruby_xml_html_parser_parse(VALUE self) {
  ruby_xml_html_parser *rxp;
  ruby_xml_parser_context *rxpc;
  htmlDocPtr xdp;
  VALUE doc = Qnil;

  Data_Get_Struct(self, ruby_xml_html_parser, rxp);

  switch (rxp->data_type) {
  case RUBY_LIBXML_SRC_TYPE_NULL:
    return(Qnil);
  case RUBY_LIBXML_SRC_TYPE_STRING:
    //case RUBY_LIBXML_SRC_TYPE_FILE:
    //case RUBY_LIBXML_SRC_TYPE_IO:
    /* Without a context object there is nothing to unwrap. */
    if (rxp->ctxt == Qnil) {
      return(Qnil);
    }
    Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);

    /* The underlying libxml context can be missing (e.g. the source string
     * was empty); dereferencing it below would crash the process. */
    if (!rxpc->ctxt) {
      return(Qnil);
    }

    /* don't check return values here, the HTML parser returns errors
     * but still allows the resulting tree to be used.
     */
    htmlParseDocument(rxpc->ctxt);
    rxp->parsed = 1;

    xdp = rxpc->ctxt->myDoc;
    /* No tree was built at all: report "no document" rather than wrapping
     * a NULL pointer. */
    if (!xdp) {
      return(Qnil);
    }

    doc = ruby_xml_document_wrap(xdp);
    break;
  default:
    rb_fatal("Unknown data type, %d", rxp->data_type);
  }

  return(doc);
}
|
#string ⇒ Object
Obtain the string this parser works with.
377 378 379 380 381 382 383 384 385 386 387 388 |
# File 'ext/libxml/ruby_xml_html_parser.c', line 377
/*
 * Return the string this parser was given as its source.
 *
 * Yields Qnil unless the parser's source type is "string" and source data
 * is present.
 */
VALUE
ruby_xml_html_parser_str_get(VALUE self) {
  ruby_xml_html_parser *parser;

  Data_Get_Struct(self, ruby_xml_html_parser, parser);

  /* Only a string-backed parser with storage attached has a string. */
  if (parser->data_type == RUBY_LIBXML_SRC_TYPE_STRING && parser->data != NULL) {
    return ((rx_string_data *)parser->data)->str;
  }

  return Qnil;
}
|
#string=(str) ⇒ Object
Set the string this parser works with.
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 |
# File 'ext/libxml/ruby_xml_html_parser.c', line 397
/*
 * Set the string this parser works with and build a fresh parser context
 * for it.
 *
 * `str` must be a Ruby String (Check_Type raises otherwise). A parser with
 * no source yet becomes a string-backed parser; a parser whose source is of
 * some other type is left untouched and Qnil is returned.
 *
 * On success the previous context reference (if any) is replaced by a new
 * one created over the string's bytes, and the string itself is returned.
 * The libxml context stored in the new context object may be NULL if
 * htmlCreateMemoryParserCtxt() refuses the buffer.
 */
VALUE
ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
  ruby_xml_html_parser *rxp;
  ruby_xml_parser_context *rxpc;
  rx_string_data *data;

  Check_Type(str, T_STRING);
  Data_Get_Struct(self, ruby_xml_html_parser, rxp);

  if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
    /* Allocate and initialize the storage before publishing it on the
     * parser, so the parser is never marked as string-backed while its
     * data pointer is NULL or its str field holds garbage (ALLOC may
     * raise or trigger a GC run). */
    data = ALLOC(rx_string_data);
    data->str = Qnil;
    rxp->data = data;
    rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
  } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
    return(Qnil);
  }

  /* Record the string first: creating the context object below allocates
   * a Ruby object and may therefore run the garbage collector.
   * NOTE(review): assumes the mark callback reads data->str -- confirm. */
  data = (rx_string_data *)rxp->data;
  data->str = str;

  rxp->ctxt = ruby_xml_parser_context_new();
  Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);
  rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));

  return(data->str);
}
|