Class: LibXML::XML::HTMLParser

Inherits:
Object
  • Object
show all
Defined in:
ext/libxml/ruby_xml_html_parser.c

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.XML::HTMLParser.new ⇒ Object

Create a new parser instance with no pre-determined source.



216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'ext/libxml/ruby_xml_html_parser.c', line 216

/*
 * Allocate a parser that has no source attached yet and wrap it in a
 * Ruby object of the given class.
 *
 * The fresh struct carries a nil context, the NULL source type, no
 * source data and a cleared `parsed` flag. The returned object uses
 * ruby_xml_html_parser_mark / ruby_xml_html_parser_free as its GC
 * mark and free callbacks.
 */
VALUE
ruby_xml_html_parser_new(VALUE class) {
  ruby_xml_html_parser *parser = ALLOC(ruby_xml_html_parser);

  /* Fully initialize the struct before handing it to the GC. */
  parser->parsed = 0;
  parser->data = NULL;
  parser->data_type = RUBY_LIBXML_SRC_TYPE_NULL;
  parser->ctxt = Qnil;

  return Data_Wrap_Struct(class, ruby_xml_html_parser_mark,
                          ruby_xml_html_parser_free, parser);
}

.XML::HTMLParser.string ⇒ Object

Create a new parser instance that will parse the given string.



291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
# File 'ext/libxml/ruby_xml_html_parser.c', line 291

/*
 * Create a new parser instance whose source is the given Ruby string.
 *
 * class - Ruby class the new object is wrapped as.
 * str   - the string to parse; must be a T_STRING, otherwise a
 *         TypeError is raised.
 *
 * Returns the new parser object, with its string source and parser
 * context set up by ruby_xml_html_parser_str_set.
 */
VALUE
ruby_xml_html_parser_new_string(VALUE class, VALUE str) {
  VALUE obj;
  ruby_xml_html_parser *rxp;
  rx_string_data *data;

  /* Reject bad input up front, before any object or buffer is allocated. */
  Check_Type(str, T_STRING);

  obj = ruby_xml_html_parser_new(class);
  Data_Get_Struct(obj, ruby_xml_html_parser, rxp);

  data = ALLOC(rx_string_data);
  /* ALLOC does not clear memory. Give the VALUE slot a defined value
   * before it becomes reachable through the parser, so nothing that
   * inspects the struct before the setter has stored the string
   * (str_get, or possibly the GC mark callback) sees garbage. */
  data->str = Qnil;
  rxp->data = data;
  rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;

  ruby_xml_html_parser_str_set(obj, str);

  return(obj);
}

Instance Method Details

#context ⇒ Object

Obtain the XML::Parser::Context associated with this parser.



359
360
361
362
363
364
365
366
367
368
# File 'ext/libxml/ruby_xml_html_parser.c', line 359

/*
 * Return the parser context object stored on this parser, or nil when
 * none has been set.
 */
VALUE
ruby_xml_html_parser_context_get(VALUE self) {
  ruby_xml_html_parser *parser;

  Data_Get_Struct(self, ruby_xml_html_parser, parser);

  /* An unset context is stored as Qnil, so the field can be returned
   * as-is in both cases. */
  return parser->ctxt;
}

#parse ⇒ Object

Parse the input HTML and create an XML::Document with its content. If an error occurs, XML::Parser::ParseError is thrown.



318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
# File 'ext/libxml/ruby_xml_html_parser.c', line 318

/*
 * Parse the parser's source and wrap the resulting document.
 *
 * Returns nil when the parser has no source (NULL source type).
 * For a string source, runs htmlParseDocument on the stored parser
 * context, marks the parser as parsed and returns the document wrapped
 * by ruby_xml_document_wrap.
 *
 * Raises RuntimeError when the stored context holds no libxml2 parser
 * context (the pointer is NULL), instead of dereferencing it.
 * Any other source type is treated as a fatal internal error.
 */
VALUE
ruby_xml_html_parser_parse(VALUE self) {
  ruby_xml_html_parser *rxp;
  ruby_xml_parser_context *rxpc;
  htmlDocPtr xdp;
  VALUE doc = Qnil;

  Data_Get_Struct(self, ruby_xml_html_parser, rxp);

  switch (rxp->data_type) {
  case RUBY_LIBXML_SRC_TYPE_NULL:
    return(Qnil);
  case RUBY_LIBXML_SRC_TYPE_STRING:
    /* Only string sources are handled; file and IO source types are
     * not supported by this function. */
    Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);

    /* The libxml2 context pointer is stored unchecked when the string
     * is set and may be NULL (htmlCreateMemoryParserCtxt returns NULL
     * on failure). Reading ->myDoc from it would crash the process. */
    if (rxpc->ctxt == NULL)
      rb_raise(rb_eRuntimeError,
               "HTML parser context is not available for the given string");

    /* don't check return values here, the HTML parser returns errors
     * but still allows the resulting tree to be used.
     */
    htmlParseDocument(rxpc->ctxt);
    xdp = rxpc->ctxt->myDoc;
    rxp->parsed = 1;

    doc = ruby_xml_document_wrap(xdp);
    break;
  default:
    rb_fatal("Unknown data type, %d", rxp->data_type);
  }

  return(doc);
}

#string ⇒ Object

Obtain the string this parser works with.



377
378
379
380
381
382
383
384
385
386
387
388
# File 'ext/libxml/ruby_xml_html_parser.c', line 377

/*
 * Return the string this parser works with.
 *
 * Yields nil unless the parser has a string source with its data
 * record attached.
 */
VALUE
ruby_xml_html_parser_str_get(VALUE self) {
  ruby_xml_html_parser *parser;

  Data_Get_Struct(self, ruby_xml_html_parser, parser);

  if (parser->data_type == RUBY_LIBXML_SRC_TYPE_STRING && parser->data != NULL)
    return ((rx_string_data *)parser->data)->str;

  return Qnil;
}

#string=(str) ⇒ Object

Set the string this parser works with.



397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
# File 'ext/libxml/ruby_xml_html_parser.c', line 397

/*
 * Set the string this parser works with and build a fresh parser
 * context for it.
 *
 * self - the parser object.
 * str  - the new source; must be a T_STRING, otherwise a TypeError is
 *        raised.
 *
 * A parser with no source yet is switched to the string source type.
 * A parser that already has a non-string source is left untouched and
 * nil is returned. Otherwise the string is stored, a new context object
 * is created, and a libxml2 memory parser context is created over the
 * string's bytes. Returns the stored string.
 */
VALUE
ruby_xml_html_parser_str_set(VALUE self, VALUE str) {
  ruby_xml_html_parser *rxp;
  ruby_xml_parser_context *rxpc;
  rx_string_data *data;

  Check_Type(str, T_STRING);
  Data_Get_Struct(self, ruby_xml_html_parser, rxp);

  if (rxp->data_type == RUBY_LIBXML_SRC_TYPE_NULL) {
    /* Allocate and initialize first, then commit the type change: if
     * the allocation raises, the parser must not be left claiming a
     * string source while its data pointer is still NULL. */
    data = ALLOC(rx_string_data);
    data->str = Qnil;
    rxp->data = data;
    rxp->data_type = RUBY_LIBXML_SRC_TYPE_STRING;
  } else if (rxp->data_type != RUBY_LIBXML_SRC_TYPE_STRING) {
    return(Qnil);
  }

  /* Store the string before creating the context object below. That
   * call allocates a Ruby object, so it may run the GC or raise, and
   * the str slot must hold a valid VALUE by then. */
  data = (rx_string_data *)rxp->data;
  data->str = str;

  rxp->ctxt = ruby_xml_parser_context_new();
  Data_Get_Struct(rxp->ctxt, ruby_xml_parser_context, rxpc);

  /* NOTE(review): the result may be NULL if libxml2 cannot create the
   * context; it is stored unchecked here. The length is also passed
   * where libxml2 expects an int — confirm very large strings are not
   * a concern. */
  rxpc->ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data->str), RSTRING_LEN(data->str));

  return(data->str);
}