Class: HtmlTokenizer::Parser

Inherits:
Object
  • Object
show all
Defined in:
ext/html_tokenizer_ext/parser.c

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Object



467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
# File 'ext/html_tokenizer_ext/parser.c', line 467

/*
 * Implements HtmlTokenizer::Parser#initialize.
 *
 * Resets the parser_t wrapped by self: the struct is zero-filled, the
 * context is set to PARSER_NONE, the embedded tokenizer is initialised and
 * pointed at parser_tokenize_callback, and the document and error
 * bookkeeping fields are cleared. Always returns nil.
 *
 * NOTE(review): the memset overwrites doc.data and errors without releasing
 * whatever they pointed to; presumably this only ever runs on a freshly
 * allocated struct -- confirm against the allocation function.
 */
static VALUE parser_initialize_method(VALUE self)
{
  struct parser_t *parser = NULL;

  Parser_Get_Struct(self, parser);
  DBG_PRINT("parser=%p initialize", parser);

  /* Start from an all-zero struct, then spell out the fields that matter. */
  memset(parser, 0, sizeof(*parser));
  parser->context = PARSER_NONE;

  /* The tokenizer is initialised first; its callback is attached afterwards. */
  tokenizer_init(&parser->tk);
  parser->tk.f_callback = parser_tokenize_callback;
  parser->tk.callback_data = parser;

  /* Document buffer: nothing has been appended yet. */
  parser->doc.data = NULL;
  parser->doc.length = 0;
  parser->doc.mb_length = 0;
  parser->doc.enc_index = 0;

  /* Position tracking begins at line 1, column 0. */
  parser->doc.column_number = 0;
  parser->doc.line_number = 1;

  /* No errors recorded. */
  parser->errors = NULL;
  parser->errors_count = 0;

  return Qnil;
}

Instance Method Details

#append_placeholder(source) ⇒ Object



568
569
570
571
# File 'ext/html_tokenizer_ext/parser.c', line 568

/*
 * Implements HtmlTokenizer::Parser#append_placeholder(source).
 *
 * Forwards to parser_append_data() with a third argument of 1 and returns
 * its result unchanged.
 */
static VALUE parser_append_placeholder_method(VALUE self, VALUE source)
{
  /* presumably marks the data as a placeholder -- confirm in parser_append_data */
  const int placeholder_flag = 1;

  return parser_append_data(self, source, placeholder_flag);
}

#attribute_name ⇒ Object



669
670
671
672
673
674
# File 'ext/html_tokenizer_ext/parser.c', line 669

/*
 * Implements HtmlTokenizer::Parser#attribute_name.
 *
 * Returns whatever ref_to_str() produces for the parser's attribute.name
 * reference.
 */
static VALUE parser_attribute_name_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE name;

  Parser_Get_Struct(self, parser);

  name = ref_to_str(parser, &parser->attribute.name);
  return name;
}

#attribute_quoted? ⇒ Boolean

Returns:

  • (Boolean)


692
693
694
695
696
697
# File 'ext/html_tokenizer_ext/parser.c', line 692

/*
 * Implements HtmlTokenizer::Parser#attribute_quoted?.
 *
 * Returns true when the parser's attribute.is_quoted flag is non-zero,
 * false otherwise.
 */
static VALUE parser_attribute_is_quoted_method(VALUE self)
{
  struct parser_t *parser = NULL;

  Parser_Get_Struct(self, parser);

  if(parser->attribute.is_quoted)
    return Qtrue;

  return Qfalse;
}

#attribute_value ⇒ Object



676
677
678
679
680
681
# File 'ext/html_tokenizer_ext/parser.c', line 676

/*
 * Implements HtmlTokenizer::Parser#attribute_value.
 *
 * Returns whatever ref_to_str() produces for the parser's attribute.value
 * reference.
 */
static VALUE parser_attribute_value_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE value;

  Parser_Get_Struct(self, parser);

  value = ref_to_str(parser, &parser->attribute.value);
  return value;
}

#cdata_text ⇒ Object



706
707
708
709
710
711
# File 'ext/html_tokenizer_ext/parser.c', line 706

/*
 * Implements HtmlTokenizer::Parser#cdata_text.
 *
 * Returns whatever ref_to_str() produces for the parser's cdata.text
 * reference.
 */
static VALUE parser_cdata_text_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE text;

  Parser_Get_Struct(self, parser);

  text = ref_to_str(parser, &parser->cdata.text);
  return text;
}

#closing_tag? ⇒ Boolean

Returns:

  • (Boolean)


655
656
657
658
659
660
# File 'ext/html_tokenizer_ext/parser.c', line 655

/*
 * Implements HtmlTokenizer::Parser#closing_tag?.
 *
 * Returns true when the tokenizer's is_closing_tag flag is non-zero,
 * false otherwise.
 */
static VALUE parser_closing_tag_method(VALUE self)
{
  struct parser_t *parser = NULL;

  Parser_Get_Struct(self, parser);

  if(parser->tk.is_closing_tag)
    return Qtrue;

  return Qfalse;
}

#column_number ⇒ Object



764
765
766
767
768
769
# File 'ext/html_tokenizer_ext/parser.c', line 764

/*
 * Implements HtmlTokenizer::Parser#column_number.
 *
 * Returns the parser's doc.column_number converted to a Ruby Integer via
 * ULONG2NUM.
 */
static VALUE parser_column_number_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE column;

  Parser_Get_Struct(self, parser);

  column = ULONG2NUM(parser->doc.column_number);
  return column;
}

#comment_text ⇒ Object



699
700
701
702
703
704
# File 'ext/html_tokenizer_ext/parser.c', line 699

/*
 * Implements HtmlTokenizer::Parser#comment_text.
 *
 * Returns whatever ref_to_str() produces for the parser's comment.text
 * reference.
 */
static VALUE parser_comment_text_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE text;

  Parser_Get_Struct(self, parser);

  text = ref_to_str(parser, &parser->comment.text);
  return text;
}

#context ⇒ Object



602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
# File 'ext/html_tokenizer_ext/parser.c', line 602

/*
 * Implements HtmlTokenizer::Parser#context.
 *
 * Translates the parser's current context value into a Ruby symbol.
 * PARSER_NONE yields :rawtext when rawtext_context() reports true and :none
 * otherwise. A context value that matches none of the cases yields nil.
 */
static VALUE parser_context_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE symbol = Qnil; /* stays nil if no case below matches */

  Parser_Get_Struct(self, parser);

  switch(parser->context) {
  case PARSER_NONE:
    if(rawtext_context(parser))
      symbol = ID2SYM(rb_intern("rawtext"));
    else
      symbol = ID2SYM(rb_intern("none"));
    break;
  case PARSER_SOLIDUS_OR_TAG_NAME:
    symbol = ID2SYM(rb_intern("solidus_or_tag_name"));
    break;
  case PARSER_TAG_NAME:
    symbol = ID2SYM(rb_intern("tag_name"));
    break;
  case PARSER_TAG:
    symbol = ID2SYM(rb_intern("tag"));
    break;
  case PARSER_ATTRIBUTE_NAME:
    symbol = ID2SYM(rb_intern("attribute_name"));
    break;
  case PARSER_ATTRIBUTE_WHITESPACE_OR_EQUAL:
    symbol = ID2SYM(rb_intern("after_attribute_name"));
    break;
  case PARSER_ATTRIBUTE_WHITESPACE_OR_VALUE:
    symbol = ID2SYM(rb_intern("after_equal"));
    break;
  case PARSER_ATTRIBUTE_QUOTED_VALUE:
    symbol = ID2SYM(rb_intern("quoted_value"));
    break;
  case PARSER_SPACE_AFTER_ATTRIBUTE:
    symbol = ID2SYM(rb_intern("space_after_attribute"));
    break;
  case PARSER_ATTRIBUTE_UNQUOTED_VALUE:
    symbol = ID2SYM(rb_intern("unquoted_value"));
    break;
  case PARSER_TAG_END:
    symbol = ID2SYM(rb_intern("tag_end"));
    break;
  case PARSER_COMMENT:
    symbol = ID2SYM(rb_intern("comment"));
    break;
  case PARSER_CDATA:
    symbol = ID2SYM(rb_intern("cdata"));
    break;
  }

  return symbol;
}

#document ⇒ Object



573
574
575
576
577
578
579
580
581
582
# File 'ext/html_tokenizer_ext/parser.c', line 573

/*
 * Implements HtmlTokenizer::Parser#document.
 *
 * Returns a new Ruby string built from doc.length bytes of doc.data, tagged
 * with the encoding looked up from doc.enc_index. Returns nil when doc.data
 * is NULL.
 */
static VALUE parser_document_method(VALUE self)
{
  struct parser_t *parser = NULL;

  Parser_Get_Struct(self, parser);

  if(parser->doc.data != NULL) {
    rb_encoding *encoding = rb_enc_from_index(parser->doc.enc_index);

    return rb_enc_str_new(parser->doc.data, parser->doc.length, encoding);
  }

  return Qnil;
}

#document_length ⇒ Object



584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
# File 'ext/html_tokenizer_ext/parser.c', line 584

/*
 * Implements HtmlTokenizer::Parser#document_length.
 *
 * Returns the length of the document as counted by rb_enc_strlen() over the
 * doc.length bytes at doc.data, using the encoding looked up from
 * doc.enc_index. Returns 0 when doc.data is NULL.
 */
static VALUE parser_document_length_method(VALUE self)
{
  struct parser_t *parser = NULL;
  const char *start;
  const char *end;
  rb_encoding *encoding;

  Parser_Get_Struct(self, parser);

  /* Nothing stored yet: report an empty document. */
  if(parser->doc.data == NULL)
    return ULONG2NUM(0);

  start = parser->doc.data;
  end = start + parser->doc.length;
  encoding = rb_enc_from_index(parser->doc.enc_index);

  return ULONG2NUM(rb_enc_strlen(start, end, encoding));
}

#errors ⇒ Object



740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
# File 'ext/html_tokenizer_ext/parser.c', line 740

/*
 * Implements HtmlTokenizer::Parser#errors.
 *
 * Builds a new Ruby array holding one create_parser_error() result for each
 * of the first errors_count entries of parser->errors that has a non-NULL
 * message. Entries without a message are skipped.
 */
static VALUE parser_errors_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE result;
  size_t idx = 0;

  Parser_Get_Struct(self, parser);

  result = rb_ary_new();

  while(idx < parser->errors_count) {
    if(parser->errors[idx].message)
      rb_ary_push(result, create_parser_error(&parser->errors[idx]));
    idx++;
  }

  return result;
}

#errors_count ⇒ Object



720
721
722
723
724
725
# File 'ext/html_tokenizer_ext/parser.c', line 720

/*
 * Implements HtmlTokenizer::Parser#errors_count.
 *
 * Returns the parser's errors_count converted to a Ruby Integer via
 * ULONG2NUM.
 */
static VALUE parser_errors_count_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE count;

  Parser_Get_Struct(self, parser);

  count = ULONG2NUM(parser->errors_count);
  return count;
}

#line_number ⇒ Object



757
758
759
760
761
762
# File 'ext/html_tokenizer_ext/parser.c', line 757

/*
 * Implements HtmlTokenizer::Parser#line_number.
 *
 * Returns the parser's doc.line_number converted to a Ruby Integer via
 * ULONG2NUM.
 */
static VALUE parser_line_number_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE line;

  Parser_Get_Struct(self, parser);

  line = ULONG2NUM(parser->doc.line_number);
  return line;
}

#parse(source) ⇒ Object



563
564
565
566
# File 'ext/html_tokenizer_ext/parser.c', line 563

/*
 * Implements HtmlTokenizer::Parser#parse(source).
 *
 * Forwards to parser_append_data() with a third argument of 0 and returns
 * its result unchanged.
 */
static VALUE parser_parse_method(VALUE self, VALUE source)
{
  /* presumably means "not a placeholder" -- confirm in parser_append_data */
  const int placeholder_flag = 0;

  return parser_append_data(self, source, placeholder_flag);
}

#quote_character ⇒ Object



683
684
685
686
687
688
689
690
# File 'ext/html_tokenizer_ext/parser.c', line 683

/*
 * Implements HtmlTokenizer::Parser#quote_character.
 *
 * When the parser's attribute.is_quoted flag is set, returns a new one-byte
 * Ruby string copied from the tokenizer's attribute_value_start field.
 * Returns nil when the flag is not set.
 */
static VALUE parser_quote_character_method(VALUE self)
{
  struct parser_t *parser = NULL;

  Parser_Get_Struct(self, parser);

  if(!parser->attribute.is_quoted)
    return Qnil;

  return rb_str_new(&parser->tk.attribute_value_start, 1);
}

#rawtext_text ⇒ Object



713
714
715
716
717
718
# File 'ext/html_tokenizer_ext/parser.c', line 713

/*
 * Implements HtmlTokenizer::Parser#rawtext_text.
 *
 * Returns whatever ref_to_str() produces for the parser's rawtext.text
 * reference.
 */
static VALUE parser_rawtext_text_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE text;

  Parser_Get_Struct(self, parser);

  text = ref_to_str(parser, &parser->rawtext.text);
  return text;
}

#self_closing_tag? ⇒ Boolean

Returns:

  • (Boolean)


662
663
664
665
666
667
# File 'ext/html_tokenizer_ext/parser.c', line 662

/*
 * Implements HtmlTokenizer::Parser#self_closing_tag?.
 *
 * Returns true when the parser's tag.self_closing flag is non-zero,
 * false otherwise.
 */
static VALUE parser_self_closing_tag_method(VALUE self)
{
  struct parser_t *parser = NULL;

  Parser_Get_Struct(self, parser);

  if(parser->tag.self_closing)
    return Qtrue;

  return Qfalse;
}

#tag_name ⇒ Object



648
649
650
651
652
653
# File 'ext/html_tokenizer_ext/parser.c', line 648

/*
 * Implements HtmlTokenizer::Parser#tag_name.
 *
 * Returns whatever ref_to_str() produces for the parser's tag.name
 * reference.
 */
static VALUE parser_tag_name_method(VALUE self)
{
  struct parser_t *parser = NULL;
  VALUE name;

  Parser_Get_Struct(self, parser);

  name = ref_to_str(parser, &parser->tag.name);
  return name;
}