Class: HtmlTokenizer::Parser

Inherits:
Object
  • Object
show all
Defined in:
ext/better_html_ext/parser.c

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Object



468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
# File 'ext/better_html_ext/parser.c', line 468

/*
 * Parser#initialize
 *
 * Puts the parser struct wrapped by `self` into its starting state: the
 * whole struct is zeroed, the context becomes PARSER_NONE, the embedded
 * tokenizer is initialized and wired to call back into this parser, the
 * document buffer and error list are emptied, and position tracking starts
 * at line 1, column 0.
 *
 * Returns nil.
 */
static VALUE parser_initialize_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);
  DBG_PRINT("parser=%p initialize", p);

  /* Wipe everything first; the explicit stores below spell out the
   * intended starting values field by field. */
  memset(p, 0, sizeof(*p));

  p->context = PARSER_NONE;

  /* The callback fields are set after tokenizer_init() so that nothing
   * done inside the init call can overwrite them. */
  tokenizer_init(&p->tk);
  p->tk.f_callback = parser_tokenize_callback;
  p->tk.callback_data = p;

  /* No document has been appended yet. */
  p->doc.data = NULL;
  p->doc.length = 0;
  p->doc.mb_length = 0;
  p->doc.enc_index = 0;

  /* Position of the next character to be consumed. */
  p->doc.line_number = 1;
  p->doc.column_number = 0;

  /* No errors recorded yet. */
  p->errors = NULL;
  p->errors_count = 0;

  return Qnil;
}

Instance Method Details

#append_placeholder(source) ⇒ Object



566
567
568
569
# File 'ext/better_html_ext/parser.c', line 566

/*
 * Parser#append_placeholder(source)
 *
 * Forwards `source` to parser_append_data() with the trailing flag set to 1
 * (Parser#parse passes 0 for the same argument) and returns whatever that
 * call returns.
 * NOTE(review): the flag presumably marks the data as a placeholder rather
 * than text to tokenize -- confirm against parser_append_data().
 */
static VALUE parser_append_placeholder_method(VALUE self, VALUE source)
{
  VALUE result = parser_append_data(self, source, 1);
  return result;
}

#attribute_name ⇒ Object



667
668
669
670
671
672
# File 'ext/better_html_ext/parser.c', line 667

/*
 * Parser#attribute_name
 *
 * Returns the result of ref_to_str() applied to the parser's
 * attribute.name reference.
 */
static VALUE parser_attribute_name_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  return ref_to_str(p, &p->attribute.name);
}

#attribute_quoted? ⇒ Boolean

Returns:

  • (Boolean)


690
691
692
693
694
695
# File 'ext/better_html_ext/parser.c', line 690

/*
 * Parser#attribute_quoted?
 *
 * Returns true when the parser's attribute.is_quoted flag is non-zero,
 * false otherwise.
 */
static VALUE parser_attribute_is_quoted_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  if(p->attribute.is_quoted) {
    return Qtrue;
  }
  return Qfalse;
}

#attribute_value ⇒ Object



674
675
676
677
678
679
# File 'ext/better_html_ext/parser.c', line 674

/*
 * Parser#attribute_value
 *
 * Returns the result of ref_to_str() applied to the parser's
 * attribute.value reference.
 */
static VALUE parser_attribute_value_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  return ref_to_str(p, &p->attribute.value);
}

#cdata_text ⇒ Object



704
705
706
707
708
709
# File 'ext/better_html_ext/parser.c', line 704

/*
 * Parser#cdata_text
 *
 * Returns the result of ref_to_str() applied to the parser's cdata.text
 * reference.
 */
static VALUE parser_cdata_text_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  return ref_to_str(p, &p->cdata.text);
}

#closing_tag? ⇒ Boolean

Returns:

  • (Boolean)


653
654
655
656
657
658
# File 'ext/better_html_ext/parser.c', line 653

/*
 * Parser#closing_tag?
 *
 * Returns true when the tokenizer's is_closing_tag flag is non-zero,
 * false otherwise.
 */
static VALUE parser_closing_tag_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  if(p->tk.is_closing_tag) {
    return Qtrue;
  }
  return Qfalse;
}

#column_number ⇒ Object



762
763
764
765
766
767
# File 'ext/better_html_ext/parser.c', line 762

/*
 * Parser#column_number
 *
 * Returns the parser's doc.column_number as a Ruby Integer.
 */
static VALUE parser_column_number_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE column;

  Parser_Get_Struct(self, p);

  column = ULONG2NUM(p->doc.column_number);
  return column;
}

#comment_text ⇒ Object



697
698
699
700
701
702
# File 'ext/better_html_ext/parser.c', line 697

/*
 * Parser#comment_text
 *
 * Returns the result of ref_to_str() applied to the parser's comment.text
 * reference.
 */
static VALUE parser_comment_text_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  return ref_to_str(p, &p->comment.text);
}

#context ⇒ Object



600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
# File 'ext/better_html_ext/parser.c', line 600

/*
 * Parser#context
 *
 * Maps the parser's current context value to a Ruby Symbol:
 *
 *   PARSER_NONE                           -> :rawtext if rawtext_context()
 *                                            is true, otherwise :none
 *   PARSER_SOLIDUS_OR_TAG_NAME            -> :solidus_or_tag_name
 *   PARSER_TAG_NAME                       -> :tag_name
 *   PARSER_TAG                            -> :tag
 *   PARSER_ATTRIBUTE_NAME                 -> :attribute_name
 *   PARSER_ATTRIBUTE_WHITESPACE_OR_EQUAL  -> :after_attribute_name
 *   PARSER_ATTRIBUTE_WHITESPACE_OR_VALUE  -> :after_equal
 *   PARSER_ATTRIBUTE_QUOTED_VALUE         -> :quoted_value
 *   PARSER_SPACE_AFTER_ATTRIBUTE          -> :space_after_attribute
 *   PARSER_ATTRIBUTE_UNQUOTED_VALUE       -> :unquoted_value
 *   PARSER_TAG_END                        -> :tag_end
 *   PARSER_COMMENT                        -> :comment
 *   PARSER_CDATA                          -> :cdata
 *
 * Returns nil for any context value not listed above.
 */
static VALUE parser_context_method(VALUE self)
{
  struct parser_t *p = NULL;
  const char *label = NULL;

  Parser_Get_Struct(self, p);

  /* Choose the symbol name; the Symbol itself is built once at the end. */
  switch(p->context) {
  case PARSER_NONE:
    /* rawtext_context() is consulted only in this state. */
    label = rawtext_context(p) ? "rawtext" : "none";
    break;
  case PARSER_SOLIDUS_OR_TAG_NAME:
    label = "solidus_or_tag_name";
    break;
  case PARSER_TAG_NAME:
    label = "tag_name";
    break;
  case PARSER_TAG:
    label = "tag";
    break;
  case PARSER_ATTRIBUTE_NAME:
    label = "attribute_name";
    break;
  case PARSER_ATTRIBUTE_WHITESPACE_OR_EQUAL:
    label = "after_attribute_name";
    break;
  case PARSER_ATTRIBUTE_WHITESPACE_OR_VALUE:
    label = "after_equal";
    break;
  case PARSER_ATTRIBUTE_QUOTED_VALUE:
    label = "quoted_value";
    break;
  case PARSER_SPACE_AFTER_ATTRIBUTE:
    label = "space_after_attribute";
    break;
  case PARSER_ATTRIBUTE_UNQUOTED_VALUE:
    label = "unquoted_value";
    break;
  case PARSER_TAG_END:
    label = "tag_end";
    break;
  case PARSER_COMMENT:
    label = "comment";
    break;
  case PARSER_CDATA:
    label = "cdata";
    break;
  }

  /* No case matched: the context value is not one of the known states. */
  if(label == NULL) {
    return Qnil;
  }

  return ID2SYM(rb_intern(label));
}

#document ⇒ Object



571
572
573
574
575
576
577
578
579
580
# File 'ext/better_html_ext/parser.c', line 571

/*
 * Parser#document
 *
 * Returns a new Ruby String built from the parser's document buffer
 * (doc.data, doc.length bytes) and tagged with the encoding identified by
 * doc.enc_index. Returns nil when the buffer pointer is NULL.
 */
static VALUE parser_document_method(VALUE self)
{
  struct parser_t *p = NULL;
  rb_encoding *encoding;

  Parser_Get_Struct(self, p);

  if(p->doc.data == NULL) {
    return Qnil;
  }

  encoding = rb_enc_from_index(p->doc.enc_index);
  return rb_enc_str_new(p->doc.data, p->doc.length, encoding);
}

#document_length ⇒ Object



582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
# File 'ext/better_html_ext/parser.c', line 582

/*
 * Parser#document_length
 *
 * Returns the length of the document buffer as reported by
 * rb_enc_strlen() for the encoding identified by doc.enc_index, measured
 * over the doc.length bytes starting at doc.data. Returns 0 when the
 * buffer pointer is NULL.
 */
static VALUE parser_document_length_method(VALUE self)
{
  struct parser_t *p = NULL;
  rb_encoding *encoding;
  const char *first;
  const char *last;

  Parser_Get_Struct(self, p);

  /* Nothing appended yet. */
  if(!p->doc.data) {
    return ULONG2NUM(0);
  }

  first = p->doc.data;
  last = first + p->doc.length;
  encoding = rb_enc_from_index(p->doc.enc_index);

  return ULONG2NUM(rb_enc_strlen(first, last, encoding));
}

#errors ⇒ Object



738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
# File 'ext/better_html_ext/parser.c', line 738

/*
 * Parser#errors
 *
 * Returns a new Ruby Array holding one create_parser_error() object for
 * each of the first errors_count entries of the parser's error list whose
 * message pointer is non-NULL. Entries with a NULL message are skipped.
 */
static VALUE parser_errors_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE result;
  size_t idx;

  Parser_Get_Struct(self, p);

  result = rb_ary_new();
  for(idx = 0; idx < p->errors_count; idx++) {
    if(!p->errors[idx].message) {
      continue;
    }
    rb_ary_push(result, create_parser_error(&p->errors[idx]));
  }

  return result;
}

#errors_count ⇒ Object



718
719
720
721
722
723
# File 'ext/better_html_ext/parser.c', line 718

/*
 * Parser#errors_count
 *
 * Returns the parser's errors_count field as a Ruby Integer, converted
 * with INT2NUM.
 */
static VALUE parser_errors_count_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE count;

  Parser_Get_Struct(self, p);

  count = INT2NUM(p->errors_count);
  return count;
}

#line_number ⇒ Object



755
756
757
758
759
760
# File 'ext/better_html_ext/parser.c', line 755

/*
 * Parser#line_number
 *
 * Returns the parser's doc.line_number as a Ruby Integer.
 */
static VALUE parser_line_number_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE line;

  Parser_Get_Struct(self, p);

  line = ULONG2NUM(p->doc.line_number);
  return line;
}

#parse(source) ⇒ Object



561
562
563
564
# File 'ext/better_html_ext/parser.c', line 561

/*
 * Parser#parse(source)
 *
 * Forwards `source` to parser_append_data() with the trailing flag set to 0
 * (Parser#append_placeholder passes 1 for the same argument) and returns
 * whatever that call returns.
 */
static VALUE parser_parse_method(VALUE self, VALUE source)
{
  VALUE result = parser_append_data(self, source, 0);
  return result;
}

#quote_character ⇒ Object



681
682
683
684
685
686
687
688
# File 'ext/better_html_ext/parser.c', line 681

/*
 * Parser#quote_character
 *
 * When the parser's attribute.is_quoted flag is set, returns a new
 * one-byte Ruby String copied from the tokenizer's attribute_value_start
 * field. Returns nil when the flag is not set.
 */
static VALUE parser_quote_character_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  if(!p->attribute.is_quoted) {
    return Qnil;
  }

  return rb_str_new(&p->tk.attribute_value_start, 1);
}

#rawtext_text ⇒ Object



711
712
713
714
715
716
# File 'ext/better_html_ext/parser.c', line 711

/*
 * Parser#rawtext_text
 *
 * Returns the result of ref_to_str() applied to the parser's rawtext.text
 * reference.
 */
static VALUE parser_rawtext_text_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  return ref_to_str(p, &p->rawtext.text);
}

#self_closing_tag? ⇒ Boolean

Returns:

  • (Boolean)


660
661
662
663
664
665
# File 'ext/better_html_ext/parser.c', line 660

/*
 * Parser#self_closing_tag?
 *
 * Returns true when the parser's tag.self_closing flag is non-zero,
 * false otherwise.
 */
static VALUE parser_self_closing_tag_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  if(p->tag.self_closing) {
    return Qtrue;
  }
  return Qfalse;
}

#tag_name ⇒ Object



646
647
648
649
650
651
# File 'ext/better_html_ext/parser.c', line 646

/*
 * Parser#tag_name
 *
 * Returns the result of ref_to_str() applied to the parser's tag.name
 * reference.
 */
static VALUE parser_tag_name_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);

  return ref_to_str(p, &p->tag.name);
}