Class: HtmlTokenizer::Parser
- Inherits:
-
Object
- Object
- HtmlTokenizer::Parser
- Defined in:
- ext/html_tokenizer_ext/parser.c
Instance Method Summary
- #append_placeholder(source) ⇒ Object
- #attribute_name ⇒ Object
- #attribute_quoted? ⇒ Boolean
- #attribute_value ⇒ Object
- #cdata_text ⇒ Object
- #closing_tag? ⇒ Boolean
- #column_number ⇒ Object
- #comment_text ⇒ Object
- #context ⇒ Object
- #document ⇒ Object
- #document_length ⇒ Object
- #errors ⇒ Object
- #errors_count ⇒ Object
- #initialize ⇒ Object constructor
- #line_number ⇒ Object
- #parse(source) ⇒ Object
- #quote_character ⇒ Object
- #rawtext_text ⇒ Object
- #self_closing_tag? ⇒ Boolean
- #tag_name ⇒ Object
Constructor Details
#initialize ⇒ Object
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 |
# File 'ext/html_tokenizer_ext/parser.c', line 467
/*
 * Parser#initialize: puts the parser struct wrapped by `self` into its
 * starting state.
 *
 * The struct is zero-filled, the embedded tokenizer is initialised and then
 * wired to call parser_tokenize_callback with this parser as callback data.
 * Document bookkeeping starts with no buffer, line 1 and column 0, and the
 * error list starts out empty.
 *
 * Always returns Qnil.
 */
static VALUE parser_initialize_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);
  DBG_PRINT("parser=%p initialize", p);

  /* Start from an all-zero struct before setting individual fields. */
  memset(p, 0, sizeof(*p));
  p->context = PARSER_NONE;

  /* Tokenizer: initialise first, then attach the callback and its data. */
  tokenizer_init(&p->tk);
  p->tk.callback_data = p;
  p->tk.f_callback = parser_tokenize_callback;

  /* Document buffer and position tracking. */
  p->doc.data = NULL;
  p->doc.length = 0;
  p->doc.mb_length = 0;
  p->doc.enc_index = 0;
  p->doc.line_number = 1;
  p->doc.column_number = 0;

  /* No errors recorded yet. */
  p->errors = NULL;
  p->errors_count = 0;

  return Qnil;
}
|
Instance Method Details
#append_placeholder(source) ⇒ Object
568 569 570 571 |
# File 'ext/html_tokenizer_ext/parser.c', line 568
/*
 * Parser#append_placeholder(source): forwards `self` and `source` to
 * parser_append_data with the third argument set to 1 and returns whatever
 * that call returns.
 *
 * NOTE(review): the 1 presumably selects "placeholder" handling inside
 * parser_append_data -- confirm against that function's definition.
 */
static VALUE parser_append_placeholder_method(VALUE self, VALUE source)
{
return parser_append_data(self, source, 1);
}
|
#attribute_name ⇒ Object
669 670 671 672 673 674 |
# File 'ext/html_tokenizer_ext/parser.c', line 669
/*
 * Parser#attribute_name: returns the result of ref_to_str() applied to the
 * parser's attribute.name reference.
 */
static VALUE parser_attribute_name_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE name;

  Parser_Get_Struct(self, p);
  name = ref_to_str(p, &p->attribute.name);
  return name;
}
|
#attribute_quoted? ⇒ Boolean
692 693 694 695 696 697 |
# File 'ext/html_tokenizer_ext/parser.c', line 692
/*
 * Parser#attribute_quoted?: returns Qtrue when the parser's
 * attribute.is_quoted flag is non-zero, Qfalse otherwise.
 */
static VALUE parser_attribute_is_quoted_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);
  if (p->attribute.is_quoted) {
    return Qtrue;
  }
  return Qfalse;
}
|
#attribute_value ⇒ Object
676 677 678 679 680 681 |
# File 'ext/html_tokenizer_ext/parser.c', line 676
/*
 * Parser#attribute_value: returns the result of ref_to_str() applied to the
 * parser's attribute.value reference.
 */
static VALUE parser_attribute_value_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE value;

  Parser_Get_Struct(self, p);
  value = ref_to_str(p, &p->attribute.value);
  return value;
}
|
#cdata_text ⇒ Object
706 707 708 709 710 711 |
# File 'ext/html_tokenizer_ext/parser.c', line 706
/*
 * Parser#cdata_text: returns the result of ref_to_str() applied to the
 * parser's cdata.text reference.
 */
static VALUE parser_cdata_text_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE text;

  Parser_Get_Struct(self, p);
  text = ref_to_str(p, &p->cdata.text);
  return text;
}
|
#closing_tag? ⇒ Boolean
655 656 657 658 659 660 |
# File 'ext/html_tokenizer_ext/parser.c', line 655
/*
 * Parser#closing_tag?: returns Qtrue when the tokenizer's is_closing_tag
 * flag is non-zero, Qfalse otherwise.
 */
static VALUE parser_closing_tag_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);
  if (p->tk.is_closing_tag) {
    return Qtrue;
  }
  return Qfalse;
}
|
#column_number ⇒ Object
764 765 766 767 768 769 |
# File 'ext/html_tokenizer_ext/parser.c', line 764
/*
 * Parser#column_number: returns the document's tracked column number
 * (doc.column_number) converted to a Ruby integer via ULONG2NUM.
 */
static VALUE parser_column_number_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE column;

  Parser_Get_Struct(self, p);
  column = ULONG2NUM(p->doc.column_number);
  return column;
}
|
#comment_text ⇒ Object
699 700 701 702 703 704 |
# File 'ext/html_tokenizer_ext/parser.c', line 699
/*
 * Parser#comment_text: returns the result of ref_to_str() applied to the
 * parser's comment.text reference.
 */
static VALUE parser_comment_text_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE text;

  Parser_Get_Struct(self, p);
  text = ref_to_str(p, &p->comment.text);
  return text;
}
|
#context ⇒ Object
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 |
# File 'ext/html_tokenizer_ext/parser.c', line 602
/*
 * Parser#context: maps the parser's current context value to a Ruby symbol.
 *
 * For PARSER_NONE the symbol is :rawtext when rawtext_context() reports a
 * non-zero value for this parser, and :none otherwise. Every other handled
 * context maps to one fixed symbol name. A context value not listed in the
 * switch yields Qnil.
 */
static VALUE parser_context_method(VALUE self)
{
  struct parser_t *p = NULL;
  const char *label = NULL;

  Parser_Get_Struct(self, p);

  /* Pick the symbol name first; it is interned once after the switch. */
  switch (p->context) {
  case PARSER_NONE:
    label = rawtext_context(p) ? "rawtext" : "none";
    break;
  case PARSER_SOLIDUS_OR_TAG_NAME:
    label = "solidus_or_tag_name";
    break;
  case PARSER_TAG_NAME:
    label = "tag_name";
    break;
  case PARSER_TAG:
    label = "tag";
    break;
  case PARSER_ATTRIBUTE_NAME:
    label = "attribute_name";
    break;
  case PARSER_ATTRIBUTE_WHITESPACE_OR_EQUAL:
    label = "after_attribute_name";
    break;
  case PARSER_ATTRIBUTE_WHITESPACE_OR_VALUE:
    label = "after_equal";
    break;
  case PARSER_ATTRIBUTE_QUOTED_VALUE:
    label = "quoted_value";
    break;
  case PARSER_SPACE_AFTER_ATTRIBUTE:
    label = "space_after_attribute";
    break;
  case PARSER_ATTRIBUTE_UNQUOTED_VALUE:
    label = "unquoted_value";
    break;
  case PARSER_TAG_END:
    label = "tag_end";
    break;
  case PARSER_COMMENT:
    label = "comment";
    break;
  case PARSER_CDATA:
    label = "cdata";
    break;
  }

  if (label == NULL) {
    return Qnil;
  }
  return ID2SYM(rb_intern(label));
}
|
#document ⇒ Object
573 574 575 576 577 578 579 580 581 582 |
# File 'ext/html_tokenizer_ext/parser.c', line 573
/*
 * Parser#document: returns a new Ruby string built from the parser's
 * document buffer (doc.data, doc.length bytes), tagged with the encoding
 * looked up from doc.enc_index. Returns Qnil when there is no buffer.
 */
static VALUE parser_document_method(VALUE self)
{
  struct parser_t *p = NULL;
  rb_encoding *encoding;

  Parser_Get_Struct(self, p);
  if (p->doc.data == NULL) {
    return Qnil;
  }

  encoding = rb_enc_from_index(p->doc.enc_index);
  return rb_enc_str_new(p->doc.data, p->doc.length, encoding);
}
|
#document_length ⇒ Object
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 |
# File 'ext/html_tokenizer_ext/parser.c', line 584
/*
 * Parser#document_length: returns the length of the document buffer as
 * computed by rb_enc_strlen() over [doc.data, doc.data + doc.length) using
 * the encoding looked up from doc.enc_index. Returns 0 when there is no
 * buffer.
 */
static VALUE parser_document_length_method(VALUE self)
{
  struct parser_t *p = NULL;
  rb_encoding *encoding;
  const char *start;
  const char *end;

  Parser_Get_Struct(self, p);

  /* No document yet: report zero without consulting the encoding. */
  if (p->doc.data == NULL) {
    return ULONG2NUM(0);
  }

  start = p->doc.data;
  end = start + p->doc.length;
  encoding = rb_enc_from_index(p->doc.enc_index);
  return ULONG2NUM(rb_enc_strlen(start, end, encoding));
}
|
#errors ⇒ Object
740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 |
# File 'ext/html_tokenizer_ext/parser.c', line 740
/*
 * Parser#errors: returns a new Ruby array holding one object, built by
 * create_parser_error(), for each of the first errors_count entries of the
 * parser's error list that has a message. Entries without a message are
 * skipped.
 */
static VALUE parser_errors_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE result;
  size_t idx;

  Parser_Get_Struct(self, p);
  result = rb_ary_new();

  for (idx = 0; idx < p->errors_count; idx++) {
    if (!p->errors[idx].message) {
      continue;
    }
    rb_ary_push(result, create_parser_error(&p->errors[idx]));
  }

  return result;
}
|
#errors_count ⇒ Object
720 721 722 723 724 725 |
# File 'ext/html_tokenizer_ext/parser.c', line 720
/*
 * Parser#errors_count: returns the parser's errors_count field converted to
 * a Ruby integer via ULONG2NUM.
 */
static VALUE parser_errors_count_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE count;

  Parser_Get_Struct(self, p);
  count = ULONG2NUM(p->errors_count);
  return count;
}
|
#line_number ⇒ Object
757 758 759 760 761 762 |
# File 'ext/html_tokenizer_ext/parser.c', line 757
/*
 * Parser#line_number: returns the document's tracked line number
 * (doc.line_number) converted to a Ruby integer via ULONG2NUM.
 */
static VALUE parser_line_number_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE line;

  Parser_Get_Struct(self, p);
  line = ULONG2NUM(p->doc.line_number);
  return line;
}
|
#parse(source) ⇒ Object
563 564 565 566 |
# File 'ext/html_tokenizer_ext/parser.c', line 563
/*
 * Parser#parse(source): forwards `self` and `source` to parser_append_data
 * with the third argument set to 0 and returns whatever that call returns.
 *
 * NOTE(review): the 0 presumably selects regular (non-placeholder) handling
 * inside parser_append_data -- confirm against that function's definition.
 */
static VALUE parser_parse_method(VALUE self, VALUE source)
{
return parser_append_data(self, source, 0);
}
|
#quote_character ⇒ Object
683 684 685 686 687 688 689 690 |
# File 'ext/html_tokenizer_ext/parser.c', line 683
/*
 * Parser#quote_character: when the parser's attribute.is_quoted flag is set,
 * returns a new one-byte Ruby string copied from the tokenizer's
 * attribute_value_start field; otherwise returns Qnil.
 */
static VALUE parser_quote_character_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);
  if (!p->attribute.is_quoted) {
    return Qnil;
  }
  return rb_str_new(&p->tk.attribute_value_start, 1);
}
|
#rawtext_text ⇒ Object
713 714 715 716 717 718 |
# File 'ext/html_tokenizer_ext/parser.c', line 713
/*
 * Parser#rawtext_text: returns the result of ref_to_str() applied to the
 * parser's rawtext.text reference.
 */
static VALUE parser_rawtext_text_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE text;

  Parser_Get_Struct(self, p);
  text = ref_to_str(p, &p->rawtext.text);
  return text;
}
|
#self_closing_tag? ⇒ Boolean
662 663 664 665 666 667 |
# File 'ext/html_tokenizer_ext/parser.c', line 662
/*
 * Parser#self_closing_tag?: returns Qtrue when the parser's
 * tag.self_closing flag is non-zero, Qfalse otherwise.
 */
static VALUE parser_self_closing_tag_method(VALUE self)
{
  struct parser_t *p = NULL;

  Parser_Get_Struct(self, p);
  if (p->tag.self_closing) {
    return Qtrue;
  }
  return Qfalse;
}
|
#tag_name ⇒ Object
648 649 650 651 652 653 |
# File 'ext/html_tokenizer_ext/parser.c', line 648
/*
 * Parser#tag_name: returns the result of ref_to_str() applied to the
 * parser's tag.name reference.
 */
static VALUE parser_tag_name_method(VALUE self)
{
  struct parser_t *p = NULL;
  VALUE name;

  Parser_Get_Struct(self, p);
  name = ref_to_str(p, &p->tag.name);
  return name;
}
|