Class: HtmlTokenizer::Parser
- Inherits:
-
Object
- Object
- HtmlTokenizer::Parser
- Defined in:
- ext/html_tokenizer_ext/parser.c
Instance Method Summary
- #append_placeholder(source) ⇒ Object
- #attribute_name ⇒ Object
- #attribute_quoted? ⇒ Boolean
- #attribute_value ⇒ Object
- #cdata_text ⇒ Object
- #closing_tag? ⇒ Boolean
- #column_number ⇒ Object
- #comment_text ⇒ Object
- #context ⇒ Object
- #document ⇒ Object
- #document_length ⇒ Object
- #errors(error_p) ⇒ Object
- #errors_count ⇒ Object
- #initialize ⇒ Object constructor
- #line_number ⇒ Object
- #parse(source) ⇒ Object
- #quote_character ⇒ Object
- #rawtext_text ⇒ Object
- #self_closing_tag? ⇒ Boolean
- #tag_name ⇒ Object
Constructor Details
#initialize ⇒ Object
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 |
# File 'ext/html_tokenizer_ext/parser.c', line 464 static VALUE parser_initialize_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); DBG_PRINT("parser=%p initialize", parser); memset(parser, 0, sizeof(struct parser_t)); parser->context = PARSER_NONE; tokenizer_init(&parser->tk); parser->tk.callback_data = parser; parser->tk.f_callback = parser_tokenize_callback; parser->doc.length = 0; parser->doc.data = NULL; parser->doc.enc_index = 0; parser->doc.mb_length = 0; parser->doc.line_number = 1; parser->doc.column_number = 0; parser->errors_count = 0; parser->errors = NULL; return Qnil; } |
Instance Method Details
#append_placeholder(source) ⇒ Object
562 563 564 565 |
# File 'ext/html_tokenizer_ext/parser.c', line 562 static VALUE parser_append_placeholder_method(VALUE self, VALUE source) { return parser_append_data(self, source, 1); } |
#attribute_name ⇒ Object
663 664 665 666 667 668 |
# File 'ext/html_tokenizer_ext/parser.c', line 663 static VALUE parser_attribute_name_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return ref_to_str(parser, &parser->attribute.name); } |
#attribute_quoted? ⇒ Boolean
686 687 688 689 690 691 |
# File 'ext/html_tokenizer_ext/parser.c', line 686 static VALUE parser_attribute_is_quoted_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return parser->attribute.is_quoted ? Qtrue : Qfalse; } |
#attribute_value ⇒ Object
670 671 672 673 674 675 |
# File 'ext/html_tokenizer_ext/parser.c', line 670 static VALUE parser_attribute_value_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return ref_to_str(parser, &parser->attribute.value); } |
#cdata_text ⇒ Object
700 701 702 703 704 705 |
# File 'ext/html_tokenizer_ext/parser.c', line 700 static VALUE parser_cdata_text_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return ref_to_str(parser, &parser->cdata.text); } |
#closing_tag? ⇒ Boolean
649 650 651 652 653 654 |
# File 'ext/html_tokenizer_ext/parser.c', line 649 static VALUE parser_closing_tag_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return parser->tk.is_closing_tag ? Qtrue : Qfalse; } |
#column_number ⇒ Object
757 758 759 760 761 762 |
# File 'ext/html_tokenizer_ext/parser.c', line 757 static VALUE parser_column_number_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return ULONG2NUM(parser->doc.column_number); } |
#comment_text ⇒ Object
693 694 695 696 697 698 |
# File 'ext/html_tokenizer_ext/parser.c', line 693 static VALUE parser_comment_text_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return ref_to_str(parser, &parser->comment.text); } |
#context ⇒ Object
596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 |
# File 'ext/html_tokenizer_ext/parser.c', line 596 static VALUE parser_context_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); switch(parser->context) { case PARSER_NONE: return rawtext_context(parser) ? ID2SYM(rb_intern("rawtext")) : ID2SYM(rb_intern("none")); case PARSER_SOLIDUS_OR_TAG_NAME: return ID2SYM(rb_intern("solidus_or_tag_name")); case PARSER_TAG_NAME: return ID2SYM(rb_intern("tag_name")); case PARSER_TAG: return ID2SYM(rb_intern("tag")); case PARSER_ATTRIBUTE_NAME: return ID2SYM(rb_intern("attribute_name")); case PARSER_ATTRIBUTE_WHITESPACE_OR_EQUAL: return ID2SYM(rb_intern("after_attribute_name")); case PARSER_ATTRIBUTE_WHITESPACE_OR_VALUE: return ID2SYM(rb_intern("after_equal")); case PARSER_ATTRIBUTE_QUOTED_VALUE: return ID2SYM(rb_intern("quoted_value")); case PARSER_SPACE_AFTER_ATTRIBUTE: return ID2SYM(rb_intern("space_after_attribute")); case PARSER_ATTRIBUTE_UNQUOTED_VALUE: return ID2SYM(rb_intern("unquoted_value")); case PARSER_TAG_END: return ID2SYM(rb_intern("tag_end")); case PARSER_COMMENT: return ID2SYM(rb_intern("comment")); case PARSER_CDATA: return ID2SYM(rb_intern("cdata")); } return Qnil; } |
#document ⇒ Object
567 568 569 570 571 572 573 574 575 576 |
# File 'ext/html_tokenizer_ext/parser.c', line 567 static VALUE parser_document_method(VALUE self) { struct parser_t *parser = NULL; rb_encoding *enc; Parser_Get_Struct(self, parser); if(!parser->doc.data) return Qnil; enc = rb_enc_from_index(parser->doc.enc_index); return rb_enc_str_new(parser->doc.data, parser->doc.length, enc); } |
#document_length ⇒ Object
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 |
# File 'ext/html_tokenizer_ext/parser.c', line 578 static VALUE parser_document_length_method(VALUE self) { struct parser_t *parser = NULL; rb_encoding *enc; const char *buf; Parser_Get_Struct(self, parser); if(parser->doc.data == NULL) { return ULONG2NUM(0); } else { buf = parser->doc.data; enc = rb_enc_from_index(parser->doc.enc_index); return ULONG2NUM(rb_enc_strlen(buf, buf + parser->doc.length, enc)); } } |
#errors(error_p) ⇒ Object
733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 |
# File 'ext/html_tokenizer_ext/parser.c', line 733 static VALUE parser_errors_method(VALUE self, VALUE error_p) { struct parser_t *parser = NULL; VALUE list; size_t i; Parser_Get_Struct(self, parser); list = rb_ary_new(); for(i=0; i<parser->errors_count; i++) { if(parser->errors[i].) { rb_ary_push(list, create_parser_error(&parser->errors[i])); } } return list; } |
#errors_count ⇒ Object
714 715 716 717 718 719 |
# File 'ext/html_tokenizer_ext/parser.c', line 714 static VALUE parser_errors_count_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return INT2NUM(parser->errors_count); } |
#line_number ⇒ Object
750 751 752 753 754 755 |
# File 'ext/html_tokenizer_ext/parser.c', line 750 static VALUE parser_line_number_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return ULONG2NUM(parser->doc.line_number); } |
#parse(source) ⇒ Object
557 558 559 560 |
# File 'ext/html_tokenizer_ext/parser.c', line 557 static VALUE parser_parse_method(VALUE self, VALUE source) { return parser_append_data(self, source, 0); } |
#quote_character ⇒ Object
677 678 679 680 681 682 683 684 |
# File 'ext/html_tokenizer_ext/parser.c', line 677 static VALUE parser_quote_character_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return parser->attribute.is_quoted ? rb_str_new(&parser->tk.attribute_value_start, 1) : Qnil; } |
#rawtext_text ⇒ Object
707 708 709 710 711 712 |
# File 'ext/html_tokenizer_ext/parser.c', line 707 static VALUE parser_rawtext_text_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return ref_to_str(parser, &parser->rawtext.text); } |
#self_closing_tag? ⇒ Boolean
656 657 658 659 660 661 |
# File 'ext/html_tokenizer_ext/parser.c', line 656 static VALUE parser_self_closing_tag_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return parser->tag.self_closing ? Qtrue : Qfalse; } |
#tag_name ⇒ Object
642 643 644 645 646 647 |
# File 'ext/html_tokenizer_ext/parser.c', line 642 static VALUE parser_tag_name_method(VALUE self) { struct parser_t *parser = NULL; Parser_Get_Struct(self, parser); return ref_to_str(parser, &parser->tag.name); } |