Class: HexaPDF::Content::Tokenizer
- Defined in:
- lib/hexapdf/content/parser.rb
Overview
More efficient tokenizer for content streams. This tokenizer class works directly on a string and not on an IO.
Note: Indirect object references are not supported by this tokenizer!
See: PDF 1.7 specification, section 7.2
Constant Summary
Constants inherited from Tokenizer
Tokenizer::DELIMITER, Tokenizer::NO_MORE_TOKENS, Tokenizer::TOKEN_ARRAY_END, Tokenizer::TOKEN_ARRAY_START, Tokenizer::TOKEN_DICT_END, Tokenizer::TOKEN_DICT_START, Tokenizer::WHITESPACE, Tokenizer::WHITESPACE_MULTI_RE, Tokenizer::WHITESPACE_OR_DELIMITER_RE
Instance Attribute Summary
-
#string ⇒ Object
readonly
The string that is tokenized.
Attributes inherited from Tokenizer
Instance Method Summary
-
#initialize(string) ⇒ Tokenizer
constructor
Creates a new tokenizer.
-
#next_token ⇒ Object
See: HexaPDF::Tokenizer#next_token.
-
#pos ⇒ Object
See: HexaPDF::Tokenizer#pos.
-
#pos=(pos) ⇒ Object
See: HexaPDF::Tokenizer#pos=.
-
#scan_until(re) ⇒ Object
See: HexaPDF::Tokenizer#scan_until.
Methods inherited from Tokenizer
#next_byte, #next_object, #next_xref_entry, #peek_token, #skip_whitespace
Constructor Details
#initialize(string) ⇒ Tokenizer
Creates a new tokenizer.
53 54 55 56 |
# File 'lib/hexapdf/content/parser.rb', line 53

# Creates a new tokenizer.
#
# string:: The content to tokenize. It is wrapped in a StringScanner for scanning and also
#          kept as-is so that it can be returned by #string and read byte-wise.
def initialize(string)
  @ss = StringScanner.new(string)
  @string = string
end
Instance Attribute Details
#string ⇒ Object (readonly)
The string that is tokenized.
50 51 52 |
# File 'lib/hexapdf/content/parser.rb', line 50

# The string that is tokenized (read-only accessor for the value given to the constructor).
def string
  @string
end
Instance Method Details
#next_token ⇒ Object
See: HexaPDF::Tokenizer#next_token
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/hexapdf/content/parser.rb', line 74

# See: HexaPDF::Tokenizer#next_token
#
# Skips leading whitespace and then dispatches on the byte found at the current scanner
# position. Returns NO_MORE_TOKENS once the end of the string is reached, which includes the
# case of a comment that is not followed by a line break.
#
# Raises HexaPDF::MalformedPDFError if a '>' is not directly followed by a second '>'.
def next_token
  @ss.skip(WHITESPACE_MULTI_RE)
  byte = @string.getbyte(@ss.pos) || -1 # -1 stands in for "past the end of the string"
  if (48 <= byte && byte <= 57) || byte == 45 || byte == 43 || byte == 46 # 0..9 - + .
    parse_number
  elsif (65 <= byte && byte <= 90) || (96 <= byte && byte <= 121)
    # A..Z and the bytes 96..121 (` through y). Every byte not matched by any branch, 'z'
    # included, ends up in the final else branch which calls parse_keyword as well.
    # NOTE(review): the range looks like it was meant to be 97..122 (a..z) - confirm.
    parse_keyword
  elsif byte == 47 # /
    parse_name
  elsif byte == 40 # (
    parse_literal_string
  elsif byte == 60 # <
    # A single '<' starts a hex string, '<<' starts a dictionary.
    if @string.getbyte(@ss.pos + 1) != 60
      parse_hex_string
    else
      @ss.pos += 2
      TOKEN_DICT_START
    end
  elsif byte == 62 # >
    # Only '>>' is accepted here.
    unless @string.getbyte(@ss.pos + 1) == 62
      raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos)
    end
    @ss.pos += 2
    TOKEN_DICT_END
  elsif byte == 91 # [
    @ss.pos += 1
    TOKEN_ARRAY_START
  elsif byte == 93 # ]
    @ss.pos += 1
    TOKEN_ARRAY_END
  elsif byte == 123 || byte == 125 # { }
    Token.new(@ss.get_byte)
  elsif byte == 37 # %
    # Skip the comment up to (but not including) the next line break and try again; without a
    # line break the comment runs to the end of the string.
    return NO_MORE_TOKENS unless @ss.skip_until(/(?=[\r\n])/)
    next_token
  elsif byte == -1
    NO_MORE_TOKENS
  else
    parse_keyword
  end
end
#pos ⇒ Object
See: HexaPDF::Tokenizer#pos
59 60 61 |
# File 'lib/hexapdf/content/parser.rb', line 59

# See: HexaPDF::Tokenizer#pos
#
# Returns the current position of the underlying string scanner.
def pos
  @ss.pos
end
#pos=(pos) ⇒ Object
See: HexaPDF::Tokenizer#pos=
64 65 66 |
# File 'lib/hexapdf/content/parser.rb', line 64

# See: HexaPDF::Tokenizer#pos=
#
# Moves the underlying string scanner to the given position.
def pos=(pos)
  @ss.pos = pos
end
#scan_until(re) ⇒ Object
See: HexaPDF::Tokenizer#scan_until
69 70 71 |
# File 'lib/hexapdf/content/parser.rb', line 69

# See: HexaPDF::Tokenizer#scan_until
#
# Delegates to StringScanner#scan_until with the regular expression +re+ and returns its
# result.
def scan_until(re)
  @ss.scan_until(re)
end