Class: HexaPDF::Content::Tokenizer

Inherits:
Tokenizer
  • Object
show all
Defined in:
lib/hexapdf/content/parser.rb

Overview

More efficient tokenizer for content streams. This tokenizer class works directly on a string and not on an IO.

Note: Indirect object references are not supported by this tokenizer!

See: PDF1.7 s7.2

Constant Summary

Constants inherited from Tokenizer

Tokenizer::DELIMITER, Tokenizer::NO_MORE_TOKENS, Tokenizer::TOKEN_ARRAY_END, Tokenizer::TOKEN_ARRAY_START, Tokenizer::TOKEN_DICT_END, Tokenizer::TOKEN_DICT_START, Tokenizer::WHITESPACE, Tokenizer::WHITESPACE_MULTI_RE, Tokenizer::WHITESPACE_OR_DELIMITER_RE

Instance Attribute Summary

Attributes inherited from Tokenizer

#io

Instance Method Summary collapse

Methods inherited from Tokenizer

#next_byte, #next_object, #next_xref_entry, #peek_token, #skip_whitespace

Constructor Details

#initialize(string) ⇒ Tokenizer

Creates a new tokenizer.



49
50
51
# File 'lib/hexapdf/content/parser.rb', line 49

# Creates a new tokenizer for the given +string+.
#
# The string is wrapped in a StringScanner that is stored in @ss; the other methods of this
# class operate on that scanner.
def initialize(string)
  @ss = StringScanner.new(string)
end

Instance Method Details

#next_token ⇒ Object

See: HexaPDF::Tokenizer#next_token



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/hexapdf/content/parser.rb', line 69

# Returns the next token found at the current scan position, or NO_MORE_TOKENS once the end
# of the string has been reached.
#
# Leading whitespace is skipped first. Comments (everything from '%' up to, but excluding,
# the next CR or LF) are skipped as well; a comment that is not followed by a line ending
# ends the token stream. The byte at the scan position then selects which parse method is
# called.
#
# Raises HexaPDF::MalformedPDFError if a '>' is found that is not immediately followed by a
# second '>'.
#
# See: HexaPDF::Tokenizer#next_token
def next_token
  @ss.skip(WHITESPACE_MULTI_RE)

  # Comments are skipped in a loop rather than by calling next_token recursively, so a long
  # run of comment lines cannot exhaust the call stack. getbyte returns nil at the end of the
  # string, which simply ends the loop.
  while @ss.string.getbyte(@ss.pos) == 37 # %
    return NO_MORE_TOKENS unless @ss.skip_until(/(?=[\r\n])/)
    @ss.skip(WHITESPACE_MULTI_RE)
  end

  case (@ss.eos? ? -1 : @ss.string.getbyte(@ss.pos))
  when 43, 45, 46, 48..57 # + - . 0..9
    parse_number
  when 65..90, 97..122 # A..Z a..z
    parse_keyword
  when 47 # /
    parse_name
  when 40 # (
    parse_literal_string
  when 60 # <
    # A single '<' starts a hex string, '<<' starts a dictionary.
    if @ss.string.getbyte(@ss.pos + 1) != 60
      parse_hex_string
    else
      @ss.pos += 2
      TOKEN_DICT_START
    end
  when 62 # >
    # A '>' is only valid here as the first half of '>>'.
    unless @ss.string.getbyte(@ss.pos + 1) == 62
      raise HexaPDF::MalformedPDFError.new("Delimiter '>' found at invalid position", pos: pos)
    end
    @ss.pos += 2
    TOKEN_DICT_END
  when 91 # [
    @ss.pos += 1
    TOKEN_ARRAY_START
  when 93 # ]
    @ss.pos += 1
    TOKEN_ARRAY_END
  when 123, 125 # { }
    Token.new(@ss.get_byte)
  when -1
    NO_MORE_TOKENS
  else
    # Every other byte is handed to the keyword parser.
    parse_keyword
  end
end

#pos ⇒ Object

See: HexaPDF::Tokenizer#pos



54
55
56
# File 'lib/hexapdf/content/parser.rb', line 54

# Returns the current byte position of the underlying string scanner.
#
# See: HexaPDF::Tokenizer#pos
def pos
  @ss.pointer
end

#pos=(pos) ⇒ Object

See: HexaPDF::Tokenizer#pos=



59
60
61
# File 'lib/hexapdf/content/parser.rb', line 59

# Moves the underlying string scanner to the byte position +pos+.
#
# See: HexaPDF::Tokenizer#pos=
def pos=(pos)
  @ss.pointer = pos
end

#scan_until(re) ⇒ Object

See: HexaPDF::Tokenizer#scan_until



64
65
66
# File 'lib/hexapdf/content/parser.rb', line 64

# Delegates to StringScanner#scan_until on the underlying scanner with the regular
# expression +re+ and returns its result.
#
# See: HexaPDF::Tokenizer#scan_until
def scan_until(re)
  @ss.scan_until(re)
end