Class: CSKit::Parsers::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/cskit/parsers/tokenizer.rb

Overview

Base class for tokenizers.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(citation) ⇒ Tokenizer

Returns a new instance of Tokenizer.



9
10
11
# File 'lib/cskit/parsers/tokenizer.rb', line 9

# Creates a tokenizer for the given citation.
#
# @param citation the value to tokenize; stored as-is in @citation
#   (presumably a citation String — confirm against callers)
def initialize(citation)
  @citation = citation
end

Instance Attribute Details

#citation ⇒ Object (readonly)

Returns the value of attribute citation.



7
8
9
# File 'lib/cskit/parsers/tokenizer.rb', line 7

# Returns the citation this tokenizer was constructed with (read-only).
def citation
  @citation
end

Instance Method Details

#each_token ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/cskit/parsers/tokenizer.rb', line 13

# Splits the citation into tokens, yielding each one to the block.
#
# Works on a copy of the citation, so the original is never modified. At
# every step the entries of +patterns+ are tried in order and the first one
# that matches decides the next token; the matched text is then removed from
# the front of the remaining input. Matches whose type is +:space+ are
# consumed but not yielded.
#
# NOTE(review): +patterns+ is defined outside this method (presumably by
# subclasses) and is assumed to map token types to regexes anchored at the
# start of the input — confirm against the concrete tokenizers.
#
# @yield [Token] each non-space token, built from its type, the matched text
#   and the offset of that text
# @return [Enumerator] when no block is given
# @raise [ArgumentError] when nothing can be consumed from the remaining
#   text (previously this situation looped forever)
def each_token
  return to_enum(__method__) unless block_given?

  text = citation.dup
  pos = 0

  until text.empty?
    consumed = 0

    patterns.each_pair do |token_type, pattern|
      match = pattern.match(text)
      next unless match

      yield Token.new(token_type, match[0], pos) unless token_type == :space

      consumed = match[0].size
      break
    end

    # Without progress the loop would never terminate: either no pattern
    # matched or the winning pattern matched an empty string.
    if consumed.zero?
      raise ArgumentError, "unable to tokenize #{text.inspect} at position #{pos}"
    end

    text[0...consumed] = ''
    pos += consumed
  end
end