Class: RMMSeg::Ferret::Tokenizer

Inherits:
Ferret::Analysis::TokenStream
  • Object
show all
Defined in:
lib/rmmseg/ferret.rb

Overview

The Tokenizer tokenizes text with RMMSeg::Algorithm.

Instance Method Summary collapse

Constructor Details

#initialize(str) ⇒ Tokenizer

Create a new Tokenizer to tokenize text



35
36
37
# File 'lib/rmmseg/ferret.rb', line 35

# Create a new Tokenizer for +str+.
#
# All setup is delegated to the #text= writer, so constructing a tokenizer
# and assigning new text to an existing one go through the same code path.
def initialize(str)
  self.text = str
end

Instance Method Details

#next ⇒ Object

Get next token



40
41
42
43
44
45
46
47
48
49
50
# File 'lib/rmmseg/ferret.rb', line 40

# Get the next token.
#
# Asks @algor for its next token. Returns nil when @algor.next_token
# returns nil. Otherwise the token's text, start and end are copied into
# @token and @token is returned.
#
# Note: the same @token instance is returned on every call and is
# overwritten by the following call, so callers that need to keep a token
# must copy its fields first.
def next
  segment = @algor.next_token
  return if segment.nil?

  @token.text = segment.text
  @token.start = segment.start
  @token.end = segment.end
  @token
end

#text ⇒ Object

Get the text being tokenized



53
54
55
# File 'lib/rmmseg/ferret.rb', line 53

# Get the text being tokenized.
#
# Returns the value currently stored in @text (nil if nothing has been
# assigned yet).
def text
  @text
end

#text=(str) ⇒ Object

Set the text to be tokenized



58
59
60
61
62
# File 'lib/rmmseg/ferret.rb', line 58

# Set the text to be tokenized.
#
# Stores +str+ in @text, replaces @token with a fresh empty
# ::Ferret::Analysis::Token ("", 0, 0), and builds a new Algorithm over
# +str+ in @algor.
def text=(str)
  @text = str
  @token = ::Ferret::Analysis::Token.new("", 0, 0)
  @algor = Algorithm.new(str)
end