Class: RMMSeg::Ferret::Tokenizer

Inherits:
Ferret::Analysis::TokenStream
  • Object
show all
Defined in:
lib/rmmseg/ferret.rb

Overview

The Tokenizer tokenizes text with RMMSeg::Algorithm.

Instance Method Summary collapse

Constructor Details

#initialize(str) ⇒ Tokenizer

Create a new Tokenizer to tokenize text



34
35
36
# File 'lib/rmmseg/ferret.rb', line 34

# Create a new Tokenizer for +str+.
#
# The constructor does nothing except hand +str+ to the +text=+ writer on
# self, so construction and re-targeting an existing tokenizer share the
# same setup path.
def initialize(str)
  self.text = str
end

Instance Method Details

#next ⇒ Object

Get next token



39
40
41
42
43
44
45
46
47
48
49
# File 'lib/rmmseg/ferret.rb', line 39

# Advance the tokenizer by one token.
#
# Pulls the next segment from @algor. When the algorithm reports nil the
# stream is exhausted and nil is returned. Otherwise the segment's text,
# start and end are copied onto @token, and that same @token instance is
# returned (the object is reused across calls, not reallocated).
def next
  segment = @algor.next_token
  return nil if segment.nil?

  @token.text  = segment.text
  @token.start = segment.start
  @token.end   = segment.end
  @token
end

#text ⇒ Object

Get the text being tokenized



52
53
54
# File 'lib/rmmseg/ferret.rb', line 52

# Get the text being tokenized, i.e. the value currently held in @text.
def text
  @text
end

#text=(str) ⇒ Object

Set the text to be tokenized



57
58
59
60
61
# File 'lib/rmmseg/ferret.rb', line 57

# Point the tokenizer at a new piece of text.
#
# Every call starts from a clean slate:
# * @token becomes a fresh Ferret token with empty text and 0/0 offsets,
# * @text remembers +str+,
# * @algor becomes a new Algorithm built over that same string.
def text=(str)
  @token = ::Ferret::Analysis::Token.new("", 0, 0)
  @text  = str
  @algor = Algorithm.new(str)
end