Class: NlpToolz::Tokens

Inherits:
Object
  • Object
show all
Defined in:
lib/nlp_toolz/tokens.rb

Constant Summary collapse

FileInputStream =

Load Java classes.

Rjb::import('java.io.FileInputStream')
TokenizerModel =
Rjb::import('opennlp.tools.tokenize.TokenizerModel')
TokenizerME =
Rjb::import('opennlp.tools.tokenize.TokenizerME')

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input, lang = nil) ⇒ Tokens

Returns a new instance of Tokens.



19
20
21
22
23
24
# File 'lib/nlp_toolz/tokens.rb', line 19

# Stores the text, settles on a language, derives the model file name and
# then delegates to get_model.
#
# @param input [Object] text to work on; kept unchanged in @input
# @param lang [Object, nil] language identifier; when nil (or false) the
#   value of NlpToolz::Language.get_language(input) is used instead
def initialize(input, lang = nil)
  @input = input

  # Explicit language wins; fall back to detection only when none was given.
  @lang = lang
  @lang ||= NlpToolz::Language.get_language(input)

  @model_name = "#{@lang}-token.bin"

  # NOTE(review): get_model is defined outside this view — presumably it
  # sets @model and @tokenizer; confirm against the rest of the class.
  get_model
end

Instance Attribute Details

#input ⇒ Object

Returns the value of attribute input.



17
18
19
# File 'lib/nlp_toolz/tokens.rb', line 17

# Reader for the @input instance variable.
#
# @return [Object] whatever is currently stored in @input (nil when unset)
def input
  instance_variable_get(:@input)
end

#lang ⇒ Object

Returns the value of attribute lang.



17
18
19
# File 'lib/nlp_toolz/tokens.rb', line 17

# Reader for the @lang instance variable.
#
# @return [Object] whatever is currently stored in @lang (nil when unset)
def lang
  instance_variable_get(:@lang)
end

#model ⇒ Object

Returns the value of attribute model.



17
18
19
# File 'lib/nlp_toolz/tokens.rb', line 17

# Reader for the @model instance variable.
#
# @return [Object] whatever is currently stored in @model (nil when unset)
def model
  instance_variable_get(:@model)
end

#model_name ⇒ Object

Returns the value of attribute model_name.



17
18
19
# File 'lib/nlp_toolz/tokens.rb', line 17

# Reader for the @model_name instance variable.
#
# @return [Object] whatever is currently stored in @model_name (nil when unset)
def model_name
  instance_variable_get(:@model_name)
end

#tokens ⇒ Object

Returns the value of attribute tokens.



17
18
19
# File 'lib/nlp_toolz/tokens.rb', line 17

# Reader for the @tokens instance variable.
#
# @return [Object] whatever is currently stored in @tokens (nil when unset)
def tokens
  instance_variable_get(:@tokens)
end

Instance Method Details

#has_model? ⇒ Boolean

Returns:

  • (Boolean)


30
31
32
# File 'lib/nlp_toolz/tokens.rb', line 30

# Reports whether a model is set on this instance.
#
# The result is coerced to a strict Boolean so the predicate does not hand
# the object held in @model back to callers; use #model to get the object.
#
# @return [Boolean] true when @model is truthy, false when it is nil or false
def has_model?
  @model ? true : false
end

#tokenize ⇒ Object



26
27
28
# File 'lib/nlp_toolz/tokens.rb', line 26

# Passes @input to @tokenizer.tokenize, stores the result in @tokens and
# returns it.
#
# NOTE(review): @tokenizer is assigned outside this view — presumably by
# get_model during construction; confirm it is always set before this call.
#
# @return [Object] the value returned by @tokenizer.tokenize(@input)
def tokenize
  tokenized = @tokenizer.tokenize(@input)
  @tokens = tokenized
end