Class: NlpToolz::Tokens
- Inherits:
-
Object
- Object
- NlpToolz::Tokens
- Defined in:
- lib/nlp_toolz/tokens.rb
Constant Summary collapse
- FileInputStream =
Load Java classes.
Rjb::import('java.io.FileInputStream')
- TokenizerModel =
Rjb::import('opennlp.tools.tokenize.TokenizerModel')
- TokenizerME =
Rjb::import('opennlp.tools.tokenize.TokenizerME')
Instance Attribute Summary collapse
-
#input ⇒ Object
Returns the value of attribute input.
-
#lang ⇒ Object
Returns the value of attribute lang.
-
#model ⇒ Object
Returns the value of attribute model.
-
#model_name ⇒ Object
Returns the value of attribute model_name.
-
#tokens ⇒ Object
Returns the value of attribute tokens.
Instance Method Summary collapse
- #has_model? ⇒ Boolean
-
#initialize(input, lang = nil) ⇒ Tokens
constructor
A new instance of Tokens.
- #tokenize ⇒ Object
Constructor Details
#initialize(input, lang = nil) ⇒ Tokens
Returns a new instance of Tokens.
19 20 21 22 23 24 |
# File 'lib/nlp_toolz/tokens.rb', line 19
def initialize(input, lang = nil)
  @input = input
  @lang = lang || NlpToolz::Language.get_language(input)
  @model_name = "#{@lang}-token.bin"
  get_model
end
Instance Attribute Details
#input ⇒ Object
Returns the value of attribute input.
17 18 19 |
# File 'lib/nlp_toolz/tokens.rb', line 17
def input
  @input
end
#lang ⇒ Object
Returns the value of attribute lang.
17 18 19 |
# File 'lib/nlp_toolz/tokens.rb', line 17
def lang
  @lang
end
#model ⇒ Object
Returns the value of attribute model.
17 18 19 |
# File 'lib/nlp_toolz/tokens.rb', line 17
def model
  @model
end
#model_name ⇒ Object
Returns the value of attribute model_name.
17 18 19 |
# File 'lib/nlp_toolz/tokens.rb', line 17
def model_name
  @model_name
end
#tokens ⇒ Object
Returns the value of attribute tokens.
17 18 19 |
# File 'lib/nlp_toolz/tokens.rb', line 17
def tokens
  @tokens
end
Instance Method Details
#has_model? ⇒ Boolean
30 31 32 |
# File 'lib/nlp_toolz/tokens.rb', line 30
def has_model?
  @model
end
#tokenize ⇒ Object
26 27 28 |
# File 'lib/nlp_toolz/tokens.rb', line 26
def tokenize
  @tokens = @tokenizer.tokenize(@input)
end