Class: OpenNlp::Chunker

Inherits:
Tool
  • Object
show all
Defined in:
lib/open_nlp/chunker.rb

Instance Attribute Summary

Attributes inherited from Tool

#j_instance

Instance Method Summary collapse

Methods included from JavaClass

included

Constructor Details

#initialize(model, token_model, pos_model) ⇒ Chunker

Returns a new instance of Chunker.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/open_nlp/chunker.rb', line 5

# Builds a chunker along with the tokenizer and POS tagger it uses internally.
#
# @param model chunker model, forwarded unchanged to the parent initializer
# @param token_model [Model::Tokenizer] model used to build the internal Tokenizer
# @param pos_model [Model::POSTagger] model used to build the internal POSTagger
# @raise [ArgumentError] if token_model is not a Model::Tokenizer or
#   pos_model is not a Model::POSTagger
def initialize(model, token_model, pos_model)
  super(model)

  # The messages name the classes that are actually checked, so a caller
  # reading the error knows which type to pass.
  unless token_model.is_a?(Model::Tokenizer)
    fail ArgumentError, 'token model must be an OpenNlp::Model::Tokenizer'
  end

  unless pos_model.is_a?(Model::POSTagger)
    fail ArgumentError, 'pos model must be an OpenNlp::Model::POSTagger'
  end

  @tokenizer = Tokenizer.new(token_model)
  @pos_tagger = POSTagger.new(pos_model)
end

Instance Method Details

#chunk(str) ⇒ Array

Chunks a string into part-of-sentence pieces

Parameters:

  • str (String)

    string to chunk

Returns:

  • (Array)

    array of chunks with part-of-sentence information



24
25
26
27
28
29
30
31
32
33
# File 'lib/open_nlp/chunker.rb', line 24

# Splits a string into chunks.
#
# The string is tokenized, the tokens are POS-tagged, and both lists are
# handed to the underlying Java chunker as Java String arrays. The chunker
# output, the tokens and the tags are then combined by build_chunks.
#
# @param str [String] string to chunk
# @return [Array] whatever build_chunks produces for the chunker output
# @raise [ArgumentError] if str is not a String
def chunk(str)
  unless str.is_a?(String)
    raise ArgumentError, 'str must be a String'
  end

  words = tokenizer.tokenize(str)
  tags = pos_tagger.tag(words).to_ary

  java_result = j_instance.chunk(words.to_java(:String), tags.to_java(:String))

  build_chunks(java_result.to_ary, words, tags)
end