Module: NlpToolz

Extended by:
Lang
Defined in:
lib/nlp_toolz/parser.rb,
lib/nlp_toolz.rb,
lib/nlp_toolz/tokens.rb,
lib/nlp_toolz/version.rb,
lib/nlp_toolz/pos_tags.rb,
lib/nlp_toolz/load_jars.rb,
lib/nlp_toolz/sentences.rb

Overview

TODO (2012-10-24): add training capabilities

Defined Under Namespace

Classes: Parser, PosTags, Sentences, Tokens

Constant Summary collapse

# Version string of the library.
VERSION =
"1.0.5"
# Path to the "models" directory, two levels above the defining file's directory.
MODELS =
File.join(File.dirname(__FILE__), '..', '..', "models")
# Path to the "jars" directory, two levels above the defining file's directory.
JARS =
File.join(File.dirname(__FILE__), '..', '..', "jars")
# The bundled jar paths joined into one path list. File::PATH_SEPARATOR is
# used instead of a literal ":" so the list is also valid on platforms whose
# path-list separator differs (";" on Windows); on Unix the value is unchanged.
# NOTE(review): presumably handed to the JVM as its classpath -- confirm in load_jars.rb.
CLASS_PATH =
[
  File.join(JARS, "jwnl-1.3.3.jar"),
  File.join(JARS, "opennlp-tools-1.5.3.jar"),
  File.join(JARS, "opennlp-maxent-3.0.3.jar")
].join(File::PATH_SEPARATOR)

Class Method Summary collapse

Methods included from Lang

alternative_langs, get_language

Methods included from UrlHandler

build_url, post_data

Class Method Details

.get_lang(input) ⇒ Object



32
33
34
# File 'lib/nlp_toolz.rb', line 32

# Convenience wrapper that forwards +input+ to NlpToolz.get_language and
# hands back whatever that call returns.
#
# @param input [Object] value to run language detection on
#   (presumably a String of text -- confirm against Lang#get_language)
# @return [Object] the unmodified result of NlpToolz.get_language
def get_lang(input)
  detected = NlpToolz.get_language(input)
  detected
end

.get_sentences(input, lang = nil) ⇒ Object



36
37
38
39
# File 'lib/nlp_toolz.rb', line 36

# Builds an NlpToolz::Sentences for +input+ and asks it to split the text
# into sentences, but only when that object reports having a model.
#
# @param input [Object] text handed to NlpToolz::Sentences.new
# @param lang [Object, nil] language handed to NlpToolz::Sentences.new
# @return [Object, nil] the result of +split_into_sentences+, or nil when
#   +has_model?+ is falsy
def get_sentences(input, lang = nil)
  splitter = NlpToolz::Sentences.new(input, lang)
  # No model available: callers receive nil rather than an empty result.
  return unless splitter.has_model?

  splitter.split_into_sentences
end

.parse_sentence(input, lang = nil) ⇒ Object



69
70
71
72
73
74
# File 'lib/nlp_toolz.rb', line 69

# Builds an NlpToolz::Parser for +input+, runs its +parse_text+ step and
# returns the parser's +parse_hash+.
#
# @param input [Object] sentence handed to NlpToolz::Parser.new
# @param lang [Object, nil] language handed to NlpToolz::Parser.new
# @return [Object] whatever +parse_hash+ returns after +parse_text+ has run
def parse_sentence(input, lang = nil)
  NlpToolz::Parser.new(input, lang)
                  .tap { |parser| parser.parse_text } # parse first, result is read below
                  .parse_hash
end

.parse_text(input, lang = nil) ⇒ Object



76
77
78
79
80
81
82
83
# File 'lib/nlp_toolz.rb', line 76

# Splits +input+ into sentences via +get_sentences+ and parses each one with
# +parse_sentence+.
#
# +get_sentences+ returns nil when no sentence model is available; that case
# is treated as "no sentences" instead of raising NoMethodError on nil.
#
# @param input [Object] text handed to +get_sentences+
# @param lang [Object, nil] language passed to both helpers
# @return [Array] one +parse_sentence+ result per sentence, in order;
#   empty when +get_sentences+ yields nil
def parse_text(input, lang = nil)
  sentences = get_sentences(input, lang) || []
  sentences.map { |sentence| parse_sentence(sentence, lang) }
end

.tag_sentence(input, lang = nil) ⇒ Object



55
56
57
58
# File 'lib/nlp_toolz.rb', line 55

# Builds an NlpToolz::PosTags for +input+ and returns its +get_pos_tags+
# result, but only when that object reports having a model.
#
# @param input [Object] sentence handed to NlpToolz::PosTags.new
# @param lang [Object, nil] language handed to NlpToolz::PosTags.new
# @return [Object, nil] the result of +get_pos_tags+, or nil when
#   +has_model?+ is falsy
def tag_sentence(input, lang = nil)
  tagger = NlpToolz::PosTags.new(input, lang)
  # No model available: callers receive nil.
  return unless tagger.has_model?

  tagger.get_pos_tags
end

.tag_text(input, lang = nil) ⇒ Object



60
61
62
63
64
65
66
67
# File 'lib/nlp_toolz.rb', line 60

# Splits +input+ into sentences via +get_sentences+ and tags each one with
# +tag_sentence+.
#
# +get_sentences+ returns nil when no sentence model is available; that case
# is treated as "no sentences" instead of raising NoMethodError on nil.
#
# @param input [Object] text handed to +get_sentences+
# @param lang [Object, nil] language passed to both helpers
# @return [Array] one +tag_sentence+ result per sentence, in order (an entry
#   is nil when +tag_sentence+ returns nil); empty when +get_sentences+
#   yields nil
def tag_text(input, lang = nil)
  sentences = get_sentences(input, lang) || []
  sentences.map { |sentence| tag_sentence(sentence, lang) }
end

.tokenize_sentence(input, lang = nil) ⇒ Object



41
42
43
44
# File 'lib/nlp_toolz.rb', line 41

# Builds an NlpToolz::Tokens for +input+ and returns the result of its
# +tokenize+ call.
#
# @param input [Object] sentence handed to NlpToolz::Tokens.new
# @param lang [Object, nil] language handed to NlpToolz::Tokens.new
# @return [Object] whatever +tokenize+ returns
def tokenize_sentence(input, lang = nil)
  NlpToolz::Tokens.new(input, lang).tokenize
end

.tokenize_text(input, lang = nil) ⇒ Object



46
47
48
49
50
51
52
53
# File 'lib/nlp_toolz.rb', line 46

# Splits +input+ into sentences via +get_sentences+ and tokenizes each one
# with +tokenize_sentence+.
#
# +get_sentences+ returns nil when no sentence model is available; that case
# is treated as "no sentences" instead of raising NoMethodError on nil.
#
# @param input [Object] text handed to +get_sentences+
# @param lang [Object, nil] language passed to both helpers
# @return [Array] one +tokenize_sentence+ result per sentence, in order;
#   empty when +get_sentences+ yields nil
def tokenize_text(input, lang = nil)
  sentences = get_sentences(input, lang) || []
  sentences.map { |sentence| tokenize_sentence(sentence, lang) }
end