Module: Gemmy::Components::Nlp

Defined in:
lib/gemmy/patches_loaded/components/nlp.rb

Instance Method Summary collapse

Instance Method Details

#default_noun_proc_string(word) ⇒ Object



126
127
128
129
130
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 126

# Builds the Ruby source text of the default proc stored for a noun.
#
# The generated lambda takes one argument (named vn_phrases), ignores it,
# and returns the word itself.
#
# @param word [#to_s] the noun to embed in the generated source
# @return [String] a single line of Ruby source, terminated by a newline
def default_noun_proc_string(word)
  # String#inspect produces a double-quoted literal in which quotes,
  # backslashes and interpolation markers are escaped. The generated source
  # therefore stays syntactically valid, and cannot carry extra code,
  # whatever characters the word contains. For plain words the output is
  # identical to naive interpolation.
  literal = word.to_s.inspect
  <<~Ruby
    ->(vn_phrases){ #{literal} }
  Ruby
end

#default_verb_proc_string(word) ⇒ Object



132
133
134
135
136
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 132

# Builds the Ruby source text of the default proc stored for a verb.
#
# The generated lambda accepts any number of arguments (named nouns) and
# returns the verb followed by a space and the arguments joined by spaces.
#
# @param word [#to_s] the verb to embed in the generated source
# @return [String] a single line of Ruby source, terminated by a newline
def default_verb_proc_string(word)
  # Escape the word the way String#inspect would, then drop the surrounding
  # quotes so it can sit inside the generated double-quoted literal. Quotes,
  # backslashes and interpolation markers in the word can then no longer
  # break or alter the generated code. Plain words are emitted unchanged.
  escaped = word.to_s.inspect[1...-1]
  <<~Ruby
    ->(*nouns){ "#{escaped} \#{nouns.join " "}" }
  Ruby
end

#engtagger_lookup(sentence) ⇒ Object

This uses EngTagger to analyze a sentence. The results will not be ambiguous: in this method’s results, a given word will be exactly one of ‘verb’, ‘noun’, or ‘unknown’.



141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 141

# Labels every word of the sentence with a single part of speech taken from
# the EngTagger results.
#
# A word listed among the verbs is labelled "verb" (checked first), a word
# listed among the nouns is labelled "noun", anything else "unknown".
#
# @param sentence [String] the sentence to analyse
# @return the result of `graph` over [word, [label]] pairs
#   (presumably a word => [label] hash; `graph` is defined outside this file)
def engtagger_lookup(sentence)
  noun_words, verb_words = tag_sentence(sentence)
  sentence.words.graph do |token|
    label =
      if verb_words.include?(token)
        "verb"
      elsif noun_words.include?(token)
        "noun"
      else
        "unknown"
      end
    [token, [label]]
  end
end

#finalize_engtagger_pos(pos) ⇒ Object



85
86
87
88
89
90
91
92
93
94
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 85

# Reduces an EngTagger classification to a single label.
#
# Used when WordPos has no definition for the word, so there is no
# ambiguity to resolve: "noun" wins if present, then "verb", otherwise
# the word is "unknown".
#
# @param pos [#include?] the EngTagger classification
# @return [String] "noun", "verb" or "unknown"
def finalize_engtagger_pos(pos)
  %w[noun verb].find { |label| pos.include?(label) } || "unknown"
end

#finalize_pos(word, pos) ⇒ Object

Compare the WordPos and EngTagger results and save a proc for the word if a part of speech is found. EngTagger is only prioritized if the WordPos result is missing.



72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 72

# Decides the final part of speech for a word, saves the matching default
# proc, and returns the decision.
#
# The word is looked up with WordPos. If WordPos reports neither "noun" nor
# "verb", the EngTagger classification is used. Otherwise the WordPos result
# itself is resolved, so a word WordPos considers both noun and verb ends up
# "unknown" and is not saved. The decision goes through word_pos_cache
# (presumably the block only runs on a cache miss -- confirm against Cache).
#
# @param word [String] the word being classified
# @param pos [Array<String>] the EngTagger classification for the word
# @return [Hash] { word: word, pos: [final_pos] }
def finalize_pos(word, pos)
  final_pos = word_pos_cache.get_or_set(word) do
    doublecheck = wordpos_lookup(word)
    if %w[noun verb].none? { |label| doublecheck.include?(label) }
      finalize_engtagger_pos(pos)
    else
      # Resolve the WordPos answer, not the EngTagger one: the EngTagger
      # value holds a single label and can never reveal WordPos ambiguity.
      finalize_wordpos_pos(doublecheck)
    end
  end
  save_proc(final_pos, word)
  { word: word, pos: [final_pos] }
end

#finalize_wordpos_pos(pos) ⇒ Object



96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 96

# Reduces a WordPos classification to a single label.
#
# WordPos can return ambiguous results, and only unambiguous words are
# selected: a word that is both noun and verb (or neither) is "unknown".
#
# @param pos [#include?] the classification to resolve
# @return [String] "noun", "verb" or "unknown"
def finalize_wordpos_pos(pos)
  is_noun = pos.include?("noun")
  is_verb = pos.include?("verb")
  # Both flags set (ambiguous) and both unset (no match) count as unknown.
  return "unknown" if is_noun == is_verb

  is_noun ? "noun" : "verb"
end

#log_sentence(sentence) ⇒ Object

Adds the words in a sentence to the application database. The part of speech is identified by the DB name. Each entry is a word => proc mapping.

Noun procs are passed all vn_phrases for the sentence (these are constructed by parse_sentence)

The Verb procs are passed the evaluated results of the Noun procs in its Verb-Noun phrase (as sequential arguments)

For example, if the phrase is “live well and flourish” Then (assuming the

Although EngTagger extracts POS for the words in a sentence, these classifications are context-dependent.

For this reason, words are also looked up using WordPos. Only unambiguous words are added to the grammar.



62
63
64
65
66
67
68
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 62

# Classifies every word of the sentence and records the results.
#
# The work goes through sentence_cache, keyed by the sentence. Inside the
# block each (word, pos) pair from engtagger_lookup is passed to
# finalize_pos.
#
# @param sentence [String] the sentence to log
# @return whatever sentence_cache.get_or_set returns
#   (presumably the list of finalize_pos results -- confirm against Cache)
def log_sentence(sentence)
  sentence_cache.get_or_set(sentence) do
    tagged_words = engtagger_lookup(sentence)
    tagged_words.map { |word, pos| finalize_pos(word, pos) }
  end
end

#parse_sentence(sentence) ⇒ Object



6
7
8
9
10
11
12
13
14
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 6

# Entry point: prepares the lexicons, logs the sentence's words, then hands
# the sentence to SentenceInterpreter.
#
# Only the interpretation step is guarded: a NounBeforeVerbError raised by
# SentenceInterpreter.interpret yields an empty array. Errors from the
# setup and logging steps propagate.
#
# @param sentence [String] the sentence to parse
# @return the interpreter's result, or [] on NounBeforeVerbError
def parse_sentence(sentence)
  setup_lexicons
  log_sentence(sentence)
  interpretation =
    begin
      SentenceInterpreter.interpret(sentence)
    rescue NounBeforeVerbError
      []
    end
  interpretation
end

#save_noun_proc(word) ⇒ Object



118
119
120
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 118

# Stores the default noun proc source for the word in NounLexicon, keyed by
# the word as a symbol.
#
# @param word [String] the noun to store
# @return whatever NounLexicon.set returns
def save_noun_proc(word)
  lexicon_key = word.to_sym
  proc_source = default_noun_proc_string(word)
  NounLexicon.set(lexicon_key, proc_source)
end

#save_proc(final_pos, word) ⇒ Object



110
111
112
113
114
115
116
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 110

# Saves the default proc that matches the word's final part of speech.
#
# "noun" is checked before "verb"; if neither is included nothing is saved.
#
# @param final_pos [#include?] the resolved part of speech
# @param word [String] the word to save a proc for
# @return the result of the save call, or nil when nothing was saved
def save_proc(final_pos, word)
  return save_noun_proc(word) if final_pos.include?("noun")

  save_verb_proc(word) if final_pos.include?("verb")
end

#save_verb_proc(word) ⇒ Object



122
123
124
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 122

# Stores the default verb proc source for the word in VerbLexicon, keyed by
# the word as a symbol.
#
# @param word [String] the verb to store
# @return whatever VerbLexicon.set returns
def save_verb_proc(word)
  lexicon_key = word.to_sym
  proc_source = default_verb_proc_string(word)
  VerbLexicon.set(lexicon_key, proc_source)
end

#sentence_cache ⇒ Object

EngTagger evaluates POS in the context of a sentence, so from that perspective only entire sentences can be cached.



158
159
160
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 158

# Memoised cache named "sentence_pos", used for whole-sentence results.
#
# @return [Gemmy::Components::Cache] the same instance on every call
def sentence_cache
  return @sentence_cache if @sentence_cache

  @sentence_cache = Gemmy::Components::Cache.new("sentence_pos")
end

#setup_lexicons ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 16

# Replaces the top-level VerbLexicon and NounLexicon constants with fresh
# Gemmy::Components::Cache instances named "verb_lexicon" and
# "noun_lexicon".
#
# Runs once per receiver: later calls return immediately because
# @lexicon_set_up is set at the end of the first call.
#
# @return [true, nil] true after setting up, nil when already set up
def setup_lexicons
  return if @lexicon_set_up

  { "VerbLexicon" => "verb_lexicon", "NounLexicon" => "noun_lexicon" }.each do |const_name, cache_name|
    # remove_const raises NameError for a constant that does not exist, so
    # only remove what is actually defined directly on Object.
    Object.send(:remove_const, const_name) if Object.const_defined?(const_name, false)
    Object.const_set(const_name, Gemmy::Components::Cache.new(cache_name))
  end
  @lexicon_set_up = true
end

#tag_sentence(sentence) ⇒ Object

Uses the Ruby EngTagger tool to find parts of speech of a sentence

Returns a two-element array, [nouns, verbs], where each element is an array of words (the caller destructures it as `nouns, verbs = tag_sentence(sentence)`).



34
35
36
37
38
39
40
41
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 34

# Tags the sentence with EngTagger and extracts its nouns and verbs.
#
# The tagger instance is created on first use and kept in @tagger. Either
# list falls back to an empty array when the tagger returns nil for it.
#
# @param sentence [String] the sentence to tag
# @return the value of `ergo` over the tagged text -- the block produces
#   [nouns, verbs] (NOTE(review): `ergo` is defined elsewhere; presumably it
#   yields a non-nil receiver and returns the block's value -- confirm)
def tag_sentence(sentence)
  @tagger ||= EngTagger.new
  @tagger.add_tags(sentence).ergo do |tagged_text|
    noun_words = @tagger.get_nouns(tagged_text)&.keys || []
    verb_words = @tagger.get_verbs(tagged_text)&.keys || []
    [noun_words, verb_words]
  end
end

#word_pos_cache ⇒ Object

This cache reduces the call rate of the WordPos shell util by caching the POS for individual words.



164
165
166
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 164

# Memoised cache named "word_pos", holding the part of speech of individual
# words so the WordPos shell utility is called less often.
#
# @return [Gemmy::Components::Cache] the same instance on every call
def word_pos_cache
  return @pos_cache if @pos_cache

  @pos_cache = Gemmy::Components::Cache.new("word_pos")
end

#wordpos_lookup(word) ⇒ Object



168
169
170
171
172
173
174
175
176
177
# File 'lib/gemmy/patches_loaded/components/nlp.rb', line 168

# Looks a single word up with the WordPos utility by shelling out to
# `coffee`, passing Gemmy::Coffee as the script.
#
# The command's output is parsed as JSON; a non-empty "verbs" entry adds
# "verb" to the result and a non-empty "nouns" entry adds "noun".
#
# @param word [String] the word to look up
# @return [Array<String>] any of "verb" and "noun" (in that order), or
#   ['unknown'] when the word is empty after sanitising or nothing matched
def wordpos_lookup(word)
  default_result = ['unknown']
  result = []
  # Keep ASCII letters only. The range must be A-Z: the range A-z would also
  # admit [ \ ] ^ _ and the backtick, and the word is interpolated into a
  # shell command below.
  word = word.strip.gsub(/[^a-zA-Z]/, '')
  return default_result if word.empty?
  pos_response = JSON.parse `coffee -e "#{Gemmy::Coffee}" pos #{word}`
  result << "verb" unless pos_response["verbs"].empty?
  result << "noun" unless pos_response["nouns"].empty?
  result.empty? ? default_result : result
end