Class: Raingrams::MultigramModel

Inherits:
Model show all
Defined in:
lib/raingrams/multigram_model.rb

Instance Attribute Summary collapse

Attributes inherited from Model

#convert_abbrev, #convert_acronyms, #frequency, #ignore_case, #ignore_phone_numbers, #ignore_punc, #ignore_references, #ignore_urls, #ngram_size, #probability

Instance Method Summary collapse

Methods inherited from Model

#each_ngram, #has_ngram?, #ngrams, #ngrams_ending_with, #ngrams_starting_with, #ngrams_with, #parse_sentence, #parse_text, #probabilities_for, #probability_of_gram, #probability_of_ngram, #probability_of_ngrams, #train_with_ngrams, #vocabulary, #within_vocabulary?

Constructor Details

#initialize(opts = {}, &block) ⇒ MultigramModel

Returns a new instance of MultigramModel.



12
13
14
15
16
# File 'lib/raingrams/multigram_model.rb', line 12

# Creates a new MultigramModel.
#
# The prefix frequency table is created before delegating to the parent
# constructor; the block handed to the parent calls #build, which reads
# that table.
#
# @param opts [Hash] options passed through to the parent constructor
# @yield optional block forwarded to #build
def initialize(opts={},&block)
  # Unknown prefixes read as a count of 0.
  @prefix_frequency = Hash.new { 0 }

  super(opts) do |model|
    model.build(&block)
  end
end

Instance Attribute Details

#prefix_frequency ⇒ Object (readonly)

Frequencies of n-1 grams



10
11
12
# File 'lib/raingrams/multigram_model.rb', line 10

# Reader for the table of prefix (n-1 gram) frequencies.
#
# @return [Object] the value stored in @prefix_frequency
def prefix_frequency
  return @prefix_frequency
end

Instance Method Details

#build(&block) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/raingrams/multigram_model.rb', line 67

# Rebuilds the probability table from the trained frequencies.
#
# Clears the existing probabilities, calls the optional block with the
# model, then sets the probability of every ngram in @frequency to its
# count divided by the count recorded for its prefix.
#
# @yield [model] optional block called with this model before the
#   probabilities are computed
# @return [MultigramModel] self
# @raise [PrefixFrequencyMissing] if no positive frequency is recorded
#   for the prefix of a trained ngram
def build(&block)
  clear_probabilities

  block.call(self) if block

  @frequency.each do |ngram,count|
    prefix = ngram.prefix

    # The prefix table answers 0 for unknown keys and 0 is truthy in Ruby,
    # so a plain truthiness test never detects a missing prefix. Look the
    # count up explicitly and require it to be positive, which also keeps
    # the division below from producing Infinity or NaN.
    prefix_count = @prefix_frequency.fetch(prefix,0)

    unless prefix_count > 0
      raise(PrefixFrequencyMissing,"the model is missing the frequency of the ngram prefix #{prefix}",caller)
    end

    @probability[ngram] = count.to_f / prefix_count.to_f
  end

  return self
end

#clear ⇒ Object



153
154
155
156
# File 'lib/raingrams/multigram_model.rb', line 153

# Empties the prefix frequency table, then delegates to the parent's
# #clear.
#
# @return [Object] whatever the parent implementation returns
def clear
  @prefix_frequency.clear

  super
end

#common_fragment_probability(fragment) ⇒ Object



129
130
131
# File 'lib/raingrams/multigram_model.rb', line 129

# Probability of the ngrams from the fragment that the model also contains.
#
# @param fragment the fragment passed to #common_ngrams_from_fragment
# @return the result of #probability_of_ngrams for those common ngrams
def common_fragment_probability(fragment)
  shared_ngrams = common_ngrams_from_fragment(fragment)

  probability_of_ngrams(shared_ngrams)
end

#common_ngrams_from_fragment(fragment) ⇒ Object



42
43
44
# File 'lib/raingrams/multigram_model.rb', line 42

# Selects the ngrams of the fragment that are also present in the model.
#
# @param fragment the fragment passed to #ngrams_from_fragment
# @return the ngrams from the fragment for which #has_ngram? is true
def common_ngrams_from_fragment(fragment)
  # The fragment argument itself must be parsed; there is no local named
  # +words+ in this method.
  ngrams_from_fragment(fragment).select { |ngram| has_ngram?(ngram) }
end

#common_ngrams_from_sentence(sentence) ⇒ Object



46
47
48
# File 'lib/raingrams/multigram_model.rb', line 46

# Selects the ngrams of the sentence that are also present in the model.
#
# @param sentence the sentence passed to #ngrams_from_sentence
# @return the ngrams from the sentence for which #has_ngram? is true
def common_ngrams_from_sentence(sentence)
  candidates = ngrams_from_sentence(sentence)

  candidates.select do |candidate|
    has_ngram?(candidate)
  end
end

#common_ngrams_from_text(text) ⇒ Object



50
51
52
# File 'lib/raingrams/multigram_model.rb', line 50

# Selects the ngrams of the text that are also present in the model.
#
# @param text the text passed to #ngrams_from_text
# @return the ngrams from the text for which #has_ngram? is true
def common_ngrams_from_text(text)
  candidates = ngrams_from_text(text)

  candidates.select do |candidate|
    has_ngram?(candidate)
  end
end

#common_ngrams_from_words(words) ⇒ Object



38
39
40
# File 'lib/raingrams/multigram_model.rb', line 38

# Selects the ngrams built from the words that are also present in the
# model.
#
# @param words the words passed to #ngrams_from_words
# @return the ngrams from the words for which #has_ngram? is true
def common_ngrams_from_words(words)
  candidates = ngrams_from_words(words)

  candidates.select do |candidate|
    has_ngram?(candidate)
  end
end

#common_sentence_probability(sentence) ⇒ Object



133
134
135
# File 'lib/raingrams/multigram_model.rb', line 133

# Probability of the ngrams from the sentence that the model also contains.
#
# @param sentence the sentence passed to #common_ngrams_from_sentence
# @return the result of #probability_of_ngrams for those common ngrams
def common_sentence_probability(sentence)
  shared_ngrams = common_ngrams_from_sentence(sentence)

  probability_of_ngrams(shared_ngrams)
end

#common_text_probability(fragment) ⇒ Object



137
138
139
# File 'lib/raingrams/multigram_model.rb', line 137

# Probability of the ngrams from the text that the model also contains.
#
# @param fragment the text passed to #common_ngrams_from_text (the
#   parameter keeps its original name for compatibility)
# @return the result of #probability_of_ngrams for those common ngrams
def common_text_probability(fragment)
  # Use the method's own argument; no local named +text+ exists here.
  probability_of_ngrams(common_ngrams_from_text(fragment))
end

#fragment_probability(fragment) ⇒ Object



117
118
119
# File 'lib/raingrams/multigram_model.rb', line 117

# Probability of all ngrams produced from the fragment.
#
# @param fragment the fragment passed to #ngrams_from_fragment
# @return the result of #probability_of_ngrams for those ngrams
def fragment_probability(fragment)
  fragment_ngrams = ngrams_from_fragment(fragment)

  probability_of_ngrams(fragment_ngrams)
end

#grams_following(gram) ⇒ Object



111
112
113
114
115
# File 'lib/raingrams/multigram_model.rb', line 111

# Collects the second element of every ngram that starts with the gram.
#
# @param gram the gram passed to #ngrams_starting_with
# @return the element at index 1 of each matching ngram
def grams_following(gram)
  starting = ngrams_starting_with(gram)

  starting.map { |ngram| ngram[1] }
end

#grams_preceeding(gram) ⇒ Object



105
106
107
108
109
# File 'lib/raingrams/multigram_model.rb', line 105

# Collects the second-to-last element of every ngram that ends with the
# gram.
#
# @param gram the gram passed to #ngrams_ending_with
# @return the element at index -2 of each matching ngram
def grams_preceeding(gram)
  ending = ngrams_ending_with(gram)

  ending.map { |ngram| ngram[-2] }
end

#ngrams_following(gram) ⇒ Object



99
100
101
102
103
# File 'lib/raingrams/multigram_model.rb', line 99

# For every ngram starting with the gram, looks up the ngrams prefixed by
# that ngram's postfix.
#
# @param gram the gram passed to #ngrams_starting_with
# @return one #ngrams_prefixed_by result per matching ngram, in order
def ngrams_following(gram)
  starting = ngrams_starting_with(gram)

  starting.map { |ngram| ngrams_prefixed_by(ngram.postfix) }
end

#ngrams_from_fragment(fragment) ⇒ Object



24
25
26
# File 'lib/raingrams/multigram_model.rb', line 24

# Parses the fragment with #parse_sentence and builds ngrams from the
# result.
#
# @param fragment the fragment to parse
# @return the result of #ngrams_from_words for the parsed fragment
def ngrams_from_fragment(fragment)
  parsed_words = parse_sentence(fragment)

  ngrams_from_words(parsed_words)
end

#ngrams_from_sentence(sentence) ⇒ Object



28
29
30
# File 'lib/raingrams/multigram_model.rb', line 28

# Parses the sentence, passes the parsed result through #wrap_sentence,
# and builds ngrams from it.
#
# @param sentence the sentence to parse
# @return the result of #ngrams_from_words for the wrapped sentence
def ngrams_from_sentence(sentence)
  parsed_words  = parse_sentence(sentence)
  wrapped_words = wrap_sentence(parsed_words)

  ngrams_from_words(wrapped_words)
end

#ngrams_from_text(text) ⇒ Object



32
33
34
35
36
# File 'lib/raingrams/multigram_model.rb', line 32

# Builds the ngrams of every sentence returned by #parse_text and
# returns them as one array, in sentence order.
#
# @param text the text to parse
# @return [Array] the concatenated #ngrams_from_sentence results
def ngrams_from_text(text)
  collected = []

  parse_text(text).each do |sentence|
    collected.concat(ngrams_from_sentence(sentence))
  end

  collected
end

#ngrams_from_words(words) ⇒ Object



18
19
20
21
22
# File 'lib/raingrams/multigram_model.rb', line 18

# Builds one Ngram for each run of @ngram_size consecutive words.
#
# @param words the words to slice into ngrams
# @return [Array] the Ngrams in order of their starting index; empty
#   when there are fewer than @ngram_size words
def ngrams_from_words(words)
  window_count = words.length - @ngram_size + 1
  ngrams = []

  # A non-positive window count means no iterations at all.
  window_count.times do |offset|
    ngrams << Ngram.new(words[offset,@ngram_size])
  end

  return ngrams
end

#ngrams_postfixed_by(postfix) ⇒ Object



89
90
91
# File 'lib/raingrams/multigram_model.rb', line 89

# Selects the ngrams of the model that are postfixed by the given postfix.
#
# @param postfix the postfix each selected ngram must report via
#   #postfixed_by?
# @return the result of #ngrams_with for the matching ngrams
def ngrams_postfixed_by(postfix)
  # Test the postfix, not the prefix; otherwise this method would be a
  # duplicate of #ngrams_prefixed_by.
  ngrams_with { |ngram| ngram.postfixed_by?(postfix) }
end

#ngrams_preceeding(gram) ⇒ Object



93
94
95
96
97
# File 'lib/raingrams/multigram_model.rb', line 93

# For every ngram ending with the gram, looks up the ngrams postfixed by
# that ngram's prefix.
#
# @param gram the gram passed to #ngrams_ending_with
# @return one #ngrams_postfixed_by result per matching ngram, in order
def ngrams_preceeding(gram)
  ending = ngrams_ending_with(gram)

  ending.map { |ngram| ngrams_postfixed_by(ngram.prefix) }
end

#ngrams_prefixed_by(prefix) ⇒ Object



85
86
87
# File 'lib/raingrams/multigram_model.rb', line 85

# Selects the ngrams of the model that are prefixed by the given prefix.
#
# @param prefix the prefix each selected ngram must report via
#   #prefixed_by?
# @return the result of #ngrams_with for the matching ngrams
def ngrams_prefixed_by(prefix)
  ngrams_with do |candidate|
    candidate.prefixed_by?(prefix)
  end
end

#sentence_probability(sentence) ⇒ Object



121
122
123
# File 'lib/raingrams/multigram_model.rb', line 121

# Probability of all ngrams produced from the sentence.
#
# @param sentence the sentence passed to #ngrams_from_sentence
# @return the result of #probability_of_ngrams for those ngrams
def sentence_probability(sentence)
  sentence_ngrams = ngrams_from_sentence(sentence)

  probability_of_ngrams(sentence_ngrams)
end

#similar_fragment_probability(other, fragment) ⇒ Object



141
142
143
# File 'lib/raingrams/multigram_model.rb', line 141

# Multiplies this model's common-fragment probability by the other
# model's common-fragment probability for the same fragment.
#
# @param other an object responding to #common_fragment_probability
# @param fragment the fragment to evaluate in both models
# @return the product of the two probabilities
def similar_fragment_probability(other,fragment)
  own_probability   = common_fragment_probability(fragment)
  other_probability = other.common_fragment_probability(fragment)

  own_probability * other_probability
end

#similar_sentence_probability(other, sentence) ⇒ Object



145
146
147
# File 'lib/raingrams/multigram_model.rb', line 145

# Multiplies this model's common-sentence probability by the other
# model's common-sentence probability for the same sentence.
#
# @param other an object responding to #common_sentence_probability
# @param sentence the sentence to evaluate in both models
# @return the product of the two probabilities
def similar_sentence_probability(other,sentence)
  own_probability   = common_sentence_probability(sentence)
  other_probability = other.common_sentence_probability(sentence)

  own_probability * other_probability
end

#similar_text_probability(other, text) ⇒ Object



149
150
151
# File 'lib/raingrams/multigram_model.rb', line 149

# Multiplies this model's common-text probability by the other model's
# common-text probability for the same text.
#
# @param other an object responding to #common_text_probability
# @param text the text to evaluate in both models
# @return the product of the two probabilities
def similar_text_probability(other,text)
  own_probability   = common_text_probability(text)
  other_probability = other.common_text_probability(text)

  own_probability * other_probability
end

#text_probability(text) ⇒ Object



125
126
127
# File 'lib/raingrams/multigram_model.rb', line 125

# Probability of all ngrams produced from the text.
#
# @param text the text passed to #ngrams_from_text
# @return the result of #probability_of_ngrams for those ngrams
def text_probability(text)
  text_ngrams = ngrams_from_text(text)

  probability_of_ngrams(text_ngrams)
end

#train_with_ngram(ngram) ⇒ Object



54
55
56
57
# File 'lib/raingrams/multigram_model.rb', line 54

# Counts one more occurrence of the ngram's prefix, then delegates the
# ngram to the parent's #train_with_ngram.
#
# @param ngram an object responding to #prefix
# @return [Object] whatever the parent implementation returns
def train_with_ngram(ngram)
  prefix = ngram.prefix
  @prefix_frequency[prefix] = @prefix_frequency[prefix] + 1

  super(ngram)
end

#train_with_sentence(sentence) ⇒ Object



59
60
61
# File 'lib/raingrams/multigram_model.rb', line 59

# Trains the model with every ngram produced from the sentence.
#
# @param sentence the sentence passed to #ngrams_from_sentence
# @return the result of #train_with_ngrams
def train_with_sentence(sentence)
  sentence_ngrams = ngrams_from_sentence(sentence)

  train_with_ngrams(sentence_ngrams)
end

#train_with_text(text) ⇒ Object



63
64
65
# File 'lib/raingrams/multigram_model.rb', line 63

# Trains the model with every ngram produced from the text.
#
# @param text the text passed to #ngrams_from_text
# @return the result of #train_with_ngrams
def train_with_text(text)
  text_ngrams = ngrams_from_text(text)

  train_with_ngrams(text_ngrams)
end