Class: Raingrams::MultigramModel
- Inherits:
-
Model
show all
- Defined in:
- lib/raingrams/multigram_model.rb
Instance Attribute Summary collapse
Attributes inherited from Model
#convert_abbrev, #convert_acronyms, #frequency, #ignore_case, #ignore_phone_numbers, #ignore_punc, #ignore_references, #ignore_urls, #ngram_size, #probability
Instance Method Summary
collapse
Methods inherited from Model
#each_ngram, #has_ngram?, #ngrams, #ngrams_ending_with, #ngrams_starting_with, #ngrams_with, #parse_sentence, #parse_text, #probabilities_for, #probability_of_gram, #probability_of_ngram, #probability_of_ngrams, #train_with_ngrams, #vocabulary, #within_vocabulary?
Constructor Details
#initialize(opts = {}, &block) ⇒ MultigramModel
Returns a new instance of MultigramModel.
12
13
14
15
16
|
# File 'lib/raingrams/multigram_model.rb', line 12
# Creates a new multigram model.
#
# Sets up an empty prefix-frequency table, then hands +opts+ to the parent
# initializer together with a block that calls #build on the model it is
# given, forwarding the caller's block.
# NOTE(review): presumably the parent initializer yields the new model to
# that block -- confirm against Model#initialize.
#
# @param opts options passed straight through to the parent initializer
# @yield forwarded to #build
def initialize(opts={},&block)
  # Unseen prefixes read as 0; the default block does not store the key.
  @prefix_frequency = Hash.new { |_table, _prefix| 0 }

  super(opts) do |model|
    model.build(&block)
  end
end
|
Instance Attribute Details
#prefix_frequency ⇒ Object
10
11
12
|
# File 'lib/raingrams/multigram_model.rb', line 10
# Reader for the prefix-frequency table, which maps an ngram prefix to the
# number of times it has been counted during training.
#
# @return the object stored in @prefix_frequency
def prefix_frequency
  return @prefix_frequency
end
|
Instance Method Details
#build(&block) ⇒ Object
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
# File 'lib/raingrams/multigram_model.rb', line 67
# Recomputes the probability of every ngram in the frequency table.
#
# Calls clear_probabilities, passes the model to the optional block, then
# sets each ngram's probability to its frequency divided by the frequency
# recorded for its prefix.
#
# @yield [model] optional; receives this model before probabilities are computed
# @return self
# @raise [PrefixFrequencyMissing] if an ngram's prefix has no positive
#   frequency recorded in the prefix-frequency table
def build(&block)
  clear_probabilities
  block.call(self) if block

  @frequency.each do |ngram,count|
    prefix = ngram.prefix

    # The prefix table answers 0 for unknown keys and 0 is truthy in Ruby, so
    # a plain truthiness test can never detect a missing prefix. Hash#fetch
    # ignores the table default, and the explicit comparison also rejects a
    # stored nil or zero, which would otherwise divide by zero below.
    prefix_count = @prefix_frequency.fetch(prefix,0).to_f

    unless prefix_count > 0
      raise(PrefixFrequencyMissing,"the model is missing the frequency of the ngram prefix #{prefix}",caller)
    end

    @probability[ngram] = count.to_f / prefix_count
  end

  return self
end
|
#clear ⇒ Object
153
154
155
156
|
# File 'lib/raingrams/multigram_model.rb', line 153
# Empties the prefix-frequency table and then delegates to the parent's
# #clear.
#
# @return whatever the parent implementation of #clear returns
def clear
  @prefix_frequency.clear

  super
end
|
#common_fragment_probability(fragment) ⇒ Object
129
130
131
|
# File 'lib/raingrams/multigram_model.rb', line 129
# Probability of the ngrams of +fragment+ that #common_ngrams_from_fragment
# selects.
#
# @param fragment passed through to #common_ngrams_from_fragment
# @return the result of probability_of_ngrams for those ngrams
def common_fragment_probability(fragment)
  shared_ngrams = common_ngrams_from_fragment(fragment)

  probability_of_ngrams(shared_ngrams)
end
|
#common_ngrams_from_fragment(fragment) ⇒ Object
42
43
44
|
# File 'lib/raingrams/multigram_model.rb', line 42
# Lists the ngrams of +fragment+ for which has_ngram? is truthy.
#
# The ngrams come from #ngrams_from_fragment. The original body passed an
# undefined name (+words+) instead of the +fragment+ parameter, so every call
# raised NameError.
#
# @param fragment passed through to #ngrams_from_fragment
# @return the ngrams of the fragment accepted by has_ngram?
def common_ngrams_from_fragment(fragment)
  ngrams_from_fragment(fragment).select { |ngram| has_ngram?(ngram) }
end
|
#common_ngrams_from_sentence(sentence) ⇒ Object
46
47
48
|
# File 'lib/raingrams/multigram_model.rb', line 46
# Lists the ngrams of +sentence+ for which has_ngram? is truthy.
#
# @param sentence passed through to #ngrams_from_sentence
# @return the ngrams of the sentence accepted by has_ngram?
def common_ngrams_from_sentence(sentence)
  candidates = ngrams_from_sentence(sentence)

  candidates.select do |candidate|
    has_ngram?(candidate)
  end
end
|
#common_ngrams_from_text(text) ⇒ Object
50
51
52
|
# File 'lib/raingrams/multigram_model.rb', line 50
# Lists the ngrams of +text+ for which has_ngram? is truthy.
#
# @param text passed through to #ngrams_from_text
# @return the ngrams of the text accepted by has_ngram?
def common_ngrams_from_text(text)
  candidates = ngrams_from_text(text)

  candidates.select do |candidate|
    has_ngram?(candidate)
  end
end
|
#common_ngrams_from_words(words) ⇒ Object
38
39
40
|
# File 'lib/raingrams/multigram_model.rb', line 38
# Lists the ngrams built from +words+ for which has_ngram? is truthy.
#
# @param words passed through to #ngrams_from_words
# @return the ngrams of the word list accepted by has_ngram?
def common_ngrams_from_words(words)
  candidates = ngrams_from_words(words)

  candidates.select do |candidate|
    has_ngram?(candidate)
  end
end
|
#common_sentence_probability(sentence) ⇒ Object
133
134
135
|
# File 'lib/raingrams/multigram_model.rb', line 133
# Probability of the ngrams of +sentence+ that #common_ngrams_from_sentence
# selects.
#
# @param sentence passed through to #common_ngrams_from_sentence
# @return the result of probability_of_ngrams for those ngrams
def common_sentence_probability(sentence)
  shared_ngrams = common_ngrams_from_sentence(sentence)

  probability_of_ngrams(shared_ngrams)
end
|
#common_text_probability(fragment) ⇒ Object
137
138
139
|
# File 'lib/raingrams/multigram_model.rb', line 137
# Probability of the ngrams of a text that #common_ngrams_from_text selects.
#
# The parameter is named +fragment+ but is handed to the text-level helper.
# The original body referenced an undefined name (+text+), so every call
# raised NameError; the parameter is now forwarded instead.
#
# @param fragment the text to evaluate; passed to #common_ngrams_from_text
# @return the result of probability_of_ngrams for those ngrams
def common_text_probability(fragment)
  probability_of_ngrams(common_ngrams_from_text(fragment))
end
|
#fragment_probability(fragment) ⇒ Object
117
118
119
|
# File 'lib/raingrams/multigram_model.rb', line 117
# Probability of all ngrams produced from +fragment+.
#
# @param fragment passed through to #ngrams_from_fragment
# @return the result of probability_of_ngrams for those ngrams
def fragment_probability(fragment)
  fragment_ngrams = ngrams_from_fragment(fragment)

  probability_of_ngrams(fragment_ngrams)
end
|
#grams_following(gram) ⇒ Object
111
112
113
114
115
|
# File 'lib/raingrams/multigram_model.rb', line 111
# Collects the element at index 1 of every ngram returned by
# ngrams_starting_with(gram), i.e. the gram right after the leading one.
# Duplicates are kept.
#
# @param gram passed through to ngrams_starting_with
# @return one entry per matching ngram
def grams_following(gram)
  matches = ngrams_starting_with(gram)

  matches.map { |match| match[1] }
end
|
#grams_preceeding(gram) ⇒ Object
105
106
107
108
109
|
# File 'lib/raingrams/multigram_model.rb', line 105
# Collects the second-to-last element of every ngram returned by
# ngrams_ending_with(gram), i.e. the gram right before the trailing one.
# Duplicates are kept.
#
# @param gram passed through to ngrams_ending_with
# @return one entry per matching ngram
def grams_preceeding(gram)
  matches = ngrams_ending_with(gram)

  matches.map { |match| match[-2] }
end
|
#ngrams_following(gram) ⇒ Object
99
100
101
102
103
|
# File 'lib/raingrams/multigram_model.rb', line 99
# For every ngram returned by ngrams_starting_with(gram), looks up the
# ngrams prefixed by that ngram's postfix.
#
# The result holds one lookup result per matching ngram; the individual
# results are not merged or flattened.
#
# @param gram passed through to ngrams_starting_with
# @return a collection of #ngrams_prefixed_by results
def ngrams_following(gram)
  starters = ngrams_starting_with(gram)

  starters.map { |starter| ngrams_prefixed_by(starter.postfix) }
end
|
#ngrams_from_fragment(fragment) ⇒ Object
24
25
26
|
# File 'lib/raingrams/multigram_model.rb', line 24
# Builds ngrams from a sentence fragment.
#
# The fragment is run through parse_sentence and the result is given to
# #ngrams_from_words. Unlike #ngrams_from_sentence, wrap_sentence is not
# applied.
#
# @param fragment passed through to parse_sentence
# @return the result of #ngrams_from_words
def ngrams_from_fragment(fragment)
  parsed_words = parse_sentence(fragment)

  ngrams_from_words(parsed_words)
end
|
#ngrams_from_sentence(sentence) ⇒ Object
28
29
30
|
# File 'lib/raingrams/multigram_model.rb', line 28
# Builds ngrams from a whole sentence.
#
# The sentence is run through parse_sentence, the parsed result through
# wrap_sentence, and the wrapped result is given to #ngrams_from_words.
#
# @param sentence passed through to parse_sentence
# @return the result of #ngrams_from_words
def ngrams_from_sentence(sentence)
  parsed_words = parse_sentence(sentence)
  wrapped_words = wrap_sentence(parsed_words)

  ngrams_from_words(wrapped_words)
end
|
#ngrams_from_text(text) ⇒ Object
32
33
34
35
36
|
# File 'lib/raingrams/multigram_model.rb', line 32
# Builds ngrams for every sentence of +text+ and returns them as one flat
# list, in sentence order.
#
# Sentences come from parse_text; each one is turned into ngrams by
# #ngrams_from_sentence.
#
# @param text passed through to parse_text
# @return [Array] the concatenated per-sentence ngram lists
def ngrams_from_text(text)
  parse_text(text).flat_map { |sentence| ngrams_from_sentence(sentence) }
end
|
#ngrams_from_words(words) ⇒ Object
18
19
20
21
22
|
# File 'lib/raingrams/multigram_model.rb', line 18
# Slides a window of @ngram_size items over +words+ and wraps each window in
# an Ngram.
#
# When +words+ holds fewer than @ngram_size items, the result is empty.
#
# @param words an indexable list of grams (must respond to length and [start, length])
# @return [Array] one Ngram per window position, in order
def ngrams_from_words(words)
  # Index of the last position where a full window still fits; negative when
  # the list is too short, which makes the range below empty.
  last_start = words.length - @ngram_size

  (0..last_start).map { |start| Ngram.new(words[start, @ngram_size]) }
end
|
#ngrams_postfixed_by(postfix) ⇒ Object
89
90
91
|
# File 'lib/raingrams/multigram_model.rb', line 89
# Selects, via ngrams_with, the ngrams whose postfix equals +postfix+.
#
# The original body tested prefixed_by?, which made this method behave
# exactly like #ngrams_prefixed_by; it now compares the ngram's postfix.
#
# @param postfix the postfix to compare each ngram's #postfix against
# @return whatever ngrams_with returns for the matching ngrams
def ngrams_postfixed_by(postfix)
  ngrams_with { |ngram| ngram.postfix == postfix }
end
|
#ngrams_preceeding(gram) ⇒ Object
93
94
95
96
97
|
# File 'lib/raingrams/multigram_model.rb', line 93
# For every ngram returned by ngrams_ending_with(gram), looks up the ngrams
# postfixed by that ngram's prefix.
#
# The result holds one lookup result per matching ngram; the individual
# results are not merged or flattened.
#
# @param gram passed through to ngrams_ending_with
# @return a collection of #ngrams_postfixed_by results
def ngrams_preceeding(gram)
  enders = ngrams_ending_with(gram)

  enders.map { |ender| ngrams_postfixed_by(ender.prefix) }
end
|
#ngrams_prefixed_by(prefix) ⇒ Object
85
86
87
|
# File 'lib/raingrams/multigram_model.rb', line 85
# Selects, via ngrams_with, the ngrams that answer true to
# prefixed_by?(prefix).
#
# @param prefix passed to each ngram's prefixed_by?
# @return whatever ngrams_with returns for the matching ngrams
def ngrams_prefixed_by(prefix)
  ngrams_with do |candidate|
    candidate.prefixed_by?(prefix)
  end
end
|
#sentence_probability(sentence) ⇒ Object
121
122
123
|
# File 'lib/raingrams/multigram_model.rb', line 121
# Probability of all ngrams produced from +sentence+.
#
# @param sentence passed through to #ngrams_from_sentence
# @return the result of probability_of_ngrams for those ngrams
def sentence_probability(sentence)
  sentence_ngrams = ngrams_from_sentence(sentence)

  probability_of_ngrams(sentence_ngrams)
end
|
#similar_fragment_probability(other, fragment) ⇒ Object
141
142
143
|
# File 'lib/raingrams/multigram_model.rb', line 141
# Product of this model's and +other+'s #common_fragment_probability for the
# same fragment.
#
# @param other an object responding to common_fragment_probability
# @param fragment the fragment evaluated by both models
# @return the product of the two probabilities
def similar_fragment_probability(other,fragment)
  own_probability = common_fragment_probability(fragment)
  other_probability = other.common_fragment_probability(fragment)

  own_probability * other_probability
end
|
#similar_sentence_probability(other, sentence) ⇒ Object
145
146
147
|
# File 'lib/raingrams/multigram_model.rb', line 145
# Product of this model's and +other+'s #common_sentence_probability for the
# same sentence.
#
# @param other an object responding to common_sentence_probability
# @param sentence the sentence evaluated by both models
# @return the product of the two probabilities
def similar_sentence_probability(other,sentence)
  own_probability = common_sentence_probability(sentence)
  other_probability = other.common_sentence_probability(sentence)

  own_probability * other_probability
end
|
#similar_text_probability(other, text) ⇒ Object
149
150
151
|
# File 'lib/raingrams/multigram_model.rb', line 149
# Product of this model's and +other+'s #common_text_probability for the
# same text.
#
# @param other an object responding to common_text_probability
# @param text the text evaluated by both models
# @return the product of the two probabilities
def similar_text_probability(other,text)
  own_probability = common_text_probability(text)
  other_probability = other.common_text_probability(text)

  own_probability * other_probability
end
|
#text_probability(text) ⇒ Object
125
126
127
|
# File 'lib/raingrams/multigram_model.rb', line 125
# Probability of all ngrams produced from +text+.
#
# @param text passed through to #ngrams_from_text
# @return the result of probability_of_ngrams for those ngrams
def text_probability(text)
  text_ngrams = ngrams_from_text(text)

  probability_of_ngrams(text_ngrams)
end
|
#train_with_ngram(ngram) ⇒ Object
54
55
56
57
|
# File 'lib/raingrams/multigram_model.rb', line 54
# Counts one more occurrence of the ngram's prefix, then lets the parent
# implementation train with the ngram itself.
#
# @param ngram an object responding to prefix; forwarded to the parent
# @return whatever the parent implementation of #train_with_ngram returns
def train_with_ngram(ngram)
  prefix = ngram.prefix
  @prefix_frequency[prefix] = @prefix_frequency[prefix] + 1

  super(ngram)
end
|
#train_with_sentence(sentence) ⇒ Object
59
60
61
|
# File 'lib/raingrams/multigram_model.rb', line 59
# Trains the model with every ngram produced from +sentence+.
#
# @param sentence passed through to #ngrams_from_sentence
# @return the result of train_with_ngrams
def train_with_sentence(sentence)
  sentence_ngrams = ngrams_from_sentence(sentence)

  train_with_ngrams(sentence_ngrams)
end
|
#train_with_text(text) ⇒ Object
63
64
65
|
# File 'lib/raingrams/multigram_model.rb', line 63
# Trains the model with every ngram produced from +text+.
#
# @param text passed through to #ngrams_from_text
# @return the result of train_with_ngrams
def train_with_text(text)
  text_ngrams = ngrams_from_text(text)

  train_with_ngrams(text_ngrams)
end
|