Class: FeldtRuby::NgramWordCounter
Constant Summary
Constants inherited
from WordCounter
WordCounter::StopWords
Instance Method Summary
collapse
Methods inherited from WordCounter
#count, #count_word, #invidual_words_in_string, #is_stop_word?, #merge!, #preprocess_word, #top_words, #words
Constructor Details
78
79
80
81
|
# File 'lib/feldtruby/word_counter.rb', line 78
# Builds a word counter that tallies n-grams (runs of n consecutive words).
#
# @param n [Integer] number of words in each n-gram; defaults to 2
def initialize(n = 2)
# Explicit empty parentheses: the parent initializer is called with no
# arguments (a bare `super` would forward n to it).
super()
# Window size later read by #all_ngrams.
@n = n
end
|
Instance Method Details
#all_ngrams(array) ⇒ Object
90
91
92
93
94
95
96
97
98
99
|
# File 'lib/feldtruby/word_counter.rb', line 90
# Collects every contiguous slice of @n elements from +array+, in order of
# their starting position.
#
# @param array [Array] the sequence to slide the @n-wide window over
# @return [Array<Array>] one slice per valid start index; empty when the
#   array is shorter than a positive @n
def all_ngrams(array)
  # The last start index that still leaves @n elements to its right.
  final_start = array.length - @n
  (0..final_start).map { |start| array[start, @n] }
end
|
#count_words(words) ⇒ Object
82
83
84
85
86
87
88
89
|
# File 'lib/feldtruby/word_counter.rb', line 82
def count_words(words)
words.split(/\.\s+(?=[A-Z]{1})/).each do |sentence|
ngrams = all_ngrams(invidual_words_in_string(sentence))
non_stop_ngrams = ngrams.select {|ngram| !ngram.any? {|ngw| is_stop_word?(ngw)}}
non_stop_ngrams.each {|ngram| count_word(ngram.join(' '))}
end
end
|