Module: DocumentCache
- Defined in:
- lib/document-cache.rb
Class Method Summary
- .add(search) ⇒ Object
- .clean(sentence) ⇒ Object
- .documents ⇒ Object
- .extract_matching_words(search, sentence) ⇒ Object
- .find_examples_for(search, count = 1) ⇒ Object
- .find_matches_by_grepping(search, sentences) ⇒ Object
- .find_matches_by_stemming(search, sentences) ⇒ Object
- .find_matches_in(filenames, search, count) ⇒ Object
- .frequency_list ⇒ Object
- .stemmed_frequency_list ⇒ Object
Class Method Details
.add(search) ⇒ Object
13 14 15 16 |
# File 'lib/document-cache.rb', line 13
# Stores the given text as a new document in the cache directory.
#
# The target file lives directly under CACHE_DIR and is named with a freshly
# generated UUID.
#
# @param search [String] text to persist as the body of the new cache file
# @return [Integer] number of bytes written
def self.add(search)
  path = "#{CACHE_DIR}/#{UUID.new.generate}"
  File.write(path, search)
end
.clean(sentence) ⇒ Object
62 63 64 |
# File 'lib/document-cache.rb', line 62
# Tidies a sentence fragment: surrounding whitespace is removed and a single
# period is appended.
#
# @param sentence [String] raw sentence fragment
# @return [String] the trimmed fragment followed by "."
def self.clean(sentence)
  trimmed = sentence.strip
  "#{trimmed}."
end
.documents ⇒ Object
54 55 56 |
# File 'lib/document-cache.rb', line 54
# Lists the paths of all entries found directly under CACHE_DIR.
#
# @return [Array<String>] paths matching "CACHE_DIR/*"
def self.documents
  pattern = "#{CACHE_DIR}/*"
  Dir.glob(pattern)
end
.extract_matching_words(search, sentence) ⇒ Object
66 67 68 69 70 |
# File 'lib/document-cache.rb', line 66
# Returns the words of a single sentence that match the search term.
#
# Stem-based matching is tried first; substring matching is used only when
# stemming finds nothing.
#
# @param search [String] term to look for
# @param sentence [String] sentence to inspect
# @return [Array<String>, nil] matched words, or nil when neither matcher
#   finds anything
def self.extract_matching_words(search, sentence)
  candidates = [sentence]
  stemmed = find_matches_by_stemming(search, candidates)
  return stemmed.values.first unless stemmed.empty?

  find_matches_by_grepping(search, candidates).values.first
end
.find_examples_for(search, count = 1) ⇒ Object
58 59 60 |
# File 'lib/document-cache.rb', line 58
# Searches every cached document for sentences containing the search term.
#
# @param search [String] term to look for
# @param count [Integer] number of examples wanted (defaults to 1)
# @return [Hash] whatever find_matches_in returns for the cached documents
def self.find_examples_for(search, count = 1)
  sources = documents
  find_matches_in(sources, search, count)
end
.find_matches_by_grepping(search, sentences) ⇒ Object
28 29 30 31 32 33 |
# File 'lib/document-cache.rb', line 28
# Finds sentences that contain the search term as a plain substring.
#
# @param search [String] substring to look for
# @param sentences [Array<String>] candidate sentences
# @return [Hash{String => Array<String>}] cleaned sentence => [search] for
#   every sentence that includes the term
def self.find_matches_by_grepping(search, sentences)
  sentences.each_with_object({}) do |sentence, found|
    next unless sentence.include?(search)

    found[clean(sentence)] = [search]
  end
end
.find_matches_by_stemming(search, sentences) ⇒ Object
18 19 20 21 22 23 24 25 26 |
# File 'lib/document-cache.rb', line 18
# Finds sentences containing a word whose stem equals the stem of the search
# term.
#
# Sentences are split on spaces; each word is stemmed with
# VocabularyChest.stem and compared against the stemmed search term.
#
# @param search [String] term whose stem is looked for
# @param sentences [Array<String>] candidate sentences
# @return [Hash{String => Array}] cleaned sentence => matching words, each
#   passed through VocabularyChest.sanitize; sentences without a match are
#   omitted
def self.find_matches_by_stemming(search, sentences)
  target = VocabularyChest.stem(search)
  sentences.each_with_object({}) do |sentence, result|
    hits = sentence.split(" ").select { |word| VocabularyChest.stem(word) == target }
    next if hits.empty?

    result[clean(sentence)] = hits.map { |hit| VocabularyChest.sanitize(hit) }
  end
end
.find_matches_in(filenames, search, count) ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/document-cache.rb', line 35
# Collects up to +count+ example sentences for the search term from the given
# files.
#
# Two passes are made over the files: first with stem-based matching, then
# with plain substring matching as a fallback. Each file is split into
# sentences on ".", "?", "!" and newlines. The search stops as soon as exactly
# +count+ matches have been gathered.
#
# @param filenames [Array<String>] paths of the files to search
# @param search [String] term to look for
# @param count [Integer] number of matches wanted
# @return [Hash] cleaned sentence => matched words; may hold fewer than
#   +count+ entries when the files do not contain enough matches
def self.find_matches_in(filenames, search, count)
  matches = {}
  # A non-positive limit can never be reached by trimming; answering early
  # avoids shifting an already empty hash forever.
  return matches if count <= 0

  [:find_matches_by_stemming, :find_matches_by_grepping].each do |matcher|
    filenames.each do |filename|
      sentences = File.read(filename).split(/[\.?!\n]/)
      found = send(matcher, search, sentences)
      # Keep the value recorded first for a sentence, so the substring
      # fallback cannot replace the words found by the stemming pass.
      matches.merge!(found) { |_sentence, existing, _fallback| existing }
      # When over the limit, the oldest entries are dropped first.
      matches.shift while matches.size > count
      return matches if matches.size == count
    end
  end
  matches
end
.frequency_list ⇒ Object
72 73 74 75 76 77 78 |
# File 'lib/document-cache.rb', line 72
# Builds a word-frequency list over all cached documents.
#
# Every document is split on whitespace and its words are counted. Documents
# are tokenized one at a time, so the last word of one file is never fused
# with the first word of the next. Words seen fewer than two times are
# discarded.
#
# @return [Array<Array(String, Integer)>] [word, count] pairs, highest count
#   first
def self.frequency_list
  counts = Hash.new(0)
  documents.each do |path|
    # File.read closes the file once its contents have been read.
    File.read(path).split(" ").each { |word| counts[word] += 1 }
  end
  counts.reject! { |_word, count| count < 2 }
  counts.sort_by { |_word, count| count }.reverse
end
.stemmed_frequency_list ⇒ Object
80 81 82 83 84 85 86 87 |
# File 'lib/document-cache.rb', line 80
# Builds a frequency list of word stems over all cached documents.
#
# Every document is split on whitespace, each word is reduced with
# VocabularyChest.stem, and the stems are counted. Documents are tokenized one
# at a time, so the last word of one file is never fused with the first word
# of the next. Stems seen fewer than two times are discarded.
#
# @return [Array<Array>] [stem, count] pairs, highest count first
def self.stemmed_frequency_list
  counts = Hash.new(0)
  documents.each do |path|
    # File.read closes the file once its contents have been read.
    File.read(path).split(" ").each do |word|
      counts[VocabularyChest.stem(word)] += 1
    end
  end
  counts.reject! { |_stem, count| count < 2 }
  counts.sort_by { |_stem, count| count }.reverse
end