Class: Yanbi::DiadBag

Inherits:
WordBag show all
Defined in:
lib/wordbags/diadbag.rb

Direct Known Subclasses

StemmedDiadBag

Instance Attribute Summary

Attributes inherited from WordBag

#words

Instance Method Summary collapse

Methods inherited from WordBag

#add_file, #add_text, #between_counts, #empty?, #initialize, #intersection, #load, load, #remove, #save, #word_counts

Constructor Details

This class inherits a constructor from Yanbi::WordBag

Instance Method Details

#process(raw) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/wordbags/diadbag.rb', line 16

# Tokenizes +raw+ into lowercase words, pairs every word with the word that
# follows it, and records the resulting two-word strings ("diads") in this bag.
#
# Every character other than a word character, whitespace, apostrophe or
# hyphen is replaced with a space; hyphenated tokens are then split into
# their parts. Empty fragments left behind by doubled or leading hyphens
# (e.g. "well--known", "-bar") are discarded so they can never end up inside
# a diad. When a block is given, each word is passed through it before
# pairing. Input with fewer than two words contributes no diads.
#
# @param raw [String] the text to process; it is not modified
# @yield [word] optional per-word transformation applied before pairing
# @yieldparam word [String] a lowercased token
# @yieldreturn [Object] the replacement, interpolated into the diad string
# @return [Object] whatever +@words.concat+ returns (presumably the updated
#   word list - confirm against WordBag)
def process(raw)
  # NOTE: \w is ASCII-only in Ruby regexps, so non-ASCII letters are turned
  # into spaces by this substitution.
  cleaned = raw.downcase.gsub(/[^\w\s'\-]/, ' ')

  tokens = cleaned.split
                  .flat_map { |token| token.split('-') }
                  .reject(&:empty?)

  tokens = tokens.map { |token| yield token } if block_given?

  # each_cons(2) yields every adjacent pair and nothing at all when fewer
  # than two tokens are present.
  diads = tokens.each_cons(2).map { |first, second| "#{first} #{second}" }

  update_counts(diads)
  @words.concat(diads)
end