Class: Word

Inherits:
ActiveRecord::Base
  • Object
show all
Defined in:
app/models/word.rb

Constant Summary collapse

@@insig_words =
{}

Class Method Summary collapse

Class Method Details

.frequency(text, options = {}) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'app/models/word.rb', line 12

# Counts how often each word occurs in +text+.
#
# The text is lowercased, every character other than a-z and the apostrophe
# is replaced by a space, and the result is split on whitespace.
#
# @param text [String] the text to analyse
# @param options [Hash] optional settings:
#   - :stem                  when truthy, each word is passed through
#                            Lingua.stemmer and counted under the result
#   - :exclude_insig_words   skip words found in @@insig_words
#                            (default true; pass false to disable)
#   - :excludes              optional collection indexed with [] by word;
#                            words with a truthy entry are skipped
#   - :exclude_matches_stems also apply both exclusions to the stemmed form
#                            (default true; pass false to disable)
#   - :de_stem               when truthy, re-key each counted stem to the
#                            first word in the text that stems to it
# @return [Hash] word (or stem) => number of occurrences
def Word.frequency(text, options = {})
  tokens = text.downcase.gsub(/[^a-z\']/, ' ').split(' ')

  stem = options[:stem]
  excludes = options[:excludes]
  # These two default to true. `options[...] || true` could never yield
  # false, so compare against false explicitly (nil/missing still mean true).
  exclude_insig_words = options[:exclude_insig_words] != false
  exclude_matches_stems = options[:exclude_matches_stems] != false

  words = {}
  tokens.each do |w|
    next if exclude_insig_words && @@insig_words[w]
    next if excludes && excludes[w]

    ww = stem ? Lingua.stemmer(w) : w
    if exclude_matches_stems
      next if excludes && excludes[ww]
      next if exclude_insig_words && @@insig_words[ww]
    end

    words[ww] = (words[ww] || 0) + 1
  end

  if options[:de_stem]
    # Collect the re-keyed entries separately so that a word which is its own
    # stem is not deleted right after being assigned, and so that entries
    # restored earlier are not mistaken for stems later in the pass.
    restored = {}
    tokens.each do |w|
      ww = Lingua.stemmer(w)
      next unless words.key?(ww)

      restored[w] = words.delete(ww)
    end
    words.merge!(restored)
  end

  words
end

.histogram(text, options) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'app/models/word.rb', line 43

# Builds a list of [word, count] pairs for +text+, highest count first.
#
# Counts come from Word.frequency, which receives the same +options+ hash.
#
# @param text [String] the text to analyse
# @param options [Hash] options forwarded to Word.frequency, plus:
#   - :threshold  when set, only pairs whose count is >= this value are kept
#   - :max        when set, stop once this many pairs have been collected
# @return [Array<Array>] [word, count] pairs in descending count order
def Word.histogram(text, options)
  counts = Word.frequency(text, options)
  min_count = options[:threshold]
  limit = options[:max]

  # Ascending sort followed by reverse yields descending counts.
  ranked = counts.sort { |x, y| x[1] <=> y[1] }.reverse

  rows = []
  ranked.each do |word, count|
    break if limit && rows.size >= limit
    next unless min_count.nil? || count >= min_count

    rows << [word, count]
  end
  rows
end