Class: Ankusa::TextHash

Inherits:
Hash
  • Object
show all
Defined in:
lib/ankusa/hasher.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text = nil) ⇒ TextHash

Returns a new instance of TextHash.



9
10
11
12
13
# File 'lib/ankusa/hasher.rb', line 9

# Builds an empty TextHash, passing 0 to the superclass constructor and
# starting the running word counter at zero. When +text+ is supplied it is
# handed straight to #add_text.
#
# @param text [Object, nil] optional initial text, forwarded unchanged to
#   #add_text; nothing is added when it is nil
def initialize(text=nil)
  super(0)
  @word_count = 0
  add_text(text) unless text.nil?
end

Instance Attribute Details

#word_count ⇒ Object (readonly)

Returns the value of attribute word_count.



7
8
9
# File 'lib/ankusa/hasher.rb', line 7

# Reader for the @word_count instance variable, which #initialize sets to 0
# and #add_word increments by one on every call.
#
# @return [Integer] the current value of @word_count
def word_count
  @word_count
end

Class Method Details

.atomize(text) ⇒ Object



33
34
35
# File 'lib/ankusa/hasher.rb', line 33

# Breaks +text+ into lowercase tokens.
#
# Steps, in order: convert with String#to_ascii (not a core Ruby method;
# presumably supplied by a dependency of this library), turn hyphens into
# spaces, blank out every character that is neither a word character (\w)
# nor whitespace, split on whitespace, and downcase each piece.
#
# @param text [String] the raw text to tokenize
# @return [Array<String>] the lowercase tokens, in their original order
def self.atomize(text)
  ascii = text.to_ascii
  dehyphenated = ascii.tr('-', ' ')
  cleaned = dehyphenated.gsub(/[^\w\s]/, " ")
  cleaned.split.map(&:downcase)
end

.valid_word?(word) ⇒ Boolean

The word is expected to contain only alphanumeric characters at this point.

Returns:

  • (Boolean)


38
39
40
41
42
43
# File 'lib/ankusa/hasher.rb', line 38

# Decides whether +word+ should be counted.
#
# A word is rejected when it is listed in Ankusa::STOPWORDS, when it is
# shorter than three characters, or when +word.numeric?+ is truthy. The
# checks short-circuit in that order.
#
# @param word [String] a single token to check
# @return [Boolean] true when none of the rejection rules apply
def self.valid_word?(word)
  rejected = Ankusa::STOPWORDS.include?(word) ||
             word.length < 3 ||
             word.numeric?
  !rejected
end

Instance Method Details

#add_text(text) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
# File 'lib/ankusa/hasher.rb', line 15

# Adds the words found in +text+ to this hash.
#
# An Array is processed element by element through recursive calls, so
# nested arrays are handled too. Any other value is tokenized with
# TextHash.atomize, and each token accepted by TextHash.valid_word? is
# passed to #add_word.
#
# @param text [Array, Object] an array of texts, or a single text that is
#   passed to TextHash.atomize
# @return [self] the receiver, which allows call chaining
def add_text(text)
  if text.is_a?(Array)
    text.each { |entry| add_text(entry) }
    return self
  end

  TextHash.atomize(text).each do |token|
    next unless TextHash.valid_word?(token)

    add_word(token)
  end
  self
end

#add_word(word) ⇒ Object



27
28
29
30
31
# File 'lib/ankusa/hasher.rb', line 27

# Records one occurrence of +word+.
#
# Increments @word_count, then increments the entry keyed by the symbol
# form of +word.stem+. String#stem is not a core Ruby method; it is
# presumably supplied by a stemming dependency of this library.
#
# @param word [String] the token to count
# @return [Integer] the updated count stored for the word's key
def add_word(word)
  @word_count += 1
  stem_key = word.stem.to_sym
  self[stem_key] = fetch(stem_key, 0) + 1
end