Class: Ankusa::TextHash

Inherits:
Hash
  • Object
show all
Defined in:
lib/ankusa/hasher.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text = nil, stem = true) ⇒ TextHash

Returns a new instance of TextHash.



9
10
11
12
13
14
# File 'lib/ankusa/hasher.rb', line 9

# Builds an empty TextHash whose default value for missing keys is 0.
#
# text - optional initial content; when not nil it is handed to add_text.
# stem - stored in @stem for later use (how it is consumed is not visible
#        in this method).
def initialize(text=nil, stem=true)
  super(0)
  @stem = stem
  @word_count = 0
  # Nothing to index when no initial text was supplied.
  return if text.nil?
  add_text(text)
end

Instance Attribute Details

#word_count ⇒ Object (readonly)

Returns the value of attribute word_count.



7
8
9
# File 'lib/ankusa/hasher.rb', line 7

# Returns the current value of @word_count, which the constructor
# initializes to 0.
# NOTE(review): presumably updated as words are added; the code that
# changes it is not visible here.
def word_count
  @word_count
end

Class Method Details

.atomize(text) ⇒ Object



16
17
18
# File 'lib/ankusa/hasher.rb', line 16

# Splits +text+ into an array of word tokens.
#
# Steps, in order: downcase, transliterate via String#to_ascii (not a core
# String method; presumably provided by a library loaded elsewhere), turn
# dashes into spaces, replace every character that is neither a word
# character nor whitespace with a space, then split on whitespace.
#
# NOTE(review): downcasing runs before to_ascii, so any capital letters the
# transliteration might emit would survive; confirm this is intended.
def self.atomize(text)
  lowered = text.downcase.to_ascii
  dashless = lowered.tr('-', ' ')
  dashless.gsub(/[^\w\s]/," ").split
end

.valid_word?(word) ⇒ Boolean

The word is expected to contain only alphanumeric characters at this point.

Returns:

  • (Boolean)


21
22
23
# File 'lib/ankusa/hasher.rb', line 21

# Tells whether +word+ should be counted.
#
# A word is rejected when it appears in Ankusa::STOPWORDS, when it is
# shorter than three characters, or when numeric_word? reports it as
# numeric. The checks run in that order and stop at the first rejection.
#
# Returns true or false.
def self.valid_word?(word)
  return false if Ankusa::STOPWORDS.include?(word)
  return false if word.length < 3
  !self.numeric_word?(word)
end

Instance Method Details

#add_text(text) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
# File 'lib/ankusa/hasher.rb', line 25

# Adds the words found in +text+ to this hash.
#
# text - a string-like value, or an Array (including Array subclasses and
#        nested arrays) of such values; arrays are processed element by
#        element.
#
# Each non-array value is tokenized with TextHash.atomize, and every token
# accepted by TextHash.valid_word? is passed to add_word.
#
# Returns self, so calls can be chained.
def add_text(text)
  # is_a? rather than instance_of?, so Array subclasses are iterated too
  # instead of being handed to atomize as if they were strings.
  if text.is_a?(Array)
    text.each { |t| add_text(t) }
  else
    TextHash.atomize(text).each do |word|
      add_word(word) if TextHash.valid_word?(word)
    end
  end
  self
end