Class: Tagelizer

Inherits:
Object
  • Object
show all
Defined in:
lib/tagelizer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(locale = 'en') ⇒ Tagelizer

Returns a new instance of Tagelizer.



10
11
12
13
14
# File 'lib/tagelizer.rb', line 10

# Builds a tagelizer for the given locale.
#
# The requested locale is remembered in @locale so the +locale+ reader
# reports it. The dictionary actually used is the requested locale when it
# appears in +dictionaries+, and "en" otherwise.
#
# @param locale [String] requested dictionary code (defaults to 'en')
def initialize(locale = 'en')
  # Keep the caller's request even when we fall back to "en" below, so the
  # fallback can be detected by comparing +locale+ with +dictionary+.
  @locale      = locale
  @dictionary  = dictionaries.include?(locale) ? locale : 'en'
  # parse keeps only words strictly longer than this many characters.
  @minwordsize = 2
  @options     = { 'ignore-case' => true }
end

Instance Attribute Details

#dictionary ⇒ Object

Returns the value of attribute dictionary.



42
43
44
# File 'lib/tagelizer.rb', line 42

# Reader for the dictionary code held in @dictionary.
#
# The constructor stores either the requested locale or "en" here; the value
# is passed to Aspell.new and used as the stemmer's :language.
#
# @return [Object] the current value of @dictionary
def dictionary
  @dictionary
end

#locale ⇒ Object (readonly)

Returns the value of attribute locale.



9
10
11
# File 'lib/tagelizer.rb', line 9

# Reader for @locale.
#
# @return [Object, nil] the current value of @locale (nil if it has never
#   been assigned)
def locale
  @locale
end

#options ⇒ Object (readonly)

Returns the value of attribute options.



9
10
11
# File 'lib/tagelizer.rb', line 9

# Reader for the speller option hash held in @options.
#
# The constructor initialises it to { 'ignore-case' => true }; actual_options
# converts its values to strings before they are handed to the speller.
#
# @return [Hash] the current value of @options
def options
  @options
end

Instance Method Details

#actual_options ⇒ Object



59
60
61
62
63
64
# File 'lib/tagelizer.rb', line 59

# Returns a copy of +options+ in which every value has been converted with
# +to_s+ (keys are left untouched).
#
# @return [Hash] option name => stringified option value
def actual_options
  options.each_with_object({}) do |(name, value), stringified|
    stringified[name] = value.to_s
  end
end

#build_speller ⇒ Object



24
25
26
27
28
29
30
31
32
# File 'lib/tagelizer.rb', line 24

# Creates and configures a new Aspell instance for the current dictionary.
#
# The suggestion mode is set to 'normal', then every entry of
# +actual_options+ is applied through +set_option+.
#
# @return [Aspell] the configured speller
def build_speller
  Aspell.new(dictionary).tap do |checker|
    checker.suggestion_mode = 'normal'
    actual_options.each { |name, setting| checker.set_option(name, setting) }
  end
end

#build_stemmer ⇒ Object



38
39
40
# File 'lib/tagelizer.rb', line 38

# Creates a new Lingua::Stemmer whose :language option is the current
# dictionary code.
#
# @return [Lingua::Stemmer] a freshly constructed stemmer
def build_stemmer
  language = dictionary
  Lingua::Stemmer.new(:language => language)
end

#corrected_word(word) ⇒ Object



55
56
57
# File 'lib/tagelizer.rb', line 55

# Returns +word+ if the speller accepts it, otherwise the speller's first
# suggestion.
#
# When the speller has no suggestion at all, the original word is returned
# unchanged rather than nil, so callers never receive nil.
#
# @param word [String] the word to check
# @return [String] the word itself or its first suggested correction
def corrected_word(word)
  return word if speller.check(word)

  # suggest may return an empty list; keep the word instead of yielding nil.
  speller.suggest(word).first || word
end

#dictionaries ⇒ Object



51
52
53
# File 'lib/tagelizer.rb', line 51

# Returns the +code+ of every entry reported by Aspell.list_dicts.
#
# The list is computed on first use and cached in @dictionaries.
#
# @return [Array] dictionary codes
def dictionaries
  return @dictionaries if @dictionaries

  @dictionaries = Aspell.list_dicts.map(&:code)
end

#parse(text) ⇒ Object



16
17
18
# File 'lib/tagelizer.rb', line 16

# Turns free text into a list of tags.
#
# The text is split on whitespace; from each token the first run of word
# characters is taken, words no longer than @minwordsize are dropped, each
# remaining word goes through +corrected_word+, and the result is passed to
# +remove_duplicates+.
#
# @param text [String] the text to extract tags from
# @return [Array] the value returned by +remove_duplicates+
def parse(text)
  # /\w+/ (not /\w*/) so that a token starting with punctuation, such as
  # "(word)" or "\"word\"", still yields its word instead of an empty match.
  # Tokens without any word character give nil and are removed by compact.
  words = text.split(' ').map { |token| token[/\w+/] }.compact
  candidates = words.select { |word| word.size > @minwordsize }
  remove_duplicates(candidates.map { |word| corrected_word(word) })
end

#remove_duplicates(list) ⇒ Object



66
67
68
69
70
71
72
73
# File 'lib/tagelizer.rb', line 66

# Removes words that share a stem with a later word in the list.
#
# For every group of words with the same stem (as computed by
# +stemmer.stem+) only the last occurrence is kept; the survivors keep their
# relative order. The argument is not modified, and each word is stemmed
# once.
#
# @param list [Array] words to de-duplicate
# @return [Array] a new array without stem duplicates
def remove_duplicates(list)
  # uniq keeps the first element per key, so work on the reversed list to
  # keep the last occurrence, then restore the original direction.
  list.reverse.uniq { |word| stemmer.stem(word) }.reverse
end

#speller ⇒ Object



20
21
22
# File 'lib/tagelizer.rb', line 20

# Returns the speller, building it with +build_speller+ on first use and
# caching it in @speller afterwards.
#
# @return [Object] the cached result of +build_speller+
def speller
  return @speller if @speller

  @speller = build_speller
end

#stemmer ⇒ Object



34
35
36
# File 'lib/tagelizer.rb', line 34

# Returns the stemmer, building it with +build_stemmer+ on first use and
# caching it in @stemmer afterwards.
#
# @return [Object] the cached result of +build_stemmer+
def stemmer
  @stemmer = build_stemmer unless @stemmer
  @stemmer
end