Class: Tagelizer

Inherits:
Object
  • Object
show all
Defined in:
lib/tagelizer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(locale = 'en') ⇒ Tagelizer

Returns a new instance of Tagelizer.



8
9
10
11
12
# File 'lib/tagelizer.rb', line 8

# Creates a tagelizer for the given locale.
#
# The requested locale is stored in @locale. @dictionary is set to the
# requested locale when +dictionaries+ includes it, and to 'en' otherwise.
#
# @param locale [String] dictionary code to use; defaults to 'en'
def initialize(locale = 'en')
  # @locale used to be left unassigned, so the locale reader always returned nil.
  @locale      = locale
  @dictionary  = dictionaries.include?(locale) ? locale : 'en'
  # parse keeps only words whose size is strictly greater than this value.
  @minwordsize = 2
  @options     = { 'ignore-case' => true }
end

Instance Attribute Details

#dictionary ⇒ Object

Returns the value of attribute dictionary.



40
41
42
# File 'lib/tagelizer.rb', line 40

# Returns the current value of @dictionary, the dictionary code that
# build_speller passes to Aspell.new and build_stemmer passes as :language.
def dictionary
  @dictionary
end

#locale ⇒ Object (readonly)

Returns the value of attribute locale.



7
8
9
# File 'lib/tagelizer.rb', line 7

# Returns the current value of @locale (read-only attribute).
def locale
  @locale
end

#options ⇒ Object (readonly)

Returns the value of attribute options.



7
8
9
# File 'lib/tagelizer.rb', line 7

# Returns the current value of @options (read-only attribute).
# actual_options converts these values to strings before build_speller
# applies them to the speller.
def options
  @options
end

Instance Method Details

#actual_options ⇒ Object



57
58
59
60
61
62
# File 'lib/tagelizer.rb', line 57

# Builds a fresh hash with the same keys as +options+ and every value
# converted with +to_s+.
#
# @return [Hash] option name => stringified option value
def actual_options
  options.each_with_object({}) do |(name, setting), stringified|
    stringified[name] = setting.to_s
  end
end

#build_speller ⇒ Object



22
23
24
25
26
27
28
29
30
# File 'lib/tagelizer.rb', line 22

# Creates an Aspell instance for the current +dictionary+, sets its suggestion
# mode to 'normal', and applies every entry of +actual_options+ through
# +set_option+.
#
# @return [Aspell] the configured speller
def build_speller
  Aspell.new(dictionary).tap do |checker|
    checker.suggestion_mode = 'normal'
    actual_options.each { |name, setting| checker.set_option(name, setting) }
  end
end

#build_stemmer ⇒ Object



36
37
38
# File 'lib/tagelizer.rb', line 36

# Creates a Lingua::Stemmer whose :language option is the current +dictionary+.
#
# @return [Lingua::Stemmer] the new stemmer
def build_stemmer
  language_code = dictionary
  Lingua::Stemmer.new(language: language_code)
end

#corrected_word(word) ⇒ Object



53
54
55
# File 'lib/tagelizer.rb', line 53

# Returns +word+ unchanged when the speller accepts it; otherwise returns the
# speller's first suggestion.
#
# When the speller offers no suggestion, +word+ itself is returned instead of
# nil, so callers never receive nil for a word they passed in.
#
# @param word [String] the word to check
# @return [String] the accepted word, the first suggestion, or the original word
def corrected_word(word)
  return word if speller.check(word)

  speller.suggest(word).first || word
end

#dictionaries ⇒ Object



49
50
51
# File 'lib/tagelizer.rb', line 49

# Returns the +code+ of every entry reported by Aspell.list_dicts.
# The list is computed on first use and cached in @dictionaries.
#
# @return [Array] dictionary codes
def dictionaries
  return @dictionaries if @dictionaries

  @dictionaries = Aspell.list_dicts.map(&:code)
end

#parse(text) ⇒ Object



14
15
16
# File 'lib/tagelizer.rb', line 14

# Splits +text+ on whitespace and turns it into a list of stemmed,
# spell-corrected words.
#
# For each whitespace-separated token the first run of word characters is
# taken (so "world!" yields "world" and "(hello)" yields "hello"); tokens
# without any word character are skipped. Only words whose size is strictly
# greater than @minwordsize are kept. Each remaining word goes through
# +corrected_word+ and then +stemmer.stem+.
#
# @param text [String] the text to extract words from
# @return [Array] the stemmed words, in order of appearance
def parse(text)
  # /\w+/ rather than /\w*/: the latter matches the empty string at position 0,
  # which silently discarded every token starting with punctuation.
  words = text.split(' ').map { |token| token[/\w+/] }.compact

  words.select { |word| word.size > @minwordsize }
       .map { |word| stemmer.stem(corrected_word(word)) }
end

#speller ⇒ Object



18
19
20
# File 'lib/tagelizer.rb', line 18

# Returns the speller cached in @speller, creating it with +build_speller+
# on first use.
def speller
  return @speller if @speller

  @speller = build_speller
end

#stemmer ⇒ Object



32
33
34
# File 'lib/tagelizer.rb', line 32

# Returns the stemmer cached in @stemmer, creating it with +build_stemmer+
# on first use.
def stemmer
  return @stemmer if @stemmer

  @stemmer = build_stemmer
end