Class: Lda::Vocabulary

Inherits:
Object
  • Object
show all
Defined in:
lib/lda-ruby/vocabulary.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(words = nil) ⇒ Vocabulary

Returns a new instance of Vocabulary.



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/lda-ruby/vocabulary.rb', line 5

# Builds the vocabulary, optionally seeding it with an initial word list.
#
# @words maps each word to a 1-based index. Looking up a key that is not yet
# present runs the hash's default block, which bumps the :MAX_VALUE counter
# stored inside the same hash and assigns the new counter value to that key.
# @indexes is the reverse lookup (index => word).
#
# @param words [#each, nil] words to register, in order; a repeated word
#   keeps the index of its first occurrence. Nothing is registered when nil.
def initialize(words = nil)
  @words = Hash.new do |hash, key|
    # fetch (unlike []) never re-enters this default block.
    hash[:MAX_VALUE] = hash.fetch(:MAX_VALUE, 0) + 1
    hash[key] = hash[:MAX_VALUE]
  end

  # A bare lookup is enough: the default block performs the assignment.
  words.each { |w| @words[w] } if words

  # :MAX_VALUE is bookkeeping, not a word. Skipping it keeps the sentinel out
  # of the reverse index regardless of hash iteration order, and leaves
  # @indexes in ascending index order instead of starting at the highest one.
  @indexes = {}
  @words.each_pair do |w, i|
    @indexes[i] = w unless w == :MAX_VALUE
  end
end

Instance Attribute Details

#indexes ⇒ Object (readonly)

Returns the value of attribute indexes.



3
4
5
# File 'lib/lda-ruby/vocabulary.rb', line 3

# Reader for the reverse lookup table.
#
# @return [Hash] the @indexes hash itself (not a copy), keyed by word index
#   with the corresponding word as value
def indexes
  @indexes
end

#words ⇒ Object (readonly)

Returns the value of attribute words.



3
4
5
# File 'lib/lda-ruby/vocabulary.rb', line 3

# Reader for the forward lookup table.
#
# The returned hash is the live @words object, not a copy. It is created in
# #initialize with a default block, so reading a key that is not present
# assigns that key the next index as a side effect (without touching
# @indexes). It also holds the :MAX_VALUE counter key alongside the words.
#
# @return [Hash] the @words hash, keyed by word with the word's index as value
def words
  @words
end

Instance Method Details

#check_word(word) ⇒ Object



23
24
25
26
27
# File 'lib/lda-ruby/vocabulary.rb', line 23

# Resolves +word+ to its index and records it in the reverse lookup.
#
# The lookup goes through @words, whose default block (set up in #initialize)
# hands out the next index when the word has not been seen before. Copies of
# +word+ are used for both tables so the caller's object is never stored.
#
# @param word [Object] the word to look up; must respond to #dup
# @return [Integer] the index now associated with +word+
def check_word(word)
  @words[word.dup].tap do |index|
    # Written on every call, so an existing entry is simply refreshed.
    @indexes[index] = word.dup
  end
end

#load_file(filename) ⇒ Object



29
30
31
32
# File 'lib/lda-ruby/vocabulary.rb', line 29

# Registers every word listed in a plain-text file, one word per line.
#
# The file content is split on runs of "\n" / "\r", so blank lines between
# words are ignored. String#split keeps a leading empty field, so a file that
# starts with a line break would otherwise register "" as a word and shift
# every following index; empty entries are therefore dropped.
#
# @param filename [String] path of the file to read
# @return [Array<String>] the words that were passed to #check_word, in order
# @raise [SystemCallError] if the file cannot be read (raised by File.read)
def load_file(filename)
  File.read(filename)
      .split(/[\n\r]+/)
      .reject(&:empty?)
      .each { |word| check_word(word) }
end

#load_yaml(filename) ⇒ Object



34
35
36
# File 'lib/lda-ruby/vocabulary.rb', line 34

# Registers every entry of a YAML document through #check_word.
#
# The parsed document is iterated with #each and each yielded element is
# handed to #check_word unchanged.
#
# NOTE(review): which object types YAML.load_file is willing to deserialize
# depends on the installed YAML library version -- confirm before pointing
# this at files from an untrusted source.
#
# @param filename [String] path of the YAML file to load
# @return [Object] the parsed document (the receiver of #each)
def load_yaml(filename)
  entries = YAML.load_file(filename)
  entries.each { |entry| check_word(entry) }
end

#num_words ⇒ Object



38
39
40
# File 'lib/lda-ruby/vocabulary.rb', line 38

# Number of words in the vocabulary.
#
# @words stores one extra entry, the :MAX_VALUE counter, once any word has
# been registered, so the count is the hash size minus one, clamped at zero
# for the still-empty hash.
#
# @return [Integer] the word count, never negative
def num_words
  [@words.size - 1, 0].max
end

#to_a ⇒ Object



42
43
44
# File 'lib/lda-ruby/vocabulary.rb', line 42

# Lists the vocabulary's words ordered by their index.
#
# The :MAX_VALUE counter entry kept inside @words is left out of the result.
#
# @return [Array] the words, lowest index first
def to_a
  entries = @words.reject { |word, _index| word == :MAX_VALUE }
  entries.sort_by { |_word, index| index }.map(&:first)
end