Class: Lda::Corpus

Inherits:
Object
  • Object
show all
Defined in:
lib/lda-ruby/corpus/corpus.rb,
ext/lda-ruby/lda-inference.c

Direct Known Subclasses

DataCorpus, DirectoryCorpus, TextCorpus

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Corpus

Returns a new instance of Corpus.



7
8
9
10
11
12
13
14
# File 'lib/lda-ruby/corpus/corpus.rb', line 7

# Sets up an empty corpus: no documents, no terms, zeroed counters, a fresh
# Vocabulary, and a stopword list read from config/stopwords.yml (located
# one directory above the directory of this file). Every stopword entry is
# stripped of surrounding whitespace.
def initialize
  @num_docs = 0
  @num_terms = 0
  @documents = []
  @all_terms = Set.new
  @vocabulary = Vocabulary.new

  stopwords_path = File.join(File.dirname(__FILE__), '..', 'config', 'stopwords.yml')
  @stopwords = YAML.load_file(stopwords_path)
  @stopwords.map!(&:strip)
end

Instance Attribute Details

#documents ⇒ Object (readonly)

Returns the value of attribute documents.



5
6
7
# File 'lib/lda-ruby/corpus/corpus.rb', line 5

# Reader for the @documents instance variable.
# The stored object itself is returned; no copy is made.
def documents
  @documents
end

#num_docs ⇒ Object (readonly)

Returns the value of attribute num_docs.



5
6
7
# File 'lib/lda-ruby/corpus/corpus.rb', line 5

# Reader for the @num_docs instance variable.
def num_docs
  @num_docs
end

#num_terms ⇒ Object (readonly)

Returns the value of attribute num_terms.



5
6
7
# File 'lib/lda-ruby/corpus/corpus.rb', line 5

# Reader for the @num_terms instance variable.
def num_terms
  @num_terms
end

#stopwords ⇒ Object (readonly)

Returns the value of attribute stopwords.



5
6
7
# File 'lib/lda-ruby/corpus/corpus.rb', line 5

# Reader for the @stopwords instance variable.
# The stored object itself is returned; no copy is made.
def stopwords
  @stopwords
end

#vocabulary ⇒ Object (readonly)

Returns the value of attribute vocabulary.



5
6
7
# File 'lib/lda-ruby/corpus/corpus.rb', line 5

# Reader for the @vocabulary instance variable.
# The stored object itself is returned; no copy is made.
def vocabulary
  @vocabulary
end

Instance Method Details

#add_document(doc) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/lda-ruby/corpus/corpus.rb', line 16

# Adds +doc+ to the corpus and refreshes the derived counters.
#
# The document is appended to the document list, its words are folded into
# the collection of all terms seen so far, the document count and the
# distinct-term count are updated, and the document is then passed to
# +update_vocabulary+.
#
# @param doc [Document] the document to add
# @return [nil]
# @raise [RuntimeError] if +doc+ is not a Document
def add_document(doc)
  raise 'Parameter +doc+ must be of type Document' unless doc.kind_of?(Document)

  @documents << doc

  # Merge in place. `@all_terms += doc.words` would allocate a brand-new set
  # (copying every term already known) for each document that is added.
  @all_terms.merge(doc.words)
  @num_docs += 1
  @num_terms = @all_terms.size

  update_vocabulary(doc)
  nil
end

#remove_word(word) ⇒ Object



29
30
31
# File 'lib/lda-ruby/corpus/corpus.rb', line 29

# Deletes +word+ from the collection returned by the vocabulary's +words+.
#
# Only the vocabulary is touched here; the document list and the term
# counters held by the corpus are not modified by this method. The return
# value is whatever that collection's +delete+ returns.
def remove_word(word)
  known_words = @vocabulary.words
  known_words.delete(word)
end