Class: Lda::TextCorpus

Inherits:
Corpus
  • Object
show all
Defined in:
lib/lda-ruby/corpus/text_corpus.rb

Instance Attribute Summary collapse

Attributes inherited from Corpus

#documents, #num_docs, #num_terms, #stopwords, #vocabulary

Instance Method Summary collapse

Methods inherited from Corpus

#add_document, #remove_word

Constructor Details

#initialize(input_data) ⇒ TextCorpus

Loads text documents from a YAML file, an array of strings, or a single string (treated as one document)



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/lda-ruby/corpus/text_corpus.rb', line 6

# Builds the corpus from +input_data+, adding one TextDocument per entry.
#
# The input is interpreted by the first matching rule:
# * a String naming an existing file: the file is read with YAML.load_file
#   and is expected to hold an array of strings, one per document
# * an Array: each element becomes one document (a shallow copy is iterated,
#   not the caller's array)
# * any other String: treated as the text of a single document
#
# @param input_data [String, Array<String>] YAML filename, array of document
#   strings, or a single document string
# @raise [RuntimeError] if input_data is neither a String nor an Array
def initialize(input_data)
  super()

  # File.exist? is used because File.exists? was deprecated and then removed
  # in Ruby 3.2, where calling it raises NoMethodError.
  docs = if input_data.is_a?(String) && File.exist?(input_data)
    # yaml file containing an array of strings representing each document
    # NOTE(review): YAML.load_file parses whatever the file contains; if the
    # path can come from an untrusted source, consider a safe-load variant.
    YAML.load_file(input_data)
  elsif input_data.is_a?(Array)
    # an array of strings representing each document
    input_data.dup
  elsif input_data.is_a?(String)
    # a single string representing one document
    [input_data]
  else
    raise "Unknown input type: please pass in a valid filename or an array of strings."
  end

  docs.each do |doc|
    add_document(TextDocument.new(self, doc))
  end
end

Instance Attribute Details

#filename ⇒ Object (readonly)

Returns the value of attribute filename.



3
4
5
# File 'lib/lda-ruby/corpus/text_corpus.rb', line 3

# Read-only accessor for the @filename instance variable.
# NOTE(review): the constructor shown for this class does not assign
# @filename, so this may always be nil unless it is set elsewhere — confirm.
def filename; @filename; end