Class: Tomoto::LDA

Inherits:
Object
  • Object
show all
Defined in:
lib/tomoto/lda.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.load(filename) ⇒ Object



11
12
13
14
15
# File 'lib/tomoto/lda.rb', line 11

# Creates a model with `new` (no arguments) and calls `_load(filename)` on it.
#
# @param filename passed through to `_load` unchanged
# @return the newly created model (the return value of `_load` is discarded)
def self.load(filename)
  new.tap { |loaded| loaded._load(filename) }
end

.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil) ⇒ Object



3
4
5
6
7
8
9
# File 'lib/tomoto/lda.rb', line 3

# Builds a model via `_new` and stores the `min_cf`, `min_df` and `rm_top`
# settings on it as instance variables.
#
# `tw` is converted with `to_tw` before being handed to `_new`; a nil `seed`
# is passed to `_new` as -1. `min_cf`, `min_df` and `rm_top` are not passed to
# `_new` at all; they are only recorded on the model (`@rm_top` is read by
# #removed_top_words).
#
# @return whatever `init_params(model, binding)` returns
def self.new(tw: :one, min_cf: 0, min_df: 0, rm_top: 0, k: 1, alpha: 0.1, eta: 0.01, seed: nil)
  model = _new(to_tw(tw), k, alpha, eta, seed || -1)
  model.instance_variable_set(:@min_cf, min_cf)
  model.instance_variable_set(:@min_df, min_df)
  model.instance_variable_set(:@rm_top, rm_top)
  # NOTE(review): `binding` exposes every local of this method (the keyword
  # arguments and `model`). Presumably init_params reads the argument values
  # from it, so renaming or adding locals here may change what it sees —
  # confirm before refactoring.
  init_params(model, binding)
end

Instance Method Details

#add_doc(doc) ⇒ Object



17
18
19
# File 'lib/tomoto/lda.rb', line 17

# Runs `doc` through `prepare_doc` and hands the result to `_add_doc`.
#
# @param doc the document, in whatever form `prepare_doc` accepts
# @return the result of `_add_doc`
def add_doc(doc)
  prepared = prepare_doc(doc)
  _add_doc(prepared)
end

#count_by_topics ⇒ Object



31
32
33
34
# File 'lib/tomoto/lda.rb', line 31

# Calls `prepare`, then returns the result of `_count_by_topics`.
#
# @return the value produced by `_count_by_topics`
#   (NOTE(review): presumably one count per topic — confirm)
def count_by_topics
  prepare
  _count_by_topics
end

#infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0) ⇒ Object

TODO support multiple docs



26
27
28
29
# File 'lib/tomoto/lda.rb', line 26

# Delegates to `_infer` for a single document, after checking `trained?`.
#
# `parallel` is converted with `to_ps`; every other argument is forwarded to
# `_infer` unchanged, in the order doc, iter, tolerance, workers, converted
# parallel value, together.
#
# @raise [RuntimeError] when `trained?` is falsy
# @return the result of `_infer`
def infer(doc, iter: 100, tolerance: -1, workers: 0, parallel: :default, together: 0)
  raise "cannot infer with untrained model" unless trained?

  scheme = to_ps(parallel)
  _infer(doc, iter, tolerance, workers, scheme, together)
end

#make_doc(doc) ⇒ Object



21
22
23
# File 'lib/tomoto/lda.rb', line 21

# Runs `doc` through `tokenize_doc` and hands the result to `_make_doc`.
#
# @param doc the document, in whatever form `tokenize_doc` accepts
# @return the result of `_make_doc`
def make_doc(doc)
  tokens = tokenize_doc(doc)
  _make_doc(tokens)
end

#removed_top_words ⇒ Object



36
37
38
39
# File 'lib/tomoto/lda.rb', line 36

# Calls `prepare`, then returns the result of `_removed_top_words`, passing
# the model's `@rm_top` instance variable as its only argument.
#
# @return the value produced by `_removed_top_words(@rm_top)`
def removed_top_words
  prepare
  _removed_top_words(@rm_top)
end

#save(filename, full: true) ⇒ Object



41
42
43
# File 'lib/tomoto/lda.rb', line 41

# Delegates to `_save`, turning the `full:` keyword into a positional argument.
#
# @param filename passed through to `_save` unchanged
# @param full passed through to `_save`; defaults to true
#   (NOTE(review): presumably selects whether the complete model state is
#   written — confirm against `_save`)
# @return the result of `_save`
def save(filename, full: true)
  _save(filename, full)
end

#summary(initial_hp: true, params: true, topic_word_top_n: 5) ⇒ Object

returns string instead of printing



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/tomoto/lda.rb', line 46

# Builds a multi-line report and returns it as a String (nothing is printed).
#
# The report is a sequence of sections, each introduced by a heading line and
# separated from the next by a line containing only "|". Section bodies are
# appended to the shared line buffer by the corresponding `*_info` helper.
#
# @param initial_hp when truthy, include the "<Initial Parameters>" section
# @param params when truthy, include the "<Parameters>" section
# @param topic_word_top_n include the "<Topics>" section only when > 0; the
#   value is forwarded to `topics_info`
# @return [String] the report lines joined with "\n"
def summary(initial_hp: true, params: true, topic_word_top_n: 5)
  lines = []

  # Appends one section: heading, body (written by the block), then separator.
  section = lambda do |heading, &body|
    lines << heading
    body.call
    lines << "|"
  end

  section.call("<Basic Info>") { basic_info(lines) }
  section.call("<Training Info>") { training_info(lines) }
  section.call("<Initial Parameters>") { initial_params_info(lines) } if initial_hp
  section.call("<Parameters>") { params_info(lines) } if params

  if topic_word_top_n > 0
    section.call("<Topics>") { topics_info(lines, topic_word_top_n: topic_word_top_n) }
  end

  # Drop the separator left after the final section.
  lines.pop

  lines.join("\n")
end

#topic_words(topic_id = nil, top_n: 10) ⇒ Object



81
82
83
84
85
86
87
# File 'lib/tomoto/lda.rb', line 81

# Delegates to `_topic_words` for one topic, or for every topic id in 0...k.
#
# @param topic_id when truthy, only this id is queried; otherwise all ids
#   from 0 up to (but excluding) `k` are queried in order
# @param top_n forwarded to `_topic_words` as its second argument
# @return the result of `_topic_words` for a single id, or an Array holding
#   one such result per id
def topic_words(topic_id = nil, top_n: 10)
  return _topic_words(topic_id, top_n) if topic_id

  (0...k).map { |id| _topic_words(id, top_n) }
end

#train(iterations = 10, workers: 0, parallel: :default) ⇒ Object

TODO raise error if iterations < 1



90
91
92
93
# File 'lib/tomoto/lda.rb', line 90

# Calls `prepare`, then delegates to `_train`.
#
# `parallel` is converted with `to_ps` (after `prepare` has run); `iterations`
# and `workers` are forwarded unchanged.
#
# TODO: raise error if iterations < 1
#
# @return the result of `_train`
def train(iterations = 10, workers: 0, parallel: :default)
  prepare

  scheme = to_ps(parallel)
  _train(iterations, workers, scheme)
end

#tw ⇒ Object



95
96
97
# File 'lib/tomoto/lda.rb', line 95

# Returns the TERM_WEIGHT entry selected by the value of `_tw`.
# NOTE(review): presumably the reverse of the `to_tw` conversion applied to
# the `tw:` argument in .new — confirm.
def tw
  TERM_WEIGHT[_tw]
end