Class: Bayon::Documents

Inherits:
Object
  • Object
show all
Defined in:
lib/bayon.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Documents



5
6
7
8
9
# File 'lib/bayon.rb', line 5

# Sets up an empty document collection with no clustering limits configured.
#
# Both limits start as nil and can be assigned later through their setters.
def initialize
  # nil means "no limit has been configured yet".
  @cluster_size_limit = @eval_limit = nil

  # Ordered list of [label, features] pairs.
  @documents = []
end

Instance Method Details

#add_document(label, features) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
# File 'lib/bayon.rb', line 27

# Registers a document under +label+, or replaces the features of an
# already-registered document that has the same label.
#
# @param label [Object] identifier of the document; compared with == against
#   existing labels (via Array#assoc)
# @param features [Hash] the document's features; iterated as
#   feature => value pairs when clustering
# @raise [TypeError] if +features+ is not a Hash
def add_document(label, features)
  unless features.kind_of?(Hash)
    # Report the class of the offending argument itself.
    raise TypeError, "wrong argument type #{features.class} (expected Hash)"
  end

  if (label_features = @documents.assoc(label))
    # Known label: overwrite its features in place, keeping its position.
    label_features[1] = features
  else
    @documents << [label, features]
  end
end

#cluster_size_limit=(limit) ⇒ Object



11
12
13
14
15
16
17
# File 'lib/bayon.rb', line 11

# Stores the cluster size limit that is handed to the analyzer when
# clustering is run.
#
# @param limit [Integer] the limit to store
# @raise [TypeError] if +limit+ is not an Integer
def cluster_size_limit=(limit)
  raise TypeError, "wrong argument type #{limit.class} (expected Integer)" unless limit.is_a?(Integer)

  @cluster_size_limit = limit
end

#do_clustering(method = Analyzer::REPEATED_BISECTION) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/bayon.rb', line 39

# Clusters the registered documents and returns the clusters as arrays of
# document labels.
#
# Each document is handed to the analyzer with its position in @documents as
# its id. Every distinct feature key is replaced by an integer id, assigned in
# order of first appearance starting at 0.
#
# @param method the clustering method passed through to Analyzer#do_clustering
#   (defaults to Analyzer::REPEATED_BISECTION)
# @return [Array<Array>] one array of labels per cluster, in the order the
#   analyzer yields them
def do_clustering(method = Analyzer::REPEATED_BISECTION)
  analyzer = Analyzer.new
  analyzer.set_cluster_size_limit(@cluster_size_limit) if @cluster_size_limit
  analyzer.set_eval_limit(@eval_limit) if @eval_limit

  # feature key => integer id. A Hash makes each lookup constant-time, where
  # scanning an Array would cost time proportional to the vocabulary size for
  # every feature of every document.
  # NOTE: Hash keys are matched with eql?, so keys such as 1 and 1.0 receive
  # distinct ids.
  feature_ids = {}

  @documents.each_with_index do |(_label, features), doc_id|
    doc = Document.new(doc_id)

    features.each do |feature, value|
      # An unseen feature gets the next free id (the current table size).
      feature_id = (feature_ids[feature] ||= feature_ids.size)
      doc.add_feature(feature_id, value)
    end

    analyzer.add_document(doc)
  end

  analyzer.do_clustering(method)

  clusters = []

  # Drain results until the analyzer returns a falsy value; each cluster is a
  # collection of document ids, which are mapped back to their labels.
  while (cluster = analyzer.get_next_result)
    clusters << cluster.map { |doc_id| @documents[doc_id][0] }
  end

  clusters
end

#eval_limit=(limit) ⇒ Object



19
20
21
22
23
24
25
# File 'lib/bayon.rb', line 19

# Stores the evaluation limit that is handed to the analyzer when clustering
# is run.
#
# @param limit [Numeric] the limit to store
# @raise [TypeError] if +limit+ is not a Numeric
def eval_limit=(limit)
  raise TypeError, "wrong argument type #{limit.class} (expected Numeric)" unless limit.is_a?(Numeric)

  @eval_limit = limit
end