Class: Bayon::Documents

Inherits:
Object
  • Object
show all
Defined in:
lib/bayon.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Documents

Returns a new instance of Documents.



5
6
7
8
9
10
# File 'lib/bayon.rb', line 5

# Builds an empty collection: no documents registered and every optional
# clustering setting left unset (nil).
def initialize
  @documents = Array.new
  @cluster_size_limit, @eval_limit, @output_similairty_point = nil, nil, nil
end

Instance Method Details

#add_document(label, features) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
# File 'lib/bayon.rb', line 36

# Registers a document, or replaces the features of an already registered
# document that has the same label.
#
# @param label [Object] identifier of the document; compared with
#   Array#assoc (i.e. ==) against labels that were added earlier
# @param features [Hash] the document's features
# @raise [TypeError] if +features+ is not a Hash
def add_document(label, features)
  unless features.kind_of?(Hash)
    # Report the class of the offending argument itself.
    raise TypeError, "wrong argument type #{features.class} (expected Hash)"
  end

  if (label_features = @documents.assoc(label))
    # Known label: overwrite its features in place, keeping its position.
    label_features[1] = features
  else
    @documents << [label, features]
  end
end

#cluster_size_limit=(limit) ⇒ Object



12
13
14
15
16
17
18
# File 'lib/bayon.rb', line 12

# Stores the cluster size limit that do_clustering hands to the analyzer.
#
# @param limit [Integer]
# @raise [TypeError] if +limit+ is not an Integer
def cluster_size_limit=(limit)
  raise TypeError, "wrong argument type #{limit.class} (expected Integer)" unless limit.is_a?(Integer)

  @cluster_size_limit = limit
end

#do_clustering(method = Analyzer::REPEATED_BISECTION) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/bayon.rb', line 48

# Feeds every registered document to a fresh Analyzer, runs the clustering
# and translates the analyzer's results back into document labels.
#
# Feature keys are replaced by integer ids assigned in order of first
# appearance across all documents (0, 1, 2, ...).
#
# @param method [Object] clustering method forwarded to Analyzer#do_clustering
# @return [Array<Array>] one array per cluster; each entry is a document
#   label, or a [label, point] pair when output_similairty_point is set
def do_clustering(method = Analyzer::REPEATED_BISECTION)
  analyzer = Analyzer.new
  analyzer.set_cluster_size_limit(@cluster_size_limit) if @cluster_size_limit
  analyzer.set_eval_limit(@eval_limit) if @eval_limit
  analyzer.set_output_similairty_point(@output_similairty_point) if @output_similairty_point

  # feature => id. A Hash lookup replaces a linear Array#include?/#index
  # scan per feature while assigning the same ids in the same order.
  feature_ids = {}

  @documents.each_with_index do |(_label, features), doc_id|
    doc = Document.new(doc_id)

    features.each do |feature, value|
      feature_id = feature_ids.fetch(feature) { feature_ids[feature] = feature_ids.size }
      doc.add_feature(feature_id, value)
    end

    analyzer.add_document(doc)
  end

  analyzer.do_clustering(method)

  result = []

  # get_next_result is polled until it returns a falsy value.
  # NOTE(review): each cluster is presumably an array of document ids
  # (or [id, point] pairs in similarity mode) -- confirm against Analyzer.
  while (cluster = analyzer.get_next_result)
    result <<
      if @output_similairty_point
        cluster.map { |id, point| [@documents[id][0], point] }
      else
        cluster.map { |id| @documents[id][0] }
      end
  end

  result
end

#eval_limit=(limit) ⇒ Object



20
21
22
23
24
25
26
# File 'lib/bayon.rb', line 20

# Stores the evaluation limit that do_clustering hands to the analyzer.
#
# @param limit [Numeric]
# @raise [TypeError] if +limit+ is not a Numeric
def eval_limit=(limit)
  raise TypeError, "wrong argument type #{limit.class} (expected Numeric)" unless limit.is_a?(Numeric)

  @eval_limit = limit
end

#output_similairty_point=(output) ⇒ Object



28
29
30
31
32
33
34
# File 'lib/bayon.rb', line 28

# Switches similarity-point output on or off. When set to true,
# do_clustering returns [label, point] pairs instead of bare labels.
#
# @param output [Boolean] exactly true or false
# @raise [TypeError] if +output+ is neither true nor false
def output_similairty_point=(output)
  boolean = output.instance_of?(TrueClass) || output.instance_of?(FalseClass)

  unless boolean
    # Report the class of the offending argument itself.
    raise TypeError, "wrong argument type #{output.class} (expected boolean value)"
  end

  @output_similairty_point = output
end