Class: Bayon::Documents
- Inherits:
  Object
  - Object
    - Bayon::Documents
- Defined in:
- lib/bayon.rb
Instance Method Summary collapse
- #add_document(label, features) ⇒ Object
- #cluster_size_limit=(limit) ⇒ Object
- #do_clustering(method = Analyzer::REPEATED_BISECTION) ⇒ Object
- #eval_limit=(limit) ⇒ Object
- #initialize ⇒ Documents (constructor): A new instance of Documents.
- #output_similairty_point=(output) ⇒ Object
Constructor Details
#initialize ⇒ Documents
Returns a new instance of Documents.
5 6 7 8 9 10 |
# File 'lib/bayon.rb', line 5
#
# Builds an empty collection: no documents yet, and every clustering
# option unset (nil) until its setter is called.
def initialize
  # [label, features] pairs, kept in insertion order.
  @documents = []
  @cluster_size_limit = @eval_limit = @output_similairty_point = nil
end
Instance Method Details
#add_document(label, features) ⇒ Object
36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/bayon.rb', line 36
#
# Registers a document, or replaces the features of a document that was
# already registered under the same label.
#
# @param label [Object] key identifying the document (looked up with Array#assoc)
# @param features [Hash] feature => value pairs describing the document
# @raise [TypeError] if features is not a Hash
def add_document(label, features)
  unless features.kind_of?(Hash)
    # Report the class of the offending argument. The message used to
    # interpolate an undefined local (`limit`), so a NameError escaped
    # instead of this TypeError.
    raise TypeError, "wrong argument type #{features.class} (expected Hash)"
  end

  if (label_features = @documents.assoc(label))
    # Known label: overwrite the features in place so the document keeps
    # its original position.
    label_features[1] = features
  else
    @documents << [label, features]
  end
end
#cluster_size_limit=(limit) ⇒ Object
12 13 14 15 16 17 18 |
# File 'lib/bayon.rb', line 12
#
# Stores the cluster size limit that do_clustering passes to the analyzer.
#
# @param limit [Integer]
# @raise [TypeError] if limit is not an Integer
def cluster_size_limit=(limit)
  raise TypeError, "wrong argument type #{limit.class} (expected Integer)" unless limit.kind_of?(Integer)

  @cluster_size_limit = limit
end
#do_clustering(method = Analyzer::REPEATED_BISECTION) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/bayon.rb', line 48
#
# Feeds every registered document to a fresh Analyzer, runs the clustering,
# and translates the analyzer's document ids back into labels.
#
# @param method clustering method constant handed to Analyzer#do_clustering
# @return [Array<Array>] one array per cluster; each entry is a label, or a
#   [label, point] pair when output_similairty_point is enabled
def do_clustering(method = Analyzer::REPEATED_BISECTION)
  analyzer = Analyzer.new
  analyzer.set_cluster_size_limit(@cluster_size_limit) if @cluster_size_limit
  analyzer.set_eval_limit(@eval_limit) if @eval_limit
  analyzer.set_output_similairty_point(@output_similairty_point) if @output_similairty_point

  # Give each distinct feature a dense integer id in first-seen order.
  # A Hash lookup replaces the former Array#include? + Array#index scans,
  # which were quadratic in the number of distinct features. Features are
  # matched with Hash key equality (eql?), the same rule each document's
  # own features Hash already uses.
  feature_ids = {}
  @documents.each_with_index do |(_label, features), doc_id|
    doc = Document.new(doc_id)
    features.each do |feature, value|
      feature_id = (feature_ids[feature] ||= feature_ids.size)
      doc.add_feature(feature_id, value)
    end
    analyzer.add_document(doc)
  end

  analyzer.do_clustering(method)

  # Drain the analyzer; doc ids are indexes into @documents.
  clusters = []
  while (cluster = analyzer.get_next_result)
    clusters <<
      if @output_similairty_point
        cluster.map { |doc_id, point| [@documents[doc_id][0], point] }
      else
        cluster.map { |doc_id| @documents[doc_id][0] }
      end
  end
  clusters
end
#eval_limit=(limit) ⇒ Object
20 21 22 23 24 25 26 |
# File 'lib/bayon.rb', line 20
#
# Stores the eval limit that do_clustering passes to the analyzer.
#
# @param limit [Numeric]
# @raise [TypeError] if limit is not a Numeric
def eval_limit=(limit)
  raise TypeError, "wrong argument type #{limit.class} (expected Numeric)" unless limit.kind_of?(Numeric)

  @eval_limit = limit
end
#output_similairty_point=(output) ⇒ Object
28 29 30 31 32 33 34 |
# File 'lib/bayon.rb', line 28
#
# Turns similarity-point output on or off. When enabled, do_clustering
# returns [label, point] pairs instead of bare labels.
#
# @param output [Boolean] exactly true or false; truthy/falsy values are rejected
# @raise [TypeError] if output is neither true nor false
def output_similairty_point=(output)
  unless true.equal?(output) || false.equal?(output)
    # Report the class of the offending argument. The message used to
    # interpolate an undefined local (`limit`), so a NameError escaped
    # instead of this TypeError.
    raise TypeError, "wrong argument type #{output.class} (expected boolean value)"
  end

  @output_similairty_point = output
end