Class: Bayon::Documents
- Inherits: Object
- Inheritance chain: Object → Bayon::Documents
- Defined in:
- lib/bayon.rb
Instance Method Summary
- #[](label) ⇒ Object
- #add_document(label, features) ⇒ Object
- #clear ⇒ Object
- #cluster_size_limit=(limit) ⇒ Object
- #delete_document(label) ⇒ Object
- #do_clustering(method = Analyzer::REPEATED_BISECTION) ⇒ Object
- #eval_limit=(limit) ⇒ Object
- #initialize ⇒ Documents (constructor): A new instance of Documents.
- #labels ⇒ Object
- #output_similairty_point=(output) ⇒ Object
Constructor Details
#initialize ⇒ Documents
Returns a new instance of Documents.
5 6 7 8 9 10 |
# File 'lib/bayon.rb', line 5
# Builds an empty collection: no documents are stored and none of the
# clustering options (cluster size limit, eval limit, similarity-point
# output) has been set yet.
def initialize
  @documents = []
  @cluster_size_limit = @eval_limit = @output_similairty_point = nil
end
Instance Method Details
#[](label) ⇒ Object
52 53 54 55 |
# File 'lib/bayon.rb', line 52
# Looks up the features stored under +label+.
#
# @param label [Object] label of the document to look up
# @return [Object, nil] the features stored with that label, or nil when
#   no stored document has that label
def [](label)
  entry = @documents.assoc(label)
  entry ? entry[1] : nil
end
#add_document(label, features) ⇒ Object
36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/bayon.rb', line 36
# Stores +features+ under +label+. When a document with the same label is
# already stored, its features are replaced in place; otherwise a new
# [label, features] pair is appended.
#
# @param label [Object] label identifying the document
# @param features [Hash] the document's features
# @raise [TypeError] when +features+ is not a Hash
def add_document(label, features)
  unless features.kind_of?(Hash)
    # Report the class of the offending argument. The message previously
    # interpolated an undefined local, so this path raised NameError.
    raise TypeError, "wrong argument type #{features.class} (expected Hash)"
  end

  existing = @documents.assoc(label)
  if existing
    existing[1] = features
  else
    @documents << [label, features]
  end
end
#clear ⇒ Object
61 62 63 |
# File 'lib/bayon.rb', line 61
# Empties the stored document list in place.
#
# @return [Array] the (now empty) internal document list
def clear
  @documents.clear
end
#cluster_size_limit=(limit) ⇒ Object
12 13 14 15 16 17 18 |
# File 'lib/bayon.rb', line 12
# Records the cluster size limit that do_clustering hands to the analyzer.
#
# @param limit [Integer] the limit to store
# @raise [TypeError] when +limit+ is not an Integer
def cluster_size_limit=(limit)
  raise TypeError, "wrong argument type #{limit.class} (expected Integer)" unless limit.is_a?(Integer)

  @cluster_size_limit = limit
end
#delete_document(label) ⇒ Object
48 49 50 |
# File 'lib/bayon.rb', line 48
# Removes every stored document whose label equals +label+.
#
# @param label [Object] label of the document(s) to remove
# @return [Array] the internal document list after removal
def delete_document(label)
  @documents.delete_if { |entry| entry.first == label }
end
#do_clustering(method = Analyzer::REPEATED_BISECTION) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/bayon.rb', line 65
# Feeds every stored document to a fresh Analyzer, runs the clustering and
# collects the resulting clusters with document ids translated back to
# labels.
#
# Each stored document becomes a Document whose id is its position in the
# internal list. Feature keys are replaced by dense integer ids assigned in
# first-seen order across all documents.
#
# @param method [Object] clustering method passed through to
#   Analyzer#do_clustering (defaults to Analyzer::REPEATED_BISECTION)
# @return [Array<Array>] one array per cluster; elements are labels, or
#   [label, point] pairs when output_similairty_point is set to a truthy
#   value. Returns [] without creating an Analyzer when no documents are
#   stored.
def do_clustering(method = Analyzer::REPEATED_BISECTION)
  return [] if @documents.empty?

  analyzer = Analyzer.new
  # Options left at nil/false are not forwarded; the analyzer keeps its own defaults.
  analyzer.set_cluster_size_limit(@cluster_size_limit) if @cluster_size_limit
  analyzer.set_eval_limit(@eval_limit) if @eval_limit
  analyzer.set_output_similairty_point(@output_similairty_point) if @output_similairty_point

  # feature key => integer id. A Hash gives constant-time lookup where the
  # previous array scan (include? + index) was linear per feature occurrence.
  # Keys are matched with Hash semantics (eql?/hash).
  feature_ids = {}
  @documents.each_with_index do |(_label, features), doc_id|
    doc = Document.new(doc_id)
    features.each do |feature, value|
      feature_ids[feature] = feature_ids.size unless feature_ids.key?(feature)
      doc.add_feature(feature_ids[feature], value)
    end
    analyzer.add_document(doc)
  end

  analyzer.do_clustering(method)

  result = []
  # get_next_result is polled until it returns a falsy value.
  while (cluster = analyzer.get_next_result)
    if @output_similairty_point
      result << cluster.map { |doc_id, point| [@documents[doc_id][0], point] }
    else
      result << cluster.map { |doc_id| @documents[doc_id][0] }
    end
  end
  result
end
#eval_limit=(limit) ⇒ Object
20 21 22 23 24 25 26 |
# File 'lib/bayon.rb', line 20
# Records the eval limit that do_clustering hands to the analyzer.
#
# @param limit [Numeric] the limit to store
# @raise [TypeError] when +limit+ is not a Numeric
def eval_limit=(limit)
  raise TypeError, "wrong argument type #{limit.class} (expected Numeric)" unless limit.is_a?(Numeric)

  @eval_limit = limit
end
#labels ⇒ Object
57 58 59 |
# File 'lib/bayon.rb', line 57
# Lists the labels of all stored documents, in storage order.
#
# @return [Array] a new array holding each document's label
def labels
  @documents.map { |entry| entry.first }
end
#output_similairty_point=(output) ⇒ Object
28 29 30 31 32 33 34 |
# File 'lib/bayon.rb', line 28
# Records whether do_clustering should return [label, point] pairs instead
# of bare labels.
#
# @param output [Boolean] exactly true or false
# @raise [TypeError] when +output+ is neither true nor false
def output_similairty_point=(output)
  unless output.equal?(true) || output.equal?(false)
    # Report the class of the offending argument. The message previously
    # interpolated an undefined local, so this path raised NameError.
    raise TypeError, "wrong argument type #{output.class} (expected boolean value)"
  end

  @output_similairty_point = output
end