Class: Categorize::Models::Cluster
- Inherits: AbstractModel
- Inheritance chain: Object → AbstractModel → Categorize::Models::Cluster
 
- Defined in:
- lib/categorize/models/cluster.rb
Direct Known Subclasses
Instance Attribute Summary collapse
- #num_clusters ⇒ Object
  Returns the value of attribute num_clusters.
Instance Method Summary collapse
- #build_categories(clusters) ⇒ Object
- #initialize ⇒ Cluster (constructor)
  A new instance of Cluster.
- #model(query, records_to_tokens) ⇒ Object
Methods inherited from AbstractModel
#build_dataset, #build_vars, #vectorize
Constructor Details
#initialize ⇒ Cluster
Returns a new instance of Cluster.
# File 'lib/categorize/models/cluster.rb', line 9

# Prepares a fresh clustering model: instantiates the Ai4r Ward-linkage
# clusterer, sets the default number of clusters to 10, and then invokes the
# parent class initializer.
#
# @return [Cluster] a new instance of Cluster
def initialize
  @clusterer = Ai4r::Clusterers::WardLinkage.new
  @num_clusters = 10
  super
end
Instance Attribute Details
#num_clusters ⇒ Object
Returns the value of attribute num_clusters.
# File 'lib/categorize/models/cluster.rb', line 7

# Reader for the number of clusters handed to the clusterer when building.
#
# @return [Object] the current value of @num_clusters
def num_clusters
  @num_clusters
end
Instance Method Details
#build_categories(clusters) ⇒ Object
# File 'lib/categorize/models/cluster.rb', line 22

# Labels every cluster and groups record indices under their label.
#
# Each cluster's data items are mapped back to their positions in @vectors,
# a label is obtained for the cluster from get_bigram_max (defined elsewhere),
# and clusters that end up with the same label are merged into one entry.
#
# Reads @vectors and @query; writes @query_terms.
#
# @param clusters [Array<#data_items>] clusters whose data_items are elements
#   of @vectors
# @return [Array<Array>] [label, record_indices] pairs, in order of each
#   label's first appearance
def build_categories(clusters)
  # Translate each cluster's vectors back into record positions.
  # NOTE(review): Array#index returns the first match, so records with
  # identical vectors all resolve to the same position -- confirm whether
  # duplicate vectors can occur.
  records_by_cluster = clusters.map do |cluster|
    cluster.data_items.map { |vector| @vectors.index(vector) }
  end

  # Recomputed on every call (previously memoised with ||=) so that a second
  # call with a different @query does not reuse the first query's terms.
  @query_terms = @query.split.map(&:downcase)

  labels = records_by_cluster.map do |records|
    # Transposing the member vectors gives one column per vector dimension;
    # the column sums are the per-dimension totals passed on as "tf".
    columns = records.map { |r| @vectors[r] }.transpose
    tf = columns.map { |column| column.reduce(&:+) }
    get_bigram_max(records, tf)
  end

  # Merge clusters that share a label. The existing entry is looked up by
  # label (not by cluster position, which drifts once anything has been
  # merged) and extended in place with concat, so no records are dropped.
  categories_records = []
  labels.zip(records_by_cluster) do |label, records|
    existing = categories_records.find { |known_label, _| known_label == label }
    if existing
      existing.last.concat(records)
    else
      categories_records << [label, records]
    end
  end
  categories_records
end
#model(query, records_to_tokens) ⇒ Object
# File 'lib/categorize/models/cluster.rb', line 15

# Clusters the given records and returns the categories built from the
# resulting clusters.
#
# Stores the query in @query, builds a dataset with build_vars, runs the
# clusterer on it with @num_clusters, and passes the clusterer's clusters to
# build_categories.
#
# @param query [Object] stored in @query for later use
# @param records_to_tokens [Object] handed unchanged to build_vars
# @return [Object] whatever build_categories returns
def model(query, records_to_tokens)
  @query = query
  @clusterer.build(build_vars(records_to_tokens), @num_clusters)
  build_categories(@clusterer.clusters)
end