Class: Analyzer

Inherits:
Object show all
Defined in:
lib/picky/analyzer.rb

Overview

Analyzes indexes (index bundles, actually).

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Analyzer

Returns a new instance of Analyzer.



23
24
25
26
# File 'lib/picky/analyzer.rb', line 23

# Starts the analyzer with an empty analysis hash and an empty list of
# comments; both are filled in by the analysis methods.
def initialize
  @analysis = {}
  @comments = []
end

Instance Attribute Details

#analysis ⇒ Object (readonly)

Returns the value of attribute analysis.



19
20
21
# File 'lib/picky/analyzer.rb', line 19

# Reader for the hash that holds the collected analysis results.
def analysis
  @analysis
end

#comments ⇒ Object (readonly)

Returns the value of attribute comments.



19
20
21
# File 'lib/picky/analyzer.rb', line 19

# Reader for the list of remark strings collected during analysis.
def comments
  @comments
end

Instance Method Details

#analyze(bundle) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/picky/analyzer.rb', line 30

# Analyzes the given bundle in three passes (inverted index, weights,
# similarity). Each part is loaded, inspected and cleared again before
# the next one is loaded.
#
# Results are stored in #analysis, remarks are appended to #comments.
#
# Returns self, so calls can be chained.
def analyze bundle
  # Inverted index: key count, key/ids length statistics, and remarks.
  bundle.load_inverted
  analysis[:__keys] = bundle.inverted.size
  cardinality :index, bundle.inverted
  index_analysis
  bundle.clear_inverted

  # Weights: range and average, plus remarks.
  bundle.load_weights
  weights bundle.weights
  weights_analysis
  bundle.clear_weights

  # Similarity: key/ids length statistics only.
  bundle.load_similarity
  cardinality :similarity, bundle.similarity
  bundle.clear_similarity

  # Configuration analysis is currently disabled.
  # bundle.load_configuration
  # analysis[:configuration] = bundle.configuration
  # bundle.clear_configuration

  self
end

#can_calculate_cardinality?(index) ⇒ Boolean

Returns:

  • (Boolean)


79
80
81
82
83
# File 'lib/picky/analyzer.rb', line 79

# Tells whether cardinality statistics can be computed for the given index.
#
# The index must respond to both #size and #each_pair and must not be
# empty. Objects that do not respond to #size (e.g. nil) are rejected
# instead of raising, mirroring the guard used in #weights.
#
# Returns true or false (always a real Boolean).
def can_calculate_cardinality? index
  index.respond_to?(:size) &&
    index.respond_to?(:each_pair) &&
    !index.size.zero?
end

#cardinality(identifier, index) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/picky/analyzer.rb', line 53

# Gathers key and ids length statistics for the given index and hands
# them to #report_cardinality under the given identifier.
#
# Does nothing (returns nil) when #can_calculate_cardinality? rejects
# the index.
#
# NOTE(review): Range#expand_with is not defined in this file – presumably
# a project extension that widens a range to include a value; confirm.
def cardinality identifier, index
  return unless can_calculate_cardinality? index

  # Both ranges start out "inverted" (Infinity..0).
  key_range = (Float::INFINITY..0)
  ids_range = (Float::INFINITY..0)
  key_total = 0
  ids_total = 0

  index.each_pair do |key, ids|
    key_size   = key.size
    key_range  = key_range.expand_with key_size
    key_total += key_size

    ids_size   = ids.size
    ids_range  = ids_range.expand_with ids_size
    ids_total += ids_size
  end

  report_cardinality identifier, index, key_range, ids_range, key_total, ids_total
end

#configuration_to_s ⇒ Object



170
171
172
# File 'lib/picky/analyzer.rb', line 170

# Placeholder for the configuration part of the report. The body is
# commented out, so this currently returns nil.
def configuration_to_s
  # analysis[:configuration]
end

#formatted(description, key, index = :index) ⇒ Object



150
151
152
153
154
155
# File 'lib/picky/analyzer.rb', line 150

# Builds one aligned report line for a length statistic.
#
# description - Optional text inserted after "index" in the label (nil to omit).
# key         - Statistic to show (e.g. :key_length); its average is read
#               from the matching :"<key>_average" entry.
# index       - Which section of the analysis to read from (default :index).
#
# Returns the label (padded to 40), the range (right-aligned in 7) and the
# rounded average in parentheses (right-aligned in 8) as one String.
def formatted description, key, index = :index
  stats   = analysis[index]
  label   = ['index', description, 'key length range (avg):'].compact.join(' ').ljust(40)
  range   = "%7s" % stats[key]
  average = "(#{stats[:"#{key}_average"].round(2)})".rjust(8)
  "#{label}#{range}#{average}"
end

#index_analysis ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/picky/analyzer.rb', line 85

# Appends remarks about the index to #comments:
#  * a warning when the index has fewer than 100 keys,
#  * a warning when the shortest key is a single character.
#
# Does nothing when no :index statistics have been recorded.
def index_analysis
  index_stats = analysis[:index]
  return unless index_stats

  comments << "\033[33mVery small index (< 100 keys).\033[m" if analysis[:__keys] < 100

  shortest_key = index_stats[:key_length].min
  comments << "\033[33mIndex matches single characters.\033[m" if shortest_key == 1
end

#index_to_s ⇒ Object



141
142
143
144
145
146
147
148
# File 'lib/picky/analyzer.rb', line 141

# Renders the index part of the report.
#
# Returns nil when no key count has been recorded yet (e.g. #analyze has
# not run) or when the index has no keys, so that #to_s can drop it.
# Otherwise returns the key cardinality line, followed by the key length
# and ids-per-key lines when :index statistics are available.
def index_to_s
  key_count = analysis[:__keys]
  # A missing count is treated like an empty index instead of raising.
  return if key_count.nil? || key_count.zero?

  lines = ["index key cardinality:                #{"%9d" % key_count}"]
  if analysis[:index]
    lines << formatted(nil,       :key_length)
    lines << formatted('ids per', :ids_length)
  end
  lines.join "\n"
end

#report_cardinality(identifier, index, key_length, ids_length, key_length_sum, ids_length_sum) ⇒ Object



71
72
73
74
75
76
77
# File 'lib/picky/analyzer.rb', line 71

# Stores the gathered cardinality figures under analysis[identifier].
#
# The two ranges are stored as given; the averages are the respective
# sums divided by the number of entries in the index (as floats).
#
# Returns the ids length average (the last value stored).
def report_cardinality identifier, index, key_length, ids_length, key_length_sum, ids_length_sum
  stats = (analysis[identifier] ||= {})
  stats.update :key_length => key_length, :ids_length => ids_length

  entries = index.size
  stats[:key_length_average] = key_length_sum.to_f / entries
  stats[:ids_length_average] = ids_length_sum.to_f / entries
end

#similarity_to_s ⇒ Object



165
166
167
168
# File 'lib/picky/analyzer.rb', line 165

# Renders the similarity key length line of the report, or returns nil
# when no :similarity statistics have been recorded.
def similarity_to_s
  formatted('similarity', :key_length, :similarity) if analysis[:similarity]
end

#to_s ⇒ Object



137
138
139
# File 'lib/picky/analyzer.rb', line 137

# Renders the full report: all comments first, then the index, weights,
# similarity and configuration parts. Parts that return nil are left
# out; the rest is joined with newlines.
def to_s
  report_lines = comments + [index_to_s, weights_to_s, similarity_to_s, configuration_to_s]
  report_lines.compact.join("\n")
end

#weights(index) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/picky/analyzer.rb', line 99

# Records the range and the average of the weights in the given index
# under analysis[:weights] (:weight_range and :weight_average).
#
# index - Object mapping keys to numeric weights. It is ignored (nil is
#         returned) when it does not respond to #size or #each_pair, or
#         when it is empty.
#
# Returns the weight average as a Float.
def weights index
  return if !index.respond_to?(:size) || index.size.zero?
  return unless index.respond_to?(:each_pair)

  min_weight =  1.0/0 # Infinity
  max_weight = -1.0/0 # -Infinity
  weight_sum = 0.0    # Float, so integer weights do not truncate the average.

  index.each_pair do |_key, value|
    # Minimum and maximum are tracked independently: a value can be both
    # (single entry) and a new minimum must not skip the maximum check.
    min_weight = value if value < min_weight
    max_weight = value if value > max_weight
    weight_sum += value
  end

  analysis[:weights] ||= {}
  analysis[:weights][:weight_range]   = (min_weight..max_weight)
  analysis[:weights][:weight_average] = weight_sum / index.size
end

#weights_analysis ⇒ Object



124
125
126
127
128
129
130
131
132
133
# File 'lib/picky/analyzer.rb', line 124

# Appends a remark to #comments when the highest recorded weight is 0.0.
#
# Does nothing when no :weights statistics have been recorded.
def weights_analysis
  weight_stats = analysis[:weights]
  return unless weight_stats

  if weight_stats[:weight_range].max == 0.0
    comments << "\033[31mThere's only one id per key – you'll only get single results.\033[m"
  end
end

#weights_to_s ⇒ Object



157
158
159
160
161
162
163
# File 'lib/picky/analyzer.rb', line 157

# Renders the weights line of the report: a label padded to 30, the
# weight range right-aligned in 17 and the rounded average in
# parentheses right-aligned in 8.
#
# Returns nil when no :weights statistics have been recorded.
def weights_to_s
  weight_stats = analysis[:weights]
  return unless weight_stats

  label   = "weights range (avg):".ljust(30)
  range   = "%17s" % weight_stats[:weight_range]
  average = "(#{weight_stats[:weight_average].round(2)})".rjust(8)
  "#{label}#{range}#{average}"
end