Module: Utilities::Statistics

Defined in:
lib/utilities.rb

Instance Method Summary collapse

Instance Method Details

#first_quartile(already_sorted = false) ⇒ Object Also known as: lower_quartile

Return the first quartile of self



180
181
182
183
184
# File 'lib/utilities.rb', line 180

# Lower quartile: the median of the lower half of the sorted values.
#
# The lower half is the first size / 2 elements, so the middle element of an
# odd-sized collection is not part of it.
#
# already_sorted - when true, self is used as-is instead of being sorted first.
#
# Returns nil when self holds fewer than 4 elements.
def first_quartile(already_sorted = false)
  return nil if size < 4

  ordered = already_sorted ? self : sort
  lower_half = ordered[0, size / 2]
  # The slice is a plain array, so it is extended to gain #median.
  lower_half.extend(Utilities::Statistics).median(true)
end

#frequences ⇒ Object

Calculate the number of occurrences for each element of the array



152
153
154
# File 'lib/utilities.rb', line 152

# Count how many times each element occurs.
#
# Returns a Hash mapping each distinct element to its number of occurrences.
# The Hash is created with a default of 0, so looking up an element that never
# occurred yields 0.
def frequences
  each_with_object(Hash.new(0)) do |item, counts|
    counts[item] += 1
  end
end

#interquartile_range(already_sorted = false) ⇒ Object

Calculate the interquartile range of self



202
203
204
205
206
# File 'lib/utilities.rb', line 202

# Interquartile range: last quartile minus first quartile.
#
# already_sorted - forwarded to sort_and_extend; when true the data is taken
#                  to be in sorted order already.
#
# Returns nil when self holds fewer than 4 elements.
def interquartile_range(already_sorted = false)
  return nil unless size >= 4

  ordered = sort_and_extend(already_sorted)
  # `ordered` is already sorted at this point, so tell both quartile helpers
  # not to sort it again (same convention as #quartiles).
  ordered.last_quartile(true) - ordered.first_quartile(true)
end

#last_quartile(already_sorted = false) ⇒ Object Also known as: upper_quartile

Return the last quartile of self



188
189
190
191
192
# File 'lib/utilities.rb', line 188

# Upper quartile: the median of the upper half of the sorted values.
#
# The upper half mirrors the lower half used by #first_quartile: it holds the
# last size / 2 elements, so the middle element of an odd-sized collection is
# not part of it.
#
# already_sorted - when true, self is used as-is instead of being sorted first.
#
# Returns nil when self holds fewer than 4 elements.
def last_quartile(already_sorted = false)
  return nil unless size >= 4

  ordered = already_sorted ? self : sort
  # (size + 1) / 2 is the first index after the lower half: it skips the
  # middle element for odd sizes and keeps every element for even sizes
  # (size / 2 + 1 would drop one element whenever size is even).
  upper_half = ordered[((size + 1) / 2)..-1]
  # The slice is a plain array, so it is extended to gain #median.
  upper_half.extend(Utilities::Statistics).median(true)
end

#mean ⇒ Object Also known as: average

Calculate the mean of the array, as long as all objects respond to / operator



145
146
147
148
# File 'lib/utilities.rb', line 145

# Arithmetic mean of the values.
#
# Nested arrays are flattened and nil entries are dropped before averaging.
# NOTE(review): to_stat is defined elsewhere in the library; presumably it
# makes the statistics helpers (such as #sum) available on the cleaned array
# -- confirm.
#
# Returns a Float; 0.0 when no values remain after cleaning.
def mean
  values = flatten.compact.to_stat
  return 0.0 unless values.size > 0

  values.sum.to_f / values.size
end

#median(already_sorted = false) ⇒ Object Also known as: second_quartile

Return the median of sorted self



171
172
173
174
175
176
# File 'lib/utilities.rb', line 171

# Median of the values.
#
# already_sorted - forwarded to sort_and_extend; when true the data is taken
#                  to be in sorted order already.
#
# Returns the middle element for an odd number of elements, the Float average
# of the two middle elements for an even number, and nil when self is empty.
def median(already_sorted = false)
  return nil if empty?

  ordered = sort_and_extend(already_sorted)
  middle = size / 2
  return ordered[middle] if size.odd?

  (ordered[middle - 1] + ordered[middle]).to_f / 2
end

#midrange(already_sorted = false) ⇒ Object

Return the midrange of sorted self



216
217
218
219
220
# File 'lib/utilities.rb', line 216

# Midrange: the average of the first and last elements of the sorted data.
#
# already_sorted - forwarded to sort_and_extend; when true the data is taken
#                  to be in sorted order already.
#
# Returns a Float (division by 2.0), or nil when self is empty.
def midrange(already_sorted = false)
  return nil if empty?

  ordered = sort_and_extend(already_sorted)
  lowest = ordered.first
  highest = ordered.last
  (lowest + highest) / 2.0
end

#modes ⇒ Object

Return the modes with their corresponding occurrences



209
210
211
212
213
# File 'lib/utilities.rb', line 209

# Most frequent element(s) together with their number of occurrences.
#
# Starts from the table produced by #frequences and keeps only the entries
# whose count equals the highest count; ties are all kept.
def modes
  counts = frequences
  highest = counts.values.max
  counts.select { |_element, count| count == highest }
end

#quartiles(already_sorted = false) ⇒ Object

Return an array containing the first, the second and the last quartile of self



196
197
198
199
# File 'lib/utilities.rb', line 196

# First quartile, median and last quartile, in that order.
#
# already_sorted - forwarded to sort_and_extend; when true the data is taken
#                  to be in sorted order already.
#
# Returns a three-element Array. The data is ordered once here and each helper
# is told (via true) that no further sorting is needed.
def quartiles(already_sorted = false)
  ordered = sort_and_extend(already_sorted)
  lower = ordered.first_quartile(true)
  middle = ordered.median(true)
  upper = ordered.last_quartile(true)
  [lower, middle, upper]
end

#ranks(already_sorted = false) ⇒ Object

Return a new array containing the rank of each value. Ex: [1, 2, 2, 8, 9] #=> [0.0, 1.5, 1.5, 3.0, 4.0]



134
135
136
137
# File 'lib/utilities.rb', line 134

# Rank of every element, in the original element order.
#
# An element's rank is the average of the first and last zero-based positions
# at which it appears in the sorted data, so equal values share one rank.
#   [1, 2, 2, 8, 9].ranks #=> [0.0, 1.5, 1.5, 3.0, 4.0]
#
# already_sorted - when true, self is used as-is instead of being sorted first.
#
# Returns a new Array of Floats.
def ranks(already_sorted = false)
  ordered = already_sorted ? self : sort
  map do |value|
    first_position = ordered.index(value)
    last_position = ordered.rindex(value)
    (first_position + last_position) / 2.0
  end
end

#sqrts ⇒ Object

Calculate square roots of each item



140
141
142
# File 'lib/utilities.rb', line 140

# Square root of every element.
#
# Each element must respond to #sqrt.
# NOTE(review): #sqrt is presumably added to numbers elsewhere in this
# library -- confirm.
#
# Returns a new Array with one result per element.
def sqrts
  map(&:sqrt)
end

#squares ⇒ Object

Calculate squares of each item



128
129
130
# File 'lib/utilities.rb', line 128

# Square of every element (each element raised to the power 2).
#
# Returns a new Array with one result per element.
def squares
  map do |value|
    value**2
  end
end

#standard_deviation(population = false) ⇒ Object Also known as: std_dev

Return the (sample|population) standard deviation of self. If population is set to true, the dataset is treated as the complete population. Otherwise the dataset is treated as a sample, so the sample standard deviation (dividing by size - 1) is used.



165
166
167
# File 'lib/utilities.rb', line 165

# Sample or population standard deviation.
#
# population - when true the value returned by #variance is divided by size
#              (population formula); otherwise by size - 1 (sample formula).
#
# Returns a Float; 0.0 when self holds fewer than 2 elements.
def standard_deviation(population = false)
  return 0.0 unless size > 1

  divisor = population ? size : size - 1
  Math.sqrt(variance / divisor)
end

#statistical_range(already_sorted = false) ⇒ Object

Return the statistical range of sorted self



223
224
225
226
227
# File 'lib/utilities.rb', line 223

# Statistical range: last element minus first element of the sorted data.
#
# already_sorted - forwarded to sort_and_extend; when true the data is taken
#                  to be in sorted order already.
#
# Returns nil when self is empty.
def statistical_range(already_sorted = false)
  return nil if empty?

  ordered = sort_and_extend(already_sorted)
  lowest = ordered.first
  highest = ordered.last
  highest - lowest
end

#statistics(already_sorted = false) ⇒ Object Also known as: stats

Return all statistics from self in a simple hash



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
# File 'lib/utilities.rb', line 230

# Gather every statistic of self into a single Hash.
#
# already_sorted - forwarded to sort_and_extend; when true the data is taken
#                  to be in sorted order already.
#
# Returns a Hash keyed by symbol: :first, :last, :size, :sum, :min, :max,
# :mean, :frequences, :variance, :standard_deviation, :modes, :ranks, :median,
# :midrange, :statistical_range, :quartiles and :interquartile_range.
def statistics(already_sorted = false)
  ordered = sort_and_extend(already_sorted)

  # Computed on self, in its original order.
  order_independent = {
    :first => first,
    :last => last,
    :size => size,
    :sum => sum,
    :min => min,
    :max => max,
    :mean => mean,
    :frequences => frequences,
    :variance => variance,
    :standard_deviation => standard_deviation,
    :modes => modes
  }

  # Computed on the sorted copy; true tells each helper not to sort again.
  order_dependent = {
    :ranks => ordered.ranks(true),
    :median => ordered.median(true),
    :midrange => ordered.midrange(true),
    :statistical_range => ordered.statistical_range(true),
    :quartiles => ordered.quartiles(true),
    :interquartile_range => ordered.interquartile_range(true)
  }

  order_independent.merge(order_dependent)
end

#sum ⇒ Object

Add each object of the array to each other in order to get the sum, as long as all objects respond to + operator



123
124
125
# File 'lib/utilities.rb', line 123

# Add all elements together with the + operator.
#
# Nested arrays are flattened and nil entries are dropped first, so every
# remaining element must respond to +.
#
# Returns the accumulated value, or nil when there is nothing left to add.
def sum
  addends = flatten.compact
  addends.reduce { |total, item| total + item }
end

#variance ⇒ Object

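Return the sum of squared deviations from the mean of self. The result is not divided by the number of elements; standard_deviation performs that division.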



157
158
159
160
# File 'lib/utilities.rb', line 157

# Sum of squared deviations from the mean.
#
# NOTE: despite the name, the result is not divided by the number of elements;
# #standard_deviation performs that division (by size or size - 1) itself.
#
# Returns 0 (Integer) when self is empty.
def variance
  center = mean
  inject(0) do |total, value|
    total + (value - center)**2
  end
end