Class: Array

Inherits:
Object
  • Object
show all
Defined in:
lib/array-statistics.rb

Instance Method Summary collapse

Instance Method Details

#<<(*as_args) ⇒ Object



271
272
273
274
275
# File 'lib/array-statistics.rb', line 271

# Appends through the original Array#<< (kept reachable as #as_old_app) and
# then flags the array as possibly unsorted, so the next #sort! really sorts.
# Returns whatever the original #<< returned.
def <<(*as_args)
  as_old_app(*as_args).tap { dirty }
end

#[]=(*as_args) ⇒ Object



264
265
266
267
268
# File 'lib/array-statistics.rb', line 264

# Assigns through the original Array#[]= (kept reachable as #as_old_ass) and
# then flags the array as possibly unsorted.
# Returns the original method's result untouched.
def []=(*as_args)
  assigned = as_old_ass(*as_args)
  dirty
  assigned
end

#as_old_appObject



270
# File 'lib/array-statistics.rb', line 270

# Keeps the pre-existing #<< reachable as #as_old_app; the #<< redefined on the
# following source line delegates to it before marking the array dirty.
alias as_old_app <<

#as_old_assObject

Side-note: there are a lot of array methods that don’t follow the destructive-methods-end-in-! naming pattern



263
# File 'lib/array-statistics.rb', line 263

# Keeps the pre-existing #[]= reachable as #as_old_ass; the #[]= redefined on
# the following source line delegates to it before marking the array dirty.
alias as_old_ass []=

#as_old_collectObject



284
# File 'lib/array-statistics.rb', line 284

# Keeps the pre-existing #collect! reachable as #as_old_collect; the redefined
# #collect! delegates to it before marking the array dirty.
alias as_old_collect collect!

#as_old_fillObject



298
# File 'lib/array-statistics.rb', line 298

# Keeps the pre-existing #fill reachable as #as_old_fill; the redefined #fill
# delegates to it before marking the array dirty.
alias as_old_fill fill

#as_old_flattenObject



305
# File 'lib/array-statistics.rb', line 305

# Keeps the pre-existing #flatten! reachable as #as_old_flatten; the redefined
# #flatten! delegates to it before marking the array dirty.
alias as_old_flatten flatten!

#as_old_map!Object



291
# File 'lib/array-statistics.rb', line 291

# Keeps the pre-existing #map! reachable as #as_old_map!; the redefined #map!
# delegates to it before marking the array dirty.
alias as_old_map! map!

#as_old_pushObject



277
# File 'lib/array-statistics.rb', line 277

# Keeps the pre-existing #push reachable as #as_old_push; the redefined #push
# delegates to it before marking the array dirty.
alias as_old_push push

#as_old_replaceObject



312
# File 'lib/array-statistics.rb', line 312

# Keeps the pre-existing #replace reachable as #as_old_replace; the redefined
# #replace delegates to it before marking the array dirty.
alias as_old_replace replace

#as_old_reverseObject



319
# File 'lib/array-statistics.rb', line 319

# Keeps the pre-existing #reverse! reachable as #as_old_reverse; the redefined
# #reverse! delegates to it before marking the array dirty.
alias as_old_reverse reverse!

#as_old_unshiftObject



326
# File 'lib/array-statistics.rb', line 326

# Keeps the pre-existing #unshift reachable as #as_old_unshift; the redefined
# #unshift delegates to it before marking the array dirty.
alias as_old_unshift unshift

#average(&value_block) ⇒ Object Also known as: mean

Returns the average of the values in this array.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



202
203
204
205
206
# File 'lib/array-statistics.rb', line 202

# Arithmetic mean of the array's values.
#
# The optional block maps each element to its numeric value and is handed
# straight to #sum. An integer total is converted to Float first so the
# division never truncates.
def average(&value_block) # :yields: element
  total = sum(&value_block)
  (total.integer? ? total.to_f : total) / length
end

#cleanObject



250
251
252
# File 'lib/array-statistics.rb', line 250

# Records that the array is currently known to be sorted; #sort! calls this
# right after it has actually sorted. Returns false.
def clean
  instance_variable_set(:@as_sort_dirty, false)
end

#collect!(*as_args, &block) ⇒ Object



285
286
287
288
289
# File 'lib/array-statistics.rb', line 285

# Runs the original #collect! (kept reachable as #as_old_collect) with the
# given arguments and block, then flags the array as possibly unsorted.
# Returns the original method's result.
def collect!(*as_args, &block)
  as_old_collect(*as_args, &block).tap { dirty }
end

#dirtyObject

:stopdoc:



241
242
243
# File 'lib/array-statistics.rb', line 241

# Records that the array may no longer be sorted, which forces the next
# #sort! to do real work. Returns true.
def dirty
  instance_variable_set(:@as_sort_dirty, true)
end

#dirty?Boolean

Returns:

  • (Boolean)


245
246
247
248
# File 'lib/array-statistics.rb', line 245

# Tells whether the array might be unsorted.
#
# An array whose flag has never been set is treated as dirty: the flag is
# initialised through #dirty on first use.
def dirty?
  instance_variable_defined?(:@as_sort_dirty) || dirty
  @as_sort_dirty
end

#fill(*as_args, &block) ⇒ Object



299
300
301
302
303
# File 'lib/array-statistics.rb', line 299

# Runs the original #fill (kept reachable as #as_old_fill) with the given
# arguments and block, then flags the array as possibly unsorted.
# Returns the original method's result.
def fill(*as_args, &block)
  as_old_fill(*as_args, &block).tap { dirty }
end

#flatten!(*as_args, &block) ⇒ Object



306
307
308
309
310
# File 'lib/array-statistics.rb', line 306

# Runs the original #flatten! (kept reachable as #as_old_flatten), then flags
# the array as possibly unsorted. The flag is set whatever the original
# method returns; that result is passed back unchanged.
def flatten!(*as_args, &block)
  as_old_flatten(*as_args, &block).tap { dirty }
end

#map!(*as_args, &block) ⇒ Object



292
293
294
295
296
# File 'lib/array-statistics.rb', line 292

# Runs the original #map! (kept reachable as #as_old_map!) with the given
# arguments and block, then flags the array as possibly unsorted.
# Returns the original method's result.
def map!(*as_args, &block)
  as_old_map!(*as_args, &block).tap { dirty }
end

#median(sort_required = true, &value_block) ⇒ Object

Get the median value of this array.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



62
63
64
65
66
67
68
69
70
71
72
# File 'lib/array-statistics.rb', line 62

# Returns the median value of this array, or 0 when the array is empty (to
# reduce instances of calling math methods on nil).
#
# sort_required:: when true (the default) the array is sorted in place by
#                 value first; pass false only when the array is already
#                 sorted. The flag is forwarded to #median_indices.
# value_block::   optional block mapping an element to its numeric value;
#                 defaults to the element itself.
#
# For an even number of elements the result is the average of the two middle
# values.
def median(sort_required=true, &value_block) # :yields: element
  return 0 if empty?
  value_block = proc { |element| element } unless block_given?

  # Forward sort_required so a caller who promises a pre-sorted array really
  # does skip the sort.
  middle_values = median_indices(sort_required, &value_block).collect do |element_index|
    value_block.call(self[element_index])
  end

  middle_values.average
end

#median_indices(sort_required = true, &value_block) ⇒ Object

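Returns a one- or two-element array of indices: the index of the median element when the array has an odd number of elements, or the two indices surrounding the median when it has an even number.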

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/array-statistics.rb', line 78

# Returns the index or indices of the median element(s) as an array:
# one index for an odd-length array, the two middle indices for an
# even-length array, and an empty array when there are no elements.
#
# sort_required:: when true (the default) the array is first sorted in place
#                 by value; pass false when it is already sorted.
# value_block::   optional block mapping an element to the value used for
#                 ordering; defaults to the element itself.
def median_indices(sort_required=true, &value_block) # :yields: element
  # Always hand back an array so callers can index it or ask for its length.
  return [] if empty?
  value_block = proc { |element| element } unless block_given?

  if sort_required
    sort! { |x, y| value_block.call(x) <=> value_block.call(y) }
  end

  middle = length / 2
  length.even? ? [middle - 1, middle] : [middle]
end

#old_sort!Object



225
# File 'lib/array-statistics.rb', line 225

# Keeps the pre-existing #sort! reachable as #old_sort!; the redefined #sort!
# calls it only when the array is dirty or the comparison block has changed.
alias old_sort! sort!

#outlier_threshold_indices(quartile_range_factor = 1.5, &value_block) ⇒ Object

Returns an array with two values. The first value is the index of the last low outlier in this sorted array (this array will be sorted as a side-effect of this method), or nil if there are no low-end outliers. The second value is the index of the first high outlier in this sorted array, or nil if there are no high-end outliers.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/array-statistics.rb', line 152

# Returns a two-element array [low_index, high_index]:
#
# low_index::  index of the last element whose value lies below the low
#              outlier threshold, or nil when there is none.
# high_index:: index of the first element whose value lies above the high
#              outlier threshold, or nil when there is none.
#
# The array is sorted as a side effect, because the thresholds are obtained
# from #outlier_thresholds. Both scans stop at the array boundaries, so an
# empty array, or thresholds that classify every element as an outlier, no
# longer read past the ends of the array.
#
# quartile_range_factor:: passed through to #outlier_thresholds.
# value_block::           optional block mapping an element to its numeric
#                         value; defaults to the element itself.
def outlier_threshold_indices(quartile_range_factor=1.5, &value_block) # :yields: element
  value_block = proc { |element| element } unless block_given?
  # Computing the thresholds sorts self.
  low_threshold, high_threshold = outlier_thresholds(quartile_range_factor, &value_block)

  # Walk up from the front while values stay below the low threshold.
  low_index = -1
  while low_index + 1 < length && value_block.call(self[low_index + 1]) < low_threshold
    low_index += 1
  end

  # Walk down from the back while values stay above the high threshold.
  high_index = length
  while high_index > 0 && value_block.call(self[high_index - 1]) > high_threshold
    high_index -= 1
  end

  [low_index == -1 ? nil : low_index, high_index == length ? nil : high_index]
end

#outlier_thresholds(quartile_range_factor = 1.5, &value_block) ⇒ Object

Returns an array with two values. The first value is the low outlier threshold for this data set; the second value is the high outlier threshold for this data set.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/array-statistics.rb', line 175

# Returns [low_threshold, high_threshold] for this data set.
#
# The first and third quartile values are each the average of the element
# values found at the indices reported by #quartile_indices. The thresholds
# sit quartile_range_factor interquartile ranges below the first quartile and
# above the third.
#
# value_block:: optional block mapping an element to its numeric value;
#               defaults to the element itself.
def outlier_thresholds(quartile_range_factor=1.5, &value_block) # :yields: element
  value_block = proc { |element| element } unless block_given?

  q1_indices, q3_indices = quartile_indices(&value_block)
  q1 = q1_indices.collect { |element_index| value_block.call(self[element_index]) }.average
  q3 = q3_indices.collect { |element_index| value_block.call(self[element_index]) }.average

  margin = (q3 - q1) * quartile_range_factor
  [q1 - margin, q3 + margin]
end

#outliers(quartile_range_factor = 1.5, &value_block) ⇒ Object

returns an array containing all the outliers in this set

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



119
120
121
122
123
124
125
126
127
128
# File 'lib/array-statistics.rb', line 119

# Returns a new array holding every outlier in this set: the low outliers
# followed by the high outliers. The receiver keeps all its elements.
#
# The two slices are concatenated rather than flattened, so elements that are
# themselves arrays (for example [label, value] pairs used with a value
# block) come back intact.
#
# quartile_range_factor:: passed through to #outlier_threshold_indices.
# value_block::           optional block mapping an element to its numeric
#                         value; defaults to the element itself.
def outliers(quartile_range_factor=1.5, &value_block) # :yields: element
  value_block = proc { |element| element } unless block_given?
  last_low, first_high = outlier_threshold_indices(quartile_range_factor, &value_block)

  low_outliers = last_low.nil? ? [] : self[0..last_low]
  high_outliers = first_high.nil? ? [] : self[first_high..-1]
  low_outliers + high_outliers
end

#percentile(percent_less_than, &value_block) ⇒ Object

Get the percent order statistic, based on “order statistic” as described at mathworld.wolfram.com/topics/RankStatistics.html. Given some percentage between 0 and 1 (inclusive), return the greatest value in the subarray of this array which contains the bottom percent_less_than values of this array.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/array-statistics.rb', line 23

# Percent order statistic: the greatest value among the bottom
# percent_less_than share of this array.
#
# percent_less_than:: fraction between 0 and 1; zero or anything below it
#                     yields nil, anything above 1 is treated as 1.
# value_block::       optional block mapping an element to the value used for
#                     ordering; defaults to the element itself.
#
# The array is sorted in place by value as a side effect, even when nil is
# returned.
def percentile(percent_less_than, &value_block) #  :yields: element
  value_block = proc { |element| element } unless block_given?
  sort! { |x, y| value_block.call(x) <=> value_block.call(y) }

  return nil if percent_less_than <= 0

  fraction = percent_less_than > 1 ? 1 : percent_less_than
  self[(fraction * (length - 1)).floor]
end

#percentile_rank(value, &value_block) ⇒ Object

Get the percent rank, based on “statistical rank” as described at mathworld.wolfram.com/topics/RankStatistics.html. Given some value, find the percentage of its rank in this array. The number returned is between 0 and 1 (inclusive) and represents the percentage of values in this array which are less than or equal to the value passed in.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/array-statistics.rb', line 43

# Percent rank of +value+ within this array: the fraction (a Float between
# 0.0 and 1.0 inclusive) of elements whose value is less than or equal to
# +value+.
#
# value::       the value to rank.
# value_block:: optional block mapping an element to its numeric value;
#               defaults to the element itself.
#
# The array is sorted in place by value as a side effect. An empty array has
# nothing to rank against and yields 0.0 instead of raising on a nil
# comparison.
def percentile_rank(value, &value_block) # :yields: element
  return 0.0 if empty?
  value_block = proc { |element| element } unless block_given?
  sort! { |x, y| value_block.call(x) <=> value_block.call(y) }

  # In sorted order, the position of the first element greater than +value+
  # equals the number of elements that are <= value.
  first_greater = index { |element| value_block.call(element) > value }
  first_greater.nil? ? 1.0 : first_greater.to_f / length
end

#push(*as_args) ⇒ Object



278
279
280
281
282
# File 'lib/array-statistics.rb', line 278

# Pushes through the original #push (kept reachable as #as_old_push) and then
# flags the array as possibly unsorted.
# Returns the original method's result.
def push(*as_args)
  as_old_push(*as_args).tap { dirty }
end

#quartile_indices(&value_block) ⇒ Object

returns an array with 2 values. The values are the first and third quartile indices following the same rules as the results of the median_indices method

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/array-statistics.rb', line 99

# Returns [q1_indices, q3_indices]: the index or indices of the first and
# third quartile, each in the same one-or-two-element form that
# #median_indices produces.
#
# The array is split at its median into a lower and an upper half (the median
# index is included in both), and the median indices of each half are taken
# without re-sorting. Indices from the upper half are shifted back into this
# array's coordinates.
def quartile_indices(&value_block) # :yields: element
  median_i = median_indices(&value_block)
  low_end = median_i.first
  high_start = median_i.last

  lower_half = self[0..low_end]
  upper_half = self[high_start..-1]

  q1_indices = lower_half.median_indices(false, &value_block)
  q3_indices = upper_half.median_indices(false, &value_block)
  q3_indices.collect! { |index| index + high_start }

  [q1_indices, q3_indices]
end

#remove_outliers!(quartile_range_factor = 1.5, &value_block) ⇒ Object

removes all the outliers from this set and returns them.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



134
135
136
137
138
139
140
141
142
143
# File 'lib/array-statistics.rb', line 134

# Removes every outlier from this array and returns them in a new array, low
# outliers first and high outliers after.
#
# quartile_range_factor:: passed through to #outlier_threshold_indices.
# value_block::           optional block mapping an element to its numeric
#                         value; defaults to the element itself.
def remove_outliers!(quartile_range_factor=1.5, &value_block) # :yields: element
  last_low, first_high = outlier_threshold_indices(quartile_range_factor, &value_block)

  removed = []
  shift = 0
  unless last_low.nil?
    removed += slice!(0..last_low)
    # Cutting off the front moves every remaining element down by this many
    # positions, so the high-outlier index has to be adjusted to match.
    shift = last_low + 1
  end
  removed += slice!((first_high - shift)..-1) unless first_high.nil?
  removed
end

#replace(*as_args, &block) ⇒ Object



313
314
315
316
317
# File 'lib/array-statistics.rb', line 313

# Runs the original #replace (kept reachable as #as_old_replace) and then
# flags the array as possibly unsorted.
# Returns the original method's result.
def replace(*as_args, &block)
  as_old_replace(*as_args, &block).tap { dirty }
end

#reverse!(*as_args, &block) ⇒ Object



320
321
322
323
324
# File 'lib/array-statistics.rb', line 320

# Runs the original #reverse! (kept reachable as #as_old_reverse) and then
# flags the array as possibly unsorted.
# Returns the original method's result.
def reverse!(*as_args, &block)
  as_old_reverse(*as_args, &block).tap { dirty }
end

#sort!(&comparison_block) ⇒ Object

Adds smarter sorting: A lot of methods above need the array sorted and they may call one another. This sort! method only sorts the array if it isn’t currently known to be sorted.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



231
232
233
234
235
236
237
238
# File 'lib/array-statistics.rb', line 231

# Sorts in place, but only when needed.
#
# The real sort (the original #sort!, kept reachable as #old_sort!) is skipped
# when the array is not dirty and the comparison block is the same one used
# by the previous sort. After a real sort the block is remembered and the
# array is marked clean.
#
# comparison_block:: optional two-argument comparator, handed to the original
#                    #sort! unchanged.
#
# Always returns self.
def sort!(&comparison_block) # :yields: x, y
  return self if !dirty? && comparison_block == @as_last_comparison_block

  old_sort!(&comparison_block)
  @as_last_comparison_block = comparison_block
  clean
  self
end

#sum(&value_block) ⇒ Object

Returns the sum of all the values in this array.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



213
214
215
216
217
218
219
220
# File 'lib/array-statistics.rb', line 213

# Adds up the values of all elements, starting from the integer 0.
#
# value_block:: optional block mapping an element to its numeric value;
#               defaults to the element itself.
#
# An empty array sums to 0.
def sum(&value_block) # :yields: element
  value_block = proc { |element| element } unless block_given?
  inject(0) { |running_total, element| running_total + value_block.call(element) }
end

#unshift(*as_args, &block) ⇒ Object



327
328
329
330
331
# File 'lib/array-statistics.rb', line 327

# Prepends through the original #unshift (kept reachable as #as_old_unshift)
# and then flags the array as possibly unsorted.
# Returns the original method's result.
def unshift(*as_args, &block)
  as_old_unshift(*as_args, &block).tap { dirty }
end