Class: MiniStat::Data
- Inherits:
-
Object
- Object
- MiniStat::Data
- Includes:
- Enumerable
- Defined in:
- lib/ministat/data.rb
Instance Attribute Summary collapse
-
#data ⇒ Object
readonly
Returns the value of attribute data.
Instance Method Summary collapse
- #<<(obj) ⇒ Object
- #clear_results ⇒ Object
- #each(&block) ⇒ Object
-
#geometric_mean ⇒ Object
Geometric mean.
-
#harmonic_mean ⇒ Object
Harmonic or subcontrary mean.
-
#hist ⇒ Object
Put the histogram into a string if we have it.
-
#initialize(d) ⇒ Data
constructor
A new instance of Data.
-
#iqr ⇒ Object
Interquartile range, i.e., the middle 50% of the data.
-
#mean(data = @data) ⇒ Object
Computes arithmetic mean (most common average).
-
#median(data = @data) ⇒ Object
Return the median of data.
-
#mode ⇒ Object
Computes mode and generates a histogram (for free!).
-
#outliers ⇒ Object
Returns an array of outlying data points.
-
#partition(pivot, data = @data) ⇒ Object
Partition a set of numbers about pivot.
-
#q1 ⇒ Object
First quartile.
-
#q3 ⇒ Object
Third quartile.
-
#std_dev ⇒ Object
Standard deviation.
-
#to_s ⇒ Object
Return a string with statistical info about a dataset.
-
#variance ⇒ Object
Computes variance.
Constructor Details
#initialize(d) ⇒ Data
Returns a new instance of Data.
9 10 11 12 13 |
# File 'lib/ministat/data.rb', line 9
# Builds a dataset from any collection that responds to +map+.
#
# Every element is converted with +to_f+ and the result is stored in
# ascending order, so the instance starts out flagged as sorted.
#
# @param d [#map] the raw observations
def initialize(d)
  floats = d.map(&:to_f)
  @data = floats.sort
  @sorted = true
  clear_results
end
Instance Attribute Details
#data ⇒ Object (readonly)
Returns the value of attribute data.
7 8 9 |
# File 'lib/ministat/data.rb', line 7
# Reader for the underlying array of observations.
#
# @return [Array] the very array held in @data (not a copy)
def data
  @data
end
Instance Method Details
#<<(obj) ⇒ Object
15 16 17 18 19 20 21 22 |
# File 'lib/ministat/data.rb', line 15
# Appends one observation to the dataset.
#
# The value is converted with Kernel#Float so that, like the values given
# to the constructor, everything stored is a Float. Anything Float()
# rejects (nil, non-numeric strings, ...) is reported as an ArgumentError
# and leaves the dataset untouched.
#
# @param obj [Object] the value to add; must be convertible by Float()
# @return [Object] +obj+, exactly as passed in
# @raise [ArgumentError] if +obj+ cannot be converted to a Float
def <<(obj)
  begin
    value = Float(obj)
  rescue ArgumentError, TypeError
    raise ArgumentError, "#{obj} is not numeric"
  end

  @data << value

  # Cached statistics are stale now; force recomputation on next access.
  clear_results
  # The new value went on the end, so the array may no longer be ordered.
  @sorted = false
  obj
end
#clear_results ⇒ Object
28 29 30 31 |
# File 'lib/ministat/data.rb', line 28
# Forgets every cached statistic so it is recomputed on next access.
#
# Besides the memoized values, this also drops the histogram state that
# +mode+ builds up (@hist / @max_freq); otherwise a recomputed mode would
# keep counting on top of the previous tallies.
#
# @return [nil]
def clear_results
  @q1 = @q3 = @iqr = @outliers = @std_dev = @variance = nil
  @mode = @harmonic_mean = @geometric_mean = @mean = nil
  @max_freq = nil
  # +hist+ tests `defined? @hist`, so the variable has to be removed
  # outright rather than set to nil.
  remove_instance_variable(:@hist) if instance_variable_defined?(:@hist)
  nil
end
#each(&block) ⇒ Object
24 25 26 |
# File 'lib/ministat/data.rb', line 24
# Iterates over the stored observations by delegating to Array#each on
# @data, forwarding the caller's block (if any) unchanged.
#
# @yield [value] each stored observation in turn
# @return [Object] whatever @data.each returns for the given block
def each(&block)
  @data.each(&block)
end
#geometric_mean ⇒ Object
Geometric mean. Only applies to non-negative numbers, and relates to log-normal distribution.
135 136 137 138 139 140 141 142 143 144 |
# File 'lib/ministat/data.rb', line 135
# Geometric mean of the dataset, memoized in @geometric_mean.
#
# Computed as 2 raised to the arithmetic mean of the base-2 logarithms.
# Multiplying the values directly, i.e.
#   (@data.inject(1) {|i,j| i *= j})**(1.0/@data.size)
# overflowed for datasets with large numbers.
#
# @return [Numeric] the geometric mean
# @raise [RuntimeError] if any value is negative (checked on every call)
def geometric_mean
  raise "Geometric mean only applies to non-negative data" if @data.any? { |x| x < 0 }
  return @geometric_mean if @geometric_mean

  logs = @data.map { |x| Math.log2(x) }
  @geometric_mean = 2**mean(logs)
end
#harmonic_mean ⇒ Object
Harmonic or subcontrary mean. Tends strongly toward the least elements of the dataset.
149 150 151 152 |
# File 'lib/ministat/data.rb', line 149
# Harmonic mean: the number of observations divided by the sum of their
# reciprocals. The result is memoized in @harmonic_mean.
#
# @return [Float] the harmonic mean
def harmonic_mean
  return @harmonic_mean if @harmonic_mean

  reciprocal_sum = 0
  @data.each { |value| reciprocal_sum += 1.0 / value }
  @harmonic_mean = @data.size.to_f / reciprocal_sum
end
#hist ⇒ Object
Put the histogram into a string if we have it
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# File 'lib/ministat/data.rb', line 156
# Renders the histogram built by +mode+ as text, one line per distinct
# value: the value left-justified to a common width, " |", then one bar
# character per occurrence.
#
# this is a textbook example of how to lie with statistics...
# TODO: iterate over a range rather than @hist.keys--a histogram
# produced out of the keys won't properly represent flat spots
# with no data.
#
# this code borrows liberally from the ruby cookbook, recipe 5.12
# ORA, 2006
#
# @param char [String] the string repeated to draw each bar
# @return [String, nil] the rendered histogram ("" when it has no
#   entries), or nil when no histogram has been computed yet
def hist(char = '#')
  return unless defined?(@hist) && @hist

  # Order rows by the numeric key; sorting the stringified keys would put
  # "10.0" ahead of "2.0".
  rows = @hist.sort_by { |value, _count| value }
              .map { |value, count| [value.to_s, count] }
  label_width = rows.map { |label, _count| label.size }.max

  rows.map { |label, count| "#{label.ljust(label_width)} |#{char * count}\n" }.join
end
#iqr ⇒ Object
Interquartile range, i.e., the middle 50% of the data.
81 82 83 |
# File 'lib/ministat/data.rb', line 81
# Interquartile range: third quartile minus first quartile.
# The result is memoized in @iqr.
#
# @return [Numeric] q3 - q1
def iqr
  return @iqr if @iqr

  @iqr = q3 - q1
end
#mean(data = @data) ⇒ Object
Computes arithmetic mean (most common average).
96 97 98 |
# File 'lib/ministat/data.rb', line 96
# Arithmetic mean of +data+ (the instance's own data by default).
#
# An Integer total is promoted to Float before dividing, so integer input
# is not truncated (mean([1, 2]) is 1.5, not 1). Other numeric totals
# (Float, Rational, ...) are divided as they are. For an empty collection
# the result is NaN.
#
# The result is also stored in @mean.
#
# @param data [#inject, #size] the numbers to average
# @return [Numeric] the arithmetic mean
def mean(data = @data)
  total = data.inject(0) { |sum, value| sum + value }
  # Integer / Integer would floor the quotient.
  total = total.to_f if total.is_a?(Integer)
  @mean = total / data.size
end
#median(data = @data) ⇒ Object
Return the median of data. Naive implementation – does a sort on the data.
36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/ministat/data.rb', line 36
# Median of +data+ (the instance's own data by default).
#
# The instance's own array is sorted in place at most once, tracked by
# @sorted. Any other array is sorted into a copy, so the caller's array
# is left untouched and @sorted keeps describing @data only.
#
# @param data [Array] the numbers to examine
# @return [Numeric] the middle element for an odd count, or the average
#   of the two middle elements (divided by 2.0) for an even count
# @note An empty array is not handled: the even-count branch ends up
#   adding two nils.
def median(data = @data)
  if data.equal?(@data)
    unless @sorted
      @data.sort!
      @sorted = true
    end
    ordered = @data
  else
    ordered = data.sort
  end

  mid = ordered.size / 2
  if ordered.size.even?
    (ordered[mid - 1] + ordered[mid]) / 2.0
  else
    ordered[mid]
  end
end
#mode ⇒ Object
Computes mode and generates a histogram (for free!). (We needed it anyway).
103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/ministat/data.rb', line 103
# Most frequent value in the dataset; builds the frequency histogram
# (@hist) and the highest count (@max_freq) as a by-product.
#
# The result is memoized in @mode. Whenever it has to be (re)computed the
# histogram is rebuilt from scratch, so counts from an earlier pass are
# never added to again. On ties, the value that reaches the top count
# first while scanning @data wins.
#
# @return [Numeric, nil] the mode, or nil when there is no data
def mode
  return @mode if @mode

  @hist = {}
  @max_freq = 0
  @data.each do |val|
    count = (@hist[val] || 0) + 1
    @hist[val] = count
    @max_freq, @mode = count, val if count > @max_freq
  end
  @mode
end
#outliers ⇒ Object
Returns an array of outlying data points.
87 88 89 90 91 92 |
# File 'lib/ministat/data.rb', line 87
# Data points lying more than 1.5 * IQR below the first quartile or above
# the third quartile. The result is memoized in @outliers.
#
# @return [Array] the outlying values, in the order they appear in @data
def outliers
  return @outliers if @outliers

  @outliers = @data.select do |point|
    point < q1 - (1.5 * iqr) || point > q3 + (1.5 * iqr)
  end
end
#partition(pivot, data = @data) ⇒ Object
Partition a set of numbers about pivot
50 51 52 53 54 55 56 57 58 |
# File 'lib/ministat/data.rb', line 50
# Splits +data+ into the values strictly below and strictly above +pivot+.
# Values equal to the pivot land in neither group.
#
# @param pivot [Numeric] the value to split around
# @param data [Array] the numbers to split (defaults to the instance's data)
# @return [Hash] +{:low => [...], :high => [...]}+, each group keeping the
#   original relative order
def partition(pivot, data = @data)
  below = data.select { |value| value < pivot }
  above = data.select { |value| value > pivot }
  { :low => below, :high => above }
end
#q1 ⇒ Object
First quartile.
69 70 71 |
# File 'lib/ministat/data.rb', line 69
# First quartile: the median of the values that +partition+ places below
# the overall median. The result is memoized in @q1.
#
# @return [Numeric] the first quartile
def q1
  @q1 ||= begin
    middle = median(@data)
    lower_half = partition(middle, @data)[:low]
    median(lower_half)
  end
end
#q3 ⇒ Object
Third quartile
75 76 77 |
# File 'lib/ministat/data.rb', line 75
# Third quartile: the median of the values that +partition+ places above
# the overall median. The result is memoized in @q3.
#
# @return [Numeric] the third quartile
def q3
  @q3 ||= begin
    middle = median(@data)
    upper_half = partition(middle, @data)[:high]
    median(upper_half)
  end
end
#std_dev ⇒ Object
Standard deviation. Square root of variance, measure of the spread of the data about the mean.
128 129 130 |
# File 'lib/ministat/data.rb', line 128
# Standard deviation: the square root of +variance+.
# The result is memoized in @std_dev.
#
# @return [Float] the standard deviation
def std_dev
  return @std_dev if @std_dev

  @std_dev = Math.sqrt(variance)
end
#to_s ⇒ Object
Return a string with statistical info about a dataset.
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# Builds a textual report of the dataset from a single heredoc.
#
# The heredoc interpolates, in this order: partition(median).inspect, mean,
# geometric_mean, harmonic_mean, median, data.min, q1, q3, data.max, iqr,
# outliers.inspect, variance and std_dev, each behind its label.
#
# Because geometric_mean is interpolated, and geometric_mean raises when any
# value is negative, to_s raises for such datasets as well.
#
# NOTE(review): this listing is collapsed onto one line, so the heredoc's
# original line breaks and indentation cannot be seen here; the code is
# deliberately left exactly as rendered.
#
# @return [String] the report
# File 'lib/ministat/data.rb', line 174 def to_s <<-DATA_STR Partition:#{partition(median).inspect} Mean:#{mean} Geometric Mean:#{geometric_mean} Harmonic Mean:#{harmonic_mean} Median:#{median} Min:#{data.min} Q1:#{q1} Q3:#{q3} Max:#{data.max} IQR:#{iqr} Outliers:#{outliers.inspect} Variance:#{variance} Std Dev:#{std_dev} DATA_STR end |
#variance ⇒ Object
Computes variance. Used to measure degree of spread in dataset.
120 121 122 123 |
# File 'lib/ministat/data.rb', line 120
# Sample variance: the sum of squared deviations from the mean divided by
# (n - 1). The result is memoized in @variance.
#
# The mean is computed once up front instead of once per element, which
# keeps the calculation linear in the size of the dataset.
#
# @return [Numeric] the sample variance; 0 for an empty dataset
def variance
  @variance ||= begin
    # With no data the fold below never uses the mean, so skip the call.
    center = mean(@data) unless @data.empty?
    squared_error = @data.inject(0) { |acc, value| acc + (value - center)**2 }
    squared_error / (@data.size - 1)
  end
end