Module: Daru::Maths::Statistics::Vector

Extended by:
Gem::Deprecate
Included in:
Vector
Defined in:
lib/daru/maths/statistics/vector.rb

Overview

rubocop:disable Metrics/ModuleLength

Instance Method Summary collapse

Instance Method Details

#acf(max_lags = nil) ⇒ Object

Calculates the autocorrelation coefficients of the series.

The first element is always 1, since that is the correlation of the series with itself.

Examples:

ts = Daru::Vector.new((1..100).map { rand })

ts.acf   # => array with first 21 autocorrelations
ts.acf 3 # => array with 4 autocorrelations (lags 0 through 3)


616
617
618
619
620
621
622
623
624
625
626
627
628
629
# File 'lib/daru/maths/statistics/vector.rb', line 616

# Calculates the autocorrelation coefficients of the series for lags
# 0 through +max_lags+ (inclusive).
#
# @param max_lags [Integer, nil] highest lag to compute; when nil it
#   defaults to (10 * log10(size)).to_i
# @return [Array] max_lags + 1 coefficients; the first one (lag 0) is always 1.0
def acf(max_lags=nil)
  max_lags ||= (10 * Math.log10(size)).to_i

  # The mean, the centered series and the sample variance do not depend on
  # the lag, so they are computed once instead of once per lag.
  m = mean
  centered = self - m
  variance = variance_sample
  scale = size - 1

  (0..max_lags).map do |i|
    next 1.0 if i.zero?

    # can't use Pearson coefficient since the mean for the lagged series should
    # be the same as the regular series
    (centered * (lag(i) - m)).sum / variance / scale
  end
end

#acvf(demean = true, unbiased = true) ⇒ Object

Provides autocovariance.

Options

  • :demean = true; optional. Supply false if series is not to be demeaned

  • :unbiased = true; optional. true/false for unbiased/biased form of autocovariance

Returns

Array of autocovariance values, one per lag (starting at lag 0)



641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
# File 'lib/daru/maths/statistics/vector.rb', line 641

# Provides the autocovariance of the series for lags 0 .. n - 1, where
# n = (10 * log10(size)).to_i + 1, capped at the series size.
#
# The previous implementation built an options hash with
# `{...}.merge(opts)` while `opts` was still nil, which raised TypeError on
# every call and would have discarded the positional arguments. The
# arguments are now used directly.
#
# @param demean [Boolean] when true the mean is subtracted from the series
#   before it is multiplied with the (always demeaned) lagged series
# @param unbiased [Boolean] when true every lag is divided by +size+; when
#   false lag k is divided by +size - k+.
#   NOTE(review): this mapping is kept from the original code; it looks
#   reversed relative to the usual biased/unbiased naming - confirm.
# @return [Array] one autocovariance value per lag
def acvf(demean=true, unbiased=true) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  m = mean
  demeaned_series = demean ? self - m : self

  # Never ask for more lags than there are observations, otherwise the
  # denominator table below would run out of entries for short series.
  n = [(10 * Math.log10(size)).to_i + 1, size].min
  d = if unbiased
        Array.new(n, size)
      else
        Array.new(n) { |k| size - k }
      end

  0.upto(n - 1).map do |i|
    (demeaned_series * (lag(i) - m)).sum / d[i]
  end
end

#average_deviation_population(m = nil) ⇒ Object Also known as: adp



230
231
232
233
234
235
236
# File 'lib/daru/maths/statistics/vector.rb', line 230

# Mean absolute deviation of the non-missing values around +m+.
#
# @param m [Numeric, nil] center to measure deviations from; defaults to the mean
# @return [Numeric] sum of |value - m| divided by the number of non-missing values
def average_deviation_population m=nil
  must_be_numeric!
  center = m || mean
  present = reject_values(*Daru::MISSING_VALUES).data
  total_deviation = present.inject(0) { |acc, value| (value - center).abs + acc }
  total_deviation.quo(size - count_values(*Daru::MISSING_VALUES))
end

#box_cox_transformation(lambda) ⇒ Object

:nocov:



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'lib/daru/maths/statistics/vector.rb', line 296

# Applies the Box-Cox transformation to every non-nil value via #recode:
# log(x) when +lambda+ is zero, (x**lambda - 1) / lambda otherwise.
# nil values are kept as nil.
def box_cox_transformation lambda # :nodoc:
  must_be_numeric!

  recode do |x|
    next nil if x.nil?

    lambda.zero? ? Math.log(x) : (x ** lambda - 1).quo(lambda)
  end
end

#centerObject

Center data by subtracting the mean from each non-nil value.



277
278
279
# File 'lib/daru/maths/statistics/vector.rb', line 277

# Centers the data: returns the result of subtracting the mean from self.
def center
  self - mean
end

#coefficient_of_variationObject Also known as: cov



121
122
123
# File 'lib/daru/maths/statistics/vector.rb', line 121

# Coefficient of variation: the sample standard deviation divided by the mean.
def coefficient_of_variation
  standard_deviation_sample / mean
end

#count(value = false, &block) ⇒ Object

Counts the elements that satisfy a condition. If a block is given, returns the number of elements for which the block returns true. If a value is given, returns the frequency of that value. If neither is given, counts the number of non-missing elements in the Vector.



129
130
131
132
133
134
135
136
137
# File 'lib/daru/maths/statistics/vector.rb', line 129

# Counts elements of the vector.
#
# * with a block: number of elements for which the block is truthy
# * with a (truthy) value: number of elements equal to that value
# * otherwise: number of elements that are not missing values
def count value=false, &block
  return @data.select(&block).count if block_given?
  return count { |val| val == value } if value

  size - indexes(*Daru::MISSING_VALUES).size
end

#covariance_population(other) ⇒ Object

Population covariance with denominator (N)



179
180
181
182
# File 'lib/daru/maths/statistics/vector.rb', line 179

# Population covariance with +other+: the covariance sum divided by the
# number of non-missing values (N).
#
# @raise [ArgumentError] when the two vectors differ in size
def covariance_population other
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  valid_count = size - count_values(*Daru::MISSING_VALUES)
  covariance_sum(other) / valid_count
end

#covariance_sample(other) ⇒ Object Also known as: covariance

Sample covariance with denominator (N-1)



173
174
175
176
# File 'lib/daru/maths/statistics/vector.rb', line 173

# Sample covariance with +other+: the covariance sum divided by the
# number of non-missing values minus one (N - 1).
#
# @raise [ArgumentError] when the two vectors differ in size
def covariance_sample other
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  degrees_of_freedom = size - count_values(*Daru::MISSING_VALUES) - 1
  covariance_sum(other) / degrees_of_freedom
end

#cumsumObject

Calculate cumulative sum of Vector



665
666
667
668
669
670
671
672
673
674
675
676
677
678
# File 'lib/daru/maths/statistics/vector.rb', line 665

# Cumulative sum of the vector. Missing values produce nil in the result
# and do not contribute to the running total.
#
# @return [Daru::Vector] running totals, built with the receiver's index
def cumsum
  running_total = 0
  totals = @data.each_with_object([]) do |value, collected|
    if include_with_nan?(Daru::MISSING_VALUES, value)
      collected << nil
    else
      running_total += value
      collected << running_total
    end
  end

  Daru::Vector.new(totals, index: @index)
end

#describe(methods = nil) ⇒ Object

Create a summary of count, mean, standard deviation, min and max of the vector in one shot.

Arguments

methods - An array with aggregation methods specified as symbols to be applied to vectors. Default is [:count, :mean, :std, :min, :max]. Methods will be applied in the specified order.



47
48
49
50
51
# File 'lib/daru/maths/statistics/vector.rb', line 47

# Builds a summary vector by calling each of the given statistic methods
# on this vector, in order.
#
# @param methods [Array<Symbol>, nil] method names to call; defaults to
#   count, mean, std, min and max
# @return [Daru::Vector] the results, indexed by method name and named :statistics
def describe methods=nil
  methods ||= %i[count mean std min max]
  summary = methods.map { |statistic| send(statistic) }
  Daru::Vector.new(summary, index: methods, name: :statistics)
end

#dichotomize(low = nil) ⇒ Object

Dichotomize the vector with 0 and 1, based on lowest value. If parameter is defined, this value and lower will be 0 and higher, 1.



262
263
264
265
266
267
268
269
270
271
272
273
274
# File 'lib/daru/maths/statistics/vector.rb', line 262

# Recodes the vector into 0/1: values greater than +low+ become 1, all
# other non-nil values become 0, and nil stays nil.
#
# @param low [Object, nil] threshold; defaults to the smallest factor
def dichotomize(low=nil)
  threshold = low || factors.min

  recode do |x|
    next nil if x.nil?

    x > threshold ? 1 : 0
  end
end

#diff(max_lags = 1) ⇒ Daru::Vector

Performs the difference of the series. Note: the first difference of a series is X(t) - X(t-1), but the second difference is NOT X(t) - X(t-2); it is the first difference of the first difference:

> (X(t) - X(t-1)) - (X(t-1) - X(t-2))

Arguments

  • max_lags: integer, (default: 1), number of times the series is differenced.

Examples:

Using #diff


ts = Daru::Vector.new((1..10).map { rand })
         # => [0.69, 0.23, 0.44, 0.71, ...]

ts.diff   # => [nil, -0.46, 0.21, 0.27, ...]

Returns:



418
419
420
421
422
423
424
425
426
# File 'lib/daru/maths/statistics/vector.rb', line 418

# Differences the series +max_lags+ times; each pass replaces the series
# with (series - series.lag). Returns an empty Array when no pass runs.
def diff(max_lags=1)
  series = self
  differenced = []
  max_lags.times { series = differenced = series - series.lag }
  differenced
end

#ema(n = 10, wilder = false) ⇒ Daru::Vector

Exponential Moving Average. Calculates an exponential moving average of the series using a specified parameter. If wilder is false (the default) then the EMA uses a smoothing value of 2 / (n + 1), if it is true then it uses the Welles Wilder smoother of 1 / n.

Warning for EMA usage: EMAs are unstable for small series, as they use a lot more than n observations to calculate. The series is stable if the size of the series is >= 3.45 * (n + 1)

Examples:

Using ema


ts = Daru::Vector.new((1..100).map { rand })
         # => [0.577..., 0.123..., 0.173..., 0.233..., ...]

# first 9 observations are nil
ts.ema   # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]

Parameters:

  • n (Integer) (defaults to: 10)

    (10) Loopback length.

  • wilder (TrueClass, FalseClass) (defaults to: false)

    (false) If true, 1/n value is used for smoothing; if false, uses 2/(n+1) value

Returns:



502
503
504
505
506
507
508
509
510
511
512
513
514
515
# File 'lib/daru/maths/statistics/vector.rb', line 502

# Exponential moving average with loopback +n+.
#
# The smoothing factor is 1 / n when +wilder+ is true and 2 / (n + 1)
# otherwise. Output is nil up to the (n - 1)th position after the first
# non-nil observation; the next entry is the plain average of the first n
# observations (nil entries in that window add nothing to the sum) and all
# later entries follow the exponential recursion.
def ema(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
  alpha = wilder ? 1.0 / n : 2.0 / (n + 1)
  # everything is anchored at the first non-nil observation
  first_valid = @data.index { |obs| !obs.nil? }
  seed_end = first_valid + n
  averages = [nil] * (seed_end - 1)
  seed_total = @data[first_valid...seed_end].inject(0.0) { |total, obs| obs.nil? ? total : total + obs }
  averages << seed_total / n
  seed_end.upto(size - 1) do |position|
    averages << self[position] * alpha + (1 - alpha) * averages.last
  end

  Daru::Vector.new(averages, index: @index, name: @name)
end

#emsd(n = 10, wilder = false) ⇒ Daru::Vector

Exponential Moving Standard Deviation. Calculates an exponential moving standard deviation of the series using a specified parameter. If wilder is false (the default) then the EMSD uses a smoothing value of 2 / (n + 1), if it is true then it uses the Welles Wilder smoother of 1 / n.

Examples:

Using emsd


ts = Daru::Vector.new((1..100).map { rand })
         # => [0.400..., 0.727..., 0.862..., 0.013..., ...]

# first 9 observations are nil
ts.emsd   # => [ ... nil, 0.285... , 0.258..., 0.243..., ...]

Parameters:

  • n (Integer) (defaults to: 10)

    (10) Loopback length.

  • wilder (TrueClass, FalseClass) (defaults to: false)

    (false) If true, 1/n value is used for smoothing; if false, uses 2/(n+1) value

Returns:



573
574
575
576
577
578
579
580
# File 'lib/daru/maths/statistics/vector.rb', line 573

# Exponential moving standard deviation: the square root of every non-nil
# entry of #emv(n, wilder); nil entries stay nil.
def emsd(n=10, wilder=false)
  deviations = []
  emv(n, wilder).each do |variance|
    deviations << if variance.nil?
                    nil
                  else
                    Math.sqrt(variance)
                  end
  end
  Daru::Vector.new(deviations, index: @index, name: @name)
end

#emv(n = 10, wilder = false) ⇒ Daru::Vector

Exponential Moving Variance. Calculates an exponential moving variance of the series using a specified parameter. If wilder is false (the default) then the EMV uses a smoothing value of 2 / (n + 1), if it is true then it uses the Welles Wilder smoother of 1 / n.

Examples:

Using emv


ts = Daru::Vector.new((1..100).map { rand })
         # => [0.047..., 0.23..., 0.836..., 0.845..., ...]

# first 9 observations are nil
ts.emv   # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]

Parameters:

  • n (Integer) (defaults to: 10)

    (10) Loopback length.

  • wilder (TrueClass, FalseClass) (defaults to: false)

    (false) If true, 1/n value is used for smoothing; if false, uses 2/(n+1) value

Returns:



536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
# File 'lib/daru/maths/statistics/vector.rb', line 536

# Exponential moving variance with loopback +n+.
#
# The smoothing factor is 1 / n when +wilder+ is true and 2 / (n + 1)
# otherwise. The first non-nil entry is the population variance of the
# first n observations after the first non-nil value; later entries are
# updated recursively together with the exponential moving mean.
def emv(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
  alpha = wilder ? 1.0 / n : 2.0 / (n + 1)
  # everything is anchored at the first non-nil observation
  first_valid = @data.index { |obs| !obs.nil? }
  warmup = first_valid + n - 1
  variances = [nil] * warmup
  means = [nil] * warmup
  window = first_valid...(first_valid + n)
  means << @data[window].inject(0.0) { |acc, obs| obs.nil? ? acc : acc + obs } / n
  # seed: population variance of the first window around its own mean
  variances << @data[window].inject(0.0) { |acc, obs| obs.nil? ? acc : acc + (obs - means.last)**2 } / n
  (first_valid + n).upto(size - 1) do |position|
    previous_mean = means.last
    means << self[position] * alpha + (1 - alpha) * previous_mean
    variances << (1 - alpha) * variances.last +
                 alpha * (self[position] - previous_mean) * (self[position] - means.last)
  end
  Daru::Vector.new(variances, index: @index, name: @name)
end

#factorsObject

Retrieve unique values of non-nil data



69
70
71
# File 'lib/daru/maths/statistics/vector.rb', line 69

# Unique non-missing values: drops Daru::MISSING_VALUES, de-duplicates the
# remainder and resets the index of the result.
def factors
  reject_values(*Daru::MISSING_VALUES).uniq.reset_index!
end

#frequenciesObject Also known as: freqs



93
94
95
96
97
98
99
# File 'lib/daru/maths/statistics/vector.rb', line 93

# Number of occurrences of each non-nil value.
#
# @return [Daru::Vector] built from a value => count hash
def frequencies
  tally = Hash.new(0)
  @data.each do |element|
    next if element.nil?

    tally[element] += 1
  end
  Daru::Vector.new(tally)
end

#kurtosis(m = nil) ⇒ Object



220
221
222
223
224
225
226
227
228
# File 'lib/daru/maths/statistics/vector.rb', line 220

# Excess kurtosis: sum((x - m)**4) / (N * sd_sample**4) - 3, where N is the
# number of non-missing values.
#
# Delegates to @data when the backing store implements #kurtosis.
#
# @param m [Numeric, nil] mean to use; defaults to #mean
def kurtosis m=nil
  if @data.respond_to? :kurtosis
    @data.kurtosis
  else
    m ||= mean
    # Only non-missing values enter the numerator, matching the denominator
    # (which already excludes them); iterating raw @data raised on nil.
    fo = reject_values(*Daru::MISSING_VALUES).data.inject(0) { |a, x| a + ((x - m)**4) }
    fo.quo((size - indexes(*Daru::MISSING_VALUES).size) * standard_deviation_sample(m)**4) - 3
  end
end

#macd(fast = 12, slow = 26, signal = 9) ⇒ Object

Moving Average Convergence-Divergence. Calculates the MACD (moving average convergence-divergence) of the time series - this is a comparison of a fast EMA with a slow EMA.

Arguments

  • fast: integer, (default = 12) - fast component of MACD

  • slow: integer, (default = 26) - slow component of MACD

  • signal: integer, (default = 9) - signal component of MACD

Usage

ts = Daru::Vector.new((1..100).map { rand })
         # => [0.69, 0.23, 0.44, 0.71, ...]
ts.macd(13)

Returns

Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with signal value



601
602
603
604
# File 'lib/daru/maths/statistics/vector.rb', line 601

# Moving Average Convergence-Divergence.
#
# @return [Array] two elements: ema(fast) - ema(slow), and the ema of that
#   difference over +signal+ periods
def macd(fast=12, slow=26, signal=9)
  fast_average = ema(fast)
  slow_average = ema(slow)
  macd_line = fast_average - slow_average
  [macd_line, macd_line.ema(signal)]
end

#max(return_type = :stored_type) ⇒ Object

Maximum element of the vector.

Parameters:

  • return_type (Symbol) (defaults to: :stored_type)

    Data type of the returned value. Defaults to returning only the maximum number but passing :vector will return a Daru::Vector with the index of the corresponding maximum value.



78
79
80
81
82
83
84
85
# File 'lib/daru/maths/statistics/vector.rb', line 78

# Maximum element of the vector.
#
# @param return_type [Symbol] pass :vector to get a Daru::Vector mapping
#   the index of the maximum to its value; anything else returns the bare value
def max return_type=:stored_type
  max_value = @data.max
  return max_value unless return_type == :vector

  Daru::Vector.new({index_of(max_value) => max_value}, name: @name, dtype: @dtype)
end

#max_indexDaru::Vector

Return a Vector with the max element and its index.

Returns:



89
90
91
# File 'lib/daru/maths/statistics/vector.rb', line 89

# Shortcut for max(:vector): the maximum element together with its index.
def max_index
  max :vector
end

#meanObject



10
11
12
# File 'lib/daru/maths/statistics/vector.rb', line 10

# Delegates to the backing data store's #mean.
def mean
  @data.mean
end

#medianObject



30
31
32
# File 'lib/daru/maths/statistics/vector.rb', line 30

# Median of the vector: delegated to the backing store when it implements
# #median, otherwise computed as the 50th percentile.
def median
  return @data.median if @data.respond_to?(:median)

  percentile(50)
end

#median_absolute_deviationObject Also known as: mad



53
54
55
56
# File 'lib/daru/maths/statistics/vector.rb', line 53

# Median absolute deviation: the median of |value - median| over the vector.
def median_absolute_deviation
  center = median
  deviations = recode { |val| (val - center).abs }
  deviations.median
end

#minObject



22
23
24
# File 'lib/daru/maths/statistics/vector.rb', line 22

# Delegates to the backing data store's #min.
def min
  @data.min
end

#modeObject



34
35
36
37
# File 'lib/daru/maths/statistics/vector.rb', line 34

# Most frequent value(s) of the vector.
#
# @return [Object, Daru::Vector, nil] the single most frequent value, a
#   Daru::Vector of all tied values when there is more than one, or nil
#   when there are no frequencies
def mode
  # Build the frequency table and its maximum once; previously both were
  # recomputed for every entry inside the select block.
  freqs = frequencies
  highest = freqs.max
  modes = freqs.to_h.select { |_, v| v == highest }.keys
  modes.size > 1 ? Daru::Vector.new(modes) : modes.first
end

#percent_change(periods = 1) ⇒ Object

The percent_change method computes the percent change over the given number of periods.

Examples:


vector = Daru::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
vector.percent_change
#=>
#   <Daru::Vector:28713060 @name = nil size: 5 >
#              nil
#   a
#   f	   0.5
#   t	   0.0
#   i	   0.3333333333333333
#   k          0.25

Parameters:

  • periods (Integer) (defaults to: 1)

    (1) number of nils to insert at the beginning.



383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
# File 'lib/daru/maths/statistics/vector.rb', line 383

# Relative change of each value with respect to the previous non-nil value.
#
# An entry is nil when its position is below +periods+, or when either the
# current or the previous value is a missing value.
#
# @param periods [Integer] number of leading entries forced to nil
# @return [Daru::Vector] changes, with the receiver's index and name
def percent_change periods=1
  must_be_numeric!

  previous = nil
  changes = @data.each_with_index.map do |current, position|
    skip = position < periods ||
           include_with_nan?(Daru::MISSING_VALUES, current) ||
           include_with_nan?(Daru::MISSING_VALUES, previous)
    change = skip ? nil : (current - previous) / previous.to_f
    # remember the latest truthy observation as the next baseline
    previous = current if current
    change
  end

  Daru::Vector.new(changes, index: @index, name: @name)
end

#percentile(q, strategy = :midpoint) ⇒ Object Also known as: percentil

Returns the value of the percentile q

Accepts an optional second argument specifying the strategy to interpolate when the requested percentile lies between two data points a and b Valid strategies are:

  • :midpoint (Default): (a + b) / 2

  • :linear : a + (b - a) * d where d is the decimal part of the index between a and b.

References

This is the NIST recommended method (en.wikipedia.org/wiki/Percentile#NIST_method)



248
249
250
251
252
253
254
255
256
257
# File 'lib/daru/maths/statistics/vector.rb', line 248

# Value of the percentile +q+, interpolated with the given strategy.
#
# @param q [Numeric] requested percentile
# @param strategy [Symbol] :midpoint (default) or :linear
# @raise [ArgumentError] for any other strategy
def percentile(q, strategy=:midpoint)
  return midpoint_percentile(q) if :midpoint == strategy
  return linear_percentile(q) if :linear == strategy

  raise ArgumentError, "Unknown strategy #{strategy}"
end

#productObject



18
19
20
# File 'lib/daru/maths/statistics/vector.rb', line 18

# Delegates to the backing data store's #product.
def product
  @data.product
end

#proportion(value = 1) ⇒ Object



148
149
150
# File 'lib/daru/maths/statistics/vector.rb', line 148

# Share of the non-missing values that equal +value+, as a Float.
def proportion value=1
  occurrences = frequencies[value]
  valid_count = size - count_values(*Daru::MISSING_VALUES)
  occurrences.quo(valid_count).to_f
end

#proportionsObject



104
105
106
107
108
109
# File 'lib/daru/maths/statistics/vector.rb', line 104

# Share of the non-missing values taken by each distinct value.
#
# @return [Hash] value => proportion (Float between 0 and 1)
def proportions
  len = size - count_values(*Daru::MISSING_VALUES)
  frequencies.to_h.each_with_object({}) do |(el, count), hash|
    # count and len are both Integers, so plain `/` truncated every
    # proportion to 0 or 1; use quo + to_f as #proportion does.
    hash[el] = count.quo(len).to_f
  end
end

#rangeObject



26
27
28
# File 'lib/daru/maths/statistics/vector.rb', line 26

# Difference between the maximum and the minimum element.
def range
  max - min
end

#rankedObject



111
112
113
114
115
116
117
118
119
# File 'lib/daru/maths/statistics/vector.rb', line 111

# Replaces every value by its rank. Values are ranked in sorted order and
# tied values share the average of the positions they occupy.
def ranked
  seen = 0
  rank_of = {}
  frequencies.to_h.sort.each do |el, count|
    # average of the first (seen + 1) and last (seen + count) position
    rank_of[el] = ((seen + 1) + (seen + count)).quo(2)
    seen += count
  end

  recode { |e| rank_of[e] }
end

#rolling(function, n = 10) ⇒ Daru::Vector

Calculate the rolling function for a loopback value.

Examples:

Using #rolling

ts = Daru::Vector.new((1..100).map { rand })
         # => [0.69, 0.23, 0.44, 0.71, ...]
# first 9 observations are nil
ts.rolling(:mean)    # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]

Parameters:

  • function (Symbol)

    The rolling function to be applied. Can be any function applicable to Daru::Vector (:mean, :median, :count, :min, :max, etc.)

  • n (Integer) (defaults to: 10)

    (10) A non-negative value which serves as the loopback length.

Returns:



440
441
442
443
444
445
446
447
# File 'lib/daru/maths/statistics/vector.rb', line 440

# Applies +function+ to every window of +n+ consecutive observations.
#
# @param function [Symbol] method sent to a Daru::Vector built from each window
# @param n [Integer] loopback (window) length
# @return [Daru::Vector] n - 1 leading nils followed by one result per
#   window, built with the receiver's index
def rolling function, n=10
  padding = [nil] * (n - 1)
  window_results = (0..(size - n)).map do |offset|
    Daru::Vector.new(@data[offset...(offset + n)]).send(function)
  end
  Daru::Vector.new(padding + window_results, index: @index)
end

#rolling_countObject

Calculate rolling non-missing count

Parameters:

  • n (Integer)

    (10) Loopback length



473
474
475
476
477
# File 'lib/daru/maths/statistics/vector.rb', line 473

# Generates the rolling_<statistic> convenience methods (rolling_count,
# rolling_mean, ...). Each forwards to #rolling with the matching
# statistic and a loopback length that defaults to 10.
%i[count mean median max min sum std variance].each do |statistic|
  define_method(:"rolling_#{statistic}") { |n=10| rolling(statistic, n) }
end

#rolling_maxObject

Calculate rolling max value

Parameters:

  • n (Integer)

    (10) Loopback length



473
474
475
476
477
# File 'lib/daru/maths/statistics/vector.rb', line 473

# Generates the rolling_<statistic> convenience methods (rolling_count,
# rolling_mean, ...). Each forwards to #rolling with the matching
# statistic and a loopback length that defaults to 10.
%i[count mean median max min sum std variance].each do |statistic|
  define_method(:"rolling_#{statistic}") { |n=10| rolling(statistic, n) }
end

#rolling_meanObject

Calculate rolling average

Parameters:

  • n (Integer)

    (10) Loopback length



473
474
475
476
477
# File 'lib/daru/maths/statistics/vector.rb', line 473

# Generates the rolling_<statistic> convenience methods (rolling_count,
# rolling_mean, ...). Each forwards to #rolling with the matching
# statistic and a loopback length that defaults to 10.
%i[count mean median max min sum std variance].each do |statistic|
  define_method(:"rolling_#{statistic}") { |n=10| rolling(statistic, n) }
end

#rolling_medianObject

Calculate rolling median

Parameters:

  • n (Integer)

    (10) Loopback length



473
474
475
476
477
# File 'lib/daru/maths/statistics/vector.rb', line 473

# Generates the rolling_<statistic> convenience methods (rolling_count,
# rolling_mean, ...). Each forwards to #rolling with the matching
# statistic and a loopback length that defaults to 10.
%i[count mean median max min sum std variance].each do |statistic|
  define_method(:"rolling_#{statistic}") { |n=10| rolling(statistic, n) }
end

#rolling_minObject

Calculate rolling min value

Parameters:

  • n (Integer)

    (10) Loopback length



473
474
475
476
477
# File 'lib/daru/maths/statistics/vector.rb', line 473

# Generates the rolling_<statistic> convenience methods (rolling_count,
# rolling_mean, ...). Each forwards to #rolling with the matching
# statistic and a loopback length that defaults to 10.
%i[count mean median max min sum std variance].each do |statistic|
  define_method(:"rolling_#{statistic}") { |n=10| rolling(statistic, n) }
end

#rolling_stdObject

Calculate rolling standard deviation

Parameters:

  • n (Integer)

    (10) Loopback length



473
474
475
476
477
# File 'lib/daru/maths/statistics/vector.rb', line 473

# Generates the rolling_<statistic> convenience methods (rolling_count,
# rolling_mean, ...). Each forwards to #rolling with the matching
# statistic and a loopback length that defaults to 10.
%i[count mean median max min sum std variance].each do |statistic|
  define_method(:"rolling_#{statistic}") { |n=10| rolling(statistic, n) }
end

#rolling_sumObject

Calculate rolling sum

Parameters:

  • n (Integer)

    (10) Loopback length



473
474
475
476
477
# File 'lib/daru/maths/statistics/vector.rb', line 473

# Generates the rolling_<statistic> convenience methods (rolling_count,
# rolling_mean, ...). Each forwards to #rolling with the matching
# statistic and a loopback length that defaults to 10.
%i[count mean median max min sum std variance].each do |statistic|
  define_method(:"rolling_#{statistic}") { |n=10| rolling(statistic, n) }
end

#rolling_varianceObject

Calculate rolling variance

Parameters:

  • n (Integer)

    (10) Loopback length



473
474
475
476
477
# File 'lib/daru/maths/statistics/vector.rb', line 473

# Generates the rolling_<statistic> convenience methods (rolling_count,
# rolling_mean, ...). Each forwards to #rolling with the matching
# statistic and a loopback length that defaults to 10.
%i[count mean median max min sum std variance].each do |statistic|
  define_method(:"rolling_#{statistic}") { |n=10| rolling(statistic, n) }
end

#sample_with_replacement(sample = 1) ⇒ Object

Returns a random sample of size n, with replacement, only with non-nil data.

In all the trials, every item has the same probability of being selected.



342
343
344
345
346
347
348
349
350
# File 'lib/daru/maths/statistics/vector.rb', line 342

# Random sample of +sample+ elements drawn with replacement from the
# non-missing values. Delegates to the backing store when it implements
# #sample_with_replacement.
#
# @return [Array] the drawn values (on the non-delegated path)
def sample_with_replacement(sample=1)
  return @data.sample_with_replacement(sample) if @data.respond_to? :sample_with_replacement

  missing_free = indexes(*Daru::MISSING_VALUES).empty?
  valid = missing_free ? self : reject_values(*Daru::MISSING_VALUES)
  pool_size = valid.size
  (0...sample).map { valid[rand(pool_size)] }
end

#sample_without_replacement(sample = 1) ⇒ Object

Returns a random sample of size n, without replacement, only with valid data.

Every element could only be selected once.

A sample of the same size of the vector is the vector itself.



358
359
360
361
362
363
364
# File 'lib/daru/maths/statistics/vector.rb', line 358

# Random sample of +sample+ elements drawn without replacement. Uses the
# backing store's implementation when it has one, otherwise
# #raw_sample_without_replacement.
def sample_without_replacement(sample=1)
  return @data.sample_without_replacement(sample) if @data.respond_to? :sample_without_replacement

  raw_sample_without_replacement(sample)
end

#skew(m = nil) ⇒ Object

Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3)



210
211
212
213
214
215
216
217
218
# File 'lib/daru/maths/statistics/vector.rb', line 210

# Skewness: sum((x - m)**3) / (N * sd_sample**3), where N is the number of
# non-missing values.
#
# Delegates to @data when the backing store implements #skew.
#
# @param m [Numeric, nil] mean to use; defaults to #mean
def skew m=nil
  if @data.respond_to? :skew
    @data.skew
  else
    m ||= mean
    # Only non-missing values enter the numerator, matching the denominator
    # (which already excludes them); iterating raw @data raised on nil.
    th = reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val| memo + ((val - m)**3) }
    th.quo((size - indexes(*Daru::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
  end
end

#standard_deviation_population(m = nil) ⇒ Object Also known as: sdp



191
192
193
194
195
196
197
198
# File 'lib/daru/maths/statistics/vector.rb', line 191

# Population standard deviation. Delegates to the backing store when it
# implements the method, otherwise takes the square root of
# #variance_population.
#
# @param m [Numeric, nil] mean to use; defaults to #mean
def standard_deviation_population m=nil
  m ||= mean
  return @data.standard_deviation_population(m) if @data.respond_to? :standard_deviation_population

  Math.sqrt(variance_population(m))
end

#standard_deviation_sample(m = nil) ⇒ Object Also known as: sds, sd



200
201
202
203
204
205
206
207
# File 'lib/daru/maths/statistics/vector.rb', line 200

# Sample standard deviation. Delegates to the backing store when it
# implements the method, otherwise takes the square root of
# #variance_sample.
#
# @param m [Numeric, nil] mean to use; defaults to #mean
def standard_deviation_sample m=nil
  m ||= mean
  return @data.standard_deviation_sample(m) if @data.respond_to? :standard_deviation_sample

  Math.sqrt(variance_sample(m))
end

#standard_errorObject Also known as: se



60
61
62
# File 'lib/daru/maths/statistics/vector.rb', line 60

# Standard error: the sample standard deviation divided by the square
# root of the number of non-missing values.
def standard_error
  valid_count = size - count_values(*Daru::MISSING_VALUES)
  standard_deviation_sample / Math.sqrt(valid_count)
end

#standardize(use_population = false) ⇒ Object

Standardize data.

Arguments

  • use_population - Pass as true if you want to use population

standard deviation instead of sample standard deviation.



287
288
289
290
291
292
293
# File 'lib/daru/maths/statistics/vector.rb', line 287

# Standardizes the data through #vector_standardized_compute, using the
# mean and either the population or the sample standard deviation.
#
# Returns a Daru::Vector of +size+ nils when the mean is nil or the
# standard deviation equals 0.0.
#
# @param use_population [Boolean] true to use sdp instead of sds
def standardize use_population=false
  center = mean
  spread = use_population ? sdp : sds

  if center.nil? || spread == 0.0
    Daru::Vector.new([nil]*size)
  else
    vector_standardized_compute center, spread
  end
end

#sumObject



14
15
16
# File 'lib/daru/maths/statistics/vector.rb', line 14

# Delegates to the backing data store's #sum.
def sum
  @data.sum
end

#sum_of_squared_deviationObject



64
65
66
# File 'lib/daru/maths/statistics/vector.rb', line 64

# Sum of squared deviations, computed as sum(x**2) - sum(x)**2 / N, where
# N is the number of non-missing values.
#
# @return [Float]
def sum_of_squared_deviation
  # Square only the non-missing values so the numerator matches the
  # denominator (which already excludes them); raw @data raised on nil.
  squares = reject_values(*Daru::MISSING_VALUES).data.inject(0) { |a, x| x**2 + a }
  valid_count = size - count_values(*Daru::MISSING_VALUES)
  (squares - (sum**2).quo(valid_count).to_f).to_f
end

#sum_of_squares(m = nil) ⇒ Object Also known as: ss



184
185
186
187
188
189
# File 'lib/daru/maths/statistics/vector.rb', line 184

# Sum of squared differences between every non-missing value and +m+.
#
# @param m [Numeric, nil] center to measure from; defaults to #mean
def sum_of_squares(m=nil)
  center = m || mean
  present = reject_values(*Daru::MISSING_VALUES).data
  present.inject(0) { |total, value| total + (value - center)**2 }
end

#value_countsObject

Count number of occurrences of each value in the Vector



140
141
142
143
144
145
146
# File 'lib/daru/maths/statistics/vector.rb', line 140

# Number of occurrences of each value in the Vector (nil included).
#
# @return [Daru::Vector] built from a value => count hash
def value_counts
  occurrences = Hash.new(0)
  @data.each { |datum| occurrences[datum] += 1 }

  Daru::Vector.new(occurrences)
end

#variance_population(m = nil) ⇒ Object

Population variance with denominator (N)



163
164
165
166
167
168
169
170
# File 'lib/daru/maths/statistics/vector.rb', line 163

# Population variance: sum of squares divided by the number of
# non-missing values (N). Delegates to the backing store when it
# implements the method.
#
# @param m [Numeric, nil] mean to use; defaults to #mean
def variance_population m=nil
  m ||= mean
  return @data.variance_population(m) if @data.respond_to? :variance_population

  valid_count = size - count_values(*Daru::MISSING_VALUES)
  sum_of_squares(m).quo(valid_count).to_f
end

#variance_sample(m = nil) ⇒ Object Also known as: variance

Sample variance with denominator (N-1)



153
154
155
156
157
158
159
160
# File 'lib/daru/maths/statistics/vector.rb', line 153

# Sample variance: sum of squares divided by the number of non-missing
# values minus one (N - 1). Delegates to the backing store when it
# implements the method.
#
# @param m [Numeric, nil] mean to use; defaults to #mean
def variance_sample m=nil
  m ||= mean
  return @data.variance_sample(m) if @data.respond_to? :variance_sample

  degrees_of_freedom = size - count_values(*Daru::MISSING_VALUES) - 1
  sum_of_squares(m).quo(degrees_of_freedom)
end

#vector_centered_compute(m) ⇒ Object



328
329
330
331
332
333
334
335
# File 'lib/daru/maths/statistics/vector.rb', line 328

# Subtracts +m+ from every non-nil value (converted with to_f); nil stays
# nil. Delegates to the backing store when it implements the method.
#
# @return [Daru::Vector] with the receiver's index, name and dtype
def vector_centered_compute(m)
  return @data.vector_centered_compute(m) if @data.respond_to? :vector_centered_compute

  centered = @data.collect { |x| x.to_f - m unless x.nil? }
  Daru::Vector.new centered, index: index, name: name, dtype: dtype
end

#vector_percentileObject

Replace each non-nil value in the vector with its percentile.



314
315
316
317
# File 'lib/daru/maths/statistics/vector.rb', line 314

# Replaces each non-nil rank from #ranked with its percentile:
# rank / (number of non-missing values) * 100, as a Float.
def vector_percentile
  valid_count = size - indexes(*Daru::MISSING_VALUES).size
  ranked.recode! do |rank|
    rank.nil? ? nil : (rank.quo(valid_count)*100).to_f
  end
end

#vector_standardized_compute(m, sd) ⇒ Object



319
320
321
322
323
324
325
326
# File 'lib/daru/maths/statistics/vector.rb', line 319

# Computes (x - m) / sd for every non-nil value (x converted with to_f);
# nil stays nil. Delegates to the backing store when it implements the
# method.
#
# @return [Daru::Vector] with the receiver's index, name and dtype
def vector_standardized_compute(m,sd)
  return @data.vector_standardized_compute(m,sd) if @data.respond_to? :vector_standardized_compute

  standardized = @data.collect { |x| (x.to_f - m).quo(sd) unless x.nil? }
  Daru::Vector.new standardized, index: index, name: name, dtype: dtype
end