Module: Daru::Maths::Statistics::Vector

Extended by:
Gem::Deprecate
Included in:
Vector
Defined in:
lib/daru/maths/statistics/vector.rb

Overview

rubocop:disable Metrics/ModuleLength

Instance Method Summary collapse

Instance Method Details

#acf(max_lags = nil) ⇒ Object

Calculates the autocorrelation coefficients of the series.

The first element is always 1, since that is the correlation of the series with itself.

Examples:

ts = Daru::Vector.new((1..100).map { rand })

ts.acf   # => array with first 21 autocorrelations
ts.acf 3 # => array with the first 4 autocorrelations (lags 0 through 3)

870
871
872
873
874
875
876
877
878
879
880
881
882
883
# File 'lib/daru/maths/statistics/vector.rb', line 870

# Calculates the autocorrelation coefficients of the series.
#
# == Arguments
#
# * *max_lags*: highest lag to compute. When nil it defaults to
#   (10 * log10(size)).to_i.
#
# == Returns
#
# Array with one coefficient per lag in 0..max_lags. The lag-0 entry is
# always 1.0, since that is the correlation of the series with itself.
def acf(max_lags=nil)
  max_lags ||= (10 * Math.log10(size)).to_i
  # Only the trivial lag-0 term (or nothing at all) is requested, so neither the
  # mean nor the variance is needed.
  return (0..max_lags).map { 1.0 } if max_lags < 1

  # None of these depend on the lag: compute them once, not once per lag.
  m        = mean
  centered = self - m
  var      = variance_sample

  (0..max_lags).map do |i|
    next 1.0 if i.zero?

    # can't use Pearson coefficient since the mean for the lagged series should
    # be the same as the regular series
    (centered * (lag(i) - m)).sum / var / (size - 1)
  end
end

#acvf(demean = true, unbiased = true) ⇒ Object

Provides autocovariance.

Options

  • :demean = true; optional. Supply false if series is not to be demeaned

  • :unbiased = true; optional. true/false for unbiased/biased form of autocovariance

Returns

Autocovariance value


895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
# File 'lib/daru/maths/statistics/vector.rb', line 895

# Provides the autocovariance of the series for lags 0...n, where
# n = (10 * log10(size)).to_i + 1, capped at the number of observations.
#
# == Arguments
#
# * *demean*: true (default) to subtract the mean from the series before
#   multiplying it with its lagged copy; false to use the raw series.
# * *unbiased*: selects which set of denominators is used (see below).
#
# == Returns
#
# Array with one autocovariance value per lag.
#
# NOTE(review): with unbiased = true every lag is divided by +size+; with
# unbiased = false lag i is divided by +size - i+. That looks swapped relative
# to the usual biased/unbiased definitions - confirm before changing, since
# existing callers may rely on the current numbers.
def acvf(demean=true, unbiased=true) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  # The positional arguments are used directly. The previous
  # `opts = {...}.merge(opts)` merged nil (raising TypeError on every call) and
  # then overwrote both arguments.
  demeaned_series = demean ? self - mean : self

  # Never ask for more lags than there are observations: d holds at most one
  # denominator per observation.
  n = [(10 * Math.log10(size)).to_i + 1, size].min
  m = mean
  d = if unbiased
        Array.new(size, size)
      else
        (1..size).to_a.reverse[0..n]
      end

  0.upto(n - 1).map do |i|
    (demeaned_series * (lag(i) - m)).sum / d[i]
  end
end

#average_deviation_population(m = nil) ⇒ Object Also known as: adp


482
483
484
485
486
487
488
# File 'lib/daru/maths/statistics/vector.rb', line 482

# Average absolute deviation of the non-missing values around +m+.
#
# m - centre to measure deviations from; defaults to the vector's mean.
#
# Returns the summed absolute deviations divided (via #quo) by the number of
# non-missing values.
def average_deviation_population(m=nil)
  must_be_numeric!
  m ||= mean
  present = reject_values(*Daru::MISSING_VALUES).data
  total_deviation = present.inject(0) { |acc, value| (value - m).abs + acc }
  total_deviation.quo(size - count_values(*Daru::MISSING_VALUES))
end

#box_cox_transformation(lambda) ⇒ Object

:nocov:


548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
# File 'lib/daru/maths/statistics/vector.rb', line 548

# Applies the Box-Cox transformation to every non-nil value through #recode:
# the natural log when +lambda+ is zero, (x**lambda - 1) / lambda otherwise.
# nil values stay nil.
def box_cox_transformation lambda # :nodoc:
  must_be_numeric!

  recode do |x|
    next nil if x.nil?

    lambda.zero? ? Math.log(x) : (x**lambda - 1).quo(lambda)
  end
end

#centerObject

Center data by subtracting the mean from each non-nil value.


529
530
531
# File 'lib/daru/maths/statistics/vector.rb', line 529

# Center data by subtracting the mean from the vector.
def center
  average = mean
  self - average
end

#coefficient_of_variationObject Also known as: cov


373
374
375
# File 'lib/daru/maths/statistics/vector.rb', line 373

# Ratio of the sample standard deviation to the mean.
def coefficient_of_variation
  spread = standard_deviation_sample
  spread / mean
end

#count(value = false, &block) ⇒ Object

Retrieves number of cases which comply condition. If block given, retrieves number of instances where block returns true. If other values given, retrieves the frequency for this value. If no value given, counts the number of non-nil elements in the Vector.


381
382
383
384
385
386
387
388
389
# File 'lib/daru/maths/statistics/vector.rb', line 381

# Counts elements of the vector.
#
# * With a block: number of elements for which the block returns true.
# * With a (truthy) value: number of elements equal to that value.
# * Otherwise: size minus the number of missing-value positions.
def count(value=false, &block)
  return @data.select(&block).count if block_given?
  return count { |val| val == value } if value

  size - indexes(*Daru::MISSING_VALUES).size
end

#covariance_population(other) ⇒ Object

Population covariance with denominator (N)


431
432
433
434
# File 'lib/daru/maths/statistics/vector.rb', line 431

# Population covariance with denominator (N), N being the number of
# non-missing values of this vector.
#
# Raises ArgumentError when the two vectors differ in size.
def covariance_population(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  cross_sum = covariance_sum(other)
  cross_sum / (size - count_values(*Daru::MISSING_VALUES))
end

#covariance_sample(other) ⇒ Object Also known as: covariance

Sample covariance with denominator (N-1)


425
426
427
428
# File 'lib/daru/maths/statistics/vector.rb', line 425

# Sample covariance with denominator (N-1), N being the number of
# non-missing values of this vector.
#
# Raises ArgumentError when the two vectors differ in size.
def covariance_sample(other)
  raise ArgumentError, 'size of both the vectors must be equal' unless size == other.size

  cross_sum = covariance_sum(other)
  cross_sum / (size - count_values(*Daru::MISSING_VALUES) - 1)
end

#cumsumObject

Calculate cumulative sum of Vector


919
920
921
922
923
924
925
926
927
928
929
930
931
932
# File 'lib/daru/maths/statistics/vector.rb', line 919

# Calculate the cumulative sum of the Vector.
#
# Positions holding a missing value produce nil and leave the running total
# untouched. The result is a new Daru::Vector sharing this vector's index.
def cumsum
  running = 0
  totals = @data.each_with_object([]) do |value, out|
    if include_with_nan?(Daru::MISSING_VALUES, value)
      out << nil
    else
      running += value
      out << running
    end
  end

  Daru::Vector.new(totals, index: @index)
end

#describe(methods = nil) ⇒ Object

Create a summary of count, mean, standard deviation, min and max of the vector in one shot.

Arguments

methods - An array of aggregation methods, given as symbols, to apply to the vector. Defaults to [:count, :mean, :std, :min, :max]. Methods are applied in the order given.


43
44
45
46
47
# File 'lib/daru/maths/statistics/vector.rb', line 43

# Builds a summary vector by sending each requested statistic to this vector.
#
# methods - Array of method names (symbols); defaults to
#           count, mean, std, min, max.
#
# Returns a Daru::Vector named :statistics, indexed by the method names, in
# the order they were given.
def describe(methods=nil)
  methods ||= %i[count mean std min max]
  Daru::Vector.new(methods.map { |stat| send(stat) }, index: methods, name: :statistics)
end

#dichotomize(low = nil) ⇒ Object

Dichotomize the vector with 0 and 1, based on lowest value. If parameter is defined, this value and lower will be 0 and higher, 1.


514
515
516
517
518
519
520
521
522
523
524
525
526
# File 'lib/daru/maths/statistics/vector.rb', line 514

# Dichotomize the vector into 0 and 1.
#
# low - threshold; values greater than it become 1, everything else 0.
#       Defaults to the smallest factor of the vector.
#
# nil values stay nil.
def dichotomize(low=nil)
  low ||= factors.min

  recode do |x|
    next nil if x.nil?

    x > low ? 1 : 0
  end
end

#diff(max_lags = 1) ⇒ Daru::Vector

Performs the difference of the series. Note: the first difference of a series is X(t) - X(t-1), but the second difference is NOT X(t) - X(t-2). It is the first difference of the first difference:

> (X(t) - X(t-1)) - (X(t-1) - X(t-2))

Arguments

  • max_lags: integer (default: 1), the number of times the series is differenced.

Examples:

Using #diff


ts = Daru::Vector.new((1..10).map { rand })
         # => [0.69, 0.23, 0.44, 0.71, ...]

ts.diff   # => [nil, -0.46, 0.21, 0.27, ...]

Returns:


670
671
672
673
674
675
676
677
678
# File 'lib/daru/maths/statistics/vector.rb', line 670

# Differences the series +max_lags+ times: each pass subtracts the lagged
# series from the current one and feeds the result into the next pass.
#
# Returns the result of the last pass, or an empty Array when no pass runs.
def diff(max_lags=1)
  series = self
  result = []
  max_lags.times { series = result = series - series.lag }
  result
end

#ema(n = 10, wilder = false) ⇒ Daru::Vector

Exponential Moving Average. Calculates an exponential moving average of the series using a specified parameter. If wilder is false (the default) then the EMA uses a smoothing value of 2 / (n + 1), if it is true then it uses the Welles Wilder smoother of 1 / n.

Warning for EMA usage: EMAs are unstable for small series, as they use a lot more than n observations to calculate. The series is stable if the size of the series is >= 3.45 * (n + 1)

Examples:

Using ema


ts = Daru::Vector.new((1..100).map { rand })
         # => [0.577..., 0.123..., 0.173..., 0.233..., ...]

# first 9 observations are nil
ts.ema   # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]

Parameters:

  • n (Integer) (defaults to: 10)

    (10) Loopback length.

  • wilder (TrueClass, FalseClass) (defaults to: false)

    (false) If true, 1/n value is used for smoothing; if false, uses 2/(n+1) value

Returns:


754
755
756
757
758
759
760
761
762
763
764
765
766
767
# File 'lib/daru/maths/statistics/vector.rb', line 754

# Exponential moving average.
#
# n      - loopback length (default 10).
# wilder - when true the smoothing factor is 1/n, otherwise 2/(n + 1).
#
# Returns a Daru::Vector with this vector's index and name.
def ema(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
  alpha = wilder ? 1.0 / n : 2.0 / (n + 1)
  # everything is anchored on the first non-nil observation
  first_valid = @data.index { |obs| !obs.nil? }
  seed_end = first_valid + n
  # positions before the seed value carry no average yet
  averages = [nil] * (seed_end - 1)
  # the seed is a plain average of the first n observations (nils are skipped
  # in the sum but still counted in the divisor)
  seed_sum = @data[first_valid...seed_end].inject(0.0) { |acc, obs| obs.nil? ? acc : acc + obs }
  averages << seed_sum / n
  (seed_end...size).each do |i|
    averages << self[i] * alpha + (1 - alpha) * averages.last
  end

  Daru::Vector.new(averages, index: @index, name: @name)
end

#emsd(n = 10, wilder = false) ⇒ Daru::Vector

Exponential Moving Standard Deviation. Calculates an exponential moving standard deviation of the series using a specified parameter. If wilder is false (the default) then the EMSD uses a smoothing value of 2 / (n + 1), if it is true then it uses the Welles Wilder smoother of 1 / n.

Examples:

Using emsd


ts = Daru::Vector.new((1..100).map { rand })
         # => [0.400..., 0.727..., 0.862..., 0.013..., ...]

# first 9 observations are nil
ts.emsd   # => [ ... nil, 0.285... , 0.258..., 0.243..., ...]

Parameters:

  • n (Integer) (defaults to: 10)

    (10) Loopback length.

  • wilder (TrueClass, FalseClass) (defaults to: false)

    (false) If true, 1/n value is used for smoothing; if false, uses 2/(n+1) value

Returns:


825
826
827
828
829
830
831
832
# File 'lib/daru/maths/statistics/vector.rb', line 825

# Exponential moving standard deviation: the square root of every non-nil
# value produced by #emv with the same arguments.
#
# Returns a Daru::Vector with this vector's index and name.
def emsd(n=10, wilder=false)
  deviations = []
  emv(n, wilder).each do |variance|
    deviations << (variance.nil? ? nil : Math.sqrt(variance))
  end
  Daru::Vector.new(deviations, index: @index, name: @name)
end

#emv(n = 10, wilder = false) ⇒ Daru::Vector

Exponential Moving Variance. Calculates an exponential moving variance of the series using a specified parameter. If wilder is false (the default) then the EMV uses a smoothing value of 2 / (n + 1), if it is true then it uses the Welles Wilder smoother of 1 / n.

Examples:

Using emv


ts = Daru::Vector.new((1..100).map { rand })
         # => [0.047..., 0.23..., 0.836..., 0.845..., ...]

# first 9 observations are nil
ts.emv   # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]

Parameters:

  • n (Integer) (defaults to: 10)

    (10) Loopback length.

  • wilder (TrueClass, FalseClass) (defaults to: false)

    (false) If true, 1/n value is used for smoothing; if false, uses 2/(n+1) value

Returns:


788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
# File 'lib/daru/maths/statistics/vector.rb', line 788

# Exponential moving variance.
#
# n      - loopback length (default 10).
# wilder - when true the smoothing factor is 1/n, otherwise 2/(n + 1).
#
# Returns a Daru::Vector with this vector's index and name.
def emv(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
  alpha = wilder ? 1.0 / n : 2.0 / (n + 1)
  # everything is anchored on the first non-nil observation
  first_valid = @data.index { |obs| !obs.nil? }
  seed_end = first_valid + n
  # positions before the seed carry neither a variance nor a mean yet
  variances = [nil] * (seed_end - 1)
  means = [nil] * (seed_end - 1)
  means << @data[first_valid...seed_end].inject(0.0) { |acc, obs| obs.nil? ? acc : acc + obs } / n
  # the seed variance is the population variance of the first n observations
  seed_mean = means.last
  variances << @data[first_valid...seed_end].inject(0.0) { |acc, obs|
    obs.nil? ? acc : acc + (obs - seed_mean)**2
  } / n
  (seed_end...size).each do |i|
    previous_mean = means.last
    means << self[i] * alpha + (1 - alpha) * previous_mean
    variances << (1 - alpha) * variances.last + alpha * (self[i] - previous_mean) * (self[i] - means.last)
  end
  Daru::Vector.new(variances, index: @index, name: @name)
end

#factorsObject

Retrieve unique values of non-nil data


65
66
67
# File 'lib/daru/maths/statistics/vector.rb', line 65

# Retrieve the unique non-missing values, with the index reset.
def factors
  present = reject_values(*Daru::MISSING_VALUES)
  present.uniq.reset_index!
end

#frequenciesObject Also known as: freqs


345
346
347
348
349
350
351
# File 'lib/daru/maths/statistics/vector.rb', line 345

# Counts how many times each non-nil value occurs.
#
# Returns a Daru::Vector built from a value => occurrences hash.
def frequencies
  tally = Hash.new(0)
  @data.each { |element| tally[element] += 1 unless element.nil? }
  Daru::Vector.new(tally)
end

#index_of_max(size = nil, &block) ⇒ Object

Returns the index of the maximum value(s) present in the vector, with an optional comparator block.

Examples:


dv = Daru::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]), index: Daru::Index.new([:t, :d, :j])
#=>
#   #<Daru::Vector(3)>
#       t   Tyrion
#       d   Daenerys
#       j   Jon Starkgaryen

dv.index_of_max
#=> :t

dv.index_of_max(2) { |a,b| a.size <=> b.size }
#=> [:j, :d]

Parameters:

  • size (Integer) (defaults to: nil)

    Number of maximum indices to return. Defaults to nil.


263
264
265
266
267
# File 'lib/daru/maths/statistics/vector.rb', line 263

# Index (or indices) of the maximum value(s), with an optional comparator
# block.
#
# size - number of maxima to look up; nil (default) looks up a single one.
#
# Returns one index when #max returns a single value, or an Array of indices
# when it returns an Array.
def index_of_max(size=nil, &block)
  maxima  = max(size, &block)
  present = reject_values(*Daru::MISSING_VALUES)
  if maxima.is_a?(Array)
    maxima.map { |value| present.index_of(value) }
  else
    present.index_of(maxima)
  end
end

#index_of_max_by(size = nil, &block) ⇒ Object

Returns the index of the maximum value(s) present in the vector, with a compulsory object block.

Examples:


dv = Daru::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]), index: Daru::Index.new([:t, :d, :j])
#=>
#   #<Daru::Vector(3)>
#       t   Tyrion
#       d   Daenerys
#       j   Jon Starkgaryen

dv.index_of_max_by(2) { |i| i.size }
#=> [:j, :d]

Parameters:

  • size (Integer) (defaults to: nil)

    Number of maximum indices to return. Defaults to nil.


285
286
287
288
289
# File 'lib/daru/maths/statistics/vector.rb', line 285

# Index (or indices) of the maximum value(s) as selected by #max_by with the
# given block.
#
# size - number of maxima to look up; nil (default) looks up a single one.
#
# Returns one index when #max_by returns a single value, or an Array of
# indices when it returns an Array.
def index_of_max_by(size=nil, &block)
  maxima  = max_by(size, &block)
  present = reject_values(*Daru::MISSING_VALUES)
  if maxima.is_a?(Array)
    maxima.map { |value| present.index_of(value) }
  else
    present.index_of(maxima)
  end
end

#index_of_min(size = nil, &block) ⇒ Object

Returns the index of the minimum value(s) present in the vector, with an optional comparator block.

Examples:


dv = Daru::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]), index: Daru::Index.new([:t, :d, :j])
#=>
#   #<Daru::Vector(3)>
#       t   Tyrion
#       d   Daenerys
#       j   Jon Starkgaryen

dv.index_of_min
#=> :d

dv.index_of_min(2) { |a,b| a.size <=> b.size }
#=> [:t, :d]

Parameters:

  • size (Integer) (defaults to: nil)

    Number of minimum indices to return. Defaults to nil.


310
311
312
313
314
# File 'lib/daru/maths/statistics/vector.rb', line 310

# Index (or indices) of the minimum value(s), with an optional comparator
# block.
#
# size - number of minima to look up; nil (default) looks up a single one.
#
# Returns one index when #min returns a single value, or an Array of indices
# when it returns an Array.
def index_of_min(size=nil, &block)
  minima  = min(size, &block)
  present = reject_values(*Daru::MISSING_VALUES)
  if minima.is_a?(Array)
    minima.map { |value| present.index_of(value) }
  else
    present.index_of(minima)
  end
end

#index_of_min_by(size = nil, &block) ⇒ Object

Returns the index of the minimum value(s) present in the vector, with a compulsory object block.

Examples:


dv = Daru::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]), index: Daru::Index.new([:t, :d, :j])
#=>
#   #<Daru::Vector(3)>
#       t   Tyrion
#       d   Daenerys
#       j   Jon Starkgaryen

dv.index_of_min_by(2) { |i| i.size }
#=> [:t, :d]

Parameters:

  • size (Integer) (defaults to: nil)

    Number of minimum indices to return. Defaults to nil.


332
333
334
335
336
# File 'lib/daru/maths/statistics/vector.rb', line 332

# Index (or indices) of the minimum value(s) as selected by #min_by with the
# given block.
#
# size - number of minima to look up; nil (default) looks up a single one.
#
# Returns one index when #min_by returns a single value, or an Array of
# indices when it returns an Array.
def index_of_min_by(size=nil, &block)
  minima  = min_by(size, &block)
  present = reject_values(*Daru::MISSING_VALUES)
  if minima.is_a?(Array)
    minima.map { |value| present.index_of(value) }
  else
    present.index_of(minima)
  end
end

#kurtosis(m = nil) ⇒ Object


472
473
474
475
476
477
478
479
480
# File 'lib/daru/maths/statistics/vector.rb', line 472

# Excess kurtosis: sum((x - m)^4) / (N * std_dev_sample^4) - 3, where N is
# the number of non-missing values.
#
# m - centre to use; defaults to the vector's mean.
#
# Delegates to @data when the underlying store implements #kurtosis itself.
def kurtosis m=nil
  if @data.respond_to? :kurtosis
    @data.kurtosis
  else
    m ||= mean
    # Only non-missing values may enter the numerator: the denominator already
    # excludes them, and a nil would make the subtraction raise.
    valid = reject_values(*Daru::MISSING_VALUES).data
    fo    = valid.inject(0) { |a, x| a + ((x - m)**4) }
    fo.quo((size - indexes(*Daru::MISSING_VALUES).size) * standard_deviation_sample(m)**4) - 3
  end
end

#macd(fast = 12, slow = 26, signal = 9) ⇒ Array<Daru::Vector>

Moving Average Convergence-Divergence. Calculates the MACD (moving average convergence-divergence) of the time series.

Examples:

Create a series and calculate MACD values

ts = Daru::Vector.new((1..100).map { rand })
         # => [0.69, 0.23, 0.44, 0.71, ...]
macdseries, macdsignal, macdhist = ts.macd
macdseries, macdsignal, macdhist = ts.macd(13)
macdseries, macdsignal, macdhist = ts.macd(12, 26, 5) # arguments are positional: fast, slow, signal

Parameters:

  • fast (Integer) (defaults to: 12)

    fast period of MACD (default 12)

  • slow (Integer) (defaults to: 26)

    slow period of MACD (default 26)

  • signal (Integer) (defaults to: 9)

    signal period of MACD (default 9)

Returns:

  • (Array<Daru::Vector>)

    macdseries, macdsignal and macdhist are returned as an array of three Daru::Vectors

See Also:


853
854
855
856
857
858
# File 'lib/daru/maths/statistics/vector.rb', line 853

# Moving Average Convergence-Divergence.
#
# fast   - period of the fast EMA (default 12).
# slow   - period of the slow EMA (default 26).
# signal - period of the EMA applied to the MACD series (default 9).
#
# Returns [macdseries, macdsignal, macdhist], where macdseries is the fast
# EMA minus the slow EMA, macdsignal is the EMA of macdseries, and macdhist is
# their difference.
def macd(fast=12, slow=26, signal=9)
  fast_ema    = ema(fast)
  series      = fast_ema - ema(slow)
  signal_line = series.ema(signal)
  [series, signal_line, series - signal_line]
end

#max(size = nil, &block) ⇒ Object

Returns the maximum value(s) present in the vector, with an optional comparator block.

Examples:


dv = Daru::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]), index: Daru::Index.new([:t, :d, :j])
#=>
#   #<Daru::Vector(3)>
#       t   Tyrion
#       d   Daenerys
#       j   Jon Starkgaryen

dv.max
#=> "Tyrion"

dv.max(2) { |a,b| a.size <=> b.size }
#=> ["Jon Starkgaryen","Daenerys"]

Parameters:

  • size (Integer) (defaults to: nil)

    Number of maximum values to return. Defaults to nil.


88
89
90
# File 'lib/daru/maths/statistics/vector.rb', line 88

# Maximum value(s) among the non-missing values, with an optional comparator
# block.
#
# size - number of maxima to return; nil (default) returns a single value.
def max(size=nil, &block)
  present = reject_values(*Daru::MISSING_VALUES).to_a
  present.max(size, &block)
end

#max_by(size = nil, &block) ⇒ Object

Returns the maximum value(s) present in the vector, with a compulsory object block.

Examples:


dv = Daru::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]), index: Daru::Index.new([:t, :d, :j])
#=>
#   #<Daru::Vector(3)>
#       t   Tyrion
#       d   Daenerys
#       j   Jon Starkgaryen

dv.max_by(2) { |i| i.size }
#=> ["Jon Starkgaryen","Daenerys"]

Parameters:

  • size (Integer) (defaults to: nil)

    Number of maximum values to return. Defaults to nil.

Raises:

  • (ArgumentError)

107
108
109
110
# File 'lib/daru/maths/statistics/vector.rb', line 107

# Maximum value(s) among the non-missing values, ranked by the block's result.
#
# size - number of maxima to return; nil (default) returns a single value.
#
# Raises ArgumentError when no block is given.
def max_by(size=nil, &block)
  unless block_given?
    raise ArgumentError, 'Expected compulsory object block in max_by method'
  end

  present = reject_values(*Daru::MISSING_VALUES).to_a
  present.max_by(size, &block)
end

#max_indexDaru::Vector

Return the maximum element present in the Vector, as a Vector.

Returns:


340
341
342
343
# File 'lib/daru/maths/statistics/vector.rb', line 340

# Returns the maximum element of @data as a one-element Daru::Vector, keyed
# by that element's index and carrying this vector's name and dtype.
def max_index
  largest  = @data.max
  position = index_of(largest)
  Daru::Vector.new({position => largest}, name: @name, dtype: @dtype)
end

#meanObject


10
11
12
# File 'lib/daru/maths/statistics/vector.rb', line 10

# Mean of the vector, as computed by the underlying data store (@data).
def mean
  @data.mean
end

#medianObject


26
27
28
# File 'lib/daru/maths/statistics/vector.rb', line 26

# Median of the vector: delegated to @data when the store implements #median,
# otherwise taken as the 50th percentile.
def median
  return @data.median if @data.respond_to?(:median)

  percentile(50)
end

#median_absolute_deviationObject Also known as: mad


49
50
51
52
# File 'lib/daru/maths/statistics/vector.rb', line 49

# Median of the absolute deviations of each value from the vector's median.
def median_absolute_deviation
  centre = median
  deviations = recode { |val| (val - centre).abs }
  deviations.median
end

#min(size = nil, &block) ⇒ Object

Returns the minimum value(s) present in the vector, with an optional comparator block.

Examples:


dv = Daru::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]), index: Daru::Index.new([:t, :d, :j])
#=>
#   #<Daru::Vector(3)>
#       t   Tyrion
#       d   Daenerys
#       j   Jon Starkgaryen

dv.min
#=> "Daenerys"

dv.min(2) { |a,b| a.size <=> b.size }
#=> ["Tyrion","Daenerys"]

Parameters:

  • size (Integer) (defaults to: nil)

    Number of minimum values to return. Defaults to nil.


130
131
132
# File 'lib/daru/maths/statistics/vector.rb', line 130

# Minimum value(s) among the non-missing values, with an optional comparator
# block.
#
# size - number of minima to return; nil (default) returns a single value.
def min(size=nil, &block)
  present = reject_values(*Daru::MISSING_VALUES).to_a
  present.min(size, &block)
end

#min_by(size = nil, &block) ⇒ Object

Returns the minimum value(s) present in the vector, with a compulsory object block.

Examples:


dv = Daru::Vector.new (["Tyrion", "Daenerys", "Jon Starkgaryen"]), index: Daru::Index.new([:t, :d, :j])
#=>
#   #<Daru::Vector(3)>
#       t   Tyrion
#       d   Daenerys
#       j   Jon Starkgaryen

dv.min_by { |i| i.size }
#=> "Tyrion"

dv.min_by(2) { |i| i.size }
#=> ["Tyrion","Daenerys"]

Parameters:

  • size (Integer) (defaults to: nil)

    Number of minimum values to return. Defaults to nil.

Raises:

  • (ArgumentError)

149
150
151
152
# File 'lib/daru/maths/statistics/vector.rb', line 149

# Minimum value(s) among the non-missing values, ranked by the block's result.
#
# size - number of minima to return; nil (default) returns a single value.
#
# Raises ArgumentError when no block is given.
def min_by(size=nil, &block)
  unless block_given?
    raise ArgumentError, 'Expected compulsory object block in min_by method'
  end

  present = reject_values(*Daru::MISSING_VALUES).to_a
  present.min_by(size, &block)
end

#modeObject


30
31
32
33
# File 'lib/daru/maths/statistics/vector.rb', line 30

# Most frequent value(s) of the vector.
#
# Returns the single most frequent value, or a Daru::Vector of all values that
# tie for the highest frequency.
def mode
  # Build the frequency table and its maximum once; previously both were
  # recomputed for every entry inside the select block.
  freqs    = frequencies
  max_freq = freqs.max
  modes    = freqs.to_h.select { |_, v| v == max_freq }.keys
  modes.size > 1 ? Daru::Vector.new(modes) : modes.first
end

#percent_change(periods = 1) ⇒ Object

The percent_change method computes the percent change over the given number of periods.

Examples:


vector = Daru::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
vector.percent_change
#=>
#   <Daru::Vector:28713060 @name = nil size: 5 >
#   a          nil
#   f	   0.5
#   t	   0.0
#   i	   0.3333333333333333
#   k          0.25

Parameters:

  • periods (Integer) (defaults to: 1)

    (1) number of nils to insert at the beginning.


635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
# File 'lib/daru/maths/statistics/vector.rb', line 635

# Computes the relative change of each value against the previous non-nil
# value.
#
# periods - number of leading positions that are forced to nil (default 1).
#
# A position is also nil when the current or the previous value is missing.
# Returns a Daru::Vector with this vector's index and name.
def percent_change(periods=1)
  must_be_numeric!

  prev = nil
  changes = @data.each_with_index.map do |cur, i|
    change =
      if i >= periods &&
         !include_with_nan?(Daru::MISSING_VALUES, cur) &&
         !include_with_nan?(Daru::MISSING_VALUES, prev)
        (cur - prev) / prev.to_f
      end
    # only a truthy value replaces the reference; a nil keeps the earlier one
    prev = cur if cur
    change
  end

  Daru::Vector.new(changes, index: @index, name: @name)
end

#percentile(q, strategy = :midpoint) ⇒ Object Also known as: percentil

Returns the value of the percentile q

Accepts an optional second argument specifying the strategy used to interpolate when the requested percentile lies between two data points a and b. Valid strategies are:

  • :midpoint (Default): (a + b) / 2

  • :linear : a + (b - a) * d where d is the decimal part of the index between a and b.

References

This is the NIST recommended method (en.wikipedia.org/wiki/Percentile#NIST_method)


500
501
502
503
504
505
506
507
508
509
# File 'lib/daru/maths/statistics/vector.rb', line 500

# Returns the value of the percentile q.
#
# strategy - :midpoint (default) or :linear; selects which helper computes
#            the percentile.
#
# Raises ArgumentError for any other strategy.
def percentile(q, strategy=:midpoint)
  return midpoint_percentile(q) if :midpoint == strategy
  return linear_percentile(q) if :linear == strategy

  raise ArgumentError, "Unknown strategy #{strategy}"
end

#productObject


18
19
20
# File 'lib/daru/maths/statistics/vector.rb', line 18

# Product of the vector, as computed by the underlying data store (@data).
def product
  @data.product
end

#proportion(value = 1) ⇒ Object


400
401
402
# File 'lib/daru/maths/statistics/vector.rb', line 400

# Share of the non-missing values that equal +value+ (default 1), as a Float.
def proportion(value=1)
  occurrences = frequencies[value]
  valid_count = size - count_values(*Daru::MISSING_VALUES)
  occurrences.quo(valid_count).to_f
end

#proportionsObject


356
357
358
359
360
361
# File 'lib/daru/maths/statistics/vector.rb', line 356

# Maps every distinct value to its share of the non-missing values.
#
# Returns a Hash of value => Float proportion.
def proportions
  valid_count = size - count_values(*Daru::MISSING_VALUES)
  shares = {}
  frequencies.to_h.each do |value, occurrences|
    shares[value] = occurrences / valid_count.to_f
  end
  shares
end

#rangeObject


22
23
24
# File 'lib/daru/maths/statistics/vector.rb', line 22

# Difference between the maximum and the minimum value.
def range
  highest = max
  highest - min
end

#rankedObject


363
364
365
366
367
368
369
370
371
# File 'lib/daru/maths/statistics/vector.rb', line 363

# Replaces every value by its rank among the sorted distinct values. Tied
# values share the average of the rank positions they occupy.
def ranked
  seen = 0
  rank_of = {}
  frequencies.to_h.sort.each do |value, occurrences|
    rank_of[value] = ((seen + 1) + (seen + occurrences)).quo(2)
    seen += occurrences
  end

  recode { |e| rank_of[e] }
end

#rolling(function, n = 10) ⇒ Daru::Vector

Calculate the rolling function for a loopback value.

Examples:

Using #rolling

ts = Daru::Vector.new((1..100).map { rand })
         # => [0.69, 0.23, 0.44, 0.71, ...]
# first 9 observations are nil
ts.rolling(:mean)    # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]

Parameters:

  • function (Symbol)

    The rolling function to be applied. Can be any function applicable to Daru::Vector (:mean, :median, :count, :min, :max, etc.)

  • n (Integer) (defaults to: 10)

    (10) A non-negative value which serves as the loopback length.

Returns:


692
693
694
695
696
697
698
699
# File 'lib/daru/maths/statistics/vector.rb', line 692

# Applies +function+ to every window of +n+ consecutive observations.
#
# function - Symbol naming the method sent to each window (itself wrapped in
#            a Daru::Vector).
# n        - loopback length (default 10).
#
# Returns a Daru::Vector with this vector's index; the first n - 1 entries
# are nil.
def rolling(function, n=10)
  padding = [nil] * (n - 1)
  windows = (0..(size - n)).map do |start|
    Daru::Vector.new(@data[start...(start + n)]).send(function)
  end
  Daru::Vector.new(padding + windows, index: @index)
end

#rolling_count {|n| ... } ⇒ Object

Calculate rolling non-missing count

Yield Parameters:

  • n (Integer)

    (10) Loopback length


725
726
727
728
729
# File 'lib/daru/maths/statistics/vector.rb', line 725

# Defines rolling_count, rolling_mean, rolling_median, rolling_max,
# rolling_min, rolling_sum, rolling_std and rolling_variance. Each takes a
# loopback length n (default 10) and delegates to rolling(<statistic>, n).
%i[count mean median max min sum std variance].each do |meth|
  define_method(:"rolling_#{meth}") { |n=10| rolling(meth, n) }
end

#rolling_max {|n| ... } ⇒ Object

Calculate rolling max value

Yield Parameters:

  • n (Integer)

    (10) Loopback length


725
726
727
728
729
# File 'lib/daru/maths/statistics/vector.rb', line 725

# Defines rolling_count, rolling_mean, rolling_median, rolling_max,
# rolling_min, rolling_sum, rolling_std and rolling_variance. Each takes a
# loopback length n (default 10) and delegates to rolling(<statistic>, n).
%i[count mean median max min sum std variance].each do |meth|
  define_method(:"rolling_#{meth}") { |n=10| rolling(meth, n) }
end

#rolling_mean {|n| ... } ⇒ Object

Calculate rolling average

Yield Parameters:

  • n (Integer)

    (10) Loopback length


725
726
727
728
729
# File 'lib/daru/maths/statistics/vector.rb', line 725

# Defines rolling_count, rolling_mean, rolling_median, rolling_max,
# rolling_min, rolling_sum, rolling_std and rolling_variance. Each takes a
# loopback length n (default 10) and delegates to rolling(<statistic>, n).
%i[count mean median max min sum std variance].each do |meth|
  define_method(:"rolling_#{meth}") { |n=10| rolling(meth, n) }
end

#rolling_median {|n| ... } ⇒ Object

Calculate rolling median

Yield Parameters:

  • n (Integer)

    (10) Loopback length


725
726
727
728
729
# File 'lib/daru/maths/statistics/vector.rb', line 725

# Defines rolling_count, rolling_mean, rolling_median, rolling_max,
# rolling_min, rolling_sum, rolling_std and rolling_variance. Each takes a
# loopback length n (default 10) and delegates to rolling(<statistic>, n).
%i[count mean median max min sum std variance].each do |meth|
  define_method(:"rolling_#{meth}") { |n=10| rolling(meth, n) }
end

#rolling_min {|n| ... } ⇒ Object

Calculate rolling min value

Yield Parameters:

  • n (Integer)

    (10) Loopback length


725
726
727
728
729
# File 'lib/daru/maths/statistics/vector.rb', line 725

# Defines rolling_count, rolling_mean, rolling_median, rolling_max,
# rolling_min, rolling_sum, rolling_std and rolling_variance. Each takes a
# loopback length n (default 10) and delegates to rolling(<statistic>, n).
%i[count mean median max min sum std variance].each do |meth|
  define_method(:"rolling_#{meth}") { |n=10| rolling(meth, n) }
end

#rolling_std {|n| ... } ⇒ Object

Calculate rolling standard deviation

Yield Parameters:

  • n (Integer)

    (10) Loopback length


725
726
727
728
729
# File 'lib/daru/maths/statistics/vector.rb', line 725

# Defines rolling_count, rolling_mean, rolling_median, rolling_max,
# rolling_min, rolling_sum, rolling_std and rolling_variance. Each takes a
# loopback length n (default 10) and delegates to rolling(<statistic>, n).
%i[count mean median max min sum std variance].each do |meth|
  define_method(:"rolling_#{meth}") { |n=10| rolling(meth, n) }
end

#rolling_sum {|n| ... } ⇒ Object

Calculate rolling sum

Yield Parameters:

  • n (Integer)

    (10) Loopback length


725
726
727
728
729
# File 'lib/daru/maths/statistics/vector.rb', line 725

# Defines rolling_count, rolling_mean, rolling_median, rolling_max,
# rolling_min, rolling_sum, rolling_std and rolling_variance. Each takes a
# loopback length n (default 10) and delegates to rolling(<statistic>, n).
%i[count mean median max min sum std variance].each do |meth|
  define_method(:"rolling_#{meth}") { |n=10| rolling(meth, n) }
end

#rolling_variance {|n| ... } ⇒ Object

Calculate rolling variance

Yield Parameters:

  • n (Integer)

    (10) Loopback length


725
726
727
728
729
# File 'lib/daru/maths/statistics/vector.rb', line 725

# Defines rolling_count, rolling_mean, rolling_median, rolling_max,
# rolling_min, rolling_sum, rolling_std and rolling_variance. Each takes a
# loopback length n (default 10) and delegates to rolling(<statistic>, n).
%i[count mean median max min sum std variance].each do |meth|
  define_method(:"rolling_#{meth}") { |n=10| rolling(meth, n) }
end

#sample_with_replacement(sample = 1) ⇒ Object

Returns a random sample of size n, with replacement, drawn only from non-nil data.

In every trial, each item has the same probability of being selected.


594
595
596
597
598
599
600
601
602
# File 'lib/daru/maths/statistics/vector.rb', line 594

# Draws +sample+ values at random, with replacement, from the non-missing
# values.
#
# Delegates to @data when the store implements #sample_with_replacement.
# Otherwise returns an Array of the drawn values.
def sample_with_replacement(sample=1)
  return @data.sample_with_replacement(sample) if @data.respond_to?(:sample_with_replacement)

  nothing_missing = indexes(*Daru::MISSING_VALUES).empty?
  pool = nothing_missing ? self : reject_values(*Daru::MISSING_VALUES)
  pool_size = pool.size
  (0...sample).map { pool[rand(pool_size)] }
end

#sample_without_replacement(sample = 1) ⇒ Object

Returns a random sample of size n, without replacement, drawn only from valid data.

Every element could only be selected once.

A sample of the same size of the vector is the vector itself.


610
611
612
613
614
615
616
# File 'lib/daru/maths/statistics/vector.rb', line 610

# Draws +sample+ values at random without replacement.
#
# Delegates to @data when the store implements #sample_without_replacement,
# otherwise to raw_sample_without_replacement.
def sample_without_replacement(sample=1)
  return raw_sample_without_replacement(sample) unless @data.respond_to?(:sample_without_replacement)

  @data.sample_without_replacement sample
end

#skew(m = nil) ⇒ Object

Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3)


462
463
464
465
466
467
468
469
470
# File 'lib/daru/maths/statistics/vector.rb', line 462

# Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3), where
# N is the number of non-missing values.
#
# m - centre to use; defaults to the vector's mean.
#
# Delegates to @data when the underlying store implements #skew itself.
def skew m=nil
  if @data.respond_to? :skew
    @data.skew
  else
    m ||= mean
    # Only non-missing values may enter the numerator: the denominator already
    # excludes them, and a nil would make the subtraction raise.
    valid = reject_values(*Daru::MISSING_VALUES).data
    th    = valid.inject(0) { |memo, val| memo + ((val - m)**3) }
    th.quo((size - indexes(*Daru::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
  end
end

#standard_deviation_population(m = nil) ⇒ Object Also known as: sdp


443
444
445
446
447
448
449
450
# File 'lib/daru/maths/statistics/vector.rb', line 443

# Population standard deviation.
#
# m - centre to use; defaults to the vector's mean.
#
# Delegates to @data when the store implements the method, otherwise takes
# the square root of #variance_population.
def standard_deviation_population(m=nil)
  m ||= mean
  return @data.standard_deviation_population(m) if @data.respond_to?(:standard_deviation_population)

  Math.sqrt(variance_population(m))
end

#standard_deviation_sample(m = nil) ⇒ Object Also known as: sds, sd


452
453
454
455
456
457
458
459
# File 'lib/daru/maths/statistics/vector.rb', line 452

# Sample standard deviation.
#
# m - centre to use; defaults to the vector's mean.
#
# Delegates to @data when the store implements the method, otherwise takes
# the square root of #variance_sample.
def standard_deviation_sample(m=nil)
  m ||= mean
  return @data.standard_deviation_sample(m) if @data.respond_to?(:standard_deviation_sample)

  Math.sqrt(variance_sample(m))
end

#standard_errorObject Also known as: se


56
57
58
# File 'lib/daru/maths/statistics/vector.rb', line 56

# Standard error: the sample standard deviation divided by the square root of
# the number of non-missing values.
def standard_error
  deviation = standard_deviation_sample
  deviation / Math.sqrt(size - count_values(*Daru::MISSING_VALUES))
end

#standardize(use_population = false) ⇒ Object

Standardize data.

Arguments

  • use_population - Pass as true if you want to use population

standard deviation instead of sample standard deviation.


539
540
541
542
543
544
545
# File 'lib/daru/maths/statistics/vector.rb', line 539

# Standardize data.
#
# == Arguments
#
# * use_population - Pass as true if you want to use population
#   standard deviation instead of sample standard deviation.
#
# == Returns
#
# The result of vector_standardized_compute(mean, sd). When the mean is nil
# or the standard deviation is 0.0 the values cannot be standardized, and a
# vector of nils with this vector's size, index and name is returned instead.
def standardize use_population=false
  m  = mean
  sd = use_population ? sdp : sds
  # Keep index and name in the degenerate case too, so the result stays
  # aligned with the source vector like the regular result is.
  return Daru::Vector.new([nil] * size, index: @index, name: @name) if m.nil? || sd == 0.0

  vector_standardized_compute m, sd
end

#sumObject


14
15
16
# File 'lib/daru/maths/statistics/vector.rb', line 14

# Sum of the vector, as computed by the underlying data store (@data).
def sum
  @data.sum
end

#sum_of_squared_deviationObject


60
61
62
# File 'lib/daru/maths/statistics/vector.rb', line 60

# Sum of squared deviations, computed as sum(x^2) - sum(x)^2 / N, where N is
# the number of non-missing values. Returned as a Float.
def sum_of_squared_deviation
  # Square only the non-missing values: the divisor already excludes missing
  # ones, and squaring a nil would raise.
  valid = reject_values(*Daru::MISSING_VALUES).data
  squares = valid.inject(0) { |a, x| x**2 + a }
  (squares - (sum**2).quo(size - count_values(*Daru::MISSING_VALUES)).to_f).to_f
end

#sum_of_squares(m = nil) ⇒ Object Also known as: ss


436
437
438
439
440
441
# File 'lib/daru/maths/statistics/vector.rb', line 436

# Sum of the squared differences between each non-missing value and +m+.
#
# m - centre to use; defaults to the vector's mean.
def sum_of_squares(m=nil)
  m ||= mean
  present = reject_values(*Daru::MISSING_VALUES).data
  present.inject(0) { |total, value| total + (value - m)**2 }
end

#value_countsObject

Count number of occurrences of each value in the Vector


392
393
394
395
396
397
398
# File 'lib/daru/maths/statistics/vector.rb', line 392

# Count the number of occurrences of each value in the Vector (nil included).
#
# Returns a Daru::Vector built from a value => occurrences hash.
def value_counts
  counts = Hash.new(0)
  @data.each { |value| counts[value] += 1 }
  Daru::Vector.new(counts)
end

#variance_population(m = nil) ⇒ Object

Population variance with denominator (N)


415
416
417
418
419
420
421
422
# File 'lib/daru/maths/statistics/vector.rb', line 415

# Population variance with denominator (N), N being the number of non-missing
# values.
#
# m - centre to use; defaults to the vector's mean.
#
# Delegates to @data when the store implements the method.
def variance_population(m=nil)
  m ||= mean
  return @data.variance_population(m) if @data.respond_to?(:variance_population)

  valid_count = size - count_values(*Daru::MISSING_VALUES)
  sum_of_squares(m).quo(valid_count).to_f
end

#variance_sample(m = nil) ⇒ Object Also known as: variance

Sample variance with denominator (N-1)


405
406
407
408
409
410
411
412
# File 'lib/daru/maths/statistics/vector.rb', line 405

# Sample variance with denominator (N-1), N being the number of non-missing
# values.
#
# m - centre to use; defaults to the vector's mean.
#
# Delegates to @data when the store implements the method.
def variance_sample(m=nil)
  m ||= mean
  return @data.variance_sample(m) if @data.respond_to?(:variance_sample)

  degrees_of_freedom = size - count_values(*Daru::MISSING_VALUES) - 1
  sum_of_squares(m).quo(degrees_of_freedom)
end

#vector_centered_compute(m) ⇒ Object


580
581
582
583
584
585
586
587
# File 'lib/daru/maths/statistics/vector.rb', line 580

# Subtracts +m+ from every non-nil value (converted to Float); nil stays nil.
#
# Delegates to @data when the store implements the method. Otherwise returns
# a Daru::Vector with this vector's index, name and dtype.
def vector_centered_compute(m)
  return @data.vector_centered_compute(m) if @data.respond_to?(:vector_centered_compute)

  centered = @data.collect { |x| x.to_f - m unless x.nil? }
  Daru::Vector.new centered, index: index, name: name, dtype: dtype
end

#vector_percentileObject

Replace each non-nil value in the vector with its percentile.


566
567
568
569
# File 'lib/daru/maths/statistics/vector.rb', line 566

# Replace each non-nil value in the vector with its percentile: its rank
# divided by the number of non-missing values, times 100.
def vector_percentile
  valid_count = size - indexes(*Daru::MISSING_VALUES).size
  ranked.recode! do |rank|
    next nil if rank.nil?

    (rank.quo(valid_count) * 100).to_f
  end
end

#vector_standardized_compute(m, sd) ⇒ Object


571
572
573
574
575
576
577
578
# File 'lib/daru/maths/statistics/vector.rb', line 571

# Standardizes every non-nil value as (x.to_f - m) / sd; nil stays nil.
#
# Delegates to @data when the store implements the method. Otherwise returns
# a Daru::Vector with this vector's index, name and dtype.
def vector_standardized_compute(m, sd)
  return @data.vector_standardized_compute(m, sd) if @data.respond_to?(:vector_standardized_compute)

  standardized = @data.collect { |x| (x.to_f - m).quo(sd) unless x.nil? }
  Daru::Vector.new standardized, index: index, name: name, dtype: dtype
end