Module: Kalibera

Defined in:
lib/kalibera/data.rb

Defined Under Namespace

Classes: ConfRange, Data

Constant Summary collapse

# Lookup table of Float values used by student_t_quantile95, which reads
# entry (ndeg - 1) as the 95% quantile for ndeg degrees of freedom.
#
# The table is embedded as text: a base64 string that is decoded, then
# bzip2-decompressed (RBzip2), then split on whitespace, with every token
# converted via Float(). All of this happens once, when the constant is
# evaluated.
#
# Note on the literal: `"""\` is an empty string ("") immediately followed by
# a normal double-quoted string; adjacent literals are concatenated, and the
# trailing backslash swallows the first newline. The closing `"""` works the
# same way in reverse.
CONSTANTS =
RBzip2::Decompressor.new(StringIO.new(Base64.decode64("""\
QlpoOTFBWSZTWbTS4VUAC9bYAEAQAAF/4GAOGZ3e40HH2YJERUKomGbCNMAAtMBaAkCOP9U0/R+q
qNCqfjAqVGOY3+qk96qmmIp+CCVNDD/1VGjfqkBJpIElG6uN92vE/PP+5IxhMIIgAbOxEMKLMVSq
VWtZmZaEklAAAttoAAAAAAAAAAAAEklAAEklABttkksklkkknVu2dX1vW9yWrkuXJJJJJJJJJJKK
JWsS5dq7k3RRRbu2222227oAAFQqFCAjkB0w7eMpKWy3bVI42225QlbQAAAAAAlbQbbUqkolE7JZ
jjjmS5LluZkuZmZmZmZmZmZvZhOYnktttsskiaSSToAAA5znOZmMzGTSSJJJ1JO+7gLbR067u48V
bZIAABJCSSjElG436ySek9f1/X3vZ72+7wPk5bbJG0kYTYA2+fHiolu7u8S62JEpjmZ3YS40AEt3
mb8lzXwEpar+9P3s9vAq1o23mt3oaJmZvJAPQu6AlL3s9ojg6rRBmOQaKRb+zbOaL0FMxZKBTm9O
vLmUJuqwVc+KevulFMM/JOzWTMN5Aa7cO5hmZuioHbboGzxzZLFATHYvXg5SUqCWxmre6As43wzV
30514PDn2m7ema93M9u9199F6QCSfsxJ7wA5R3bTsglUQaJLy4wKYu895byRoTJb7vXsGwZzhPZ0
xOdgtMncj5PGCPeKFPCgenS83zcvnQwGfm3prLnb6bcxKJABZeOrvfNAUNTTobmLQ+fOHAjxo2WE
JaevegHIDVvW+kRAD2TpoeJWFQDKtubzWOr6EFU3xs3rojhW98aghZQmIWXe9sUXKEXKvWvk6bTH
GURStAQ1M7OzF07ui6Q2DYl1NojMzlvrwcO6+uY7V3ZFerzz3sIqJsGzcJN2EAAew/vvqqvvvvi7
xXjhGH3nGNKv2u+Bt8k4USU+SaoLuU6HNmQoYyFTN3huLP721dwHIqQzrqVhjz2+UQw0ezok7gQl
wyZ2YM0hgPVaZaOLK9q3TtGiaO3Br4xGyy7HfAWw72nvLmaGPeSz2c/FkuN7Qj1guqtgUU1NHry2
5h7KvWgs2jglhCZpYpa8qbl3PrrEDL1Jg/1VrZ8IthQhNKLznYMPozi9arWla2BODhV6yuIKmzsa
zhOb3kxyjcD0ExuXvdys3WRxxYEQszLy8jxqTPZB7UQJ2xbk3YGV2QcdPN2HYuoVkWxUhtErw9u3
0mdw5HiO0WVtRUCEyxEAOdIHV1sWmbReT4iMTzRsB7Q36e72rpwePnrPggpSxjlZ9Lm8YJrgXDzJ
/30MSDPwzV8s+g4Rcpy3a8c7Y1jxgHJQs8+MyLsudmYSFySWm3OrSn5p3qb++m8fvHUGfCfNCbol
RSZ6wp+ZM14k8S+SKwqES7PQ72DFK4PTiMCA6LbvuSSSJ1R3iJAF10sQYlhpp2GSzWBw3ty+HjLj
HCDTxku3yHPrNvTXekcBSOuzMfOvy3dybchXeLxvXN3vKTN/BdbwUlqXY+g4sWMoHTQT61MeXIMf
PhgYq8KhOEbqeMqoyhWQp03eOOpV/LVvXl2X71ztaX7tMZJ5gBCshDGQCskDme9zu9b1dcgB1khU
mmEk2yTySG2QPmEJp3m/jM+93nYSoe7YEPmExITTpITut87rehm+UgF13IG0nUk52+95Z+9wg49Y
SUraiKIYo3UOvdtq6bVDDmbPTmhtyLfS1LCPXQmYLD7c9lu5ZfdaWSGn1m82kCd4xhYOuVUH33zB
Kh5IsOsxNe+yB7XNd77Xc05kD5h1Jpk0hnLJpnrzXe9xdXpOJfrA4kzdhvLB1tzn3e6OqyaeM8m9
2HWH2m59jnvrO2w+9TTFDibQffe7880+cfu08zjLw/Mbx4faLWcMbzQ8vDWj6uDmr75CuG9hzAOl
1Wk0mWKqglrLcmu/uw/IVcPCtGw3hY3TgkN0PqENShQhpj5ZN7dzethJScvIGNEPPE7lcJTwYM8t
7zB5zMNkYZmHc1cbY1RirWMmuHzEFi7P04mPluFvMqnoirRUEEB3taRpio2svFVXtMcub+PuTmqL
vlSOqbSO996bd/e0AoLJ1hV97AmbtfxIsAkWBILJAUgAoEiySCwgsICwDqAZlkiyEWBFhBSCwqSc
9zeoAskUBYRYRYb3rmeHWXZOMgsFgLAWBq2RQWRcSVIpFikWCwWF3mAoKKCgpr9TE21BYqy8zDbW
LFFiixRRXLpcoooovmqiirm/rmlRVWl57xynNqqo8tVVVy1VVyrRVVVFb39rSovrvKitpVR/Woo5
So32dxukUUUz2YY1FFLbF1u91TbbZUWNsqVrM3336515OpjWP1DMaFZ5ufsDOXTHLBSsrN85f1/G
Z97s999hpF0nwOBV8gYfoGPnQqiKzPLcnpOky/b652qCQ9ti4PbvcjqmneMEtaV17cnt6NKZYybS
TwHdBK34b2wy3CJ1qqi8qpigCKsVSvFUFMUMtVTFPjBoq+K5AGXzuffdyXtm0+ebv5HdMVnN0mMe
++473+/HTWnzd0OuWnHE20ZtC7oaZvN/jvn9efa9UHKC++prtL9ZWDu7c73vvaOTiKbTmUPJ7Pv2
jEFDnO6Xe/deOG0+v7Cn6z8zO2VH9TMse/fvt67+w77n7QaQffsxOJfqGteOa/HdYe1Tm6LFOpUz
VMR/aPvadm0zXsnMppiffYG27ZXfslV2hAJrPGmKsVfe9fSO8vVnru7tbzSU1a9cGv0qsQEdhHK7
rJBfbPMSKZc3wmij3ULrhE9nIwoDMp4WAK2GkIKIqrHAK0Bjvo7sA2VZ941ggrwIsfGLZTHvGSZR
8UGKDKFAAcC8U45fTlKQKM8fnx+IAr3rmwtVbfFhj4VZqQviRXhavLu9zOQWISS0w9PxFYCEfK1l
9GK0GhrKxr5CwCveB4XDEsPYWKwfHDgrBnZT4XW5dlE2tW7FAR8RGW0XMy1MQoDwyQ+Hnmvet5I/
HrTVYQJbJ1e3y6B7LoCh5qyXWO03X5WbxWT0UvY55cyRbhmB8ib6lkhRo5USRAoLFA4WELV93ZV/
DKh2MIhnIWCPBLEh3FUTBSxJC7h4Z15qTFPTRmpe1Ldj1rlkVnAKHDySryior3OheiTPKZY2GaQ6
N2YyvJh9wuO75VOarCWLEUdLavAs2RShYOntLrMVabUAyDnTJIQ4deJa92pAWd6KBz+F3JFOFCQt
NLhVQA=="""))).read.split().map { |x| Float(x) }

Class Method Summary collapse

Class Method Details

.bootstrap_geomean(l_data_a, l_data_b, iterations = 10000, confidence = '0.95') ⇒ Object



351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
# File 'lib/kalibera/data.rb', line 351

# Bootstraps a confidence range for the geometric mean of pairwise ratios.
#
# Every round resamples each (a, b) pair through +bootstrap_sample+, takes
# mean(sample of a) / mean(sample of b), and records the geometric mean of
# those ratios. The recorded values are sorted and passed to confidence_slice.
#
# Arguments:
# l_data_a, l_data_b -- equally sized lists whose elements respond to
#                       bootstrap_sample
# iterations         -- number of bootstrap rounds
# confidence         -- confidence level, forwarded to confidence_slice
#
# Raises RuntimeError ("lists need to match") when the list sizes differ.
def self.bootstrap_geomean(l_data_a, l_data_b, iterations=10000, confidence='0.95')
  raise "lists need to match" if l_data_a.size != l_data_b.size

  round_results = iterations.times.map do
    pair_ratios = l_data_a.zip(l_data_b).map do |data_a, data_b|
      # Draw both resamples before averaging, a first and then b.
      resampled_a = data_a.bootstrap_sample
      resampled_b = data_b.bootstrap_sample
      mean(resampled_a) / mean(resampled_b)
    end
    geomean(pair_ratios)
  end

  confidence_slice(round_results.sort, confidence)
end

.confidence_slice(means, confidence = "0.95") ⇒ Object

Returns a ConfRange (lower, median, upper), where: lower is the lower bound of the confidence interval, median is the median value of the data, and upper is the upper bound of the confidence interval. The interval is 95% unless another confidence level is passed.

Arguments: means – the list of means (need not be sorted).



76
77
78
79
80
81
82
# File 'lib/kalibera/data.rb', line 76

# Builds a ConfRange (lower bound, median, upper bound) from a list of means.
#
# Arguments:
# means      -- the list of means; it is sorted into a copy here, so the
#               caller's list need not be sorted and is left untouched
# confidence -- confidence level, forwarded to confidence_slice_indicies
def self.confidence_slice(means, confidence="0.95")
  ordered = means.sort
  first_idx, centre_idxs, end_idx = confidence_slice_indicies(ordered.size, confidence)

  # An even-sized list has two middle positions; the median is their average.
  centre_value = mean(ordered.values_at(*centre_idxs))

  # end_idx is exclusive, so the last value inside the range sits one before it.
  ConfRange.new(ordered[first_idx], centre_value, ordered[end_idx - 1])
end

.confidence_slice_indicies(length, confidence_level = BigDecimal.new('0.95')) ⇒ Object

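Returns a triple (lower, mean_indicies, upper) such that, for a sorted list l of the given length, the slice l[lower...upper] (upper is exclusive) covers confidence_level of all samples. mean_indicies is an array of one or two indices that mark the middle (median) position: two indices when length is even, one when it is odd.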

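Arguments: length – the number of samples. confidence_level – desired level of confidence as a BigDecimal, or a value that can be converted to one such as the String '0.95'; a Float is rejected.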



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/kalibera/data.rb', line 90

# Computes the index bounds of a symmetric confidence slice over a sorted
# list of +length+ samples.
#
# Returns [lower_index, mean_indicies, upper_index]:
# lower_index   -- first index inside the slice
# mean_indicies -- the middle position(s): two indices for an even length,
#                  one for an odd length
# upper_index   -- index one past the last element inside the slice
#
# Arguments:
# length           -- number of samples
# confidence_level -- desired confidence as a BigDecimal or anything
#                     Kernel#BigDecimal accepts (e.g. the String '0.95')
#
# Raises RuntimeError when confidence_level is a Float, because binary
# floating point would make the bound arithmetic below inexact.
def self.confidence_slice_indicies(length, confidence_level=BigDecimal('0.95'))
  if confidence_level.is_a?(Float)
    raise "confidence_level must not be a Float; pass a String or BigDecimal"
  end

  # Kernel#BigDecimal replaces BigDecimal.new, which was removed in
  # bigdecimal 2.0. It returns BigDecimal arguments as-is.
  confidence_level = BigDecimal(confidence_level)

  # Fraction of samples cut off at each end.
  exclude = (1 - confidence_level) / 2

  half = length / 2 # integer division
  mean_indicies = length.even? ? [half - 1, half] : [half]

  # Round outwards (down at the low end, up at the high end) so the slice
  # never covers less than the requested share.
  lower_index = Integer((exclude * length).round(0, BigDecimal::ROUND_DOWN))
  upper_index = Integer(((1 - exclude) * length).round(0, BigDecimal::ROUND_UP))

  [lower_index, mean_indicies, upper_index]
end

.geomean(l) ⇒ Object



117
118
119
# File 'lib/kalibera/data.rb', line 117

# Geometric mean of the numbers in +l+: the product of all elements raised
# to the power 1 / (number of elements).
def self.geomean(l)
  product = l.reduce(1) { |acc, value| acc * value }
  exponent = 1.0 / l.size.to_f
  product ** exponent
end

.mean(l) ⇒ Object



113
114
115
# File 'lib/kalibera/data.rb', line 113

# Arithmetic mean of the numbers in +l+, always computed with a Float
# divisor (so an empty list yields NaN rather than raising).
def self.mean(l)
  total = l.reduce(0) { |acc, value| acc + value }
  total / l.size.to_f
end

.student_t_quantile95(ndeg) ⇒ Object

Look up the 95% quantile from constant table.



55
56
57
58
59
60
61
# File 'lib/kalibera/data.rb', line 55

# Looks up the 95% quantile for +ndeg+ degrees of freedom in CONSTANTS,
# where entry 0 belongs to ndeg == 1.
#
# Once ndeg runs past the end of the table the last entry is returned,
# since the quantile converges.
# NOTE(review): ndeg == 0 also maps to index -1 and so returns the last
# entry; confirm callers never pass ndeg < 1.
def self.student_t_quantile95(ndeg)
  position = ndeg - 1
  CONSTANTS[position >= CONSTANTS.size ? -1 : position]
end