Class: GithubStats::Data

Inherits:
Object
  • Object
show all
Includes:
MethodCacher
Defined in:
lib/githubstats/data.rb

Overview

Data class for calculations

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data) ⇒ Data

Create a data object and turn on caching



31
32
33
34
35
# File 'lib/githubstats/data.rb', line 31

# Build the dataset from raw (date, score) pairs and turn on caching.
#
# @param data [Enumerable] pairs whose first element is a date string
#   accepted by Date.parse and whose second element responds to #to_i
def initialize(data)
  @raw = data.map do |date_string, score|
    Datapoint.new(Date.parse(date_string), score.to_i)
  end
  # Methods whose names are passed to enable_caching (presumably supplied by
  # the included MethodCacher -- confirm its exact semantics there).
  cached_methods = %i[to_h today streaks longest_streak streak max mean
                      std_var quartile_boundaries quartiles]
  enable_caching cached_methods
end

Instance Attribute Details

#raw ⇒ Object (readonly) Also known as: to_a

Returns the value of attribute raw.



25
26
27
# File 'lib/githubstats/data.rb', line 25

# Reader for the underlying dataset.
#
# @return [Object] the current value of @raw (the list of datapoints built
#   by the constructor)
def raw
  @raw
end

Instance Method Details

#[](date) ⇒ Object

The score for a given day



56
57
58
# File 'lib/githubstats/data.rb', line 56

# The score for a given day.
#
# @param date [String, #to_date] a date string understood by Date.parse, or
#   a date-like object that converts itself with #to_date (Date, DateTime,
#   Time)
# @return [Object] the value #to_h holds for that day
# @raise [ArgumentError, TypeError] whatever Date.parse raises for input it
#   cannot handle
def [](date)
  # Date.parse only accepts strings, so date-like objects are converted with
  # #to_date instead of being rejected with a TypeError.
  day = date.respond_to?(:to_date) ? date.to_date : Date.parse(date)
  to_h[day]
end

#gh_outliers ⇒ Object

Outliers as calculated by GitHub. Only the first 3 or the first 1 are considered, depending on the mean and max of the set.



130
131
132
# File 'lib/githubstats/data.rb', line 130

# Outliers the way GitHub picks them: a leading slice of #outliers whose
# length depends on the mean and the top score.
#
# @return [Array] only the first outlier when the top score is less than 6
#   above the mean or is below 15; otherwise the first three
def gh_outliers
  top_score = max.score
  keep = if top_score - mean < 6 || top_score < 15
           1
         else
           3
         end
  outliers.take(keep)
end

#longest_streak ⇒ Object

The longest streak



81
82
83
84
# File 'lib/githubstats/data.rb', line 81

# The longest streak.
#
# @return [Array] the streak from #streaks with the greatest length, or an
#   empty array when there are no streaks
def longest_streak
  # max_by yields nil for an empty list; fall back to an empty streak.
  streaks.max_by { |run| run.length } || []
end

#max ⇒ Object

The highest scoring day



97
98
99
# File 'lib/githubstats/data.rb', line 97

# The highest scoring day.
#
# @return [Object, nil] the element of @raw with the greatest #score, or nil
#   when @raw is empty
def max
  @raw.max_by { |point| point.score }
end

#mean ⇒ Object

The mean score



104
105
106
# File 'lib/githubstats/data.rb', line 104

# The mean score.
#
# @return [Float] the sum of #scores divided by the number of datapoints
def mean
  total = scores.inject { |sum, score| sum + score }
  # Divide by a Float so integer scores do not truncate.
  total / @raw.size.to_f
end

#outliers ⇒ Object

Outliers of the set



121
122
123
124
# File 'lib/githubstats/data.rb', line 121

# Outliers of the set.
#
# @return [Array] the distinct scores whose distance from #mean, measured in
#   units of #std_var, exceeds GITHUB_MAGIC; empty when there are fewer than
#   five distinct scores
def outliers
  distinct = scores.uniq
  return [] if distinct.size < 5
  distinct.select do |score|
    deviation = (mean - score) / std_var
    deviation.abs > GITHUB_MAGIC
  end
end

#pad(fill_value = -1, data = @raw.clone) ⇒ Object

Pad the dataset to full week increments



172
173
174
175
# File 'lib/githubstats/data.rb', line 172

# Pad the dataset to full week increments.
#
# @param fill_value [Object] value handed to _pad for the added entries
# @param data [Array] dataset to pad; defaults to a clone of @raw
# @return [Object] the result of the second _pad call
def pad(fill_value = -1, data = @raw.clone)
  # First pass works on index 0 with goal 0, second on index -1 with goal 6
  # (presumably the weekday the first/last entry must land on -- see _pad).
  front_padded = _pad(data, 0, fill_value, 0)
  _pad(front_padded, -1, fill_value, 6)
end

#quartile(score) ⇒ Object

Return the quartile of a given score



164
165
166
167
# File 'lib/githubstats/data.rb', line 164

# Return the quartile of a given score.
#
# @param score [Numeric] the score to classify
# @return [Integer, nil] how many entries of #quartile_boundaries lie
#   strictly below the score, or nil when the score is negative or greater
#   than the top score
def quartile(score)
  if score < 0 || score > max.score
    nil
  else
    quartile_boundaries.count { |upper_bound| upper_bound < score }
  end
end

#quartile_boundaries ⇒ Object

The boundaries of the quartiles. The index represents the quartile number. The value is the upper bound of the quartile (inclusive).



139
140
141
142
143
144
145
146
147
148
149
# File 'lib/githubstats/data.rb', line 139

# The boundaries of the quartiles. The index is the quartile number and the
# value is that quartile's inclusive upper bound.
#
# @return [Array<Integer>] five ascending bounds, left-padded with zeros when
#   fewer than five distinct bounds exist
def quartile_boundaries # rubocop:disable Metrics/AbcSize
  # Evaluate gh_outliers once up front instead of once per score inside the
  # reject block.
  excluded = gh_outliers
  top = scores.reject { |x| excluded.include? x }.max
  range = (1..top).to_a
  range = [0] * 3 if range.empty?
  mids = (1..3).map do |q|
    # For ranges shorter than four entries this can be -1, which selects the
    # last element of the range.
    index = q * range.size / 4 - 1
    range[index]
  end
  bounds = (mids + [max.score]).uniq.sort
  [0] * (5 - bounds.size) + bounds
end

#quartiles ⇒ Object

Return the list split into quartiles



154
155
156
157
158
159
# File 'lib/githubstats/data.rb', line 154

# Return the list split into quartiles.
#
# @return [Array<Array>] five buckets; each datapoint is appended to the
#   bucket whose index is #quartile of its score
def quartiles
  buckets = Array.new(5) { [] }
  @raw.each { |point| buckets[quartile(point.score)] << point }
  buckets
end

#scores ⇒ Object

Scores in chronological order



63
64
65
# File 'lib/githubstats/data.rb', line 63

# Scores in the same order as the datapoints in @raw.
#
# @return [Array] the #score of every datapoint
def scores
  @raw.map { |point| point.score }
end

#std_var ⇒ Object

The sample standard deviation (two pass)



111
112
113
114
115
116
# File 'lib/githubstats/data.rb', line 111

# Sample standard deviation of the scores, computed in two passes: the
# squared deviations from #mean are summed, divided by (size - 1), and the
# square root of that quotient is returned.
#
# @return [Float]
def std_var
  squared_deviations = @raw.map { |point| (point.score.to_f - mean)**2 }
  sum_of_squares = squared_deviations.inject(0) { |total, sq| sq + total }
  Math.sqrt(sum_of_squares / (@raw.size - 1))
end

#streak ⇒ Object

The current streak, or an empty array if there is none



89
90
91
92
# File 'lib/githubstats/data.rb', line 89

# The current streak.
#
# @return [Array] the most recent streak when its final day is today or
#   yesterday; otherwise an empty array
def streak
  current = streaks.last
  return [] unless current
  yesterday = Date.today - 1
  current.last.date < yesterday ? [] : current
end

#streaks ⇒ Object

All streaks for a user



70
71
72
73
74
75
76
# File 'lib/githubstats/data.rb', line 70

# All streaks for a user.
#
# @return [Array<Array>] each run of consecutive datapoints with a non-zero
#   score, in their original order; zero-score datapoints are left out
def streaks
  # A nil chunk key drops the element and ends the current chunk, so every
  # zero-score day acts as a separator between runs.
  @raw.chunk { |point| point.score.zero? ? nil : true }.map(&:last)
end

#to_h ⇒ Object

The data as a hash where the keys are dates and values are scores



40
41
42
43
44
# File 'lib/githubstats/data.rb', line 40

# The data as a hash where the keys are dates and the values are scores.
#
# @return [Hash] date => score for every datapoint in @raw; lookups of
#   missing dates return 0, and a later datapoint replaces an earlier one
#   with the same date
def to_h
  # Fill a single hash in place; merging inside reduce copied the whole
  # accumulator for every datapoint.
  @raw.each_with_object(Hash.new(0)) do |point, table|
    table[point.date] = point.score
  end
end

#today ⇒ Object

The score for today



49
50
51
# File 'lib/githubstats/data.rb', line 49

# The score for today.
#
# @return [Object] the value #to_h holds for Date.today
def today
  current_day = Date.today
  to_h[current_day]
end