Class: GithubStats::Data

Inherits:
Object
  • Object
show all
Includes:
MethodCacher
Defined in:
lib/githubstats/data.rb

Overview

Data class for calculations

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data) ⇒ Data

Create a data object and turn on caching


35
36
37
38
39
40
# File 'lib/githubstats/data.rb', line 35

# Build the dataset from raw pairs and register the memoized methods.
#
# Each entry of +data+ is destructured into a date string and a score; the
# date is parsed with Date.parse and the score coerced with #to_i.
#
# @param data [Enumerable] pairs of (date string, score)
# NOTE(review): enable_caching presumably comes from the included
# MethodCacher module and memoizes the listed methods -- confirm there.
def initialize(data)
  @raw = data.map do |day, count|
    Datapoint.new(Date.parse(day), count.to_i)
  end
  cached_methods = %i[to_h today streaks longest_streak streak max mean
                      std_var quartile_boundaries quartiles start_date
                      end_date]
  enable_caching cached_methods
end

Instance Attribute Details

#raw ⇒ Object (readonly) Also known as: to_a

Returns the value of attribute raw


29
30
31
# File 'lib/githubstats/data.rb', line 29

# Read-only accessor for the underlying list of datapoints.
#
# @return [Object] the value stored in @raw (the array built by #initialize)
def raw
  @raw
end

Instance Method Details

#[](date) ⇒ Object

The score for a given day


74
75
76
# File 'lib/githubstats/data.rb', line 74

# The score for a given day.
#
# Accepts either a date string (parsed with Date.parse, as before) or an
# object that already is a Date, which previously raised a TypeError because
# Date.parse only takes strings.
#
# @param date [String, Date] the day to look up
# @return [Object] whatever #to_h yields for that date
def [](date)
  key = date.is_a?(Date) ? date : Date.parse(date)
  to_h[key]
end

#end_date ⇒ Object

The end of the dataset


60
61
62
# File 'lib/githubstats/data.rb', line 60

# The date of the final datapoint in the dataset.
#
# @return [Object] the +date+ of the last element of @raw
def end_date
  final_point = @raw.last
  final_point.date
end

#gh_outliers ⇒ Object

Outliers as calculated by GitHub. Only the first 3 outliers, or just the first 1, are considered, depending on the mean and max of the set.


148
149
150
# File 'lib/githubstats/data.rb', line 148

# Outliers limited the way GitHub limits them.
#
# Only the first outlier is kept when the top score is within 6 of the mean
# or is below 15; otherwise the first three are kept.
#
# @return [Array] leading slice of #outliers
def gh_outliers
  peak = max.score
  keep_one = peak - mean < 6 || peak < 15
  limit = keep_one ? 1 : 3
  outliers.take(limit)
end

#longest_streak ⇒ Object

The longest streak


99
100
101
102
# File 'lib/githubstats/data.rb', line 99

# The streak containing the most datapoints.
#
# @return [Array] the longest entry of #streaks, or an empty array when
#   there are no streaks at all
def longest_streak
  # max_by yields nil for an empty list; fall back to an empty streak.
  streaks.max_by(&:length) || []
end

#max ⇒ Object

The highest scoring day


115
116
117
# File 'lib/githubstats/data.rb', line 115

# The highest scoring day.
#
# @return [Object, nil] the element of @raw with the greatest +score+
#   (nil when @raw is empty)
def max
  @raw.max_by { |point| point.score }
end

#mean ⇒ Object

The mean score


122
123
124
# File 'lib/githubstats/data.rb', line 122

# The arithmetic mean of all scores.
#
# @return [Float] sum of #scores divided by the number of datapoints
def mean
  total = scores.inject { |sum, score| sum + score }
  total / @raw.length.to_f
end

#outliers ⇒ Object

Outliers of the set


139
140
141
142
# File 'lib/githubstats/data.rb', line 139

# Outliers of the set.
#
# A score is an outlier when its distance from the mean, measured in units
# of #std_var, exceeds GITHUB_MAGIC. Sets with fewer than five distinct
# scores are reported as having no outliers.
#
# @return [Array] distinct outlier scores, in order of first appearance
def outliers
  distinct = scores.uniq
  return [] if distinct.size < 5
  distinct.select do |value|
    deviation = (mean - value) / std_var
    deviation.abs > GITHUB_MAGIC
  end
end

#pad(fill_value = -1, data = @raw.clone) ⇒ Object

Pad the dataset to full week increments


190
191
192
193
# File 'lib/githubstats/data.rb', line 190

# Pad the dataset to full week increments.
#
# Delegates to _pad twice: first with index 0 and target 0, then with index
# -1 and target 6.
# NOTE(review): presumably these pad the front back to the first weekday and
# the tail forward to the last weekday -- confirm against _pad.
#
# @param fill_value [Object] value handed to _pad for the added entries
# @param data [Array] dataset to pad; defaults to a clone of @raw
# @return [Object] the result of the second _pad call
def pad(fill_value = -1, data = @raw.clone)
  front_padded = _pad(data, 0, fill_value, 0)
  _pad(front_padded, -1, fill_value, 6)
end

#quartile(score) ⇒ Object

Return the quartile of a given score


182
183
184
185
# File 'lib/githubstats/data.rb', line 182

# Return the quartile of a given score.
#
# The quartile is the number of entries in #quartile_boundaries that the
# score strictly exceeds.
#
# @param score [Numeric] the score to classify
# @return [Integer, nil] quartile index, or nil when the score is negative
#   or greater than the maximum score
def quartile(score)
  out_of_range = score.negative? || score > max.score
  return nil if out_of_range
  quartile_boundaries.count { |upper| score > upper }
end

#quartile_boundaries ⇒ Object

The boundaries of the quartiles. The index represents the quartile number. The value is the upper bound of the quartile (inclusive).


157
158
159
160
161
162
163
164
165
166
167
# File 'lib/githubstats/data.rb', line 157

# The boundaries of the quartiles.
#
# The index is the quartile number and the value is that quartile's
# inclusive upper bound. The result always has five entries; missing low
# entries are filled with zeroes.
#
# @return [Array<Integer>] five ascending boundary values
def quartile_boundaries # rubocop:disable Metrics/AbcSize
  # Highest score once the GitHub-style outliers are set aside.
  top = scores.reject { |value| gh_outliers.include?(value) }.max
  ladder = (1..top).to_a
  ladder = [0, 0, 0] if ladder.empty?
  # Pick the entries at the 1/4, 2/4 and 3/4 marks of 1..top. A negative
  # index wraps to the end of the array, exactly as in the plain lookup.
  mids = [1, 2, 3].map { |q| ladder[q * ladder.size / 4 - 1] }
  bounds = [*mids, max.score].uniq.sort
  Array.new(5 - bounds.size, 0) + bounds
end

#quartiles ⇒ Object

Return the list split into quartiles


172
173
174
175
176
177
# File 'lib/githubstats/data.rb', line 172

# Return the list split into quartiles.
#
# @return [Array<Array>] five buckets; each datapoint of @raw is appended to
#   the bucket whose index is #quartile of its score
def quartiles
  buckets = Array.new(5) { [] }
  @raw.each do |point|
    buckets[quartile(point.score)] << point
  end
  buckets
end

#scores ⇒ Object

Scores in chronological order


81
82
83
# File 'lib/githubstats/data.rb', line 81

# The score of every datapoint, in the same order as @raw.
#
# @return [Array] one score per datapoint
def scores
  @raw.map { |point| point.score }
end

#start_date ⇒ Object

The start of the dataset


54
55
56
# File 'lib/githubstats/data.rb', line 54

# The date of the first datapoint in the dataset.
#
# @return [Object] the +date+ of the first element of @raw
def start_date
  opening_point = @raw.first
  opening_point.date
end

#std_var ⇒ Object

The sample standard deviation of the scores (two pass)


129
130
131
132
133
134
# File 'lib/githubstats/data.rb', line 129

# Spread of the scores, computed in two passes.
#
# Sums the squared distance of every score from #mean, divides by one less
# than the number of datapoints, and takes the square root (i.e. the sample
# standard deviation, despite the method name).
#
# @return [Float]
def std_var
  squared_error = 0
  @raw.each do |point|
    squared_error = (point.score.to_f - mean)**2 + squared_error
  end
  degrees_of_freedom = @raw.size - 1
  Math.sqrt(squared_error / degrees_of_freedom)
end

#streak ⇒ Object

The current streak, or an empty array if there is no current streak


107
108
109
110
# File 'lib/githubstats/data.rb', line 107

# The streak that is still running.
#
# The most recent streak counts as current when its final datapoint is dated
# yesterday or later.
#
# @return [Array] the current streak, or an empty array when there is none
def streak
  latest = streaks.last
  return [] if latest.nil?
  cutoff = Date.today - 1
  latest.last.date < cutoff ? [] : latest
end

#streaks ⇒ Object

All streaks for a user


88
89
90
91
92
93
94
# File 'lib/githubstats/data.rb', line 88

# All streaks for a user.
#
# A streak is a maximal run of consecutive datapoints whose score is not
# zero; zero-score datapoints separate streaks and are not included.
#
# @return [Array<Array>] the streaks, in dataset order
def streaks
  # A nil chunk key tells Enumerable#chunk to drop the element, so the
  # zero-score days vanish and only the runs between them remain.
  runs = @raw.chunk { |point| true unless point.score.zero? }
  runs.map { |_key, points| points }
end

#to_h ⇒ Object

The data as a hash where the keys are dates and values are scores


45
46
47
48
49
# File 'lib/githubstats/data.rb', line 45

# The data as a hash where the keys are dates and values are scores.
#
# The hash has a default value of 0, so looking up a date that is not in the
# dataset yields 0 rather than nil. If a date occurs more than once, the
# later datapoint wins.
#
# @return [Hash] date => score
def to_h
  # Fill a single hash in place; merging inside a reduce copies the whole
  # accumulator for every datapoint.
  @raw.each_with_object(Hash.new(0)) do |point, table|
    table[point.date] = point.score
  end
end

#today ⇒ Object

The score for today


67
68
69
# File 'lib/githubstats/data.rb', line 67

# The score for today.
#
# @return [Object] the #to_h entry keyed by Date.today
def today
  by_date = to_h
  by_date[Date.today]
end