Class: Daru::Core::GroupBy
- Inherits:
-
Object
- Object
- Daru::Core::GroupBy
- Defined in:
- lib/daru/core/group_by.rb
Instance Attribute Summary collapse
-
#groups ⇒ Object
readonly
Returns the value of attribute groups.
Instance Method Summary collapse
-
#count ⇒ Object
Count groups, excludes missing values.
-
#each_group ⇒ Object
Iterate over each group created by group_by.
-
#first ⇒ Object
Get the first group.
-
#get_group(group) ⇒ Object
Returns one of the selected groups as a DataFrame.
-
#head(quantity = 5) ⇒ Object
Get the top ‘n’ groups.
-
#initialize(context, names) ⇒ GroupBy
constructor
A new instance of GroupBy.
-
#last ⇒ Object
Get the last group.
-
#max ⇒ Object
Find the max element of each numeric vector group.
-
#mean ⇒ Object
Calculate mean of numeric groups, excluding missing values.
-
#median ⇒ Object
Calculate the median of numeric groups, excluding missing values.
-
#min ⇒ Object
Find the min element of each numeric vector group.
-
#reduce(init = nil) ⇒ Object
Iteratively applies a function to the values in a group and accumulates the result.
-
#size ⇒ Object
Get a Daru::Vector of the size of each group.
-
#std ⇒ Object
Calculate sample standard deviation of numeric vector groups, excluding missing values.
-
#sum ⇒ Object
Calculate sum of numeric groups, excluding missing values.
-
#tail(quantity = 5) ⇒ Object
Get the bottom ‘n’ groups.
Constructor Details
#initialize(context, names) ⇒ GroupBy
Returns a new instance of GroupBy.
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/daru/core/group_by.rb', line 14
#
# Builds the grouping table. The values of the +names+ vectors are read row
# by row into key tuples; every distinct tuple becomes a key of @groups and is
# mapped to whatever +all_indices_for+ returns for that tuple.
#
# @param context [Object] the data being grouped; it must respond to
#   +vectors+ and to +[]+ with a vector name (presumably a Daru::DataFrame —
#   confirm against callers)
# @param names [Array] names of the vectors to group by
def initialize(context, names)
  @groups = {}
  @non_group_vectors = context.vectors.to_a - names
  @context = context

  # One array of values per grouping vector, zipped into one tuple per row.
  key_columns = names.map { |name| context[name].to_a }
  first_column, *other_columns = key_columns
  tuples = first_column.zip(*other_columns)

  # Distinct tuples are ordered by their non-nil components.
  sorted_keys = tuples.uniq.sort do |left, right|
    # zip only ever produces arrays, so this guard is purely defensive.
    next(left ? 1 : -1) unless left && right

    left.compact <=> right.compact
  end

  sorted_keys.each { |key| @groups[key] = all_indices_for(tuples, key) }
  @groups.freeze
end
Instance Attribute Details
#groups ⇒ Object (readonly)
Returns the value of attribute groups.
4 5 6 |
# File 'lib/daru/core/group_by.rb', line 4
#
# Read-only accessor for the grouping table assembled by the constructor.
#
# @return [Hash] the frozen hash stored in @groups; its keys are the distinct
#   key tuples and its values are what +all_indices_for+ returned for each
def groups
  @groups
end
Instance Method Details
#count ⇒ Object
Count groups, excludes missing values.
153 154 155 156 |
# File 'lib/daru/core/group_by.rb', line 153
#
# Builds a DataFrame with one column per non-grouping vector, where every
# column is the per-group size vector returned by #size.
#
# NOTE(review): the summary says missing values are excluded, but nothing
# visible here filters them — each column reuses the same #size result.
# Confirm the intended behavior.
#
# @return [Daru::DataFrame] ordered by the non-grouping vector names
def count
  column_count = @non_group_vectors.size
  # Array.new(n, obj) repeats the very same object, exactly like [obj] * n.
  columns = Array.new(column_count, size)
  Daru::DataFrame.new(columns, order: @non_group_vectors)
end
#each_group ⇒ Object
Iterate over each group created by group_by. Each group is yielded to the block as a DataFrame.
8 9 10 11 12 |
# File 'lib/daru/core/group_by.rb', line 8
#
# Walks the group keys in their stored order and yields the result of
# #get_group for each one.
#
# @yield [group] the value returned by #get_group for the current key
# @return [Array] the array of group keys that was iterated
def each_group
  group_keys = groups.keys
  group_keys.each { |key| yield get_group(key) }
end
#first ⇒ Object
Get the first group
49 50 51 |
# File 'lib/daru/core/group_by.rb', line 49
#
# Shorthand for +head(1)+.
#
# @return [Object] whatever #head returns for a quantity of 1
def first
  head(1)
end
#get_group(group) ⇒ Object
Returns one of the selected groups as a DataFrame.
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
# File 'lib/daru/core/group_by.rb', line 190
#
# Assembles the rows belonging to one group into a new DataFrame.
#
# NOTE(review): an unknown +group+ makes the @groups lookup return nil, which
# surfaces as a NoMethodError further down — confirm whether callers rely on it.
#
# @param group [Array] a key of @groups (a key tuple)
# @return [Daru::DataFrame] the selected rows, using the context's vector order
def get_group(group)
  positions = @groups[group]

  # Collect every vector as a plain array, then flip columns into rows.
  columns = []
  @context.each_vector { |vector| columns << vector.to_a }
  row_table = columns.transpose
  selected_rows = positions.map { |position| row_table[position] }

  # Prefer the context's own index entries; fall back to the raw positions
  # when the index lookup raises IndexError.
  group_index =
    begin
      @context.index[positions]
    rescue IndexError
      positions
    end

  Daru::DataFrame.rows(selected_rows, index: group_index, order: @context.vectors)
end
#head(quantity = 5) ⇒ Object
Get the top ‘n’ groups
77 78 79 |
# File 'lib/daru/core/group_by.rb', line 77
#
# Delegates to +select_groups_from+ with the +:first+ selector.
#
# @param quantity [Integer] how many entries to take (defaults to 5)
# @return [Object] the result of +select_groups_from+
def head(quantity = 5)
  select_groups_from(:first, quantity)
end
#last ⇒ Object
Get the last group
54 55 56 |
# File 'lib/daru/core/group_by.rb', line 54
#
# Shorthand for +tail(1)+.
#
# @return [Object] whatever #tail returns for a quantity of 1
def last
  tail(1)
end
#max ⇒ Object
Find the max element of each numeric vector group.
165 166 167 |
# File 'lib/daru/core/group_by.rb', line 165
#
# Delegates to +apply_method+ with the +:numeric+ selector and +:max+; per the
# method summary this gives the maximum of each numeric vector per group.
#
# @return [Object] the result of +apply_method+
def max
  apply_method(:numeric, :max)
end
#mean ⇒ Object
Calculate mean of numeric groups, excluding missing values.
121 122 123 |
# File 'lib/daru/core/group_by.rb', line 121
#
# Delegates to +apply_method+ with the +:numeric+ selector and +:mean+; per
# the method summary this gives the mean of each numeric vector per group.
#
# @return [Object] the result of +apply_method+
def mean
  apply_method(:numeric, :mean)
end
#median ⇒ Object
Calculate the median of numeric groups, excluding missing values.
126 127 128 |
# File 'lib/daru/core/group_by.rb', line 126
#
# Delegates to +apply_method+ with the +:numeric+ selector and +:median+; per
# the method summary this gives the median of each numeric vector per group.
#
# @return [Object] the result of +apply_method+
def median
  apply_method(:numeric, :median)
end
#min ⇒ Object
Find the min element of each numeric vector group.
170 171 172 |
# File 'lib/daru/core/group_by.rb', line 170
#
# Delegates to +apply_method+ with the +:numeric+ selector and +:min+; per the
# method summary this gives the minimum of each numeric vector per group.
#
# @return [Object] the result of +apply_method+
def min
  apply_method(:numeric, :min)
end
#reduce(init = nil) ⇒ Object
Iteratively applies a function to the values in a group and accumulates the result.
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 |
# File 'lib/daru/core/group_by.rb', line 231
#
# Folds the rows of every group into a single value. For each group the
# accumulator starts at +init+; the block is then called once per row of the
# group with the current accumulator and the row, and its return value becomes
# the next accumulator.
#
# @param init [Object] starting accumulator for every group (defaults to nil)
# @yield [memo, row] the running accumulator and the current row, looked up
#   through +@context.row+ by row label
# @yieldreturn [Object] the accumulator to carry into the next row
# @return [Daru::Vector] one folded value per group, indexed by group key
def reduce(init = nil)
  # Materialize the row labels once. Rebuilding this array for every single
  # row position made the method needlessly quadratic.
  row_labels = @context.index.to_a

  folded = @groups.each_with_object({}) do |(group, positions), results|
    results[group] = positions.inject(init) do |memo, position|
      yield(memo, @context.row[row_labels[position]])
    end
  end

  # Multi-vector groupings keep their tuples; single-vector groupings are
  # flattened to plain labels.
  index =
    if multi_indexed_grouping?
      Daru::MultiIndex.from_tuples(folded.keys)
    else
      Daru::Index.new(folded.keys.flatten)
    end

  Daru::Vector.new(folded.values, index: index)
end
#size ⇒ Object
Get a Daru::Vector of the size of each group.
36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/daru/core/group_by.rb', line 36
#
# Reports how many entries each group holds.
#
# @return [Daru::Vector] named +:size+, holding the length of every group's
#   entry list and indexed by group key (a MultiIndex when
#   +multi_indexed_grouping?+ is true, otherwise a flat Index)
def size
  group_keys = @groups.keys
  size_index =
    if multi_indexed_grouping?
      Daru::MultiIndex.from_tuples(group_keys)
    else
      # Single-vector keys are one-element tuples; flatten them to labels.
      Daru::Index.new(group_keys.flatten)
    end

  member_counts = @groups.each_value.map(&:size)
  Daru::Vector.new(member_counts, index: size_index, name: :size)
end
#std ⇒ Object
Calculate sample standard deviation of numeric vector groups, excluding missing values.
160 161 162 |
# File 'lib/daru/core/group_by.rb', line 160
#
# Delegates to +apply_method+ with the +:numeric+ selector and +:std+; per the
# method summary this gives the sample standard deviation of each numeric
# vector per group.
#
# @return [Object] the result of +apply_method+
def std
  apply_method(:numeric, :std)
end
#sum ⇒ Object
Calculate sum of numeric groups, excluding missing values.
131 132 133 |
# File 'lib/daru/core/group_by.rb', line 131
#
# Delegates to +apply_method+ with the +:numeric+ selector and +:sum+; per the
# method summary this gives the sum of each numeric vector per group.
#
# @return [Object] the result of +apply_method+
def sum
  apply_method(:numeric, :sum)
end
#tail(quantity = 5) ⇒ Object
Get the bottom ‘n’ groups
100 101 102 |
# File 'lib/daru/core/group_by.rb', line 100
#
# Delegates to +select_groups_from+ with the +:last+ selector.
#
# @param quantity [Integer] how many entries to take (defaults to 5)
# @return [Object] the result of +select_groups_from+
def tail(quantity = 5)
  select_groups_from(:last, quantity)
end