Class: Daru::Core::GroupBy
Constant Summary collapse
# Comparator for two grouping tuples (arrays), returning -1, 0 or 1.
#
# * A missing (nil/false) +right+ makes +left+ sort first; otherwise a
#   missing +left+ sorts last. Note that two missing tuples yield -1.
# * nils inside the tuples are dropped (+compact+) before comparing.
# * Tuples of equal compacted length are compared with Array#<=>; when that
#   comparison is undefined (nil) they are treated as equal.
# * Otherwise the shorter compacted tuple sorts first.
TUPLE_SORTER =
  lambda do |left, right|
    return -1 unless right
    return 1 unless left

    left = left.compact
    right = right.compact

    if left.length == right.length
      # Array#<=> returns nil for incomparable elements; fall back to "equal".
      (left <=> right) || 0
    else
      left.length <=> right.length
    end
  end
Instance Attribute Summary collapse
-
#df ⇒ Object
readonly
Returns the value of attribute df.
-
#groups ⇒ Object
readonly
Returns the value of attribute groups.
Instance Method Summary collapse
-
#aggregate(options = {}) ⇒ Daru::DataFrame
Function to use for aggregating the data.
-
#count ⇒ Object
Count groups, excludes missing values.
-
#each_group ⇒ Object
Iterate over each group created by group_by.
-
#first ⇒ Object
Get the first group.
-
#get_group(group) ⇒ Object
Returns one of the selected groups as a DataFrame.
-
#head(quantity = 5) ⇒ Object
Get the top ‘n’ groups.
-
#initialize(context, names) ⇒ GroupBy
constructor
A new instance of GroupBy.
- #inspect ⇒ Object
-
#last ⇒ Object
Get the last group.
-
#max ⇒ Object
Find the max element of each numeric vector group.
-
#mean ⇒ Object
Calculate mean of numeric groups, excluding missing values.
-
#median ⇒ Object
Calculate the median of numeric groups, excluding missing values.
-
#min ⇒ Object
Find the min element of each numeric vector group.
-
#reduce(init = nil) {|block| ... } ⇒ Object
Iteratively applies a function to the values in a group and accumulates the result.
-
#size ⇒ Object
Get a Daru::Vector of the size of each group.
-
#std ⇒ Object
Calculate sample standard deviation of numeric vector groups, excluding missing values.
-
#sum ⇒ Object
Calculate sum of numeric groups, excluding missing values.
-
#tail(quantity = 5) ⇒ Object
Get the bottom ‘n’ groups.
Constructor Details
#initialize(context, names) ⇒ GroupBy
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/daru/core/group_by.rb', line 24

# Sets up the grouping state for +context+ keyed on the vectors in +names+.
#
# @param context [Object] the object being grouped; it must respond to
#   #vectors and #[] (presumably a Daru::DataFrame — confirm against callers)
# @param names [Array] names of the vectors to group by; must be non-empty,
#   since the first vector is used as the zip receiver
def initialize(context, names)
  @groups = {}
  # Every vector that is not part of the grouping key.
  @non_group_vectors = context.vectors.to_a - names
  @context = context

  # One array per grouping vector, zipped into one key tuple per row.
  vectors = names.map { |vec| context[vec].to_a }
  tuples  = vectors[0].zip(*vectors[1..-1])
  # FIXME: It feels like we don't want to sort here. Ruby's #group_by
  # never sorts:
  #
  #   ['test', 'me', 'please'].group_by(&:size)
  #   #  => {4=>["test"], 2=>["me"], 6=>["please"]}
  #
  # - zverok, 2016-09-12
  init_groups_df tuples, names
end
Instance Attribute Details
#df ⇒ Object (readonly)
Returns the value of attribute df.
4 5 6 |
# File 'lib/daru/core/group_by.rb', line 4

# Read-only accessor.
#
# @return [Object] the current value of the +@df+ instance variable
def df
  @df
end
#groups ⇒ Object (readonly)
Returns the value of attribute groups.
4 5 6 |
# File 'lib/daru/core/group_by.rb', line 4

# Read-only accessor.
#
# @return [Object] the current value of the +@groups+ instance variable
def groups
  @groups
end
Instance Method Details
#aggregate(options = {}) ⇒ Daru::DataFrame
Function to use for aggregating the data. `group_by` uses Daru::DataFrame#aggregate.
284 285 286 287 |
# File 'lib/daru/core/group_by.rb', line 284

# Aggregates the grouped data by delegating to +@df.aggregate+.
#
# Before delegating, the last level of +@df+'s index is removed. Note that
# this reassigns +@df.index+ on every call.
#
# @param options [Hash] passed through unchanged to +@df.aggregate+
# @return [Object] whatever +@df.aggregate+ returns (documented as a
#   Daru::DataFrame)
def aggregate(options = {})
  last_layer = @df.index.levels.size - 1
  @df.index = @df.index.remove_layer(last_layer)
  @df.aggregate(options)
end
#count ⇒ Object
Count groups, excludes missing values.
158 159 160 161 |
# File 'lib/daru/core/group_by.rb', line 158

# Builds a DataFrame with one column per non-grouping vector, each column
# being the result of #size.
#
# NOTE(review): the published summary says this "excludes missing values",
# but the code visible here only repeats #size for every column — confirm
# against #size and the data it is built from.
#
# @return [Daru::DataFrame]
def count
  width = @non_group_vectors.size
  # #size is evaluated once; the same object is reused for every column.
  Daru::DataFrame.new([size] * width, order: @non_group_vectors)
end
#each_group ⇒ Object
Iterate over each group created by group_by. A DataFrame is yielded in block.
8 9 10 11 12 |
# File 'lib/daru/core/group_by.rb', line 8

# Iterates over every group key and yields the result of #get_group for it.
#
# @yield [group] the value returned by #get_group for each key of #groups
# @return [Array] the list of group keys that was iterated
def each_group
  groups.keys.each do |key|
    yield get_group(key)
  end
end
#first ⇒ Object
Get the first group
54 55 56 |
# File 'lib/daru/core/group_by.rb', line 54

# Gets the first group; shorthand for +head(1)+.
#
# @return [Object] whatever #head returns for a quantity of 1
def first
  head(1)
end
#get_group(group) ⇒ Object
Returns one of the selected groups as a DataFrame.
195 196 197 198 199 200 201 202 203 204 |
# File 'lib/daru/core/group_by.rb', line 195

# Returns one of the groups as a DataFrame.
#
# @param group [Object] a key of +@groups+; the stored value is used as a
#   list of row positions (an unknown key yields nil and fails on iteration)
# @return [Daru::DataFrame] the selected rows, indexed by those positions
#   and with the same vector order as +@context+
def get_group(group)
  indexes = @groups[group]
  # Turn the column-wise data into row-wise arrays so rows can be picked by
  # position.
  all_rows = @context.each_vector.map(&:to_a).transpose
  rows = indexes.map { |idx| all_rows[idx] }

  Daru::DataFrame.rows(
    rows, index: indexes, order: @context.vectors
  )
end
#head(quantity = 5) ⇒ Object
Get the top ‘n’ groups
82 83 84 |
# File 'lib/daru/core/group_by.rb', line 82

# Gets the top +quantity+ groups by delegating to
# +select_groups_from(:first, quantity)+.
#
# @param quantity [Integer] how many to select (defaults to 5)
# @return [Object] whatever +select_groups_from+ returns
def head(quantity = 5)
  select_groups_from :first, quantity
end
#inspect ⇒ Object
244 245 246 |
# File 'lib/daru/core/group_by.rb', line 244 def inspect @df.inspect end |
#last ⇒ Object
Get the last group
59 60 61 |
# File 'lib/daru/core/group_by.rb', line 59

# Gets the last group; shorthand for +tail(1)+.
#
# @return [Object] whatever #tail returns for a quantity of 1
def last
  tail(1)
end
#max ⇒ Object
Find the max element of each numeric vector group.
170 171 172 |
# File 'lib/daru/core/group_by.rb', line 170

# Finds the max element of each numeric vector group by delegating to
# +apply_method(:numeric, :max)+.
#
# @return [Object] whatever +apply_method+ returns
def max
  apply_method :numeric, :max
end
#mean ⇒ Object
Calculate mean of numeric groups, excluding missing values.
126 127 128 |
# File 'lib/daru/core/group_by.rb', line 126

# Calculates the mean of numeric groups by delegating to
# +apply_method(:numeric, :mean)+.
#
# @return [Object] whatever +apply_method+ returns
def mean
  apply_method :numeric, :mean
end
#median ⇒ Object
Calculate the median of numeric groups, excluding missing values.
131 132 133 |
# File 'lib/daru/core/group_by.rb', line 131

# Calculates the median of numeric groups by delegating to
# +apply_method(:numeric, :median)+.
#
# @return [Object] whatever +apply_method+ returns
def median
  apply_method :numeric, :median
end
#min ⇒ Object
Find the min element of each numeric vector group.
175 176 177 |
# File 'lib/daru/core/group_by.rb', line 175

# Finds the min element of each numeric vector group by delegating to
# +apply_method(:numeric, :min)+.
#
# @return [Object] whatever +apply_method+ returns
def min
  apply_method :numeric, :min
end
#reduce(init = nil) {|block| ... } ⇒ Object
Iteratively applies a function to the values in a group and accumulates the result.
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 |
# File 'lib/daru/core/group_by.rb', line 222

# Iteratively applies the block to the rows of each group and accumulates
# the result, producing one value per group.
#
# @param init [Object] starting accumulator for every group; the same
#   object is handed to each group, so a block that mutates it in place
#   shares that state across groups
# @yield [accumulator, row] the running result and +@context.row[label]+
#   for each row of the group
# @yieldreturn [Object] the new accumulator
# @return [Daru::Vector] accumulated values, indexed by group key
def reduce(init = nil)
  # Positional index -> index label lookup, built once instead of once per
  # row.
  labels = @context.index.to_a

  result_hash = @groups.each_with_object({}) do |(group, indices), results|
    results[group] = indices.inject(init) do |accumulated, position|
      yield(accumulated, @context.row[labels[position]])
    end
  end

  index =
    if multi_indexed_grouping?
      Daru::MultiIndex.from_tuples result_hash.keys
    else
      Daru::Index.new result_hash.keys.flatten
    end

  Daru::Vector.new(result_hash.values, index: index)
end
#size ⇒ Object
Get a Daru::Vector of the size of each group.
41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/daru/core/group_by.rb', line 41

# Gets a Daru::Vector holding the size of each group.
#
# The vector is named +:size+ and indexed by group key: a MultiIndex built
# from the key tuples when +multi_indexed_grouping?+ is true, otherwise a
# plain Index of the flattened keys.
#
# @return [Daru::Vector]
def size
  index =
    if multi_indexed_grouping?
      Daru::MultiIndex.from_tuples @groups.keys
    else
      Daru::Index.new @groups.keys.flatten
    end

  values = @groups.values.map(&:size)
  Daru::Vector.new(values, index: index, name: :size)
end
#std ⇒ Object
Calculate sample standard deviation of numeric vector groups, excluding missing values.
165 166 167 |
# File 'lib/daru/core/group_by.rb', line 165

# Calculates the sample standard deviation of numeric vector groups by
# delegating to +apply_method(:numeric, :std)+.
#
# @return [Object] whatever +apply_method+ returns
def std
  apply_method :numeric, :std
end
#sum ⇒ Object
Calculate sum of numeric groups, excluding missing values.
136 137 138 |
# File 'lib/daru/core/group_by.rb', line 136

# Calculates the sum of numeric groups by delegating to
# +apply_method(:numeric, :sum)+.
#
# @return [Object] whatever +apply_method+ returns
def sum
  apply_method :numeric, :sum
end
#tail(quantity = 5) ⇒ Object
Get the bottom ‘n’ groups
105 106 107 |
# File 'lib/daru/core/group_by.rb', line 105

# Gets the bottom +quantity+ groups by delegating to
# +select_groups_from(:last, quantity)+.
#
# @param quantity [Integer] how many to select (defaults to 5)
# @return [Object] whatever +select_groups_from+ returns
def tail(quantity = 5)
  select_groups_from :last, quantity
end