Class: Daru::Core::GroupBy
Constant Summary collapse
# Comparator used to order group-key tuples.
#
# Two present tuples are compared element-wise after their nil entries are
# dropped. A missing (nil/false) tuple orders before any present tuple, and
# two missing tuples compare as equal so the comparator stays consistent
# when used with sort.
#
# @param a [Array, nil] left tuple
# @param b [Array, nil] right tuple
# @return [Integer, nil] -1, 0 or 1; nil when Array#<=> cannot compare the
#   compacted tuples
TUPLE_SORTER =
  lambda do |a, b|
    if a && b
      a.compact <=> b.compact
    elsif a
      1
    elsif b
      -1
    else
      # Both tuples are missing: neither may sort before the other.
      0
    end
  end
Instance Attribute Summary collapse
-
#df ⇒ Object
readonly
Returns the value of attribute df.
-
#groups ⇒ Object
readonly
Returns the value of attribute groups.
Instance Method Summary collapse
-
#count ⇒ Object
Count groups, excluding missing values.
-
#each_group ⇒ Object
Iterate over each group created by group_by.
-
#first ⇒ Object
Get the first group.
-
#get_group(group) ⇒ Object
Returns one of the selected groups as a DataFrame.
-
#head(quantity = 5) ⇒ Object
Get the top ‘n’ groups.
-
#initialize(context, names) ⇒ GroupBy
constructor
A new instance of GroupBy.
- #inspect ⇒ Object
-
#last ⇒ Object
Get the last group.
-
#max ⇒ Object
Find the max element of each numeric vector group.
-
#mean ⇒ Object
Calculate mean of numeric groups, excluding missing values.
-
#median ⇒ Object
Calculate the median of numeric groups, excluding missing values.
-
#min ⇒ Object
Find the min element of each numeric vector group.
-
#reduce(init = nil) ⇒ Object
Iteratively applies a function to the values in a group and accumulates the result.
-
#size ⇒ Object
Get a Daru::Vector of the size of each group.
-
#std ⇒ Object
Calculate sample standard deviation of numeric vector groups, excluding missing values.
-
#sum ⇒ Object
Calculate sum of numeric groups, excluding missing values.
-
#tail(quantity = 5) ⇒ Object
Get the bottom ‘n’ groups.
Constructor Details
#initialize(context, names) ⇒ GroupBy
Returns a new instance of GroupBy.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/daru/core/group_by.rb', line 22
# Returns a new instance of GroupBy.
#
# Collects the values of every grouping vector, zips them into one key tuple
# per row and hands the tuples to init_groups_df.
#
# @param context [Object] object being grouped; must respond to #vectors and
#   #[] (presumably a Daru::DataFrame -- confirm against callers)
# @param names [Array] names of the vectors to group by
def initialize(context, names)
  @groups = {}
  @context = context
  @non_group_vectors = context.vectors.to_a - names

  key_columns = names.map { |name| context[name].to_a }
  leading, *remaining = key_columns
  tuples = leading.zip(*remaining)
  # FIXME: It feels like we don't want to sort here. Ruby's #group_by
  # never sorts:
  #
  #   ['test', 'me', 'please'].group_by(&:size)
  #   # => {4=>["test"], 2=>["me"], 6=>["please"]}
  #
  # - zverok, 2016-09-12
  init_groups_df(tuples, names)
end
Instance Attribute Details
#df ⇒ Object (readonly)
Returns the value of attribute df.
4 5 6 |
# File 'lib/daru/core/group_by.rb', line 4
# Read-only accessor.
#
# @return [Object] the value of attribute df
def df
  @df
end
#groups ⇒ Object (readonly)
Returns the value of attribute groups.
4 5 6 |
# File 'lib/daru/core/group_by.rb', line 4
# Read-only accessor.
#
# @return [Object] the value of attribute groups
def groups
  @groups
end
Instance Method Details
#count ⇒ Object
Count groups, excluding missing values.
156 157 158 159 |
# File 'lib/daru/core/group_by.rb', line 156
# Count groups.
#
# Builds a DataFrame with one column per non-group vector, where every
# column is the same group-size vector returned by #size.
# NOTE(review): the summary says missing values are excluded, but no
# filtering is visible in this method -- confirm.
#
# @return [Daru::DataFrame]
def count
  group_sizes = size
  columns = Array.new(@non_group_vectors.size, group_sizes)
  Daru::DataFrame.new(columns, order: @non_group_vectors)
end
#each_group ⇒ Object
Iterate over each group created by group_by. A DataFrame is yielded in block.
8 9 10 11 12 |
# File 'lib/daru/core/group_by.rb', line 8
# Iterate over each group created by group_by.
#
# @yield [group] the result of get_group for each key of #groups, in key
#   order
# @return [Array, Enumerator] the list of group keys when a block is given;
#   an Enumerator over the groups when called without a block
def each_group
  # Without a block there is nothing to yield to; hand back an Enumerator
  # instead of failing with LocalJumpError on the first group.
  return to_enum(:each_group) unless block_given?

  groups.keys.each { |key| yield get_group(key) }
end
#first ⇒ Object
Get the first group
52 53 54 |
# File 'lib/daru/core/group_by.rb', line 52
# Get the first group.
#
# @return [Object] the result of head(1)
def first
  head(1)
end
#get_group(group) ⇒ Object
Returns one of the selected groups as a DataFrame.
193 194 195 196 197 198 199 200 201 202 |
# File 'lib/daru/core/group_by.rb', line 193
# Returns one of the selected groups as a DataFrame.
#
# @param group [Object] key of the wanted group, as stored in @groups
# @return [Daru::DataFrame] rows of the context that belong to the group,
#   indexed by their positions and ordered like the context's vectors
def get_group(group)
  positions = @groups[group]
  # Turn the column-wise vectors into a row-wise table so that a row can be
  # picked by position.
  table = @context.each_vector.map(&:to_a).transpose
  rows = positions.map { |position| table[position] }

  Daru::DataFrame.rows(rows, index: positions, order: @context.vectors)
end
#head(quantity = 5) ⇒ Object
Get the top ‘n’ groups
80 81 82 |
# File 'lib/daru/core/group_by.rb', line 80
# Get the top 'n' groups.
#
# @param quantity [Integer] number of groups to take (defaults to 5)
# @return [Object] the result of select_groups_from(:first, quantity)
def head(quantity = 5)
  select_groups_from(:first, quantity)
end
#inspect ⇒ Object
242 243 244 |
# File 'lib/daru/core/group_by.rb', line 242
# Delegates to the #inspect of the df attribute.
#
# @return [Object] whatever @df.inspect returns
def inspect
  @df.inspect
end
#last ⇒ Object
Get the last group
57 58 59 |
# File 'lib/daru/core/group_by.rb', line 57
# Get the last group.
#
# @return [Object] the result of tail(1)
def last
  tail(1)
end
#max ⇒ Object
Find the max element of each numeric vector group.
168 169 170 |
# File 'lib/daru/core/group_by.rb', line 168
# Find the max element of each numeric vector group.
#
# @return [Object] the result of apply_method(:numeric, :max)
def max
  apply_method(:numeric, :max)
end
#mean ⇒ Object
Calculate mean of numeric groups, excluding missing values.
124 125 126 |
# File 'lib/daru/core/group_by.rb', line 124
# Calculate mean of numeric groups, excluding missing values.
#
# @return [Object] the result of apply_method(:numeric, :mean)
def mean
  apply_method(:numeric, :mean)
end
#median ⇒ Object
Calculate the median of numeric groups, excluding missing values.
129 130 131 |
# File 'lib/daru/core/group_by.rb', line 129
# Calculate the median of numeric groups, excluding missing values.
#
# @return [Object] the result of apply_method(:numeric, :median)
def median
  apply_method(:numeric, :median)
end
#min ⇒ Object
Find the min element of each numeric vector group.
173 174 175 |
# File 'lib/daru/core/group_by.rb', line 173
# Find the min element of each numeric vector group.
#
# @return [Object] the result of apply_method(:numeric, :min)
def min
  apply_method(:numeric, :min)
end
#reduce(init = nil) ⇒ Object
Iteratively applies a function to the values in a group and accumulates the result.
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/daru/core/group_by.rb', line 220
# Iteratively applies a function to the values in a group and accumulates
# the result.
#
# For every group the block is called once per member row with the running
# accumulator and the row (looked up through @context.row by index label);
# the block's return value becomes the next accumulator.
#
# @param init [Object] initial accumulator value for every group
# @yield [memo, row] running accumulator and the current row
# @yieldreturn [Object] the new accumulator
# @return [Daru::Vector] one accumulated value per group, indexed by the
#   group keys (a MultiIndex when multi_indexed_grouping? is true)
def reduce(init = nil)
  # Materialise the index labels once; doing it per row made the whole
  # reduction quadratic in the number of rows.
  labels = @context.index.to_a

  result_hash = @groups.each_with_object({}) do |(group, indices), h|
    h[group] = indices.inject(init) do |memo, position|
      yield(memo, @context.row[labels[position]])
    end
  end

  index =
    if multi_indexed_grouping?
      Daru::MultiIndex.from_tuples result_hash.keys
    else
      Daru::Index.new result_hash.keys.flatten
    end

  Daru::Vector.new(result_hash.values, index: index)
end
#size ⇒ Object
Get a Daru::Vector of the size of each group.
39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/daru/core/group_by.rb', line 39
# Get a Daru::Vector of the size of each group.
#
# @return [Daru::Vector] vector named :size holding the member count of
#   every group, indexed by the group keys (a MultiIndex when
#   multi_indexed_grouping? is true, a flat Index otherwise)
def size
  index =
    if multi_indexed_grouping?
      Daru::MultiIndex.from_tuples(@groups.keys)
    else
      Daru::Index.new(@groups.keys.flatten)
    end
  member_counts = @groups.values.map { |members| members.size }

  Daru::Vector.new(member_counts, index: index, name: :size)
end
#std ⇒ Object
Calculate sample standard deviation of numeric vector groups, excluding missing values.
163 164 165 |
# File 'lib/daru/core/group_by.rb', line 163
# Calculate sample standard deviation of numeric vector groups, excluding
# missing values.
#
# @return [Object] the result of apply_method(:numeric, :std)
def std
  apply_method(:numeric, :std)
end
#sum ⇒ Object
Calculate sum of numeric groups, excluding missing values.
134 135 136 |
# File 'lib/daru/core/group_by.rb', line 134
# Calculate sum of numeric groups, excluding missing values.
#
# @return [Object] the result of apply_method(:numeric, :sum)
def sum
  apply_method(:numeric, :sum)
end
#tail(quantity = 5) ⇒ Object
Get the bottom ‘n’ groups
103 104 105 |
# File 'lib/daru/core/group_by.rb', line 103
# Get the bottom 'n' groups.
#
# @param quantity [Integer] number of groups to take (defaults to 5)
# @return [Object] the result of select_groups_from(:last, quantity)
def tail(quantity = 5)
  select_groups_from(:last, quantity)
end