Class: Daru::Core::GroupBy

Inherits:
Object
  • Object
show all
Defined in:
lib/daru/core/group_by.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(context, names) ⇒ GroupBy

Returns a new instance of GroupBy.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/daru/core/group_by.rb', line 14

# Builds the grouping table for +context+.
#
# Each distinct combination of values found in the +names+ vectors becomes a
# group key (an Array, one element per grouping vector). Keys are ordered by
# comparing their nil-stripped forms, and each key is mapped to whatever
# all_indices_for returns for it (the positions of the matching tuples).
#
# @param context [Object] the data source; must respond to +vectors+ and +[]+
# @param names [Array] names of the vectors to group by (must not be empty)
# @return [GroupBy] a new instance of GroupBy
def initialize(context, names)
  @groups = {}
  @non_group_vectors = context.vectors.to_a - names
  @context = context

  vectors = names.map { |vec| context[vec].to_a }
  tuples  = vectors[0].zip(*vectors[1..-1])

  # Every tuple is an Array produced by zip and therefore always truthy, so
  # no nil guard is needed around the comparison. Missing values inside a
  # tuple are dropped before comparing so that nil never reaches <=>.
  keys = tuples.uniq.sort { |a, b| a.compact <=> b.compact }

  keys.each do |key|
    @groups[key] = all_indices_for(tuples, key)
  end
  # The table is read-only once built.
  @groups.freeze
end

Instance Attribute Details

#groups ⇒ Object (readonly)

Returns the value of attribute groups.



4
5
6
# File 'lib/daru/core/group_by.rb', line 4

# Reader for the grouping table built in the constructor: a frozen Hash that
# maps each group key (an Array with one value per grouping vector) to the
# collection of positions returned by all_indices_for for that key.
def groups
  @groups
end

Instance Method Details

#count ⇒ Object

Count groups, excludes missing values.

Examples:

Using count

df = Daru::DataFrame.new({
  a: %w{foo bar foo bar   foo bar foo foo},
  b: %w{one one two three two two one three},
  c:   [1  ,2  ,3  ,1    ,3  ,6  ,3  ,8],
  d:   [11 ,22 ,33 ,44   ,55 ,66 ,77 ,88]
})
df.group_by([:a, :b]).count
# =>
# #<Daru::DataFrame:76900210 @name = 7b9cf55d-17f8-48c7-b03a-2586c6e5ec5a @size = 6>
#                           c          d
# ["bar", "one"]            1          1
# ["bar", "three"]          1          1
# ["bar", "two"]            1          1
# ["foo", "one"]            2          2
# ["foo", "three"]          1          1
# ["foo", "two"]            2          2


153
154
155
156
# File 'lib/daru/core/group_by.rb', line 153

# Builds a DataFrame with one column per non-grouping vector, where every
# column is the per-group size vector returned by #size.
#
# NOTE(review): the values of the vectors are never inspected here, so each
# column simply repeats the group sizes; confirm this against the
# "excludes missing values" wording of the method description.
#
# @return [Daru::DataFrame] group sizes, repeated for each non-grouping vector
def count
  columns = [size] * @non_group_vectors.size
  Daru::DataFrame.new(columns, order: @non_group_vectors)
end

#each_group ⇒ Object

Iterate over each group created by group_by. A DataFrame is yielded in block.



8
9
10
11
12
# File 'lib/daru/core/group_by.rb', line 8

# Iterates over the groups in the order of their keys in #groups, yielding
# the result of get_group for each key.
#
# Without a block an Enumerator over the same sequence is returned instead of
# raising LocalJumpError, so the method can be chained (e.g. +each_group.map+).
#
# @yield [group] the object returned by get_group for one group key
# @return [Array, Enumerator] the group keys when a block is given,
#   otherwise an Enumerator
def each_group
  return to_enum(:each_group) unless block_given?

  groups.keys.each do |key|
    yield get_group(key)
  end
end

#first ⇒ Object

Get the first row of each group (equivalent to head(1)).



49
50
51
# File 'lib/daru/core/group_by.rb', line 49

# Shorthand for head(1); returns whatever #head returns for a quantity of 1.
def first
  head(1)
end

#get_group(group) ⇒ Object

Returns one of the selected groups as a DataFrame.

Examples:

Getting a group


df = Daru::DataFrame.new({
      a: %w{foo bar foo bar   foo bar foo foo},
      b: %w{one one two three two two one three},
      c:   [1  ,2  ,3  ,1    ,3  ,6  ,3  ,8],
      d:   [11 ,22 ,33 ,44   ,55 ,66 ,77 ,88]
    })
df.group_by([:a, :b]).get_group ['bar','two']
#=>
##<Daru::DataFrame:83258980 @name = 687ee3f6-8874-4899-97fa-9b31d84fa1d5 @size = 1>
#                    a          b          c          d
#         5        bar        two          6         66

Parameters:

  • group (Array)

    The group that is to be selected from those grouped.



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/daru/core/group_by.rb', line 190

# Materialises a single group as a DataFrame.
#
# @param group [Array] key of the group to fetch, as stored in @groups
# @return [Object] the result of Daru::DataFrame.rows for the rows of
#   @context that belong to +group+
def get_group(group)
  positions = @groups[group]

  # Collect every vector as a plain array, then flip columns into rows.
  columns = []
  @context.each_vector { |vec| columns << vec.to_a }
  all_rows = columns.transpose

  selected = positions.map { |pos| all_rows[pos] }

  # Prefer the labels the context's index gives for these positions; if that
  # lookup raises IndexError, fall back to the raw positions.
  labels =
    begin
      @context.index[positions]
    rescue IndexError
      positions
    end

  Daru::DataFrame.rows(selected, index: labels, order: @context.vectors)
end

#head(quantity = 5) ⇒ Object

Get the top ‘n’ rows of each group

Examples:

Usage of head

df = Daru::DataFrame.new({
  a: %w{foo bar foo bar   foo bar foo foo},
  b: %w{one one two three two two one three},
  c:   [1  ,2  ,3  ,1    ,3  ,6  ,3  ,8],
  d:   [11 ,22 ,33 ,44   ,55 ,66 ,77 ,88]
})
df.group_by([:a, :b]).head(1)
# =>
# #<Daru::DataFrame:82745170 @name = d7003f75-5eb9-4967-9303-c08dd9160224 @size = 6>
#                     a          b          c          d
#          1        bar        one          2         22
#          3        bar      three          1         44
#          5        bar        two          6         66
#          0        foo        one          1         11
#          7        foo      three          8         88
#          2        foo        two          3         33

Parameters:

  • quantity (Fixnum) (defaults to: 5)

    (5) The number of rows to take from each group.



77
78
79
# File 'lib/daru/core/group_by.rb', line 77

# Takes rows from the start of every group by delegating to
# select_groups_from with the :first selector.
#
# @param quantity [Integer] how many rows to request per group (default 5)
# @return [Object] whatever select_groups_from returns
def head(quantity = 5)
  select_groups_from(:first, quantity)
end

#last ⇒ Object

Get the last row of each group (equivalent to tail(1)).



54
55
56
# File 'lib/daru/core/group_by.rb', line 54

# Shorthand for tail(1); returns whatever #tail returns for a quantity of 1.
def last
  tail(1)
end

#max ⇒ Object

Find the max element of each numeric vector group.



165
166
167
# File 'lib/daru/core/group_by.rb', line 165

# Delegates to apply_method with the :numeric selector and the :max operation.
# Intended to give the maximum of every numeric vector per group; the actual
# computation lives in apply_method, which is not visible here.
def max
  apply_method :numeric, :max
end

#mean ⇒ Object

Calculate mean of numeric groups, excluding missing values.

Examples:

Usage of mean

df = Daru::DataFrame.new({
  a: %w{foo bar foo bar   foo bar foo foo},
  b: %w{one one two three two two one three},
  c:   [1  ,2  ,3  ,1    ,3  ,6  ,3  ,8],
  d:   [11 ,22 ,33 ,44   ,55 ,66 ,77 ,88]
})
df.group_by([:a, :b]).mean
# =>
# #<Daru::DataFrame:81097450 @name = 0c32983f-3e06-451f-a9c9-051cadfe7371 @size = 6>
#                         c          d
# ["bar", "one"]          2         22
# ["bar", "three"]        1         44
# ["bar", "two"]          6         66
# ["foo", "one"]        2.0       44.0
# ["foo", "three"]        8         88
# ["foo", "two"]        3.0       44.0


121
122
123
# File 'lib/daru/core/group_by.rb', line 121

# Delegates to apply_method with the :numeric selector and the :mean operation.
# Intended to give the mean of every numeric vector per group; the actual
# computation lives in apply_method, which is not visible here.
def mean
  apply_method :numeric, :mean
end

#median ⇒ Object

Calculate the median of numeric groups, excluding missing values.



126
127
128
# File 'lib/daru/core/group_by.rb', line 126

# Delegates to apply_method with the :numeric selector and the :median
# operation. Intended to give the median of every numeric vector per group;
# the actual computation lives in apply_method, which is not visible here.
def median
  apply_method :numeric, :median
end

#min ⇒ Object

Find the min element of each numeric vector group.



170
171
172
# File 'lib/daru/core/group_by.rb', line 170

# Delegates to apply_method with the :numeric selector and the :min operation.
# Intended to give the minimum of every numeric vector per group; the actual
# computation lives in apply_method, which is not visible here.
def min
  apply_method :numeric, :min
end

#reduce(init = nil) ⇒ Object

Iteratively applies a function to the values in a group and accumulates the result.

Examples:

Usage of reduce

df = Daru::DataFrame.new({
  a: ['a','b'] * 3,
  b: [1,2,3] * 2,
  c: 'A'..'F'
})
df.group_by([:a]).reduce('') { |result, row| result += row[:c]; result }
# =>
# #<Daru::Vector:70343147159900 @name = nil @metadata = {} @size = 2 >
#     nil
#   a ACE
#   b BDF

Parameters:

  • init (nil) (defaults to: nil)

    The initial value of the accumulator.

  • block (Proc)

    A proc or lambda that accepts two arguments. The first argument is the accumulated result. The second argument is a DataFrame row.



231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/daru/core/group_by.rb', line 231

# Folds the rows of every group into a single value per group.
#
# For each group the accumulator starts at +init+ and is replaced by the
# block's return value for every row of that group, in group order.
#
# @param init [Object, nil] starting accumulator. The very same object is used
#   as the starting value for every group, so the block should return a new
#   accumulator rather than mutate +init+ in place.
# @yield [result, row] the accumulator so far and one row of the group
# @yieldreturn [Object] the new accumulator
# @return [Daru::Vector] one accumulated value per group, indexed by group key
def reduce(init=nil)
  # Loop-invariant: materialise the index labels once instead of once for
  # every row of every group.
  labels = @context.index.to_a

  result_hash = @groups.each_with_object({}) do |(group, indices), h|
    grouped_result = init
    indices.each do |position|
      grouped_result = yield(grouped_result, @context.row[labels[position]])
    end

    h[group] = grouped_result
  end

  index =
    if multi_indexed_grouping?
      Daru::MultiIndex.from_tuples result_hash.keys
    else
      # Single-vector grouping: unwrap the one-element key arrays.
      Daru::Index.new result_hash.keys.flatten
    end

  Daru::Vector.new(result_hash.values, index: index)
end

#size ⇒ Object

Get a Daru::Vector of the size of each group.



36
37
38
39
40
41
42
43
44
45
46
# File 'lib/daru/core/group_by.rb', line 36

# Reports how many positions each group holds.
#
# The resulting vector is indexed by group key: a MultiIndex built from the
# key tuples when multi_indexed_grouping? is true, otherwise a plain Index
# over the flattened keys.
#
# @return [Daru::Vector] group sizes, named :size
def size
  index = if multi_indexed_grouping?
            Daru::MultiIndex.from_tuples(@groups.keys)
          else
            Daru::Index.new(@groups.keys.flatten)
          end

  group_sizes = @groups.map { |_key, positions| positions.size }
  Daru::Vector.new(group_sizes, index: index, name: :size)
end

#std ⇒ Object

Calculate sample standard deviation of numeric vector groups, excluding missing values.



160
161
162
# File 'lib/daru/core/group_by.rb', line 160

# Delegates to apply_method with the :numeric selector and the :std operation.
# Intended to give the sample standard deviation of every numeric vector per
# group; the actual computation lives in apply_method, which is not visible
# here.
def std
  apply_method :numeric, :std
end

#sum ⇒ Object

Calculate sum of numeric groups, excluding missing values.



131
132
133
# File 'lib/daru/core/group_by.rb', line 131

# Delegates to apply_method with the :numeric selector and the :sum operation.
# Intended to give the sum of every numeric vector per group; the actual
# computation lives in apply_method, which is not visible here.
def sum
  apply_method :numeric, :sum
end

#tail(quantity = 5) ⇒ Object

Get the bottom ‘n’ rows of each group

Examples:

Usage of tail

df = Daru::DataFrame.new({
  a: %w{foo bar foo bar   foo bar foo foo},
  b: %w{one one two three two two one three},
  c:   [1  ,2  ,3  ,1    ,3  ,6  ,3  ,8],
  d:   [11 ,22 ,33 ,44   ,55 ,66 ,77 ,88]
})
df.group_by([:a, :b]).tail(1)
# =>
# #<Daru::DataFrame:82378270 @name = 0623db46-5425-41bd-a843-99baac3d1d9a @size = 6>
#                     a          b          c          d
#          1        bar        one          2         22
#          3        bar      three          1         44
#          5        bar        two          6         66
#          6        foo        one          3         77
#          7        foo      three          8         88
#          4        foo        two          3         55

Parameters:

  • quantity (Fixnum) (defaults to: 5)

    (5) The number of rows to take from each group.



100
101
102
# File 'lib/daru/core/group_by.rb', line 100

# Takes rows from the end of every group by delegating to
# select_groups_from with the :last selector.
#
# @param quantity [Integer] how many rows to request per group (default 5)
# @return [Object] whatever select_groups_from returns
def tail(quantity = 5)
  select_groups_from(:last, quantity)
end