Class: Rover::DataFrame

Inherits:
Object
  • Object
show all
Defined in:
lib/rover/data_frame.rb

Instance Method Summary collapse

Constructor Details

#initialize(data = {}) ⇒ DataFrame

Returns a new instance of DataFrame.



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/rover/data_frame.rb', line 3

# Builds a data frame from one of the supported inputs:
# - another DataFrame
# - a Hash of column name => values (or scalars)
# - an Array of row hashes
# - an ActiveRecord relation or model class (only when ActiveRecord is loaded)
#
# Raises ArgumentError when the input type is unsupported, when an Array
# contains a non-Hash element, or when the resulting columns differ in size.
# check_key and to_vector are defined outside this view and may raise too.
def initialize(data = {})
  @vectors = {}

  if data.is_a?(DataFrame)
    # reuse the other frame's vector objects as-is (they are not copied)
    data.vectors.each do |k, v|
      @vectors[k] = v
    end
  elsif data.is_a?(Hash)
    # anything that responds to to_a is wrapped in a Vector right away;
    # other values are kept as-is for the scalar pass below
    data.to_h.each do |k, v|
      @vectors[k] =
        if v.respond_to?(:to_a)
          Vector.new(v)
        else
          v
        end
    end

    # handle scalars
    # target size is the size of the first Vector column, or 1 if there is none;
    # to_vector presumably expands scalars to that size — TODO confirm
    size = @vectors.values.find { |v| v.is_a?(Vector) }&.size || 1
    @vectors.each_key do |k|
      @vectors[k] = to_vector(@vectors[k], size)
    end
  elsif data.is_a?(Array)
    vectors = {}
    raise ArgumentError, "Array elements must be hashes" unless data.all? { |d| d.is_a?(Hash) }
    # union of all row keys, in first-seen order
    keys = data.flat_map(&:keys).uniq
    keys.each do |k|
      vectors[k] = []
    end
    # every row contributes d[k] for every key, so a row without a key
    # contributes whatever Hash#[] returns for it (normally nil)
    data.each do |d|
      keys.each do |k|
        vectors[k] << d[k]
      end
    end
    vectors.each do |k, v|
      @vectors[k] = to_vector(v)
    end
  elsif defined?(ActiveRecord) && (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base))
    # run the relation's SQL directly and build one column per result column
    result = data.connection.select_all(data.all.to_sql)
    result.columns.each_with_index do |k, i|
      @vectors[k] = to_vector(result.rows.map { |r| r[i] })
    end
  else
    raise ArgumentError, "Cannot cast to data frame: #{data.class.name}"
  end

  # check keys
  @vectors.each_key do |k|
    check_key(k)
  end

  # check sizes
  # all columns must report the same size
  sizes = @vectors.values.map(&:size).uniq
  if sizes.size > 1
    raise ArgumentError, "Different sizes: #{sizes}"
  end
end

Instance Method Details

#+(other) ⇒ Object



269
270
271
# File 'lib/rover/data_frame.rb', line 269

# Non-destructive counterpart of #concat: the work is done on a copy made
# with #dup, so the receiver is left untouched. Returns whatever #concat
# returns for that copy.
def +(other)
  combined = dup
  combined.concat(other)
end

#==(other) ⇒ Object

don’t check types



310
311
312
313
314
# File 'lib/rover/data_frame.rb', line 310

# Compares two data frames by row count, column names (in order) and
# per-column contents.
#
# Anything that is not a DataFrame is simply unequal. Previously such a
# comparison (e.g. `df == nil`) raised NoMethodError from `other.size`
# instead of returning false.
#
# NOTE(review): the per-column check relies on `self[k] == other[k]`
# yielding a plain true/false — confirm against Vector#==.
#
# @param other [Object] the object to compare with
# @return [Boolean]
def ==(other)
  return false unless other.is_a?(DataFrame)

  size == other.size &&
    keys == other.keys &&
    keys.all? { |k| self[k] == other[k] }
end

#[](where) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/rover/data_frame.rb', line 61

# Selects rows or columns depending on the kind of +where+:
# - a Vector whose to_numo is a Numo::Bit, a Numeric, a Range, or an Array
#   made up solely of Integers: every column is indexed with +where+ and the
#   results are returned as a new DataFrame
# - any other Array: treated as a list of column names; returns a new
#   DataFrame built from those columns
# - anything else: treated as a single column name; returns the stored
#   vector (nil when there is no such column)
def [](where)
  row_selector =
    (where.is_a?(Vector) && where.to_numo.is_a?(Numo::Bit)) ||
    where.is_a?(Numeric) ||
    where.is_a?(Range) ||
    (where.is_a?(Array) && where.all? { |v| v.is_a?(Integer) })

  if row_selector
    return DataFrame.new(@vectors.transform_values { |vector| vector[where] })
  end

  # single column
  return @vectors[where] unless where.is_a?(Array)

  # multiple columns
  subset = DataFrame.new
  where.each { |name| subset[name] = @vectors[name] }
  subset
end

#[]=(k, v) ⇒ Object

Raises:

  • (ArgumentError)


93
94
95
96
97
98
# File 'lib/rover/data_frame.rb', line 93

# Stores +v+ as the column named +k+.
# The key is validated with check_key and the value is passed through
# to_vector together with the current row count. When the frame already has
# columns, the new column must have the same size.
#
# Raises ArgumentError on a size mismatch.
def []=(k, v)
  check_key(k)
  expected = size
  column = to_vector(v, expected)
  if @vectors.any? && column.size != expected
    raise ArgumentError, "Size mismatch: expected #{expected}, got #{column.size}"
  end
  @vectors[k] = column
end

#any? ⇒ Boolean

should this check for columns as well?

Returns:

  • (Boolean)


107
108
109
# File 'lib/rover/data_frame.rb', line 107

# True when the frame has at least one row (row count from #size is positive).
# Columns are not considered.
def any?
  size.positive?
end

#clear ⇒ Object



116
117
118
# File 'lib/rover/data_frame.rb', line 116

# Removes every column by emptying the internal column hash in place.
# Returns the result of Hash#clear (the now-empty internal hash), not self.
def clear
  @vectors.clear
end

#concat(other) ⇒ Object

In-place, like Array#concat. TODO: make more performant.

Raises:

  • (ArgumentError)


275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/rover/data_frame.rb', line 275

# Appends the rows of +other+ to this data frame in place and returns self.
# Columns present in only one of the two frames are padded with nil for the
# rows that come from the other frame.
#
# Raises ArgumentError unless +other+ is a DataFrame.
def concat(other)
  raise ArgumentError, "Must be a data frame" unless other.is_a?(DataFrame)

  own_rows = size

  # existing columns: extend with other's values, or nils when other lacks the column
  vectors.each do |name, vector|
    incoming = other[name]
    appended = incoming ? incoming.to_a : [nil] * other.size
    @vectors[name] = Vector.new(vector.to_a + appended)
  end

  # columns only in other: nils for the rows that were already here
  (other.vector_names - vector_names).each do |name|
    @vectors[name] = Vector.new([nil] * own_rows + other[name].to_a)
  end

  self
end

#delete(key) ⇒ Object



130
131
132
# File 'lib/rover/data_frame.rb', line 130

# Removes the column stored under +key+ from the internal column hash.
# Returns the result of Hash#delete: the removed vector, or nil when the key
# was not present.
def delete(key)
  @vectors.delete(key)
end

#dup ⇒ Object



261
262
263
264
265
266
267
# File 'lib/rover/data_frame.rb', line 261

# Returns a new DataFrame holding the same columns, assigned one by one
# through #[]= in their original order.
def dup
  @vectors.each_with_object(DataFrame.new) do |(name, vector), copy|
    copy[name] = vector
  end
end

#each_row ⇒ Object

return each row as a hash



82
83
84
85
86
# File 'lib/rover/data_frame.rb', line 82

# Yields each row as a Hash mapping column name to that row's value.
#
# Without a block an Enumerator over the rows is returned, so calls can be
# chained (e.g. each_row.map { ... }). Previously a blockless call raised
# LocalJumpError as soon as there was a row to yield.
#
# @yieldparam row [Hash] column name => value for one row
# @return [Integer, Enumerator] the result of Integer#times (the row count)
#   when a block is given, otherwise an Enumerator
def each_row
  return enum_for(:each_row) unless block_given?

  size.times do |i|
    yield @vectors.map { |k, v| [k, v[i]] }.to_h
  end
end

#empty? ⇒ Boolean

should this check for columns as well?

Returns:

  • (Boolean)


112
113
114
# File 'lib/rover/data_frame.rb', line 112

# True when the frame has no rows (row count from #size is zero).
# Columns are not considered.
def empty?
  size.zero?
end

#except(*keys) ⇒ Object



134
135
136
# File 'lib/rover/data_frame.rb', line 134

# Non-destructive counterpart of #except!: drops the given columns from a
# copy made with #dup and returns that copy; the receiver is untouched.
def except(*keys)
  trimmed = dup
  trimmed.except!(*keys)
end

#except!(*keys) ⇒ Object



138
139
140
141
142
143
# File 'lib/rover/data_frame.rb', line 138

# Deletes each of the given columns from this data frame via #delete and
# returns self.
def except!(*keys)
  keys.each { |name| delete(name) }
  self
end

#first(n = nil) ⇒ Object



157
158
159
160
161
162
163
# File 'lib/rover/data_frame.rb', line 157

# Returns a new DataFrame in which every column is replaced by the result of
# calling first(n) on its vector.
def first(n = nil)
  DataFrame.new(@vectors.transform_values { |vector| vector.first(n) })
end

#head(n = 5) ⇒ Object



149
150
151
# File 'lib/rover/data_frame.rb', line 149

# Returns the leading rows by delegating to #first.
# n is the number of rows to keep and defaults to 5.
def head(n = 5)
  first(n)
end

#include?(key) ⇒ Boolean

Returns:

  • (Boolean)


145
146
147
# File 'lib/rover/data_frame.rb', line 145

# True when a column is stored under +key+ in the internal column hash.
def include?(key)
  @vectors.key?(key)
end

#inner_join(other, on: nil) ⇒ Object

see join for options



300
301
302
# File 'lib/rover/data_frame.rb', line 300

# Convenience wrapper: calls #join with how: "inner", passing +other+ and
# +on+ through unchanged. #join is defined outside this view; see it for the
# accepted values of +on+.
def inner_join(other, on: nil)
  join(other, on: on, how: "inner")
end

#inspect ⇒ Object Also known as: to_s

TODO handle long text better



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# File 'lib/rover/data_frame.rb', line 211

def inspect
  return "#<Rover::DataFrame>" if keys.empty?

  lines = []
  line_start = 0
  spaces = 2

  @vectors.each do |k, v|
    v = v.first(5).to_a
    width = ([k] + v).map(&:to_s).map(&:size).max
    width = 3 if width < 3

    if lines.empty? || lines[-2].map { |l| l.size + spaces }.sum + width > 120
      line_start = lines.size
      lines << []
      [size, 5].min.times do |i|
        lines << []
      end
      lines << [] if size > 5
      lines << []
    end

    lines[line_start] << "%#{width}s" % k.to_s
    v.each_with_index do |v2, i|
      lines[line_start + 1 + i] << "%#{width}s" % v2.to_s
    end
    lines[line_start + 6] << "%#{width}s" % "..." if size > 5
  end

  lines.pop
  lines.map { |l| l.join(" " * spaces) }.join("\n")
end

#keys ⇒ Object Also known as: names, vector_names



124
125
126
# File 'lib/rover/data_frame.rb', line 124

# Returns the column names: the keys of the internal column hash, as a new
# Array (Hash#keys), in insertion order.
def keys
  @vectors.keys
end

#last(n = nil) ⇒ Object



165
166
167
168
169
170
171
# File 'lib/rover/data_frame.rb', line 165

# Returns a new DataFrame in which every column is replaced by the result of
# calling last(n) on its vector.
def last(n = nil)
  DataFrame.new(@vectors.transform_values { |vector| vector.last(n) })
end

#left_join(other, on: nil) ⇒ Object

see join for options



305
306
307
# File 'lib/rover/data_frame.rb', line 305

# Convenience wrapper: calls #join with how: "left", passing +other+ and
# +on+ through unchanged. #join is defined outside this view; see it for the
# accepted values of +on+.
def left_join(other, on: nil)
  join(other, on: on, how: "left")
end

#merge(other) ⇒ Object



288
289
290
# File 'lib/rover/data_frame.rb', line 288

# Non-destructive counterpart of #merge!: applies the merge to a copy made
# with #dup and returns that copy; the receiver is untouched.
def merge(other)
  merged = dup
  merged.merge!(other)
end

#merge!(other) ⇒ Object



292
293
294
295
296
297
# File 'lib/rover/data_frame.rb', line 292

# Copies every column of +other+ (taken from other.vectors) into this data
# frame through #[]=, so columns with the same name are replaced and new ones
# are added. Returns self.
def merge!(other)
  other.vectors.each { |name, vector| self[name] = vector }
  self
end

#shape ⇒ Object



120
121
122
# File 'lib/rover/data_frame.rb', line 120

# Returns the dimensions as a two-element array: [row count, column count].
def shape
  row_count = size
  column_count = @vectors.length
  [row_count, column_count]
end

#size ⇒ Object Also known as: length, count



100
101
102
# File 'lib/rover/data_frame.rb', line 100

# Returns the number of rows, taken from the size of the first column.
# A frame without columns has zero rows.
def size
  first_column = @vectors.each_value.first
  return 0 if first_column.nil?

  first_column.size || 0
end

#sort_by(&block) ⇒ Object



257
258
259
# File 'lib/rover/data_frame.rb', line 257

# Non-destructive counterpart of #sort_by!: sorts a copy made with #dup using
# the given block and returns that copy; the receiver is untouched.
def sort_by(&block)
  sorted = dup
  sorted.sort_by!(&block)
end

#sort_by! ⇒ Object



245
246
247
248
249
250
251
252
253
254
255
# File 'lib/rover/data_frame.rb', line 245

# Reorders the rows in place and returns self.
# The block receives each row as a Hash of column name => value and returns
# the sort key; the resulting row order is then applied to every column.
def sort_by!
  order = (0...size).sort_by do |row|
    yield(@vectors.each_with_object({}) { |(name, vector), values| values[name] = vector[row] })
  end

  # reassigning existing keys while iterating is fine: no keys are added
  @vectors.each { |name, vector| self[name] = vector.to_numo.at(order) }
  self
end

#tail(n = 5) ⇒ Object



153
154
155
# File 'lib/rover/data_frame.rb', line 153

# Returns the trailing rows by delegating to #last.
# n is the number of rows to keep and defaults to 5.
def tail(n = 5)
  last(n)
end

#to_a ⇒ Object



173
174
175
176
177
178
179
# File 'lib/rover/data_frame.rb', line 173

# Returns the rows as an Array, one entry per row in the order #each_row
# yields them.
def to_a
  [].tap do |rows|
    each_row { |row| rows << row }
  end
end

#to_csv ⇒ Object



193
194
195
196
197
198
199
200
201
202
# File 'lib/rover/data_frame.rb', line 193

# Serializes the frame to a CSV string: a header line with the column names
# followed by one line per row. The csv library is loaded on first use.
def to_csv
  require "csv"
  CSV.generate do |out|
    out << keys
    columns = vectors.values.map(&:to_numo)
    size.times { |row| out << columns.map { |column| column[row] } }
  end
end

#to_h ⇒ Object



181
182
183
184
185
186
187
# File 'lib/rover/data_frame.rb', line 181

# Returns a new Hash mapping each column name to that column's values as an
# Array (the vector's to_a).
def to_h
  @vectors.transform_values(&:to_a)
end

#to_html ⇒ Object

for IRuby



205
206
207
208
# File 'lib/rover/data_frame.rb', line 205

# for IRuby
# Loads the iruby library on first use, then passes the column hash from
# #to_h to IRuby::HTML.table and returns its result (presumably HTML table
# markup — confirm against the IRuby documentation).
def to_html
  require "iruby"
  IRuby::HTML.table(to_h)
end

#to_numo ⇒ Object



189
190
191
# File 'lib/rover/data_frame.rb', line 189

# Converts every column with to_numo and passes the resulting list to
# Numo::NArray.column_stack, returning its result.
def to_numo
  columns = vectors.values.map(&:to_numo)
  Numo::NArray.column_stack(columns)
end

#vectors ⇒ Object

dup to prevent direct modification of keys



89
90
91
# File 'lib/rover/data_frame.rb', line 89

# Returns a shallow copy of the internal column hash, so adding or removing
# keys on the result does not affect this data frame. The vector objects
# themselves are shared with the copy.
def vectors
  @vectors.dup
end