Class: Rover::DataFrame
- Inherits:
  - Object
    - Rover::DataFrame
- Defined in:
- lib/rover/data_frame.rb
Instance Method Summary
- #+(other) ⇒ Object
-
#==(other) ⇒ Object
don’t check types.
- #[](where) ⇒ Object
- #[]=(k, v) ⇒ Object
-
#any? ⇒ Boolean
should this check for columns as well?.
- #clear ⇒ Object
-
#concat(other) ⇒ Object
In-place, like Array#concat. TODO: make more performant.
- #delete(key) ⇒ Object
- #dup ⇒ Object
-
#each_row ⇒ Object
return each row as a hash.
-
#empty? ⇒ Boolean
should this check for columns as well?.
- #except(*keys) ⇒ Object
- #except!(*keys) ⇒ Object
- #first(n = nil) ⇒ Object
- #head(n = 5) ⇒ Object
- #include?(key) ⇒ Boolean
-
#initialize(data = {}) ⇒ DataFrame (constructor)
Returns a new instance of DataFrame.
-
#inner_join(other, on: nil) ⇒ Object
see join for options.
-
#inspect ⇒ Object
(also: #to_s)
TODO handle long text better.
- #keys ⇒ Object (also: #names, #vector_names)
- #last(n = nil) ⇒ Object
-
#left_join(other, on: nil) ⇒ Object
see join for options.
- #merge(other) ⇒ Object
- #merge!(other) ⇒ Object
- #shape ⇒ Object
- #size ⇒ Object (also: #length, #count)
- #sort_by(&block) ⇒ Object
- #sort_by! ⇒ Object
- #tail(n = 5) ⇒ Object
- #to_a ⇒ Object
- #to_csv ⇒ Object
- #to_h ⇒ Object
-
#to_html ⇒ Object
for IRuby.
- #to_numo ⇒ Object
-
#vectors ⇒ Object
dup to prevent direct modification of keys.
Constructor Details
#initialize(data = {}) ⇒ DataFrame
Returns a new instance of DataFrame.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/rover/data_frame.rb', line 3
# Builds a data frame from one of the supported inputs.
#
# Inputs are recognised in this order:
# * another DataFrame - the vectors it exposes are reused as-is
# * a Hash - values responding to +to_a+ are wrapped in a Vector; every value
#   is then passed through +to_vector+ with the size of the first Vector found
#   (or 1 when there is none), which handles scalar values
# * an Array of Hashes - one column per distinct key, in first-seen order;
#   a row lacking a key contributes whatever +row[key]+ returns
# * an ActiveRecord relation or model class (only when ActiveRecord is defined)
#
# @param data [DataFrame, Hash, Array<Hash>, Object] source data
# @raise [ArgumentError] when an Array holds non-Hash elements, when the input
#   type is unsupported, or when the resulting columns differ in size
def initialize(data = {})
  @vectors = {}

  case data
  when DataFrame
    data.vectors.each { |name, vector| @vectors[name] = vector }
  when Hash
    data.to_h.each do |name, value|
      @vectors[name] = value.respond_to?(:to_a) ? Vector.new(value) : value
    end

    # handle scalars
    reference = @vectors.values.find { |value| value.is_a?(Vector) }
    target_size = reference&.size || 1
    @vectors.each_key do |name|
      @vectors[name] = to_vector(@vectors[name], target_size)
    end
  when Array
    raise ArgumentError, "Array elements must be hashes" unless data.all? { |row| row.is_a?(Hash) }

    data.flat_map(&:keys).uniq.each do |name|
      @vectors[name] = to_vector(data.map { |row| row[name] })
    end
  else
    active_record = defined?(ActiveRecord) &&
      (data.is_a?(ActiveRecord::Relation) || (data.is_a?(Class) && data < ActiveRecord::Base))
    raise ArgumentError, "Cannot cast to data frame: #{data.class.name}" unless active_record

    result = data.connection.select_all(data.all.to_sql)
    result.columns.each_with_index do |name, index|
      @vectors[name] = to_vector(result.rows.map { |row| row[index] })
    end
  end

  # check keys
  @vectors.each_key { |name| check_key(name) }

  # check sizes
  sizes = @vectors.values.map(&:size).uniq
  raise ArgumentError, "Different sizes: #{sizes}" if sizes.size > 1
end
Instance Method Details
#+(other) ⇒ Object
269 270 271 |
# File 'lib/rover/data_frame.rb', line 269
# Non-destructive concatenation: duplicates this frame and concatenates
# +other+ onto the duplicate.
#
# @param other [DataFrame]
# @return [Object] whatever #concat returns for the duplicate
def +(other)
  copy = dup
  copy.concat(other)
end
#==(other) ⇒ Object
don’t check types
310 311 312 313 314 |
# File 'lib/rover/data_frame.rb', line 310
# Whether +other+ is a data frame with the same row count, the same keys in
# the same order, and equal column contents.
# (original note: don't check types)
#
# @param other [Object]
# @return [Boolean]
def ==(other)
  # Anything that is not a data frame is simply unequal, instead of raising
  # NoMethodError when it lacks #size or #keys (e.g. nil).
  return false unless other.is_a?(DataFrame)

  # Columns are compared through their Numo arrays, where == gives a single
  # true/false for the whole array.
  # NOTE(review): a vector-level == may be element-wise (row masks are Vectors
  # wrapping Numo::Bit); its result would then always be truthy, making every
  # same-shaped frame look equal - confirm against Vector#==.
  size == other.size &&
    keys == other.keys &&
    keys.all? { |k| self[k].to_numo == other[k].to_numo }
end
#[](where) ⇒ Object
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/rover/data_frame.rb', line 61
# Selects rows, several columns, or a single column depending on +where+.
#
# * Vector backed by Numo::Bit, Numeric, Range, or Array of Integers:
#   +where+ is applied to every column and a new DataFrame is returned
# * any other Array: a new DataFrame holding the listed columns
# * anything else: the column stored under that key
#
# @param where [Object] row selector, list of column keys, or a column key
# @return [Object]
def [](where)
  row_selector =
    (where.is_a?(Vector) && where.to_numo.is_a?(Numo::Bit)) ||
    where.is_a?(Numeric) ||
    where.is_a?(Range) ||
    (where.is_a?(Array) && where.all? { |v| v.is_a?(Integer) })

  if row_selector
    DataFrame.new(@vectors.transform_values { |column| column[where] })
  elsif where.is_a?(Array)
    # multiple columns
    subset = DataFrame.new
    where.each { |name| subset[name] = @vectors[name] }
    subset
  else
    # single column
    @vectors[where]
  end
end
#[]=(k, v) ⇒ Object
93 94 95 96 97 98 |
# File 'lib/rover/data_frame.rb', line 93
# Adds or replaces the column stored under +k+.
#
# The key is validated with +check_key+ and the value is converted with
# +to_vector+ using the current row count.
#
# @param k [Object] column key
# @param v [Object] column data
# @raise [ArgumentError] when the frame already has columns and the converted
#   value's size differs from the current row count
def []=(k, v)
  check_key(k)

  expected = size
  column = to_vector(v, expected)
  if @vectors.any? && column.size != expected
    raise ArgumentError, "Size mismatch: expected #{expected}, got #{column.size}"
  end

  @vectors[k] = column
end
#any? ⇒ Boolean
should this check for columns as well?
107 108 109 |
# File 'lib/rover/data_frame.rb', line 107
# True when the frame has at least one row.
# should this check for columns as well?
#
# @return [Boolean]
def any?
  size.positive?
end
#clear ⇒ Object
116 117 118 |
# File 'lib/rover/data_frame.rb', line 116
# Removes every column from the frame.
#
# @return [Hash] the emptied internal column hash
def clear
  @vectors.clear
end
#concat(other) ⇒ Object
in-place, like Array#concat TODO make more performant
275 276 277 278 279 280 281 282 283 284 285 286 |
# File 'lib/rover/data_frame.rb', line 275
# Appends the rows of +other+ to this frame in place, like Array#concat.
# Columns present on only one side are padded with nil.
# TODO make more performant
#
# @param other [DataFrame]
# @return [self]
# @raise [ArgumentError] when +other+ is not a DataFrame
def concat(other)
  raise ArgumentError, "Must be a data frame" unless other.is_a?(DataFrame)

  # Row count before anything is appended; used to pad the new columns.
  original_size = self.size

  vectors.each do |k, v|
    appended = other[k] ? other[k].to_a : [nil] * other.size
    @vectors[k] = Vector.new(v.to_a + appended)
  end

  (other.vector_names - vector_names).each do |k|
    @vectors[k] = Vector.new([nil] * original_size + other[k].to_a)
  end

  self
end
#delete(key) ⇒ Object
130 131 132 |
# File 'lib/rover/data_frame.rb', line 130
# Removes the column stored under +key+.
#
# @param key [Object] column key
# @return [Object, nil] the removed column, or nil when the key is absent
def delete(key)
  @vectors.delete(key)
end
#dup ⇒ Object
261 262 263 264 265 266 267 |
# File 'lib/rover/data_frame.rb', line 261
# Creates a new DataFrame and assigns each of this frame's columns to it
# through #[]=.
#
# @return [DataFrame]
def dup
  DataFrame.new.tap do |copy|
    @vectors.each { |k, v| copy[k] = v }
  end
end
#each_row ⇒ Object
return each row as a hash
82 83 84 85 86 |
# File 'lib/rover/data_frame.rb', line 82
# Yields each row as a hash mapping column key to that row's value.
#
# Without a block an Enumerator over the rows is returned (previously the
# call failed with LocalJumpError at the first yield).
#
# @yieldparam row [Hash] column key => value for one row
# @return [Integer, Enumerator] the row count when a block is given,
#   otherwise an Enumerator whose size is the row count
def each_row
  return enum_for(:each_row) { size } unless block_given?

  size.times do |i|
    yield @vectors.transform_values { |v| v[i] }
  end
end
#empty? ⇒ Boolean
should this check for columns as well?
112 113 114 |
# File 'lib/rover/data_frame.rb', line 112
# True when the frame has no rows.
# should this check for columns as well?
#
# @return [Boolean]
def empty?
  size.zero?
end
#except(*keys) ⇒ Object
134 135 136 |
# File 'lib/rover/data_frame.rb', line 134
# Non-destructive counterpart of #except!: works on a duplicate.
#
# @param keys [Array<Object>] column keys to drop
# @return [Object] the duplicate with the given columns removed
def except(*keys)
  copy = dup
  copy.except!(*keys)
end
#except!(*keys) ⇒ Object
138 139 140 141 142 143 |
# File 'lib/rover/data_frame.rb', line 138
# Deletes each of the given columns from this frame.
#
# @param keys [Array<Object>] column keys to drop
# @return [self]
def except!(*keys)
  keys.each { |key| delete(key) }
  self
end
#first(n = nil) ⇒ Object
157 158 159 160 161 162 163 |
# File 'lib/rover/data_frame.rb', line 157
# Builds a new DataFrame from +first(n)+ of every column.
#
# @param n [Integer, nil] forwarded unchanged to each column's #first
# @return [DataFrame]
def first(n = nil)
  DataFrame.new(@vectors.transform_values { |column| column.first(n) })
end
#head(n = 5) ⇒ Object
149 150 151 |
# File 'lib/rover/data_frame.rb', line 149
# Shorthand for #first with a default of five rows.
#
# @param n [Integer] number of rows
# @return [Object] the result of +first(n)+
def head(n = 5)
  first(n)
end
#include?(key) ⇒ Boolean
145 146 147 |
# File 'lib/rover/data_frame.rb', line 145
# Whether a column is stored under +key+.
#
# @param key [Object] column key
# @return [Boolean]
def include?(key)
  @vectors.key?(key)
end
#inner_join(other, on: nil) ⇒ Object
see join for options
300 301 302 |
# File 'lib/rover/data_frame.rb', line 300
# Delegates to +join+ with +how: "inner"+; see join for options.
#
# @param other [Object] forwarded to +join+
# @param on [Object, nil] forwarded to +join+
# @return [Object] the result of +join+
def inner_join(other, on: nil)
  how = "inner"
  join(other, on: on, how: how)
end
#inspect ⇒ Object Also known as: to_s
TODO handle long text better
211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 |
# File 'lib/rover/data_frame.rb', line 211
# Renders a text preview: a header plus up to five rows per column, with a
# "..." row when the frame has more than five rows. Columns are right-aligned
# (minimum width 3) and wrap into a new group of lines once a group would
# exceed 120 characters.
# TODO handle long text better
#
# @return [String]
def inspect
  return "#<Rover::DataFrame>" if keys.empty?

  lines = []
  group_start = 0
  gap = 2

  @vectors.each do |name, vector|
    preview = vector.first(5).to_a
    width = ([name] + preview).map { |cell| cell.to_s.size }.max
    width = [width, 3].max

    # Open a new group of lines for the first column, or when the current
    # group would grow past 120 characters.
    if lines.empty? || lines[-2].sum { |cell| cell.size + gap } + width > 120
      group_start = lines.size
      # header + data rows + optional "..." row + blank separator
      group_rows = 1 + [size, 5].min + (size > 5 ? 1 : 0) + 1
      group_rows.times { lines << [] }
    end

    lines[group_start] << name.to_s.rjust(width)
    preview.each_with_index do |value, i|
      lines[group_start + 1 + i] << value.to_s.rjust(width)
    end
    lines[group_start + 6] << "...".rjust(width) if size > 5
  end

  # Drop the blank separator that follows the last group.
  lines.pop
  lines.map { |cells| cells.join(" " * gap) }.join("\n")
end
#keys ⇒ Object Also known as: names, vector_names
124 125 126 |
# File 'lib/rover/data_frame.rb', line 124
# Column keys in insertion order.
#
# @return [Array<Object>]
def keys
  @vectors.keys
end
#last(n = nil) ⇒ Object
165 166 167 168 169 170 171 |
# File 'lib/rover/data_frame.rb', line 165
# Builds a new DataFrame from +last(n)+ of every column.
#
# @param n [Integer, nil] forwarded unchanged to each column's #last
# @return [DataFrame]
def last(n = nil)
  DataFrame.new(@vectors.transform_values { |column| column.last(n) })
end
#left_join(other, on: nil) ⇒ Object
see join for options
305 306 307 |
# File 'lib/rover/data_frame.rb', line 305
# Delegates to +join+ with +how: "left"+; see join for options.
#
# @param other [Object] forwarded to +join+
# @param on [Object, nil] forwarded to +join+
# @return [Object] the result of +join+
def left_join(other, on: nil)
  how = "left"
  join(other, on: on, how: how)
end
#merge(other) ⇒ Object
288 289 290 |
# File 'lib/rover/data_frame.rb', line 288
# Non-destructive counterpart of #merge!: works on a duplicate.
#
# @param other [Object] anything accepted by #merge!
# @return [Object] the duplicate after merging
def merge(other)
  copy = dup
  copy.merge!(other)
end
#merge!(other) ⇒ Object
292 293 294 295 296 297 |
# File 'lib/rover/data_frame.rb', line 292
# Assigns every column exposed by +other.vectors+ to this frame through #[]=,
# replacing columns that share a key.
#
# @param other [#vectors]
# @return [self]
def merge!(other)
  other.vectors.each_pair { |k, v| self[k] = v }
  self
end
#shape ⇒ Object
120 121 122 |
# File 'lib/rover/data_frame.rb', line 120
# Dimensions of the frame.
#
# @return [Array] two elements: row count, then column count
def shape
  row_count = size
  [row_count, @vectors.length]
end
#size ⇒ Object Also known as: length, count
100 101 102 |
# File 'lib/rover/data_frame.rb', line 100
# Row count, taken from the first column; 0 when there are no columns.
#
# @return [Integer]
def size
  first_column = @vectors.each_value.first
  first_column&.size || 0
end
#sort_by(&block) ⇒ Object
257 258 259 |
# File 'lib/rover/data_frame.rb', line 257
# Non-destructive counterpart of #sort_by!: sorts a duplicate.
#
# @yield forwarded to #sort_by!
# @return [Object] the sorted duplicate
def sort_by(&block)
  copy = dup
  copy.sort_by!(&block)
end
#sort_by! ⇒ Object
245 246 247 248 249 250 251 252 253 254 255 |
# File 'lib/rover/data_frame.rb', line 245
# Reorders the rows in place by the value the block returns for each row.
#
# @yieldparam row [Hash] column key => value for one row
# @yieldreturn [Object] sort key for that row
# @return [self]
def sort_by!
  # Row indexes arranged in the sorted order.
  order = size.times.sort_by do |i|
    yield @vectors.transform_values { |v| v[i] }
  end

  # Re-assign every column with its values picked in that order.
  @vectors.each do |k, v|
    self[k] = v.to_numo.at(order)
  end

  self
end
#tail(n = 5) ⇒ Object
153 154 155 |
# File 'lib/rover/data_frame.rb', line 153
# Shorthand for #last with a default of five rows.
#
# @param n [Integer] number of rows
# @return [Object] the result of +last(n)+
def tail(n = 5)
  last(n)
end
#to_a ⇒ Object
173 174 175 176 177 178 179 |
# File 'lib/rover/data_frame.rb', line 173
# Collects every row yielded by #each_row into an array.
#
# @return [Array] one element per row
def to_a
  [].tap do |rows|
    each_row { |row| rows << row }
  end
end
#to_csv ⇒ Object
193 194 195 196 197 198 199 200 201 202 |
# File 'lib/rover/data_frame.rb', line 193
# Serialises the frame as CSV: a header line with the column keys followed by
# one line per row.
#
# @return [String]
def to_csv
  # Loaded lazily so csv is only required when this method is used.
  require "csv"

  columns = vectors.values.map(&:to_numo)
  CSV.generate do |csv|
    csv << keys
    size.times do |i|
      csv << columns.map { |column| column[i] }
    end
  end
end
#to_h ⇒ Object
181 182 183 184 185 186 187 |
# File 'lib/rover/data_frame.rb', line 181
# Converts the frame to a hash of column key => +to_a+ of that column.
#
# @return [Hash]
def to_h
  @vectors.transform_values(&:to_a)
end
#to_html ⇒ Object
for IRuby
205 206 207 208 |
# File 'lib/rover/data_frame.rb', line 205 def to_html require "iruby" IRuby::HTML.table(to_h) end |
#to_numo ⇒ Object
189 190 191 |
# File 'lib/rover/data_frame.rb', line 189
# Stacks the Numo form of every column with +Numo::NArray.column_stack+.
#
# @return [Object] the result of +Numo::NArray.column_stack+
def to_numo
  columns = vectors.values.map(&:to_numo)
  Numo::NArray.column_stack(columns)
end
#vectors ⇒ Object
dup to prevent direct modification of keys
89 90 91 |
# File 'lib/rover/data_frame.rb', line 89
# Shallow copy of the internal column hash.
# dup to prevent direct modification of keys
#
# @return [Hash] column key => column
def vectors
  @vectors.dup
end