Method: Daru::DataFrame#initialize

Defined in:
lib/daru/dataframe.rb

#initialize(source = {}, opts = {}) ⇒ DataFrame

A DataFrame basically consists of an Array of Vector objects. Its rows and columns are indexed by the index and vectors Index objects, respectively.

Arguments

  • source - Source from which the DataFrame is to be initialized. Can be a Hash

of names and vectors (array or Daru::Vector), an array of arrays or array of Daru::Vectors.

Options

:order - An Array/Daru::Index/Daru::MultiIndex containing the order in which Vectors should appear in the DataFrame.

:index - An Array/Daru::Index/Daru::MultiIndex containing the order in which rows of the DataFrame will be named.

:name - A name for the DataFrame.

:clone - Specify as true or false. When set to false, and Vector objects are passed for the source, the Vector objects will not be duplicated when creating the DataFrame. Has no effect if an Array is passed as the source, or if the passed Daru::Vectors have different indexes. Defaults to true.

Usage

df = Daru::DataFrame.new
# =>
# <Daru::DataFrame(0x0)>
# Creates an empty DataFrame with no rows or columns.

df = Daru::DataFrame.new({}, order: [:a, :b])
#<Daru::DataFrame(0x2)>
  a   b
# Creates a DataFrame with no rows and columns :a and :b

df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
  index: [:a, :b, :c, :d], name: :spider_man)

# =>
# <Daru::DataFrame:80766980 @name = spider_man @size = 4>
#             b          a
#  a          6          1
#  b          7          2
#  c          8          3
#  d          9          4

df = Daru::DataFrame.new([[1,2,3,4],[6,7,8,9]], name: :bat_man)

# =>
# #<Daru::DataFrame: bat_man (4x2)>
#             0          1
#  0          1          6
#  1          2          7
#  2          3          8
#  3          4          9

# Dataframe having Index name

df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
  index: Daru::Index.new([:a, :b, :c, :d], name: 'idx_name'),
  name: :spider_man)

# =>
# <Daru::DataFrame:80766980 @name = spider_man @size = 4>
# idx_name            b          a
#        a          6          1
#        b          7          2
#        c          8          3
#        d          9          4

idx = Daru::Index.new [100, 99, 101, 1, 2], name: "s1"
=> #<Daru::Index(5): s1 {100, 99, 101, 1, 2}>

df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
  c: [11,22,33,44,55]},
  order: [:a, :b, :c],
  index: idx)
 # =>
 #<Daru::DataFrame(5x3)>
 #   s1   a   b   c
 #  100   1  11  11
 #   99   2  12  22
 #  101   3  13  33
 #    1   4  14  44
 #    2   5  15  55


346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
# File 'lib/daru/dataframe.rb', line 346

# Builds the DataFrame from +source+ and the supplied options.
#
# @param source [Hash, Array, #empty?] data to build the frame from. An empty
#   Array/Hash (or any other object reporting itself as empty) only creates
#   empty vectors; a non-empty Array or Hash is handed to the matching
#   initialize_from_* helper.
# @param opts [Hash] options hash. +:order+, +:index+ and +:name+ are read
#   here; the complete hash is also forwarded to the initialize_from_* helpers.
def initialize(source = {}, opts = {}) # rubocop:disable Metrics/MethodLength
  # FIXME: replace the opts hash with keyword arguments once Ruby >= 2.1 can be assumed.
  column_order = opts[:order]
  row_index = opts[:index]
  @data = []
  @name = opts[:name]

  # TODO: likely want to remove this matcher (and the branch that uses it).
  reports_empty = ->(candidate) { candidate.empty? }

  # The branch order is significant: empty literals are matched first, then
  # the concrete container classes, and only then the generic emptiness probe.
  case source
  when [], {}
    create_empty_vectors(column_order, row_index)
  when Array
    initialize_from_array(source, column_order, row_index, opts)
  when Hash
    initialize_from_hash(source, column_order, row_index, opts)
  when reports_empty
    create_empty_vectors(column_order, row_index)
  end

  # Post-construction steps, always run in this order regardless of source.
  set_size
  validate
  update
end