Class: Mikon::DataFrame

Inherits:
Object
  • Object
show all
Defined in:
lib/mikon/core/dataframe.rb,
lib/mikon/plot.rb,
lib/mikon/pivot.rb

Overview

The main data structure of the Mikon gem. A DataFrame consists of labels (column names), an index (row names), and data (the column values).

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(source, options = {}) ⇒ DataFrame

Returns a new instance of DataFrame.


12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/mikon/core/dataframe.rb', line 12

# Build a DataFrame from an Array or a Hash.
#
# Accepted sources, as dispatched below:
# * empty Array                    -> a single empty column
# * Array of Mikon::DArray         -> used as the columns
# * Array of Mikon::Row            -> labels and index are taken from the rows
# * Array of Hash                  -> one row per hash, labels from the first hash
# * Array of Array                 -> one column per inner array
# * Hash of label => Array         -> one column per entry
# * Hash of label => Mikon::Series -> one column per entry (via Series#to_darr)
#
# @param source [Array, Hash] the data, in one of the shapes listed above
# @param options [Hash]
# @option options [String] :name name of the frame (defaults to a random UUID)
# @option options [Symbol, Array] :index label of the column to use as the
#   index, or the index itself
# @option options [Array] :labels column names; overrides labels inferred from source
# @raise [RuntimeError] when the source or the index option is not acceptable
def initialize(source, options={})
  options = {
    name: SecureRandom.uuid(),
    index: nil,
    labels: nil
  }.merge(options)

  case
  when source.is_a?(Array)
    case
    when source.length == 0
      @data = [DArray.new([])]
    when source.all? {|el| el.is_a?(Mikon::Series)}
      raise "NotImplementedError" + source.to_s

    when source.all? {|el| el.is_a?(Mikon::DArray)}
      @data = source

    when source.all? {|el| el.is_a?(Mikon::Row)}
      @labels = source.first.labels
      @index = source.map{|row| row.index}
      @data = source.map{|row| row.to_hash.values}.transpose.map do |arr|
        Mikon::DArray.new(arr)
      end

    when source.all? {|el| el.is_a?(Hash)}
      @labels = source.first.keys
      @data = source.map{|hash| hash.values}.transpose.map do |arr|
        Mikon::DArray.new(arr)
      end

    when source.all? {|el| el.is_a?(Array)}
      @data = source.map do |arr|
        Mikon::DArray.new(arr)
      end

    else raise "Non-acceptable Arguments Error"
    end

  when source.is_a?(Hash)
    case
    when source.values.all? {|val| val.is_a?(Array)}
      @labels = source.keys
      @data = source.values.map do |arr|
        Mikon::DArray.new(arr)
      end
    when source.values.all? {|val| val.is_a?(Series)}
      # This branch used to be empty, which left @data unset and made the
      # validation step fail on nil.
      @labels = source.keys
      @data = source.values.map {|series| series.to_darr}
    else raise "Non-acceptable Arguments Error"
    end

  else raise "Non-acceptable Arguments Error"
  end

  @labels = options[:labels] unless options[:labels].nil?
  @name = options[:name]

  unless (index = options[:index]).nil?
    if index.is_a?(Symbol)
      raise "labels should be set" if @labels.nil?
      pos = @labels.index(index)
      raise "There is no column named " + index.to_s if pos.nil?
      # Work on copies so arrays handed in by the caller are not mutated, and
      # remove by position so the label and its column always stay paired.
      @labels = @labels.dup
      @data = @data.dup
      @labels.delete_at(pos)
      # NOTE(review): @index becomes the removed column object itself rather
      # than a plain Array - confirm it supports the Array-style calls made on
      # @index elsewhere (#index, #select, #length).
      @index = @data.delete_at(pos)
    elsif index.is_a?(Array)
      @index = index
    else
      raise "Invalid index type"
    end
  end

  _check_if_valid
end

Instance Attribute Details

#indexObject (readonly)

Returns the value of attribute index


366
367
368
# File 'lib/mikon/core/dataframe.rb', line 366

# Reader for the row index.
#
# @return the value currently stored in @index
def index
  @index
end

#labelsObject (readonly)

Returns the value of attribute labels


366
367
368
# File 'lib/mikon/core/dataframe.rb', line 366

# Reader for the column labels.
#
# @return the value currently stored in @labels
def labels
  @labels
end

#nameObject (readonly)

Returns the value of attribute name


366
367
368
# File 'lib/mikon/core/dataframe.rb', line 366

# Reader for the name of the frame.
#
# @return the value currently stored in @name
def name
  @name
end

Class Method Details

.from_csv(path, options = {}) {|csv| ... } ⇒ Object

Create Mikon::DataFrame from a csv/tsv file

Parameters:

  • path (String)

    path to csv

  • options (defaults to: {})

    :col_sep [String] string to separate by :headers [Array] headers

Yields:

  • (csv)

Raises:

  • (ArgumentError)

113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/mikon/core/dataframe.rb', line 113

# Create a Mikon::DataFrame from a csv/tsv file.
#
# Options are split in two groups: :name, :index and :labels configure the new
# DataFrame, everything else is handed to the CSV parser.
#
# @param path [String] path to the csv file
# @param options [Hash] CSV parser options (e.g. :col_sep, :headers) and
#   DataFrame options (:name, :index, :labels)
# @yield [csv] the parsed table, before it is converted to a DataFrame
# @raise [ArgumentError] when options[:headers] is false
# @return [Mikon::DataFrame]
def self.from_csv(path, options={})
  csv_options = {
    :col_sep => ',',
    :headers => true,
    :converters => :numeric,
    :header_converters => :symbol,
  }

  options = csv_options.merge(options)
  raise ArgumentError, "options[:headers] should be set" if options[:headers] == false
  options.delete(:header_converters) if options[:headers].is_a?(Array)

  # Keep DataFrame options away from the CSV parser, which rejects unknown
  # options.
  frame_keys = [:name, :index, :labels]
  parser_options = options.reject {|key, _| frame_keys.include?(key)}

  # CSV.read takes its options as keywords; the mode defaults to "r".
  csv = CSV.read(path, **parser_options)
  yield csv if block_given?

  hash = {}
  csv.by_col.each {|label, arr| hash[label] = arr}

  # As before, the constructor receives every option except the parser defaults.
  frame_options = options.reject {|key, _| csv_options.key?(key)}

  self.new(hash, frame_options)
end

Instance Method Details

#[](arg) ⇒ Object

Accessor for column and rows

Examples:

df = DataFrame.new({a: [1, 2, 3], b: [2, 3, 4]})
df[0..1].to_json #-> {a: [1, 2], b: [2, 3]}
df[:a] #-> <Mikon::Series>

140
141
142
143
144
145
146
147
148
149
# File 'lib/mikon/core/dataframe.rb', line 140

# Accessor for columns and rows.
#
# A Range selects every row whose index value is included in the range and
# returns them as a new DataFrame; a Symbol returns the column with that
# label. Any other argument yields nil.
#
# @param arg [Range, Symbol]
# @return [Mikon::DataFrame, Mikon::Series, nil]
def [](arg)
  if arg.is_a?(Range)
    selected = @index.select {|label| arg.include?(label)}
    picked_rows = selected.map {|label| self.row(label)}
    Mikon::DataFrame.new(picked_rows, {index: selected})
  elsif arg.is_a?(Symbol)
    self.column(arg)
  end
end

#_check_if_validObject


85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/mikon/core/dataframe.rb', line 85

# Normalize the internal state after construction or modification.
#
# * pads every column to the length of the longest one (via #expand)
# * creates a default 0-based index when none is set
# * creates default labels (:"0", :"1", ...) or converts labels to Symbols
#
# @raise [RuntimeError] when the index length differs from the column length
def _check_if_valid
  # All arrays should have the same length. A frame without any column has
  # length 0 (max returns nil for an empty list).
  length = @data.map{|darr| darr.length}.max || 0
  @data.each{|darr| darr.expand(length) if darr.length < length}

  # DataFrame should have index object
  @index = (0..(length-1)).to_a if @index.nil?
  raise "index should have the same length as arrays" if @index.length != length

  # Labels should be an instance of Symbol
  if @labels.nil?
    @labels = @data.each_index.map{|i| i.to_s.to_sym}
  elsif @labels.any?{|label| !label.is_a?(Symbol)}
    @labels = @labels.map{|label| label.to_sym}
  end
end

#all?(&block) ⇒ Boolean

Mikon::Row DSL

Returns:

  • (Boolean)

246
247
248
249
# File 'lib/mikon/core/dataframe.rb', line 246

# Mikon::Row DSL: the block is evaluated in the context of each row.
#
# @return [Boolean] false as soon as the block is falsy for a row, true otherwise
def all?(&block)
  each_row do |row|
    satisfied = row.instance_eval(&block)
    return false unless satisfied
  end
  true
end

#any?(&block) ⇒ Boolean

Mikon::Row DSL

Returns:

  • (Boolean)

252
253
254
255
# File 'lib/mikon/core/dataframe.rb', line 252

# Mikon::Row DSL: the block is evaluated in the context of each row.
#
# @return [Boolean] true as soon as the block is truthy for a row, false otherwise
def any?(&block)
  each_row do |row|
    matched = row.instance_eval(&block)
    return true if matched
  end
  false
end

#column(label) ⇒ Object

Access column with its name


152
153
154
155
156
# File 'lib/mikon/core/dataframe.rb', line 152

# Access a column by its label.
#
# @param label [Symbol] the column label
# @return [Mikon::Series] built from the label, the column data and the index
# @raise [RuntimeError] when there is no column with the given label
def column(label)
  pos = @labels.index(label)
  # Interpolate instead of "String + Symbol", which raised a TypeError and hid
  # the intended message.
  raise "There is no column named #{label}" if pos.nil?
  Mikon::Series.new(label, @data[pos], index: @index)
end

#delete(label) ⇒ Object

Delete column


351
352
353
354
355
356
# File 'lib/mikon/core/dataframe.rb', line 351

# Delete a column (destructive).
#
# @param label [Symbol] label of the column to remove
# @return the removed column
# @raise [RuntimeError] when there is no column with the given label
def delete(label)
  column_at = @labels.index(label)
  if column_at.nil?
    raise "there is no column named #{label}"
  end
  @labels.delete_at(column_at)
  @data.delete_at(column_at)
end

#dupObject


358
359
360
# File 'lib/mikon/core/dataframe.rb', line 358

# Create a copy of this DataFrame.
#
# Every column is duplicated, and the copy gets its own index and labels
# arrays, so destructive operations on one frame do not affect the other.
#
# @return [Mikon::DataFrame]
def dup
  # The option key is :labels; the former :label key was ignored by the
  # constructor, so the copy lost its column names.
  Mikon::DataFrame.new(@data.map{|darr| darr.dup}, {index: @index.dup, labels: @labels.dup})
end

#dup_only_validObject


362
363
364
# File 'lib/mikon/core/dataframe.rb', line 362

# Currently just an alias-like wrapper: returns the result of #dup.
def dup_only_valid
  dup
end

#each(&block) ⇒ Object

Iterate rows using Mikon::Row DSL


225
226
227
228
229
230
231
# File 'lib/mikon/core/dataframe.rb', line 225

# Iterate rows using the Mikon::Row DSL: the block is evaluated in the context
# of each row.
#
# @return [Enumerator] when no block is given
# @return [self] otherwise
def each(&block)
  return to_enum(:each) unless block_given?
  each_row {|row| row.instance_eval(&block)}
  self
end

#each_row(&block) ⇒ Object

Iterate row


334
335
336
337
338
339
340
341
# File 'lib/mikon/core/dataframe.rb', line 334

# Iterate over the rows, yielding one Mikon::Row per index entry.
#
# Each row is built from the labels, the values found at that position in
# every column, and the index entry itself.
#
# @return [Enumerator] when no block is given
def each_row(&block)
  return to_enum(:each_row) unless block_given?
  @index.each.with_index do |row_id, pos|
    values = @data.map {|column| column[pos]}
    block.call(Mikon::Row.new(@labels, values, row_id))
  end
end

#fillna(value = 0) ⇒ Object

Replace NaN with specified value (destructive)

Parameters:

  • value (Float|Fixnum) (defaults to: 0)

    new value to replace NaN


345
346
347
348
# File 'lib/mikon/core/dataframe.rb', line 345

# Replace NaN with the specified value (destructive).
#
# Delegates to #fillna of every column.
#
# @param value [Float, Integer] new value to replace NaN
# @return [self]
def fillna(value=0)
  @data.each do |column|
    column.fillna(value)
  end
  self
end

#head(num) ⇒ Object

Returns the leading rows, like the Linux `head` command: selects the rows whose index falls within 0..num-1.


159
160
161
# File 'lib/mikon/core/dataframe.rb', line 159

# Leading rows of the frame, like the Linux `head` command.
#
# Delegates to #[] with the range 0..num-1, so rows are selected by their
# index value.
#
# @param num [Integer] number of rows
def head(num)
  upper = num - 1
  self[0..upper]
end

#insert_column(*args, &block) ⇒ Object

Insert column using Mikon::Row DSL or raw Array

Examples:

df = Mikon::DataFrame.new({a: [1,2,3], b: [2,3,4]})
df.insert_column(:c){a + b}.to_json #-> {a: [1,2,3], b: [2,3,4], c: [3,5,7]}
df.insert_column(:d, [1, 2, 3]).to_json #-> {a: [1,2,3], b: [2,3,4], c: [3,5,7], d: [1,2,3]}
df.insert_column((df[:d]*2).name(:e)) #-> {a: [1,2,3], b: [2,3,4], c: [3,5,7], d: [1,2,3], e: [2,4,6]}

Parameters:

  • Symbol (label)

    the name of new column (optional)

  • the (Array|Series|DArray)

    content of new column (optional)


290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
# File 'lib/mikon/core/dataframe.rb', line 290

# Insert a column using the Mikon::Row DSL or raw data (destructive).
#
# Supported call forms:
#   insert_column(:label) { ...row DSL... }
#   insert_column(:label, array_or_series_or_darray)
#   insert_column(series)   # the label is taken from the series name
#
# @raise [ArgumentError] when the arguments match none of the forms above
# @return [self]
def insert_column(*args, &block)
  if block_given?
    rows = []
    name = args[0]
    self.each_row do |row|
      val = row.instance_eval(&block)
      row[name] = val
      rows.push(row)
    end
    if rows.empty?
      # Nothing to evaluate on a frame without rows: just register an empty
      # column (unless the label already exists).
      unless @labels.include?(name)
        @data.push(Mikon::DArray.new([]))
        @labels.push(name)
      end
    else
      @data = rows.map{|row| row.to_hash.values}.transpose.map do |arr|
        Mikon::DArray.new(arr)
      end
      @labels = rows.first.labels
    end
  else
    first, content = args
    # Resolve label and column before touching any state, so an invalid call
    # cannot leave a label without data behind.
    if first.is_a?(Symbol)
      name = first
      column =
        if content.is_a?(Mikon::DArray)
          content
        elsif content.is_a?(Mikon::Series)
          content.to_darr
        elsif content.is_a?(Array)
          Mikon::DArray.new(content)
        else
          raise ArgumentError, "content of the column should be an Array, Series or DArray"
        end
    elsif first.is_a?(Mikon::Series)
      name = first.name
      column = first.to_darr
    else
      raise ArgumentError, "first argument should be a Symbol or a Series"
    end
    @data.push(column)
    @labels.push(name)
  end
  _check_if_valid
  return self
end

#lengthObject

Returns the length of the columns, i.e. the number of rows.


103
104
105
# File 'lib/mikon/core/dataframe.rb', line 103

# Length of the columns, i.e. the number of rows.
#
# Falls back to the length of the index when no column is left (e.g. after
# every column has been deleted), instead of failing on nil.
#
# @return [Integer]
def length
  first_column = @data.first
  return first_column.length unless first_column.nil?
  @index.nil? ? 0 : @index.length
end

#map(&block) ⇒ Object Also known as: collect

Iterate rows using Mikon::Row DSL and return new Mikon::Series


234
235
236
237
238
239
240
241
# File 'lib/mikon/core/dataframe.rb', line 234

# Iterate rows using the Mikon::Row DSL and collect the block results into a
# new Mikon::Series named :new_series that carries a copy of the index.
#
# @return [Enumerator] when no block is given
# @return [Mikon::Series] otherwise
def map(&block)
  return to_enum(:map) unless block_given?
  values = []
  each_row {|row| values << row.instance_eval(&block)}
  Mikon::Series.new(:new_series, values, index: @index.clone)
end

#pivot(args = {}) ⇒ Object

Experimental Implementation. DO NOT USE THIS METHOD

Raises:

  • (ArgumentError)

5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/mikon/pivot.rb', line 5

# Experimental implementation. DO NOT USE THIS METHOD.
#
# Builds a new DataFrame with one column per factor of args[:column] and one
# row per factor of args[:row]. Each cell holds the first args[:value] entry
# of the rows matching both factors, or args[:fill_value] when none matches.
#
# @param args [Hash] :column, :row and :value must be Symbols;
#   :fill_value defaults to Float::NAN
# @raise [ArgumentError] when :column, :row or :value is not a Symbol
# @return [Mikon::DataFrame]
def pivot(args={})
  defaults = {column: nil, row: nil, value: nil, fill_value: Float::NAN}
  args = defaults.merge(args)

  required = [:column, :row, :value]
  raise ArgumentError unless required.all?{|key| args[key].is_a?(Symbol)}

  column_factors = self[args[:column]].factors
  row_factors = self[args[:row]].factors

  source = column_factors.reduce({}) do |table, label|
    # Rows that belong to the current output column.
    subset = self.select{|row| row[args[:column]] == label}
    cells = []
    row_factors.each do |factor|
      if subset.any?{|row| row[args[:row]] == factor}
        matched = subset.select{|row| row[args[:row]] == factor}[args[:value]]
        cells.push(matched.to_a[0])
      else
        cells.push(args[:fill_value])
      end
    end
    table[label] = cells
    table
  end

  Mikon::DataFrame.new(source, index: row_factors)
end

#plot(args = {}) ⇒ Object


24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/mikon/plot.rb', line 24

# Build a Nyaplot::Plot from this DataFrame.
#
# @param args [Hash]
#   :type    - :line (default), :box or :scatter
#   :x, :y   - column labels used by the scatter plot
#   :fill_by - passed to the scatter diagram's fill_by when given
#   :color   - name of the Nyaplot::Colors method that provides the colors
#              (Nyaplot::Colors.qual when omitted)
# @return [Nyaplot::Plot]
def plot(args={})
  defaults = {
    :type => :line,
    :x => nil,
    :y => nil,
    :fill_by => nil,
    :color => nil
  }
  args = defaults.merge(args)

  figure = Nyaplot::Plot.new
  figure.x_label("")
  figure.y_label("")

  colors =
    if args[:color].nil?
      Nyaplot::Colors.qual.to_a
    else
      Nyaplot::Colors.send(args[:color]).to_a
    end

  case args[:type]
  when :line
    # One line per column, each taking the last remaining color.
    @data.each.with_index do |darr, pos|
      line = figure.add(:line, @index, darr.to_a)
      line.color(colors.pop)
      line.title(@labels[pos])
    end
    figure.legend(true)

  when :box
    figure.add_with_df(self, :box, *@labels)

  when :scatter
    scatter = figure.add_with_df(self, :scatter, args[:x], args[:y])
    scatter.color(colors)
    scatter.fill_by(args[:fill_by]) unless args[:fill_by].nil?
    figure.x_label(args[:x])
    figure.y_label(args[:y])
  end

  figure
end

#row(index) ⇒ Object

Access row using index


327
328
329
330
331
# File 'lib/mikon/core/dataframe.rb', line 327

# Access a row by its index value.
#
# @param index the index value of the row
# @return [Mikon::Row] built from the labels, the values of the row and the index value
# @raise [RuntimeError] when there is no row with the given index value
def row(index)
  pos = @index.index(index)
  # Fail with a clear message instead of passing nil on to the column accessors.
  raise "There is no row indexed #{index.inspect}" if pos.nil?
  arr = @data.map{|column| column[pos]}
  Mikon::Row.new(@labels, arr, index)
end

#select(&block) ⇒ Object Also known as: filter

Select rows using Mikon::Row DSL and create new DataFrame

Examples:

df = Mikon::DataFrame.new({a: [1,2,3], b: [2,3,4]})
df.select{a%2==0}[:a].to_a #-> [2]

210
211
212
213
214
215
216
217
218
219
220
# File 'lib/mikon/core/dataframe.rb', line 210

# Select rows using the Mikon::Row DSL and create a new DataFrame from them.
#
# The block is evaluated in the context of each row; rows for which it is
# truthy are kept. When no row matches, the result is an empty frame that
# still carries the column labels.
#
# @return [Enumerator] when no block is given
# @return [Mikon::DataFrame] otherwise
def select(&block)
  return self.to_enum(:select) unless block_given?
  rows = []
  self.each_row do |row|
    rows.push(row) if row.instance_eval(&block)
  end
  if rows.empty? && !@labels.empty?
    # An empty list of rows carries no labels, so build the empty columns
    # explicitly to keep them.
    return Mikon::DataFrame.new(@labels.map { [] }, {labels: @labels.dup})
  end
  Mikon::DataFrame.new(rows)
end

#sort(label, ascending = true) ⇒ Object

Sort by label

Parameters:

  • label (Symbol)

    column name to sort by

  • ascending (Bool) (defaults to: true)

    default true


273
274
275
276
277
278
279
# File 'lib/mikon/core/dataframe.rb', line 273

# Sort by the values of a column.
#
# @param label [Symbol] column name to sort by
# @param ascending [Boolean] default true
# @raise [RuntimeError] when there is no column with the given label
# @return [Mikon::DataFrame] the result of #sort_by
def sort(label, ascending=true)
  column_pos = @labels.index(label)
  raise "No column named #{label}" if column_pos.nil?
  order = @data[column_pos].sorted_indices
  order.reverse! unless ascending
  # Each row is keyed by its rank in the requested order.
  self.sort_by.with_index{|_row, row_pos| order.index(row_pos)}
end

#sort_by(ascending = true, &block) ⇒ Object

Sort using Mikon::Row DSL

Parameters:

  • ascending (Bool) (defaults to: true)

    default true


260
261
262
263
264
265
266
267
# File 'lib/mikon/core/dataframe.rb', line 260

# Sort using the Mikon::Row DSL.
#
# The block is evaluated for every row (via #map); rows are ordered by the
# resulting values.
#
# @param ascending [Boolean] default true
# @return [Enumerator] when no block is given
# @return [Mikon::DataFrame] a new frame with sorted columns and index
def sort_by(ascending=true, &block)
  return self.to_enum(:sort_by) unless block_given?
  order = self.map(&block).to_darr.sorted_indices
  order.reverse! unless ascending

  # rank[row position] = position of that row in the sorted frame.
  # NOTE(review): assumes sorted_indices returns an Array-like permutation of
  # the row positions - confirm against DArray#sorted_indices.
  rank = {}
  order.each_with_index{|row_pos, sorted_pos| rank[row_pos] = sorted_pos}

  data = @data.map{|darr| darr.sort_by.with_index{|_val, pos| rank[pos]}}
  index = @index.sort_by.with_index{|_val, pos| rank[pos]}
  # Hand over a copy of the labels so the two frames do not share one array.
  Mikon::DataFrame.new(data, {index: index, labels: @labels.dup})
end

#tail(num) ⇒ Object

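Returns the trailing rows, like the Linux `tail` command: selects the rows whose index falls within (length-num)..(length-1).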


164
165
166
167
# File 'lib/mikon/core/dataframe.rb', line 164

# Trailing rows of the frame, like the Linux `tail` command.
#
# Delegates to #[] with the range of the last num positions, so rows are
# selected by their index value.
#
# @param num [Integer] number of rows
def tail(num)
  stop = self.length - 1
  start = stop - num + 1
  self[start..stop]
end

#to_html(threshold = 50) ⇒ Object

IRuby notebook automatically calls this method


179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/mikon/core/dataframe.rb', line 179

# Render the frame as an HTML table. IRuby notebook calls this method
# automatically.
#
# Rows 0..threshold and the last row are rendered; when rows in between are
# left out, a "..." row marks the gap.
#
# NOTE(review): labels, index and cell values are inserted without HTML
# escaping - confirm whether untrusted data can reach this method.
#
# @param threshold [Integer] position of the last row rendered before the gap
# @return [String]
def to_html(threshold=50)
  last_pos = self.length - 1
  # Rows are hidden only if something lies between the threshold and the last row.
  rows_hidden = last_pos > threshold + 1

  parts = ["<html><table><tr><td></td>"]
  parts.concat(@labels.map{|label| "<th>" + label.to_s + "</th>"})
  parts.push("</tr>")
  self.each_row.with_index do |row, pos|
    next if pos > threshold && pos != last_pos
    parts.push("<tr><th>" + @index[pos].to_s + "</th>")
    parts.concat(@labels.map{|label| "<td>" + row[label].to_s + "</td>"})
    parts.push("</tr>")
    if pos == threshold && rows_hidden
      parts.push("<tr><th>...</th>" + "<td>...</td>"*@labels.length + "</tr>")
    end
  end
  # Close the <html> element opened above as well.
  parts.push("</table></html>")
  parts.join
end

#to_json(*args) ⇒ Object

Compatible with Nyaplot::DataFrame.to_json


170
171
172
173
174
175
176
# File 'lib/mikon/core/dataframe.rb', line 170

# Serialize the frame as a JSON array with one object per row.
# Compatible with Nyaplot::DataFrame.to_json.
#
# @param args arguments of the JSON generator (e.g. its state or options);
#   they are forwarded so formatting set by the caller is applied to the rows
# @return [String]
def to_json(*args)
  rows = []
  self.each_row do |row|
    rows.push(row.to_hash)
  end
  rows.to_json(*args)
end

#to_s(threshold = 50) ⇒ Object


193
194
195
196
197
198
199
200
201
202
203
# File 'lib/mikon/core/dataframe.rb', line 193

# Display the frame as a text table through Formatador.
#
# Rows 0..threshold and the last row are shown; when rows in between are left
# out, a "..." row marks the gap.
#
# NOTE(review): the return value is whatever Formatador.display_table
# returns - confirm that it is a String, as callers of #to_s expect.
#
# @param threshold [Integer] position of the last row shown before the gap
def to_s(threshold=50)
  last_pos = self.length - 1
  # Rows are hidden only if something lies between the threshold and the last row.
  rows_hidden = last_pos > threshold + 1

  arr = []
  self.each_row.with_index do |row, pos|
    next if pos > threshold && pos != last_pos
    arr.push({"" => @index[pos]}.merge(row.to_hash))
    if pos == threshold && rows_hidden
      arr.push(@labels.reduce({"" => "..."}){|memo, label| memo[label] = "..."; memo})
    end
  end
  Formatador.display_table(arr)
end