Class: CADataFrame

Inherits:
Object
  • Object
show all
Defined in:
lib/carray/dataframe/dataframe.rb,
lib/R.rb,
lib/carray/dataframe/dataframe.rb,
lib/carray/dataframe/dataframe.rb,
lib/carray/dataframe/dataframe.rb,
lib/carray/dataframe/dataframe.rb,
lib/carray/dataframe/dataframe.rb,
lib/carray/dataframe/dataframe.rb,
lib/carray/dataframe/dataframe.rb,
lib/carray/dataframe/dataframe.rb

Overview

PIVOT TABLE

Defined Under Namespace

Classes: Arranger

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(columns_or_table, row_index: nil, column_names: nil, &block) ⇒ CADataFrame

Constructor



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/carray/dataframe/dataframe.rb', line 35

# Constructor.
#
# columns_or_table is either a Hash (column name => column data) or a
# CArray table. For a CArray the column names come from the column_names
# keyword or, failing that, from table.column_names.
# row_index, when given, is converted with to_ca.object.
# When a block is given it is forwarded to #arrange after setup.
#
# Raises RuntimeError for unsupported input, for a CArray without
# column names, and for Hash columns whose sizes differ.
def initialize(columns_or_table, row_index: nil, column_names: nil, &block)
  case columns_or_table
  when Hash
    @column_names = columns_or_table.keys.map(&:to_s)
    @columns      = normalize_columns(columns_or_table)
    # The first column defines the expected row count.
    @row_number   = @columns.first[1].size
    unless @column_names.all? { |key| @columns[key].size == @row_number }
      raise "column sizes mismatch"
    end
  when CArray
    if column_names
      @column_names = column_names.map(&:to_s)
    elsif columns_or_table.respond_to?(:column_names)
      @column_names = columns_or_table.column_names.map(&:to_s)
    else
      raise "data table (CArray) has no method 'column_names'."
    end
    @columns    = table_to_columns(columns_or_table)
    @row_number = columns_or_table.dim0
  else
    raise "unknown data"
  end
  @row_index   = row_index ? row_index.to_ca.object : nil
  @__methods__ = {}
  arrange(&block) if block_given?
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args) ⇒ Object



167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/carray/dataframe/dataframe.rb', line 167

# Resolves argument-less calls to columns, trying in order:
# the name itself, the name with "_" replaced by "." (for R style
# names), and the alias table in @__methods__.
# Anything else raises RuntimeError.
def method_missing(name, *args)
  if args.empty?
    name = name.to_s
    dotted = name.gsub(/_/,'.') ### For R
    return @columns[name] if has_column?(name)
    return @columns[dotted] if has_column?(dotted)
    return @columns[@__methods__[name]] if @__methods__.include?(name)
  end
  raise "no method '#{name}' for CADataFrame"
end

Instance Attribute Details

#column_names ⇒ Object (readonly)

Attributes



121
122
123
# File 'lib/carray/dataframe/dataframe.rb', line 121

# Reader for @column_names (set by the constructor from the column
# names converted with to_s).
def column_names
  @column_names
end

#column_number ⇒ Object (readonly)

Attributes



121
122
123
# File 'lib/carray/dataframe/dataframe.rb', line 121

# Number of columns.
#
# Returns @column_number when it has been assigned; otherwise derives
# the count from @column_names (nil when that is unset as well). The
# visible constructor never assigns @column_number, so without the
# fallback this reader would always return nil.
def column_number
  return @column_number unless @column_number.nil?
  @column_names ? @column_names.size : nil
end

#columns ⇒ Object (readonly)

Attributes



121
122
123
# File 'lib/carray/dataframe/dataframe.rb', line 121

# Reader for @columns, the Hash that maps column names to column data.
def columns
  @columns
end

#row_index ⇒ Object (readonly)

Attributes



121
122
123
# File 'lib/carray/dataframe/dataframe.rb', line 121

# Reader for @row_index; nil when the frame has no explicit row index.
def row_index
  @row_index
end

#row_number ⇒ Object (readonly)

Attributes



121
122
123
# File 'lib/carray/dataframe/dataframe.rb', line 121

# Reader for @row_number, the row count determined by the constructor.
def row_number
  @row_number
end

Class Method Details

.concat(*args) ⇒ Object



1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
# File 'lib/carray/dataframe/dataframe.rb', line 1003

# Binds the given data frames along dimension 0 (CArray.bind), column
# by column, using the column names and data types of the first frame.
# The row indexes are joined only when every frame has one; otherwise
# the result has no row index.
def self.concat(*args)
  names = args.first.column_names
  new_columns = {}
  names.each do |name|
    parts = args.map { |t| t.col(name) }
    new_columns[name] = CArray.bind(parts.first.data_type, parts, 0)
  end
  indexes = args.map(&:row_index)
  new_row_index = indexes.all? ? CArray.join(*indexes) : nil
  CADataFrame.new(new_columns, row_index: new_row_index)
end

.from_csv(*args, &block) ⇒ Object



1583
1584
1585
# File 'lib/carray/dataframe/dataframe.rb', line 1583

# Reads a table with CArray.from_csv (arguments and block are passed
# through), converts it with to_dataframe and masks out nil in every
# column.
def self.from_csv(*args, &block)
  table = CArray.from_csv(*args, &block)
  table.to_dataframe.arrange { maskout nil, *column_names }
end

.from_R_data_frame(obj) ⇒ Object



292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/R.rb', line 292

# Builds a CADataFrame from an R data frame object.
#
# Switches RSRuby to PROC_CONVERSION and registers R::CONVERTER for
# every value; the mode is not restored afterwards.
# Each column is fetched with R's `$` operator and stored as a CArray
# (arrays are converted, anything else is wrapped in a one-element
# array first). nil is masked out in every column, and the R
# 'row.names' attribute, when present, becomes the row index.
def self.from_R_data_frame(obj)
  r = R.instance
  RSRuby.set_default_mode(RSRuby::PROC_CONVERSION)
  r.proc_table[lambda{|x| true }] = R::CONVERTER
  # colnames may come back as a scalar; normalise to a flat list.
  column_names = [r.colnames(obj).to_a].flatten
  row_names = r.attr(obj, 'row.names')
  columns = {}
  column_names.each do |name|
    value = r['$'].call(obj, name.to_s)
    columns[name] =
      case value
      when CArray then value
      when Array  then value.to_ca
      else [value].to_ca
      end
  end
  column_names.each { |name| columns[name].maskout!(nil) }
  CADataFrame.new(columns, row_index: row_names ? row_names.to_ca : nil)
end

.load_csv(*args, &block) ⇒ Object



1579
1580
1581
# File 'lib/carray/dataframe/dataframe.rb', line 1579

# Loads a table with CArray.load_csv (arguments and block are passed
# through), converts it with to_dataframe and masks out nil in every
# column.
def self.load_csv(*args, &block)
  table = CArray.load_csv(*args, &block)
  table.to_dataframe.arrange { maskout nil, *column_names }
end

.load_sqlite3(*args) ⇒ Object



1574
1575
1576
# File 'lib/carray/dataframe/dataframe.rb', line 1574

# Loads a table with CArray.load_sqlite3 (arguments are passed
# through), converts it with to_dataframe and masks out nil in every
# column.
def self.load_sqlite3(*args)
  table = CArray.load_sqlite3(*args)
  table.to_dataframe.arrange { maskout nil, *column_names }
end

.merge(*args) ⇒ Object



992
993
994
995
996
997
998
999
1000
1001
# File 'lib/carray/dataframe/dataframe.rb', line 992

# Combines the columns of all given data frames into one frame.
# When a column name occurs more than once the later frame wins.
# The row index is taken from the first frame.
def self.merge(*args)
  merged = {}
  args.each do |table|
    table.column_names.each { |name| merged[name] = table.col(name) }
  end
  CADataFrame.new(merged, row_index: args.first.row_index)
end

Instance Method Details

#-@ ⇒ Object



1093
1094
1095
# File 'lib/carray/dataframe/dataframe.rb', line 1093

# Unary minus: delegates to cmp with the :-@ operator.
def -@
  cmp(:-@)
end

#<(other) ⇒ Object



1097
1098
1099
# File 'lib/carray/dataframe/dataframe.rb', line 1097

# Less-than comparison: delegates to cmp with :< and the operand.
def <(other)
  cmp(:<, other)
end

#<=(other) ⇒ Object



1101
1102
1103
# File 'lib/carray/dataframe/dataframe.rb', line 1101

# Less-than-or-equal comparison: delegates to cmp with :<= and the
# operand.
def <=(other)
  cmp(:<=, other)
end

#>(other) ⇒ Object



1105
1106
1107
# File 'lib/carray/dataframe/dataframe.rb', line 1105

# Greater-than comparison: delegates to cmp with :> and the operand.
def >(other)
  cmp(:>, other)
end

#>=(other) ⇒ Object



1109
1110
1111
# File 'lib/carray/dataframe/dataframe.rb', line 1109

# Greater-than-or-equal comparison: delegates to cmp with :>= and the
# operand.
def >=(other)
  cmp(:>=, other)
end

#[](*argv) ⇒ Object

Referencing



257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
# File 'lib/carray/dataframe/dataframe.rb', line 257

# Referencing: df[row] or df[row, col].
#
# With a single argument (col is nil):
# * CADataFrame -- each column that also exists in the argument is
#   masked out by the argument's column of the same name; the other
#   columns are copied with to_ca. The argument's row index is used.
# * String, or an Array consisting only of Strings -- treated as a
#   column selection (same as df[nil, names]).
# * anything else -- used as a row selector on every column; a single
#   Integer is wrapped in an Array first.
#
# With two arguments:
# * col String/Symbol -- returns column(col)[row].
# * col Array of Strings -- selects those columns by name.
# * any other col (Integer, non-String Array, ...) -- selects column
#   names by position through @column_names.to_ca[col].
#
# Raises RuntimeError when a requested column name does not exist.
def [](*argv)
  row, col = *argv    
  new_columns = {}
  if col.is_a?(NilClass)
    case row
    when CADataFrame
      each_column_name do |key|
        if row.has_column?(key)
          new_columns[key] = column(key).maskout(row.column(key))
        else
          new_columns[key] = column(key).to_ca
        end
      end
      return CADataFrame.new(new_columns, row_index: row.row_index ? row.row_index : nil)      
    when String
      return self[nil,row]
    when Array
      if row.all?{|s| s.is_a?(String) }
        return self[nil,row]
      else
        @column_names.each do |key|
          new_columns[key] = @columns[key][row]
        end
      end
      return CADataFrame.new(new_columns, row_index: @row_index ? @row_index[row] : nil)      
    else
      # A single Integer is wrapped so that the selection stays an array.
      if row.is_a?(Integer)
        row = [row]
      end
      @column_names.each do |key|
        new_columns[key] = @columns[key][row]
      end
      return CADataFrame.new(new_columns, row_index: @row_index ? @row_index[row] : nil)      
    end
  else
    if row.is_a?(Integer)
      row = [row]
    end
    case col
    when String, Symbol
      key = col.to_s
      if has_column?(key)
         return column(key)[row]
      else
        raise "unknown column name '#{key}'"          
      end
    when Array
      if col.all?{|s| s.is_a?(String) }
        col.each do |key|
          key = key.to_s
          if has_column?(key)
            new_columns[key] = column(key)[row]
          else
            raise "unknown column name '#{key}'"          
          end
        end
      else
        # Non-String entries are treated as positions in @column_names.
        keys = @column_names.to_ca[col].to_a
        keys.each do |key|
          new_columns[key] = column(key)[row]
        end
      end
      return CADataFrame.new(new_columns, row_index: @row_index ? @row_index[row] : nil)      
    else
      if col.is_a?(Integer)
        col = [col]
      end
      keys = @column_names.to_ca[col].to_a
      keys.each do |key|
        new_columns[key] = column(key)[row]
      end
      return CADataFrame.new(new_columns, row_index: @row_index ? @row_index[row] : nil)      
    end
  end
end

#[]=(*argv) ⇒ Object

Setting Values



337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# File 'lib/carray/dataframe/dataframe.rb', line 337

# Setting values: df[row] = value or df[row, col] = value.
#
# The last argument is the value; the preceding ones are row and col.
# * col nil, row CADataFrame -- for each column shared with the
#   argument, assigns value where indexed by the argument's column.
# * col nil, row String -- same as df[nil, row] = value.
# * col nil, anything else -- assigns to the given rows of all columns.
# * col String/Symbol -- assigns into that column; when the column
#   does not exist it is appended through arrange { append ... }.
# * col Array -- assigns into each named column; raises RuntimeError
#   for an unknown name.
# * any other col -- treated as positions in @column_names.
#
# Returns value.
def []=(*argv)
  value = argv.pop
  row, col = *argv
  case col
  when NilClass
    case row
    when CADataFrame
      each_column_name do |key|
        if row.has_column?(key)
          column(key)[row.column(key)] = value
        end
      end
    when String
      self[nil,row] = value
    else
      col = @column_names.to_a
      self[row,col] = value
    end
  when String, Symbol
    key = col.to_s
    if has_column?(key)
      column(key)[row] = value
    else
      arrange {
        append key, value
      }
    end
  when Array
    col.each do |key|
      key = key.to_s
      if has_column?(key)
        column(key)[row] = value
      else
        raise "unknown column name '#{key}'"          
      end
    end
  else
    if col.is_a?(Integer)
      col = [col]
    end
    keys = @column_names.to_ca[col].to_a
    keys.each do |key|
      column(key)[row] = value
    end      
  end
  return value
end

#__methods__ ⇒ Object



72
73
74
# File 'lib/carray/dataframe/dataframe.rb', line 72

# Returns the alias table (@__methods__) consulted by method_missing.
def __methods__
  @__methods__
end

#add_suffix(suf) ⇒ Object



622
623
624
625
626
627
628
629
# File 'lib/carray/dataframe/dataframe.rb', line 622

# Returns a new data frame in which suf is appended to every column
# name. The columns themselves and the row index are reused.
def add_suffix(suf)
  renamed = {}
  each_column_name do |name|
    renamed[(name.to_s + suf).to_s] = column(name)
  end
  CADataFrame.new(renamed, row_index: @row_index)
end

#append(name, new_column = nil, &block) ⇒ Object



434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
# File 'lib/carray/dataframe/dataframe.rb', line 434

# Adds a column at the end of the column list.
#
# name       -- column name; converted with to_s so that it matches the
#               String names kept in @column_names.
# new_column -- column data; when omitted, the block result is used
#               (evaluated with instance_exec), and without a block an
#               object-typed template of the first column.
#
# Data that is not a CArray is converted with to_ca. Raises
# RuntimeError unless the column is rank 1 with @row_number elements.
# Appending a name that already exists replaces that column's data
# without listing the name twice.
#
# Returns the stored column.
def append(name, new_column = nil, &block)
  name = name.to_s
  unless new_column
    new_column = block ? instance_exec(&block) : @columns.first[1].template(:object)
  end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  if new_column.rank != 1 || new_column.size != @row_number
    raise "invalid shape of appended column"
  end
  @column_names.push(name) unless @column_names.include?(name)
  @columns[name] = new_column
  new_column
end

#arrange(&block) ⇒ Object

Arrange



407
408
409
# File 'lib/carray/dataframe/dataframe.rb', line 407

# Runs the block through a new Arranger built around this data frame
# and returns whatever Arranger#arrange returns.
def arrange(&block)
  arranger = Arranger.new(self)
  arranger.arrange(&block)
end

#as_r ⇒ Object



319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/R.rb', line 319

# Converts this data frame into an R data frame through RSRuby.
#
# Masked elements are replaced by the R NA value that matches the
# column type reported by guess_column_type_for_R (integer, numeric,
# otherwise character). The conversion runs with RSRuby's default mode
# set to NO_CONVERSION; the previous mode is restored afterwards.
def as_r
  r = R.instance
  # Capture the mode before any work that can raise, so that the ensure
  # clause never restores an unset (nil) mode.
  mode = RSRuby.get_default_mode
  new_columns = {}
  @column_names.each do |name|
    column = @columns[name]
    if column.has_mask?
      case column.guess_column_type_for_R
      when "integer"
        column = column.unmask_copy(R.NA_integer_)
      when "numeric"
        column = column.unmask_copy(R.NA_real_)
      else
        column = column.unmask_copy(R.NA_character_)
      end
    end
    new_columns[name] = R.__converter__(column.to_a)
  end
  RSRuby.set_default_mode(RSRuby::NO_CONVERSION)
  return r.as_data_frame(:x => new_columns)
ensure
  RSRuby.set_default_mode(mode) unless mode.nil?
end

#ascii_table(rowmax = :full) ⇒ Object



722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
# File 'lib/carray/dataframe/dataframe.rb', line 722

# Renders the data frame as a plain-text table.
#
# rowmax -- when an Integer smaller than the row count, only the
#           leading and trailing rows are shown with a "..." row in
#           between; any other value (default :full) shows all rows.
#
# The first table column (named with four spaces) holds the row index,
# or a 0-based sequence when there is no row index. Two layouts exist:
# when every name and cell is ASCII-only, widths are plain string
# lengths; otherwise widths come from __strwidth__ and the padding is
# adjusted per cell.
#
# Returns the table as a String.
def ascii_table(rowmax = :full)
  if @row_index
    namelist = ["    "] + @column_names
    tbl = CADFArray.new(namelist, @columns.clone.update("    " => @row_index))
  else
    namelist = ["    "] + @column_names
    tbl = CADFArray.new(namelist, @columns.clone.update("    " => CArray.int(@row_number).seq))
  end
  if rowmax.is_a?(Integer) and @row_number > rowmax
    list = tbl[0..(rowmax/2),nil].to_a
    list.push namelist.map { "..." }
    list.push *(tbl[-rowmax/2+1..-1,nil].to_a)
    tbl = list.to_ca
  end
  datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
  # Per-column flags: does the column (or its name) contain non-ASCII text?
  datamb  = datastr.convert(:boolean, &:"ascii_only?").not.sum(0).ne(0)
  namemb  = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
  mb      = datamb.or(namemb)
  namelen = namelist.map(&:length).to_ca
  datalen = datastr.convert(&:length)
  if mb.max == 0
    if datalen.size == 0
      lengths  = namelen.to_a
    else
      lengths  = datalen.max(0).pmax(namelen).to_a
    end
    hrule  = "-" + lengths.map {|len| "-"*len}.join("--") + "-"
    header = " " + 
             [namelist, lengths].transpose.map{|name, len| 
                                          "#{name.to_s.ljust(len)}" }.join("  ") + " "
    ary = [hrule, header, hrule]
    if datalen.size > 0
      datastr[:i,nil].each_with_index do |blk, i|
        list = blk.flatten.to_a
        ary << " " + [list, lengths].transpose.map{|value, len| 
                                           "#{value.ljust(len)}"}.join("  ") + " "
      end
    end
    ary << hrule
    return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
  else
    namewidth  = namelist.to_ca.convert{|c| __strwidth__(c.to_s) }
    if datalen.size == 0
      maxwidth   = namewidth
    else
      datawidth  = datastr.convert{|c| __strwidth__(c.to_s) }
      maxwidth   = datawidth.max(0).pmax(namewidth)
      # Per-cell ljust widths. Computed only when there is data, because
      # datawidth is undefined for an empty table.
      padlen = maxwidth[:*,nil] - datawidth + datalen
    end
    hrule  = "-" + maxwidth.map {|len| "-"*len}.join("--") + "-"
    header = " " + 
             [namelist, maxwidth.to_a].transpose.map{|name, len| 
                                          "#{name.to_s.ljust(len-__strwidth__(name.to_s)+name.to_s.length)}" }.join("  ") + " "
    ary = [hrule, header, hrule]
    if datalen.size > 0
      datastr[:i,nil].each_with_addr do |blk, i|
        list = blk.flatten.to_a
        ary << " " + list.map.with_index {|value, j|
                  "#{value.ljust(padlen[i,j])}"}.join("  ") + " "
      end
    end
    ary << hrule
    return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
  end
end

#ca(*names) ⇒ Object



651
652
653
654
655
656
657
# File 'lib/carray/dataframe/dataframe.rb', line 651

# Wraps the columns in a CADFArray. Without arguments all columns are
# used in @column_names order; otherwise only the named columns
# (names converted with to_s).
def ca(*names)
  selected = names.empty? ? @column_names : names.map(&:to_s)
  CADFArray.new(selected, @columns)
end

#calculate(label, &block) ⇒ Object



488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
# File 'lib/carray/dataframe/dataframe.rb', line 488

# Reduces every column to a single value and returns the results as a
# one-row data frame whose row index is [label].
#
# With a block, the block receives (name, column) and its result is
# used; otherwise the method named by label is sent to the column.
# A column whose computation raises gets UNDEF instead.
def calculate(label, &block)
  results = {}
  each_column_name do |name|
    results[name] =
      begin
        block ? [yield(name, column(name))] : [column(name).send(label.intern)]
      rescue
        [UNDEF]
      end
  end
  CADataFrame.new(results, row_index: [label])
end

#classify(name, scale = nil, opt = {}) ⇒ Object



1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
# File 'lib/carray/dataframe/dataframe.rb', line 1181

# Classifies the values of one column.
#
# name  -- column to classify.
# scale -- bin boundaries. When omitted, every distinct value of the
#          column is its own class.
# opt   -- overrides for :include_upper (default false),
#          :include_lowest (default true) and :offset (default 0),
#          passed to scale.bin.
#
# Returns a new data frame with the columns "<name>_M", "<name>_L",
# "<name>_R" and "<name>_CLASS". Without a scale the first three all
# hold the distinct values; with a scale they are derived from the bin
# midpoints, left edges and right edges.
def classify(name, scale = nil, opt = {})
  column = @columns[name.to_s]
  if not scale
    mids   = column.uniq
    mapper = {}
    mids.each_with_index do |v,i|
      mapper[v] = i
    end
    # Map each value of the selected column (not the columns Hash) to
    # the position of that value among the distinct values.
    cls = column.convert(:int32) {|v| mapper[v] }
    hash = {
      "#{name}_M" => mids,
      "#{name}_L" => mids,
      "#{name}_R" => mids,
      "#{name}_CLASS" => cls        
    }
  else
    option = {
      :include_upper  => false,
      :include_lowest => true,
      :offset => 0,
    }.update(opt)
    cls = scale.bin(column, 
                    option[:include_upper],
                    option[:include_lowest], 
                    option[:offset])
    mids = ((scale + scale.shifted(-1))/2)[0..-2].to_ca
    left = scale[0..-2]
    right = scale.shift(-1)[0..-2]
    hash = {
      "#{name}_M" => mids.project(cls).to_ca,
      "#{name}_L" => left.project(cls).to_ca,
      "#{name}_R" => right.project(cls).to_ca,
      "#{name}_CLASS" => cls
    }
  end
  return CADataFrame.new(hash)
end

#column(name_or_index) ⇒ Object Also known as: col

Column, Row Access



135
136
137
138
139
140
141
142
# File 'lib/carray/dataframe/dataframe.rb', line 135

# Column access (also available as col).
#
# An Integer is a position in @column_names; a String or Symbol is a
# column name. Any other argument type yields nil.
def column(name_or_index)
  key =
    case name_or_index
    when Integer        then @column_names[name_or_index]
    when String, Symbol then name_or_index.to_s
    else return nil
    end
  @columns[key]
end

#column_types ⇒ Object



127
128
129
# File 'lib/carray/dataframe/dataframe.rb', line 127

# Returns the data_type_name of every column, in @column_names order.
def column_types
  @column_names.map { |name| @columns[name].data_type_name }
end

#columns_to_hash(key_name, value_names) ⇒ Object



671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
# File 'lib/carray/dataframe/dataframe.rb', line 671

# Builds a Hash keyed by the values of one column.
#
# key_name    -- name of the column that supplies the keys.
# value_names -- a column name (String), giving scalar values, or an
#                Array of column names, giving an Array of values per
#                key.
#
# When a key value occurs in several rows the last row wins.
# Raises ArgumentError for unknown column names or for a value_names
# argument that is neither a String nor an Array.
def columns_to_hash(key_name, value_names)
  unless @column_names.include?(key_name)
    raise ArgumentError, "invalid key column name #{key_name}"
  end
  keys = @columns[key_name]
  hash = {}
  case value_names
  when String
    unless @column_names.include?(value_names)
      raise ArgumentError, "invalid value column name #{value_names}"
    end
    values = @columns[value_names]
    @row_number.times { |i| hash[keys[i]] = values[i] }
  when Array
    unless value_names.all? { |s| @column_names.include?(s) }
      raise ArgumentError, "invalid column name included in #{value_names.join(' ')}"
    end
    value_columns = @columns.values_at(*value_names)
    @row_number.times { |i| hash[keys[i]] = value_columns.map { |c| c[i] } }
  else
    raise ArgumentError, "invalid argument"
  end
  hash
end

#cross(name1, name2) ⇒ Object



1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
# File 'lib/carray/dataframe/dataframe.rb', line 1220

# Cross tabulation of two columns.
#
# Counts how often each (value1, value2) pair occurs and returns a
# data frame whose rows are the sorted distinct values of column name1
# and whose columns are the sorted distinct values of column name2.
def cross(name1, name2)
  col1 = column(name1)
  col2 = column(name2)
  var1 = col1.uniq.sort
  var2 = col2.uniq.sort
  # Start every combination at zero so that absent pairs show up as 0.
  tally = {}
  var1.each { |v1| var2.each { |v2| tally[[v1,v2]] = 0 } }
  CArray.join([col1, col2]).to_a.each { |pair| tally[pair] += 1 }
  out = CArray.object(var1.size, var2.size) { 0 }
  var1.each_with_index do |v1, i|
    var2.each_with_index do |v2, j|
      out[i,j] = tally[[v1,v2]]
    end
  end
  CADataFrame.new(out, row_index: var1, column_names: var2)
end

#detouch! ⇒ Object



546
547
548
549
550
551
552
553
554
555
# File 'lib/carray/dataframe/dataframe.rb', line 546

# Replaces the column Hash by a clone whose columns are converted with
# to_ca, and clones the row index when there is one. Returns self.
def detouch!
  @columns = @columns.clone
  each_column_name { |name| @columns[name] = @columns[name].to_ca }
  @row_index = @row_index.clone if @row_index
  self
end

#downcase ⇒ Object



422
423
424
425
426
427
428
429
430
431
432
# File 'lib/carray/dataframe/dataframe.rb', line 422

# Converts all column names to lower case in place and returns self.
def downcase
  lowered_names = []
  lowered_columns = {}
  each_column_name do |name|
    lowered = name.downcase
    lowered_names << lowered
    lowered_columns[lowered] = column(name)
  end
  @column_names = lowered_names
  @columns = lowered_columns
  self
end

#each_column(&block) ⇒ Object

Iterators



186
187
188
# File 'lib/carray/dataframe/dataframe.rb', line 186

# Iterates over the column Hash, yielding name and column.
def each_column(&block)
  @columns.each(&block)
end

#each_column_name(&block) ⇒ Object



190
191
192
# File 'lib/carray/dataframe/dataframe.rb', line 190

# Iterates over the column names in @column_names order.
def each_column_name(&block)
  @column_names.each(&block)
end

#each_row(with: Array, &block) ⇒ Object



202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/carray/dataframe/dataframe.rb', line 202

# Yields every row in turn.
#
# with -- Array (default): each row is an Array of values in
#         @column_names order; Hash: each row is a new Hash keyed by
#         column name.
#
# Values are looked up through @column_names rather than by iterating
# the column Hash, so the order matches column_names even after
# operations that change only one of the two orders (lead, rename).
# Each Hash row is a separate object, so rows collected by the caller
# do not alias each other.
#
# Raises RuntimeError for any other value of with.
def each_row(with: Array, &block)
  if with == Array
    @row_number.times do |i|
      yield @column_names.map { |name| @columns[name][i] }
    end
  elsif with == Hash
    @row_number.times do |i|
      row = {}
      @column_names.each { |name| row[name] = @columns[name][i] }
      yield row
    end
  else
    raise "invalid data type for loop variable"
  end
end

#each_row_index(&block) ⇒ Object



194
195
196
197
198
199
200
# File 'lib/carray/dataframe/dataframe.rb', line 194

# Yields each row label: the entries of @row_index when present,
# otherwise the integers 0...@row_number.
def each_row_index(&block)
  @row_index ? @row_index.each(&block) : @row_number.times(&block)
end

#each_row_with_row_index(with: Array, &block) ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/carray/dataframe/dataframe.rb', line 220

# Yields every row together with its row label.
#
# with -- Array (default): the row is an Array of values in
#         @column_names order; Hash: the row is a new Hash keyed by
#         column name.
#
# The label is the matching @row_index entry, or the row position when
# the frame has no row index.
#
# Raises RuntimeError for any other value of with.
def each_row_with_row_index(with: Array, &block)
  unless with == Array || with == Hash
    raise "invalid data type for loop variable"
  end
  # Builds the row at position i in the requested representation.
  row_at =
    if with == Array
      lambda { |i| @column_names.map { |name| @columns[name][i] } }
    else
      lambda do |i|
        @column_names.each_with_object({}) { |name, row| row[name] = @columns[name][i] }
      end
    end
  if @row_index
    @row_index.each_with_index { |idx, i| yield row_at.call(i), idx }
  else
    @row_number.times { |i| yield row_at.call(i), i }
  end
end

#eliminate_columns(*names) ⇒ Object

Transformation



561
562
563
564
565
566
567
568
569
570
571
572
573
# File 'lib/carray/dataframe/dataframe.rb', line 561

# Returns a new data frame without the named columns (names are
# compared after to_s). Without arguments self is returned unchanged.
def eliminate_columns(*names)
  return self if names.empty?
  dropped = names.map(&:to_s)
  kept = {}
  each_column_name do |name|
    kept[name] = column(name) unless dropped.include?(name)
  end
  CADataFrame.new(kept, row_index: @row_index)
end

#execute(&block) ⇒ Object



484
485
486
# File 'lib/carray/dataframe/dataframe.rb', line 484

# Evaluates the block with this data frame as self (instance_exec) and
# returns the block's result.
def execute(&block)
  instance_exec(&block)
end

#fill(*names, value) ⇒ Object



394
395
396
397
398
399
400
401
# File 'lib/carray/dataframe/dataframe.rb', line 394

# Fills the named columns with value (the last argument) in place.
#
# Names are converted with to_s before the lookup, so Symbols work the
# same way as Strings. Names that do not match a column are skipped.
# Returns self.
def fill(*names, value)
  names.each do |name|
    key = name.to_s
    column(key).fill(value) if has_column?(key)
  end
  self
end

#group_by(*names) ⇒ Object



1256
1257
1258
1259
1260
1261
1262
# File 'lib/carray/dataframe/dataframe.rb', line 1256

# Groups the data frame: exactly one name yields a CADataFrameGroup,
# any other number of names a CADataFrameGroupMulti.
def group_by(*names)
  return CADataFrameGroup.new(self, names[0]) if names.size == 1
  CADataFrameGroupMulti.new(self, *names)
end

#has_column?(name) ⇒ Boolean

Returns:

  • (Boolean)


123
124
125
# File 'lib/carray/dataframe/dataframe.rb', line 123

# Tells whether a column with the given name exists.
#
# Column names are stored as Strings; a Symbol is converted first so
# that has_column?(:x) agrees with column(:x).
def has_column?(name)
  name = name.to_s if name.is_a?(Symbol)
  @column_names.include?(name)
end

#histogram(name, scale = nil, options = nil) ⇒ Object



1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
# File 'lib/carray/dataframe/dataframe.rb', line 1161

# Histogram of one column.
#
# Without a scale, rows are grouped by the column and the valid
# entries of each group are counted. With a scale, a CAHistogram over
# the scale (optionally configured by options) is incremented with the
# column, and the result has the midpoints, the left and right bin
# edges and the counts.
def histogram(name, scale = nil, options = nil)
  if scale.nil?
    return group_by(name).table{ { :count => col(name).count_valid } }
  end
  hist = options ? CAHistogram.int(scale, options) : CAHistogram.int(scale)
  hist.increment(@columns[name.to_s])
  result = {
    name.to_s => hist.midpoints[0],
    "#{name}_L".to_s => scale[0..-2],
    "#{name}_R".to_s => scale.shift(-1)[0..-2],
    :count => hist[0..-2].to_ca,
  }
  CADataFrame.new(result)
end

#index ⇒ Object



155
156
157
# File 'lib/carray/dataframe/dataframe.rb', line 155

# Returns CArray.int(@row_number).seq, i.e. an integer CArray with one
# element per row.
def index
  positions = CArray.int(@row_number)
  positions.seq
end

#inspect ⇒ Object



789
790
791
# File 'lib/carray/dataframe/dataframe.rb', line 789

def inspect
  return ascii_table(10)
end

#is_finite ⇒ Object



1117
1118
1119
# File 'lib/carray/dataframe/dataframe.rb', line 1117

# Delegates to cmp with :is_finite.
def is_finite
  cmp(:is_finite)
end

#is_masked ⇒ Object



1113
1114
1115
# File 'lib/carray/dataframe/dataframe.rb', line 1113

# Delegates to cmp with :is_masked.
def is_masked
  cmp(:is_masked)
end

#join(table, on: nil) ⇒ Object



1158
1159
# File 'lib/carray/dataframe/dataframe.rb', line 1158

# Placeholder: the body is empty, so the call returns nil and neither
# table nor on is used.
# NOTE(review): presumably a data frame join was planned here -- confirm.
def join (table, on: nil)
end

#lead(name, new_column = nil, &block) ⇒ Object



453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'lib/carray/dataframe/dataframe.rb', line 453

# Adds a column at the front of the column list.
#
# name       -- column name; converted with to_s so that it matches the
#               String names kept in @column_names.
# new_column -- column data; when omitted, the block result is used
#               (evaluated with instance_exec), and without a block an
#               object-typed template of the first column.
#
# Data that is not a CArray is converted with to_ca. Raises
# RuntimeError unless the column is rank 1 with @row_number elements.
# When the name already exists it is moved to the front and its data
# replaced, instead of being listed twice.
#
# Returns the stored column.
def lead(name, new_column = nil, &block)
  name = name.to_s
  unless new_column
    new_column = block ? instance_exec(&block) : @columns.first[1].template(:object)
  end
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  if new_column.rank != 1 || new_column.size != @row_number
    raise "invalid shape of appended column"
  end
  @column_names.delete(name)
  @column_names.unshift(name)
  @columns[name] = new_column
  new_column
end

#matchup(keyname, reference) ⇒ Object



1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
# File 'lib/carray/dataframe/dataframe.rb', line 1137

# Re-aligns the rows to the order of reference.
#
# The addresses come from reference.matchup(key column); every column
# (and the row index, unmasked with nil) is projected through them.
# The key column of the result is set to reference itself.
def matchup(keyname, reference)
  idx = reference.matchup(column(keyname.to_s))
  new_columns = {}
  each_column_name do |name|
    new_columns[name] = (name == keyname) ? reference : column(name).project(idx)
  end
  new_row_index = @row_index ? @row_index.project(idx).unmask(nil) : nil
  CADataFrame.new(new_columns, row_index: new_row_index) {
    self.send(keyname)[] = reference
  }
end

#merge(*args) ⇒ Object



480
481
482
# File 'lib/carray/dataframe/dataframe.rb', line 480

# Instance form of CADataFrame.merge with self as the first frame.
def merge(*args)
  CADataFrame.merge(self, *args)
end

#method(hash) ⇒ Object



159
160
161
162
163
164
165
# File 'lib/carray/dataframe/dataframe.rb', line 159

# Registers aliases used by method_missing: every key and value of
# hash is converted with to_s and merged into @__methods__.
# Returns the updated table.
def method(hash)
  aliases = {}
  hash.each { |key, value| aliases[key.to_s] = value.to_s }
  @__methods__.update(aliases)
end

#objectify ⇒ Object



643
644
645
646
647
648
649
# File 'lib/carray/dataframe/dataframe.rb', line 643

# Returns a new data frame in which every column is replaced by the
# result of calling .object on it; the row index is reused.
def objectify
  converted = {}
  each_column_name { |name| converted[name] = column(name).object }
  CADataFrame.new(converted, row_index: @row_index)
end

#order_by(*names, &block) ⇒ Object



584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
# File 'lib/carray/dataframe/dataframe.rb', line 584

# Returns a new data frame sorted by the given keys.
#
# The keys are the named columns or, when no names are given, the
# result of the block (evaluated with instance_exec): a CArray or an
# Array of CArrays. The ordering is computed by CA.sort_addr and
# applied through reorder.
def order_by(*names, &block)
  keys =
    if !names.empty?
      @columns.values_at(*names.map { |s| s.to_s })
    elsif block
      ret = instance_exec(&block)
      case ret
      when CArray then [ret]
      when Array  then ret
      end
    end
  reorder { CA.sort_addr(*keys) }
end

#pivot(name1, name2) ⇒ Object



1427
1428
1429
# File 'lib/carray/dataframe/dataframe.rb', line 1427

# Builds a CADataFramePivot over this data frame for the two names.
def pivot(name1, name2)
  CADataFramePivot.new(self, name1, name2)
end

#rename(name1, name2) ⇒ Object



411
412
413
414
415
416
417
418
419
420
# File 'lib/carray/dataframe/dataframe.rb', line 411

# Renames column name1 to name2 in place (both converted with to_s).
# The position in @column_names is kept. Returns the renamed column.
# Raises RuntimeError when name1 is not a column.
def rename(name1, name2)
  old_name = name1.to_s
  new_name = name2.to_s
  idx = @column_names.index(old_name)
  raise "unknown column name #{name1}" unless idx
  @column_names[idx] = new_name
  @columns[new_name] = @columns.delete(old_name)
end

#reorder(&block) ⇒ Object



575
576
577
578
579
580
581
582
# File 'lib/carray/dataframe/dataframe.rb', line 575

# Returns a new data frame whose rows are selected and ordered by the
# index the block returns (the block is evaluated with instance_exec).
# The row index, when present, is reordered the same way.
def reorder(&block)
  order = instance_exec(&block)
  reordered = {}
  each_column_name { |name| reordered[name] = column(name)[order] }
  CADataFrame.new(reordered, row_index: @row_index ? @row_index[order] : nil)
end

#replace(other) ⇒ Object



76
77
78
79
80
81
82
83
# File 'lib/carray/dataframe/dataframe.rb', line 76

# Takes over the state of other: column names, columns, row index, row
# number and the alias table used by method_missing. The objects are
# shared with other, not copied. Returns self.
def replace(other)
  @column_names = other.column_names
  @columns      = other.columns
  @row_index    = other.row_index
  @row_number   = other.row_number
  @__methods__  = other.__methods__
  self
end

#resample(&block) ⇒ Object



504
505
506
507
508
509
510
511
512
513
# File 'lib/carray/dataframe/dataframe.rb', line 504

# Builds a new data frame from the block results: the block receives
# (name, column) and returns the new column. Columns for which the
# block raises are left out. The result has no row index.
def resample(&block)
  new_columns = {}
  each_column_name do |name|
    begin
      value = yield(name, column(name))
      new_columns[name] = value
    rescue
      # Deliberately ignored: a failing column is dropped from the result.
    end
  end
  CADataFrame.new(new_columns)
end

#reverse ⇒ Object



601
602
603
604
605
606
607
# File 'lib/carray/dataframe/dataframe.rb', line 601

# Returns a new data frame with every column, and the row index when
# present, reversed.
def reverse
  flipped = {}
  each_column_name { |name| flipped[name] = column(name).reverse }
  CADataFrame.new(flipped, row_index: @row_index ? @row_index.reverse : nil)
end

#row(idx) ⇒ Object



146
147
148
149
150
151
152
153
# File 'lib/carray/dataframe/dataframe.rb', line 146

# Returns one row as a CArray of the column values.
# With a row index, idx is looked up through @row_index.search;
# otherwise idx is used directly as the row position.
def row(idx)
  addr = @row_index ? @row_index.search(idx) : idx
  @column_names.map { |name| @columns[name][addr] }.to_ca
end

#select(*names, &block) ⇒ Object



515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
# File 'lib/carray/dataframe/dataframe.rb', line 515

# Returns a new data frame with the named columns (all columns when no
# names are given), restricted to the rows the block selects.
# The block is evaluated with instance_exec; without a block the row
# selector is nil.
def select(*names, &block)
  names = @column_names if names.empty?
  row = block ? instance_exec(&block) : nil
  picked = {}
  names.map(&:to_s).each { |name| picked[name] = column(name)[row] }
  CADataFrame.new(picked, row_index: @row_index ? @row_index[row] : nil)
end

#to_ary ⇒ Object



797
798
799
# File 'lib/carray/dataframe/dataframe.rb', line 797

# Implicit array conversion: a one-element Array holding to_s.
def to_ary
  [to_s]
end

#to_ca(*names) ⇒ Object



659
660
661
# File 'lib/carray/dataframe/dataframe.rb', line 659

# Converts the selected columns (see #ca) with to_ca.
def to_ca(*names)
  ca(*names).to_ca
end

#to_csv(io = "", option = {}, rs: $/, sep: ",", fill: "", with_row_index: true, &block) ⇒ Object



1587
1588
1589
1590
1591
1592
1593
1594
1595
# File 'lib/carray/dataframe/dataframe.rb', line 1587

# Writes the data frame as CSV through the table's own to_csv.
#
# When there is a row index and with_row_index is true, the row index
# is included as a first column with an empty name. The remaining
# arguments and the block are passed through.
def to_csv(io = "", option = {}, rs: $/, sep: ",", fill: "", with_row_index: true, &block)
  tbl =
    if @row_index && with_row_index
      CADFArray.new([""] + @column_names, @columns.clone.update("" => @row_index))
    else
      ca.to_ca
    end
  tbl.to_csv(io, option, rs: rs, sep: sep, fill: fill, &block)
end

#to_daru ⇒ Object



1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
# File 'lib/carray/dataframe/dataframe.rb', line 1597

# Converts to a Daru::DataFrame (daru is required on first use).
# Columns are passed as plain Arrays in @column_names order; the row
# index, when present, becomes the Daru index.
def to_daru
  require "daru"
  columns = {}
  each_column_name { |name| columns[name] = column(name).to_a }
  options = { order: @column_names }
  options[:index] = @row_index.to_a if @row_index
  Daru::DataFrame.new(columns, **options)
end

#to_df ⇒ Object

Conversions



635
636
637
638
639
640
641
# File 'lib/carray/dataframe/dataframe.rb', line 635

# Returns a new data frame over the same columns and row index, with
# detouch! applied to the result.
def to_df
  same_columns = {}
  each_column_name { |name| same_columns[name] = column(name) }
  CADataFrame.new(same_columns, row_index: @row_index).detouch!
end

#to_hash ⇒ Object



663
664
665
666
667
668
669
# File 'lib/carray/dataframe/dataframe.rb', line 663

# Returns a Hash mapping each column name to the column's to_a.
def to_hash
  @columns.each_with_object({}) do |(name, data), result|
    result[name] = data.to_a
  end
end

#to_s ⇒ Object



793
794
795
# File 'lib/carray/dataframe/dataframe.rb', line 793

def to_s
  return ascii_table
end

#to_sql(tablename) ⇒ Object



1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
# File 'lib/carray/dataframe/dataframe.rb', line 1511

# Stores the data frame as table tablename in an in-memory SQLite3
# database through to_sqlite3.
# When a column name contains ".", " " or "-", those characters are
# replaced by "_" in a temporary copy first.
def to_sql(tablename)
  unless @column_names.any? { |s| s =~ /[\. \-]/ }
    return to_sqlite3(database: ":memory:", table: tablename)
  end
  columns = {}
  each_column_name do |name|
    columns[name.gsub(/[\. \-]/, '_')] = column(name)
  end
  CADataFrame.new(columns).to_sqlite3(database: ":memory:", table: tablename)
end

#to_sqlite3(*args) ⇒ Object



1507
1508
1509
# File 'lib/carray/dataframe/dataframe.rb', line 1507

# Forwards to to_sqlite3 of the CADFArray returned by #ca.
def to_sqlite3(*args)
  table = self.ca
  table.to_sqlite3(*args)
end

#to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block) ⇒ Object



1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
# File 'lib/carray/dataframe/dataframe.rb', line 1610

# Writes the data frame to an xlsx file with the axlsx library
# (required on first use).
#
# filename       -- output path passed to serialize.
# sheet_name     -- worksheet name.
# with_row_index -- when true, a first column with an empty header
#                   holds the row labels.
#
# Masked values are written as "=NA()". When a block is given it
# receives the worksheet before the file is serialized.
def to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block)
  require "axlsx"
  package = Axlsx::Package.new
  package.use_shared_strings = true
  sheet = package.workbook.add_worksheet(name: sheet_name)
  df = self.to_df.objectify.unmask("=NA()")
  if with_row_index
    sheet.add_row([""] + column_names)
    df.each_row_with_row_index(with: Array) { |list, i| sheet.add_row([i] + list) }
  else
    sheet.add_row(column_names)
    df.each_row(with: Array) { |list| sheet.add_row(list) }
  end
  yield sheet if block_given?
  package.serialize(filename)
end

#transpose(column_names: nil) ⇒ Object



609
610
611
612
613
614
615
616
617
618
619
620
# File 'lib/carray/dataframe/dataframe.rb', line 609

# Returns the transposed data frame: the old column names become the
# row index.
#
# column_names -- names for the new columns (converted with to_s).
#                 When omitted, the row index entries converted to
#                 Strings are used, or, without a row index, a
#                 generated sequence starting at "a".
def transpose(column_names: nil)
  if column_names
    column_names = column_names.map(&:to_s)
  elsif @row_index
    column_names = @row_index.convert(:object) {|v| v.to_s }
  else
    column_names = CArray.object(@row_number).seq("a",:succ)
  end
  CADataFrame.new(ca.transpose, row_index: @column_names.to_ca, column_names: column_names)
end

#unmask(value = nil) ⇒ Object



542
543
544
# File 'lib/carray/dataframe/dataframe.rb', line 542

# Non-destructive form of unmask!: applies it to the result of to_df
# and returns that data frame.
def unmask(value = nil)
  copy = to_df
  copy.unmask!(value)
end

#unmask!(value = nil) ⇒ Object

Maintenance



535
536
537
538
539
540
# File 'lib/carray/dataframe/dataframe.rb', line 535

# Calls unmask(value) on every column and returns self.
def unmask!(value = nil)
  each_column_name { |name| column(name).unmask(value) }
  self
end

#vacant_copy ⇒ Object



472
473
474
475
476
477
478
# File 'lib/carray/dataframe/dataframe.rb', line 472

# Returns a data frame with the same column names, each column being
# CArray.object(0), and no row index.
def vacant_copy
  empty_columns = {}
  each_column_name { |key| empty_columns[key] = CArray.object(0) }
  CADataFrame.new(empty_columns)
end

#where(mask, value) ⇒ Object



385
386
387
388
389
390
391
392
# File 'lib/carray/dataframe/dataframe.rb', line 385

# For every column of mask that also exists here, assigns value to the
# elements selected by the negation of the mask column
# (mask.column(key).boolean.not). Returns value.
def where(mask, value)
  mask.column_names.each do |key|
    next unless has_column?(key)
    column(key)[mask.column(key).boolean.not] = value
  end
  value
end