Module: DatasetsArrow::Arrowable

Included in:
Datasets::Dataset
Defined in:
lib/datasets-arrow/arrowable.rb

Instance Method Summary collapse

Instance Method Details

#each_record_batch(&block) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/datasets-arrow/arrowable.rb', line 22

# Iterates over the dataset as Arrow record batches.
#
# When the file at +arrow_data_path+ exists, it is opened as a memory-mapped
# input stream and every record batch read from it is yielded. Otherwise the
# call is delegated to +to_arrow.each_record_batch+.
#
# @yield [record_batch] each record batch in turn
# @return [Enumerator] when called without a block
# @return [Object] otherwise, whatever the underlying iteration returns
def each_record_batch(&block)
  return enum_for(:each_record_batch) unless block_given?

  cache_path = arrow_data_path
  # No cached file: let the table produced by to_arrow do the iteration.
  return to_arrow.each_record_batch(&block) unless cache_path.exist?

  stream = Arrow::MemoryMappedInputStream.new(cache_path.to_path)
  Arrow::RecordBatchFileReader.new(stream).each do |batch|
    # Attach the stream to the batch as @input.
    # NOTE(review): presumably this keeps the memory-mapped stream referenced
    # for as long as the batch is alive -- confirm before changing.
    batch.instance_variable_set(:@input, stream)
    yield batch
  end
end

#to_arrow ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/datasets-arrow/arrowable.rb', line 5

# Returns the dataset as an Arrow table, using the file at +arrow_data_path+
# as a cache.
#
# If the file exists it is loaded and returned. Otherwise a table is built
# from +to_table.to_h+ (one Arrow array per column), saved to
# +arrow_data_path+ (creating the parent directory if it is missing), and
# returned.
#
# @return [Object] the loaded or newly built table
def to_arrow
  cache_path = arrow_data_path
  return Arrow::Table.load(cache_path) if cache_path.exist?

  # Convert each column's values into an Arrow array, keyed by column name.
  columns = to_table.to_h.each_with_object({}) do |(name, values), built|
    built[name] = Arrow::ArrayBuilder.build(values)
  end

  Arrow::Table.new(columns).tap do |arrow_table|
    cache_dir = cache_path.parent
    cache_dir.mkpath unless cache_dir.exist?
    arrow_table.save(cache_path)
  end
end