Module: ArrowActiveRecord::Arrowable

Defined in:
lib/arrow-activerecord/arrowable.rb

Instance Method Summary

Instance Method Details

#to_arrow(batch_size: 10000) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/arrow-activerecord/arrowable.rb', line 5

# Builds an Arrow::Table from the rows this receiver yields through
# +in_batches+.
#
# The exported columns are +select_values+ when that list is non-empty and
# +column_names+ otherwise. Every batch contributes one array per exported
# column (built by +build_arrow_array+); the arrays collected for a column
# are then combined into a single Arrow::ChunkedArray.
#
# @param batch_size [Integer] forwarded to +in_batches+ as +of:+
# @return [Arrow::Table] table with one field/column per exported column
def to_arrow(batch_size: 10_000)
  target_column_names = select_values
  target_column_names = column_names if target_column_names.empty?

  fields = []
  data_types = []
  target_column_names.each do |name|
    column_name = name.to_s
    # NOTE(review): target_column is nil when the name matches no entry in
    # +columns+ (presumably possible for selected SQL expressions) — confirm
    # how extract_arrow_data_type handles nil.
    target_column = columns.find { |column| column.name == column_name }
    arrow_data_type = extract_arrow_data_type(target_column)
    fields << Arrow::Field.new(column_name, arrow_data_type)
    data_types << arrow_data_type
  end
  schema = Arrow::Schema.new(fields)

  # One list of per-batch arrays for each exported column.
  arrow_array_batches = Array.new(data_types.size) { [] }
  single_column = target_column_names.size == 1
  in_batches(of: batch_size).each do |relation|
    plucked = relation.pluck(*target_column_names)
    # With exactly one column, pluck returns a flat list of values rather
    # than a list of rows, so it already is that column's values and must
    # not be transposed (transpose would raise TypeError on scalar values).
    column_values_set = single_column ? [plucked] : plucked.transpose
    data_types.each_with_index do |data_type, i|
      arrow_array_batches[i] << build_arrow_array(column_values_set[i], data_type)
    end
  end

  # Named arrow_columns so the local does not shadow the +columns+ method
  # used above.
  arrow_columns = fields.zip(arrow_array_batches).map do |field, arrow_arrays|
    Arrow::Column.new(field, Arrow::ChunkedArray.new(arrow_arrays))
  end

  Arrow::Table.new(schema, arrow_columns)
end