5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
|
# File 'lib/arrow-activerecord/arrowable.rb', line 5
# Builds an Arrow::Table from the rows of this relation.
#
# The columns exported are the relation's `select_values`, or every name in
# `column_names` when nothing was explicitly selected. Rows are read with
# `in_batches`, and each batch contributes one array per column to that
# column's Arrow::ChunkedArray.
#
# @param batch_size [Integer] number of rows requested per batch
#   (forwarded as `in_batches(of: batch_size)`).
# @return [Arrow::Table] a table with one column per exported column name.
def to_arrow(batch_size: 10_000)
  target_column_names = select_values
  target_column_names = column_names if target_column_names.empty?

  fields = []
  data_types = []
  target_column_names.each do |name|
    name = name.to_s
    target_column = columns.find { |column| column.name == name }
    # NOTE(review): as written this assigns the column object itself; a call
    # that maps the column to an Arrow data type appears to be missing here.
    # Confirm against the original file.
    arrow_data_type = (target_column)
    fields << Arrow::Field.new(name, arrow_data_type)
    data_types << arrow_data_type
  end
  schema = Arrow::Schema.new(fields)

  # One list of per-batch arrays for every exported column.
  arrow_array_batches = data_types.map { [] }
  # `pluck` with exactly one column returns a flat array of values instead of
  # an array of rows, which cannot be transposed directly.
  single_column = target_column_names.size == 1

  in_batches(of: batch_size).each do |relation|
    rows = relation.pluck(*target_column_names)
    # An empty batch would transpose to no columns at all; skip it.
    next if rows.empty?

    rows = rows.map { |value| [value] } if single_column
    # Turn the row-major result into one value list per column.
    column_values_set = rows.transpose
    data_types.each_with_index do |data_type, i|
      arrow_array_batches[i] << build_arrow_array(column_values_set[i], data_type)
    end
  end

  # Named `arrow_columns` so it does not shadow the `columns` method used above.
  arrow_columns = fields.each_with_index.map do |field, i|
    chunked_array = Arrow::ChunkedArray.new(arrow_array_batches[i])
    Arrow::Column.new(field, chunked_array)
  end
  Arrow::Table.new(schema, arrow_columns)
end
|