Module: DatasetsArrow::Arrowable
- Included in: Datasets::Dataset
- Defined in: lib/datasets-arrow/arrowable.rb
Instance Method Summary
Instance Method Details
#each_record_batch(&block) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/datasets-arrow/arrowable.rb', line 22
#
# Iterates over the dataset one Arrow record batch at a time.
#
# When the file at +arrow_data_path+ exists, it is opened as a
# memory-mapped input stream and every batch read from it is yielded.
# Otherwise the call is forwarded to +to_arrow.each_record_batch+.
#
# @yield [record_batch] each record batch, in the order the reader produces it
# @return [Enumerator] when called without a block
def each_record_batch(&block)
  return enum_for(:each_record_batch) unless block

  cache_path = arrow_data_path
  return to_arrow.each_record_batch(&block) unless cache_path.exist?

  mmap_stream = Arrow::MemoryMappedInputStream.new(cache_path.to_path)
  Arrow::RecordBatchFileReader.new(mmap_stream).each do |batch|
    # NOTE(review): presumably this pins the stream to the batch so the
    # mapped memory is not reclaimed while the batch is still in use -- confirm.
    batch.instance_variable_set(:@input, mmap_stream)
    yield(batch)
  end
end
#to_arrow ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/datasets-arrow/arrowable.rb', line 5
#
# Returns the dataset as an Arrow table, using the file at
# +arrow_data_path+ as an on-disk cache.
#
# If that file exists it is loaded and returned. Otherwise a table is
# built from +to_table.to_h+ (one Arrow array per column), saved to
# +arrow_data_path+ (creating the parent directory when it is missing),
# and returned.
#
# @return [Object] the result of +Arrow::Table.load+, or the newly built +Arrow::Table+
def to_arrow
  cache_path = arrow_data_path
  return Arrow::Table.load(cache_path) if cache_path.exist?

  columns = to_table.to_h.each_with_object({}) do |(name, values), arrays|
    arrays[name] = Arrow::ArrayBuilder.build(values)
  end
  arrow_table = Arrow::Table.new(columns)

  cache_dir = cache_path.parent
  cache_dir.mkpath unless cache_dir.exist?
  arrow_table.save(cache_path)
  arrow_table
end