Class: Csvtool::Infrastructure::CSV::RowRandomizer

Inherits:
Object
  • Object
show all
Defined in:
lib/csvtool/infrastructure/csv/row_randomizer.rb

Constant Summary collapse

DEFAULT_CHUNK_SIZE =
10_000

Instance Method Summary collapse

Instance Method Details

#call(file_path:, col_sep:, headers:, seed: nil) ⇒ Object



12
13
14
# File 'lib/csvtool/infrastructure/csv/row_randomizer.rb', line 12

# Eager counterpart of #each: collects everything #each's enumerator produces
# into an array.
#
# The chunk size is not forwarded, so #each runs with its own default.
#
# @param file_path path of the CSV file, passed through to #each
# @param col_sep column separator, passed through to #each
# @param headers header option, passed through to #each
# @param seed optional RNG seed, passed through to #each (nil by default)
# @return [Array] the elements of the enumerator returned by #each
def call(file_path:, col_sep:, headers:, seed: nil)
  row_stream = each(
    file_path: file_path,
    col_sep: col_sep,
    headers: headers,
    seed: seed
  )
  row_stream.to_a
end

#each(file_path:, col_sep:, headers:, seed: nil, chunk_size: DEFAULT_CHUNK_SIZE) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/csvtool/infrastructure/csv/row_randomizer.rb', line 16

# Reads a CSV file row by row, tags every row with a random key, and hands the
# tagged rows to the chunk helpers so they can be yielded back to the caller.
#
# Each buffered entry is a triple: [random float from the RNG, zero-based read
# position, row fields]. The buffer is passed to +flush_chunk+ every time it
# reaches +chunk_size+ entries, and once more at the end if anything is left.
# +merge_chunks+ then drives the caller's block.
#
# NOTE(review): +flush_chunk+, +merge_chunks+ and +cleanup_chunks+ are defined
# elsewhere; presumably they spill sorted chunks to temp files, merge them by
# random key, and delete the files afterwards -- confirm against their source.
#
# @param file_path path handed to CSV.foreach
# @param col_sep column separator handed to CSV.foreach
# @param headers header option handed to CSV.foreach; when truthy, rows are
#   reduced to their +fields+, otherwise the parsed row is used as-is
# @param seed seed for the random number generator; nil gets a fresh Random.new
# @param chunk_size buffer length that triggers a flush
# @yield [fields] whatever +merge_chunks+ yields (presumably one row's fields)
# @return [Enumerator] when called without a block; otherwise the return value
#   of +merge_chunks+
def each(file_path:, col_sep:, headers:, seed: nil, chunk_size: DEFAULT_CHUNK_SIZE)
  # Assigned before the early return because the ensure clause below reads it
  # on every exit path, including the enumerator one.
  chunk_paths = []
  unless block_given?
    return enum_for(
      :each,
      file_path: file_path,
      col_sep: col_sep,
      headers: headers,
      seed: seed,
      chunk_size: chunk_size
    )
  end

  rng =
    if seed.nil?
      Random.new
    else
      Random.new(seed)
    end
  position = 0
  pending = []

  ::CSV.foreach(file_path, headers: headers, col_sep: col_sep) do |record|
    values = headers ? record.fields : record
    # The read position rides along with the random key in every entry.
    pending << [rng.rand, position, values]
    position += 1
    next unless pending.length >= chunk_size

    flush_chunk(pending, chunk_paths)
  end

  # Hand over whatever is still buffered after the last full chunk.
  flush_chunk(pending, chunk_paths) unless pending.empty?

  merge_chunks(chunk_paths) do |values|
    yield values
  end
ensure
  cleanup_chunks(chunk_paths)
end