Class: Csvtool::Infrastructure::CSV::RowRandomizer
- Inherits: Object
  - Object
    - Csvtool::Infrastructure::CSV::RowRandomizer
- Defined in: lib/csvtool/infrastructure/csv/row_randomizer.rb
Constant Summary
- DEFAULT_CHUNK_SIZE = 10_000
Instance Method Summary
- #call(file_path:, col_sep:, headers:, seed: nil) ⇒ Object
- #each(file_path:, col_sep:, headers:, seed: nil, chunk_size: DEFAULT_CHUNK_SIZE) ⇒ Object
Instance Method Details
#call(file_path:, col_sep:, headers:, seed: nil) ⇒ Object
12 13 14 |
# File 'lib/csvtool/infrastructure/csv/row_randomizer.rb', line 12

# Eager counterpart of #each: invokes #each without a block (which hands
# back an enumerator) and materializes everything it yields into an Array.
#
# No chunk_size is forwarded, so #each runs with its own default.
#
# @param file_path path of the CSV file, passed through to #each
# @param col_sep column separator, passed through to #each
# @param headers header option, passed through to #each
# @param seed optional seed for the random generator; nil means unseeded
# @return [Array] every element yielded by #each, in the order yielded
def call(file_path:, col_sep:, headers:, seed: nil)
  row_stream = each(file_path: file_path, col_sep: col_sep, headers: headers, seed: seed)
  row_stream.to_a
end
#each(file_path:, col_sep:, headers:, seed: nil, chunk_size: DEFAULT_CHUNK_SIZE) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/csvtool/infrastructure/csv/row_randomizer.rb', line 16

# Streams the rows of a CSV file through a chunked pipeline, yielding the
# field arrays produced by merge_chunks.
#
# Each row read is buffered as a triple of [random key, read position,
# fields]. Whenever the buffer holds at least chunk_size entries it is handed
# to flush_chunk together with the list of chunk paths; any remainder is
# flushed once the file has been read. merge_chunks then drives the block.
#
# NOTE(review): flush_chunk, merge_chunks and cleanup_chunks are defined
# outside this listing. Presumably flush_chunk spills the buffer to a
# temporary file, records its path and empties the buffer, and merge_chunks
# emits rows ordered by the random key. Confirm against those helpers.
#
# Without a block, an enumerator over this same call is returned instead.
#
# @param file_path path handed to ::CSV.foreach
# @param col_sep column separator handed to ::CSV.foreach
# @param headers header option handed to ::CSV.foreach; when truthy, only
#   row.fields is kept for each row, otherwise the row itself is kept
# @param seed optional seed for Random; nil creates an unseeded generator
# @param chunk_size buffered-entry count that triggers a flush
# @yield [fields] each element produced by merge_chunks
def each(file_path:, col_sep:, headers:, seed: nil, chunk_size: DEFAULT_CHUNK_SIZE)
  spill_paths = []
  unless block_given?
    return enum_for(:each, file_path: file_path, col_sep: col_sep, headers: headers,
                           seed: seed, chunk_size: chunk_size)
  end

  generator =
    if seed.nil?
      Random.new
    else
      Random.new(seed)
    end

  pending = []
  ::CSV.foreach(file_path, headers: headers, col_sep: col_sep).each_with_index do |row, position|
    values = headers ? row.fields : row
    # The read position travels with the random key in every buffered entry.
    pending.push([generator.rand, position, values])
    flush_chunk(pending, spill_paths) if pending.size >= chunk_size
  end
  flush_chunk(pending, spill_paths) if pending.any?

  merge_chunks(spill_paths) do |merged_fields|
    yield merged_fields
  end
ensure
  # Runs on every exit path, including the early enumerator return above
  # (where the path list is still empty).
  cleanup_chunks(spill_paths)
end