Module: CSVTools

Defined in:
lib/csv_tools.rb,
lib/csv_tools/version.rb

Constant Summary collapse

DEFAULT_CSV_READ_OPTS =
{headers: true, skip_blanks: true}
VERSION =
"0.1.2"

Class Method Summary collapse

Class Method Details

.csv_filter(path, column_index, field_value, out = STDOUT) ⇒ Object



82
83
84
85
86
87
88
89
# File 'lib/csv_tools.rb', line 82

def CSVTools.csv_filter(path, column_index, field_value, out = STDOUT)
  csv = CSV.read(path, DEFAULT_CSV_READ_OPTS)

  out.puts csv.headers.join(',')
  csv.select {|row| row.fields[column_index].downcase.include? field_value.downcase} .each do |row|
    out.puts row.fields.join(',')
  end
end

.csv_join(path1, path2, join_by_column, out = STDOUT) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/csv_tools.rb', line 31

def CSVTools.csv_join(path1, path2, join_by_column, out = STDOUT)
  csv1, csv2 = [path1, path2].map {|p| CSV.read(p, DEFAULT_CSV_READ_OPTS)}

  # Make sure the join-by column exists in both CSV's
  index1, index2 = [csv1, csv2].map {|csv| csv.headers.index(join_by_column) }
  raise "#{path1} does not have a column '#{join_by_column}'" if index1.nil?
  raise "#{path2} does not have a column '#{join_by_column}'" if index2.nil?

  # Print the header line (join-by column is first, and appears only once)
  h1, h2 = [csv1.headers , csv2.headers]
  h1.delete_at(index1)
  h2.delete_at(index2)
  out.puts join_by_column + ',' + h1.join(",") + ',' + h2.join(',')

  # Print the data rows ...
  hash1 = group_by_column(csv1, index1)
  hash2 = group_by_column(csv2, index2)

  # And now ... JOIN the two CSV's (adding nil's for missing rows)
  (hash1.keys + hash2.keys).to_set.each do |key|
    rows1 = hash1[key] || [ [nil] * (csv1.headers.length - 1)]
    rows2 = hash2[key] || [ [nil] * (csv2.headers.length - 1)]

    rows1.each do |r1|
      rows2.each do |r2|
        out.puts key + ',' + r1.join(',') + ',' + r2.join(',')
      end
    end
  end

end

.csv_select(path, column_indices, out = STDOUT) ⇒ Object



72
73
74
75
76
77
78
79
# File 'lib/csv_tools.rb', line 72

def CSVTools.csv_select(path, column_indices, out = STDOUT)
  csv = CSV.read(path, DEFAULT_CSV_READ_OPTS)

  out.puts select_values(csv.headers, column_indices).join(',')
  csv.each do |row|
    out.puts select_values(row.fields, column_indices).join(',')
  end
end

.group_by_column(csv_table, column_index) ⇒ Object

Parameters:

  • csv_table (CSV::Table)
  • column_index (integer)


16
17
18
19
20
21
22
23
24
25
26
# File 'lib/csv_tools.rb', line 16

def CSVTools.group_by_column(csv_table, column_index)
  result = {}

  csv_table.group_by {|row| row.fields[column_index]} .each do |key, row_objects|
    result[key] = row_objects.map  do |row|
      row.fields[0...column_index] + row.fields[column_index + 1...row.fields.length]
    end
  end

  return result
end

.select_values(values, indices) ⇒ Object



66
67
68
69
# File 'lib/csv_tools.rb', line 66

def CSVTools.select_values(values, indices)
  raise "Invalid indices #{indices} for #{values}" if indices.any? {|i| i < 0 || i >= values.length}
  return indices.map {|i| values[i]}
end