Module: WorkerTools::CsvInput
- Defined in:
- lib/worker_tools/csv_input.rb
Defined Under Namespace
Classes: CsvInputForeach
Instance Method Summary collapse
-
#csv_input_columns ⇒ Object
If an array is provided, the names will be used as the row keys, and the row values will be assigned according to the column order.
- #csv_input_columns_array_check(csv_rows_enum) ⇒ Object
- #csv_input_columns_check(csv_rows_enum) ⇒ Object
- #csv_input_columns_hash_check(csv_rows_enum) ⇒ Object
- #csv_input_columns_hash_check_duplicates(names) ⇒ Object
- #csv_input_columns_hash_check_missing(actual_names, expected_names) ⇒ Object
- #csv_input_csv_options ⇒ Object
- #csv_input_file_path ⇒ Object
- #csv_input_foreach ⇒ Object
- #csv_input_header_normalize? ⇒ Boolean
- #csv_input_header_normalized(name) ⇒ Object
- #csv_input_headers_present ⇒ Object
- #csv_input_include_other_columns ⇒ Object
-
#csv_input_mapping_order(header_names) ⇒ Object
Compares the first row (header names) with the csv_input_columns hash to find the corresponding positions.
- #csv_input_mapping_order_for_hash(header_names) ⇒ Object
- #csv_input_mapping_order_with_other_columns(mapping, filtered_column_names) ⇒ Object
- #csv_rows_enum ⇒ Object
-
#cvs_input_value_cleanup(value) ⇒ Object
Allows for some basic cleanup of the values, such as applying strip to the strings.
Instance Method Details
#csv_input_columns ⇒ Object
If an array is provided, the names will be used as the row keys, and the row values will be assigned according to the column order.
Ex: %w(tenant segment area) row =>
tenant: _value_at_first_column_,
segment: _value_at_second_column_,
area: _value_at_third_column_
If a hash is provided, the keys will turn into the row keys, and the values will be used to find the corresponding columns (the order in the csv won't affect the import).
Ex: { tenant: 'Mandant', segment: 'Segment', area: 'Bereich' } row =>
tenant: _value_at_column_Mandant,
segment: _value_at_column_Segment,
area: _value_at_column_Bereich
The name of the column is filtered using the csv_input_header_normalized method, which takes care of extra spaces and looks for a case-insensitive match (so 'Bereich' matches ' Bereich', 'bereich', etc.). You can override that method as well.
Besides matching the columns using strings, it is possible to use a regular expression or a proc: {
tenant: 'Mandant',
segment: /Segment/i,
area: ->(name) { name.downcase == 'area' }
}
38 39 40 |
# File 'lib/worker_tools/csv_input.rb', line 38
# Declares the expected CSV columns. This default only raises, so the including
# class has to define its own version returning either an Array of row keys
# (matched by position) or a Hash of row key => header matcher.
#
# @raise [RuntimeError] always, naming the object that lacks the definition
def csv_input_columns
  raise "csv_input_columns has to be defined in #{self}"
end
#csv_input_columns_array_check(csv_rows_enum) ⇒ Object
61 62 63 64 65 66 67 68 |
# File 'lib/worker_tools/csv_input.rb', line 61
# Verifies that the first row has as many cells as csv_input_columns has entries.
#
# @param csv_rows_enum [Enumerable] rows of the csv; only the first one is inspected
# @return [nil] when the counts match
# @raise [Errors::WrongNumberOfColumns] when the counts differ
def csv_input_columns_array_check(csv_rows_enum)
  expected_columns_length = csv_input_columns.length
  # An empty input has no first row; count it as zero columns instead of
  # failing with NoMethodError on nil.
  actual_columns_length = (csv_rows_enum.first || []).length
  return if expected_columns_length == actual_columns_length

  msg = "The number of columns (#{actual_columns_length}) is not the expected (#{expected_columns_length})"
  raise Errors::WrongNumberOfColumns, msg
end
#csv_input_columns_check(csv_rows_enum) ⇒ Object
54 55 56 57 58 59 |
# File 'lib/worker_tools/csv_input.rb', line 54
# Dispatches to the array or hash flavour of the column check, depending on
# the type returned by csv_input_columns.
#
# @param csv_rows_enum [Enumerable] rows of the csv, passed through unchanged
# @return [Object] whatever the selected check returns
def csv_input_columns_check(csv_rows_enum)
  # override and return true if you do not want this check to be performed
  return csv_input_columns_array_check(csv_rows_enum) if csv_input_columns.is_a?(Array)

  csv_input_columns_hash_check(csv_rows_enum)
end
#csv_input_columns_hash_check(csv_rows_enum) ⇒ Object
70 71 72 73 74 75 |
# File 'lib/worker_tools/csv_input.rb', line 70
# Normalizes the header row and runs the duplicate and missing-column checks
# against the matchers declared as values of csv_input_columns.
#
# @param csv_rows_enum [Enumerable] rows of the csv; only the first one is inspected
def csv_input_columns_hash_check(csv_rows_enum)
  expected_names = csv_input_columns.values
  # An empty input has no header row; treat it as an empty header so the
  # missing-column check reports the problem instead of failing on nil.
  header_row = csv_rows_enum.first || []
  filtered_actual_names = header_row.map { |n| csv_input_header_normalized(n) }
  csv_input_columns_hash_check_duplicates(filtered_actual_names)
  csv_input_columns_hash_check_missing(filtered_actual_names, expected_names)
end
#csv_input_columns_hash_check_duplicates(names) ⇒ Object
77 78 79 80 81 82 |
# File 'lib/worker_tools/csv_input.rb', line 77
# Rejects header rows in which the same name appears more than once.
#
# @param names [Array] header names (the caller passes them already normalized)
# @return [nil] when every name is unique
# @raise [Errors::DuplicatedColumns] listing the repeated names
def csv_input_columns_hash_check_duplicates(names)
  dups = names.group_by(&:itself).select { |_, v| v.count > 1 }.keys
  # Core Array#empty? is used here, so the check does not depend on ActiveSupport's #present?.
  return if dups.empty?

  raise Errors::DuplicatedColumns, "The file contains duplicated columns: #{dups}"
end
#csv_input_columns_hash_check_missing(actual_names, expected_names) ⇒ Object
84 85 86 87 88 89 90 |
# File 'lib/worker_tools/csv_input.rb', line 84
# Ensures every expected matcher is satisfied by at least one actual header.
# String matchers are normalized first; any other matcher (e.g. a Regexp or a
# proc) is compared through case equality.
#
# @param actual_names [Array<String>] header names found in the file
# @param expected_names [Array] matchers taken from csv_input_columns
# @return [nil] when nothing is missing
# @raise [Errors::MissingColumns] listing the matchers without a match
def csv_input_columns_hash_check_missing(actual_names, expected_names)
  missing = expected_names.reject do |name|
    matchable = name.is_a?(String) ? csv_input_header_normalized(name) : name
    actual_names.any? do |n|
      # rubocop does not like ===
      case n
      when matchable then true
      end
    end
  end
  raise Errors::MissingColumns, "Some columns are missing: #{missing}" unless missing.empty?
end
#csv_input_csv_options ⇒ Object
92 93 94 95 |
# File 'lib/worker_tools/csv_input.rb', line 92
# Options used when reading the csv; the default sets ';' as column separator.
#
# @return [Hash] csv options
def csv_input_csv_options
  # Ex: { col_sep: ';', encoding: Encoding::ISO_8859_1 }
  { col_sep: ';' }
end
#csv_input_file_path ⇒ Object
136 137 138 |
# Returns the path of the csv file as a String (the chain ends in `.path.to_s`).
# NOTE(review): the receiver chain `model..path` appears to have lost an identifier
# between the two dots during extraction; it cannot be recovered from this page —
# confirm against lib/worker_tools/csv_input.rb before reusing this snippet.
# File 'lib/worker_tools/csv_input.rb', line 136 def csv_input_file_path model..path.to_s end |
#csv_input_foreach ⇒ Object
148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/worker_tools/csv_input.rb', line 148
# Builds (once) the CsvInputForeach iterator for the csv rows.
# The column check runs before the iterator is created, so an invalid header
# raises here; the result is memoized in @csv_input_foreach.
#
# @return [CsvInputForeach]
def csv_input_foreach
  @csv_input_foreach ||= begin
    csv_input_columns_check(csv_rows_enum)

    CsvInputForeach.new(
      rows_enum: csv_rows_enum,
      mapping_order: csv_input_mapping_order(csv_rows_enum.first),
      cleanup_method: method(:cvs_input_value_cleanup),
      headers_present: csv_input_headers_present
    )
  end
end
#csv_input_header_normalize? ⇒ Boolean
101 102 103 |
# File 'lib/worker_tools/csv_input.rb', line 101
# Whether header names are downcased by csv_input_header_normalized.
#
# @return [Boolean] true by default
def csv_input_header_normalize?
  true
end
#csv_input_header_normalized(name) ⇒ Object
42 43 44 45 46 |
# File 'lib/worker_tools/csv_input.rb', line 42
# Converts a header name to a stripped String, downcasing it as well when
# csv_input_header_normalize? is truthy.
#
# @param name [Object] header name; converted with #to_s
# @return [String]
def csv_input_header_normalized(name)
  name = name.to_s.strip
  name = name.downcase if csv_input_header_normalize?
  name
end
#csv_input_headers_present ⇒ Object
144 145 146 |
# File 'lib/worker_tools/csv_input.rb', line 144
# Value handed to CsvInputForeach as `headers_present`.
#
# @return [Boolean] true by default
def csv_input_headers_present
  true
end
#csv_input_include_other_columns ⇒ Object
97 98 99 |
# File 'lib/worker_tools/csv_input.rb', line 97
# Whether columns not declared in csv_input_columns are added to the hash mapping.
#
# @return [Boolean] false by default
def csv_input_include_other_columns
  false
end
#csv_input_mapping_order(header_names) ⇒ Object
Compares the first row (header names) with the csv_input_columns hash to find the corresponding positions.
Ex: csv_input_columns: { tenant: 'Mandant', area: 'Bereich' }
headers: ['Bereich', 'Mandant']
=> { tenant: 1, area: 0}
111 112 113 114 115 |
# File 'lib/worker_tools/csv_input.rb', line 111
# Returns a Hash of row key => column position.
# For an Array definition the position is simply the index of the name in
# csv_input_columns; for a Hash definition the header names are matched.
#
# @param header_names [Array] first row of the csv (only used for Hash definitions)
# @return [Hash]
def csv_input_mapping_order(header_names)
  return csv_input_columns.each_with_index.to_h if csv_input_columns.is_a?(Array)

  csv_input_mapping_order_for_hash(header_names)
end
#csv_input_mapping_order_for_hash(header_names) ⇒ Object
117 118 119 120 121 122 123 124 125 126 |
# File 'lib/worker_tools/csv_input.rb', line 117
# Finds, for every key of csv_input_columns, the index of the first header
# that satisfies its matcher (nil when no header matches).
# String matchers are normalized before comparing; other matchers are
# compared through case equality.
#
# @param header_names [Array] first row of the csv
# @return [Hash] row key => column position; extended with the remaining
#   columns when csv_input_include_other_columns is truthy
def csv_input_mapping_order_for_hash(header_names)
  filtered_column_names = header_names.map { |n| csv_input_header_normalized(n) }

  mapping = csv_input_columns.each_with_object({}) do |(k, v), h|
    matchable = v.is_a?(String) ? csv_input_header_normalized(v) : v
    h[k] = filtered_column_names.index do |n|
      case n
      when matchable then true
      end
    end
  end

  return mapping unless csv_input_include_other_columns

  csv_input_mapping_order_with_other_columns(mapping, filtered_column_names)
end
#csv_input_mapping_order_with_other_columns(mapping, filtered_column_names) ⇒ Object
128 129 130 131 132 133 134 |
# File 'lib/worker_tools/csv_input.rb', line 128
# Adds every column whose position is not yet used by the mapping, keyed by
# its header name converted to a Symbol. The given mapping is modified in place.
#
# @param mapping [Hash] row key => column position
# @param filtered_column_names [Array<String>] header names
# @return [Hash] the same mapping object, extended
def csv_input_mapping_order_with_other_columns(mapping, filtered_column_names)
  # Snapshot taken before the loop: positions added below never block later headers.
  positions_taken = mapping.values

  filtered_column_names.each_with_index do |header, index|
    mapping[header.to_sym] = index unless positions_taken.include?(index)
  end

  mapping
end
#csv_rows_enum ⇒ Object
140 141 142 |
# File 'lib/worker_tools/csv_input.rb', line 140
# Memoized enumerator over the rows of the file at csv_input_file_path.
#
# @return [Enumerator] as returned by CSV.foreach when called without a block
def csv_rows_enum
  # The extracted page left the splat without an operand; csv_input_csv_options
  # is the options method of this module and is splatted as keyword arguments.
  @csv_rows_enum ||= CSV.foreach(csv_input_file_path, **csv_input_csv_options)
end
#cvs_input_value_cleanup(value) ⇒ Object
Allows for some basic cleanup of the values, such as applying strip to the strings.
50 51 52 |
# File 'lib/worker_tools/csv_input.rb', line 50
# Strips surrounding whitespace from String values; any other value is
# returned untouched.
#
# @param value [Object] a cell value
# @return [Object] the stripped String, or the value itself
def cvs_input_value_cleanup(value)
  value.is_a?(String) ? value.strip : value
end