Class: TableImporter::CSV
Constant Summary
Constants inherited from Source
Instance Method Summary collapse
- #clean_file(file) ⇒ Object
- #convert_headers(provided_headers, mapped_headers, headers_present) ⇒ Object
-
#default_options(options = {}) ⇒ Object
TODO: fix quote_char. This is a bit of a hack to provide the correct number of default headers to the user (rather than just 100).
- #file_has_no_content ⇒ Object
- #get_chunks(chunk_size) ⇒ Object
- #get_column_separator(first_line = get_first_line) ⇒ Object
- #get_first_line ⇒ Object
- #get_headers ⇒ Object
-
#get_lines(start, number_of_lines) ⇒ Object
This is horrendously slow.
- #get_preview_lines ⇒ Object
- #get_record_separator(first_line = get_first_line) ⇒ Object
- #get_type ⇒ Object
-
#initialize(data) ⇒ CSV
constructor
A new instance of CSV.
Methods inherited from Source
#clean_chunks, #default_headers, #get_sep_count, #sort_separators
Constructor Details
#initialize(data) ⇒ CSV
Returns a new instance of CSV.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
# File 'lib/table_importer/csv.rb', line 7
# Builds a CSV source from the given options and eagerly reads the first
# line, the preview lines and the headers.
#
# Keys read from +data+:
#   :headers_present    - truthy when the user says the file has a header row
#   :headers            - headers supplied by the caller (may be blank)
#   :column_separator   - key into SEPARATORS (optional)
#   :record_separator   - key into SEPARATORS; "\n" is used when nil or empty
#   :compulsory_headers - passed through to clean_chunks later on
#   :content            - the file object to import
#
# NOTE(review): an ArgumentError raised while probing the file triggers
# clean_file and an unconditional retry; there is no explicit retry limit.
def initialize(data)
  @headers_present = data[:headers_present] # user has indicated headers are provided
  @headers = data[:headers]

  column_key = data[:column_separator]
  @column_separator = SEPARATORS[column_key.to_sym] unless column_key.nil?

  record_key = data[:record_separator]
  @record_separator =
    if record_key.nil? || record_key.length <= 0
      "\n"
    else
      SEPARATORS[record_key.to_sym]
    end

  @compulsory_headers = data[:compulsory_headers]
  @file = data[:content]
  # Empty columns are only stripped for files smaller than 100000 bytes.
  @delete_empty_columns = File.size(@file) < 100000

  begin
    first_line = get_first_line
    # get_first_line hands back 0 when no chunk was yielded; treat that like
    # a parse failure so the file gets cleaned and probed again.
    raise ArgumentError if first_line == 0

    get_column_separator(first_line)
    @preview_lines = file_has_no_content
    if @headers.blank?
      @headers = @headers_present ? first_line.split(@column_separator) : default_headers(100)
    end
  rescue ArgumentError
    @file = clean_file(@file)
    retry
  end
end
Instance Method Details
#clean_file(file) ⇒ Object
138 139 140 141 142 143 144 |
# File 'lib/table_importer/csv.rb', line 138
# Copies +file+ into a fresh UTF-8 Tempfile, replacing invalid byte sequences
# with "?" and normalising "\r\n" / "\r" line endings to "\n".
#
# file - an IO-like object responding to #read.
#
# Returns the closed Tempfile; callers use its #path.
def clean_file(file)
  contents = file.read
  import = Tempfile.new(["import", ".xls"], :encoding => "UTF-8")
  # Round-tripping through UTF-16 forces invalid UTF-8 bytes to be replaced.
  # Non-bang gsub is required here: gsub! returns nil when the text contains
  # no carriage returns, which previously caused an empty file to be written.
  cleaned = contents.force_encoding('UTF-8')
                    .encode('UTF-16', :invalid => :replace, :replace => '?')
                    .encode('UTF-8')
                    .gsub(/\r\n|\r/, "\n")
  import.write(cleaned)
  import.close
  import
end
#convert_headers(provided_headers, mapped_headers, headers_present) ⇒ Object
115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# File 'lib/table_importer/csv.rb', line 115
# Builds a mapping from each existing header to the header it should be
# renamed to.
#
# provided_headers - headers read from the file (used when headers_present).
# mapped_headers   - pairs of new_key => column index; the index is compared
#                    as a string, so 1 and "1" both match column 1. When
#                    several entries name the same index the last one wins.
#                    nil is treated as "no mappings".
# headers_present  - when falsy, default_headers (inherited from Source) is
#                    used instead of provided_headers.
#
# Returns a Hash of old header => new header; columns without a mapping get
# the symbol :column_<index>.
def convert_headers(provided_headers, mapped_headers, headers_present)
  old_headers = headers_present ? provided_headers : default_headers

  # Index the mappings once instead of rescanning them for every column.
  key_for_index = (mapped_headers || {}).each_with_object({}) do |(new_key, value), lookup|
    lookup[value.to_s] = new_key
  end

  new_headers = old_headers.each_with_index.map do |_key, index|
    key_for_index.fetch(index.to_s) { "column_#{index}".to_sym }
  end

  old_headers.zip(new_headers).to_h
end
#default_options(options = {}) ⇒ Object
TODO: fix quote_char. This is a bit of a hack to provide the correct number of default headers to the user (rather than just 100).
132 133 134 135 136 |
# File 'lib/table_importer/csv.rb', line 132
# Returns the base option hash handed to SmarterCSV.process, with +options+
# merged on top (caller-supplied keys win).
#
# options - Hash of overrides (default: {}).
#
# :user_provided_headers is @headers when headers are present in the file
# (nil if @headers is nil or an empty Hash); otherwise it is
# default_headers(100).
#
# TODO: fix quote_char. The "‱" value is a placeholder character.
def default_options(options = {})
  user_headers =
    if @headers_present
      @headers.nil? || @headers == {} ? nil : @headers
    else
      default_headers(100)
    end

  {
    :col_sep => @column_separator,
    :row_sep => @record_separator,
    :quote_char => "‱",
    :remove_empty_values => false,
    :verbose => false,
    :headers_in_file => @headers_present,
    :convert_values_to_numeric => false,
    :user_provided_headers => user_headers
  }.merge(options)
end
#file_has_no_content ⇒ Object
43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/table_importer/csv.rb', line 43
# Fetches the preview lines and fails if there are none.
#
# Returns whatever get_preview_lines returned when it is neither blank nor 0.
# Raises Exceptions::EmptyFileImportError when the preview is blank or 0, or
# when a NoMethodError is raised while fetching it.
def file_has_no_content
  lines = get_preview_lines
  raise Exceptions::EmptyFileImportError.new if lines.blank? || lines == 0

  lines
rescue NoMethodError
  raise Exceptions::EmptyFileImportError.new
end
#get_chunks(chunk_size) ⇒ Object
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/table_importer/csv.rb', line 98
# Reads the file with SmarterCSV in chunks of +chunk_size+ and passes the
# result through clean_chunks.
#
# When headers are present, the file's own keys are renamed via :key_mapping
# (blank mappings are dropped and unmapped keys removed). Otherwise the
# converted header values are supplied as :user_provided_headers.
#
# chunk_size - passed to SmarterCSV as :chunk_size.
#
# Returns the value of clean_chunks.
#
# NOTE(review): on ArgumentError the file is cleaned and the whole read is
# retried; there is no explicit retry limit.
def get_chunks(chunk_size)
  # Both branches need the keys of the first parsed row, so read them once.
  file_keys = SmarterCSV.process(@file.path, default_options).first.keys
  header_map = convert_headers(file_keys, @headers, @headers_present)

  overrides =
    if @headers_present
      {
        :chunk_size => chunk_size,
        :key_mapping => header_map.delete_if { |_key, value| value.blank? },
        :remove_unmapped_keys => true,
        :user_provided_headers => nil
      }
    else
      {
        :chunk_size => chunk_size,
        :user_provided_headers => header_map.values,
        :remove_empty_values => true
      }
    end

  chunks = SmarterCSV.process(@file.path, default_options(overrides))
  clean_chunks(chunks, @compulsory_headers, @delete_empty_columns)
rescue ArgumentError
  @file = clean_file(@file)
  retry
end
#get_column_separator(first_line = get_first_line) ⇒ Object
64 65 66 67 68 69 |
# File 'lib/table_importer/csv.rb', line 64
# Returns the column separator, detecting it from +first_line+ when none has
# been set yet.
#
# first_line - text to inspect (defaults to get_first_line).
#
# An already-set, non-empty @column_separator is returned untouched.
# Otherwise the candidates from get_sep_count are taken, the record separator
# (if any) is excluded, and the result of sort_separators is stored in
# @column_separator and returned.
def get_column_separator(first_line = get_first_line)
  return @column_separator unless @column_separator.nil? || @column_separator.length <= 0

  candidates = get_sep_count(first_line)
  unless @record_separator.nil?
    candidates.reject! { |candidate| candidate.keys[0] == @record_separator }
  end
  @column_separator = sort_separators(candidates)
end
#get_first_line ⇒ Object
29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/table_importer/csv.rb', line 29
# Returns the first cell of the file as a String: the first key of the first
# parsed row when headers are present, otherwise its first value.
#
# The column separator falls back to "\n" when none is set, and the record
# separator falls back to "\n" when it is nil.
#
# If SmarterCSV never yields a chunk, the return value of SmarterCSV.process
# itself is returned instead (initialize compares that against 0).
#
# Raises Exceptions::EmptyFileImportError on EOFError.
def get_first_line
  overrides = {
    :col_sep => @column_separator.present? ? @column_separator : "\n",
    :row_sep => @record_separator.nil? ? "\n" : @record_separator,
    :chunk_size => 8
  }

  SmarterCSV.process(@file.path, default_options(overrides)) do |chunk|
    first_row = chunk.first
    return (@headers_present ? first_row.keys : first_row.values)[0].to_s
  end
rescue EOFError
  raise Exceptions::EmptyFileImportError.new
end
#get_headers ⇒ Object
60 61 62 |
# File 'lib/table_importer/csv.rb', line 60
# Returns the current value of @headers.
def get_headers
  @headers
end
#get_lines(start, number_of_lines) ⇒ Object
This is horrendously slow.
94 95 96 |
# File 'lib/table_importer/csv.rb', line 94
# Returns the slice start..(start + number_of_lines) of get_chunks(50).
#
# This is horrendously slow: every call re-reads the file via get_chunks.
#
# NOTE(review): the range is inclusive, so up to number_of_lines + 1 entries
# are returned — confirm whether that is intended.
def get_lines(start, number_of_lines)
  last = start + number_of_lines
  get_chunks(50)[start..last]
end
#get_preview_lines ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/table_importer/csv.rb', line 78
# Returns the lines used to preview the import.
#
# - If @preview_lines is already populated, it is passed through clean_chunks
#   and its :lines entry is returned.
# - If empty columns are being deleted, the file is read in chunks of 50,
#   cleaned, and the first 8 lines of the first chunk are returned.
# - Otherwise the first chunk of 8 rows is cleaned and its first 8 lines are
#   returned.
#
# If SmarterCSV never yields a chunk in the last case, the return value of
# SmarterCSV.process itself is returned.
#
# Raises Exceptions::HeaderMismatchError on SmarterCSV::HeaderSizeMismatch.
def get_preview_lines
  unless @preview_lines.blank?
    return clean_chunks([@preview_lines], @compulsory_headers, @delete_empty_columns)[0].symbolize_keys[:lines]
  end

  row_sep = @record_separator.nil? ? "\n" : @record_separator

  if @delete_empty_columns
    chunks = SmarterCSV.process(@file.path, default_options({:row_sep => row_sep, :chunk_size => 50}))
    return clean_chunks(chunks, @compulsory_headers, true)[0].symbolize_keys[:lines][0..7]
  end

  SmarterCSV.process(@file.path, default_options({:row_sep => row_sep, :chunk_size => 8})) do |chunk|
    return clean_chunks([chunk], @compulsory_headers)[0].symbolize_keys[:lines][0..7]
  end
rescue SmarterCSV::HeaderSizeMismatch
  raise Exceptions::HeaderMismatchError.new
end
#get_record_separator(first_line = get_first_line) ⇒ Object
71 72 73 74 75 76 |
# File 'lib/table_importer/csv.rb', line 71
# Returns the record separator, detecting it from +first_line+ when none has
# been set yet.
#
# first_line - text to inspect (defaults to get_first_line).
#
# An already-set, non-empty @record_separator is returned untouched.
# Otherwise the candidates from get_sep_count are taken, the column separator
# is excluded, and the result of sort_separators is stored in
# @record_separator and returned.
def get_record_separator(first_line = get_first_line)
  return @record_separator unless @record_separator.nil? || @record_separator.length <= 0

  candidates = get_sep_count(first_line)
  candidates.reject! { |candidate| candidate.keys[0] == get_column_separator }
  @record_separator = sort_separators(candidates)
end
#get_type ⇒ Object
56 57 58 |
# File 'lib/table_importer/csv.rb', line 56
# Returns the identifier of this source type, the string "csv".
def get_type
  'csv'
end