Class: TableImporter::CSV

Inherits:
Source
  • Object
show all
Defined in:
lib/table_importer/csv.rb

Constant Summary

Constants inherited from Source

Source::SEPARATORS

Instance Method Summary collapse

Methods inherited from Source

#clean_chunks, #default_headers, #get_sep_count, #sort_separators

Constructor Details

#initialize(data) ⇒ CSV

Returns a new instance of CSV.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/table_importer/csv.rb', line 7

# Builds a CSV source from the +data+ options hash.
#
# Keys read from +data+: :headers_present, :headers, :column_separator,
# :record_separator, :compulsory_headers and :content (the file to import).
# Separator values are looked up in SEPARATORS by symbol; the record
# separator falls back to "\n" when it is missing or empty.
#
# If parsing raises ArgumentError, @file is replaced by the result of
# clean_file and the parsing steps are retried (there is no retry counter).
def initialize(data)
  column_key = data[:column_separator]
  record_key = data[:record_separator]

  @headers_present = data[:headers_present] # user has indicated headers are provided
  @headers = data[:headers]
  @column_separator = SEPARATORS[column_key.to_sym] unless column_key.nil?
  @record_separator =
    if record_key.nil? || record_key.length <= 0
      "\n"
    else
      SEPARATORS[record_key.to_sym]
    end
  @compulsory_headers = data[:compulsory_headers]
  @file = data[:content]
  # Empty-column removal is only enabled for files under 100000 bytes.
  @delete_empty_columns = File.size(@file) < 100_000

  # Only this block is re-run by `retry`; the assignments above are not.
  begin
    first_line = get_first_line
    # A first_line of 0 is handled like an unreadable file: clean and retry.
    raise ArgumentError if first_line == 0
    get_column_separator(first_line)
    @preview_lines = file_has_no_content
    if @headers.blank?
      @headers = @headers_present ? first_line.split(@column_separator) : default_headers(100)
    end
  rescue ArgumentError
    @file = clean_file(@file)
    retry
  end
end

Instance Method Details

#clean_file(file) ⇒ Object



138
139
140
141
142
143
144
# File 'lib/table_importer/csv.rb', line 138

# Copies the contents of +file+ into a new temporary file as valid UTF-8 with
# "\n" line endings.
#
# @param file [#read] readable object holding the raw import data
# @return [Tempfile] the closed temporary file containing the cleaned text
def clean_file(file)
  contents = file.read
  # Round-tripping through UTF-16 replaces every invalid byte sequence with "?".
  sanitized = contents.force_encoding('UTF-8').encode('UTF-16', :invalid => :replace, :replace => '?').encode('UTF-8')
  import = Tempfile.new(["import", ".xls"], :encoding => "UTF-8")
  # Use non-bang gsub: gsub! returns nil when the text has no "\r", which
  # made the previous version write an empty file for LF-only input.
  import.write(sanitized.gsub(/\r\n|\r/, "\n"))
  import.close
  import
end

#convert_headers(provided_headers, mapped_headers, headers_present) ⇒ Object



115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/table_importer/csv.rb', line 115

# Maps each original header to the name it should be imported under.
#
# +mapped_headers+ is iterated as (new name, column index) pairs; a header at
# position N is renamed to the last new name whose index equals N (compared
# as strings), and to :column_N when no pair matches.
#
# When +headers_present+ is false, default_headers is used in place of
# +provided_headers+.
#
# Returns a Hash of original header => new header.
def convert_headers(provided_headers, mapped_headers, headers_present)
  source_headers = headers_present ? provided_headers : default_headers
  renamed = source_headers.each_with_index.map do |_header, position|
    target = "column_#{position}".to_sym
    # No early exit: a later matching pair overrides an earlier one.
    mapped_headers.each do |candidate, mapped_position|
      target = candidate if mapped_position.to_s == position.to_s
    end
    target
  end
  Hash[source_headers.zip(renamed)]
end

#default_options(options = {}) ⇒ Object

TODO: fix quote_char. This is a bit of a hack to provide the correct number of default headers to the user (rather than just 100).



132
133
134
135
136
# File 'lib/table_importer/csv.rb', line 132

# Builds the option hash passed to SmarterCSV.process, with +options+
# overriding the defaults.
#
# :user_provided_headers is @headers when headers are present in the file
# (nil if @headers is nil or an empty hash); otherwise it is
# default_headers(100).
def default_options(options = {})
  provided_headers =
    if @headers_present
      (@headers.nil? || @headers == {}) ? nil : @headers
    else
      default_headers(100)
    end

  defaults = {
    :col_sep => @column_separator,
    :row_sep => @record_separator,
    :quote_char => "",
    :remove_empty_values => false,
    :verbose => false,
    :headers_in_file => @headers_present,
    :convert_values_to_numeric => false,
    :user_provided_headers => provided_headers
  }
  defaults.merge(options)
end

#file_has_no_content ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/table_importer/csv.rb', line 43

# Returns the preview lines, or raises Exceptions::EmptyFileImportError when
# they are blank or 0. A NoMethodError raised while fetching or checking them
# is reported as the same empty-file error.
def file_has_no_content
  preview = get_preview_lines
  raise Exceptions::EmptyFileImportError.new if preview.blank? || preview == 0
  preview
rescue NoMethodError
  raise Exceptions::EmptyFileImportError.new
end

#get_chunks(chunk_size) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/table_importer/csv.rb', line 98

# Parses @file with SmarterCSV in chunks of +chunk_size+ and passes the result
# through clean_chunks.
#
# The file is first parsed once with the default options to obtain the keys
# of its first row, which convert_headers turns into a header mapping:
# * headers present: the mapping (minus blank targets) is used as
#   :key_mapping and unmapped keys are removed;
# * no headers: the mapped names are supplied as :user_provided_headers and
#   empty values are removed.
#
# On ArgumentError, @file is replaced by clean_file(@file) and the whole
# method body is retried (there is no retry counter).
def get_chunks(chunk_size)
  file_keys = SmarterCSV.process(@file.path, default_options).first.keys
  header_map = convert_headers(file_keys, @headers, @headers_present)

  parse_options =
    if @headers_present
      {
        :chunk_size => chunk_size,
        :key_mapping => header_map.delete_if { |_key, value| value.blank? },
        :remove_unmapped_keys => true,
        :user_provided_headers => nil
      }
    else
      {
        :chunk_size => chunk_size,
        :user_provided_headers => header_map.values,
        :remove_empty_values => true
      }
    end

  parsed = SmarterCSV.process(@file.path, default_options(parse_options))
  clean_chunks(parsed, @compulsory_headers, @delete_empty_columns)
rescue ArgumentError
  @file = clean_file(@file)
  retry
end

#get_column_separator(first_line = get_first_line) ⇒ Object



64
65
66
67
68
69
# File 'lib/table_importer/csv.rb', line 64

# Returns the column separator, detecting it from +first_line+ when none has
# been set yet.
#
# A non-empty @column_separator is returned as is. Otherwise the candidates
# from get_sep_count(first_line), excluding the record separator when one is
# set, are ranked by sort_separators and the result is stored in
# @column_separator.
def get_column_separator(first_line = get_first_line)
  known = @column_separator
  return known unless known.nil? || known.length <= 0

  candidates = get_sep_count(first_line)
  unless @record_separator.nil?
    candidates.reject! { |candidate| candidate.keys[0] == @record_separator }
  end
  @column_separator = sort_separators(candidates)
end

#get_first_line ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/table_importer/csv.rb', line 29

# Reads the first chunk of @file and returns, as a String, the first key of
# its first row when headers are present, or the first value otherwise.
#
# The column separator defaults to "\n" when none is set, and the record
# separator defaults to "\n" when it is nil. If the block is never invoked,
# the return value of SmarterCSV.process is returned instead.
#
# Raises Exceptions::EmptyFileImportError when SmarterCSV hits EOFError.
def get_first_line
  col_sep = @column_separator.present? ? @column_separator : "\n"
  row_sep = @record_separator.nil? ? "\n" : @record_separator

  SmarterCSV.process(@file.path, default_options({:col_sep => col_sep, :row_sep => row_sep, :chunk_size => 8})) do |chunk|
    leading_row = chunk.first
    cell = @headers_present ? leading_row.keys[0] : leading_row.values[0]
    return cell.to_s
  end
rescue EOFError
  raise Exceptions::EmptyFileImportError.new
end

#get_headers ⇒ Object



60
61
62
# File 'lib/table_importer/csv.rb', line 60

# Returns the current value of @headers.
def get_headers
  @headers
end

#get_lines(start, number_of_lines) ⇒ Object

This is horrendously slow: every call runs get_chunks(50) on the file before slicing the result.



94
95
96
# File 'lib/table_importer/csv.rb', line 94

# Returns the entries of get_chunks(50) from index +start+ through
# +start + number_of_lines+. The range is inclusive, so up to
# number_of_lines + 1 entries are returned.
def get_lines(start, number_of_lines)
  all_chunks = get_chunks(50)
  all_chunks[start..(start + number_of_lines)]
end

#get_preview_lines ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/table_importer/csv.rb', line 78

# Returns the lines used for the import preview.
#
# * When @preview_lines is already populated, it is passed through
#   clean_chunks and the :lines entry of the first result is returned.
# * When @delete_empty_columns is set, the file is parsed in chunks of 50,
#   cleaned with empty-column removal, and the first eight lines returned.
# * Otherwise the first chunk of 8 rows is cleaned and its first eight lines
#   returned.
#
# Raises Exceptions::HeaderMismatchError when SmarterCSV reports
# SmarterCSV::HeaderSizeMismatch.
def get_preview_lines
  unless @preview_lines.blank?
    return clean_chunks([@preview_lines], @compulsory_headers, @delete_empty_columns)[0].symbolize_keys[:lines]
  end

  row_sep = @record_separator.nil? ? "\n" : @record_separator

  if @delete_empty_columns
    chunks = SmarterCSV.process(@file.path, default_options({:row_sep => row_sep, :chunk_size => 50}))
    return clean_chunks(chunks, @compulsory_headers, true)[0].symbolize_keys[:lines][0..7]
  end

  SmarterCSV.process(@file.path, default_options({:row_sep => row_sep, :chunk_size => 8})) do |chunk|
    return clean_chunks([chunk], @compulsory_headers)[0].symbolize_keys[:lines][0..7]
  end
rescue SmarterCSV::HeaderSizeMismatch
  raise Exceptions::HeaderMismatchError.new
end

#get_record_separator(first_line = get_first_line) ⇒ Object



71
72
73
74
75
76
# File 'lib/table_importer/csv.rb', line 71

# Returns the record separator, detecting it from +first_line+ when none has
# been set yet.
#
# A non-empty @record_separator is returned as is. Otherwise the candidates
# from get_sep_count(first_line), excluding the column separator, are ranked
# by sort_separators and the result is stored in @record_separator.
def get_record_separator(first_line = get_first_line)
  return @record_separator if !@record_separator.nil? && @record_separator.length > 0
  separators = get_sep_count(first_line)
  unless separators.empty?
    # Look the column separator up once: calling get_column_separator inside
    # the block re-evaluated its default argument (get_first_line) for every
    # candidate.
    column_separator = get_column_separator
    separators.reject! { |sep| sep.keys[0] == column_separator }
  end
  @record_separator = sort_separators(separators)
end

#get_type ⇒ Object



56
57
58
# File 'lib/table_importer/csv.rb', line 56

# Returns the identifier of this source type, the string "csv".
def get_type
  "csv"
end