Class: TableImporter::CSV
Constant Summary
Constants inherited
from Source
Source::SEPARATORS
Instance Method Summary
collapse
Methods inherited from Source
#clean_chunks, #default_headers, #get_sep_count, #sort_separators
Constructor Details
#initialize(data) ⇒ CSV
Returns a new instance of CSV.
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
# File 'lib/table_importer/csv.rb', line 7
# Builds a CSV importer from an options hash and determines its headers.
#
# Reads the following keys from +data+: :headers_present, :column_separator,
# :record_separator, :compulsory_headers and :content (stored as @file).
#
# @param data [Hash] importer settings, see the keys above
# @raise [TableImporter::EmptyFileImportError] when file_has_content is false
def initialize(data)
  @headers_present = data[:headers_present]
  @column_separator, @record_separator =
    initialize_separators(data[:column_separator], data[:record_separator])
  @compulsory_headers = data[:compulsory_headers]
  @file = data[:content]
  # Flag handed to clean_chunks later; only set for files below 100000 bytes.
  @delete_empty_columns = File.size(@file) < 100000
  begin
    first_line = get_first_line
    # A first line equal to 0 is treated as unreadable and handled by the
    # ArgumentError branch below (clean the file, then try again).
    raise ArgumentError if first_line == 0
    get_column_separator(first_line)
    raise TableImporter::EmptyFileImportError.new unless file_has_content
    # NOTE(review): only "(100)" remained here in the reviewed listing; the call
    # name is restored as default_headers, which is listed as inherited from
    # Source - confirm against the original file.
    @headers = @headers_present ? first_line.split(@column_separator) : default_headers(100)
  rescue ArgumentError
    # Replace the file with a cleaned copy, re-detect the column separator
    # and run the whole detection block again.
    # NOTE(review): this retry has no visible attempt limit.
    @file = clean_file(@file)
    @column_separator = get_column_separator
    retry
  end
end
|
Instance Method Details
116
117
118
119
|
# File 'lib/table_importer/csv.rb', line 116
# NOTE(review): the method name and the name of the helper called on the
# key_mapping line are missing from this listing (presumably lost in
# extraction) - restore them from the original file.
#
# Processes @file.path with SmarterCSV in chunks of chunk_size. A key mapping
# is built from the keys of the first processed row, @headers and
# @headers_present; entries with a blank value are deleted from it. The file is
# then processed with that mapping, :remove_unmapped_keys => true and
# :user_provided_headers => nil.
def (chunk_size)
key_mapping = (SmarterCSV.process(@file.path, default_options).first.keys, @headers, @headers_present).delete_if{ |key, value| value.blank?}
SmarterCSV.process(@file.path, default_options({:chunk_size => chunk_size, :key_mapping => key_mapping, :remove_unmapped_keys => true, :user_provided_headers => nil}))
end
|
121
122
123
124
|
# File 'lib/table_importer/csv.rb', line 121
# NOTE(review): the method name, the local variable assigned on the first body
# line and the helper it calls are missing from this listing (presumably lost
# in extraction) - restore them from the original file.
#
# Processes @file.path with SmarterCSV in chunks of chunk_size. The values of a
# structure built from the first processed row's keys, @headers and
# @headers_present are passed as :user_provided_headers, together with
# :remove_empty_values => true.
def (chunk_size)
= (SmarterCSV.process(@file.path, default_options).first.keys, @headers, @headers_present).values
SmarterCSV.process(@file.path, default_options({:chunk_size => chunk_size, :user_provided_headers => , :remove_empty_values => true}))
end
|
#clean_file(file) ⇒ Object
151
152
153
154
155
156
157
158
159
160
|
# File 'lib/table_importer/csv.rb', line 151
# Writes a normalised copy of +file+ to a new temp file and resets the
# separators.
#
# The content is read in full, invalid byte sequences are replaced with "?"
# (by transcoding through UTF-16 and back to UTF-8), "\r\n" and "\r" become
# "\n", runs of newlines are squeezed to one and a single leading newline is
# dropped.
#
# @param file [#read] source to read the raw content from
# @return [Tempfile] the closed temp file holding the cleaned content
def clean_file(file)
  raw = file.read
  import = Tempfile.new(["import", ".xls"], :encoding => "UTF-8")
  transcoded = raw.force_encoding('UTF-8')
                  .encode('UTF-16', :invalid => :replace, :replace => '?')
                  .encode('UTF-8')
  normalised = transcoded.gsub(/\r\n|\r/, "\n").squeeze("\n")
  # Drop one leading newline so the copy does not start with an empty record.
  normalised = normalised[1..-1] if normalised[0] == "\n"
  import.write(normalised)
  import.close
  reset_separators
  import
end
|
126
127
128
129
130
131
132
133
|
# File 'lib/table_importer/csv.rb', line 126
# NOTE(review): the method name, all three parameter names and every local
# variable name are missing from this listing (presumably lost in extraction) -
# restore them from the original file before relying on this block.
#
# What is still visible: an empty array is created, a collection chosen by a
# ternary is walked with each_with_index, one value derived from the index is
# appended per element, and the result is a Hash built from a zip of two
# collections.
def (, , )
= []
= ? :
.each_with_index do |key, index|
<< (, index)
end
Hash[.zip()]
end
|
#default_options(options = {}) ⇒ Object
145
146
147
148
149
|
# File 'lib/table_importer/csv.rb', line 145
# Builds the option hash handed to SmarterCSV.process.
#
# The defaults come from the importer's separators and header settings; any
# key in +options+ overrides the default of the same name.
#
# @param options [Hash] overrides merged on top of the defaults
# @return [Hash] the merged options
def default_options(options = {})
  user_headers =
    if @headers_present
      # An unset or empty-hash @headers means "no user provided headers".
      @headers.nil? || @headers == {} ? nil : @headers
    else
      # NOTE(review): only "(100)" remained here in the reviewed listing; the
      # call name is restored as default_headers, which is listed as inherited
      # from Source - confirm against the original file.
      default_headers(100)
    end

  {
    :col_sep => @column_separator,
    :row_sep => @record_separator,
    :force_simple_split => true,
    :strip_chars_from_headers => /[\-"]/,
    :remove_empty_values => false,
    :verbose => false,
    :headers_in_file => @headers_present,
    :convert_values_to_numeric => false,
    :user_provided_headers => user_headers
  }.merge(options)
end
|
#file_has_content ⇒ Object
52
53
54
55
56
57
58
59
60
61
62
63
|
# File 'lib/table_importer/csv.rb', line 52
# Reports whether the preview of the file yields any lines.
#
# @return [Boolean] false when get_preview_lines returns something blank or 0,
#   or when a NoMethodError is raised while checking; true otherwise
def file_has_content
  preview = get_preview_lines
  !(preview.blank? || preview == 0)
rescue NoMethodError
  false
end
|
#get_chunks(chunk_size) ⇒ Object
105
106
107
108
109
110
111
112
113
114
|
# File 'lib/table_importer/csv.rb', line 105
# Returns the file's chunks after passing them through clean_chunks together
# with @compulsory_headers and @delete_empty_columns.
#
# NOTE(review): the names of the two methods selected by @headers_present are
# missing from this listing (only their "(chunk_size)" argument lists remain) -
# restore them from the original file.
def get_chunks(chunk_size)
begin
chunks = @headers_present ? (chunk_size) : (chunk_size)
clean_chunks(chunks, @compulsory_headers, @delete_empty_columns)
rescue ArgumentError
# On ArgumentError the file is replaced by a cleaned copy, the column
# separator is detected again and the whole block is re-run.
# NOTE(review): this retry has no visible attempt limit.
@file = clean_file(@file)
@column_separator = get_column_separator
retry
end
end
|
#get_column_separator(first_line = get_first_line) ⇒ Object
73
74
75
76
77
78
|
# File 'lib/table_importer/csv.rb', line 73
# Returns the column separator, detecting it from +first_line+ when none is set.
#
# A non-empty @column_separator is returned untouched. Otherwise the candidates
# reported by get_sep_count are taken, the one equal to @record_separator is
# removed (when a record separator is set), and the choice made by
# sort_separators is stored in @column_separator.
#
# @param first_line [Object] line to inspect; defaults to get_first_line
# @return [Object] the stored or newly detected column separator
def get_column_separator(first_line = get_first_line)
  current = @column_separator
  return current unless current.nil? || current.length <= 0

  candidates = get_sep_count(first_line)
  unless @record_separator.nil?
    candidates.delete_if { |candidate| candidate.keys.first == @record_separator }
  end
  @column_separator = sort_separators(candidates)
end
|
#get_first_line ⇒ Object
34
35
36
37
38
39
40
41
42
43
44
45
46
|
# File 'lib/table_importer/csv.rb', line 34
# Reads the first row of the file and returns it as a single line.
#
# The file is processed with SmarterCSV in chunks of 2, using "\n" as column
# separator when @column_separator is not present and "\n" as row separator
# when @record_separator is nil. For the first chunk yielded, the first row's
# keys (when @headers_present) or values are passed to line_count and that
# result is returned. If no chunk is yielded, the return value of
# SmarterCSV.process is returned as is.
#
# @raise [TableImporter::EmptyFileImportError] when processing raises EOFError
def get_first_line
  path = @file.path
  column_sep = @column_separator.present? ? @column_separator : "\n"
  row_sep = @record_separator.nil? ? "\n" : @record_separator
  options = default_options({:col_sep => column_sep, :row_sep => row_sep, :chunk_size => 2})
  SmarterCSV.process(path, options) do |chunk|
    first_row = chunk.first
    return line_count(@headers_present ? first_row.keys : first_row.values)
  end
rescue EOFError
  raise TableImporter::EmptyFileImportError.new
end
|
69
70
71
|
# File 'lib/table_importer/csv.rb', line 69
# NOTE(review): the method name is missing from this listing (presumably lost
# in extraction) - restore it from the original file.
#
# Reader that returns @headers.
def
@headers
end
|
#get_lines(start, number_of_lines) ⇒ Object
Note: this is horrendously slow — every call re-runs get_chunks(50) and then slices the result.
101
102
103
|
# File 'lib/table_importer/csv.rb', line 101
# Returns a slice of the result of get_chunks(50).
#
# The slice uses an inclusive range from +start+ to start + number_of_lines,
# so it can hold up to number_of_lines + 1 entries.
#
# @param start [Integer] index of the first entry to return
# @param number_of_lines [Integer] offset of the last entry relative to start
def get_lines(start, number_of_lines)
  chunks = get_chunks(50)
  window = Range.new(start, start + number_of_lines)
  chunks[window]
end
|
#get_preview_lines(start = 0, finish = 7, chunk_size = 8) ⇒ Object
87
88
89
90
91
92
93
94
95
96
97
98
|
# File 'lib/table_importer/csv.rb', line 87
# Returns a window of cleaned lines from the beginning of the file.
#
# The file is processed with SmarterCSV using default_options, with "\n" as row
# separator when @record_separator is nil and the given chunk_size. Each chunk
# is passed through clean_chunks and its :lines entry is taken.
#
# @param start [Integer] first index of the window (default 0)
# @param finish [Integer] last index of the window, inclusive (default 7)
# @param chunk_size [Integer] chunk size handed to SmarterCSV (default 8)
# @raise [TableImporter::HeaderMismatchError] when SmarterCSV raises
#   SmarterCSV::HeaderSizeMismatch
def get_preview_lines(start = 0, finish = 7, chunk_size = 8)
begin
SmarterCSV.process(@file.path, default_options({:row_sep => @record_separator != nil ? @record_separator : "\n", :chunk_size => chunk_size})) do |chunk|
cleaned_chunk = clean_chunks([chunk], @compulsory_headers, @delete_empty_columns)[0].symbolize_keys[:lines]
# Return the requested window as soon as a chunk's first cleaned line is present.
return cleaned_chunk[start..finish] if cleaned_chunk.first.present?
# Otherwise switch headers off and try again with the window shifted by 8
# and the chunk size grown by 8.
# NOTE(review): the result of this recursive call is neither returned nor
# stored, so processing continues with the next chunk afterwards - confirm
# whether a `return` was intended.
@headers_present = false
get_preview_lines(start+8, finish+8, chunk_size+8)
end
rescue SmarterCSV::HeaderSizeMismatch
raise TableImporter::HeaderMismatchError.new
end
end
|
#get_record_separator(first_line = get_first_line) ⇒ Object
80
81
82
83
84
85
|
# File 'lib/table_importer/csv.rb', line 80
# Returns the record separator, detecting it from +first_line+ when none is set.
#
# A non-empty @record_separator is returned untouched. Otherwise the candidates
# reported by get_sep_count are taken, the one equal to the column separator is
# removed, and the choice made by sort_separators is stored in
# @record_separator.
#
# @param first_line [Object] line to inspect; defaults to get_first_line
# @return [Object] the stored or newly detected record separator
def get_record_separator(first_line = get_first_line)
  current = @record_separator
  return current unless current.nil? || current.length <= 0

  candidates = get_sep_count(first_line)
  # get_column_separator is looked up once per candidate, as before.
  candidates.delete_if { |candidate| candidate.keys.first == get_column_separator }
  @record_separator = sort_separators(candidates)
end
|
#get_type ⇒ Object
65
66
67
|
# File 'lib/table_importer/csv.rb', line 65
# Names the kind of source this importer handles.
#
# @return [String] always "csv"
def get_type
  'csv'
end
|
#initialize_separators(col_sep, rec_sep) ⇒ Object
28
29
30
31
32
|
# File 'lib/table_importer/csv.rb', line 28
# Translates separator names into the separator strings stored in SEPARATORS.
#
# A nil +col_sep+ stays nil. A nil or empty +rec_sep+ falls back to "\n".
# Otherwise each name is converted to a symbol and looked up in SEPARATORS.
#
# @param col_sep [#to_sym, nil] name of the column separator
# @param rec_sep [#to_sym, nil] name of the record separator
# @return [Array] two elements: column separator, record separator
def initialize_separators(col_sep, rec_sep)
  column = col_sep.nil? ? col_sep : SEPARATORS[col_sep.to_sym]
  record =
    if rec_sep.nil? || rec_sep.length <= 0
      "\n"
    else
      SEPARATORS[rec_sep.to_sym]
    end
  [column, record]
end
|
#line_count(vals) ⇒ Object
48
49
50
|
# File 'lib/table_importer/csv.rb', line 48
# Turns a list of values into one line of text.
#
# A single value is returned via to_s; any other number of values is joined
# with @column_separator.
#
# @param vals [Array] values of one row
# @return [String] the row as a single string
def line_count(vals)
  return vals[0].to_s if vals.count == 1

  vals.join(@column_separator)
end
|
135
136
137
138
139
140
141
142
143
|
# File 'lib/table_importer/csv.rb', line 135
# NOTE(review): the method name, the first parameter and the collection
# iterated below are missing from this listing (presumably lost in extraction) -
# restore them from the original file.
#
# Picks the key to use for position +index+: starts from the symbol
# :"column_<index>" and replaces it with the key of any iterated pair whose
# value, as a string, equals the index as a string. When several pairs match,
# the last one iterated wins.
def (, index)
key_to_add = "column_#{index}".to_sym
.each do |new_key, value|
if value.to_s == index.to_s
key_to_add = new_key
end
end
key_to_add
end
|
#reset_separators ⇒ Object
162
163
164
165
166
|
# File 'lib/table_importer/csv.rb', line 162
# Resets separator state after a file has been cleaned.
#
# Removes :newline_windows and :old_newline_mac from SEPARATORS in place, then
# sets the record separator to "\n" and clears the column separator.
# NOTE(review): SEPARATORS is a constant, so the removal is visible to every
# other user of that constant - confirm this is intended.
#
# @return [String] the empty string assigned to @column_separator
def reset_separators
  %i[newline_windows old_newline_mac].each { |name| SEPARATORS.except!(name) }
  @record_separator = "\n"
  @column_separator = ""
end
|