Class: DataLoader::Inspector

Inherits:
Object
  • Object
show all
Defined in:
lib/data_loader/inspector.rb

Class Method Summary collapse

Class Method Details

.dbtype(value) ⇒ Object

Determines which database column type is most suitable for the given value.



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/data_loader/inspector.rb', line 45

# Determine which column type is most suitable for a single value.
#
# @param value [Integer, DateTime, String, nil] the value to classify
# @return [Symbol, nil] :integer, :datetime, :string (up to 255 characters)
#   or :text (longer strings); nil when the value is nil or a blank string,
#   i.e. when it carries no type information
# @raise [RuntimeError] 'Unknown type' for a value of any other class
def self.dbtype(value)
  case value
  when nil
    nil
  when Integer
    # Integer rather than Fixnum: the Fixnum constant was removed in
    # Ruby 3.2, while Integer is available on every Ruby version.
    :integer
  when DateTime
    :datetime
  when String
    if value.blank?
      nil
    elsif value.length <= 255
      :string
    else
      :text
    end
  else
    raise 'Unknown type'
  end
end

.inspect_file(file, separator = ',', inspect_rows = 10) ⇒ Object

Reads a CSV file and returns its columns and their inferred types as an ordered array.



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/data_loader/inspector.rb', line 8

# Read a CSV file and return its columns and inferred types in column order.
#
# @param file [String] path handed to FasterCSV.open
# @param separator [String] column separator (defaults to a comma)
# @param inspect_rows [Integer] maximum number of rows passed to scan_rows
# @return [Object] whatever scan_rows returns for the opened file
def self.inspect_file(file, separator = ',', inspect_rows = 10)
  # Turn a raw header into an identifier-like name: underscore it, replace
  # every character outside a-z, 0-9 and _ with a separator, then collapse
  # runs of underscores.
  normalize_header = lambda do |h|
    h.underscore.gsub(/[^a-z0-9_]/, ' ').strip.gsub(' ', '_').squeeze('_')
  end

  csv_options = {
    :col_sep => separator,
    :converters => [:date_time, :integer],    # :integer, :float, :date, :date_time
    :headers => true,
    :header_converters => normalize_header,
    :skip_blanks => true
  }

  detected = nil
  FasterCSV.open(file, csv_options) do |csv|
    detected = scan_rows(csv, inspect_rows)
  end
  detected
end

.promote_type(*types) ⇒ Object

Given any number of data types (nil entries are ignored), chooses a type that fits them all.



66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/data_loader/inspector.rb', line 66

# Choose a single data type able to hold every one of the given types.
#
# @param types [Array<Symbol, nil>] candidate types; nil entries are ignored
# @return [Symbol, nil] nil when no non-nil type was given; the type itself
#   when all candidates agree; :text when any candidate is :text; otherwise
#   :string
# @raise [RuntimeError] 'Unknown type' when a candidate is not one of
#   :text, :string, :datetime or :integer
def self.promote_type(*types)
  known = [:text, :string, :datetime, :integer]
  candidates = types.compact

  return nil if candidates.empty?
  raise 'Unknown type' unless (candidates - known).empty?

  distinct = candidates.uniq
  return distinct.first if distinct.length == 1  # all candidates agree

  # Mixed types fall back to a textual column.
  candidates.include?(:text) ? :text : :string
end

.scan_rows(csv, inspect_rows) ⇒ Object

Scans up to inspect_rows rows to determine the data type of each column.



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/data_loader/inspector.rb', line 22

# Scan up to +inspect_rows+ rows to determine a data type for each column.
#
# @param csv [#gets] reader whose +gets+ returns the next row, or nil when
#   there are no more rows; each row must yield (header, value) pairs
# @param inspect_rows [Integer] maximum number of rows to read
# @return [Array<Hash>] one {:name => header, :type => type} hash per column,
#   in the column order of the first row read; empty when no row was read
def self.scan_rows(csv, inspect_rows)
  first_row = nil
  columns = {}  # header => data type promoted from the values seen so far

  1.upto(inspect_rows) do
    row = csv.gets
    break unless row
    row.each do |header, value|
      columns[header] = promote_type(columns[header], dbtype(value))
    end
    first_row ||= row # kept only for its column order
  end

  # No row was read (no data rows, or inspect_rows < 1): there are no
  # columns to report, and first_row is nil, so return early.
  return [] unless first_row

  # form an ordered array based on the first row read:
  first_row.map do |header, _value|
    # default to :string if every inspected value was nil
    {:name => header, :type => columns[header] || :string}
  end
end