Class: Dbtools::Converter::Csv_importer

Inherits:
Object
  • Object
show all
Defined in:
lib/dbtools/converter/csv_importer.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filename, delimiter: '', tablename: '') ⇒ Csv_importer

Returns a new instance of Csv_importer.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/dbtools/converter/csv_importer.rb', line 7

def initialize(filename, delimiter: '', tablename: '')
  @delimiter = guess_delimiter(filename) if delimiter.empty?
  options = { :headers => true,
              :header_converters => :symbol,
              :converters => :all,
              :col_sep => @delimiter
  }
  csv = CSV.open(filename, options)
  arr = Array.new
  csv.take(10000).each do |row|
    arr.push(row)
  end
  @data = CSV::Table.new(arr)
  @tablename = tablename
  @tablename = File.basename(filename, '.csv').gsub(/[^0-9a-zA-Z_]/,'_').to_sym if tablename.empty?
  @types = Hash.new
end

Instance Attribute Details

#delimiterObject (readonly)

Returns the value of attribute delimiter.



6
7
8
# File 'lib/dbtools/converter/csv_importer.rb', line 6

def delimiter
  @delimiter
end

#tablenameObject (readonly)

Returns the value of attribute tablename.



6
7
8
# File 'lib/dbtools/converter/csv_importer.rb', line 6

def tablename
  @tablename
end

Instance Method Details

#class_to_sql_type(klass) ⇒ Object

Converts a ruby class to a string representing a SQL type.



54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/dbtools/converter/csv_importer.rb', line 54

def class_to_sql_type(klass)
  # There's probably a better way to detect the type.. 
  if klass == Fixnum
    'BIGINT'
  elsif klass == Float
    'FLOAT'
  #elsif klass == Time
    #'DATE'
  else
    'VARCHAR(255)'
  end
end

#guess_delimiter(filename) ⇒ Object

Attempt to guess delimiter based on occurrence in the header.



94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/dbtools/converter/csv_importer.rb', line 94

def guess_delimiter(filename)
  delimiters = [',', '|', "\t", ';']
  lines = File.foreach("#{filename}").first(10).join
  delimiters_count = delimiters.map { |x| [x, lines.count(x)] }.to_h
  puts delimiters_count
  
  # Key is the delimiter, value is the occurence. 
  most_likely_delimiter = delimiters_count.max_by { |k, v| v }
  # Check if the occurrence is not zero. 
  raise "No delimiter detected. " if most_likely_delimiter[1].zero?
  return most_likely_delimiter.first
end

#infer_type(entry) ⇒ Object

Infer the type of a value by using Ruby’s internal type inference system.



47
48
49
50
51
# File 'lib/dbtools/converter/csv_importer.rb', line 47

def infer_type(entry)
  type = (Time.parse(entry) rescue nil)
  type = entry if type.nil?
  return type.class
end

#infer_type_of_columnsObject

Try to infer the type of the columns, and store them.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/dbtools/converter/csv_importer.rb', line 26

def infer_type_of_columns
  @data.by_col!.each do |colName, rows|
    # Count all the types.
    count = Hash.new
    rows.each do |entry|
      type = infer_type(entry)
      count[type] = count[type].nil? ? 1 : count[type] + 1
    end
    # Set the type to the most occurring type.
    most_occurring_type = count.sort_by(&:last).last
    type = most_occurring_type.first

    # Let float take precedence over integers if it occurred.
    type = Float if type == Fixnum && !count[Float].nil?
    # Let string take precedence over other types if it occurred.
    type = String unless count[String].nil?
    @types[colName] = type
  end
end

#output_schema_to_file(filename) ⇒ Object

Writes the script to a file.



88
89
90
91
# File 'lib/dbtools/converter/csv_importer.rb', line 88

def output_schema_to_file(filename)
  open(filename, 'w') { |f| f << to_sql_schema_script}
  return filename
end

#to_sql_schema_scriptObject

Returns a sql schema script of the csv file.



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/dbtools/converter/csv_importer.rb', line 68

def to_sql_schema_script
  infer_type_of_columns if @types.empty?
  output = %{CREATE TABLE IF NOT EXISTS "#{@tablename}" ( \n}
  columns = @data.by_col!.map.with_index do |data, index|
    colName, rows = data
    # Use column position if no header is defined. 
    colName = "col_#{index}" if (colName.nil? || colName.empty?)
    # Check if column can be null.
    nullable = rows.all? { |entry| !entry.to_s.gsub(/\s/, '').empty? }

    sql_type = class_to_sql_type(@types[colName])

    result = "\t#{colName.downcase} #{sql_type}" 
    result << "\tNOT NULL" if nullable
    result
  end.join(", \n")
  output << columns << "\n);\n"
end