Class: Dbtools::Converter::Csv_importer
- Inherits:
-
Object
- Object
- Dbtools::Converter::Csv_importer
- Defined in:
- lib/dbtools/converter/csv_importer.rb
Instance Attribute Summary collapse
-
#delimiter ⇒ Object
readonly
Returns the value of attribute delimiter.
-
#tablename ⇒ Object
readonly
Returns the value of attribute tablename.
Instance Method Summary collapse
-
#class_to_sql_type(klass) ⇒ Object
Converts a ruby class to a string representing a SQL type.
-
#guess_delimiter(filename) ⇒ Object
Attempt to guess delimiter based on occurrence in the header.
-
#infer_type(entry) ⇒ Object
Infer the type of a value by using Ruby’s internal type inference system.
-
#infer_type_of_columns ⇒ Object
Try to infer the type of the columns, and store them.
-
#initialize(filename, delimiter: '', tablename: '') ⇒ Csv_importer
constructor
A new instance of Csv_importer.
-
#output_schema_to_file(filename) ⇒ Object
Writes the script to a file.
-
#to_sql_schema_script ⇒ Object
Returns a sql schema script of the csv file.
Constructor Details
#initialize(filename, delimiter: '', tablename: '') ⇒ Csv_importer
Returns a new instance of Csv_importer.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/dbtools/converter/csv_importer.rb', line 7 def initialize(filename, delimiter: '', tablename: '') @delimiter = guess_delimiter(filename) if delimiter.empty? = { :headers => true, :header_converters => :symbol, :converters => :all, :col_sep => @delimiter } csv = CSV.open(filename, ) arr = Array.new csv.take(10000).each do |row| arr.push(row) end @data = CSV::Table.new(arr) @tablename = tablename @tablename = File.basename(filename, '.csv').gsub(/[^0-9a-zA-Z_]/,'_').to_sym if tablename.empty? @types = Hash.new end |
Instance Attribute Details
#delimiter ⇒ Object (readonly)
Returns the value of attribute delimiter.
6 7 8 |
# File 'lib/dbtools/converter/csv_importer.rb', line 6 def delimiter @delimiter end |
#tablename ⇒ Object (readonly)
Returns the value of attribute tablename.
6 7 8 |
# File 'lib/dbtools/converter/csv_importer.rb', line 6 def tablename @tablename end |
Instance Method Details
#class_to_sql_type(klass) ⇒ Object
Converts a ruby class to a string representing a SQL type.
54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/dbtools/converter/csv_importer.rb', line 54 def class_to_sql_type(klass) # There's probably a better way to detect the type.. if klass == Fixnum 'BIGINT' elsif klass == Float 'FLOAT' #elsif klass == Time #'DATE' else 'VARCHAR(255)' end end |
#guess_delimiter(filename) ⇒ Object
Attempt to guess delimiter based on occurrence in the header.
94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/dbtools/converter/csv_importer.rb', line 94 def guess_delimiter(filename) delimiters = [',', '|', "\t", ';'] lines = File.foreach("#{filename}").first(10).join delimiters_count = delimiters.map { |x| [x, lines.count(x)] }.to_h puts delimiters_count # Key is the delimiter, value is the occurence. most_likely_delimiter = delimiters_count.max_by { |k, v| v } # Check if the occurrence is not zero. raise "No delimiter detected. " if most_likely_delimiter[1].zero? return most_likely_delimiter.first end |
#infer_type(entry) ⇒ Object
Infer the type of a value by using Ruby’s internal type inference system.
47 48 49 50 51 |
# File 'lib/dbtools/converter/csv_importer.rb', line 47 def infer_type(entry) type = (Time.parse(entry) rescue nil) type = entry if type.nil? return type.class end |
#infer_type_of_columns ⇒ Object
Try to infer the type of the columns, and store them.
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/dbtools/converter/csv_importer.rb', line 26 def infer_type_of_columns @data.by_col!.each do |colName, rows| # Count all the types. count = Hash.new rows.each do |entry| type = infer_type(entry) count[type] = count[type].nil? ? 1 : count[type] + 1 end # Set the type to the most occurring type. most_occurring_type = count.sort_by(&:last).last type = most_occurring_type.first # Let float take precedence over integers if it occurred. type = Float if type == Fixnum && !count[Float].nil? # Let string take precedence over other types if it occurred. type = String unless count[String].nil? @types[colName] = type end end |
#output_schema_to_file(filename) ⇒ Object
Writes the script to a file.
88 89 90 91 |
# File 'lib/dbtools/converter/csv_importer.rb', line 88 def output_schema_to_file(filename) open(filename, 'w') { |f| f << to_sql_schema_script} return filename end |
#to_sql_schema_script ⇒ Object
Returns a sql schema script of the csv file.
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/dbtools/converter/csv_importer.rb', line 68 def to_sql_schema_script infer_type_of_columns if @types.empty? output = %{CREATE TABLE IF NOT EXISTS "#{@tablename}" ( \n} columns = @data.by_col!.map.with_index do |data, index| colName, rows = data # Use column position if no header is defined. colName = "col_#{index}" if (colName.nil? || colName.empty?) # Check if column can be null. nullable = rows.all? { |entry| !entry.to_s.gsub(/\s/, '').empty? } sql_type = class_to_sql_type(@types[colName]) result = "\t#{colName.downcase} #{sql_type}" result << "\tNOT NULL" if nullable result end.join(", \n") output << columns << "\n);\n" end |