Class: Csv2sql

Inherits:
Object
  • Object
show all
Defined in:
lib/csv2sql.rb

Overview

Example:

puts Csv2sql.new("account_balances.csv").to_updates([nil, 'balance'], :table => 'accounts')

Constant Summary collapse

@@defaults =
{
  :before => "",   # can be changed to "start transaction;\n"
  :after  => ";\n" # can be changes to "commit;\n"
}

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filename) ⇒ Csv2sql

Returns a new instance of Csv2sql.



43
44
45
# File 'lib/csv2sql.rb', line 43

def initialize(filename)
  @filename = filename
end

Class Method Details

.default_value_filter(v, i, j, k) ⇒ Object



36
37
38
39
40
41
# File 'lib/csv2sql.rb', line 36

def self.default_value_filter(v, i, j, k)
  return 'null' if v.to_s == ''
  return v.to_s if v.is_a? Float or v.is_a? Fixnum
  v.gsub!(/"/, '\\"')
  "\"#{v}\""
end

Instance Method Details

#parse(args = {}) ⇒ Object

Parse file

args - proc, called with (line, line_number) args - proc, called with (values, csv_line_number, filtered_line_number)



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/csv2sql.rb', line 135

def parse(args={})
  args[:value_filter] ||= Csv2sql.method :default_value_filter
  
  i = j = 0
  File.open(@filename, 'r').each_line do |line|
    i += 1
    line = args[:csv_line_filter].call(line, i) if args[:csv_line_filter]
    if line
      j += 1
      unless values = CSV.parse_line(line)
        raise "ERROR:#{@filename}:#{i}:#{j} #{line}"
      else
        
        values = args[:values_filter].call(values, i, j) if args[:values_filter]
        
        if values
        
          if args[:value_filter] # LOOK OUT! value_filter is for single value
            k = -1
            values = values.map do |value|
              k += 1
              args[:value_filter].call(value, i, j, k)
            end
          end
          
          yield values if values
          
        end
      end
    end
  end
end

#to_any(args = {}) ⇒ Object

When :row_format is proc, values_glue is ignored

:before 
:values_glue
:row_format
:row_glue
:after


108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/csv2sql.rb', line 108

def to_any(args={})
  args[:values_glue] ||= ", "
  args[:row_format]  ||= "%s"
  args[:row_glue]    ||= "\n"
  r = []
  case args[:row_format].class.to_s
    when 'String'
      parse(args) do |values|
        r << sprintf(args[:row_format], values.join(args[:values_glue]))
      end
      
    when 'Proc'
      parse(args) do |values|
        r << args[:row_format].call(values) # LOOK OUT: args[:values_glue] ignored
      end
  end
  r = r.join args[:row_glue]
  r = args[:before] + r if args[:before]
  r = r + args[:after] if args[:after]
  r
end

#to_inserts(args = {}) ⇒ Object

Sql inserts

Please note that you can set table name with values :table => ‘my_table(id, col1, col2…)’ to make inserts into specific columns only.

Optional named args:

:ignore - true/false, if true uses INSERT IGNORE ...
:bulk   - if true, bulk insert (see cluster size in your sql server to make big bulks to avoid server gone away!)
:table  - default based on filename
:before - default to blank 
:after  - default to ;
...see Csv2sql#to_any for the rest


60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/csv2sql.rb', line 60

def to_inserts(args={})
  args[:table] ||= Pathname.new(@filename).basename.to_s.downcase.gsub(/\W/, '_')
  args[:before] ||= @@defaults[:before]
  args[:after]  ||= @@defaults[:after]
  insert_sql = args[:ignore] ? 'insert ignore' : 'insert'
  if args[:bulk]
    args[:before]       += "#{insert_sql} into #{args[:table]} values"
    args[:values_glue] ||= ", "
    args[:row_format]  ||= " (%s)"
    args[:row_glue]    ||= ",\n"
  else
    args[:before]      ||= ""
    args[:values_glue] ||= ", "
    args[:row_format]  ||= "#{insert_sql} into #{args[:table]} values(%s)"
    args[:row_glue]    ||= ";\n"
  end
  to_any args
end

#to_updates(set_columns, args = {}) ⇒ Object

Sql updates from csv file (useful when one of the columns is a PK)

set_columns - ie. [nil, 'first_name', 'last_name'] will ignore first column (PK probably) and set first_name and last_name attributes

Optional args:

:pk - default to first (index 0) column in csv file with 'id' name, a pair: [0, 'id']


86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/csv2sql.rb', line 86

def to_updates(set_columns, args={})
  args[:pk]          ||= [0, 'id']
  args[:table]       ||= Pathname.new(@filename).basename.to_s.downcase.gsub(/\W/, '_')
  args[:before]      ||= @@defaults[:before]
  args[:after]       ||= @@defaults[:after]
  args[:values_glue] ||= ", "
  args[:row_format]  ||= lambda do |values|
    r = []
    set_columns.each_with_index { |set_column, i| r << "#{set_column} = #{values[i]}" if set_column }
    "update #{args[:table]} set #{r.join(', ')} where #{args[:pk][1]} = #{values[args[:pk][0]]}"
  end
  args[:row_glue]    ||= ";\n"
  to_any args
end