Class: Csv2sql

Inherits:
Object
  • Object
show all
Defined in:
lib/csv2sql.rb

Overview

Example:

puts Csv2sql.new("account_balances.csv").to_updates([nil, 'balance'], :table => 'accounts')

Constant Summary collapse

VERSION =
'0.3.1'
@@defaults =
{
  :before     => "",   # can be changed to "start transaction;\n"
  :after      => ";\n" # can be changes to "commit;\n"
}

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filename) ⇒ Csv2sql

Returns a new instance of Csv2sql.



46
47
48
# File 'lib/csv2sql.rb', line 46

def initialize(filename)
  @filename = filename
end

Class Method Details

.default_value_filter(v, i = nil, j = nil) ⇒ Object



38
39
40
41
42
43
44
# File 'lib/csv2sql.rb', line 38

def self.default_value_filter(v, i=nil, j=nil)
  return 'null' if v.to_s == ''
  return v.to_s if v.is_a? Float or v.is_a? Fixnum
  v.gsub!(/\\/, '\\\\')
  v.gsub!(/"/, '\\"')
  "\"#{v}\""
end

Instance Method Details

#parse(args = {}) ⇒ Object

Parse file

args - proc, called with (values, line_number) args - proc, called with values, line_number, column_number



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/csv2sql.rb', line 144

def parse(args={})
  args[:value_filter] ||= Csv2sql.method :default_value_filter
  data = open(@filename, 'r').read
  last_offset = offset = 0
  i = 0
  begin
    values = []
    last_offset = offset
    parsed_cells, offset = CSV.parse_row(data, offset, values)
    # TODO: ignore blank lines? what about single column csv files with blank values?
    values = args[:values_filter].call(values, i) if args[:values_filter]
    if values
      if args[:value_filter] # LOOK OUT! value_filter is for single value, values_filter is for whole row
        j = -1
        values = values.map do |value|
          j += 1
          args[:value_filter].call(value, i, j)
        end
      end
      yield values if values
    end
    i += 1
  end while parsed_cells > 0
  #raise "Illegal format error in #{@filename} at row starting with the line #{i} (last offset #{offset} not the same as the data size #{data.size})" if offset != data.size
end

#to_any(args = {}) ⇒ Object

When :row_format is proc, values_glue is ignored

:before 
:values_glue
:row_format
:row_glue
:after
:when_empty


112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/csv2sql.rb', line 112

def to_any(args={})
  args[:when_empty]  ||= ""
  args[:values_glue] ||= ", "
  args[:row_format]  ||= "%s"
  args[:row_glue]    ||= "\n"
  r = []
  case args[:row_format].class.to_s
    when 'String'
      parse(args) do |values|
        r << sprintf(args[:row_format], values.join(args[:values_glue]))
      end
      
    when 'Proc'
      parse(args) do |values|
        r << args[:row_format].call(values) # LOOK OUT: args[:values_glue] ignored
      end
  end
  if r.size > 0
    r = r.join args[:row_glue]
    r = args[:before] + r if args[:before]
    r = r + args[:after] if args[:after]
    r
  else
    args[:when_empty]
  end
end

#to_inserts(args = {}) ⇒ Object

Sql inserts

Please note that you can set table name with values :table => ‘my_table(id, col1, col2…)’ to make inserts into specific columns only.

Optional named args:

:ignore - true/false, if true uses INSERT IGNORE ...
:bulk   - if true, bulk insert (see cluster size in your sql server to make big bulks to avoid server gone away!)
:table  - default based on filename
:before - default to blank 
:after  - default to ;
...see Csv2sql#to_any for the rest


63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/csv2sql.rb', line 63

def to_inserts(args={})
  args[:table] ||= Pathname.new(@filename).basename.to_s.downcase.gsub(/\W/, '_')
  args[:before] ||= @@defaults[:before]
  args[:after]  ||= @@defaults[:after]
  insert_sql = args[:ignore] ? 'insert ignore' : 'insert'
  if args[:bulk]
    args[:before]       += "#{insert_sql} into #{args[:table]} values"
    args[:values_glue] ||= ", "
    args[:row_format]  ||= " (%s)"
    args[:row_glue]    ||= ",\n"
  else
    args[:before]      ||= ""
    args[:values_glue] ||= ", "
    args[:row_format]  ||= "#{insert_sql} into #{args[:table]} values(%s)"
    args[:row_glue]    ||= ";\n"
  end
  to_any args
end

#to_updates(set_columns, args = {}) ⇒ Object

Sql updates from csv file (useful when one of the columns is a PK)

set_columns - ie. [nil, 'first_name', 'last_name'] will ignore first column (PK probably) and set first_name and last_name attributes

Optional args:

:pk - default to first (index 0) column in csv file with 'id' name, a pair: [0, 'id']


89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/csv2sql.rb', line 89

def to_updates(set_columns, args={})
  args[:pk]          ||= [0, 'id']
  args[:table]       ||= Pathname.new(@filename).basename.to_s.downcase.gsub(/\W/, '_')
  args[:before]      ||= @@defaults[:before]
  args[:after]       ||= @@defaults[:after]
  args[:values_glue] ||= ", "
  args[:row_format]  ||= lambda do |values|
    r = []
    set_columns.each_with_index { |set_column, i| r << "#{set_column} = #{values[i]}" if set_column }
    "update #{args[:table]} set #{r.join(', ')} where #{args[:pk][1]} = #{values[args[:pk][0]]}"
  end
  args[:row_glue]    ||= ";\n"
  to_any args
end