Class: CsvPack::Tab

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/csvpack/pack.rb

Constant Summary collapse

DATA_TYPES =

mappings for data types

from tabular data package to ActiveRecord migrations

see http://dataprotocols.org/json-table-schema/   (section Field Types and Formats)

for now supports these types

{
  'string'   => :string,    ## todo/check: use text for larger strings?
  'number'   => :float,     ## note: numbers are mapped to float for now
  'integer'  => :integer,
  'boolean'  => :boolean,
  'datetime' => :datetime,
  'date'     => :date,
  'time'     => :time,
  'year'     => :string,     ## note: year is mapped to string for now - anything better? why? why not?
}

Instance Method Summary collapse

Constructor Details

#initialize(h, text) ⇒ Tab

Returns a new instance of Tab.



103
104
105
106
107
108
109
110
111
# File 'lib/csvpack/pack.rb', line 103

def initialize( h, text )
  @h = h

  ## todo parse csv
  ##  note: use header options (first row MUST include headers)
  @data = CSV.parse( text, headers: true )

  pp @data[0]
end

Instance Method Details

#ar_clazz ⇒ Object



259
260
261
262
263
264
265
266
267
268
269
# File 'lib/csvpack/pack.rb', line 259

## Anonymous ActiveRecord::Base subclass for this tab.
##
## Built on the first call, pointed at the table sanitize_name( name )
##  and then memoized in @ar_clazz; later calls return the same class.
def ar_clazz
  return @ar_clazz  if @ar_clazz

  model      = Class.new( ActiveRecord::Base )
  table_name = sanitize_name( name )

  puts "set table_name to #{table_name}"
  model.table_name = table_name

  @ar_clazz = model
end

#dump_schema ⇒ Object



234
235
236
237
238
239
240
241
242
# File 'lib/csvpack/pack.rb', line 234

## Print the schema fields to stdout (debug helper).
##
## Writes one line per entry in @h['schema']['fields'] showing
##  the original field name, its sanitized column name, the declared
##  field type and the mapped DATA_TYPES entry (blank if there is none).
def dump_schema
  puts "*** dump schema:"

  @h['schema']['fields'].each do |f|
    ## note: no stray closing brace after the field type
    puts "   #{f['name']} ( #{sanitize_name(f['name'])} ) : #{f['type']} ( #{DATA_TYPES[f['type']]} )"
  end

end

#import! ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/csvpack/pack.rb', line 139

## Import all parsed csv records into the table sanitize_name( name ),
##  running one INSERT statement per record.
##
## Column names and types come from @h['schema']['fields']; the values of a
##  record are matched to the schema fields by position.
##
## NOTE(review): a record with more values than schema fields still produces
##  an INSERT with more values than columns - confirm that input always
##  matches the schema.
def import!
  connect!
  con = ActiveRecord::Base.connection

  fields       = @h['schema']['fields']
  column_names = fields.map { |f| sanitize_name(f['name']) }
  column_types = fields.map { |f| DATA_TYPES[f['type']] }

  sql_insert_into = "INSERT INTO #{sanitize_name(name)} (#{column_names.join(',')}) VALUES "
  puts sql_insert_into

  @data.each do |row|
    values = row.fields.each_with_index.map do |value,index|
      quote_import_value( con, value, column_types[index] )
    end
    pp values

    sql = "#{sql_insert_into} (#{values.join(',')})"
    puts sql
    con.execute( sql )
  end
end

## Turn one csv value into a sql literal for import!.
##
##  - blank values become NULL
##  - values of numeric columns that really look like a number are passed
##     through as is (no quotes)
##  - everything else (incl. non-numeric text found in a numeric column)
##     gets quoted and escaped by the connection, so csv content can never
##     end up as raw sql
def quote_import_value( con, value, type )
  return 'NULL'  if value.blank?

  numeric_column = [:number,:float,:integer].include?( type )
  if numeric_column && value =~ /\A\s*[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\s*\z/
    value                  ## do NOT wrap in quotes (numeric)
  else
    con.quote( value )     ## wrap in quotes and escape (adapter-specific)
  end
end
private :quote_import_value

#import_v1! ⇒ Object



183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/csvpack/pack.rb', line 183

## First version of the import: plain sql INSERT with ? placeholders
##  (does NOT use an ActiveRecord record class for now).
##
## note: for testing only the first three records get inserted.
def import_v1!
  con    = ActiveRecord::Base.connection
  fields = @h['schema']['fields']

  names        = fields.map { |f| sanitize_name(f['name']) }
  placeholders = fields.map { '?' }

  sql = "INSERT INTO #{sanitize_name(name)} (#{names.join(',')}) VALUES (#{placeholders.join(',')})"
  puts sql

  @data.each_with_index do |row,idx|
    next if idx >= 3   ## for testing; only insert a couple of recs

    ## todo: check if all string is ok; or number/date/etc. conversion needed/required?
    params = row.fields   # array of values for the placeholders
    pp params
    con.exec_insert( sql, 'SQL', params )  # todo/check: 2nd param name used for logging only??
  end
end

#name ⇒ Object



113
# File 'lib/csvpack/pack.rb', line 113

## Name of this tab as found under the 'name' key of the metadata hash.
def name
  @h['name']
end

#pretty_print(printer) ⇒ Object



116
117
118
# File 'lib/csvpack/pack.rb', line 116

## Hook for pp: emit a one-line summary (object id, name and number of
##  records in @data) through the printer's text method.
def pretty_print( printer )
  summary = "Tab<#{object_id} @data.name=#{name}, @data.size=#{@data.size}>"
  printer.text( summary )
end

#sanitize_name(ident) ⇒ Object



245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/csvpack/pack.rb', line 245

## Turn a free-form name into an identifier usable as table or column name.
##
##  - strip surrounding whitespace and downcase
##  - dot (.), dash (-), slash (/) and space become underscore (_)
##  - anything else outside a-z, 0-9 and _ gets removed
##  - a leading digit gets an underscore (_) prefix
##     e.g. 52 Week Price  => becomes  _52_week_price
def sanitize_name( ident )
  cleaned = ident.strip.downcase
                 .tr( './ -', '_' )          ## note: trailing dash (-) is a literal, not a range
                 .delete( '^a-z0-9_' )       ## keep a-z, 0-9 and _ only

  cleaned =~ /\A[0-9]/ ? "_#{cleaned}" : cleaned
end

#up! ⇒ Object



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/csvpack/pack.rb', line 121

## Create the database table for this tab (the "up" step of a migration).
##
## The table is named sanitize_name( name ) and gets one column per entry
##  in @h['schema']['fields'] plus a trailing string column called name.
##
## Field types without a DATA_TYPES entry (or fields without any type) are
##  created as string columns - import! handles the values of such fields
##  as strings too - instead of failing with a NoMethodError on nil.
def up!
  connect!
  con = ActiveRecord::Base.connection

  con.create_table sanitize_name( name ) do |t|
    @h['schema']['fields'].each do |f|
      column_name = sanitize_name(f['name'])
      column_type = DATA_TYPES[f['type']] || :string   ## unknown or missing type => string

      puts "  #{column_type} :#{column_name}  =>  #{f['type']} - #{f['name']}"

      t.column( column_name.to_sym, column_type )
    end
    ## NOTE(review): always added; looks like it clashes with a schema field
    ##  that sanitizes to "name" - confirm
    t.string  :name
  end
end