Class: Marty::DataConversion

Inherits:
Object
  • Object
show all
Defined in:
lib/marty/data_conversion.rb

Constant Summary collapse

# Base date for spreadsheet date serials: convert(v, :date) adds the
# numeric value of v to this date.
# NOTE(review): the "- 2" offset presumably aligns the serial numbering
# with spreadsheet conventions -- confirm against real exports.
EXCEL_START_DATE =
Date.parse('1/1/1900') - 2
# Whole-string (\A ... \z) pattern for a decimal number: optional leading
# minus, digits, optional fractional part, optional exponent.  Also used
# by convert to decide whether a :date value is a numeric serial.
FLOAT_PAT =
/\A-?\d+(\.?\d+)?([eE][-+]?[0-9]+)?\z/
# Validation pattern per column type.  convert raises when a String
# value does not match the pattern registered for its type; types with
# no entry here are not pattern-checked.
# NOTE(review): :integer is anchored with ^/$ (per line) while FLOAT_PAT
# uses \A/\z (whole string), so a multi-line string containing an
# integer-looking line passes the :integer check -- confirm intent.
PATS =
{
  integer: /^-?\d+(\.0+)?$/,
  float:   FLOAT_PAT,
  decimal: FLOAT_PAT,
}
DATABASE_TYPES =

Database types that imported values can be converted to.

Set[
  :boolean,
  :string,
  :text,
  :integer,
  :float,
  :decimal,
  :date,
  :datetime,
  :numrange,
  :int4range,
  :int8range,
  :float_array,
  :json,
  :jsonb,
  :enum,
]
# Memo of association profiles keyed by klass; filled lazily by
# associations(klass).
@@associations =
{}
# Memo of column-type profiles keyed by klass; filled lazily by
# col_types(klass).
@@col_types =
{}

Class Method Summary collapse

Class Method Details

.assoc_cols(klass) ⇒ Object



109
110
111
112
# File 'lib/marty/data_conversion.rb', line 109

def self.assoc_cols(klass)
  # Foreign-key column of every association profiled for klass
  # (e.g. ["xxx_id", ...]), in the order associations(klass) lists them.
  associations(klass).each_value.map { |profile| profile[:foreign_key] }
end

.assoc_keys(klass) ⇒ Object



82
83
84
85
86
87
88
89
90
91
# File 'lib/marty/data_conversion.rb', line 82

def self.assoc_keys(klass)
  # Attributes used to look up an existing klass record on import.
  #
  # When Mcfly.has_mcfly?(klass) is true the answer is delegated to
  # Mcfly.mcfly_uniqueness(klass).
  if Mcfly.has_mcfly?(klass)
    return Mcfly.mcfly_uniqueness(klass)
  end

  # Otherwise prefer the model's MARTY_IMPORT_UNIQUENESS constant.
  #
  # FIXME: very hacky -- when reading the constant fails for any
  # reason, the first non-id column is picked as the single key for
  # regular (non-mcfly) AR models.
  begin
    klass.const_get(:MARTY_IMPORT_UNIQUENESS)
  rescue StandardError
    first_attr = klass.column_names.find { |name| !(name == 'id') }
    [first_attr.to_sym]
  end
end

.associations(klass) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/marty/data_conversion.rb', line 95

def self.associations(klass)
  # Profile of the ActiveRecord associations of klass, used to
  # find/import its database records.  Memoized per klass.
  #
  # Returns a Hash keyed by association name (String); each value holds
  # :assoc_keys (lookup attributes of the target class), :assoc_class
  # (the target class) and :foreign_key.
  @@associations[klass] ||= begin
    pairs = klass.reflect_on_all_associations.map do |reflection|
      profile = {
        assoc_keys:  assoc_keys(reflection.klass),
        assoc_class: reflection.klass,
        foreign_key: reflection.foreign_key,
      }
      [reflection.name.to_s, profile]
    end
    pairs.to_h
  end
end

.col_types(klass) ⇒ Object



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/marty/data_conversion.rb', line 118

def self.col_types(klass)
  # Build the profile of klass columns used to find/import its database
  # records.  Memoized per klass.
  #
  # Returns a Hash keyed by column name.  A column that is the foreign
  # key of an association maps to that association's profile Hash (see
  # associations); any other column maps to its type as a Symbol, with
  # "_array" appended for array columns.  Columns listed in
  # Mcfly::COLUMNS are left out.

  @@col_types[klass] ||= begin
    # Looked up once per klass: locals assigned inside the block below
    # are block-local, so a `||=` there would be re-run for every column.
    assoc_profiles = associations(klass).values

    klass.columns.each_with_object({}) do |col, h|
      cn = col.name

      # ignore mcfly cols
      next if Mcfly::COLUMNS.member?(cn)

      assoc = assoc_profiles.detect { |a| a[:foreign_key] == cn }

      if assoc
        h[cn] = assoc
      else
        # for JSON fields in Rails 3.x type is nil, so use sql_type
        type = col.type || col.sql_type
        type = "#{type}_array" if col.array
        h[cn] = type.to_sym
      end
    end
  end
end

.columns(klass) ⇒ Object



142
143
144
145
# File 'lib/marty/data_conversion.rb', line 142

def self.columns(klass)
  # Column names profiled by col_types(klass), i.e. the non-mcfly
  # columns, as an Array.
  col_types(klass).each_key.to_a
end

.convert(v, type) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/marty/data_conversion.rb', line 31

def self.convert(v, type)
  # Converts external data v (e.g. from a CSV, cut/paste) to
  # ActiveRecord database data type.
  #
  # v    - the raw value (typically a String)
  # type - Symbol naming the target column type
  #
  # Returns the converted value.  For :date and :datetime the String
  # 'infinity' is returned when Mcfly.is_infinity(v) is true.
  #
  # Raises RuntimeError when a String v fails the PATS pattern for its
  # type, when a boolean/date/datetime cannot be converted, or when
  # type is not handled.

  pat = PATS[type]

  raise "bad #{type} #{v.inspect}" if v.is_a?(String) && pat && !pat.match?(v)

  case type
  when :boolean
    case v.to_s.downcase
    when 'true',  '1', 'y', 't' then true
    when 'false', '0', 'n', 'f' then false
    else raise "unknown boolean: #{v.inspect}"
    end
  when :string, :text, :enum
    v
  when :integer
    v.to_i
  when :float
    v.to_f
  when :decimal
    v.to_d
  when :date
    # Dates are kept as float in Google spreadsheets.  Need to
    # convert them to dates.
    begin
      if FLOAT_PAT.match?(v.to_s)
        # numeric serial: offset from the spreadsheet base date
        EXCEL_START_DATE + v.to_f
      elsif Mcfly.is_infinity(v)
        'infinity'
      else
        v.to_date
      end
    rescue StandardError
      raise "date conversion failed for #{v.inspect}"
    end
  when :datetime
    begin
      Mcfly.is_infinity(v) ? 'infinity' : v.to_datetime
    rescue StandardError
      raise "datetime conversion failed for #{v.inspect}"
    end
  when :numrange, :int4range, :int8range
    v.to_s
  when :float_array, :json, :jsonb, :enum_array, :string_array, :integer_array
    # v might be base64 or might be a readable string: try the decoded
    # form first and fall back to parsing v as-is if that fails.
    begin
      JSON.parse(Marty::DataExporter.decode_json(v))
    rescue StandardError
      JSON.parse(v)
    end
  else
    raise "unknown type #{type} for #{v.inspect}"
  end
end

.convert_row(klass, row, dt) ⇒ Object



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/marty/data_conversion.rb', line 170

def self.convert_row(klass, row, dt)
  # Given row information from imports (usually csv row or hash),
  # return a hash with fields converted into proper ruby types.
  #
  # klass - the ActiveRecord class the row is imported into
  # row   - hash-like mapping of column name => raw value.  A key is
  #         either a plain column name, an association name, or
  #         "assoc__attr" naming one lookup attribute of an association.
  #         Keys may be Strings or Symbols.
  # dt    - passed unchanged to find_row and to nested convert_row calls
  #
  # Returns a Hash keyed by String column name.  Plain columns hold the
  # converted value; each association contributes "<assoc>_id" holding
  # the id of the referenced record (or nil).
  #
  # Raises RuntimeError for a nil key, an unknown column, a bad
  # grouping, an association id that does not exist, or a referenced
  # record that cannot be found.

  ctypes = col_types(klass)
  assoc  = associations(klass)

  raise "bad row (extra columns?) -- #{row}" if row.key?(nil)

  # Normalize keys to strings before anything else so that grouping and
  # every later row lookup use the same keys (symbol-keyed rows would
  # otherwise miss their association values).
  #
  # FIXME: map all empty string values to nil --- this means that
  # user can't import empty strings -- Perhaps, mapping "" -> nil
  # should be optional?
  row = row.each_with_object({}) do |(k, v), h|
    h[k.to_s] = v == '' ? nil : v
  end

  # group columns by the part before "__" (association or column name)
  key_groups = row.keys.group_by { |k| k.split('__').first }

  key_groups.each_with_object({}) do |(ga, g), h|
    # find the association's details
    ai = assoc[ga]

    unless ai
      raise "unexpected grouping for non assoc #{g}" unless g.length == 1

      type = ctypes[ga]

      raise "unknown column #{ga} in #{klass}" unless type

      v = row[ga]

      if v.nil?
        h[ga] = nil
      elsif Hash === type
        # got an id for an association -- FIXME: perhaps this should
        # not be allowed at all?
        raise "#{type[:assoc_class].name} with id #{v} not found" unless
          type[:assoc_class].find_by(id: v)

        h[ga] = v
      else
        # not an association, so we need to convert
        h[ga] = convert(v, type)
      end
      next
    end

    srch_class = ai[:assoc_class]
    # NOTE(review): the key is derived from the association name rather
    # than ai[:foreign_key] -- confirm for custom foreign keys.
    fk = "#{ga}_id"

    if g.length == 1
      # optimization for case where we have a 1-key association
      v = row[g.first]

      # If group has only one attr and the attr is nil or AR obj, then
      # we don't need to search.
      if v.nil? || v.is_a?(ActiveRecord::Base)
        h[fk] = v&.id
        next
      end

      # If it's an Enum, use the faster cached lookup mechanism
      if Marty::Enum === srch_class
        h[fk] = srch_class[v].id
        next
      end
    end

    # group size > 1 or not an Enum: build a new row map for this
    # association, then convert it and search for it.
    arow = g.each_with_object({}) do |k, acc|
      # Some old exports don't provide full assoc__attr column names
      # (e.g. 'xxx_name').  Instead the columns are just named by
      # assoc (e.g. 'xxx'); those fall back to the first assoc key.
      _assoc_name, ka = k.split('__', 2)

      ka ||= ai[:assoc_keys][0].to_s
      acc[ka] = row[k]
    end

    c_arow = convert_row(srch_class, arow, dt)
    o_arow = find_row(srch_class, c_arow, dt)

    raise "obj not found: #{ai[:assoc_class]}, #{c_arow}, #{dt}" unless o_arow

    h[fk] = o_arow.id
  end
end

.create_or_update(klass, row, dt) ⇒ Object



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/marty/data_conversion.rb', line 263

def self.create_or_update(klass, row, dt)
  # Import one row: convert it, look for the matching klass record via
  # find_row, and either update that record or build a new one from
  # the converted data.
  #
  # Returns [tag, id] where tag is :create, :update or :same.
  # Raises when an update would use a dt older than the record's
  # created_dt.

  attrs  = convert_row(klass, row.to_hash, dt)
  record = find_row(klass, attrs, dt) || klass.new

  # Assign only the attributes that are actually being changed.  The AR
  # obj.changed? doesn't work properly with array, JSON or lazy attrs.
  attrs.each do |attr, value|
    if record.send(attr) != value
      record.send("#{attr}=", value)
    end
  end

  # FIXME: obj.changed? doesn't work properly for timestamp
  # fields in Rails 3.2. It evaluates to true even when datetime
  # is not changed.  Caused by lack of awareness of timezones.
  tag =
    if record.new_record?
      :create
    elsif record.changed?
      :update
    else
      :same
    end

  if (tag == :update) && dt && !Mcfly.is_infinity(dt) && (record.created_dt > dt)
    raise "old created_dt >= current #{record} #{record.created_dt} #{dt}"
  end

  # Stamp created_dt only for real changes with a finite, present dt.
  skip_stamp = tag == :same || Mcfly.is_infinity(dt) || !dt
  record.created_dt = dt unless skip_stamp
  record.save!

  [tag, record.id]
end

.find_row(klass, options, dt) ⇒ Object



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/marty/data_conversion.rb', line 149

def self.find_row(klass, options, dt)
  # Look up the single klass record identified by the key attributes
  # (assoc_keys) present in options.  When dt is given and
  # Mcfly.has_mcfly?(klass) is true, the search is additionally bounded
  # by obsoleted_dt/created_dt around dt.
  #
  # Returns the matching record or nil.  Raises when klass has no key
  # attributes, options contains none of them, or more than one record
  # matches.

  keys = assoc_keys(klass)
  raise "no key_attrs for #{klass}" unless keys

  criteria = options.select { |attr, _value| keys.member?(attr.to_sym) }
  raise "no keys for #{klass} -- #{options}" if criteria.empty?

  scope = klass.where(criteria)
  if dt && Mcfly.has_mcfly?(klass)
    scope = scope.where('obsoleted_dt >= ? AND created_dt < ?', dt, dt)
  end

  # scope.count is almost always 0 or 1 => hopefully it's not too slow on PG.
  raise "too many results for: #{klass} -- #{options}" if scope.count > 1

  scope.first
end