Class: Marty::DataConversion
- Inherits:
-
Object
- Object
- Marty::DataConversion
- Defined in:
- lib/marty/data_conversion.rb
Constant Summary collapse
- EXCEL_START_DATE =
Date.parse('1/1/1900') - 2
- FLOAT_PAT =
/\A-?\d+(\.?\d+)?([eE][-+]?[0-9]+)?\z/
- PATS =
{ integer: /^-?\d+(\.0+)?$/, float: FLOAT_PAT, decimal: FLOAT_PAT, }
- DATABASE_TYPES =
database types that can be converted to on import
Set[ :boolean, :string, :text, :integer, :float, :decimal, :date, :datetime, :numrange, :int4range, :int8range, :float_array, :json, :jsonb, :enum, ]
- @@associations =
{}
- @@col_types =
{}
Class Method Summary collapse
- .assoc_cols(klass) ⇒ Object
- .assoc_keys(klass) ⇒ Object
- .associations(klass) ⇒ Object
- .col_types(klass) ⇒ Object
- .columns(klass) ⇒ Object
- .convert(v, type) ⇒ Object
- .convert_row(klass, row, dt) ⇒ Object
- .create_or_update(klass, row, dt) ⇒ Object
- .find_row(klass, options, dt) ⇒ Object
Class Method Details
.assoc_cols(klass) ⇒ Object
109 110 111 112 |
# File 'lib/marty/data_conversion.rb', line 109

# Foreign-key column of every association profiled for klass
# (e.g. ["xxx_id", ...]), in the order associations(klass) lists them.
def self.assoc_cols(klass)
  associations(klass).map { |_name, profile| profile[:foreign_key] }
end
.assoc_keys(klass) ⇒ Object
82 83 84 85 86 87 88 89 90 91 |
# File 'lib/marty/data_conversion.rb', line 82

# Returns the attributes that identify a row of klass on import.
#
# * Mcfly models: whatever Mcfly.mcfly_uniqueness(klass) reports.
# * Other models: the MARTY_IMPORT_UNIQUENESS constant when klass has one.
# * Otherwise: a one-element array holding the first column that is not
#   'id', as a symbol.
def self.assoc_keys(klass)
  return Mcfly.mcfly_uniqueness(klass) if Mcfly.has_mcfly?(klass)

  begin
    klass.const_get(:MARTY_IMPORT_UNIQUENESS)
  rescue NameError
    # Only a missing constant triggers the fallback; any other failure
    # is allowed to propagate instead of being silently swallowed.
    #
    # FIXME: very hacky -- picks 1st non-id attr as the association
    # key for regular (non-mcfly) AR models which don't have
    # MARTY_IMPORT_UNIQUENESS.
    [klass.column_names.find { |name| name != 'id' }.to_sym]
  end
end
.associations(klass) ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 107 |
# File 'lib/marty/data_conversion.rb', line 95

# Profile of klass's ActiveRecord associations, used to find/import its
# database records. Memoized per class in @@associations.
#
# The result maps each association name (as a string) to a hash with:
#   :assoc_keys  - assoc_keys of the associated class
#   :assoc_class - the associated class
#   :foreign_key - the association's foreign key
def self.associations(klass)
  cached = @@associations[klass]
  return cached if cached

  pairs = klass.reflect_on_all_associations.map do |reflection|
    profile = {
      assoc_keys: assoc_keys(reflection.klass),
      assoc_class: reflection.klass,
      foreign_key: reflection.foreign_key,
    }
    [reflection.name.to_s, profile]
  end

  @@associations[klass] = pairs.to_h
end
.col_types(klass) ⇒ Object
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/marty/data_conversion.rb', line 118

# Profile of klass's columns, used to find/import klass database records.
# Memoized per class in @@col_types.
#
# The result maps each column name to either:
#   * the association profile (a Hash from associations(klass)) when the
#     column is an association foreign key, or
#   * a type symbol (col.type, falling back to col.sql_type, with an
#     "_array" suffix when col.array is set).
# Columns listed in Mcfly::COLUMNS are left out.
def self.col_types(klass)
  @@col_types[klass] ||= begin
    # Looked up once per class. Inside the column block these would be
    # block-local and therefore recomputed on every iteration.
    assoc = associations(klass)
    acols = assoc_cols(klass)

    klass.columns.each_with_object({}) do |col, h|
      cn = col.name

      # ignore mcfly cols
      next if Mcfly::COLUMNS.member?(cn)

      if acols.member?(cn)
        h[cn] = assoc.values.detect { |a| a[:foreign_key] == cn }
      else
        # for JSON fields in Rails 3.x type is nil, so use sql_type
        type = col.type || col.sql_type
        type = "#{type}_array" if col.array
        h[cn] = type.to_sym
      end
    end
  end
end
.columns(klass) ⇒ Object
142 143 144 145 |
# File 'lib/marty/data_conversion.rb', line 142

# Names of the columns profiled by col_types(klass), i.e. the non-mcfly
# columns of klass.
def self.columns(klass)
  col_types(klass).each_key.to_a
end
.convert(v, type) ⇒ Object
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# File 'lib/marty/data_conversion.rb', line 31

# Converts external data v (e.g. from a CSV, cut/paste) to the value
# stored for an ActiveRecord column of the given type.
#
# v    - the raw value (usually a String).
# type - one of the type symbols handled below.
#
# Raises RuntimeError when a String v does not match the pattern
# registered for type in PATS, when a boolean/date/datetime cannot be
# converted, or when type is not handled.
def self.convert(v, type)
  pat = PATS[type]

  raise "bad #{type} #{v.inspect}" if v.is_a?(String) && pat && !pat.match?(v)

  case type
  when :boolean
    case v.to_s.downcase
    when 'true', '1', 'y', 't' then true
    when 'false', '0', 'n', 'f' then false
    else raise "unknown boolean: #{v.inspect}"
    end
  when :string, :text, :enum
    v
  when :integer
    v.to_i
  when :float
    v.to_f
  when :decimal
    v.to_d
  when :date
    # Dates are kept as float in Google spreadsheets. Need to
    # convert them to dates.
    begin
      if FLOAT_PAT.match?(v.to_s)
        EXCEL_START_DATE + v.to_f
      elsif Mcfly.is_infinity(v)
        'infinity'
      else
        v.to_date
      end
    rescue StandardError
      raise "date conversion failed for #{v.inspect}"
    end
  when :datetime
    begin
      Mcfly.is_infinity(v) ? 'infinity' : v.to_datetime
    rescue StandardError
      raise "datetime conversion failed for #{v.inspect}"
    end
  when :numrange, :int4range, :int8range
    v.to_s
  when :float_array, :json, :jsonb, :enum_array, :string_array,
       :integer_array
    # v might be base64 or might be a readable string: try the exporter's
    # decoding first and fall back to parsing v directly.
    begin
      JSON.parse(Marty::DataExporter.decode_json(v))
    rescue StandardError
      JSON.parse(v)
    end
  else
    raise "unknown type #{type} for #{v.inspect}"
  end
end
.convert_row(klass, row, dt) ⇒ Object
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 |
# File 'lib/marty/data_conversion.rb', line 170

# Given row information from imports (usually csv row or hash), return a
# hash with fields converted into proper ruby types.
#
# klass - the model whose col_types/associations describe the row.
# row   - hash of column name => raw value. Association columns are named
#         "assoc__attr"; a bare "assoc" column is read as the first of
#         the association's assoc_keys.
# dt    - passed through to find_row when resolving associations.
#
# Returns a hash keyed by column name (strings); association groups are
# replaced by a single "<assoc>_id" entry.
#
# Raises RuntimeError for a nil key, an unknown column, an association id
# that does not exist, or an associated object that cannot be found.
def self.convert_row(klass, row, dt)
  ctypes = col_types(klass)
  assoc = associations(klass)

  raise "bad row (extra columns?) -- #{row}" if row.key?(nil)

  # FIXME: map all empty string values to nil --- this means that
  # user can't import empty strings -- Perhaps, mapping "" -> nil
  # should be optional?
  row = row.each_with_object({}) do |(k, v), normalized|
    normalized[k.to_s] = v == '' ? nil : v
  end

  # Group on the string-keyed row so group members can always be looked
  # up in (and split like) the normalized keys, even when the caller
  # passed symbol keys.
  key_groups = row.keys.group_by { |k| k.split('__').first }

  key_groups.each_with_object({}) do |(ga, g), h|
    # find the association's details
    ai = assoc[ga]

    unless ai
      raise "unexpected grouping for non assoc #{g}" unless g.length == 1

      type = ctypes[ga]

      raise "unknown column #{ga} in #{klass}" unless type

      v = row[ga]

      if v.nil?
        h[ga] = nil
      elsif type.is_a?(Hash)
        # got an id for an association -- FIXME: perhaps this should
        # not be allowed at all?
        raise "#{type[:assoc_class].name} with id #{v} not found" unless
          type[:assoc_class].find_by(id: v)

        h[ga] = v
      else
        # not an association, so we need to convert
        h[ga] = convert(v, type)
      end

      next
    end

    srch_class = ai[:assoc_class]
    fk = "#{ga}_id"

    if g.length == 1
      # optimization for case where we have a 1-key association
      v = row[g.first]

      # If group has only one attr and the attr is nil or AR obj, then
      # we don't need to search.
      if v.nil? || v.is_a?(ActiveRecord::Base)
        h[fk] = v&.id
        next
      end

      # If it's an Enum, use the faster cached lookup mechanism
      if srch_class.is_a?(Marty::Enum)
        h[fk] = srch_class[v].id
        next
      end
    end

    # build a new row map for this association, we need to convert
    # it and search for it.
    arow = g.each_with_object({}) do |k, sub_row|
      # Some old exports don't provide full assoc__attr column names
      # (e.g. 'xxx__name'). Instead the columns are just named by
      # assoc (e.g. 'xxx').
      _, ka = k.split('__', 2)
      ka ||= ai[:assoc_keys][0].to_s
      sub_row[ka] = row[k]
    end

    c_arow = convert_row(srch_class, arow, dt)
    o_arow = find_row(srch_class, c_arow, dt)

    raise "obj not found: #{ai[:assoc_class]}, #{c_arow}, #{dt}" unless o_arow

    h[fk] = o_arow.id
  end
end
.create_or_update(klass, row, dt) ⇒ Object
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 |
# File 'lib/marty/data_conversion.rb', line 263

# Imports one row of data into klass.
#
# The row is converted with convert_row and looked up with find_row using
# the klass keys. A matching DB row is updated as of the dt datetime; when
# nothing matches, a new klass instance is built from the row data.
#
# Returns [tag, id] where tag is :create, :update or :same.
# Raises when an update would carry a dt earlier than the record's
# current created_dt.
def self.create_or_update(klass, row, dt)
  converted = convert_row(klass, row.to_hash, dt)
  record = find_row(klass, converted, dt) || klass.new

  # Assign only the attrs that are actually being changed. The AR
  # obj.changed? doesn't work properly with array, JSON or lazy attrs.
  converted.each do |attr, value|
    differs = record.send(attr) != value
    record.send("#{attr}=", value) if differs
  end

  # FIXME: obj.changed? doesn't work properly for timestamp
  # fields in Rails 3.2. It evaluates to true even when datetime
  # is not changed. Caused by lack of awareness of timezones.
  tag =
    if record.new_record?
      :create
    elsif record.changed?
      :update
    else
      :same
    end

  if tag == :update && dt && !Mcfly.is_infinity(dt) && record.created_dt > dt
    raise "old created_dt >= current #{record} #{record.created_dt} #{dt}"
  end

  record.created_dt = dt if tag != :same && !Mcfly.is_infinity(dt) && dt

  record.save!

  [tag, record.id]
end
.find_row(klass, options, dt) ⇒ Object
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/marty/data_conversion.rb', line 149

# Finds the single klass row identified by the key attributes in options.
#
# klass   - the model to search; its key attributes come from assoc_keys.
# options - hash of attribute => value; only entries whose key (as a
#           symbol) is one of the key attributes are used in the query.
# dt      - when given and klass is a Mcfly model, the search is limited
#           to rows with obsoleted_dt >= dt and created_dt < dt.
#
# Returns the matching row, or nil when there is none.
# Raises RuntimeError when klass has no key attributes, when options has
# none of them, or when more than one row matches.
def self.find_row(klass, options, dt)
  key_attrs = assoc_keys(klass)

  raise "no key_attrs for #{klass}" unless key_attrs

  find_options = options.select { |k, _v| key_attrs.member? k.to_sym }

  raise "no keys for #{klass} -- #{options}" if find_options.empty?

  q = klass.where(find_options)
  q = q.where('obsoleted_dt >= ? AND created_dt < ?', dt, dt) if
    dt && Mcfly.has_mcfly?(klass)

  # q.count is almost always 0 or 1 => hopefully it's not too slow on PG.
  raise "too many results for: #{klass} -- #{options}" if q.count > 1

  q.first
end