Class: Marty::DataImporter

Inherits:
Object
  • Object
show all
Defined in:
lib/marty/data_importer.rb

Defined Under Namespace

Classes: Error

Class Method Summary collapse

Class Method Details

.do_import(klass, data, dt = 'infinity', cleaner_function = nil, validation_function = nil, col_sep = "\t", allow_dups = false, preprocess_function = nil) ⇒ Object

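Given a Mcfly klass and CSV data, import the data into the database and report on affected rows. The result is an array with one entry per data row. Each entry is normally a tuple of the form [tag, id], where tag is one of :same, :update, :create and “id” is the id of the affected row. A data row that contains no values is reported as the bare symbol :blank instead of a tuple. If a cleaner function is given, a [:clean, id] tuple is appended for every id it returned that the import did not touch; those rows are deleted.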



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/marty/data_importer.rb', line 45

# Imports +data+ into the table behind +klass+ and reports what happened to
# each input row. All work runs inside +klass.transaction+, so any error
# raised below propagates out of that transaction block.
#
# @param klass [Class] Mcfly model class; must respond to +transaction+ and
#   +delete+, plus any of the function names passed below
# @param data [Array, Object] already-parsed rows (each responding to
#   +to_hash+), or anything accepted by +CSV.new+ (parsed with a header line)
# @param dt [Object] passed through unchanged to
#   +Marty::DataConversion.create_or_update+
# @param cleaner_function [String, Symbol, nil] name of a +klass+ method
#   returning an array of Integer ids; ids not touched by the import are
#   deleted afterwards
# @param validation_function [String, Symbol, nil] name of a +klass+ method
#   called with the ids of all imported (non-blank) rows
# @param col_sep [String] CSV column separator (ignored for Array data)
# @param allow_dups [Boolean] when false, the same id showing up on two rows
#   is an error
# @param preprocess_function [String, Symbol, nil] name of a +klass+ method
#   that receives the parsed rows and returns the rows to import
# @return [Array] one entry per row -- whatever +create_or_update+ returned
#   (destructured here as +[tag, id]+), or the bare symbol +:blank+ for a row
#   with no truthy values -- followed by one +[:clean, id]+ per deleted id
# @raise [Error] on a row-level failure (with the 0-based row index), a
#   duplicate id, a validation failure, or when no row produced an id
# @raise [RuntimeError] when the cleaner function returns a non-Integer
def self.do_import(klass,
                   data,
                   dt                  = 'infinity',
                   cleaner_function    = nil,
                   validation_function = nil,
                   col_sep             = "\t",
                   allow_dups          = false,
                   preprocess_function = nil
                  )

  parsed = data.is_a?(Array) ? data :
    CSV.new(data, headers: true, col_sep: col_sep)

  # run preprocessor
  parsed = klass.send(preprocess_function.to_sym, parsed) if
    preprocess_function

  klass.transaction do
    cleaner_ids = cleaner_function ? klass.send(cleaner_function.to_sym) : []

    raise 'bad cleaner function result' unless
      cleaner_ids.all? { |id| id.is_a?(Integer) }

    # index of the row currently being processed, kept for error reporting
    eline = 0

    begin
      res = parsed.each_with_index.map do |row, line|
        eline = line

        # skip lines which are all nil
        next :blank if row.to_hash.values.none?

        Marty::DataConversion.create_or_update(klass, row, dt)
      end
    rescue StandardError => e
      # to find problems with the importer, comment out the rescue block
      raise Error.new(e.to_s, [eline])
    end

    # id => index of the last row that referenced it
    ids = {}
    res.each_with_index do |(op, id), line|
      # Blank rows carry no id; recording them would put a nil key into
      # +ids+ and hand that nil to the validation function.
      next if op == :blank

      # raise an error if record referenced more than once.
      raise Error.new('record referenced more than once', [ids[id], line]) if
        ids.key?(id) && !allow_dups

      ids[id] = line
    end

    begin
      # Validate affected rows if necessary
      klass.send(validation_function.to_sym, ids.keys) if
        validation_function
    rescue StandardError => e
      raise Error.new(e.to_s, [])
    end

    remainder_ids = cleaner_ids - ids.keys

    if ids.keys.compact.empty?
      raise Error.new(
        'Missing import data. ' \
        'Please provide header line and at least one data line.', [1]
      )
    end

    # remove the rows the cleaner flagged that this import did not touch
    klass.delete(remainder_ids)
    res + remainder_ids.map { |id| [:clean, id] }
  end
end

.do_import_summary(klass, data, dt = 'infinity', cleaner_function = nil, validation_function = nil, col_sep = "\t", allow_dups = false, preprocess_function = nil) ⇒ Object

Performs cleaning and do_import, then summarizes the results as a hash that maps each result tag to the number of rows reported with that tag.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/marty/data_importer.rb', line 15

# Runs do_import with the given arguments and condenses its per-row results
# into a count per tag.
#
# All parameters are forwarded to do_import unchanged, in the same order.
#
# @return [Hash] tag => number of result entries carrying that tag; the
#   hash defaults to 0 for tags that never occurred
def self.do_import_summary(klass,
                           data,
                           dt                  = 'infinity',
                           cleaner_function    = nil,
                           validation_function = nil,
                           col_sep             = "\t",
                           allow_dups          = false,
                           preprocess_function = nil
                          )

  outcomes = do_import(
    klass, data, dt, cleaner_function, validation_function,
    col_sep, allow_dups, preprocess_function
  )

  counts = Hash.new(0)
  # an entry is either a [tag, id] pair or a bare tag; only the tag matters
  outcomes.each do |tag, _id|
    counts[tag] += 1
  end
  counts
end