Class: DataShift::ExcelLoader

Inherits:
LoaderBase show all
Includes:
ExcelBase, FileLoader
Defined in:
lib/loaders/excel_loader.rb

Instance Attribute Summary

Attributes included from FileLoader

#file_name

Attributes included from ExcelBase

#excel, #sheet

Attributes inherited from LoaderBase

#binder, #configuration, #doc_context, #file_name

Attributes included from Delimiters

#attribute_list_end, #attribute_list_start, #csv_delimiter, #key_value_sep, #text_delim

Instance Method Summary collapse

Methods included from ExcelBase

#ar_to_xls, #ar_to_xls_cell, #ar_to_xls_row, max_columns, #open_excel, #parse_headers, #sanitize_sheet_name, #start_excel

Methods inherited from LoaderBase

#abort_on_failure?, #bind_headers, #configure_from, #load_object_class, #report, #reset, #run, #set_headers, #setup_load_class

Methods included from Querying

#find_or_new, #get_record_by, #get_record_by!, #search_for_record, where_field_and_values

Methods included from Logging

#logdir, #logdir=, #logger, #verbose

Methods included from Delimiters

#column_delim, #column_delim=, #eol, #multi_assoc_delim, #multi_assoc_delim=, #multi_facet_delim, #multi_value_delim, #multi_value_delim=, #name_value_delim, #name_value_delim=, #setmulti_facet_delim

Constructor Details

#initializeExcelLoader

Returns a new instance of ExcelLoader.



21
22
23
# File 'lib/loaders/excel_loader.rb', line 21

def initialize
  super
end

Instance Method Details

#perform_load(options = {}) ⇒ Object

Options

[:sheet_name]      : Create a new worksheet assign to @sheet. Default is class.name
[:sheet_number]    : Default is 0. The index of the Excel Worksheet to use.


30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/loaders/excel_loader.rb', line 30

def perform_load( options = {} )

  allow_empty_rows = DataShift::Loaders::Configuration.call.allow_empty_rows

  logger.info "Starting bulk load from Excel : #{file_name}"

  start(file_name, options)

  # maps list of headers into suitable calls on the Active Record class
  bind_headers(headers)

  begin
    puts 'Dummy Run - Changes will be rolled back' if(configuration.dummy_run)

    load_object_class.transaction do
      sheet.each_with_index do |row, i|
        current_row_idx = i

        next if current_row_idx == headers.idx

        # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
        # (TODO - write spec to process .xls with a huge number of rows)
        #
        # manually have to detect when actual data ends, this isn't very smart but
        # got no better idea than ending once we hit the first completely empty row
        break if !allow_empty_rows && (row.nil? || row.empty?)

        logger.info "Processing Row #{current_row_idx}"

        contains_data = false

        doc_context.progress_monitor.start_monitoring

        # Iterate over the bindings,
        # For each column bound to a model operator, create a context from data in associated Excel column

        @binder.bindings.each_with_index do |method_binding, _i|
          unless method_binding.valid?
            logger.warn("No binding was found for column (#{current_row_idx})")
            next
          end

          # get the value from the cell, binding contains the column number
          value = row[method_binding.inbound_index]

          context = doc_context.create_node_context(method_binding, current_row_idx, value)

          contains_data ||= context.contains_data?

          logger.info "Processing Column #{method_binding.inbound_index} (#{method_binding.pp})"

          begin
            context.process
          rescue
            if doc_context.all_or_nothing?
              logger.error('All or nothing set and Current Column failed so complete Row aborted')
              break
            end
          end

        end

        # Excel data rows not accurate, seems to have to manually detect when actual Excel data rows end
        break if !allow_empty_rows && contains_data == false

        doc_context.save_and_monitor_progress

        # unless next operation is update, reset the loader object
        doc_context.reset unless doc_context.node_context.next_update?
      end # all rows processed

      if(configuration.dummy_run)
        puts 'Excel loading stage done - Dummy run so Rolling Back.'
        raise ActiveRecord::Rollback # Don't actually create/upload to DB if we are doing dummy run
      end
    end # TRANSACTION N.B ActiveRecord::Rollback does not propagate outside of the containing transaction block

  rescue => e
    puts "ERROR: Excel loading failed : #{e.inspect}"
    raise e
  ensure
    report
  end

  puts 'Excel loading stage Complete.'
end