Class: ETL::Processor::BulkImportProcessor

Inherits:
Processor
  • Object
show all
Defined in:
lib/etl/processor/bulk_import_processor.rb

Overview

Processor which is used to bulk import data into a target database. The underlying database driver from ActiveRecord must support the bulk_load method.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(control, configuration) ⇒ BulkImportProcessor

Initialize the processor.

Configuration options:

  • :file: The file to load data from

  • :target: The target connection information

  • :truncate: Set to true to truncate before loading

  • :columns: The columns to load in the order they appear in the bulk data file

  • :field_separator: The field separator. Defaults to a comma

  • :line_separator: The line separator. Defaults to a newline

  • :field_enclosure: The field enclosure characters



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/etl/processor/bulk_import_processor.rb', line 33

# Initialize the processor.
#
# Both arguments are forwarded unchanged to the superclass initializer, the
# options below are copied into instance variables, and +connect+ is called
# last.
#
# Configuration options:
# * <tt>:file</tt>: The file to load data from, joined onto the directory
#   of <tt>control.file</tt>
# * <tt>:target</tt>: The target connection information
# * <tt>:truncate</tt>: Set to true to truncate before loading (defaults to
#   false)
# * <tt>:columns</tt>: The columns to load in the order they appear in the
#   bulk data file
# * <tt>:field_separator</tt>: The field separator. Defaults to a comma
# * <tt>:line_separator</tt>: The line separator. Defaults to a newline
# * <tt>:field_enclosure</tt>: The field enclosure characters
def initialize(control, configuration)
  super
  @file = File.join(File.dirname(control.file), configuration[:file])
  @target = configuration[:target]
  # Plain || (not ||=) so the default is not written back into the caller's
  # configuration hash.
  @truncate = configuration[:truncate] || false
  @columns = configuration[:columns]
  @field_separator = configuration[:field_separator] || ','
  @line_separator = configuration[:line_separator] || "\n"
  @field_enclosure = configuration[:field_enclosure]
  connect
end

Instance Attribute Details

#columnsObject (readonly)

Array of symbols representing the column load order



14
15
16
# File 'lib/etl/processor/bulk_import_processor.rb', line 14

# Reader for @columns: the Array of symbols giving the column load order.
def columns; @columns; end

#field_enclosureObject

The field enclosure (defaults to nil)



18
19
20
# File 'lib/etl/processor/bulk_import_processor.rb', line 18

# Reader for @field_enclosure: the field enclosure, nil when none is set.
def field_enclosure; @field_enclosure; end

#field_separatorObject

The field separator (defaults to a comma)



16
17
18
# File 'lib/etl/processor/bulk_import_processor.rb', line 16

# Reader for @field_separator: the separator placed between fields.
def field_separator; @field_separator; end

#fileObject (readonly)

The file to load from



8
9
10
# File 'lib/etl/processor/bulk_import_processor.rb', line 8

# Reader for @file: the file to load from.
def file; @file; end

#line_separatorObject

The line separator (defaults to a newline)



20
21
22
# File 'lib/etl/processor/bulk_import_processor.rb', line 20

# Reader for @line_separator: the separator placed between lines.
def line_separator; @line_separator; end

#targetObject (readonly)

The target database information (see initialize)



10
11
12
# File 'lib/etl/processor/bulk_import_processor.rb', line 10

# Reader for @target: the target database information passed to initialize.
def target; @target; end

#truncateObject (readonly)

Set to true to truncate



12
13
14
# File 'lib/etl/processor/bulk_import_processor.rb', line 12

# Reader for @truncate: truthy when the table should be truncated first.
def truncate; @truncate; end

Instance Method Details

#processObject

Execute the processor



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/etl/processor/bulk_import_processor.rb', line 46

# Execute the processor: bulk load +file+ into the target table.
#
# Returns immediately when ETL::Engine.skip_bulk_import is set. Otherwise,
# inside one transaction on ETL::ActiveRecord::Base.connection, the target
# table is truncated first when +truncate+ is set, and the file is then
# handed to the connection's +bulk_load+ together with the column order and
# whichever field/line formatting options are set.
#
# Returns the value of the transaction block (the result of +bulk_load+ when
# the connection's +transaction+ passes it through), or nil when skipped.
def process
  return if ETL::Engine.skip_bulk_import

  conn = ETL::ActiveRecord::Base.connection
  conn.transaction do
    # TODO: Support all database types
    # Since LOCAL is used this must be allowed by both the client and server
    conn.truncate(target[:table]) if truncate

    options = { :columns => columns }

    # Forward only the formatting options that are actually set. Each option
    # is checked on its own so the line separator is not dropped when
    # neither a field separator nor a field enclosure is configured.
    fields = {}
    fields[:delimited_by] = field_separator if field_separator
    fields[:enclosed_by] = field_enclosure if field_enclosure
    fields[:terminated_by] = line_separator if line_separator
    options[:fields] = fields unless fields.empty?

    conn.bulk_load(file, target[:table], options)
  end
end