Class: DataSpork::Importer

Inherits:
Object
  • Object
show all
Defined in:
lib/data_spork.rb,
lib/data_spork/importer.rb

Direct Known Subclasses

GoogleSpreadsheet

Defined Under Namespace

Classes: GoogleSpreadsheet

Constant Summary collapse

VERBOSE =
false
VERBOSE_IO_OPTIONS =
ENV['VERBOSE'].eql?('true')
ENCODE_VALUES =

skip encoding until a need is found

false
SANITIZE_VALUES =

skip sanitizing until a need is found

false

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input_type, options = nil) ⇒ Importer

Constructor



25
26
27
28
29
# File 'lib/data_spork/importer.rb', line 25

# Constructor: remembers the input type, then prepares the options hash and
# the list of writers.
#
# @param input_type [Symbol] stored in @input_type
# @param options [Hash, nil] handed to #init_options
def initialize(input_type, options = nil)
  @input_type = input_type
  init_options(options)
  init_writers
end

Instance Attribute Details

#blank_row ⇒ Object

Returns the value of attribute blank_row.



13
14
15
# File 'lib/data_spork/importer.rb', line 13

# Reader for @blank_row. #sanitize sets the flag to true at the start of each
# row and clears it once a non-blank value is seen; #reject? consults it.
#
# @return [Object] the value of @blank_row
def blank_row
  @blank_row
end

#col_map ⇒ Object (readonly)

Returns the value of attribute col_map.



11
12
13
# File 'lib/data_spork/importer.rb', line 11

# Reader for @col_map. #setup looks up every cell of the header row in it
# (col_map[col]) to build the headers list.
# NOTE(review): @col_map is not assigned in the code shown here — presumably
# supplied by subclasses; confirm.
#
# @return [Object] the value of @col_map
def col_map
  @col_map
end

#col_num ⇒ Object

Returns the value of attribute col_num.



13
14
15
# File 'lib/data_spork/importer.rb', line 13

# Reader for @col_num, the index of the column currently being processed.
# #sanitize and #put_row assign it while walking a row; #col_value and #header
# use it as their default index.
#
# @return [Object] the value of @col_num
def col_num
  @col_num
end

#col_tags ⇒ Object (readonly)

Returns the value of attribute col_tags.



12
13
14
# File 'lib/data_spork/importer.rb', line 12

# Reader for @col_tags. #output_column? includes a column in the output only
# when this collection includes the column's header.
# NOTE(review): @col_tags is not assigned in the code shown here — confirm
# where it is populated.
#
# @return [Object] the value of @col_tags
def col_tags
  @col_tags
end

#effective_date ⇒ Object

Returns the value of attribute effective_date.



14
15
16
# File 'lib/data_spork/importer.rb', line 14

# Reader for @effective_date. #clip_effective_date? assigns it the stripped
# text of the second cell of the row that carries the effective date header.
#
# @return [Object] the value of @effective_date
def effective_date
  @effective_date
end

#headers ⇒ Object (readonly)

Returns the value of attribute headers.



10
11
12
# File 'lib/data_spork/importer.rb', line 10

# Reader for @headers. #start resets it to an empty array and #setup appends
# one entry per cell of the header row.
#
# @return [Object] the value of @headers
def headers
  @headers
end

#input_type ⇒ Object (readonly)

Returns the value of attribute input_type.



10
11
12
# File 'lib/data_spork/importer.rb', line 10

# Reader for @input_type, which the constructor stores. #csv? and #xlsx?
# compare it with :csv and :xlsx/:xls, and #source_name uses it as the file
# extension.
#
# @return [Object] the value of @input_type
def input_type
  @input_type
end

#options ⇒ Object (readonly)

Returns the value of attribute options.



10
11
12
# File 'lib/data_spork/importer.rb', line 10

# Reader for @options, the hash built by #init_options from the defaults and
# the options passed to the constructor.
#
# @return [Object] the value of @options
def options
  @options
end

#root_tag ⇒ Object (readonly)

Returns the value of attribute root_tag.



12
13
14
# File 'lib/data_spork/importer.rb', line 12

# Reader for @root_tag.
# NOTE(review): @root_tag is neither assigned nor read elsewhere in the code
# shown here — presumably consumed by the writers; confirm.
#
# @return [Object] the value of @root_tag
def root_tag
  @root_tag
end

#row ⇒ Object (readonly)

Returns the value of attribute row.



11
12
13
# File 'lib/data_spork/importer.rb', line 11

# Reader for @row, the row currently being processed. #append assigns it
# before sanitizing and outputting the row.
#
# @return [Object] the value of @row
def row
  @row
end

#row_num ⇒ Object (readonly)

Returns the value of attribute row_num.



11
12
13
# File 'lib/data_spork/importer.rb', line 11

# Reader for @row_num. #start resets it to 0 and #append increments it for
# every row it receives.
#
# @return [Object] the value of @row_num
def row_num
  @row_num
end

#row_tag ⇒ Object (readonly)

Returns the value of attribute row_tag.



12
13
14
# File 'lib/data_spork/importer.rb', line 12

# Reader for @row_tag.
# NOTE(review): @row_tag is neither assigned nor read elsewhere in the code
# shown here — presumably consumed by the writers; confirm.
#
# @return [Object] the value of @row_tag
def row_tag
  @row_tag
end

#setup_state ⇒ Object

Returns the value of attribute setup_state.



13
14
15
# File 'lib/data_spork/importer.rb', line 13

# Reader for @setup_state, the name of the method that #output dispatches to
# (via send) while no headers have been collected. #start sets it to
# :setup_writers and #setup_writers moves it on to :setup.
#
# @return [Object] the value of @setup_state
def setup_state
  @setup_state
end

#writers ⇒ Object (readonly)

Returns the value of attribute writers.



10
11
12
# File 'lib/data_spork/importer.rb', line 10

# Reader for @writers, the array created by #init_writers. #write sends each
# message to every element of it.
#
# @return [Object] the value of @writers
def writers
  @writers
end

Class Method Details

.convert(input_type, options = nil) ⇒ Object

Entry point to convert the input document and output it to the selected format(s).

Parameters:

  • :input_type

    symbol indicating the format of the input document: :xlsx, :csv, or :json

  • :options

    hash with options to control the behavior of the conversion



20
21
22
# File 'lib/data_spork/importer.rb', line 20

# Entry point: builds an importer for the given input type and options and
# runs its #convert.
#
# @param input_type [Symbol] passed to the constructor
# @param options [Hash, nil] passed to the constructor
# @return [Object] whatever the instance-level #convert returns
def self.convert(input_type, options = nil)
  importer = new(input_type, options)
  importer.convert
end

Instance Method Details

#add_writers ⇒ Object



41
42
43
# File 'lib/data_spork/importer.rb', line 41

# Registers the writers for this importer: appends a single XmlWriter, built
# around this importer, to #writers.
#
# @return [Array] the writers collection
def add_writers
  writers.push(XmlWriter.new(self))
end

#append(row) ⇒ Object

Appends the specified row to the output.

Parameters:

  • :row

    Array of values parsed from the CSV input.



135
136
137
138
139
140
# File 'lib/data_spork/importer.rb', line 135

# Makes the given row the current row, bumps the row counter, then sanitizes
# and outputs the row.
#
# @param row [Array] values parsed from the input
# @return [Object] whatever #output returns
def append(row)
  @row = row
  @row_num = @row_num + 1
  sanitize
  output
end

#clip_effective_date?(first_col) ⇒ Boolean

Answer true if the first_col value is the effective date header, and clip the effective date value.

Returns:

  • (Boolean)


216
217
218
219
220
221
222
223
# File 'lib/data_spork/importer.rb', line 216

# Answer true if first_col is the effective date header and, when it is,
# store the stripped text of the second cell of the current row as the
# effective date.
#
# The cell is converted with to_s before matching, so a blank (nil) or
# non-String first cell answers false instead of raising NoMethodError.
#
# @param first_col [Object] value of the first cell of the current row
# @return [Boolean] true when the effective date was clipped
def clip_effective_date?(first_col)
  return false unless first_col.to_s.match(effective_date_pattern)

  self.effective_date = row[1].to_s.strip
  true
end

#col_value(index = nil) ⇒ Object

Answer the value for the current column of data, or for the specified index.



241
242
243
# File 'lib/data_spork/importer.rb', line 241

# Answer the value at the given index of the current row, falling back to the
# current column (#col_num) when no index is given.
#
# @param index [Integer, nil] position in the row
# @return [Object] the cell value
def col_value(index = nil)
  position = index || col_num
  row[position]
end

#convert ⇒ Object

Drives the conversion of the CSV input file to XML formatted output.

Parameters:

  • :path_to_csv

    string path name of the CSV input file (note: #convert as shown takes no arguments, so this parameter entry appears to be stale)



106
107
108
109
110
111
112
# File 'lib/data_spork/importer.rb', line 106

# Drives the conversion: calls #start, appends every row yielded by #each,
# then calls #finish.
#
# @return [Object] whatever #finish returns
def convert
  start
  each { |record| append(record) }
  finish
end

#csv? ⇒ Boolean

Returns:

  • (Boolean)


96
97
98
# File 'lib/data_spork/importer.rb', line 96

# Answer true when the importer's input type is :csv. #reader uses this to
# pick the CSV reader.
#
# @return [Boolean] result of comparing #input_type with :csv
def csv?
  input_type == :csv
end

#each(&block) ⇒ Object



92
93
94
# File 'lib/data_spork/importer.rb', line 92

# Iterates the input by forwarding the block to the #each of the object
# returned by #reader.
#
# @yield [row] each row produced by the reader
# @return [Object] whatever the reader's #each returns
def each(&block)
  reader.each(&block)
end

#effective_date_pattern ⇒ Object



45
46
47
# File 'lib/data_spork/importer.rb', line 45

# Pattern that #clip_effective_date? uses to recognise the effective date
# header ("Effective Date", with either word capitalised or not).
#
# Anchored with \A so that only a value that *starts* with the header text
# matches; with ^ a multi-line cell would also match at the start of any of
# its later lines.
#
# @return [Regexp]
def effective_date_pattern
  /\A[Ee]ffective [Dd]ate/
end

#file_modifier ⇒ Object



80
81
82
# File 'lib/data_spork/importer.rb', line 80

# Text that #output_filename inserts between the output file's base name and
# its extension. This implementation returns an empty string, which leaves the
# file name unchanged.
# NOTE(review): presumably a hook for subclasses to override — confirm.
#
# @return [String] the empty string
def file_modifier
  ''
end

#finish ⇒ Object



129
130
131
# File 'lib/data_spork/importer.rb', line 129

# Ends the conversion by sending :finish to every writer.
#
# @return [Object] whatever #write returns
def finish
  write(:finish)
end

#get_substitute_value(value) ⇒ Object

Overridden by subclasses to substitute field-specific values based on their position in the row. The returned value is substituted for the passed value. This method expects only columns that are included in the output.

Parameters:

  • :value

    the value to be substituted



176
177
178
# File 'lib/data_spork/importer.rb', line 176

# Hook that #substitute_value calls for columns that are included in the
# output. This implementation returns the value unchanged.
#
# @param value [Object] the value to be substituted
# @return [Object] the same value that was passed in
def get_substitute_value(value)
  value
end

#header(index = nil) ⇒ Object

Answer the header for the current column of data, or for the specified index.



246
247
248
# File 'lib/data_spork/importer.rb', line 246

# Answer the header at the given index, falling back to the current column
# (#col_num) when no index is given.
#
# @param index [Integer, nil] position in the headers list
# @return [Object] the header stored at that position
def header(index = nil)
  position = index || col_num
  headers[position]
end

#headers? ⇒ Boolean

Answer true if the headers are already determined.

Returns:

  • (Boolean)


195
196
197
# File 'lib/data_spork/importer.rb', line 195

# Answer true once the headers list is non-empty, i.e. after #setup has
# appended the entries for the header row.
#
# @return [Boolean]
def headers?
  !headers.empty?
end

#init_options(options) ⇒ Object



31
32
33
34
# File 'lib/data_spork/importer.rb', line 31

# Builds @options from the defaults (source_path '.') overlaid with the
# caller's options. When an :output_file is requested without an explicit
# :output_path, the output path defaults to the source path.
#
# @param options [Hash, nil] caller-supplied options; nil means none
# @return [Object, nil] the defaulted output path, or nil when no default was applied
def init_options(options)
  defaults = { source_path: '.' }
  @options = defaults.merge(options || {})
  if @options[:output_path].nil? && @options[:output_file]
    @options[:output_path] = @options[:source_path]
  end
end

#init_writers ⇒ Object



36
37
38
39
# File 'lib/data_spork/importer.rb', line 36

# Starts with an empty writers list, then lets #add_writers populate it.
#
# @return [Object] whatever #add_writers returns
def init_writers
  @writers = []
  add_writers
end

#input_pathname ⇒ Object



67
68
69
# File 'lib/data_spork/importer.rb', line 67

# Path of the input file: #source_name joined onto the :source_path option.
#
# @return [String] the joined path as a string
def input_pathname
  source_dir = Pathname(options[:source_path])
  source_dir.join(source_name).to_s
end

#on_begin_row ⇒ Object



255
256
257
# File 'lib/data_spork/importer.rb', line 255

# Called by #put_row before the first column of a row: sends :begin_put_row
# to every writer.
#
# @return [Object] whatever #write returns
def on_begin_row
  write(:begin_put_row)
end

#on_end_row ⇒ Object



263
264
265
# File 'lib/data_spork/importer.rb', line 263

# Called by #put_row after the last column of a row: sends :end_put_row to
# every writer.
#
# @return [Object] whatever #write returns
def on_end_row
  write(:end_put_row)
end

#on_output_column ⇒ Object



259
260
261
# File 'lib/data_spork/importer.rb', line 259

# Called by #put_row for each column that is included in the output: sends
# :put_column to every writer.
#
# @return [Object] whatever #write returns
def on_output_column
  write(:put_column)
end

#output ⇒ Object

Output the current row of data, which were parsed from the CSV input.



200
201
202
203
204
205
206
207
208
# File 'lib/data_spork/importer.rb', line 200

# Output the current row. Rejected rows are skipped (and reported when VERBOSE
# is on). While no headers have been collected the row is handed to the
# current setup state; afterwards it is written with #put_row.
#
# @return [Object, nil] nil for a rejected row, otherwise the result of the
#   setup-state method or of #put_row
def output
  rejected = reject?
  print "rejected #{row_num}: #{row}" if rejected && VERBOSE
  return if rejected

  if headers.empty?
    send(setup_state)
  else
    put_row # a location_filter? guard used to be sketched here
  end
end

#output_column? ⇒ Boolean

Answer true when the current column should be included in the output.

Returns:

  • (Boolean)


251
252
253
# File 'lib/data_spork/importer.rb', line 251

# Answer true when the header of the current column is one of #col_tags,
# i.e. the column belongs in the output.
#
# @return [Boolean]
def output_column?
  col_tags.include?(header)
end

#output_filename ⇒ Object



75
76
77
78
# File 'lib/data_spork/importer.rb', line 75

# Full path of the output file: the :output_file option placed under
# #output_pathname, with #file_modifier inserted just before the extension.
#
# @return [Pathname]
def output_filename
  target = output_pathname.join(options[:output_file])
  target.sub_ext("#{file_modifier}#{target.extname}")
end

#output_pathname ⇒ Object



71
72
73
# File 'lib/data_spork/importer.rb', line 71

# Directory that receives the output file: an 'output' folder under the
# :output_path option.
#
# @return [Pathname]
def output_pathname
  base_dir = Pathname(options[:output_path])
  base_dir.join('output')
end


49
50
51
# File 'lib/data_spork/importer.rb', line 49

# Writes the string, followed by a newline, to the current $stdout. Because
# #start may point $stdout at the output file, this is how output ends up
# in that file.
#
# @param str [Object] the text to write
# @return [nil]
def print(str)
  $stdout.puts(str)
end


53
54
55
# File 'lib/data_spork/importer.rb', line 53

# Writes the string, followed by a newline, to $stderr, keeping diagnostics
# out of the converted output that goes to $stdout.
#
# @param str [Object] the text to write
# @return [nil]
def print_error(str)
  $stderr.puts(str)
end

#put_row ⇒ Object

Output the current row, one column at a time.



268
269
270
271
272
273
274
275
276
277
# File 'lib/data_spork/importer.rb', line 268

# Output the current row one column at a time: signals the start of the row,
# makes each index the current column and emits it when it is an output
# column, then signals the end of the row.
#
# @return [Object] whatever #on_end_row returns
def put_row
  on_begin_row
  row.each_index do |position|
    self.col_num = position
    on_output_column if output_column?
  end
  on_end_row
end

#reader ⇒ Object



84
85
86
87
88
89
90
# File 'lib/data_spork/importer.rb', line 84

# Builds the reader that matches the input type: a CSV_Reader for CSV input,
# an XLSX_Reader for XLSX/XLS input. A new reader is built on every call.
#
# @return [CSV_Reader, XLSX_Reader, nil] nil when the input type is neither
def reader
  return CSV_Reader.new(self) if csv?

  XLSX_Reader.new(self) if xlsx?
end

#reject? ⇒ Boolean

Answer true if rules dictate the current row should be discarded from processing.

Returns:

  • (Boolean)


211
212
213
# File 'lib/data_spork/importer.rb', line 211

# Answer true when the current row should be discarded: the headers are
# already known and the row was found to be blank.
#
# @return [Object] false when there are no headers yet, otherwise the value of #blank_row
def reject?
  headers? && blank_row
end

#sanitize ⇒ Object

Sanitize the current row of data. This is done in place, so not worried about a return value.



143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/data_spork/importer.rb', line 143

# Sanitize the current row in place. Walks the cells from left to right with
# #col_num tracking the position, and replaces each cell with the result of
# #substitute_value. Along the way the row is flagged as blank unless at least
# one cell is non-blank. Encoding and per-value sanitizing only happen when
# ENCODE_VALUES / SANITIZE_VALUES are switched on.
#
# @return [Array] the current row, after replacement
def sanitize
  self.col_num = 0
  self.blank_row = true
  row.map! do |cell|
    value = ENCODE_VALUES ? cell.to_s.encode('iso-8859-1', xml: :text) : cell
    self.blank_row = false if blank_row && !value.blank?
    sanitize_value(value) if headers? && SANITIZE_VALUES
    replacement = substitute_value(value)
    # advance only after the substitution, which depends on the current column
    self.col_num += 1
    replacement
  end
end

#sanitize_value(value) ⇒ Object

Sanitize field-specific values based on their position in the row. The values must be modified in place, so there is no need to return a value. This method does not sanitize columns that are not included in the output.

Parameters:

  • :value

    the value to be sanitized



185
186
187
188
189
190
191
192
# File 'lib/data_spork/importer.rb', line 185

# Sanitize a field-specific value based on the header of the current column.
# Values must be modified in place; #sanitize ignores the return value.
# Columns that are not included in the output are left alone.
#
# This base implementation is a placeholder: its only branch is for a column
# without a header and it does not modify the value.
#
# @param value [Object] the value to be sanitized
# @return [Integer, nil] 0 for a column whose header is nil, otherwise nil
def sanitize_value(value)
  return unless headers? && output_column?

  case header
  when nil # was `when nil?`, which tests self.nil? and never matched a nil header
    0
  end
end

#setup ⇒ Object

Initializes the headers on the first row and optionally outputs them when VERBOSE=true.



233
234
235
236
237
238
# File 'lib/data_spork/importer.rb', line 233

# Initializes the headers from the current (header) row: every cell is looked
# up in #col_map and the result is appended to #headers. Prints the collected
# headers when VERBOSE is on.
#
# @return [nil]
def setup
  row.each_with_object(headers) do |col, collected|
    collected << col_map[col]
  end
  print "headers: #{row_num}: #{headers}" if VERBOSE
end

#setup_writers ⇒ Object

Initializes the xml document and transfers setup_state to :setup



226
227
228
229
230
# File 'lib/data_spork/importer.rb', line 226

# First setup state: sends :start to every writer, moves the setup state on
# to :setup and immediately runs that next state.
#
# @return [Object] whatever the :setup state method returns
def setup_writers
  write(:start)
  self.setup_state = :setup
  send(setup_state) # automatically transition to the next state
end

#source_name ⇒ Object



61
62
63
64
65
# File 'lib/data_spork/importer.rb', line 61

# File name of the input document: DEFAULT_INPUT_NAME with the input type as
# its extension.
#
# @return [String]
def source_name
  "#{DEFAULT_INPUT_NAME}.#{input_type}"
end

#start ⇒ Object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/data_spork/importer.rb', line 114

# Prepares a conversion run. Reports the options and paths on $stderr when
# VERBOSE_IO_OPTIONS is on, redirects $stdout to the output file when an
# :output_file option is given (creating the output directory first), and
# resets the row counter, the headers and the setup state.
# NOTE(review): the file assigned to $stdout is not closed in this method —
# confirm that it is closed elsewhere.
#
# @return [Symbol] :setup_writers
def start
  if VERBOSE_IO_OPTIONS
    print_error("options: #{options}")
    print_error("input_pathname: #{input_pathname}, exists: #{File.exist?(input_pathname)}")
    print_error("output_filename: #{output_filename}") if options[:output_file]
  end

  if options[:output_file]
    output_pathname.mkpath
    $stdout = File.open(output_filename.to_s, 'w')
  end

  @headers = []
  @row_num = 0
  self.setup_state = :setup_writers
end

#substitute_value(value) ⇒ Object

Substitute field-specific values based on their position in the row. The returned value is substituted for the passed value. This method does not process columns that are not included in the output.

Subclasses should not override this method, but should override #get_substitute_value instead.

Parameters:

  • :value

    the value to be substituted



163
164
165
166
167
168
169
# File 'lib/data_spork/importer.rb', line 163

# Substitute a field-specific value for the current column. Only columns that
# are part of the output, once the headers are known, go through
# #get_substitute_value; every other value is returned untouched.
#
# Subclasses should override #get_substitute_value rather than this method.
#
# @param value [Object] the value to be substituted
# @return [Object] the substituted (or original) value
def substitute_value(value)
  return value unless headers? && output_column?

  get_substitute_value(value)
end

#write(msg) ⇒ Object



57
58
59
# File 'lib/data_spork/importer.rb', line 57

# Broadcasts a message to the writers: invokes the method named msg, without
# arguments, on every element of #writers.
#
# @param msg [Symbol] name of the method to invoke on each writer
# @return [Object] the writers collection
def write(msg)
  writers.each do |writer|
    writer.send(msg)
  end
end

#xlsx? ⇒ Boolean

Returns:

  • (Boolean)


100
101
102
# File 'lib/data_spork/importer.rb', line 100

# Answer true when the importer's input type is one of the spreadsheet
# formats, :xlsx or :xls. #reader uses this to pick the XLSX reader.
#
# @return [Boolean]
def xlsx?
  %i[xlsx xls].include?(input_type)
end