Class: Importer::Column

Inherits:
Object
  • Object
show all
Defined in:
lib/iron/import/column.rb

Overview

Columns represent the settings for importing a given column within a Sheet. They do not hold data, rather they capture the settings needed for identifying the column in the header, how to parse and validate each of their cell’s data, and so forth.

Here’s the complete list of column configuration options:

Importer.build do
  column :key do
    # Mark this column as optional, i.e. if the header isn't found, the import will
    # work without error and the imported row will simply not contain this column's data.
    optional!

    # Set a fixed position - may be a column number or a letter-based
    # column description, ie 'A' == 1.  In most cases, you can leave
    # this defaulted to nil, which will mean "look for the proper header"
    position 'C'

    # Specify a regex to locate the header for this column, defaults to 
    # finding a string containing the key, ignored if position is set.
    header /(price|cost)/i

    # Tells the data parser what type of data this column contains, one
    # of :integer, :string, :date, :float, :bool or :cents.  Defaults to :string.
    type :cents

    # Instead of a type, you can set an explicit parse block.  Be aware
    # that different source types may give you different raw values for what
    # seems like the "same" source value, for example an Excel source file
    # will give you a float value for all numeric types, even "integers", while
    # CSV and HTML values are always strings.  By default, will take the raw
    # value of the row, but if used with #type, you can have the pre-processed
    # output of that type as your input.
    parse do |raw_value|
      val = raw_value.to_i + 1000
      # NOTE: we're in a block, so don't do this:
      #   return val
      # Instead, use implied return:
      val
    end

    # You can also add a custom validator to check the value and add
    # an error if it's not within a given range, or whatever.  To fail validation,
    # return false, raise an exception, or use #add_error
    validate do |parsed_value, row|
      add_error "Out of range" unless (parsed_value > 0 && parsed_value < 5000)
    end

    # Mark a column as _virtual_, meaning it won't be looked for in the source
    # file/stream, and instead will be calculated using #calculate.  When set,
    # causes importer to ignore position/header/type/parse settings.
    virtual!

    # When #virtual! is set, gets called to calculate each row's value for this
    # column using the row's parsed values from other columns.
    calculate do |row|
      row[:other_col_key] + 5
    end
  end
end

Defined Under Namespace

Classes: Data

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(importer, key, options_hash = {}) ⇒ Column

Create a new column definition with the key for the column, and an optional set of options. The options supported are the same as those supported in block/builder mode.



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/iron/import/column.rb', line 121

# Create a new column definition.
#
# importer     - the importer this column belongs to (kept in @importer)
# key          - the column key; also the basis of the default header matcher
# options_hash - optional settings, looked up by symbol key: :optional, :virtual,
#                :type, :position, :parse, :validate, :calculate and :header
#
# The options hash is only read, never modified, so a caller can safely pass
# the same hash when defining several columns.
def initialize(importer, key, options_hash = {})
  # Save off our info
  @key = key
  @importer = importer

  # Are we optional?
  @optional = options_hash.fetch(:optional, false)

  # Are we virtual?
  @virtual = options_hash.fetch(:virtual, false)

  # Explicit data type, if any (nil means "use the default", see #type)
  @type = options_hash.fetch(:type, nil)

  # Position can be explicitly set
  @position = options_hash.fetch(:position, nil)

  # By default, don't parse incoming data, just pass it through
  @parse = options_hash.fetch(:parse, nil)

  # Custom validation, anyone?
  @validate = options_hash.fetch(:validate, nil)

  # Custom calculation, used to compute the value of virtual columns
  @calculate = options_hash.fetch(:calculate, nil)

  # Default matcher, looks for the column key as the whole header text
  # (surrounding whitespace allowed), ignoring case and treating underscores
  # as spaces, ie :order_id => /\A\s*order id\s*\z/i.  The key is escaped so
  # that any regexp metacharacters in it are matched literally.
  @header = options_hash.fetch(:header) do
    Regexp.new('\A\s*' + Regexp.escape(key.to_s).gsub('_', ' ') + '\s*\z', Regexp::IGNORECASE)
  end

  # Reset our state to pre-load status
  reset
end

Instance Attribute Details

#dataObject (readonly)

Returns the value of attribute data.



80
81
82
# File 'lib/iron/import/column.rb', line 80

# Read-only accessor for @data, the per-import state object that #reset
# replaces with a fresh Data instance.
def data
  @data
end

#keyObject (readonly)

Core info



79
80
81
# File 'lib/iron/import/column.rb', line 79

# Read-only accessor for @key, the column key passed to the constructor.
def key
  @key
end

Class Method Details

.index_to_pos(index) ⇒ Object

Convert a zero-based numeric index to an Excel-like column position, e.g. 2 => ‘C’



103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/iron/import/column.rb', line 103

# Convert a zero-based numeric column index to an Excel-like column position,
# e.g. 0 => 'A', 2 => 'C', 25 => 'Z', 26 => 'AA'.
#
# index - a non-negative Integer
#
# Returns the letter-based position as a String.
# Raises a RuntimeError if index is not an Integer or is negative.
def self.index_to_pos(index)
  # Integer covers what used to be Fixnum; the Fixnum constant no longer
  # exists on current Rubies, so referencing it would raise a NameError.
  raise 'Invalid column index: ' + index.inspect unless index.is_a?(Integer) && index >= 0

  chars = ('A'..'Z').to_a
  str = ''
  # Peel off the right-most letter, then move one place to the left.  Column
  # letters have no "zero" digit ('AA' directly follows 'Z'), hence the extra
  # -1 after each division.
  while index > 25
    str = chars[index % 26] + str
    index = index / 26 - 1
  end
  chars[index] + str
end

.pos_to_index(pos) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
# File 'lib/iron/import/column.rb', line 90

# Convert an Excel-like column position of one to three letters (any case)
# into a zero-based column index, e.g. 'A' => 0, 'C' => 2, 'AA' => 26.
#
# Raises a RuntimeError if pos is not a String of 1-3 letters.
def self.pos_to_index(pos)
  well_formed = pos.is_a?(String) && pos =~ /\A[a-z]{1,3}\z/i
  raise 'Invalid column position: ' + pos.inspect unless well_formed

  # Read the letters left to right as digits in base 26 where 'A' counts as 1
  # (byte 65 - 64), giving a one-based column number.
  one_based = pos.upcase.each_byte.inject(0) { |sum, byte| sum * 26 + (byte - 64) }
  one_based - 1
end

Instance Method Details

#build(&block) ⇒ Object

Customize ourselves using block syntax



159
160
161
# File 'lib/iron/import/column.rb', line 159

# Configure this column using block/builder syntax: the given block is handed
# to DslProxy.exec together with this column.
def build(&config)
  DslProxy.exec(self, &config)
end

#calculate_value(row) ⇒ Object



222
223
224
225
226
227
228
229
# File 'lib/iron/import/column.rb', line 222

# Run the #calculate block (if any) for the given row.
#
# Returns nil when no calculate block is set or when Error.with_context
# reports that an error occurred, otherwise whatever the block produced.
def calculate_value(row)
  if @calculate.nil?
    nil
  else
    computed = nil
    # with_context's return value is used as an "error happened" flag
    failed = Error.with_context(@importer, row, self, nil) { computed = DslProxy.exec(@importer, row, &@calculate) }
    computed unless failed
  end
end

#calculates?Boolean

Returns:

  • (Boolean)


277
278
279
# File 'lib/iron/import/column.rb', line 277

# True when a calculate block has been set on this column (@calculate is not nil).
def calculates?
  !@calculate.nil?
end

#error_valuesObject



285
286
287
# File 'lib/iron/import/column.rb', line 285

# The distinct values attached to this column's errors, in order of first
# appearance.
def error_values
  values = errors.map(&:value)
  values.uniq
end

#error_values?Boolean

Returns:

  • (Boolean)


289
290
291
# File 'lib/iron/import/column.rb', line 289

# True when #error_values contains at least one truthy value.  Because this
# uses block-less #any?, error values that are nil or false do not count.
def error_values?
  error_values.any?
end

#errorsObject



281
282
283
# File 'lib/iron/import/column.rb', line 281

# The errors held by this column's current @data object.
def errors
  @data.errors
end

#fixed_indexObject

Returns the fixed index of this column based on the set position. In other words, a position of 2 would return an index of 1 (as indices are 0-based), whereas a position of ‘C’ would return 2.



182
183
184
185
186
187
188
189
190
# File 'lib/iron/import/column.rb', line 182

# Returns the zero-based index implied by an explicitly set position, or nil
# when the column is virtual, no position is set, or the position is neither
# an Integer nor a String.
#
# A numeric position is one-based (2 => index 1); a letter position is
# converted by Column.pos_to_index ('C' => index 2).
def fixed_index
  return nil if virtual?

  # Integer rather than Fixnum: the Fixnum constant no longer exists on
  # current Rubies, so referencing it would raise a NameError.
  case @position
  when Integer then @position - 1
  when String then Column.pos_to_index(@position)
  end
end

#indexObject

Index of the column in the most recent import, if found, or nil if not present.



251
252
253
# File 'lib/iron/import/column.rb', line 251

# The index recorded on this column's current @data object; #present? treats
# a nil value as "column not found".
def index
  @data.index
end

#internal_typeObject

Override normal dsl_accessor behavior to return our default type which will be :raw if a #parse handler has been set, else :string



194
# File 'lib/iron/import/column.rb', line 194

# Keep the #type method that exists at this point (presumably the accessor
# generated by dsl_accessor - confirm) reachable as #internal_type, so that the
# #type override can forward to it when it is called with arguments.
alias_method :internal_type, :type

#match_header?(text, test_index) ⇒ Boolean

When true, our header definition or index match the passed text or column index.

Returns:

  • (Boolean)


169
170
171
172
173
174
175
176
177
# File 'lib/iron/import/column.rb', line 169

# When true, our header definition or fixed index matches the passed header
# text or column index.
#
# text       - the header cell's text
# test_index - the index of the column being tested
#
# Virtual columns never match.  A column whose #fixed_index equals test_index
# always matches.  Otherwise a Regexp header is matched against the text, and
# any other header is compared to the text as a string, ignoring case.
#
# NOTE(review): when a position is set but test_index differs, we still fall
# through to the header comparison - confirm this is intended.
def match_header?(text, test_index)
  return false if virtual?
  return true if test_index == fixed_index

  if @header.is_a?(Regexp)
    !@header.match(text).nil?
  else
    # Downcase BOTH sides; downcasing only the configured header meant that a
    # header such as 'Price' could never equal the identical source text.
    candidate = text.is_a?(String) ? text.downcase : text
    @header.to_s.downcase == candidate
  end
end

#missing?Boolean

Sugar, simply the opposite of #present?

Returns:

  • (Boolean)


265
266
267
# File 'lib/iron/import/column.rb', line 265

# Sugar: the exact negation of #present?.
def missing?
  !present?
end

#parse_value(row, raw_val) ⇒ Object

Applies any custom parser defined to process the given value, capturing errors as needed



212
213
214
215
216
217
218
219
220
# File 'lib/iron/import/column.rb', line 212

# Apply the custom #parse block, if one is set, to a raw cell value.
#
# Without a parse block the raw value is handed back untouched.  Otherwise the
# block runs inside Error.with_context; if that reports an error the result is
# nil, else it is whatever the block produced.
def parse_value(row, raw_val)
  if @parse.nil?
    raw_val
  else
    parsed = nil
    # with_context's return value is used as an "error happened" flag
    failed = Error.with_context(@importer, row, self, raw_val) { parsed = DslProxy.exec(@importer, raw_val, &@parse) }
    parsed unless failed
  end
end

#parses?Boolean

Returns:

  • (Boolean)


269
270
271
# File 'lib/iron/import/column.rb', line 269

# True when a custom parse block has been set on this column (@parse is not nil).
def parses?
  !@parse.nil?
end

#present?Boolean

When true, column was found in the last import, eg:

importer.process do |row|
  puts "Size: #{row[:size]}" if column(:size).present?
end

Returns:

  • (Boolean)


260
261
262
# File 'lib/iron/import/column.rb', line 260

# True when the current @data object has a non-nil index, i.e. the column was
# located during the last import.
def present?
  !@data.index.nil?
end

#resetObject

Deletes all stored data in prep for an import run



164
165
166
# File 'lib/iron/import/column.rb', line 164

# Discard any state from a previous run by replacing @data with a fresh Data
# instance.  Also called at the end of the constructor.
def reset
  @data = Data.new
end

#to_aObject

Extracts the imported values for this column and returns them in an array. Note that the array indices ARE NOT row indices, as the rows may have been filtered and any header rows have been skipped.



307
308
309
# File 'lib/iron/import/column.rb', line 307

# Collect this column's value from every row in @importer.data.rows, in row
# order.  Array positions are NOT row numbers.
def to_a
  imported_rows = @importer.data.rows
  imported_rows.map { |row| row[@key] }
end

#to_hObject

Extracts the values for this column and returns them in a hash of row num => value for all non-filtered, non-header rows.



313
314
315
316
317
# File 'lib/iron/import/column.rb', line 313

# Build a hash of row number => this column's value for every row in
# @importer.data.rows.
def to_h
  @importer.data.rows.each_with_object({}) do |row, by_row_num|
    by_row_num[row.num] = row[@key]
  end
end

#to_hashObject



318
# File 'lib/iron/import/column.rb', line 318

# Same result as #to_h.
def to_hash
  to_h
end

#to_sObject

Pretty name for ourselves



294
295
296
297
298
299
300
301
302
# File 'lib/iron/import/column.rb', line 294

def to_s
  if !virtual? && @data.header_text.blank?
    "Column #{@data.pos}"
  else
    name = virtual? ? key.to_s : @data.header_text
    name = name.gsub(/(^[a-z]|\s[a-z])/) {|m| m.capitalize } 
    "#{name} Column"
  end
end

#type(*args) ⇒ Object



195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/iron/import/column.rb', line 195

# Combined reader/writer for the column's data type.
#
# Called with arguments, forwards them to #internal_type (the aliased
# original accessor).  Called without arguments, reports the effective type:
# the explicitly set @type if there is one, else :raw when a custom parser is
# present, else :string.
def type(*args)
  return internal_type(*args) unless args.empty?
  return @type if @type

  parses? ? :raw : :string
end

#validate_value(row, parsed_val) ⇒ Object

Applies any validation to a parsed value



232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/iron/import/column.rb', line 232

# Apply the custom #validate block, if any, to a parsed value.
#
# Returns true when there is no validator or the value passes.  Returns false
# when Error.with_context reports an error, or when the validator returns
# exactly false - in which case an "Invalid value" error is also recorded on
# the importer.  Any other return value (including nil) counts as valid.
def validate_value(row, parsed_val)
  return true unless @validate

  verdict = false
  # with_context's return value is used as an "error happened" flag
  failed = Error.with_context(@importer, row, self, parsed_val) { verdict = DslProxy.exec(@importer, parsed_val, row, &@validate) }
  return false if failed
  return true unless verdict.is_a?(FalseClass)

  @importer.add_error("Invalid value: #{parsed_val.inspect}", :row => row, :column => self, :value => parsed_val)
  false
end

#validates?Boolean

Returns:

  • (Boolean)


273
274
275
# File 'lib/iron/import/column.rb', line 273

# True when a custom validate block has been set on this column (@validate is not nil).
def validates?
  !@validate.nil?
end