Module: DataTools::IO

Included in:
IO
Defined in:
lib/data_tools/io.rb

Instance Attribute Summary

Instance Method Summary

Instance Attribute Details

#headers ⇒ Object (readonly)

Returns the value of attribute headers.



4
5
6
# File 'lib/data_tools/io.rb', line 4

# Column names used as the keys of each imported record.
# Assigned by #import: either the :headers option it was given, or the
# parsed first row of the stream. Read by #line_to_record.
def headers
  @headers
end

#import_options ⇒ Object (readonly)

Returns the value of attribute import_options.



4
5
6
# File 'lib/data_tools/io.rb', line 4

# The options hash consulted while importing: #configure_import merges new
# settings into it, #split reads its :format entry, and #import passes it
# (plus the current :line number) to the record's cleanse call.
# NOTE(review): this is a plain reader; where @import_options is first
# assigned is not visible alongside it — confirm it cannot be nil when read.
def import_options
  @import_options
end

Instance Method Details

#configure_import(options) ⇒ Object



40
41
42
# File 'lib/data_tools/io.rb', line 40

# Merges the given settings into the import options, in place.
#
# The existing options hash is obtained through the import_options reader.
# When that reader returns nil (no options have been set up yet), a fresh
# empty hash is installed in @import_options first, so the first call does
# not fail with NoMethodError on nil.
#
# @param options [Hash] settings to merge; existing keys are overwritten
# @return [Hash] the updated options hash (the same object held in @import_options
#   when this method had to create it, otherwise whatever import_options returned)
def configure_import(options)
  settings = import_options || (@import_options = {})
  settings.merge!(options)
end

#import(opts = {}) ⇒ Object

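Returns an Enumerator over the imported records. Note: although the source comment says "expects a block", the method never yields to a block; iterate the returned Enumerator instead.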



48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/data_tools/io.rb', line 48

# Reads delimited rows from this stream and exposes them as records.
#
# The options are first merged into import_options via configure_import and
# the line counter is reset. Column names come from opts[:headers] when
# given; otherwise the first row is read immediately (at call time, not
# lazily) and parsed as the header row. The remaining rows are read lazily
# as the returned Enumerator is iterated.
#
# Each row has its row separator removed before parsing, so a non-newline
# separator (e.g. "|") does not end up inside the last field or header.
#
# NOTE: an earlier comment said this method "expects a block", but it never
# yields to one; a block passed here is ignored. Iterate the Enumerator.
#
# @param opts [Hash] import settings; :headers (Array of column names) and
#   :rowsep (row separator, defaults to $/) are read here directly
# @return [Enumerator] yields, for every non-blank row, the result of
#   rec.cleanse(import_options + :line) on the row's record hash
# @raise [EOFError] from readline when no :headers are given and the stream
#   is already at end of file
def import(opts = {})
  configure_import(opts)
  @linenumber = 0
  rowsep = opts[:rowsep] || $/
  @headers = opts[:headers] || parseline(readline(rowsep).chomp(rowsep))
  Enumerator.new do |yielder|
    each(rowsep) do |line|
      rec = line_to_record(line.chomp(rowsep))
      next if rec.empty? # silently ignore blank records
      rec.extend DataTools::Hash
      yielder.yield rec.cleanse(import_options.merge(:line => @linenumber))
    end
    # TODO: need to emit anything to trigger a file-completed action? (such as pushing a batch to storage)
  end
end

#line_to_record(line) ⇒ Object



44
45
46
# File 'lib/data_tools/io.rb', line 44

# Turns one raw input line into a record hash keyed by header name.
#
# The line is parsed with parseline and each header is paired with the field
# at the same position. Headers whose field is nil (including headers beyond
# the end of a short row) are left out; fields beyond the last header are
# dropped by the zip.
#
# @param line [String] one raw row, as read from the stream
# @return [Hash] header => field for every non-nil field
def line_to_record(line)
  pairs = headers.zip(parseline(line))
  present = pairs.reject { |_name, value| value.nil? }
  Hash[present]
end

#parseline(line) ⇒ Object



25
26
27
28
29
30
# File 'lib/data_tools/io.rb', line 25

# Counts one more line and splits it into fields.
#
# Line endings (CR or LF) are removed from the very start and very end of
# the string only. Other whitespace is kept, because e.g. there could be
# leading or trailing blank fields delimited by tabs. The anchors are \A and
# \z rather than ^ and $: in Ruby ^/$ match at every line inside the string,
# which also stripped line endings from the middle of a multi-line row (for
# instance a quoted field containing blank lines).
#
# @param line [String] one raw row
# @return [Array] the fields produced by #split for the trimmed row
def parseline(line)
  # Tolerate being called before #import has reset the counter.
  @linenumber = (@linenumber || 0) + 1
  split(line.gsub(/\A[\n\r]+|[\n\r]+\z/, ''))
end

#split(line) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/data_tools/io.rb', line 10

# Splits one row into fields according to import_options[:format], then
# passes every field through DataTools.scour.
#
# Formats:
#   :tsv - split on tab characters
#   :wsv - split on runs of whitespace
#   :qcq - split on the three-character sequence quote-comma-quote (*not*
#          the same as CSV); the opening quote of the first field and the
#          closing quote of the last field are still present at this stage
#   anything else (the default, :csv) - String#parse_csv
#
# Note that String#split without a limit drops trailing empty fields.
#
# parse_csv returns nil for an empty line; that is treated as "no fields" so
# a blank row yields [] instead of raising NoMethodError on nil.
#
# @param line [String] one row with its line ending already removed
# @return [Array] the scoured fields
def split(line)
  fields = case import_options[:format]
  when :tsv # tab-delimited
    line.split("\t")
  when :wsv # whitespace-delimited
    line.split
  when :qcq # quote-comma-quote (*not* the same as CSV)
    line.split('","')
  else # default is :csv
    line.parse_csv || []
  end

  fields.map { |f| DataTools.scour(f) }
end

#unmarshal ⇒ Object



6
7
8
# File 'lib/data_tools/io.rb', line 6

# Deserializes a Ruby object from this stream by handing the receiver
# itself to Marshal.load, and returns the loaded object.
#
# SECURITY: Marshal.load can instantiate arbitrary objects from the bytes it
# reads. Only call this on streams whose contents are trusted.
def unmarshal
  Marshal.load(self)
end