Class: DataTransport::DataStore::File

Inherits:
DataTransport::DataStore show all
Defined in:
lib/data_transport/data_store/file.rb

Overview

Data store that reads and writes records in a flat text file.

Although this class can read and write CSV files, you should use the CSVFile data store for that instead of this one.

Direct Known Subclasses

CSVFile

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from DataTransport::DataStore

#finalize, #reset

Constructor Details

#initialize(options = {}) ⇒ File

Accepts the following options:

header

If true, the file has a header row that contains the name of each field. Default is false.

delimiter

String that separates individual fields in a row. Default is “\t” (a tab character).

enclosure

String that encloses individual fields. For example, if this is set to “"”, fields will be enclosed in double quotes. Default is nil (no enclosure).

escape

Escape sequence for occurrences of the enclosure string in field values. Set this to the special value :double if enclosure characters are escaped by doubling them (like in CSV and SQL). Default is nil.

path

Path to the file.

null

String that represents fields whose value is nil (but not blank). Default is “”.

keys

Array of field names. Not necessary for files with a header row. Default for files without a header row is fieldXX, where XX is numbered sequentially starting from 00.

Raises:

  • (ArgumentError)


29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/data_transport/data_store/file.rb', line 29

# Builds a flat-file data store from an options hash.
#
# Recognized options are :header, :delimiter (default "\t"), :enclosure,
# :escape (or the special value :double, meaning the enclosure string is
# escaped by doubling it), :path (required), :null (default "") and :keys.
#
# The hash passed by the caller is left untouched.
#
# Raises ArgumentError if :path is missing, if :escape is given without
# :enclosure, or if any option other than those listed above is present.
def initialize(options = {})
  super()
  # Work on a copy so that extracting options does not empty the caller's hash.
  options = options.dup
  # Extract options.
  @header    = options.delete(:header)
  @delimiter = options.delete(:delimiter) || "\t"
  @enclosure = options.delete(:enclosure)
  @escape    = options.delete(:escape)
  @path      = options.delete(:path)
  @null      = options.delete(:null) || ""
  @keys      = options.delete(:keys)
  # Validate options.
  raise(ArgumentError, "missing required option `path'") if @path.nil?
  if @escape && @enclosure.nil?
    raise(ArgumentError, "`escape' cannot be used without `enclosure'")
  end
  unless options.empty?
    # Whatever is left over was not recognized. Hash has no #join, so the
    # leftover option names are joined explicitly.
    raise(ArgumentError, "unrecognized options: `#{options.keys.join("', `")}'")
  end
  # Handle the special :double escape sequence.
  @escape = @enclosure if @escape == :double
  # Create an enclosure placeholder, which is used to avoid clobbering
  # escaped enclosure characters during parsing.
  if @escape
    # Pick a control character that differs from the enclosure itself.
    safe_ch = @enclosure == 0.chr ? 1.chr : 0.chr
    @placeholder = "#{safe_ch}__ENCLOSURE_PLACEHOLDER__#{safe_ch}"
  end
end

Instance Attribute Details

#modeObject

:nodoc:



8
9
10
# File 'lib/data_transport/data_store/file.rb', line 8

# Returns the value currently held in @mode. Other methods of this class
# assign :input before reading and :output before writing.
def mode
  @mode
end

Instance Method Details

#countObject

Returns the number of lines in the file (not counting the header, if there is one).



63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/data_transport/data_store/file.rb', line 63

# Returns the number of lines in the file, not counting the header row when
# the store was configured with one. The result is cached in @count, so the
# file is scanned at most once.
#
# An empty file yields 0, even when a header row is expected.
def count
  return @count if @count
  self.mode = :input
  line_count = 0
  # rewind_and_restore is defined elsewhere; presumably it rewinds the
  # stream, yields, and then restores the previous position.
  rewind_and_restore do
    # Skip the header row. IO#gets returns nil at end of file, whereas
    # IO#readline would raise EOFError for an empty file.
    io.gets if @header
    line_count += 1 while io.gets
  end
  @count = line_count
end

#each_record(batch_size = nil) ⇒ Object

:nodoc:



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/data_transport/data_store/file.rb', line 77

# Reads the file from the beginning and yields one Hash per line, mapping
# each key to the value found in the same position on that line. The header
# row, if the store has one, is skipped. batch_size is accepted but not used
# by this method.
#
# An empty file yields nothing, even when a header row is expected.
#
# Raises RuntimeError if a line has a different number of values than there
# are keys.
def each_record(batch_size = nil) # :nodoc:
  self.mode = :input
  io.rewind
  # Skip the header row. IO#gets returns nil at end of file, whereas
  # IO#readline would raise EOFError for an empty file.
  io.gets if @header
  while (line = io.gets)
    values = values_from_s(line.chomp)
    # Read the key list once per record rather than once per field.
    names = keys
    if names.length != values.length
      raise RuntimeError, "wrong number of fields (#{values.length} for #{names.length})"
    end
    record = {}
    names.each_with_index { |name, i| record[name] = values[i] }
    yield record
  end
end

#write_record(record) ⇒ Object

:nodoc:



94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/data_transport/data_store/file.rb', line 94

# Appends one record to the output as a single line. If no key order has
# been set yet, the record's keys sorted by their string form become the
# key order. Nil values are written as the configured null string.
def write_record(record) # :nodoc:
  self.mode = :output
  # Fall back to a deterministic key order when none was supplied.
  @keys ||= record.keys.sort { |left, right| left.to_s <=> right.to_s }
  # A stream still at position 0 has had nothing written to it, so the
  # header row goes out first.
  io.puts(values_to_s(keys)) if @header && io.pos == 0
  # Emit the values in key order, substituting the null marker for nil.
  row = keys.collect do |key|
    value = record[key]
    value.nil? ? @null : value
  end
  io.puts(values_to_s(row))
end