Module: DataTools::IO
- Included in:
- IO
- Defined in:
- lib/data_tools/io.rb
Instance Attribute Summary collapse
-
#headers ⇒ Object
readonly
Returns the value of attribute headers.
-
#import_options ⇒ Object
readonly
Returns the value of attribute import_options.
Instance Method Summary collapse
- #configure_import(options) ⇒ Object
-
#import(opts = {}) ⇒ Object
Returns an Enumerator of imported records; iterate over it with a block (the method itself does not take one).
- #line_to_record(line) ⇒ Object
- #parseline(line) ⇒ Object
- #split(line) ⇒ Object
- #unmarshal ⇒ Object
Instance Attribute Details
#headers ⇒ Object (readonly)
Returns the value of attribute headers.
4 5 6 |
# File 'lib/data_tools/io.rb', line 4

# Reader for the +@headers+ instance variable.
#
# +import+ assigns it from +opts[:headers]+ or from the parsed first row, and
# +line_to_record+ zips it with each row's fields.
#
# @return [Object] the current value of +@headers+ (nil until it has been set)
def headers
  @headers
end
#import_options ⇒ Object (readonly)
Returns the value of attribute import_options.
4 5 6 |
# File 'lib/data_tools/io.rb', line 4

# Reader for the +@import_options+ instance variable.
#
# The method name was missing from this listing (it read "def @import_options
# end"); it is restored here from the attribute heading.
#
# @return [Object] the current value of +@import_options+ (nil until it has been set)
def import_options
  @import_options
end
Instance Method Details
#configure_import(options) ⇒ Object
40 41 42 |
# File 'lib/data_tools/io.rb', line 40

# Merges +options+ into the import settings hash, in place.
#
# NOTE(review): the parameter name and the receiver of +merge!+ were missing
# from this listing; they are restored from the documented signature
# (+configure_import(options)+) and the +import_options+ attribute -- confirm
# against lib/data_tools/io.rb.
#
# @param options [Hash] settings to merge; existing keys are overwritten
# @return [Hash] the updated settings hash
def configure_import(options)
  # The visible reader is a plain ivar reader, so start from an empty hash when
  # nothing has been configured yet instead of calling merge! on nil.
  settings = import_options || (@import_options = {})
  settings.merge!(options)
end
#import(opts = {}) ⇒ Object
Returns an Enumerator of imported records; iterate over it with a block (the method itself does not take one).
48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/data_tools/io.rb', line 48

# Reads delimited rows from the receiver and exposes them as records.
#
# The original note said "expects a block", but this method never yields to a
# caller-supplied block: it returns an Enumerator, and rows are only read and
# converted while that Enumerator is iterated.
#
# @param opts [Hash] import settings, passed to +configure_import+. Keys read
#   directly here:
#   * +:headers+ - column names; when absent, the first row is read from the
#     receiver and parsed as the header row
#   * +:rowsep+  - row separator for +readline+/+each+ (defaults to +$/+)
# @return [Enumerator] yields one cleansed record per non-blank row
def import(opts = {})
  configure_import(opts)
  @linenumber = 0
  row_separator = opts[:rowsep] || $/
  # With explicit headers the first row is treated as data, not consumed here.
  @headers = opts[:headers] || parseline(readline(row_separator))
  Enumerator.new do |yielder|
    each(row_separator) do |line|
      record = line_to_record(line)
      next if record.empty? # silently ignore blank records
      record.extend DataTools::Hash
      # NOTE(review): the receiver of merge was missing from this listing and
      # is restored as import_options; presumably #cleanse is supplied by
      # DataTools::Hash -- confirm both against lib/data_tools/io.rb.
      yielder.yield record.cleanse(import_options.merge(:line => @linenumber))
    end
    # need to emit anything to trigger a file-completed action? (such as pushing a batch to storage)
  end
end
#line_to_record(line) ⇒ Object
44 45 46 |
# File 'lib/data_tools/io.rb', line 44

# Converts one raw row into a Hash keyed by header.
#
# The row is parsed with +parseline+ and its fields are paired positionally
# with +headers+. Pairs whose value is nil are dropped, so a row with fewer
# fields than headers yields a smaller Hash; fields beyond the last header are
# discarded by +zip+.
#
# @param line [String] one raw row
# @return [Hash] header => field value, without nil values
def line_to_record(line)
  pairs = headers.zip(parseline(line))
  Hash[pairs.reject { |_header, value| value.nil? }]
end
#parseline(line) ⇒ Object
25 26 27 28 29 30 |
# File 'lib/data_tools/io.rb', line 25

# Counts the row, strips its surrounding line endings and splits it into fields.
#
# @param line [String] one raw row, possibly with leading/trailing CR or LF
# @return [Array] the fields returned by +split+
def parseline(line)
  # Tolerate being called before import has reset the counter.
  @linenumber = (@linenumber || 0) + 1
  # remove leading and trailing line endings (CR or LF)
  # but NOT whitespace, because e.g. there could be leading or trailing blank fields delimited by tabs
  # \A and \z anchor to the whole string; ^ and $ match at every internal line
  # boundary and would also eat line endings inside a multi-line row.
  split(line.gsub(/\A[\n\r]+|[\n\r]+\z/, ''))
end
#split(line) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/data_tools/io.rb', line 10

# Splits one row into fields according to the configured :format, then passes
# every field through +DataTools.scour+.
#
# NOTE(review): the receiver of +[:format]+ was missing from this listing and
# is restored as +import_options+ -- confirm against lib/data_tools/io.rb.
#
# @param line [String] one row with its line endings already removed
# @return [Array] the scoured fields (empty for an empty CSV row)
def split(line)
  fields =
    case import_options[:format]
    when :tsv # tab-delimited
      # NOTE(review): String#split without a limit drops trailing empty
      # fields; left unchanged because callers may rely on the field count.
      line.split("\t")
    when :wsv # whitespace-delimited
      line.split
    when :qcq # quote-comma-quote (*not* the same as CSV)
      line.split('","')
    else # default is :csv
      # parse_csv returns nil for an empty row; coerce to [] so blank rows
      # produce no fields instead of raising NoMethodError on nil.map.
      Array(line.parse_csv)
    end
  fields.map { |field| DataTools.scour(field) }
end
#unmarshal ⇒ Object
6 7 8 |
# File 'lib/data_tools/io.rb', line 6

# Deserializes the receiver with +Marshal.load+ and returns the resulting object.
#
# SECURITY: Marshal.load can instantiate arbitrary objects from its input.
# Only call this on data from a trusted source.
#
# @return [Object] the object reconstructed from the marshalled data
def unmarshal
  Marshal.load(self)
end