Module: RemoteTable::Delimited

Defined in:
lib/remote_table/delimited.rb

Overview

Parses plaintext comma-separated (CSV), tab-separated (TSV), or really anything-delimited files using Ruby’s CSV parser.

Constant Summary collapse

Engine =
::FasterCSV

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.extended(base) ⇒ Object

Delimited uses Plaintext.



5
6
7
# File 'lib/remote_table/delimited.rb', line 5

# Hook invoked when an object extends Delimited: the same object is also
# extended with Plaintext.
def self.extended(base)
  base.extend(Plaintext)
end

Instance Method Details

#_each ⇒ Object

Yield each row using Ruby’s CSV parser (FasterCSV on Ruby 1.8).



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/remote_table/delimited.rb', line 25

# Yield every parsed row of the local copy to the caller's block.
#
# Rows come from Engine, reading local_copy.encoded_io with csv_options plus
# the resolved headers. Each value is passed through
# RemoteTable.normalize_whitespace. When headers is falsy a row is yielded as
# an Array; otherwise it is yielded as an ActiveSupport::OrderedHash keyed by
# header, and columns whose header key is nil are dropped. A row with no
# present value is skipped unless keep_blank_rows is set.
#
# local_copy.cleanup always runs afterwards, even if parsing or the block raises.
#
# NOTE(review): the options are handed to Engine.new as a positional Hash.
# Ruby 3's CSV.new appears to accept options only as keywords — confirm which
# Ruby/CSV versions must be supported before changing the call.
def _each
  parser = Engine.new(local_copy.encoded_io, csv_options.merge(headers: headers))
  parser.each do |row|
    saw_value = false

    if headers
      # Hash representation, preserving column order.
      record = ::ActiveSupport::OrderedHash.new
      row.each do |key, value|
        next if key.nil?
        value = RemoteTable.normalize_whitespace value
        saw_value = true if !saw_value && !keep_blank_rows && value.present?
        record[key] = value
      end
    else
      # Array representation.
      record = row.map do |value|
        value = RemoteTable.normalize_whitespace value
        saw_value = true if !saw_value && !keep_blank_rows && value.present?
        value
      end
    end

    yield record if saw_value || keep_blank_rows
  end
ensure
  local_copy.cleanup
end

#adaptive_quote_char ⇒ Object



79
80
81
82
83
84
85
# File 'lib/remote_table/delimited.rb', line 79

# Pick the quote character to hand to the CSV engine.
#
# An explicitly configured quote_char always wins. Otherwise, for tab- or
# pipe-delimited input, the NUL character ("\0") is returned; for any other
# delimiter the result is nil.
# NOTE(review): NUL is presumably chosen so that literal quote marks in such
# files are not interpreted as quoting — confirm intent.
def adaptive_quote_char
  return quote_char if quote_char

  "\0" if delimiter == "\t" || delimiter == '|'
end

#csv_options ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/remote_table/delimited.rb', line 66

# Build the options Hash passed to the CSV engine.
#
# :skip_blanks is always present and is the negation of keep_blank_rows.
# :col_sep is added only when a delimiter is set, and :quote_char only when
# adaptive_quote_char returns a value.
def csv_options
  options = { skip_blanks: !keep_blank_rows }
  options[:col_sep] = delimiter if delimiter
  options[:quote_char] = adaptive_quote_char if adaptive_quote_char
  options
end

#headers ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/remote_table/delimited.rb', line 87

# Resolve and memoize the column headers for this table.
#
# The result depends on the configured @headers option:
# * false / nil       -> false
# * :first_row / true -> an Array parsed from the first non-blank line of
#                        local_copy.encoded_io; blank titles are replaced
#                        with "untitled_1", "untitled_2", ... and parsing
#                        stops once the number of untitled columns exceeds
#                        stop_after_untitled_headers (when that is set)
# * Array             -> returned as given
#
# Raises RuntimeError when the option is of any other type, when the input
# ends before a non-blank line is found, or when the engine cannot parse a
# header row out of the first non-blank line.
#
# NOTE(review): csv_options is passed to Engine.parse_line as a positional
# Hash. Ruby 3's CSV.parse_line appears to accept options only as keywords —
# confirm which Ruby/CSV versions must be supported before changing the call.
def headers
  return @_headers if defined?(@_headers)

  @_headers = case @headers
  when FalseClass, NilClass
    false
  when :first_row, TrueClass
    untitled_count = 0

    # Skip leading blank lines. `gets` returns nil at end of input, so fail
    # with a clear message instead of calling #strip on nil.
    line = nil
    loop do
      raw = local_copy.encoded_io.gets
      raise 'No headers found: reached end of input before a non-blank line' if raw.nil?
      line = raw.strip
      break unless line.empty?
    end

    proto_headers = Engine.parse_line(line, csv_options)
    raise "No headers found in first line: #{line.inspect}" unless proto_headers

    proto_headers.each_with_object([]) do |value, titles|
      break titles if stop_after_untitled_headers && untitled_count > stop_after_untitled_headers
      title = RemoteTable.normalize_whitespace value
      titles << (title.present? ? title : "untitled_#{untitled_count += 1}")
    end
  when Array
    @headers
  else
    # Use the raw option here: calling #headers again would recurse forever,
    # because @_headers has not been assigned yet.
    raise "Invalid headers: #{@headers.inspect}"
  end
end

#preprocess! ⇒ Object



17
18
19
20
21
22
# File 'lib/remote_table/delimited.rb', line 17

# Run the preprocessing steps, strictly in the order listed, and return the
# result of the last one. The step methods themselves are defined outside
# this listing.
def preprocess!
  steps = %i[
    delete_harmful!
    convert_eol_to_unix!
    transliterate_whole_file_to_utf8!
    skip_rows!
  ]
  steps.map { |step| __send__(step) }.last
end