Class: CsvHashReader

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Enumerable
Defined in:
lib/csvreader.rb,
lib/csvreader/reader_hash.rb

Constant Summary collapse

DEFAULT =

Pre-defined (built-in) CsvReader formats/dialects.

CsvHashBuilder.new( CsvReader::Parser::DEFAULT )
STRICT =
CsvHashBuilder.new( CsvReader::Parser::STRICT )
RFC4180 =
CsvHashBuilder.new( CsvReader::Parser::RFC4180 )
EXCEL =
CsvHashBuilder.new( CsvReader::Parser::EXCEL )
MYSQL =
CsvHashBuilder.new( CsvReader::Parser::MYSQL )
POSTGRES =
POSTGRESQL           = CsvHashBuilder.new( CsvReader::Parser::POSTGRESQL )
POSTGRES_TEXT =
POSTGRESQL_TEXT = CsvHashBuilder.new( CsvReader::Parser::POSTGRESQL_TEXT )
TAB =
CsvHashBuilder.new( CsvReader::Parser::TAB )

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil) ⇒ CsvHashReader

Returns a new instance of CsvHashReader.

Raises:

  • (ArgumentError)


99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/csvreader/reader_hash.rb', line 99

def initialize( data, headers: nil, sep: nil,
                      converters: nil,
                      header_converters: nil,
                      parser: nil )
      raise ArgumentError.new( "Cannot parse nil as CSV" )  if data.nil?
      ## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV"     if data.nil?


      # create the IO object we will read from

      @io = data.is_a?(String) ? StringIO.new(data) : data

      ## pass in headers as array e.g. ['A', 'B', 'C']

      ##  double check: run header_converters on passed in headers?

      ##    for now - do NOT auto-convert passed in headers - keep them as-is (1:1)

      @names = headers ? headers : nil

      @sep = sep

      @converters        = CsvReader::Converter.create_converters( converters )
      @header_converters = CsvReader::Converter.create_header_converters( header_converters )

      @parser = parser.nil? ? CsvReader::Parser::DEFAULT : parser
end

Class Method Details

.defaultObject

alternative alias for DEFAULT



117
# File 'lib/csvreader.rb', line 117

# Method-style accessor for the DEFAULT constant.
def self.default
  DEFAULT
end

.excelObject

alternative alias for EXCEL



120
# File 'lib/csvreader.rb', line 120

# Method-style accessor for the EXCEL constant.
def self.excel
  EXCEL
end

.foreach(path, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil, &block) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/csvreader/reader_hash.rb', line 49

# Opens +path+ (via open) and iterates over its records.
#
# All keyword arguments are forwarded unchanged to open.
#
# With a block: passes the block to the reader's each and closes the reader
# afterwards (also when the block raises); returns whatever each returns.
# Without a block: returns reader.to_enum - note: the reader is NOT closed
# in this case, the caller is responsible for closing the file!
def self.foreach( path, headers: nil,
                        sep: nil,
                        converters: nil,
                        header_converters: nil,
                        parser: nil, &block )
  options = { headers:           headers,
              sep:               sep,
              converters:        converters,
              header_converters: header_converters,
              parser:            parser }

  reader = open( path, **options )

  ## todo: remove version without block given - why? why not?
  return reader.to_enum  unless block_given?

  begin
    reader.each( &block )
  ensure
    reader.close
  end
end

.mysqlObject



121
# File 'lib/csvreader.rb', line 121

# Method-style accessor for the MYSQL constant.
def self.mysql
  MYSQL
end

.open(path, mode = nil, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil, &block) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/csvreader/reader_hash.rb', line 6

# Opens the file at +path+ and wraps it in a new reader.
#
# path - handed as-is to File.open
# mode - File.open mode; 'r:bom|utf-8' is used when nil / not passed in
# headers, sep, converters, header_converters, parser -
#        forwarded unchanged to new
#
# With a block: yields the reader, calls close on it afterwards (also when
# the block raises) and returns the block's value - like Ruby's open(),
# not like the (old old) CSV library.
# Without a block: returns the reader; the caller must close it.
#
# If building the reader fails, the already opened file is closed before
# the error is re-raised (no leaked file handle).
def self.open( path, mode=nil,
               headers: nil,
               sep: nil,
               converters: nil,
               header_converters: nil,
               parser: nil, &block )   ## rename path to filename or name - why? why not?
  f = File.open( path, mode || 'r:bom|utf-8' )

  begin
    csv = new( f, headers: headers,
                  sep: sep,
                  converters: converters,
                  header_converters: header_converters,
                  parser: parser )
  rescue StandardError
    f.close    # nobody else holds a reference to f at this point
    raise
  end

  return csv  unless block_given?

  begin
    block.call( csv )
  ensure
    csv.close
  end
end

.parse(data, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil, &block) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/csvreader/reader_hash.rb', line 76

# Builds a reader for +data+ (all keyword arguments are forwarded to new).
#
# With a block: passes the block to the reader's each and returns its result.
# Without a block: slurps the contents, that is, returns reader.read.
#
# note: the reader is never closed here - the caller (responsible) must
#   close the underlying io - add autoclose - why? why not?
def self.parse( data, headers: nil,
                      sep: nil,
                      converters: nil,
                      header_converters: nil,
                      parser: nil, &block )
  options = { headers:           headers,
              sep:               sep,
              converters:        converters,
              header_converters: header_converters,
              parser:            parser }

  reader = new( data, **options )

  return reader.read  unless block_given?

  reader.each( &block )
end

.postgresObject



123
# File 'lib/csvreader.rb', line 123

# Short alias - returns the same object as postgresql.
def self.postgres
  postgresql
end

.postgres_textObject



125
# File 'lib/csvreader.rb', line 125

# Short alias - returns the same object as postgresql_text.
def self.postgres_text
  postgresql_text
end

.postgresqlObject



122
# File 'lib/csvreader.rb', line 122

# Method-style accessor for the POSTGRESQL constant.
def self.postgresql
  POSTGRESQL
end

.postgresql_textObject



124
# File 'lib/csvreader.rb', line 124

# Method-style accessor for the POSTGRESQL_TEXT constant.
def self.postgresql_text
  POSTGRESQL_TEXT
end

.read(path, headers: nil, sep: nil, converters: nil, header_converters: nil, parser: nil) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/csvreader/reader_hash.rb', line 34

# Opens +path+ (via open, block form), reads everything with the reader's
# read and returns the result.
#
# All keyword arguments are forwarded unchanged to open.
def self.read( path, headers: nil,
                     sep: nil,
                     converters: nil,
                     header_converters: nil,
                     parser: nil )
  options = { headers:           headers,
              sep:               sep,
              converters:        converters,
              header_converters: header_converters,
              parser:            parser }

  open( path, **options ) do |csv|
    csv.read
  end
end

.rfc4180Object

alternative alias for RFC4180



119
# File 'lib/csvreader.rb', line 119

# Method-style accessor for the RFC4180 constant.
def self.rfc4180
  RFC4180
end

.strictObject

alternative alias for STRICT



118
# File 'lib/csvreader.rb', line 118

# Method-style accessor for the STRICT constant.
def self.strict
  STRICT
end

.tabObject



126
# File 'lib/csvreader.rb', line 126

# Method-style accessor for the TAB constant.
def self.tab
  TAB
end

Instance Method Details

#each(&block) ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
# File 'lib/csvreader/reader_hash.rb', line 141

# Parses @io with @parser and yields every record as a hash
# (header name => value).
#
# If no header names were passed in, the first parsed row is consumed as the
# header row (run through the header converters, if any) and is not yielded.
# Values are run through the converters pipeline (if any) before yielding.
#
# Returns an enumerator (to_enum) if no block is given; otherwise returns
# whatever @parser.parse returns.
#
## todo/fix:
##   add case for headers/names.size != values.size
##   - with more values than names the extra values are silently dropped (zip)
##   - with less values than names the missing values are nil (zip)
##   - add rest option? (see python csv.DictReader - why? why not?)
##
##   handle case with duplicate and empty header names etc.
def each( &block )
  return to_enum  unless block_given?

  kwargs = {}
  ## note: only add separator if present/defined (not nil)
  kwargs[:sep] = @sep    if @sep && @parser.respond_to?( :'sep=' )

  ## note: options MUST get passed along as keywords (**) and not as a
  ##   positional hash; otherwise a parser declaring sep: as a keyword
  ##   argument raises an ArgumentError with ruby 3+
  @parser.parse( @io, **kwargs ) do |raw_values|
    if @names.nil?    ## check for (first) headers row
      ## store header row / a.k.a. field/column names
      @names = if @header_converters.empty?
                 raw_values
               else
                 raw_values.each_with_index.map do |value, i|
                   @header_converters.convert( value, i )
                 end
               end
    else    ## "regular" record
      raw_record = @names.zip( raw_values ).to_h

      if @converters.empty?
        block.call( raw_record )
      else
        ## "post"-processing with converters pipeline
        ##   that is, convert all strings to integer, float, date, ... if wanted
        record = raw_record.each_with_object( {} ) do |(key, value), converted|
          converted[ key ] = @converters.convert( value, key )
        end
        block.call( record )
      end
    end
  end
end

#readObject

Reads all records into an array (shorthand for to_a).



186
# File 'lib/csvreader/reader_hash.rb', line 186

# Slurps all records - simply returns to_a.
def read
  to_a
end