Class: CsvReader

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Enumerable
Defined in:
lib/csvreader/version.rb,
lib/csvreader/base.rb,
lib/csvreader/base.rb,
lib/csvreader/buffer.rb,
lib/csvreader/parser.rb,
lib/csvreader/reader.rb,
lib/csvreader/builder.rb,
lib/csvreader/converter.rb,
lib/csvreader/parser_std.rb,
lib/csvreader/parser_tab.rb,
lib/csvreader/parser_fixed.rb,
lib/csvreader/parser_strict.rb

Overview

Note: CsvReader is a class for now; whether it should become a module instead is an open question.

Defined Under Namespace

Classes: Buffer, Builder, Converter, Error, ParseError, Parser, ParserFixed, ParserStd, ParserStrict, ParserTab

Constant Summary collapse

DEFAULT =

Predefined (built-in) CsvReader formats/dialects.

Builder.new( Parser::DEFAULT )
NUMERIC =
Builder.new( Parser::NUMERIC )
STRICT =
Builder.new( Parser::STRICT )
RFC4180 =
Builder.new( Parser::RFC4180 )
EXCEL =
Builder.new( Parser::EXCEL )
MYSQL =
Builder.new( Parser::MYSQL )
POSTGRES =
POSTGRESQL           = Builder.new( Parser::POSTGRESQL )
POSTGRES_TEXT =
POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
TAB =
Builder.new( Parser::TAB )
FIXED =
Builder.new( Parser::FIXED )
MAJOR =

TODO: consider namespacing the version constants (e.g. inside a Version module) - open question.

1
MINOR =
1
PATCH =
2
VERSION =
[MAJOR,MINOR,PATCH].join('.')

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data, sep: nil, converters: nil, width: nil, parser: nil) ⇒ CsvReader

Returns a new instance of CsvReader.

Raises:

  • (ArgumentError)


113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/csvreader/reader.rb', line 113

# Builds a reader on top of +data+.
#
# @param data a String gets wrapped in a StringIO; any other (non-nil)
#   object is stored unchanged as the io to read from
# @param sep (optional) separator - for ParserStd, ParserStrict
# @param converters handed to Converter.create_converters
# @param width (optional) width - for ParserFixed
# @param parser parser to use; Parser::DEFAULT if nil
# @raise [ArgumentError] if data is nil
def initialize( data, sep: nil, converters: nil, width: nil, parser: nil )
  raise ArgumentError, "Cannot parse nil as CSV"  if data.nil?

  # the IO object all records get read from
  @io = case data
        when String then StringIO.new( data )
        else             data
        end

  @sep   = sep
  @width = width

  @converters = Converter.create_converters( converters )

  @parser = if parser.nil?
              Parser::DEFAULT
            else
              parser
            end
end

Class Method Details



16
17
18
# File 'lib/csvreader/version.rb', line 16

# Returns a one-line banner made of the library VERSION plus
# the running Ruby's version, release date and platform.
def self.banner
  format( "csvreader/%s on Ruby %s (%s) [%s]",
          VERSION, RUBY_VERSION, RUBY_RELEASE_DATE, RUBY_PLATFORM )
end

.defaultObject

alternative alias for DEFAULT



109
# File 'lib/csvreader/base.rb', line 109

# Alternative alias for DEFAULT - returns the DEFAULT constant.
def self.default
  DEFAULT
end

.excelObject

alternative alias for EXCEL



115
# File 'lib/csvreader/base.rb', line 115

# Alternative alias for EXCEL - returns the EXCEL constant.
def self.excel
  EXCEL
end

.fObject



124
# File 'lib/csvreader/base.rb', line 124

# Shortest alias - returns whatever fixed returns.
def self.f
  fixed
end

.fixObject



123
# File 'lib/csvreader/base.rb', line 123

# Short alias - returns whatever fixed returns.
def self.fix
  fixed
end

.fixedObject



122
# File 'lib/csvreader/base.rb', line 122

# Alternative alias for FIXED - returns the FIXED constant.
def self.fixed
  FIXED
end

.foreach(path, sep: nil, converters: nil, width: nil, parser: nil, &block) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/csvreader/reader.rb', line 62

# Opens the file at +path+ (via open) and iterates over its records.
#
# With a block every record is passed to the block and the reader gets
# closed afterwards (even if the block raises). Without a block an
# enumerator is returned and nothing gets closed here.
#
# sep, converters, width and parser are passed on unchanged to open.
def self.foreach( path, sep: nil, converters: nil, width: nil, parser: nil, &block )
  reader = open( path, sep: sep, converters: converters, width: width, parser: parser )

  ## no block given: note: caller (responsible) must close file!!!
  ##   remove version without block given - why? why not?
  ##   use Csv.open().to_enum  or Csv.open().each
  ##     or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
  return reader.to_enum  unless block_given?

  begin
    reader.each( &block )
  ensure
    reader.close
  end
end

.header(path, sep: nil, width: nil, parser: nil) ⇒ Object

Open question: name this method header or headers - or provide both (with an alias)?



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/csvreader/reader.rb', line 43

# Returns the first record of the file at +path+, or nil if there is none.
#
# Only the first record gets parsed/read; the file is opened via open with
# the given sep, width and parser (no converters are passed on).
#
# Open question: use header or headers - or use both (with alias)?
def self.header( path, sep: nil, width: nil, parser: nil )
  first_record = nil    ## stays nil for empty (for now) - why? why not?

  open( path, sep: sep, width: width, parser: parser ) do |reader|
    reader.each do |record|
      first_record = record
      break   ## only parse/read first record
    end
  end

  first_record
end

.mysqlObject



116
# File 'lib/csvreader/base.rb', line 116

# Alternative alias for MYSQL - returns the MYSQL constant.
def self.mysql
  MYSQL
end

.nObject



112
# File 'lib/csvreader/base.rb', line 112

# Shortest alias - returns whatever numeric returns.
def self.n
  numeric
end

.numObject



111
# File 'lib/csvreader/base.rb', line 111

# Short alias - returns whatever numeric returns.
def self.num
  numeric
end

.numericObject



110
# File 'lib/csvreader/base.rb', line 110

# Alternative alias for NUMERIC - returns the NUMERIC constant.
def self.numeric
  NUMERIC
end

.open(path, mode = nil, sep: nil, converters: nil, width: nil, parser: nil, &block) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/csvreader/reader.rb', line 5

# Opens the file at +path+ and wraps it in a new reader.
#
# With a block the reader is passed to the block, closed once the block
# finishes (even if it raises), and the block's value is returned - like
# Ruby's open(), not like the (old old) CSV library. Without a block the
# reader itself is returned.
#
# If building the reader fails, the freshly opened file is closed before
# the error is re-raised, so no file handle is leaked.
#
# @param path file to open   ## rename path to filename or name - why? why not?
# @param mode File.open mode; 'r:bom|utf-8' is used if nil/not passed in
# @param sep passed on unchanged to new
# @param converters passed on unchanged to new
# @param width passed on unchanged to new
# @param parser passed on unchanged to new
# @return the block's value, or the reader if no block is given
def self.open( path, mode=nil,
               sep: nil,
               converters: nil,
               width: nil,
               parser: nil, &block )

  ## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
  f = File.open( path, mode || 'r:bom|utf-8' )

  begin
    csv = new( f, sep: sep,
                  converters: converters,
                  width: width,
                  parser: parser )
  rescue
    f.close    # reader could not be built - do not leak the open file
    raise
  end

  return csv  unless block_given?

  begin
    block.call( csv )
  ensure
    csv.close
  end
end

.parse(data, sep: nil, converters: nil, width: nil, parser: nil, &block) ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/csvreader/reader.rb', line 81

# Builds a reader for +data+ (via new) and parses it.
#
# With a block every record is passed to the block (result of each is
# returned); without a block all records are slurped with read.
#
# note: caller (responsible) must close file!!! - add autoclose - why? why not?
def self.parse( data, sep: nil, converters: nil, width: nil, parser: nil, &block )
  reader = new( data, sep: sep, converters: converters, width: width, parser: parser )

  # slurp contents, if no block is given
  return reader.read  unless block_given?

  reader.each( &block )
end

.parse_line(data, sep: nil, converters: nil, width: nil) ⇒ Object

Note: parse_line is a convenience helper for the default parser only (it takes no parser: option).

Open question: should callers always use parse and never parse_line - why? why not?
TODO/FIXME: remove parse_line.


100
101
102
103
104
105
106
107
108
# File 'lib/csvreader/reader.rb', line 100

# Parses +data+ (via parse) and returns the first record only,
# or nil if there is no record at all.
#
# Takes no parser option; sep, converters and width are passed on to parse.
def self.parse_line( data, sep: nil, converters: nil, width: nil )
  parse( data, sep: sep, converters: converters, width: width ) do |record|
    return record   # only parse first record
  end
  nil               # no record found
end

.postgresObject



118
# File 'lib/csvreader/base.rb', line 118

# Short alias - returns whatever postgresql returns.
def self.postgres
  postgresql
end

.postgres_textObject



120
# File 'lib/csvreader/base.rb', line 120

# Short alias - returns whatever postgresql_text returns.
def self.postgres_text
  postgresql_text
end

.postgresqlObject



117
# File 'lib/csvreader/base.rb', line 117

# Alternative alias for POSTGRESQL - returns the POSTGRESQL constant.
def self.postgresql
  POSTGRESQL
end

.postgresql_textObject



119
# File 'lib/csvreader/base.rb', line 119

# Alternative alias for POSTGRESQL_TEXT - returns the POSTGRESQL_TEXT constant.
def self.postgresql_text
  POSTGRESQL_TEXT
end

.read(path, sep: nil, converters: nil, width: nil, parser: nil) ⇒ Object



31
32
33
34
35
36
37
38
39
40
# File 'lib/csvreader/reader.rb', line 31

# Opens the file at +path+ (via open, block form) and returns
# the result of calling read on the reader.
#
# sep, converters, width and parser are passed on unchanged to open.
def self.read( path, sep: nil, converters: nil, width: nil, parser: nil )
  options = { sep: sep, converters: converters, width: width, parser: parser }

  open( path, **options ) do |reader|
    reader.read
  end
end

.rfc4180Object

alternative alias for RFC4180



114
# File 'lib/csvreader/base.rb', line 114

# Alternative alias for RFC4180 - returns the RFC4180 constant.
def self.rfc4180
  RFC4180
end

.rootObject



20
21
22
# File 'lib/csvreader/version.rb', line 20

# Returns the absolute path of the directory three levels up from this
# source file (i.e. dirname applied three times to __FILE__).
def self.root
  file_dir   = File.dirname( __FILE__ )
  parent_dir = File.dirname( file_dir )
  root_dir   = File.dirname( parent_dir )
  File.expand_path( root_dir )
end

.strictObject

alternative alias for STRICT



113
# File 'lib/csvreader/base.rb', line 113

# Alternative alias for STRICT - returns the STRICT constant.
def self.strict
  STRICT
end

.tabObject



121
# File 'lib/csvreader/base.rb', line 121

# Alternative alias for TAB - returns the TAB constant.
def self.tab
  TAB
end

.versionObject



12
13
14
# File 'lib/csvreader/version.rb', line 12

# Returns the library version - that is, the VERSION constant.
def self.version() VERSION; end

Instance Method Details

#each(&block) ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/csvreader/reader.rb', line 146

# Parses the io with the configured parser and passes every record to
# the block. Without a block an enumerator (to_enum) is returned.
#
# Parser options are passed on as keyword arguments (**kwargs) so the call
# also works with Ruby 3+, where a trailing positional hash is no longer
# converted to keywords:
#   - sep   - only if set (not nil) and the parser responds to sep=
#   - width - only if the parser is a ParserFixed
#
# If converters are present, every value of a record is run through
# @converters.convert( value, index ) before the record is passed on.
#
# @yield [record] one record at a time
# @return result of the parser's parse call, or an enumerator if no block
def each( &block )
  return to_enum  unless block_given?

  kwargs = {}
  ## note: only add separator if present/defined (not nil)
  ##  todo/fix: change sep keyword to "known" classes!!!!
  kwargs[:sep]   = @sep    if @sep && @parser.respond_to?( :'sep=' )

  kwargs[:width] = @width  if @parser.is_a?( ParserFixed )

  ## check array / pipeline of converters is empty (size=0 e.g. is [])
  if @converters.empty?
    @parser.parse( @io, **kwargs, &block )
  else
    ## add "post"-processing with converters pipeline
    ##   that is, convert all strings to integer, float, date, ... if wanted
    @parser.parse( @io, **kwargs ) do |raw_record|
      record = raw_record.each_with_index.map do |value, i|
        @converters.convert( value, i )
      end
      block.call( record )
    end
  end
end

#readObject

Reads all records and returns them as an array (calls to_a).



174
# File 'lib/csvreader/reader.rb', line 174

# Returns all records at once - simply the result of to_a.
def read
  to_a
end