Class: CsvReader

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Includes:
Enumerable
Defined in:
lib/csvreader/version.rb,
lib/csvreader.rb,
lib/csvreader.rb,
lib/csvreader/buffer.rb,
lib/csvreader/parser.rb,
lib/csvreader/reader.rb,
lib/csvreader/converter.rb,
lib/csvreader/parser_std.rb,
lib/csvreader/parser_tab.rb,
lib/csvreader/parser_strict.rb

Overview

Note: CsvReader is a class for now; whether it should become a module instead is an open question.

Defined Under Namespace

Classes: Buffer, Converter, Error, ParseError, Parser, ParserStd, ParserStrict, ParserTab

Constant Summary collapse

DEFAULT =

Predefined (built-in) CsvReader formats/dialects.

CsvBuilder.new( Parser::DEFAULT )
STRICT =
CsvBuilder.new( Parser::STRICT )
RFC4180 =
CsvBuilder.new( Parser::RFC4180 )
EXCEL =
CsvBuilder.new( Parser::EXCEL )
MYSQL =
CsvBuilder.new( Parser::MYSQL )
POSTGRES =
POSTGRESQL           = CsvBuilder.new( Parser::POSTGRESQL )
POSTGRES_TEXT =
POSTGRESQL_TEXT = CsvBuilder.new( Parser::POSTGRESQL_TEXT )
TAB =
CsvBuilder.new( Parser::TAB )
MAJOR =

TODO: consider namespacing these constants (e.g. inside a version namespace) - open question.

1
MINOR =
0
PATCH =
0
VERSION =
[MAJOR,MINOR,PATCH].join('.')

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data, sep: nil, converters: nil, parser: nil) ⇒ CsvReader

Returns a new instance of CsvReader.

Raises:

  • (ArgumentError)


111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/csvreader/reader.rb', line 111

def initialize( data, sep: nil, converters: nil, parser: nil )
      raise ArgumentError.new( "Cannot parse nil as CSV" )  if data.nil?
      ## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV"     if data.nil?

      # create the IO object we will read from
      @io = data.is_a?(String) ? StringIO.new(data) : data

      @sep = sep

      @converters  = Converter.create_converters( converters )

      @parser = parser.nil? ? Parser::DEFAULT : parser
end

Class Method Details



16
17
18
# File 'lib/csvreader/version.rb', line 16

# One-line identification string: library version plus the running
# Ruby's version, release date and platform.
def self.banner
  ruby_info = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  "csvreader/#{VERSION} on #{ruby_info}"
end

.defaultObject

alternative alias for DEFAULT



89
# File 'lib/csvreader.rb', line 89

# Alternative alias for the DEFAULT constant.
def self.default
  DEFAULT
end

.excelObject

alternative alias for EXCEL



92
# File 'lib/csvreader.rb', line 92

# Alternative alias for the EXCEL constant.
def self.excel
  EXCEL
end

.foreach(path, sep: nil, converters: nil, parser: nil, &block) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/csvreader/reader.rb', line 61

# Iterate over the records of the CSV file at +path+.
#
# With a block: opens a reader, passes every record to the block and
# always closes the reader afterwards (also when the block raises or
# breaks); returns whatever the reader's #each returns.
#
# Without a block: returns an Enumerator. Nothing is opened up front;
# every enumeration re-enters this method with a block, so the reader is
# opened and closed per enumeration. (Previously the reader was opened
# eagerly and only an Enumerator handed back, leaving the caller no
# handle to close it.)
#
# path       - file to read
# sep        - optional separator, forwarded to open
# converters - optional converters, forwarded to open
# parser     - optional parser, forwarded to open
def self.foreach( path, sep: nil,
                        converters: nil, parser: nil, &block )
  unless block_given?
    return to_enum( __method__, path, sep: sep,
                                      converters: converters,
                                      parser: parser )
  end

  csv = open( path, sep: sep, converters: converters, parser: parser )
  begin
    csv.each( &block )
  ensure
    csv.close
  end
end

.header(path, sep: nil, parser: nil) ⇒ Object

use header or headers - or use both (with alias)?



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/csvreader/reader.rb', line 42

# Read only the first record of the file at +path+ (the header row).
#
# The file is opened through open (block form) and iteration stops
# right after the first record.
#
# Returns the first record, or nil when there are no records.
def self.header( path, sep: nil, parser: nil )   ## use header or headers - or use both (with alias)?
  first_record = nil

  open( path, sep: sep, parser: parser ) do |csv|
    csv.each do |record|
      first_record = record
      break   ## stop after the first record
    end
  end

  ## nil for empty input (for now) - returning [] instead is an open question
  first_record
end

.mysqlObject



93
# File 'lib/csvreader.rb', line 93

# Returns the MYSQL constant.
def self.mysql
  MYSQL
end

.open(path, mode = nil, sep: nil, converters: nil, parser: nil, &block) ⇒ Object

csv reader



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/csvreader/reader.rb', line 8

# Open the file at +path+ and wrap it in a new reader.
#
# path       - file to open
# mode       - File.open mode; defaults to 'r:bom|utf-8' when nil/not given
# sep        - optional separator, forwarded to new
# converters - optional converters, forwarded to new
# parser     - optional parser, forwarded to new
#
# With a block: calls the block with the reader, closes the reader
# afterwards (also when the block raises) and returns the block's result
# - i.e. behaves like Ruby's open(), not like the (old old) CSV library.
# Without a block: returns the reader; the caller must close it.
#
# If constructing the reader fails, the already opened file is closed
# before the error is re-raised, so no file handle is leaked.
def self.open( path, mode=nil,
               sep: nil,
               converters: nil,
               parser: nil, &block )   ## rename path to filename or name - why? why not?

  f = File.open( path, mode || 'r:bom|utf-8' )

  begin
    csv = new( f, sep: sep,
                  converters: converters,
                  parser: parser )
  rescue
    f.close    # do not leak the file handle when the reader cannot be built
    raise
  end

  return csv  unless block_given?

  begin
    block.call( csv )
  ensure
    csv.close
  end
end

.parse(data, sep: nil, converters: nil, parser: nil, &block) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
# File 'lib/csvreader/reader.rb', line 80

# Parse +data+ with a new reader.
#
# With a block: every record is passed to the block and the result of
# the reader's #each is returned.
# Without a block: all records are slurped via the reader's #read.
#
# Note: the reader is not closed here; that stays with the caller.
def self.parse( data, sep: nil,
                      converters: nil,
                      parser: nil, &block )
  csv = new( data, sep: sep, converters: converters, parser: parser )

  ## no block given - slurp contents
  return csv.read  unless block_given?

  csv.each( &block )
end

.parse_line(data, sep: nil, converters: nil) ⇒ Object

Note: parse_line is a convenience helper for the default format only.

Prefer parse over parse_line (whether parse_line should be used at all is an open question).
TODO/FIX: remove parse_line.


98
99
100
101
102
103
104
105
106
# File 'lib/csvreader/reader.rb', line 98

# Parse only the first record of +data+ (via parse, without a parser
# argument).
#
# Returns the first record, or nil when there are no records.
def self.parse_line( data, sep: nil,
                           converters: nil )
  first_record = nil

  parse( data, sep: sep, converters: converters ) do |record|
    first_record = record
    break   # stop after the first record
  end

  first_record
end

.postgresObject



95
# File 'lib/csvreader.rb', line 95

# Shorthand that delegates to .postgresql.
def self.postgres
  postgresql
end

.postgres_textObject



97
# File 'lib/csvreader.rb', line 97

# Shorthand that delegates to .postgresql_text.
def self.postgres_text
  postgresql_text
end

.postgresqlObject



94
# File 'lib/csvreader.rb', line 94

# Returns the POSTGRESQL constant.
def self.postgresql
  POSTGRESQL
end

.postgresql_textObject



96
# File 'lib/csvreader.rb', line 96

# Returns the POSTGRESQL_TEXT constant.
def self.postgresql_text
  POSTGRESQL_TEXT
end

.read(path, sep: nil, converters: nil, parser: nil) ⇒ Object



32
33
34
35
36
37
38
39
# File 'lib/csvreader/reader.rb', line 32

# Read all records of the file at +path+.
#
# Uses the block form of open and returns the result of the reader's
# #read.
def self.read( path, sep: nil,
                     converters: nil,
                     parser: nil )
  open( path, sep: sep, converters: converters, parser: parser ) do |csv|
    csv.read
  end
end

.rfc4180Object

alternative alias for RFC4180



91
# File 'lib/csvreader.rb', line 91

# Alternative alias for the RFC4180 constant.
def self.rfc4180
  RFC4180
end

.rootObject



20
21
22
# File 'lib/csvreader/version.rb', line 20

# Absolute path of the directory three levels above this source file.
def self.root
  three_up = 3.times.inject( __FILE__ ) { |path, _| File.dirname( path ) }
  File.expand_path( three_up )
end

.strictObject

alternative alias for STRICT



90
# File 'lib/csvreader.rb', line 90

# Alternative alias for the STRICT constant.
def self.strict
  STRICT
end

.tabObject



98
# File 'lib/csvreader.rb', line 98

# Returns the TAB constant.
def self.tab
  TAB
end

.versionObject



12
13
14
# File 'lib/csvreader/version.rb', line 12

# Returns the library version held in the VERSION constant.
def self.version() VERSION; end

Instance Method Details

#each(&block) ⇒ Object



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/csvreader/reader.rb', line 143

# Pass every record parsed from the reader's input to the block.
#
# Without a block an Enumerator (via to_enum) is returned instead.
#
# When a separator was given and the parser responds to sep=, the
# separator is handed to the parser as the sep: keyword. When converters
# are present, each value of a raw record is run through
# @converters.convert( value, index ) before the record reaches the block.
#
# Returns whatever @parser.parse returns.
def each( &block )
  return to_enum  unless block_given?

  kwargs = {}
  ## note: only add separator if present/defined (not nil)
  kwargs[:sep] = @sep    if @sep && @parser.respond_to?( :'sep=' )

  ## note: pass the options with ** - since Ruby 3.0 a trailing positional
  ##   Hash is no longer converted to keyword arguments
  ##   (assumes the parser takes sep as a keyword argument - TODO confirm)
  if @converters.empty?
    ## no converters - hand the raw records straight to the block
    @parser.parse( @io, **kwargs, &block )
  else
    ## add "post"-processing with converters pipeline
    ##   that is, convert all strings to integer, float, date, ... if wanted
    @parser.parse( @io, **kwargs ) do |raw_record|
      record = raw_record.each_with_index.map do |value, i|
        @converters.convert( value, i )
      end
      block.call( record )
    end
  end
end

#readObject

method each



168
# File 'lib/csvreader/reader.rb', line 168

# Slurp all records; simply delegates to to_a.
def read
  to_a
end