Class: CsvReader

Inherits:
Object
  • Object
show all
Defined in:
lib/csvrecord/reader.rb

Overview

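A thin wrapper of our own around the CSV library of the ruby standard library; reader options get built from Csv.config.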

Class Method Summary collapse

Class Method Details

.foreach(path, sep: Csv.config.sep, headers: true) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
# File 'lib/csvrecord/reader.rb', line 113

## Iterate over the records of a csv file and yield one row at a time.
##
## The reader options are Csv.config.default_options with headers, col_sep
## and a fixed utf-8 external encoding merged on top.
##
## @param path [String] path of the csv file to read
## @param sep [String] column separator (handed to CSV as col_sep)
## @param headers [Boolean] handed to CSV as the headers option
## @yield [row] every record as produced by CSV.foreach
def self.foreach( path, sep: Csv.config.sep, headers: true )
  csv_options = Csv.config.default_options.merge(
                   headers: headers,
                   col_sep: sep,
                   external_encoding: 'utf-8'  ## note:  always (auto-)add utf-8 external encoding for now!!!
  )

  ## note: pass the options along as keywords (double splat) -
  ##   since ruby 3.0 a positional hash is no longer converted to keyword arguments
  CSV.foreach( path, **csv_options ) do |row|
    yield( row )    ## check/todo: use block.call( row ) ## why? why not?
  end
end

.header(path, sep: Csv.config.sep) ⇒ Object

use header or headers - or use both (with alias)?



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/csvrecord/reader.rb', line 155

## Read the header row (that is, the first record) of a csv file.
##
## Only the leading lines get read - up to and including the first line
## that Csv.config reports as neither a skip (comments) line nor a blank line.
## The collected text is then handed to parse_line.
##
## @param path [String] path of the csv file
## @param sep [String] column separator handed on to parse_line
## @return [Array, nil] the result of parse_line for the leading lines or
##   nil if the file ends before any header line was found
##   (empty file or comments / blank lines only)
def self.header( path, sep: Csv.config.sep )   ## use header or headers - or use both (with alias)?
  # read first lines (only)
  #  and parse with csv to get header from csv library itself
  #
  #  check - if there's an easier or built-in way for the csv library

  lines = +''     ## note: unary plus gets us a mutable (unfrozen) string buffer
  found = false

  File.open( path, 'r:utf-8' ) do |f|
    ## note: every line includes the line ending e.g.
    ##   "Brewery,City,Name,Abv\n"
    ## note: each_line simply stops at end-of-file
    ##   (no EOFError as with readline on empty or comments-only files)
    f.each_line do |line|
      lines << line
      next if Csv.config.skip?( line ) || Csv.config.blank?( line )

      found = true
      break
    end
  end

  return nil unless found   ## no header line before end-of-file

  ## note: do NOT use headers: true to get "plain" data array (no hash records)
  ##   hash record does NOT work for single line/row
  parse_line( lines, sep: sep )
end

.parse(txt, sep: Csv.config.sep, headers: true) ⇒ Object



134
135
136
137
138
139
140
141
# File 'lib/csvrecord/reader.rb', line 134

## Parse csv text in one go.
##
## The reader options are Csv.config.default_options with headers and
## col_sep merged on top.
##
## @param txt [String, IO] the csv source handed to CSV.parse
## @param sep [String] column separator (handed to CSV as col_sep)
## @param headers [Boolean] handed to CSV as the headers option
## @return [Object] whatever CSV.parse returns for the given options
def self.parse( txt, sep: Csv.config.sep, headers: true )
  csv_options = Csv.config.default_options.merge(
                   headers: headers,
                   col_sep: sep
  )
  ## pp csv_options

  ## note: pass the options along as keywords (double splat) -
  ##   since ruby 3.0 a positional hash is no longer converted to keyword arguments
  CSV.parse( txt, **csv_options )
end

.parse_line(txt, sep: Csv.config.sep) ⇒ Object



143
144
145
146
147
148
149
150
151
# File 'lib/csvrecord/reader.rb', line 143

## Parse a single record (row) from csv text.
##
## The reader options are Csv.config.default_options with col_sep merged
## on top; headers always gets turned off.
##
## @param txt [String] the csv text handed to CSV.parse_line
## @param sep [String] column separator (handed to CSV as col_sep)
## @return [Array, nil] whatever CSV.parse_line returns
def self.parse_line( txt, sep: Csv.config.sep )
  ## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
  csv_options = Csv.config.default_options.merge(
                  headers: false,  ## note: always turn off headers!!!!!!
                  col_sep: sep
  )
  ## pp csv_options

  ## note: pass the options along as keywords (double splat) -
  ##   since ruby 3.0 a positional hash is no longer converted to keyword arguments
  CSV.parse_line( txt, **csv_options )
end

.read(path, sep: Csv.config.sep, headers: true) ⇒ Object



126
127
128
129
130
131
132
# File 'lib/csvrecord/reader.rb', line 126

## Read and parse a complete csv file.
##
## The file gets opened with utf-8 as its encoding and handed to parse.
##
## @param path [String] path of the csv file
## @param sep [String] column separator handed on to parse
## @param headers [Boolean] headers option handed on to parse
## @return [Object] the result of parse
def self.read( path, sep: Csv.config.sep, headers: true )
  ## note: use our own file.open
  ##   always use utf-8 for now
  ##    check/todo: add skip option bom too - why? why not?

  ## note: use the block form - the file gets closed once parse is done
  ##   (otherwise the file handle stays open)
  File.open( path, 'r:utf-8' ) do |file|
    parse( file, sep: sep, headers: headers )
  end
end

.unwrap(row_or_array) ⇒ Object

helper methods



101
102
103
104
105
106
107
108
109
# File 'lib/csvrecord/reader.rb', line 101

## Helper: get the values of a record as a plain array.
##
## @param row_or_array [CSV::Row, Array] a csv row or a "classic" array
## @return [Array] the fields of a CSV::Row; anything else gets returned as is
def self.unwrap( row_or_array )   ## unwrap row - find a better name? why? why not?
  ## not a csv row? assume "classic" array of strings - pass through
  return row_or_array unless row_or_array.is_a?( CSV::Row )

  row_or_array.fields   ## array of the field values of the row
end