Class: CsvHuman

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/csvhuman/tag.rb,
lib/csvhuman/column.rb,
lib/csvhuman/reader.rb,
lib/csvhuman/version.rb,
lib/csvhuman/converter.rb,
lib/csvhuman/doc/helper.rb,
lib/csvhuman/doc/schema.rb

Defined Under Namespace

Modules: DocHelper Classes: Column, Columns, Doc, Tag

Constant Summary collapse

## version number parts (major.minor.patch)
MAJOR =
1
MINOR =
1
PATCH =
1
## version string; the three parts above joined by dots
VERSION =
"#{MAJOR}.#{MINOR}.#{PATCH}"
HEADER_CONVERTERS =
{
  ## pass-through; keeps the header value as is
  ##   e.g.  "#adm1 +code"  =>  "#adm1 +code"
  none:    ->(value) { value },

  ## downcase, then drop hashtags (#) and all whitespace
  ##   e.g.  "#adm1 +code"  =>  "adm1+code"
  default: lambda { |value|
    value.downcase.delete('#').gsub( /\s+/, '' )
  },

  ## same as default, then turn plus (+) into underscore (_), drop everything
  ##   that is not a word character and convert the result to a symbol
  ##   e.g.  "#adm1 +code"  =>  :adm1_code
  symbol:  lambda { |value|
    value.downcase.delete('#').gsub( /\s+/, '' ).tr('+', '_').gsub( /\W+/, '' ).to_sym
  },
}
TYPE_MAPPINGS =

convert guess_type to proc (is there a better/idiomatic way)?

->(name, attributes) { guess_type( name, attributes ) }

TYPE_MAPPING_GUESS = Kernel.method( :guess_type )

{
  ## no typing; always returns String (values are kept as is / assumed to be strings)
  none:  lambda { |_name, _attributes| String },
  ## hands the tag name and attributes on to guess_type and returns its result
  guess: lambda { |name, attributes| guess_type( name, attributes ) },
}
TYPE_CONVERTERS =
{
  ## type (class) => converter; every converter takes the raw value
  ##   and hands it on to the matching convert_to_* helper
  Integer => lambda { |value| convert_to_i( value ) },
  Float   => lambda { |value| convert_to_f( value ) },
  Date    => lambda { |value| convert_to_date( value ) },
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(recs_or_stream, sep: nil, header_converter: nil) ⇒ CsvHuman

Returns a new instance of CsvHuman.



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/csvhuman/reader.rb', line 75

## sets up a new reader
##   recs_or_stream    - a String gets wrapped in a CsvReader (using the "human" parser);
##                        anything else is stored as is and gets iterated (with each) later
##   sep               - separator handed on to CsvReader (used for String input only)
##   header_converter  - nil (use the :default converter), a Symbol (a key of
##                        HEADER_CONVERTERS) or a custom converter
##
##   raises ArgumentError if header_converter is a Symbol
##     that is not a key of HEADER_CONVERTERS
def initialize( recs_or_stream, sep: nil,
                                header_converter: nil )
  ## todo/check:  check if arg is a stream/enumerator - why? why not??
  @recs = if recs_or_stream.is_a?( String )
            ## note: returns an enumerator-like object
            CsvReader.new( recs_or_stream, sep: sep, parser: CsvReader::Parser.human )
          else
            recs_or_stream
          end

  @header = []
  @tags   = nil   ## add tags = nil  -- why? why not?
  @cols   = nil   ## column mappings (used "internally")

  @header_converter =
    case header_converter
    when nil
      HEADER_CONVERTERS[:default]
    when Symbol
      ## fail early (and with a helpful message) on an unknown converter name
      ##   instead of silently storing nil
      HEADER_CONVERTERS.fetch( header_converter ) do
        raise ArgumentError, "unknown header converter #{header_converter.inspect}; expected one of #{HEADER_CONVERTERS.keys.inspect}"
      end
    else
      ## assume proc; todo/fix: check it's a proc!!!
      header_converter
    end
end

Instance Attribute Details

#header ⇒ Object (readonly)

Returns the value of attribute header.



73
74
75
# File 'lib/csvhuman/reader.rb', line 73

## (read-only) returns the @header instance variable
def header() @header; end

#tags ⇒ Object (readonly)

Returns the value of attribute tags.



73
74
75
# File 'lib/csvhuman/reader.rb', line 73

## (read-only) returns the @tags instance variable
def tags() @tags; end

Class Method Details



16
17
18
# File 'lib/csvhuman/version.rb', line 16

## returns a one-line banner with the library version and the ruby runtime infos
def self.banner
  runtime = "#{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  "csvhuman/#{VERSION} on Ruby #{runtime}"
end

.convert_to_date(value) ⇒ Object



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/csvhuman/converter.rb', line 103

## converts a string into a Date
##   returns nil for nil, for an empty value and for anything
##   that matches none of the two known formats
def self.convert_to_date( value )
  return nil   if value.nil? || value.empty?   ## return nil - why? why not?

  ## todo/fix: add support for more formats
  ##    how to deal with conversion errors (throw exception? ignore? why? why not?)
  ### todo: check if 2014-1-9 works for strptime too (leading zero required)?
  layout = if value =~ /\d{4}-\d{1,2}-\d{1,2}/        # 2014-11-09
             "%Y-%m-%d"
           elsif value =~ /\d{1,2}\/\d{1,2}\/\d{4}/   # 09/11/2014
             "%d/%m/%Y"
           end

  ## todo/fix: throw argument/value error (if no format matches) - why? why not
  layout ? Date.strptime( value, layout ) : nil
end

.convert_to_f(value) ⇒ Object



93
94
95
96
97
98
99
100
101
# File 'lib/csvhuman/converter.rb', line 93

## converts a string into a Float (using Kernel#Float)
##   returns nil for nil or an empty value
def self.convert_to_f( value )
  return nil   if value.nil? || value.empty?   ## return nil - why? why not?

  ## todo/fix: add support for NaN, Inf, -Inf etc.
  ##    how to deal with conversion errors (throw exception? ignore? NaN? why? why not?)
  Float( value )
end

.convert_to_i(value) ⇒ Object



85
86
87
88
89
90
91
# File 'lib/csvhuman/converter.rb', line 85

## converts a string into an Integer (using Kernel#Integer)
##   returns nil for nil or an empty value  - why? why not?
def self.convert_to_i( value )
  blank = value.nil? || value.empty?
  blank ? nil : Integer( value )
end

.foreach(path, sep: nil, header_converter: nil, &block) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/csvhuman/reader.rb', line 39

## opens the file at path and passes every record on to the block;
##   the file gets closed once done (even if the block raises);
##   without a block an enumerator gets returned (and the file stays open)
def self.foreach( path, sep: nil,
                        header_converter: nil, &block )
  ## note: returns an enumerator-like object
  csv   = CsvReader.open( path, sep: sep, parser: CsvReader::Parser.human )
  human = new( csv, header_converter: header_converter )

  ## note: caller (responsible) must close file!!!
  ## remove version without block given - why? why not?
  ## use Csv.open().to_enum  or Csv.open().each
  ##   or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
  return human.to_enum   unless block_given?

  begin
    human.each( &block )
  ensure
    csv.close
  end
end

.guess_type(name, attributes) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/csvhuman/converter.rb', line 24

## guesses the type (class) for a column from the tag name and its attributes
##   name        - tag name (without the hashtag) e.g. 'date', 'affected', 'geo'
##   attributes  - list of attribute names (without the plus) or nil
##
##   returns Integer, Float, Date or String (the default if nothing matches)
def self.guess_type( name, attributes )
  ## note: nil is allowed for "no attributes"; normalize once up front
  ##   (the date check used to fail with a NoMethodError on nil)
  attribs = attributes || []

  if name == 'date'
    ##  just the year (e.g. 2011)? use an integer number
    return attribs.include?( 'year' ) ? Integer : Date
  end

  ## todo/fix: add more well-known names with num required!!!
  return Integer   if ['affected', 'inneed', 'targeted', 'reached', 'population'].include?( name )

  ## no attributes to check - assume (default to) string
  return String    if attribs.empty?

  if attribs.include?( 'num' ) ||
     attribs.include?( 'id' )      ## assume id is (always) a rowid - why? why not?
    Integer
  elsif attribs.include?( 'date' )   ### todo/check: exists +date?
    Date
  elsif name == 'geo' && ['lat', 'lon', 'elevation'].any? { |attrib| attribs.include?( attrib ) }
    Float
  elsif ['killed', 'injured', 'infected', 'displaced',
         'idps', 'refugees', 'abducted', 'threatened',
         'affected', 'inneed', 'targeted', 'reached'].any? { |attrib| attribs.include?( attrib ) }
    Integer
  else
    String   ## assume (default to) string
  end
end

.open(path, mode = nil, sep: nil, header_converter: nil, &block) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/csvhuman/reader.rb', line 12

## opens the file at path and wraps it in a new reader;
##   with a block the reader gets passed on to the block, the file gets closed
##   once done (even if the block raises) and the block's result gets returned;
##   without a block the reader gets returned (and the file stays open)
def self.open( path, mode=nil, sep: nil,
                               header_converter: nil, &block )   ## rename path to filename or name - why? why not?
  ## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
  ## f = File.open( path, mode ? mode : 'r:bom|utf-8' )

  ## note: returns an enumerator-like object
  csv   = CsvReader.open( path, mode, sep: sep, parser: CsvReader::Parser.human )
  human = new( csv, header_converter: header_converter )

  # handle blocks like Ruby's open(), not like the (old old) CSV library
  return human   unless block_given?

  begin
    block.call( human )
  ensure
    csv.close
  end
end

.parse(str_or_readable, sep: nil, header_converter: nil, &block) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
# File 'lib/csvhuman/reader.rb', line 59

## builds a new reader for a string (or a readable / records);
##   with a block every record gets passed on to the block,
##   without a block all records get read (slurped) and returned
##
## note: caller (responsible) must close file!!! - add autoclose - why? why not?
def self.parse( str_or_readable, sep: nil,
                                 header_converter: nil, &block )
  human = new( str_or_readable, sep: sep, header_converter: header_converter )

  # slurp contents, if no block is given
  return human.read   unless block_given?

  human.each( &block )
end

.read(path, sep: nil, header_converter: nil) ⇒ Object



33
34
35
36
# File 'lib/csvhuman/reader.rb', line 33

## opens the file at path (with the block form of open)
##   and returns what the reader's read returns
def self.read( path, sep: nil,
                     header_converter: nil )
  open( path, sep: sep, header_converter: header_converter ) do |reader|
    reader.read
  end
end

.root ⇒ Object



20
21
22
# File 'lib/csvhuman/version.rb', line 20

## returns the absolute path of the directory reached
##   by applying File.dirname three times to this file's path
def self.root
  base = File.dirname( File.dirname( File.dirname( __FILE__ ) ) )
  File.expand_path( base )
end

.version ⇒ Object



12
13
14
# File 'lib/csvhuman/version.rb', line 12

## returns the VERSION constant
def self.version() VERSION; end

Instance Method Details

#each(&block) ⇒ Object



101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/csvhuman/reader.rb', line 101

## iterates over all rows and passes every data row on to the block as a hash record
##   - rows before the row of hashtags get collected in header
##   - the first row with a value starting with a hashtag (#) becomes tags
##     (and gets used to build the column mappings)
##   - all rows after that are data rows; values of non-tagged columns get skipped,
##     values of list columns get collected in an array
##
##   returns an enumerator if no block is given
def each( &block )
  ## no block? hand back an enumerator (instead of failing
  ##   with a NoMethodError on nil once the first data row gets reached)
  return to_enum( :each )   unless block_given?

  @header = []
  @tags   = nil   ## add tags = nil  -- why? why not?
  @cols   = nil   ## column mappings (used "internally")

  @recs.each do |values|
    if @cols.nil?
      ## still in the header section - is this the row of hashtags?
      if values.any? { |value| value && value.strip.start_with?('#') }
        @cols = Columns.build( values, @header_converter )
        @tags = values
      else
        @header << values
      end
    else
      ## data row
      ##  strip non-tagged - how?
      record = {}
      @cols.each_with_index do |col,i|
        next unless col.tagged?

        key   = col.key
        value = col.tag.typecast( values[i] )
        if col.list?
          ( record[ key ] ||= [] ) << value
        else
          ## add "regular" single value
          record[ key ] = value
        end
      end
      block.call( record )
    end
  end
end

#read ⇒ Object

method each



139
# File 'lib/csvhuman/reader.rb', line 139

## reads (slurps) all records at once; returns what to_a returns
def read
  to_a
end