Class: OMF::OML::OmlCsvTable

Inherits:
OmlTable
  • Object
show all
Defined in:
lib/omf_oml/csv_table.rb

Overview

This class represents a table whose content is initially stored in a CSV file.

Instance Attribute Summary

Attributes inherited from OmlTable

#max_size, #name, #offset, #schema

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from OmlTable

#<<, #add_row, #add_rows, #create_sliced_table, #data_sources, #describe, #indexed_by, #on_before_row_added, #on_content_changed, #on_row_added, #rows, #to_a

Constructor Details

#initialize(tname, file_name, opts = {}, &on_before_row_added) ⇒ OmlCsvTable

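tname - Name of table. file_name - Path of the CSV file to load. opts[:schema] - OmlSchema or Array containing [name, type*] for every column in table; if omitted, the schema is taken from the CSV header line.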

Table adds a '__id__' column at the beginning which keeps track of each row's unique id


36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/omf_oml/csv_table.rb', line 36

# Build a table and fill it with the content of a CSV file.
#
# tname     - Name of the table, handed to the superclass constructor.
# file_name - Path of the CSV file; must be readable.
# opts      - Options hash. The keys below are consumed here, everything
#             else is handed to the superclass constructor unchanged:
#             :has_csv_header - must be +true+; files without a header
#                               line are currently not supported.
#             :encoding       - optional encoding name appended to the
#                               file open mode ("rb:<encoding>").
#             :schema         - optional schema. When absent, it is derived
#                               from the header cells, each of the form
#                               "name" or "name:type" (type defaults to
#                               'string').
# on_before_row_added - optional block; not referenced here, but Ruby
#                       forwards it implicitly with the +super+ call.
#
# Raises RuntimeError if the file is not readable, if :has_csv_header is
# not +true+, or if no header line could be read from the file.
def initialize(tname, file_name, opts = {}, &on_before_row_added)
  unless File.readable?(file_name)
    raise "Can't read CSV file '#{file_name}'"
  end

  # Work on a copy so the caller's hash is not stripped of its keys.
  opts = opts.dup
  csv_opts = { :headers => (opts.delete(:has_csv_header) == true) }
  unless csv_opts[:headers]
    raise "Current implementation only works with CSV files which inlcude a schema description in the first line"
  end

  encoding = opts.delete(:encoding)
  mode = encoding ? "rb:#{encoding}" : "rb"

  # Replace anything that can't be represented in UTF-8 with '?'.
  utf8_opts = { :invalid => :replace, :undef => :replace, :replace => '?' }
  clean_row = lambda do |fields|
    fields.map { |e| e ? e.encode('UTF-8', **utf8_opts) : nil }
  end

  # CSV options have to be passed as keywords: Ruby 3's CSV.open no longer
  # accepts them as a trailing positional hash.
  csv = CSV.open(file_name, mode, **csv_opts)
  begin
    first_row = nil
    schema = opts.delete(:schema)
    unless schema
      # The header is only parsed once a row has been requested. The row
      # read here is real data and must be kept (nil for a header-only file).
      head = csv.shift
      headers = csv.headers
      unless headers.is_a?(Array)
        raise "CSV file '#{file_name}' is empty, can't capture schema from file header"
      end
      first_row = clean_row.call(head.fields) if head
      schema = headers.map do |c|
        c = c.encode('UTF-8', **utf8_opts)
        name, type = c.split(':')
        [name.strip, (type || 'string').strip]
      end
    end
    super tname, schema, opts

    if first_row # from getting the CSV header
      first_row.insert(0, @row_id += 1) if @add_index
      @rows = [@schema.cast_row(first_row)]
    end

    # This assumes that CSV reader is setup with proper schema converters
    csv.each do |r|
      row = clean_row.call(r.fields)
      row.insert(0, @row_id += 1) if @add_index
      @rows << @schema.cast_row(row)
    end
  ensure
    # Always release the file handle, even if parsing or casting fails.
    csv.close
  end
end

Class Method Details

.create(tname, file_name, opts = {}, &on_before_row_added) ⇒ Object

Parameters:

  • opts (defaults to: {})


22
23
24
# File 'lib/omf_oml/csv_table.rb', line 22

# Factory shortcut for the constructor.
#
# Every argument, and the optional block, is handed to +new+ unchanged and
# whatever +new+ returns is returned.
#
# tname               - forwarded as first argument
# file_name           - forwarded as second argument
# opts                - forwarded as third argument (defaults to {})
# on_before_row_added - optional block, forwarded as the block of +new+
def self.create(tname, file_name, opts = {}, &on_before_row_added)
  new(tname, file_name, opts, &on_before_row_added)
end