Class: Embulk::Plugin::InputRooExcel

Inherits:
InputPlugin
  • Object
show all
Defined in:
lib/embulk/input_roo_excel.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(task, schema, index, page_builder) ⇒ InputRooExcel

Returns a new instance of InputRooExcel.



48
49
50
51
# File 'lib/embulk/input_roo_excel.rb', line 48

# Sets up one input task.
#
# All four arguments are handed to the superclass constructor unchanged.
# The file this task reads is the entry of task['files'] at position index.
def initialize(task, schema, index, page_builder)
  super(task, schema, index, page_builder)
  files = task['files']
  @file = files[index]
end

Class Method Details

.resume(task, columns, count, &control) ⇒ Object



39
40
41
42
43
44
45
46
# File 'lib/embulk/input_roo_excel.rb', line 39

# Runs the input by yielding the task definition to the given block and
# logs the start and the end of the run on standard output.
#
# task    - task definition passed through to the block.
# columns - column list passed through to the block.
# count   - number of tasks passed through to the block.
#
# Returns an empty Hash as the next configuration diff.
def self.resume(task, columns, count, &control)
  puts "InputRooExcel input started."
  reports = yield(task, columns, count)
  puts "InputRooExcel input finished. Commit reports = #{reports.to_json}"

  {}
end

.transaction(config, &control) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/embulk/input_roo_excel.rb', line 11

# Builds the task definition from the plugin configuration and starts the
# input by delegating to resume.
#
# Every directory listed under 'paths' is scanned (non-recursively) for
# entries whose name ends in ".xlsx"; paths that are not existing
# directories are skipped. Files listed under 'done' are removed from the
# result, so task['files'] holds exactly the files still to be read. This
# matters because each task picks its file from task['files'] by position
# (see initialize) and the task count is the length of that same list.
#
# config  - configuration object responding to param(name, type, default:).
# control - block forwarded to resume.
#
# Returns whatever resume returns.
# Raises RuntimeError when no unprocessed .xlsx file is left.
def self.transaction(config, &control)
  task = {
    'columns'  => config.param('columns', :array, default: []),
    'done'     => config.param('done', :array, default: []),
    'sheet'    => config.param('sheet', :string, default: nil),
    'data_pos' => config.param('data_pos', :integer, default: 1),
  }

  found = config.param('paths', :array, default: []).flat_map do |path|
    # Dir.exist? is used because Dir.exists? was removed in Ruby 3.2.
    next [] unless Dir.exist?(path)

    Dir.entries(path).sort
       .select { |entry| entry =~ /\.xlsx\Z/ }
       .map { |entry| File.join(path, entry) }
  end

  # Store only the unprocessed files so that positional lookups by task
  # index never land on a file that is already done.
  task['files'] = found - task['done']
  raise "no valid xlsx file found" if task['files'].empty?

  columns = task['columns'].each_with_index.map do |c, i|
    Column.new(i, c['name'], c['type'].to_sym)
  end

  resume(task, columns, task['files'].length, &control)
end

Instance Method Details

#convert_cell(column, xlsx, nrow, ncol) ⇒ Object

MEMO: roo's celltype returns the type of a cell, one of :float, :string, :date, :percentage, :formula, :time, :datetime.



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/embulk/input_roo_excel.rb', line 86

# Reads one cell and converts it according to the column definition.
#
# column - Hash-like column definition; its 'type' entry selects the
#          conversion and is treated as 'string' when missing.
# xlsx   - object answering cell(row, col) with the raw cell value.
# nrow   - row passed to xlsx.cell.
# ncol   - column passed to xlsx.cell.
#
# Returns the value converted with to_i ('long'), to_f ('double') or
# convert_time ('timestamp'); any other type yields to_s.
def convert_cell(column,xlsx,nrow,ncol)
  raw = xlsx.cell(nrow, ncol)

  case column['type'] || 'string'
  when 'long'      then raw.to_i
  when 'double'    then raw.to_f
  when 'timestamp' then convert_time(raw)
  else
    # 'string' and every type without a dedicated conversion end up here.
    raw.to_s
  end
end

#convert_time(t) ⇒ Object



103
104
105
106
107
108
109
110
111
112
113
# File 'lib/embulk/input_roo_excel.rb', line 103

# Turns a cell value into a Time.
#
# t - a Date or DateTime (converted with to_time), a Time (returned as is)
#     or a String (parsed with Time.parse).
#
# Returns a Time.
# Raises ArgumentError for any other kind of value.
def convert_time(t)
  case t
  when Date, DateTime then t.to_time
  when Time           then t
  when String         then Time.parse(t)
  else
    raise ArgumentError,"Can't convert time:#{t}"
  end
end

#run ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/embulk/input_roo_excel.rb', line 53

# Reads the rows of this task's workbook and feeds them to the page builder.
#
# The sheet named by @task['sheet'] is used, or the first sheet of the
# workbook when none is configured. Rows from @task['data_pos'] up to the
# last row reported by the workbook are read; for each row one value per
# configured column is produced by convert_cell and the resulting record is
# added to the page builder. The page builder is always finished, even when
# no row was read.
#
# Returns an empty Hash as the commit report.
def run
  puts "InputRooExcel input thread #{@index}..."

  columns = @task['columns']

  xlsx = Roo::Excelx.new(@file)
  xlsx.default_sheet = @task['sheet'] || xlsx.sheets.first

  # last_row is nil when the sheet holds no cells at all; Integer#upto would
  # raise on a nil bound, so such a sheet simply contributes no rows.
  last_row = xlsx.last_row
  if last_row
    @task['data_pos'].upto(last_row) do |row|
      # Spreadsheet columns are 1-based while the column list is 0-based.
      record = columns.each_with_index.map do |column, i|
        convert_cell(column, xlsx, row, i + 1)
      end
      @page_builder.add(record)
    end
  end

  @page_builder.finish # must be called so buffered records are flushed

  {}
end