Class: Embulk::Input::GoogleSpreadsheets

Inherits:
InputPlugin
  • Object
show all
Defined in:
lib/embulk/input/google_spreadsheets.rb,
lib/embulk/input/google_spreadsheets/auth.rb,
lib/embulk/input/google_spreadsheets/error.rb,
lib/embulk/input/google_spreadsheets/pager.rb,
lib/embulk/input/google_spreadsheets/pager_util.rb,
lib/embulk/input/google_spreadsheets/typecast/base.rb,
lib/embulk/input/google_spreadsheets/typecast_factory.rb,
lib/embulk/input/google_spreadsheets/record_typecaster.rb,
lib/embulk/input/google_spreadsheets/spreadsheets_client.rb,
lib/embulk/input/google_spreadsheets/spreadsheets_url_util.rb,
lib/embulk/input/google_spreadsheets/typecast/loose_typecast.rb,
lib/embulk/input/google_spreadsheets/typecast/strict_typecast.rb,
lib/embulk/input/google_spreadsheets/typecast/minimal_typecast.rb,
lib/embulk/input/google_spreadsheets/typecast/timestamp_format_util.rb

Defined Under Namespace

Modules: PagerUtil, SpreadsheetsUrlUtil, Traceable, Typecast, TypecastFactory
Classes: Auth, ConfigError, CustomColumns, DataError, LocalFile, Pager, RecordTypecaster, SpreadsheetsClient, TypecastError

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#client ⇒ Object (readonly)

Returns the value of attribute client.



149
150
151
# File 'lib/embulk/input/google_spreadsheets.rb', line 149

# Reader for the +@client+ instance variable (assigned in #init).
#
# @return [Object] the value currently stored in +@client+
def client
  @client
end

#typecaster ⇒ Object (readonly)

Returns the value of attribute typecaster.



149
150
151
# File 'lib/embulk/input/google_spreadsheets.rb', line 149

# Reader for the +@typecaster+ instance variable (assigned in #init).
#
# @return [Object] the value currently stored in +@typecaster+
def typecaster
  @typecaster
end

Class Method Details

.configure(config) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/embulk/input/google_spreadsheets.rb', line 85

# Reads the plugin options from +config+ and assembles the task hash.
#
# Options read (all keys are strings):
#   auth_method::  one of service_account / authorized_user /
#                  compute_engine / application_default
#                  (defaults to 'authorized_user').
#   json_keyfile:: full path of the JSON key. With `authorized_user` the
#                  file is expected to look like
#                    {
#                      "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
#                      "client_secret":"xxxxxxxxxxx",
#                      "refresh_token":"xxxxxxxxxxx"
#                    }
#                  Not required for `compute_engine` or
#                  `application_default`.
#   columns::      an array of hashes with the keys name, type, format,
#                  timezone and typecast (typecast default: strict).
#
# Side effect: the three `default_*` options are also written onto
# CustomColumns before `columns` is read.
#
# @param config [#param] the plugin configuration
# @return [Hash] the task hash, including the derived 'end_column'
def self.configure(config)
  task = {
    'auth_method'            => config.param('auth_method', :string, default: 'authorized_user'),
    'json_keyfile'           => config.param('json_keyfile', LocalFile, default: nil),
    'spreadsheets_url'       => config.param('spreadsheets_url', :string),
    'worksheet_title'        => config.param('worksheet_title', :string),
    'start_column'           => config.param('start_column', :integer, default: 1),
    'start_row'              => config.param('start_row', :integer, default: 1),
    'end_row'                => config.param('end_row', :integer, default: -1),
    'max_fetch_rows'         => config.param('max_fetch_rows', :integer, default: 10000),
    'null_string'            => config.param('null_string', :string, default: ''),
    'stop_on_invalid_record' => config.param('stop_on_invalid_record', :bool, default: true)
  }

  # Each default_* option falls back to the current CustomColumns value, is
  # stored in the task, and is then written back onto CustomColumns. This
  # must happen before `columns` is read below.
  [
    ['default_timestamp_format', :default_format],
    ['default_timezone',         :default_timezone],
    ['default_typecast',         :default_typecast]
  ].each do |option, attribute|
    value = config.param(option, :string, default: CustomColumns.public_send(attribute))
    task[option] = value
    CustomColumns.public_send("#{attribute}=", value)
  end

  column_defs = config.param('columns', CustomColumns)
  task['columns'] = column_defs

  # The last column to read follows from the start column and column count.
  task['end_column'] = task['start_column'] + column_defs.length - 1

  logger.debug do
    # The key file entry is left out of the logged task.
    loggable = task.reject { |key, _| key == 'json_keyfile' }
    "`embulk-input-google_spreadsheets`: configured task '#{loggable.to_json}'"
  end
  task
end

.configure_columns(task) ⇒ Object



130
131
132
133
134
# File 'lib/embulk/input/google_spreadsheets.rb', line 130

# Turns the column definitions in +task['columns']+ into Column objects.
#
# Each definition is read through the keys 'name', 'type' and 'format';
# the type is converted with +to_sym+, and the column index is the
# definition's position in the list.
#
# @param task [Hash] task hash holding a 'columns' entry
# @return [Array<Column>] one Column per definition, in order
def self.configure_columns(task)
  task['columns'].map.with_index do |definition, index|
    name = definition['name']
    type = definition['type'].to_sym
    Column.new(index, name, type, definition['format'])
  end
end

.logger ⇒ Object



77
78
79
# File 'lib/embulk/input/google_spreadsheets.rb', line 77

# Class-level logger accessor; delegates to the top-level Embulk module.
#
# @return [Object] whatever +::Embulk.logger+ returns
def self.logger
  ::Embulk.logger
end

.resume(task, columns, count, &control) ⇒ Object



142
143
144
145
146
147
# File 'lib/embulk/input/google_spreadsheets.rb', line 142

# Yields the task, columns and count to the given block, then returns an
# empty config diff.
#
# The block's return value is not used.
#
# @param task [Hash] task hash passed through to the block
# @param columns [Array] column list passed through to the block
# @param count [Integer] count passed through to the block
# @yield [task, columns, count]
# @return [Hash] always an empty hash
def self.resume(task, columns, count, &control)
  yield(task, columns, count)

  {}
end

.transaction(config, &control) ⇒ Object



136
137
138
139
140
# File 'lib/embulk/input/google_spreadsheets.rb', line 136

# Entry point for a run: builds the task from +config+, derives the
# columns from it, and hands both to .resume with a count of 1.
#
# @param config [Object] plugin configuration, passed to .configure
# @return [Object] whatever .resume returns
def self.transaction(config, &control)
  task = configure(config)
  resume(task, configure_columns(task), 1, &control)
end

Instance Method Details

#init ⇒ Object



151
152
153
154
# File 'lib/embulk/input/google_spreadsheets.rb', line 151

# Builds the per-task collaborators from +task+: the record typecaster is
# stored in +@typecaster+, and a spreadsheets client wired with its own
# Auth and Pager is stored in +@client+.
#
# @return [SpreadsheetsClient] the client that was just assigned
def init
  @typecaster = RecordTypecaster.new(task)

  auth  = Auth.new(task)
  pager = Pager.new(task)
  @client = SpreadsheetsClient.new(task, auth: auth, pager: pager)
end

#logger ⇒ Object



81
82
83
# File 'lib/embulk/input/google_spreadsheets.rb', line 81

# Instance-level logger accessor; delegates to the class-level .logger.
#
# @return [Object] whatever +self.class.logger+ returns
def logger
  self.class.logger
end

#run ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/embulk/input/google_spreadsheets.rb', line 160

# Streams every worksheet record through the typecaster into the page
# builder, then finishes the page builder.
#
# When a record fails and #stop_on_invalid_record? is truthy, a
# ConfigError or DataError is re-raised as is and any other error is
# wrapped in a DataError. Otherwise the failure is logged as a warning
# and processing moves on to the next record.
#
# @raise [ConfigError, DataError] on a failing record when
#   #stop_on_invalid_record? is truthy
# @return [Hash] always an empty task report
def run
  client.worksheet_each_record do |record|
    begin
      record = typecaster.transform_by_columns(record)
      page_builder.add(record)
    rescue => error
      if stop_on_invalid_record?
        case error
        when ConfigError, DataError
          raise error
        else
          raise DataError.new(error)
        end
      end
      # `record` is the typecast value if the failure happened after
      # transform_by_columns succeeded, otherwise the raw record.
      logger.warn { "`embulk-input-google_spreadsheets`: Error '#{error}' occurred. Skip '#{record}'" }
    end
  end

  page_builder.finish

  {}
end

#stop_on_invalid_record? ⇒ Boolean

Returns:

  • (Boolean)


156
157
158
# File 'lib/embulk/input/google_spreadsheets.rb', line 156

# Reads the 'stop_on_invalid_record' entry of the task; #run consults it
# to decide whether a failing record raises or is skipped.
#
# @return [Boolean] the value stored under 'stop_on_invalid_record'
def stop_on_invalid_record?
  task['stop_on_invalid_record']
end