Class: Embulk::Input::GoogleSpreadsheets

Inherits:
InputPlugin
  • Object
show all
Defined in:
lib/embulk/input/google_spreadsheets.rb,
lib/embulk/input/google_spreadsheets/auth.rb,
lib/embulk/input/google_spreadsheets/error.rb,
lib/embulk/input/google_spreadsheets/pager.rb,
lib/embulk/input/google_spreadsheets/pager_util.rb,
lib/embulk/input/google_spreadsheets/typecast/base.rb,
lib/embulk/input/google_spreadsheets/typecast_factory.rb,
lib/embulk/input/google_spreadsheets/record_typecaster.rb,
lib/embulk/input/google_spreadsheets/spreadsheets_client.rb,
lib/embulk/input/google_spreadsheets/spreadsheets_url_util.rb,
lib/embulk/input/google_spreadsheets/typecast/loose_typecast.rb,
lib/embulk/input/google_spreadsheets/typecast/strict_typecast.rb,
lib/embulk/input/google_spreadsheets/typecast/minimal_typecast.rb,
lib/embulk/input/google_spreadsheets/typecast/timestamp_format_util.rb

Defined Under Namespace

Modules: PagerUtil, SpreadsheetsUrlUtil, Traceable, Typecast, TypecastFactory

Classes: Auth, ConfigError, CustomColumns, DataError, LocalFile, Pager, RecordTypecaster, SpreadsheetsClient, TypecastError

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#client ⇒ Object (readonly)

Returns the value of attribute client.



152
153
154
# File 'lib/embulk/input/google_spreadsheets.rb', line 152

# Reader for the +@client+ instance variable.
#
# @return [Object] whatever is currently stored in +@client+; {#init}
#   assigns a SpreadsheetsClient instance there.
def client
  @client
end

#typecaster ⇒ Object (readonly)

Returns the value of attribute typecaster.



152
153
154
# File 'lib/embulk/input/google_spreadsheets.rb', line 152

# Reader for the +@typecaster+ instance variable.
#
# @return [Object] whatever is currently stored in +@typecaster+; {#init}
#   assigns a RecordTypecaster instance there.
def typecaster
  @typecaster
end

Class Method Details

.configure(config) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/embulk/input/google_spreadsheets.rb', line 85

# Builds the task hash from the user-supplied plugin configuration.
#
# Besides filling the task, this overwrites the class-level defaults on
# CustomColumns (+default_format+, +default_timezone+, +default_typecast+)
# with the configured values before the +columns+ option is read.
#
# @param config [#param] configuration object; only +#param+ is called on it
# @return [Hash{String => Object}] the populated task, including the derived
#   +end_column+ entry
def self.configure(config)
  task = {}

  # auth_method accepts one of:
  #   service_account / authorized_user / compute_engine / application_default
  task['auth_method'] = config.param('auth_method', :string, default: 'authorized_user')

  # json_keyfile: full path of the JSON key.
  #   When `auth_method` is `authorized_user`, the plugin supposes this format:
  #   {
  #     "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
  #     "client_secret":"xxxxxxxxxxx",
  #     "refresh_token":"xxxxxxxxxxx"
  #   }
  #   When `auth_method` is `compute_engine` or `application_default`, this
  #   option is not required.
  task['json_keyfile'] = config.param('json_keyfile', LocalFile, default: nil)

  # Which sheet to read, and which range of it.
  task['spreadsheets_url'] = config.param('spreadsheets_url', :string)
  task['worksheet_title'] = config.param('worksheet_title', :string)
  task['start_column'] = config.param('start_column', :integer, default: 1)
  task['start_row'] = config.param('start_row', :integer, default: 1)
  task['end_row'] = config.param('end_row', :integer, default: -1)
  task['max_fetch_rows'] = config.param('max_fetch_rows', :integer, default: 10000)

  # value_render_option: FORMATTED_VALUE, UNFORMATTED_VALUE, FORMULA are available.
  # ref. https://developers.google.com/sheets/api/reference/rest/v4/ValueRenderOption
  task['value_render_option'] = config.param('value_render_option', :string, default: 'FORMATTED_VALUE')
  task['null_string'] = config.param('null_string', :string, default: '')
  task['stop_on_invalid_record'] = config.param('stop_on_invalid_record', :bool, default: true)

  # Each configured default is stored in the task and then written back to
  # CustomColumns, ahead of reading `columns` below.
  # NOTE(review): presumably CustomColumns consults these defaults while
  # parsing `columns` - confirm against CustomColumns.
  timestamp_format = config.param('default_timestamp_format', :string, default: CustomColumns.default_format)
  task['default_timestamp_format'] = timestamp_format
  CustomColumns.default_format = timestamp_format

  timezone = config.param('default_timezone', :string, default: CustomColumns.default_timezone)
  task['default_timezone'] = timezone
  CustomColumns.default_timezone = timezone

  typecast = config.param('default_typecast', :string, default: CustomColumns.default_typecast)
  task['default_typecast'] = typecast
  CustomColumns.default_typecast = typecast

  # columns: this option supposes an array of hashes with the structure below.
  #   - name
  #   - type
  #   - format
  #   - timezone
  #   - typecast: default: strict
  columns = config.param('columns', CustomColumns)
  task['columns'] = columns

  # The last column index follows from the first one and the column count.
  task['end_column'] = task['start_column'] + columns.length - 1

  logger.debug do
    # json_keyfile is left out of the logged task.
    loggable = task.reject { |key, _value| key == 'json_keyfile' }
    "`embulk-input-google_spreadsheets`: configured task '#{loggable.to_json}'"
  end
  task
end

.configure_columns(task) ⇒ Object



133
134
135
136
137
# File 'lib/embulk/input/google_spreadsheets.rb', line 133

# Turns the task's column definitions into Column objects.
#
# @param task [Hash] task whose +'columns'+ entry is an enumerable of
#   hash-like definitions read via +'name'+, +'type'+ and +'format'+
# @return [Array<Column>] one Column per definition, indexed from 0 in the
#   order the definitions appear
def self.configure_columns(task)
  task['columns'].each_with_index.map do |definition, position|
    # The type is handed to Column as a symbol.
    column_type = definition['type'].to_sym
    Column.new(position, definition['name'], column_type, definition['format'])
  end
end

.logger ⇒ Object



77
78
79
# File 'lib/embulk/input/google_spreadsheets.rb', line 77

# Class-level logger accessor; delegates to the top-level Embulk module.
#
# @return [Object] the value of +::Embulk.logger+
def self.logger
  ::Embulk.logger
end

.resume(task, columns, count, &control) ⇒ Object



145
146
147
148
149
150
# File 'lib/embulk/input/google_spreadsheets.rb', line 145

# Runs the input by yielding the task, columns and task count to the given
# block, then reports an empty next-config diff.
#
# @param task [Hash] task forwarded unchanged to the block
# @param columns [Array] column definitions forwarded unchanged to the block
# @param count [Integer] task count forwarded unchanged to the block
# @yield [task, columns, count] the block that executes the tasks
# @return [Hash] always an empty hash
# @raise [LocalJumpError] if no block is given
def self.resume(task, columns, count, &control)
  # The block's result (the task reports) is not used by this plugin, so it
  # is discarded instead of being bound to a local that is never read.
  yield(task, columns, count)

  {}
end

.transaction(config, &control) ⇒ Object



139
140
141
142
143
# File 'lib/embulk/input/google_spreadsheets.rb', line 139

# Entry point of a run: builds the task from the configuration, derives the
# columns from it, and hands both to {.resume} with a task count of 1.
#
# @param config [Object] plugin configuration passed to {.configure}
# @param control [Proc] block forwarded to {.resume}
# @return [Object] whatever {.resume} returns
def self.transaction(config, &control)
  task = configure(config)
  resume(task, configure_columns(task), 1, &control)
end

Instance Method Details

#init ⇒ Object



154
155
156
157
# File 'lib/embulk/input/google_spreadsheets.rb', line 154

# Builds the collaborators read back through {#typecaster} and {#client}.
#
# Every collaborator is constructed from the same +task+: the record
# typecaster first, then the auth and pager objects, and finally the
# spreadsheets client that receives the latter two.
#
# @return [SpreadsheetsClient] the client that was just assigned
def init
  @typecaster = RecordTypecaster.new(task)

  auth = Auth.new(task)
  pager = Pager.new(task)
  @client = SpreadsheetsClient.new(task, auth: auth, pager: pager)
end

#logger ⇒ Object



81
82
83
# File 'lib/embulk/input/google_spreadsheets.rb', line 81

# Instance-level shortcut to the class-level logger.
#
# @return [Object] the value of +self.class.logger+
def logger
  self.class.logger
end

#run ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/embulk/input/google_spreadsheets.rb', line 163

# Streams every worksheet record through the typecaster into the page
# builder, then finishes the page builder.
#
# When a record fails (in the typecaster or while being added):
# - if {#stop_on_invalid_record?} is truthy, a ConfigError or DataError is
#   re-raised as is and any other error is wrapped in a DataError;
# - otherwise a warning is logged and the record is skipped.
#
# @return [Hash] always an empty task report
# @raise [ConfigError, DataError] on an invalid record when
#   {#stop_on_invalid_record?} is truthy
def run
  client.worksheet_each_record do |record|
    # Explicit begin/end: a bare `rescue` inside a do/end block only parses
    # on Ruby 2.5 and later.
    begin
      record = typecaster.transform_by_columns(record)
      page_builder.add(record)
    rescue => e
      if stop_on_invalid_record?
        # Errors already of the plugin's own types are passed through unwrapped.
        raise e if e.is_a?(ConfigError) || e.is_a?(DataError)
        raise DataError.new(e)
      end
      logger.warn { "`embulk-input-google_spreadsheets`: Error '#{e}' occurred. Skip '#{record}'" }
    end
  end

  page_builder.finish

  {}
end

#stop_on_invalid_record? ⇒ Boolean

Returns:

  • (Boolean)


159
160
161
# File 'lib/embulk/input/google_spreadsheets.rb', line 159

# Tells whether an invalid record should abort the run instead of being
# skipped (see how {#run} branches on it).
#
# @return [Boolean] the task's +'stop_on_invalid_record'+ entry, which
#   {.configure} reads as a +:bool+ option defaulting to +true+
def stop_on_invalid_record?
  task['stop_on_invalid_record']
end