Class: ConnectorsSdk::Base::Extractor
- Inherits:
-
Object
- Object
- ConnectorsSdk::Base::Extractor
show all
- Defined in:
- lib/connectors_sdk/base/extractor.rb
Constant Summary
collapse
- MAX_CONNECTION_ATTEMPTS =
3
- DEFAULT_CURSOR_KEY =
'all'.freeze
- TRANSIENT_SERVER_ERROR_CLASSES =
Set.new(
[
Faraday::ConnectionFailed,
Faraday::SSLError,
Faraday::TimeoutError,
HTTPClient::ConnectTimeoutError,
Net::OpenTimeout
]
)
Instance Attribute Summary collapse
Instance Method Summary
collapse
-
#authorization_data ⇒ Object
-
#authorization_data! ⇒ Object
-
#client ⇒ Object
-
#client! ⇒ Object
-
#convert_transient_server_errors ⇒ Object
-
#cursors_modified_since_start? ⇒ Boolean
-
#deleted_ids(ids, &block) ⇒ Object
-
#document_changes(modified_since: nil, &block) ⇒ Object
-
#download_args_and_proc(id:, name:, size:, download_args:, &block) ⇒ Object
-
#evictable? ⇒ Boolean
-
#identifying_error_message(identifier) ⇒ Object
-
#initialize(content_source_id:, service_type:, config:, features:, client_proc:, authorization_data_proc:, monitor: ConnectorsShared::Monitor.new(:connector => self)) ⇒ Extractor
constructor
A new instance of Extractor.
-
#permissions(source_user_id, &block) ⇒ Object
-
#retrieve_latest_cursors ⇒ Object
-
#transient_error?(error) ⇒ Boolean
-
#with_auth_tokens_and_retry(&block) ⇒ Object
-
#yield_deleted_ids(_ids) ⇒ Object
-
#yield_document_changes(modified_since: nil) ⇒ Object
-
#yield_permissions(source_user_id) ⇒ Object
-
#yield_single_document_change(identifier: nil, &block) ⇒ Object
Constructor Details
#initialize(content_source_id:, service_type:, config:, features:, client_proc:, authorization_data_proc:, monitor: ConnectorsShared::Monitor.new(:connector => self)) ⇒ Extractor
Returns a new instance of Extractor.
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
|
# File 'lib/connectors_sdk/base/extractor.rb', line 37
# Builds an extractor and stores every collaborator in an instance variable.
#
# Besides keeping the arguments as given, the constructor snapshots
# +config.cursors+ (via +deep_dup+) into @original_cursors and marks the
# extractor as not completed.
#
# @param content_source_id stored in @content_source_id
# @param service_type stored in @service_type
# @param config stored in @config; must respond to +cursors+
# @param features stored in @features
# @param client_proc callable stored in @client_proc
# @param authorization_data_proc callable stored in @authorization_data_proc
# @param monitor stored in @monitor; defaults to a ConnectorsShared::Monitor
#   created with this extractor as its :connector
def initialize(content_source_id:,
               service_type:,
               config:,
               features:,
               client_proc:,
               authorization_data_proc:,
               monitor: ConnectorsShared::Monitor.new(:connector => self))
  # Identity of the source being extracted.
  @content_source_id = content_source_id
  @service_type = service_type

  # Callables that are only invoked later, on demand.
  @client_proc = client_proc
  @authorization_data_proc = authorization_data_proc

  # Configuration, plus a copy of the cursors as they were at construction time.
  @config = config
  @features = features
  @original_cursors = config.cursors.deep_dup

  @monitor = monitor
  @completed = false
end
|
Instance Attribute Details
#client_proc ⇒ Object
Returns the value of attribute client_proc.
35
36
37
|
# File 'lib/connectors_sdk/base/extractor.rb', line 35
# Reader for the client factory that the constructor stored.
#
# @return the current value of @client_proc
def client_proc
  @client_proc
end
|
#completed ⇒ Object
Returns the value of attribute completed.
34
35
36
|
# File 'lib/connectors_sdk/base/extractor.rb', line 34
# Reader for the completion flag (the constructor initialises it to false).
#
# @return the current value of @completed
def completed
  @completed
end
|
#config ⇒ Object
Returns the value of attribute config.
34
35
36
|
# File 'lib/connectors_sdk/base/extractor.rb', line 34
# Reader for the configuration object handed to the constructor.
#
# @return the current value of @config
def config
  @config
end
|
#content_source_id ⇒ Object
Returns the value of attribute content_source_id.
34
35
36
|
# File 'lib/connectors_sdk/base/extractor.rb', line 34
# Reader for the content source identifier handed to the constructor.
#
# @return the current value of @content_source_id
def content_source_id
  @content_source_id
end
|
#features ⇒ Object
Returns the value of attribute features.
34
35
36
|
# File 'lib/connectors_sdk/base/extractor.rb', line 34
# Reader for the features value handed to the constructor.
#
# @return the current value of @features
def features
  @features
end
|
#monitor ⇒ Object
Returns the value of attribute monitor.
35
36
37
|
# File 'lib/connectors_sdk/base/extractor.rb', line 35
# Reader for the monitor handed to (or defaulted by) the constructor.
#
# @return the current value of @monitor
def monitor
  @monitor
end
|
#original_cursors ⇒ Object
Returns the value of attribute original_cursors.
34
35
36
|
# File 'lib/connectors_sdk/base/extractor.rb', line 34
# Reader for the cursor snapshot the constructor took from +config.cursors+.
#
# @return the current value of @original_cursors
def original_cursors
  @original_cursors
end
|
#service_type ⇒ Object
Returns the value of attribute service_type.
34
35
36
|
# File 'lib/connectors_sdk/base/extractor.rb', line 34
# Reader for the service type handed to the constructor.
#
# @return the current value of @service_type
def service_type
  @service_type
end
|
Instance Method Details
#authorization_data ⇒ Object
60
61
62
|
# File 'lib/connectors_sdk/base/extractor.rb', line 60
# Returns the authorization data, invoking @authorization_data_proc only when
# no truthy value has been memoized yet.
#
# @return the memoized (or freshly computed) authorization data
def authorization_data
  return @authorization_data if @authorization_data

  @authorization_data = @authorization_data_proc.call
end
|
#authorization_data! ⇒ Object
55
56
57
58
|
# File 'lib/connectors_sdk/base/extractor.rb', line 55
# Forces a refresh of the authorization data.
#
# @return whatever #authorization_data returns after the memo is cleared
def authorization_data!
  # Clearing the memo makes the reader below call the proc again.
  @authorization_data = nil
  authorization_data
end
|
#client ⇒ Object
69
70
71
|
# File 'lib/connectors_sdk/base/extractor.rb', line 69
# Returns the client, calling +client_proc+ only when no truthy client has
# been memoized yet.
#
# @return the memoized (or freshly built) client
def client
  return @client if @client

  @client = client_proc.call
end
|
#client! ⇒ Object
64
65
66
67
|
# File 'lib/connectors_sdk/base/extractor.rb', line 64
# Forces a new client to be built.
#
# @return whatever #client returns after the memo is cleared
def client!
  # Clearing the memo makes the reader below call the factory again.
  @client = nil
  client
end
|
#convert_transient_server_errors ⇒ Object
215
216
217
218
219
220
221
222
223
224
225
|
# File 'lib/connectors_sdk/base/extractor.rb', line 215
# Runs the given block and translates transient failures into
# ConnectorsShared::TransientServerError.
#
# A StandardError for which +transient_error?+ is false is re-raised as is.
# Otherwise a TransientServerError is raised carrying the original class and
# message, a :suspend_until time derived from the
# 'transient_server_error_retry_delay_minutes' setting, and the current
# +config.cursors+.
#
# @yield the work to protect
# @return the block's value when it does not raise
# @raise [ConnectorsShared::TransientServerError] for transient failures
def convert_transient_server_errors
  yield
rescue StandardError => error
  # Anything not recognised as transient propagates untouched.
  raise unless transient_error?(error)

  message = "Transient error #{error.class}: #{error.message}"
  retry_at = Connectors.config.fetch('transient_server_error_retry_delay_minutes').minutes.from_now
  raise ConnectorsShared::TransientServerError.new(
    message,
    :suspend_until => retry_at,
    :cursors => config.cursors
  )
end
|
#cursors_modified_since_start? ⇒ Boolean
235
236
237
|
# File 'lib/connectors_sdk/base/extractor.rb', line 235
# Tells whether +config.cursors+ now differs from the snapshot exposed by
# +original_cursors+.
#
# @return [Boolean] result of comparing the two with +!=+
def cursors_modified_since_start?
  current_cursors = config.cursors
  current_cursors != original_cursors
end
|
#deleted_ids(ids, &block) ⇒ Object
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
|
# File 'lib/connectors_sdk/base/extractor.rb', line 166
# Streams the ids reported by +yield_deleted_ids+.
#
# The work runs inside +with_auth_tokens_and_retry+ and is timed with
# Connectors::Stats.measure. Ids are re-emitted through an Enumerator and a
# progress line is logged after every 100th id. When a block is given the
# enumerator is drained into it here; otherwise nothing is iterated until the
# caller consumes the returned enumerator.
#
# @param ids forwarded unchanged to +yield_deleted_ids+
# @yield [id] each id, when a block is given
# @return the enumerator built for this call
def deleted_ids(ids, &block)
  deletions = nil
  Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.deleted_ids") do
    with_auth_tokens_and_retry do
      Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.yield_deleted_ids") do
        emitted = 0
        deletions = Enumerator.new do |sink|
          yield_deleted_ids(ids) do |deleted_id|
            sink << deleted_id
            emitted += 1
            log_info("Deleted #{emitted} documents so far") if (emitted % 100).zero?
          end
        end
        deletions.each(&block) unless block.nil?
      end
    end
  end
  deletions
end
|
#document_changes(modified_since: nil, &block) ⇒ Object
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
# File 'lib/connectors_sdk/base/extractor.rb', line 122
# Streams the document changes reported by +yield_document_changes+.
#
# The work runs inside +with_auth_tokens_and_retry+ and is timed with
# Connectors::Stats.measure. Each change is re-emitted through an Enumerator
# as an (action, change) pair and a progress line is logged after every 100th
# change. When a block is given the enumerator is drained into it here;
# otherwise nothing is iterated until the caller consumes the returned
# enumerator.
#
# @param modified_since forwarded unchanged to +yield_document_changes+
# @yield [action, change] each change, when a block is given
# @return the enumerator built for this call
def document_changes(modified_since: nil, &block)
  enum = nil
  Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.documents") do
    with_auth_tokens_and_retry do
      Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.yield_documents") do
        counter = 0
        enum = Enumerator.new do |yielder|
          yield_document_changes(:modified_since => modified_since) do |action, change|
            # Emit exactly the pair. A trailing comma on this call would make
            # Ruby treat the following statement as one more yielded argument.
            yielder.yield action, change
            counter += 1
            log_info("Extracted #{counter} documents so far") if counter % 100 == 0
          end
        end
        enum.each(&block) if block_given?
      end
    end
  end
  enum
end
|
#download_args_and_proc(id:, name:, size:, download_args:, &block) ⇒ Object
239
240
241
|
# File 'lib/connectors_sdk/base/extractor.rb', line 239
# Packs the download description and the given block into one array.
#
# @param id first element of the result
# @param name second element of the result
# @param size third element of the result
# @param download_args fourth element of the result
# @return [Array] +[id, name, size, download_args, block]+; the last element
#   is nil when no block is given
def download_args_and_proc(id:, name:, size:, download_args:, &block)
  download_descriptor = [id, name, size, download_args]
  download_descriptor << block
end
|
#evictable? ⇒ Boolean
231
232
233
|
# File 'lib/connectors_sdk/base/extractor.rb', line 231
# Reports whether this extractor is evictable. This implementation always
# answers no.
#
# @return [Boolean] always false
def evictable?
  false
end
|
#identifying_error_message(identifier) ⇒ Object
158
159
160
|
# File 'lib/connectors_sdk/base/extractor.rb', line 158
# Builds the optional " of '<identifier>'" fragment used in extraction log
# messages.
#
# @param identifier value checked with +present?+
# @return [String] the fragment, or an empty string when the identifier is
#   not present
def identifying_error_message(identifier)
  return '' unless identifier.present?

  " of '#{identifier}'"
end
|
#permissions(source_user_id, &block) ⇒ Object
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
|
# File 'lib/connectors_sdk/base/extractor.rb', line 190
# Collects the permissions reported by +yield_permissions+ for one user.
#
# The work runs inside +with_auth_tokens_and_retry+ and is timed with
# Connectors::Stats.measure. Every yielded collection is logged, handed to
# the caller's block (if any), and remembered; the last one remembered is
# what the returned enumerator iterates. If nothing is yielded the
# enumerator is over an empty array.
#
# @param source_user_id forwarded to +yield_permissions+ and used in the log line
# @yield [permissions] each collection yielded by +yield_permissions+
# @return [Enumerator] +each+ enumerator over the last yielded collection
def permissions(source_user_id, &block)
  latest = []
  Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.permissions") do
    with_auth_tokens_and_retry do
      Connectors::Stats.measure("extractor.#{Connectors::Stats.class_key(self.class)}.yield_permissions") do
        yield_permissions(source_user_id) do |user_permissions|
          log_info("Extracted #{user_permissions.size} permissions for source user #{source_user_id}")
          latest = user_permissions
          block.call(user_permissions) unless block.nil?
        end
      end
    end
  end
  latest.each
end
|
#retrieve_latest_cursors ⇒ Object
73
74
75
|
# File 'lib/connectors_sdk/base/extractor.rb', line 73
# Returns the latest cursors. This implementation has none to offer and
# always returns nil.
#
# @return [nil]
def retrieve_latest_cursors
  nil
end
|
#transient_error?(error) ⇒ Boolean
227
228
229
|
# File 'lib/connectors_sdk/base/extractor.rb', line 227
# Checks whether the error is an instance of (or descends from) any class
# listed in TRANSIENT_SERVER_ERROR_CLASSES.
#
# @param error the object to classify
# @return [Boolean] true when at least one listed class matches
def transient_error?(error)
  TRANSIENT_SERVER_ERROR_CLASSES.any? { |transient_class| error.is_a?(transient_class) }
end
|
#with_auth_tokens_and_retry(&block) ⇒ Object
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
# File 'lib/connectors_sdk/base/extractor.rb', line 77
# Runs the block through +convert_transient_server_errors+ and
# +convert_rate_limit_errors+, retrying on unexpected failures.
#
# Error handling, in rescue order:
# * TokenRefreshFailedError: logged and re-raised.
# * PublishingFailedError: logged, then its +reason+ is raised.
# * EvictionWithNoProgressError: logged and re-raised.
# * Eviction, throttling, document-limit, monitoring, interruption,
#   invalid-secret and invalid-indexing-configuration errors: re-raised
#   without logging or retrying.
# * Any other StandardError: the block is retried until
#   MAX_CONNECTION_ATTEMPTS attempts have failed, after which the error is
#   recorded with ExceptionTracking.log_exception and re-raised.
#
# @yield the work to perform
# @return the value of the wrapped call when it succeeds
def with_auth_tokens_and_retry(&block)
  connection_attempts = 0
  # The explicit begin keeps the attempt counter outside the region that
  # `retry` re-runs.
  begin
    convert_transient_server_errors do
      convert_rate_limit_errors(&block)
    end
  rescue ConnectorsShared::TokenRefreshFailedError => e
    log_error('Could not refresh token, aborting')
    raise e
  rescue ConnectorsShared::PublishingFailedError => e
    log_error('Could not publish, aborting')
    raise e.reason
  rescue ConnectorsShared::EvictionWithNoProgressError
    log_error('Aborting job because it did not make any progress and cannot be evicted')
    raise
  rescue ConnectorsShared::EvictionError,
         ConnectorsShared::ThrottlingError,
         ConnectorsShared::JobDocumentLimitError,
         ConnectorsShared::MonitoringError,
         ConnectorsShared::JobInterruptedError,
         ConnectorsShared::SecretInvalidError,
         ConnectorsShared::InvalidIndexingConfigurationError
    # These are passed straight to the caller and never retried.
    raise
  rescue StandardError => e
    ConnectorsShared::ExceptionTracking.augment_exception(e)
    connection_attempts += 1
    giving_up = connection_attempts >= MAX_CONNECTION_ATTEMPTS
    outcome = giving_up ? 'giving up' : 'trying again'

    # Same wording on every attempt so the log lines can be matched uniformly.
    log_warn("Failed to connect in with_auth_tokens_and_retry. Reason: #{e.class}: #{e.message} {:message_id => #{e.id}}")
    log_warn("Retries: #{connection_attempts}/#{MAX_CONNECTION_ATTEMPTS}, #{outcome}.")
    retry unless giving_up

    ConnectorsShared::ExceptionTracking.log_exception(e)
    raise e
  end
end
|
#yield_deleted_ids(_ids) ⇒ Object
162
163
164
|
# File 'lib/connectors_sdk/base/extractor.rb', line 162
# Hook for reporting deleted ids. This implementation does not provide one
# and raises.
#
# @param _ids unused here
# @raise [NotImplementedError] always
def yield_deleted_ids(_ids)
  raise NotImplementedError
end
|
#yield_document_changes(modified_since: nil) ⇒ Object
118
119
120
|
# File 'lib/connectors_sdk/base/extractor.rb', line 118
# Hook for reporting document changes. This implementation does not provide
# one and raises.
#
# @param modified_since unused here
# @raise [NotImplementedError] always
def yield_document_changes(modified_since: nil)
  raise NotImplementedError
end
|
#yield_permissions(source_user_id) ⇒ Object
186
187
188
|
# File 'lib/connectors_sdk/base/extractor.rb', line 186
# Hook for reporting a user's permissions. This implementation yields
# nothing and returns nil.
#
# @param source_user_id unused here
# @return [nil]
def yield_permissions(source_user_id)
  nil
end
|
#yield_single_document_change(identifier: nil, &block) ⇒ Object
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
# File 'lib/connectors_sdk/base/extractor.rb', line 142
# Runs the extraction block for a single document and reports the outcome to
# the monitor.
#
# On success +monitor.note_success+ is called. An error matching one of
# +fatal_exception_classes+ is logged at error level and re-raised. Any other
# StandardError is logged at warn level and passed to +monitor.note_error+
# instead of being raised.
#
# @param identifier optional label used in the log messages
# @yield the extraction work for one document
# @return the value of +monitor.note_success+ or +monitor.note_error+
def yield_single_document_change(identifier: nil, &block)
  if identifier
    log_debug("Extracting single document for #{identifier}")
  end
  convert_transient_server_errors { convert_rate_limit_errors(&block) }
  monitor.note_success
rescue *fatal_exception_classes => error
  ConnectorsShared::ExceptionTracking.augment_exception(error)
  details = "#{identifying_error_message(identifier)}: #{error.class}: #{error.message} {:message_id => #{error.id}}"
  log_error("Encountered a fall-through error during extraction#{details}")
  raise
rescue StandardError => error
  ConnectorsShared::ExceptionTracking.augment_exception(error)
  details = "#{identifying_error_message(identifier)}: #{error.class}: #{error.message} {:message_id => #{error.id}}"
  log_warn("Encountered error during extraction#{details}")
  monitor.note_error(error, :id => error.id)
end
|