Class: ConnectorsSdk::Office365::Extractor
Constant Summary
collapse
- DRIVE_IDS_CURSOR_KEY =
'drive_ids'.freeze
Base::Extractor::DEFAULT_CURSOR_KEY, Base::Extractor::MAX_CONNECTION_ATTEMPTS, Base::Extractor::TRANSIENT_SERVER_ERROR_CLASSES
Instance Attribute Summary
#client_proc, #completed, #config, #content_source_id, #features, #monitor, #original_cursors, #service_type
Instance Method Summary
collapse
#authorization_data, #authorization_data!, #client, #client!, #convert_transient_server_errors, #cursors_modified_since_start?, #deleted_ids, #document_changes, #download_args_and_proc, #evictable?, #identifying_error_message, #initialize, #permissions, #transient_error?, #with_auth_tokens_and_retry, #yield_single_document_change
Instance Method Details
#download(item) ⇒ Object
106
107
108
109
|
# File 'lib/connectors_sdk/office365/extractor.rb', line 106
# Downloads the content addressed by an item's download URL.
#
# @param item [#[]] item descriptor; only its +:download_url+ entry is read
# @return [Object] whatever +client.download_item+ returns for that URL
def download(item)
  client.download_item(item[:download_url])
end
|
#retrieve_latest_cursors ⇒ Object
74
75
76
77
78
79
80
81
82
83
84
85
86
|
# File 'lib/connectors_sdk/office365/extractor.rb', line 74
# Builds a fresh cursor hash holding the latest delta link of every drive
# returned by +drives_to_index+.
#
# @return [Hash] a single-entry hash: +DRIVE_IDS_CURSOR_KEY+ => { drive_id => delta_link }
# @raise [ConnectorsSdk::Office365::CustomClient::ClientError] re-raised after being
#   logged and captured when a delta link lookup fails
def retrieve_latest_cursors
  latest_links = {}

  drives_to_index.map(&:id).each do |drive_id|
    begin
      latest_links[drive_id] = client.get_latest_delta_link(drive_id)
    rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
      # Report the failure, then let it propagate: no partial cursor set is returned.
      log_warn("Error getting delta link for #{drive_id}")
      capture_exception(e)
      raise
    end
  end

  { DRIVE_IDS_CURSOR_KEY => latest_links }
end
|
#yield_deleted_ids(ids) ⇒ Object
68
69
70
71
72
|
# File 'lib/connectors_sdk/office365/extractor.rb', line 68
# Yields each id from +ids+ that is not contained in +existing_drive_item_ids+.
#
# @param ids [#each] candidate ids to check
# @yieldparam id [Object] an id missing from +existing_drive_item_ids+
# @return [Object] the result of +ids.each+
def yield_deleted_ids(ids)
  ids.each do |candidate_id|
    next if existing_drive_item_ids.include?(candidate_id)

    yield candidate_id
  end
end
|
#yield_document_changes(modified_since: nil, break_after_page: false, &block) ⇒ Object
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
# File 'lib/connectors_sdk/office365/extractor.rb', line 17
# Iterates over +drives_to_index+ and streams each drive's document changes to +block+.
#
# The crawl mode is chosen per drive:
# * a delta link stored for the drive under +DRIVE_IDS_CURSOR_KEY+ in +config.cursors+
#   starts +yield_changes+ from that link; if the link is rejected with
#   +Office365InvalidCursorsError+ the drive is crawled with +yield_drive_items+ instead;
# * otherwise a present +modified_since+ starts +yield_changes+ with +:last_modified+;
# * otherwise +yield_drive_items+ is used.
#
# A +ClientError+ raised while crawling a drive is logged and captured, and the loop
# continues with the next drive.
#
# With +break_after_page+ set, the drive being processed is recorded in
# +config.cursors['current_drive_id']+, the loop stops as soon as a +page_cursor+ or
# +item_children_next_link+ cursor is present, and once neither is present the
# extraction is marked completed and the cursors are replaced by +retrieve_latest_cursors+.
#
# @param modified_since [Object, nil] lower bound passed as +:last_modified+ when no delta link exists
# @param break_after_page [Boolean] enables the paged behaviour described above
# @param block [Proc] forwarded to +yield_changes+ / +yield_drive_items+
# @return [nil]
def yield_document_changes(modified_since: nil, break_after_page: false, &block)
  drives_to_index.each do |drive|
    drive_id = drive.id

    if break_after_page
      current_drive_id = config.cursors['current_drive_id']
      # Skip drives whose id sorts before the one recorded by an earlier paged run.
      # NOTE(review): presumably relies on drives_to_index returning drives ordered by id - confirm.
      next if current_drive_id.present? && current_drive_id > drive_id

      config.cursors['current_drive_id'] = drive_id
    end

    # Options shared by every crawl call for this drive.
    crawl_options = {
      drive_owner_name: drive.dig(:owner, :user, :displayName),
      drive_name: drive.name,
      break_after_page: break_after_page
    }
    drive_id_to_delta_link = config.cursors.fetch(DRIVE_IDS_CURSOR_KEY, {})

    begin
      if (start_delta_link = drive_id_to_delta_link[drive_id])
        log_debug("Starting an incremental crawl with cursor for #{service_type.classify} with drive_id: #{drive_id}")
        begin
          yield_changes(drive_id, start_delta_link: start_delta_link, **crawl_options, &block)
        rescue ConnectorsSdk::Office365::CustomClient::Office365InvalidCursorsError
          log_warn("Error listing changes with start_delta_link: #{start_delta_link}, falling back to full crawl")
          yield_drive_items(drive_id, **crawl_options, &block)
        end
      elsif modified_since.present?
        log_debug("Starting an incremental crawl using last_modified (no cursor found) for #{service_type.classify} with drive_id: #{drive_id}")
        yield_changes(drive_id, last_modified: modified_since, **crawl_options, &block)
      else
        log_debug("Starting a full crawl #{service_type.classify} with drive_id: #{drive_id}")
        yield_drive_items(drive_id, **crawl_options, &block)
      end
    rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
      # Best effort: a failing drive is reported and does not abort the remaining drives.
      log_warn("Error searching and listing drive #{drive_id}")
      capture_exception(e)
    end

    # A pending page cursor means this drive is not finished; stop here in paged mode.
    break if break_after_page && (config.cursors['page_cursor'].present? || config.cursors['item_children_next_link'].present?)
  end

  if break_after_page && config.cursors['page_cursor'].blank? && config.cursors['item_children_next_link'].blank?
    @completed = true
    config.overwrite_cursors!(retrieve_latest_cursors)
    log_debug("Completed #{modified_since.nil? ? 'full' : 'incremental'} extraction")
  end

  nil
end
|
#yield_permissions(source_user_id) ⇒ Object
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
|
# File 'lib/connectors_sdk/office365/extractor.rb', line 88
# Yields the permission list for a user: the user id itself followed by one
# "<group displayName> Members" entry per group returned by +client.user_groups+,
# with duplicates removed.
#
# When the lookup fails with a +ClientError+ whose +status_code+ is 404, a warning
# is logged and an empty list is yielded instead.
#
# @param source_user_id [Object] id of the user whose groups are looked up
# @yieldparam permissions [Array] the permission entries (empty on 404)
# @return [Object] the value returned by the block
# @raise [ConnectorsSdk::Office365::CustomClient::ClientError] for any status other than 404
def yield_permissions(source_user_id)
  entries = [source_user_id]
  groups = client.user_groups(source_user_id, %w[id displayName])
  groups.each { |group| entries << "#{group.displayName} Members" }

  yield entries.uniq
rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
  # Only a missing user is tolerated; everything else propagates unchanged.
  raise unless e.status_code == 404

  log_warn("Could not find a user with id #{source_user_id}")
  yield []
end
|