Class: ConnectorsSdk::Office365::Extractor

Inherits:
Base::Extractor show all
Defined in:
lib/connectors_sdk/office365/extractor.rb

Direct Known Subclasses

SharePoint::Extractor

Constant Summary collapse

DRIVE_IDS_CURSOR_KEY =
'drive_ids'.freeze

Constants inherited from Base::Extractor

Base::Extractor::DEFAULT_CURSOR_KEY, Base::Extractor::MAX_CONNECTION_ATTEMPTS, Base::Extractor::TRANSIENT_SERVER_ERROR_CLASSES

Instance Attribute Summary

Attributes inherited from Base::Extractor

#client_proc, #completed, #config, #content_source_id, #features, #monitor, #original_cursors, #service_type

Instance Method Summary collapse

Methods inherited from Base::Extractor

#authorization_data, #authorization_data!, #client, #client!, #convert_transient_server_errors, #cursors_modified_since_start?, #deleted_ids, #document_changes, #download_args_and_proc, #evictable?, #identifying_error_message, #initialize, #permissions, #transient_error?, #with_auth_tokens_and_retry, #yield_single_document_change

Constructor Details

This class inherits a constructor from ConnectorsSdk::Base::Extractor

Instance Method Details

#download(item) ⇒ Object



88
89
90
91
# File 'lib/connectors_sdk/office365/extractor.rb', line 88

# Downloads the content behind a drive item's download URL.
#
# @param item [#[]] item descriptor; only its +:download_url+ entry is read
# @return [Object] whatever +client.download_item+ returns for that URL
def download(item)
  client.download_item(item[:download_url])
end

#retrieve_latest_cursors ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/connectors_sdk/office365/extractor.rb', line 56

# Asks the client for the latest delta link of every drive returned by
# +drives_to_index+ and packages the result as a cursors hash.
#
# If fetching a link fails with a ClientError, a warning is logged, the
# exception is captured and then re-raised, so no partial result is returned.
#
# @return [Hash] +{ DRIVE_IDS_CURSOR_KEY => { drive_id => delta_link } }+
# @raise [ConnectorsSdk::Office365::CustomClient::ClientError] when the client
#   fails to return a delta link for any drive
def retrieve_latest_cursors
  latest_links = {}

  # All drive ids are collected up front, before the first client call.
  drives_to_index.map(&:id).each do |drive_id|
    begin
      latest_links[drive_id] = client.get_latest_delta_link(drive_id)
    rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
      log_warn("Error getting delta link for #{drive_id}")
      capture_exception(e)
      raise
    end
  end

  { DRIVE_IDS_CURSOR_KEY => latest_links }
end

#yield_deleted_ids(ids) ⇒ Object



50
51
52
53
54
# File 'lib/connectors_sdk/office365/extractor.rb', line 50

# Yields each id from +ids+ that is not present in +existing_drive_item_ids+.
#
# @param ids [#each] candidate ids to check
# @yieldparam id [Object] an id that +existing_drive_item_ids+ does not include
# @return [Object] the result of +ids.each+
def yield_deleted_ids(ids)
  ids.each do |candidate_id|
    # Ids that are still known are skipped; everything else is reported.
    next if existing_drive_item_ids.include?(candidate_id)

    yield candidate_id
  end
end

#yield_document_changes(modified_since: nil, &block) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/connectors_sdk/office365/extractor.rb', line 17

# Walks every drive from +drives_to_index+ and forwards its documents to
# +block+, choosing a crawl strategy per drive:
#
# * a delta link stored for the drive under DRIVE_IDS_CURSOR_KEY in
#   +config.cursors+ -> incremental crawl via +yield_changes+, falling back
#   to +yield_drive_items+ when the cursor is rejected as invalid;
# * otherwise, a present +modified_since+ -> +yield_changes+ with
#   +:last_modified+;
# * otherwise -> full crawl via +yield_drive_items+.
#
# A ClientError raised while processing a drive is logged and captured, and
# iteration continues with the next drive.
#
# @param modified_since [Object, nil] passed as +:last_modified+ when no
#   cursor exists for a drive
# @param block [Proc] forwarded to +yield_changes+ / +yield_drive_items+
# @return [nil]
def yield_document_changes(modified_since: nil, &block)
  drives_to_index.each do |drive|
    drive_id = drive.id
    # Metadata options shared by every crawl call made for this drive.
    drive_details = {
      :drive_owner_name => drive.dig(:owner, :user, :displayName),
      :drive_name => drive.name,
      :site_name => drive.site_name
    }
    delta_links_by_drive = config.cursors.fetch(DRIVE_IDS_CURSOR_KEY, {})

    begin
      start_delta_link = delta_links_by_drive[drive_id]

      if start_delta_link
        log_debug("Starting an incremental crawl with cursor for #{service_type.classify} with drive_id: #{drive_id}")
        begin
          yield_changes(drive_id, start_delta_link: start_delta_link, **drive_details, &block)
        rescue ConnectorsSdk::Office365::CustomClient::Office365InvalidCursorsError
          # The stored cursor is no longer usable, so re-crawl the whole drive.
          log_warn("Error listing changes with start_delta_link: #{start_delta_link}, falling back to full crawl")
          yield_drive_items(drive_id, **drive_details, &block)
        end
      elsif modified_since.present?
        log_debug("Starting an incremental crawl using last_modified (no cursor found) for #{service_type.classify} with drive_id: #{drive_id}")
        yield_changes(drive_id, last_modified: modified_since, **drive_details, &block)
      else
        log_debug("Starting a full crawl #{service_type.classify} with drive_id: #{drive_id}")
        yield_drive_items(drive_id, **drive_details, &block)
      end
    rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
      # A failing drive must not abort the remaining drives.
      log_warn("Error searching and listing drive #{drive_id}")
      capture_exception(e)
    end
  end

  nil
end

#yield_permissions(source_user_id) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/connectors_sdk/office365/extractor.rb', line 70

# Yields the permission identifiers for a user: the user id itself plus one
# "<group displayName> Members" entry per group returned by
# +client.user_groups+, with duplicates removed.
#
# When the client raises a ClientError with status code 404, a warning is
# logged and an empty list is yielded instead; any other ClientError is
# re-raised.
#
# NOTE(review): the rescue is method-level, so it also covers the +yield+ of
# the permission list; a 404 ClientError raised from the caller's block would
# be handled here as well.
#
# @param source_user_id [Object] id of the user whose groups are looked up
# @yieldparam permissions [Array] unique permission identifiers, or +[]+ when
#   the user could not be found
# @raise [ConnectorsSdk::Office365::CustomClient::ClientError] for any
#   non-404 client failure
def yield_permissions(source_user_id)
  user_permissions = [source_user_id]
  client.user_groups(source_user_id, %w[id displayName]).each do |group|
    # The "Members" suffix mirrors how the item permissions endpoint returns group permissions.
    user_permissions.push("#{group.displayName} Members")
  end

  yield user_permissions.uniq
rescue ConnectorsSdk::Office365::CustomClient::ClientError => e
  # A deleted user makes client.user_groups fail with 404 Not Found; relying
  # on that saves a separate call to fetch the user profile.
  raise unless e.status_code == 404

  log_warn("Could not find a user with id #{source_user_id}")
  yield []
end