Class: Outhad::Integrations::Core::UnstructuredSourceConnector

Inherits:
SourceConnector show all
Defined in:
lib/outhad/integrations/core/unstructured_source_connector.rb

Constant Summary collapse

# JSON-schema-style description of a single unstructured record.
# The record is an object with six fields; every field is typed as a
# string and every field is listed as required.
UNSTRUCTURED_SCHEMA = {
  type: "object",
  # Each field maps to its own { type: "string" } definition.
  properties: %i[
    element_id text created_date modified_date filename filetype
  ].to_h { |field| [field, { type: "string" }] },
  required: %w[element_id text created_date modified_date filename filetype]
}.freeze
# Stream-level settings that are splatted into the stream built for
# unstructured data, alongside its name, action and JSON schema.
UNSTRUCTURED_STREAM_CONFIG = {
  supported_sync_modes: ["incremental"],      # the only sync mode listed
  source_defined_cursor: true,
  default_cursor_field: ["modified_date"],    # a field of UNSTRUCTURED_SCHEMA
  source_defined_primary_key: [["element_id"]] # nested array, single key path
}.freeze
UNSTRUCTURED =

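Identifier for unstructured data: it is compared against a connection's "data_type" setting and used as the name of the unstructured stream. The constants that follow name the commands for unstructured data operations.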

"unstructured"
# Command identifiers for unstructured data operations.
LIST_FILES_CMD = "list_files"
DOWNLOAD_FILE_CMD = "download_file"

Constants included from Constants

Constants::AIRTABLE_BASES_ENDPOINT, Constants::AIRTABLE_GET_BASE_SCHEMA_ENDPOINT, Constants::AIRTABLE_URL_BASE, Constants::ANTHROPIC_URL, Constants::CATALOG_SPEC_PATH, Constants::CONNECTOR_SPEC_PATH, Constants::DATABRICKS_DRIVER_PATH, Constants::DATABRICKS_HEALTH_URL, Constants::DATABRICKS_MAC_DRIVER_PATH, Constants::DATABRICKS_SERVING_URL, Constants::FACEBOOK_AUDIENCE_GET_ALL_ACCOUNTS, Constants::FIRECRAWL_CRAWL_ACTIVE_URL, Constants::FIRECRAWL_CRAWL_URL, Constants::FIRECRAWL_GET_CRAWL_URL, Constants::FIRECRAWL_REQUEST_RATE_LIMIT, Constants::FIRECRAWL_SCRAPE_URL, Constants::GOOGLE_SHEETS_SCOPE, Constants::GOOGLE_SPREADSHEET_ID_REGEX, Constants::GOOGLE_VERTEX_ENDPOINT_SERVICE_URL, Constants::GOOGLE_VERTEX_MODEL_NAME, Constants::HTTP_DELETE, Constants::HTTP_GET, Constants::HTTP_PATCH, Constants::HTTP_POST, Constants::HTTP_PUT, Constants::INSTALL_HTTPFS_QUERY, Constants::JSON_SCHEMA_URL, Constants::KLAVIYO_AUTH_ENDPOINT, Constants::KLAVIYO_AUTH_PAYLOAD, Constants::META_DATA_PATH, Constants::MISTRAL_AI_MODEL, Constants::MS_DYNAMICS_REST_API, Constants::MS_DYNAMICS_WHOAMI_API, Constants::MS_EXCEL_AUTH_ENDPOINT, Constants::MS_EXCEL_FILES_API, Constants::MS_EXCEL_SHEET_RANGE_API, Constants::MS_EXCEL_TABLE_API, Constants::MS_EXCEL_TABLE_ROW_WRITE_API, Constants::MS_EXCEL_WORKSHEETS_API, Constants::OPEN_AI_URL, Constants::QDRANT_SEARCH_URL, Constants::QUICKBOOKS_PRODUCTION_QUERY_URL, Constants::QUICKBOOKS_REDIRECT_URL, Constants::QUICKBOOKS_SANDBOX_QUERY_URL, Constants::SNOWFLAKE_DRIVER_PATH, Constants::SNOWFLAKE_MAC_DRIVER_PATH, Constants::WATSONX_DATA_QUERIES_URL, Constants::WATSONX_GENERATION_DEPLOYMENT_URL, Constants::WATSONX_HEALTH_DEPLOYMENT_URL, Constants::WATSONX_PREDICTION_DEPLOYMENT_URL, Constants::WATSONX_STREAM_DEPLOYMENT_URL, Constants::ZENDESK_URL_SUFFIX

Constants included from Protocol

Protocol::CompressionType, Protocol::ConnectionStatusType, Protocol::ConnectorQueryType, Protocol::ConnectorType, Protocol::ControlMessageType, Protocol::DestinationSyncMode, Protocol::FileFormatType, Protocol::LogLevel, Protocol::ModelQueryType, Protocol::OuthadMessageType, Protocol::RequestRateLimitingUnit, Protocol::SchemaMode, Protocol::StreamAction, Protocol::StreamType, Protocol::SyncMode, Protocol::SyncStatus

Instance Method Summary collapse

Methods inherited from SourceConnector

#read

Methods inherited from BaseConnector

#check_connection, #connector_spec, #discover, #meta_data, #relative_path

Methods included from Utils

#build_catalog, #build_stream, #convert_to_json_schema, #create_log_message, #extract_data, #handle_exception, #hash_to_string, #keys_to_symbols, #log_request_response, #logger, #map_type_to_json_schema, #report_exception, #success?

Instance Method Details

#create_unstructured_stream ⇒ Object



42
43
44
45
46
47
48
49
# File 'lib/outhad/integrations/core/unstructured_source_connector.rb', line 42

# Builds the protocol stream definition for unstructured data.
#
# The stream is named UNSTRUCTURED, uses the "fetch" stream action and
# UNSTRUCTURED_SCHEMA as its JSON schema. All remaining attributes come
# from UNSTRUCTURED_STREAM_CONFIG.
#
# @return [Object] whatever Outhad::Integrations::Protocol::Stream.new
#   returns (presumably a Stream instance)
def create_unstructured_stream
  stream_attributes = {
    name: UNSTRUCTURED,
    action: StreamAction["fetch"],
    json_schema: UNSTRUCTURED_SCHEMA
  }.merge(UNSTRUCTURED_STREAM_CONFIG) # config is applied last, as with a trailing splat

  Outhad::Integrations::Protocol::Stream.new(**stream_attributes)
end

#unstructured_data?(connection_config) ⇒ Boolean

Returns:

  • (Boolean)


38
39
40
# File 'lib/outhad/integrations/core/unstructured_source_connector.rb', line 38

# Tells whether a connection is configured for unstructured data.
#
# @param connection_config [#[]] configuration looked up with the string
#   key "data_type"
# @return [Boolean] true when the "data_type" entry equals UNSTRUCTURED
def unstructured_data?(connection_config)
  configured_type = connection_config["data_type"]
  configured_type == UNSTRUCTURED
end