Class: Carbon::FilesApi

Inherits:
Object
  • Object
show all
Defined in:
lib/carbon_ruby_sdk/api/files_api.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(api_client = ApiClient.default) ⇒ FilesApi

Returns a new instance of FilesApi.



15
16
17
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 15

# Builds a FilesApi and stores the given client in @api_client.
#
# @param api_client client to keep on the instance; when the argument is
#   omitted, ApiClient.default is evaluated and used instead
def initialize(api_client = ApiClient.default)
  @api_client = api_client
end

Instance Attribute Details

#api_client ⇒ Object

Returns the value of attribute api_client.



13
14
15
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 13

# Reader for the api_client attribute.
#
# @return the current value of @api_client
def api_client
  @api_client
end

Instance Method Details

#create_user_file_tags(tags:, organization_user_file_id:, extra: {}) ⇒ Object

Create File Tags

A tag is a key-value pair that can be added to a file. This pair can then be used for searches (e.g. embedding searches) in order to narrow down the scope of the search. A file can have any number of tags. The following are reserved keys that cannot be used:

  • db_embedding_id

  • organization_id

  • user_id

  • organization_user_file_id

Carbon currently supports two data types for tag values - `string` and `list<string>`. Keys can only be `string`. If values other than `string` and `list<string>` are used, they’re automatically converted to strings (e.g. 4 will become “4”).

Parameters:

  • tags (Hash<String, Tags1>)
  • organization_user_file_id (Integer)
  • body (OrganizationUserFileTagCreate)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



37
38
39
40
41
42
43
44
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 37

# Create File Tags
#
# Assembles the request body from +tags+ and +organization_user_file_id+
# (an argument equal to SENTINEL is left out), hands it together with +extra+
# to +create_user_file_tags_with_http_info_impl+, and returns the +data+ of
# the response.
#
# @param tags [Hash<String, Tags1>]
# @param organization_user_file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def create_user_file_tags(tags:, organization_user_file_id:, extra: {})
  payload = {
    tags: tags,
    organization_user_file_id: organization_user_file_id
  }.select { |_name, value| value != SENTINEL }
  create_user_file_tags_with_http_info_impl(payload, extra).data
end

#create_user_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {}) ⇒ Object

Create File Tags

A tag is a key-value pair that can be added to a file. This pair can then be used for searches (e.g. embedding searches) in order to narrow down the scope of the search. A file can have any number of tags. The following are reserved keys that cannot be used:

  • db_embedding_id

  • organization_id

  • user_id

  • organization_user_file_id

Carbon currently supports two data types for tag values - `string` and `list<string>`. Keys can only be `string`. If values other than `string` and `list<string>` are used, they’re automatically converted to strings (e.g. 4 will become “4”).

Parameters:

  • tags (Hash<String, Tags1>)
  • organization_user_file_id (Integer)
  • body (OrganizationUserFileTagCreate)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



64
65
66
67
68
69
70
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 64

# Create File Tags
#
# Same body construction as #create_user_file_tags: +tags+ and
# +organization_user_file_id+ are included unless they equal SENTINEL. The
# result of +create_user_file_tags_with_http_info_impl+ is returned as-is.
#
# @param tags [Hash<String, Tags1>]
# @param organization_user_file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +create_user_file_tags_with_http_info_impl+ returns
def create_user_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {})
  payload = {
    tags: tags,
    organization_user_file_id: organization_user_file_id
  }.select { |_name, value| value != SENTINEL }
  create_user_file_tags_with_http_info_impl(payload, extra)
end

#delete(file_id:, extra: {}) ⇒ Object

Delete File Endpoint

Parameters:

  • file_id (Integer)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



145
146
147
148
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 145

# Delete File Endpoint
#
# Passes +file_id+ and +extra+ to +delete_with_http_info_impl+ and returns
# the +data+ of the response.
#
# @param file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def delete(file_id:, extra: {})
  delete_with_http_info_impl(file_id, extra).data
end

#delete_file_tags(tags:, organization_user_file_id:, extra: {}) ⇒ Object

Delete File Tags

Parameters:

  • tags (Array<String>)
  • organization_user_file_id (Integer)
  • body (OrganizationUserFileTagsRemove)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



226
227
228
229
230
231
232
233
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 226

# Delete File Tags
#
# Builds the request body from +tags+ and +organization_user_file_id+,
# omitting any argument equal to SENTINEL, sends it with +extra+ to
# +delete_file_tags_with_http_info_impl+, and returns the response +data+.
#
# @param tags [Array<String>]
# @param organization_user_file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def delete_file_tags(tags:, organization_user_file_id:, extra: {})
  payload = {
    tags: tags,
    organization_user_file_id: organization_user_file_id
  }.select { |_name, value| value != SENTINEL }
  delete_file_tags_with_http_info_impl(payload, extra).data
end

#delete_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {}) ⇒ Object

Delete File Tags

Parameters:

  • tags (Array<String>)
  • organization_user_file_id (Integer)
  • body (OrganizationUserFileTagsRemove)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



241
242
243
244
245
246
247
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 241

# Delete File Tags
#
# Builds the request body from +tags+ and +organization_user_file_id+,
# omitting any argument equal to SENTINEL, and returns the result of
# +delete_file_tags_with_http_info_impl+ unchanged.
#
# @param tags [Array<String>]
# @param organization_user_file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +delete_file_tags_with_http_info_impl+ returns
def delete_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {})
  payload = {
    tags: tags,
    organization_user_file_id: organization_user_file_id
  }.select { |_name, value| value != SENTINEL }
  delete_file_tags_with_http_info_impl(payload, extra)
end

#delete_many(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {}) ⇒ Object

Delete Files Endpoint

Parameters:

  • file_ids (Array<Integer>) (defaults to: SENTINEL)
  • sync_statuses (Array<ExternalFileSyncStatuses>) (defaults to: SENTINEL)
  • delete_non_synced_only (Boolean) (defaults to: false)
  • send_webhook (Boolean) (defaults to: false)
  • delete_child_files (Boolean) (defaults to: false)
  • body (DeleteFilesQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



325
326
327
328
329
330
331
332
333
334
335
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 325

# Delete Files Endpoint
#
# Collects the keyword arguments into a request body, dropping every one that
# equals SENTINEL (so the boolean flags are sent even at their +false+
# default), forwards the body and +extra+ to +delete_many_with_http_info_impl+,
# and returns the response +data+.
#
# @param file_ids [Array<Integer>]
# @param sync_statuses [Array<ExternalFileSyncStatuses>]
# @param delete_non_synced_only [Boolean]
# @param send_webhook [Boolean]
# @param delete_child_files [Boolean]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def delete_many(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {})
  candidates = {
    file_ids: file_ids,
    sync_statuses: sync_statuses,
    delete_non_synced_only: delete_non_synced_only,
    send_webhook: send_webhook,
    delete_child_files: delete_child_files
  }
  payload = candidates.select { |_name, value| value != SENTINEL }
  delete_many_with_http_info_impl(payload, extra).data
end

#delete_many_with_http_info(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {}) ⇒ Object

Delete Files Endpoint

Parameters:

  • file_ids (Array<Integer>) (defaults to: SENTINEL)
  • sync_statuses (Array<ExternalFileSyncStatuses>) (defaults to: SENTINEL)
  • delete_non_synced_only (Boolean) (defaults to: false)
  • send_webhook (Boolean) (defaults to: false)
  • delete_child_files (Boolean) (defaults to: false)
  • body (DeleteFilesQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



346
347
348
349
350
351
352
353
354
355
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 346

# Delete Files Endpoint
#
# Collects the keyword arguments into a request body, dropping every one that
# equals SENTINEL, and returns the result of +delete_many_with_http_info_impl+
# unchanged.
#
# @param file_ids [Array<Integer>]
# @param sync_statuses [Array<ExternalFileSyncStatuses>]
# @param delete_non_synced_only [Boolean]
# @param send_webhook [Boolean]
# @param delete_child_files [Boolean]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +delete_many_with_http_info_impl+ returns
def delete_many_with_http_info(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {})
  candidates = {
    file_ids: file_ids,
    sync_statuses: sync_statuses,
    delete_non_synced_only: delete_non_synced_only,
    send_webhook: send_webhook,
    delete_child_files: delete_child_files
  }
  payload = candidates.select { |_name, value| value != SENTINEL }
  delete_many_with_http_info_impl(payload, extra)
end

#delete_v2(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {}) ⇒ Object

Delete Files V2 Endpoint

Parameters:

  • filters (OrganizationUserFilesToSyncFilters) (defaults to: SENTINEL)
  • send_webhook (Boolean) (defaults to: false)
  • preserve_file_record (Boolean) (defaults to: false)

    Whether or not to delete all data related to the file from the database, BUT to preserve the file metadata, allowing for resyncs. By default `preserve_file_record` is false, which means that all data related to the file *as well as* its metadata will be deleted. Note that even if `preserve_file_record` is true, raw files uploaded via the `uploadfile` endpoint still cannot be resynced.

  • body (DeleteFilesV2QueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



431
432
433
434
435
436
437
438
439
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 431

# Delete Files V2 Endpoint
#
# Puts +filters+, +send_webhook+ and +preserve_file_record+ into a request
# body (skipping any that equal SENTINEL), forwards it with +extra+ to
# +delete_v2_with_http_info_impl+, and returns the response +data+.
#
# @param filters [OrganizationUserFilesToSyncFilters]
# @param send_webhook [Boolean]
# @param preserve_file_record [Boolean]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def delete_v2(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {})
  payload = {
    filters: filters,
    send_webhook: send_webhook,
    preserve_file_record: preserve_file_record
  }.select { |_name, value| value != SENTINEL }
  delete_v2_with_http_info_impl(payload, extra).data
end

#delete_v2_with_http_info(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {}) ⇒ Object

Delete Files V2 Endpoint

Parameters:

  • filters (OrganizationUserFilesToSyncFilters) (defaults to: SENTINEL)
  • send_webhook (Boolean) (defaults to: false)
  • preserve_file_record (Boolean) (defaults to: false)

    Whether or not to delete all data related to the file from the database, BUT to preserve the file metadata, allowing for resyncs. By default `preserve_file_record` is false, which means that all data related to the file *as well as* its metadata will be deleted. Note that even if `preserve_file_record` is true, raw files uploaded via the `uploadfile` endpoint still cannot be resynced.

  • body (DeleteFilesV2QueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



448
449
450
451
452
453
454
455
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 448

# Delete Files V2 Endpoint
#
# Puts +filters+, +send_webhook+ and +preserve_file_record+ into a request
# body (skipping any that equal SENTINEL) and returns the result of
# +delete_v2_with_http_info_impl+ unchanged.
#
# @param filters [OrganizationUserFilesToSyncFilters]
# @param send_webhook [Boolean]
# @param preserve_file_record [Boolean]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +delete_v2_with_http_info_impl+ returns
def delete_v2_with_http_info(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {})
  payload = {
    filters: filters,
    send_webhook: send_webhook,
    preserve_file_record: preserve_file_record
  }.select { |_name, value| value != SENTINEL }
  delete_v2_with_http_info_impl(payload, extra)
end

#delete_with_http_info(file_id:, extra: {}) ⇒ Object

Delete File Endpoint

Parameters:

  • file_id (Integer)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



154
155
156
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 154

# Delete File Endpoint
#
# Thin wrapper: hands +file_id+ and +extra+ positionally to
# +delete_with_http_info_impl+ and returns its result unchanged.
#
# @param file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +delete_with_http_info_impl+ returns
def delete_with_http_info(file_id:, extra: {})
  delete_with_http_info_impl(file_id, extra)
end

#get_parsed_file(file_id:, extra: {}) ⇒ Object

Parsed File

This route is deprecated. Use `/user_files_v2` instead.

Parameters:

  • file_id (Integer)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



530
531
532
533
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 530

# Parsed File
#
# Passes +file_id+ and +extra+ to +get_parsed_file_with_http_info_impl+ and
# returns the +data+ of the response.
#
# @param file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def get_parsed_file(file_id:, extra: {})
  get_parsed_file_with_http_info_impl(file_id, extra).data
end

#get_parsed_file_with_http_info(file_id:, extra: {}) ⇒ Object

Parsed File

This route is deprecated. Use `/user_files_v2` instead.

Parameters:

  • file_id (Integer)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



541
542
543
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 541

# Parsed File
#
# Thin wrapper: hands +file_id+ and +extra+ positionally to
# +get_parsed_file_with_http_info_impl+ and returns its result unchanged.
#
# @param file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +get_parsed_file_with_http_info_impl+ returns
def get_parsed_file_with_http_info(file_id:, extra: {})
  get_parsed_file_with_http_info_impl(file_id, extra)
end

#get_raw_file(file_id:, extra: {}) ⇒ Object

Raw File

This route is deprecated. Use `/user_files_v2` instead.

Parameters:

  • file_id (Integer)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



615
616
617
618
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 615

# Raw File
#
# Passes +file_id+ and +extra+ to +get_raw_file_with_http_info_impl+ and
# returns the +data+ of the response.
#
# @param file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def get_raw_file(file_id:, extra: {})
  get_raw_file_with_http_info_impl(file_id, extra).data
end

#get_raw_file_with_http_info(file_id:, extra: {}) ⇒ Object

Raw File

This route is deprecated. Use `/user_files_v2` instead.

Parameters:

  • file_id (Integer)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



626
627
628
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 626

# Raw File
#
# Thin wrapper: hands +file_id+ and +extra+ positionally to
# +get_raw_file_with_http_info_impl+ and returns its result unchanged.
#
# @param file_id [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +get_raw_file_with_http_info_impl+ returns
def get_raw_file_with_http_info(file_id:, extra: {})
  get_raw_file_with_http_info_impl(file_id, extra)
end

#modify_cold_storage_parameters(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {}) ⇒ Object

Modify Cold Storage Parameters

Parameters:

  • filters (OrganizationUserFilesToSyncFilters) (defaults to: SENTINEL)
  • enable_cold_storage (Boolean) (defaults to: SENTINEL)
  • hot_storage_time_to_live (Integer) (defaults to: SENTINEL)
  • body (ModifyColdStorageParametersQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



701
702
703
704
705
706
707
708
709
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 701

# Modify Cold Storage Parameters
#
# Builds a request body containing only the arguments that differ from
# SENTINEL (all three default to it, so the body may be empty), forwards it
# with +extra+ to +modify_cold_storage_parameters_with_http_info_impl+, and
# returns the response +data+.
#
# @param filters [OrganizationUserFilesToSyncFilters]
# @param enable_cold_storage [Boolean]
# @param hot_storage_time_to_live [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def modify_cold_storage_parameters(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {})
  payload = {
    filters: filters,
    enable_cold_storage: enable_cold_storage,
    hot_storage_time_to_live: hot_storage_time_to_live
  }.select { |_name, value| value != SENTINEL }
  modify_cold_storage_parameters_with_http_info_impl(payload, extra).data
end

#modify_cold_storage_parameters_with_http_info(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {}) ⇒ Object

Modify Cold Storage Parameters

Parameters:

  • filters (OrganizationUserFilesToSyncFilters) (defaults to: SENTINEL)
  • enable_cold_storage (Boolean) (defaults to: SENTINEL)
  • hot_storage_time_to_live (Integer) (defaults to: SENTINEL)
  • body (ModifyColdStorageParametersQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



718
719
720
721
722
723
724
725
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 718

# Modify Cold Storage Parameters
#
# Builds a request body containing only the arguments that differ from
# SENTINEL and returns the result of
# +modify_cold_storage_parameters_with_http_info_impl+ unchanged.
#
# @param filters [OrganizationUserFilesToSyncFilters]
# @param enable_cold_storage [Boolean]
# @param hot_storage_time_to_live [Integer]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +modify_cold_storage_parameters_with_http_info_impl+ returns
def modify_cold_storage_parameters_with_http_info(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {})
  payload = {
    filters: filters,
    enable_cold_storage: enable_cold_storage,
    hot_storage_time_to_live: hot_storage_time_to_live
  }.select { |_name, value| value != SENTINEL }
  modify_cold_storage_parameters_with_http_info_impl(payload, extra)
end

#move_to_hot_storage(filters: SENTINEL, extra: {}) ⇒ Object

Move To Hot Storage

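Parameters:

  • filters (defaults to: SENTINEL)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name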



799
800
801
802
803
804
805
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 799

# Move To Hot Storage
#
# Sends a body holding +filters+ (or an empty body when +filters+ equals
# SENTINEL) together with +extra+ to +move_to_hot_storage_with_http_info_impl+
# and returns the response +data+.
#
# @param filters filter object placed under the :filters key of the body
# @param extra [Hash] additional parameters handed to the impl method
# @return the +data+ attribute of the response object
def move_to_hot_storage(filters: SENTINEL, extra: {})
  payload = filters != SENTINEL ? { filters: filters } : {}
  move_to_hot_storage_with_http_info_impl(payload, extra).data
end

#move_to_hot_storage_with_http_info(filters: SENTINEL, extra: {}) ⇒ Object

Move To Hot Storage

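Parameters:

  • filters (defaults to: SENTINEL)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name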



812
813
814
815
816
817
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 812

# Move To Hot Storage
#
# Sends a body holding +filters+ (or an empty body when +filters+ equals
# SENTINEL) together with +extra+ to +move_to_hot_storage_with_http_info_impl+
# and returns that call's result unchanged.
#
# @param filters filter object placed under the :filters key of the body
# @param extra [Hash] additional parameters handed to the impl method
# @return whatever +move_to_hot_storage_with_http_info_impl+ returns
def move_to_hot_storage_with_http_info(filters: SENTINEL, extra: {})
  payload = filters != SENTINEL ? { filters: filters } : {}
  move_to_hot_storage_with_http_info_impl(payload, extra)
end

#query_user_files(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object

User Files V2

For pre-filtering documents, using `tags_v2` is preferred to using `tags` (which is now deprecated). If both `tags_v2` and `tags` are specified, `tags` is ignored. `tags_v2` enables building complex filters through the use of “AND”, “OR”, and negation logic. Take the below input as an example:

```json
{
    "OR": [
        {
            "key": "subject",
            "value": "holy-bible",
            "negate": false
        },
        {
            "key": "person-of-interest",
            "value": "jesus christ",
            "negate": false
        },
        {
            "key": "genre",
            "value": "religion",
            "negate": true
        },
        {
            "AND": [
                {
                    "key": "subject",
                    "value": "tao-te-ching",
                    "negate": false
                },
                {
                    "key": "author",
                    "value": "lao-tzu",
                    "negate": false
                }
            ]
        }
    ]
}
```

In this case, files will be filtered such that:

  1. “subject” = “holy-bible” OR

  2. “person-of-interest” = “jesus christ” OR

  3. “genre” != “religion” OR

  4. “subject” = “tao-te-ching” AND “author” = “lao-tzu”

Note that the top level of the query must be either an “OR” or “AND” array. Currently, nesting is limited to 3. For tag blocks (those with “key”, “value”, and “negate” keys), the following typing rules apply:

  1. “key” isn’t optional and must be a `string`

  2. “value” isn’t optional and can be `any` or list

  3. “negate” is optional and must be `true` or `false`. If present and `true`, then the filter block is negated in the resulting query. It is `false` by default.

Parameters:

  • pagination (Pagination) (defaults to: SENTINEL)
  • order_by (OrganizationUserFilesToSyncOrderByTypes) (defaults to: SENTINEL)
  • order_dir (OrderDir) (defaults to: SENTINEL)
  • filters (OrganizationUserFilesToSyncFilters) (defaults to: SENTINEL)
  • include_raw_file (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for the raw file. Only relevant for the /user_files_v2 endpoint.

  • include_parsed_text_file (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for the parsed text file. Only relevant for the /user_files_v2 endpoint.

  • include_additional_files (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for additional files. Only relevant for the /user_files_v2 endpoint.

  • presigned_url_expiry_time_seconds (Integer) (defaults to: 3600)

    The expiry time for the presigned URLs. Only relevant for the /user_files_v2 endpoint.

  • body (OrganizationUserFilesToSyncQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



949
950
951
952
953
954
955
956
957
958
959
960
961
962
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 949

# User Files V2
#
# Gathers the keyword arguments into a request body, leaving out each one
# that equals SENTINEL (+presigned_url_expiry_time_seconds+ defaults to 3600,
# so it is sent unless the caller passes SENTINEL), forwards the body and
# +extra+ to +query_user_files_with_http_info_impl+, and returns the
# response +data+.
#
# @param pagination [Pagination]
# @param order_by [OrganizationUserFilesToSyncOrderByTypes]
# @param order_dir [OrderDir]
# @param filters [OrganizationUserFilesToSyncFilters]
# @param include_raw_file [Boolean] If true, the query will return presigned URLs for the raw file.
# @param include_parsed_text_file [Boolean] If true, the query will return presigned URLs for the parsed text file.
# @param include_additional_files [Boolean] If true, the query will return presigned URLs for additional files.
# @param presigned_url_expiry_time_seconds [Integer] The expiry time for the presigned URLs.
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def query_user_files(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {})
  candidates = {
    pagination: pagination,
    order_by: order_by,
    order_dir: order_dir,
    filters: filters,
    include_raw_file: include_raw_file,
    include_parsed_text_file: include_parsed_text_file,
    include_additional_files: include_additional_files,
    presigned_url_expiry_time_seconds: presigned_url_expiry_time_seconds
  }
  payload = candidates.select { |_name, value| value != SENTINEL }
  query_user_files_with_http_info_impl(payload, extra).data
end

#query_user_files_deprecated(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object

User Files

This route is deprecated. Use `/user_files_v2` instead.

Parameters:

  • pagination (Pagination) (defaults to: SENTINEL)
  • order_by (OrganizationUserFilesToSyncOrderByTypes) (defaults to: SENTINEL)
  • order_dir (OrderDir) (defaults to: SENTINEL)
  • filters (OrganizationUserFilesToSyncFilters) (defaults to: SENTINEL)
  • include_raw_file (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for the raw file. Only relevant for the /user_files_v2 endpoint.

  • include_parsed_text_file (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for the parsed text file. Only relevant for the /user_files_v2 endpoint.

  • include_additional_files (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for additional files. Only relevant for the /user_files_v2 endpoint.

  • presigned_url_expiry_time_seconds (Integer) (defaults to: 3600)

    The expiry time for the presigned URLs. Only relevant for the /user_files_v2 endpoint.

  • body (OrganizationUserFilesToSyncQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1124

# User Files
#
# Gathers the keyword arguments into a request body, leaving out each one
# that equals SENTINEL, forwards the body and +extra+ to
# +query_user_files_deprecated_with_http_info_impl+, and returns the
# response +data+.
#
# @param pagination [Pagination]
# @param order_by [OrganizationUserFilesToSyncOrderByTypes]
# @param order_dir [OrderDir]
# @param filters [OrganizationUserFilesToSyncFilters]
# @param include_raw_file [Boolean] If true, the query will return presigned URLs for the raw file.
# @param include_parsed_text_file [Boolean] If true, the query will return presigned URLs for the parsed text file.
# @param include_additional_files [Boolean] If true, the query will return presigned URLs for additional files.
# @param presigned_url_expiry_time_seconds [Integer] The expiry time for the presigned URLs.
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return the +data+ attribute of the response object
def query_user_files_deprecated(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {})
  candidates = {
    pagination: pagination,
    order_by: order_by,
    order_dir: order_dir,
    filters: filters,
    include_raw_file: include_raw_file,
    include_parsed_text_file: include_parsed_text_file,
    include_additional_files: include_additional_files,
    presigned_url_expiry_time_seconds: presigned_url_expiry_time_seconds
  }
  payload = candidates.select { |_name, value| value != SENTINEL }
  query_user_files_deprecated_with_http_info_impl(payload, extra).data
end

#query_user_files_deprecated_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object

User Files

This route is deprecated. Use `/user_files_v2` instead.

Parameters:

  • pagination (Pagination) (defaults to: SENTINEL)
  • order_by (OrganizationUserFilesToSyncOrderByTypes) (defaults to: SENTINEL)
  • order_dir (OrderDir) (defaults to: SENTINEL)
  • filters (OrganizationUserFilesToSyncFilters) (defaults to: SENTINEL)
  • include_raw_file (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for the raw file. Only relevant for the /user_files_v2 endpoint.

  • include_parsed_text_file (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for the parsed text file. Only relevant for the /user_files_v2 endpoint.

  • include_additional_files (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for additional files. Only relevant for the /user_files_v2 endpoint.

  • presigned_url_expiry_time_seconds (Integer) (defaults to: 3600)

    The expiry time for the presigned URLs. Only relevant for the /user_files_v2 endpoint.

  • body (OrganizationUserFilesToSyncQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1153

# User Files
#
# Gathers the keyword arguments into a request body, leaving out each one
# that equals SENTINEL, and returns the result of
# +query_user_files_deprecated_with_http_info_impl+ unchanged.
#
# @param pagination [Pagination]
# @param order_by [OrganizationUserFilesToSyncOrderByTypes]
# @param order_dir [OrderDir]
# @param filters [OrganizationUserFilesToSyncFilters]
# @param include_raw_file [Boolean] If true, the query will return presigned URLs for the raw file.
# @param include_parsed_text_file [Boolean] If true, the query will return presigned URLs for the parsed text file.
# @param include_additional_files [Boolean] If true, the query will return presigned URLs for additional files.
# @param presigned_url_expiry_time_seconds [Integer] The expiry time for the presigned URLs.
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +query_user_files_deprecated_with_http_info_impl+ returns
def query_user_files_deprecated_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {})
  candidates = {
    pagination: pagination,
    order_by: order_by,
    order_dir: order_dir,
    filters: filters,
    include_raw_file: include_raw_file,
    include_parsed_text_file: include_parsed_text_file,
    include_additional_files: include_additional_files,
    presigned_url_expiry_time_seconds: presigned_url_expiry_time_seconds
  }
  payload = candidates.select { |_name, value| value != SENTINEL }
  query_user_files_deprecated_with_http_info_impl(payload, extra)
end

#query_user_files_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object

User Files V2

For pre-filtering documents, using `tags_v2` is preferred to using `tags` (which is now deprecated). If both `tags_v2` and `tags` are specified, `tags` is ignored. `tags_v2` enables building complex filters through the use of “AND”, “OR”, and negation logic. Take the below input as an example:

```json
{
    "OR": [
        {
            "key": "subject",
            "value": "holy-bible",
            "negate": false
        },
        {
            "key": "person-of-interest",
            "value": "jesus christ",
            "negate": false
        },
        {
            "key": "genre",
            "value": "religion",
            "negate": true
        },
        {
            "AND": [
                {
                    "key": "subject",
                    "value": "tao-te-ching",
                    "negate": false
                },
                {
                    "key": "author",
                    "value": "lao-tzu",
                    "negate": false
                }
            ]
        }
    ]
}
```

In this case, files will be filtered such that:

  1. “subject” = “holy-bible” OR

  2. “person-of-interest” = “jesus christ” OR

  3. “genre” != “religion” OR

  4. “subject” = “tao-te-ching” AND “author” = “lao-tzu”

Note that the top level of the query must be either an “OR” or “AND” array. Currently, nesting is limited to 3. For tag blocks (those with “key”, “value”, and “negate” keys), the following typing rules apply:

  1. “key” isn’t optional and must be a `string`

  2. “value” isn’t optional and can be `any` or list

  3. “negate” is optional and must be `true` or `false`. If present and `true`, then the filter block is negated in the resulting query. It is `false` by default.

Parameters:

  • pagination (Pagination) (defaults to: SENTINEL)
  • order_by (OrganizationUserFilesToSyncOrderByTypes) (defaults to: SENTINEL)
  • order_dir (OrderDir) (defaults to: SENTINEL)
  • filters (OrganizationUserFilesToSyncFilters) (defaults to: SENTINEL)
  • include_raw_file (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for the raw file. Only relevant for the /user_files_v2 endpoint.

  • include_parsed_text_file (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for the parsed text file. Only relevant for the /user_files_v2 endpoint.

  • include_additional_files (Boolean) (defaults to: SENTINEL)

    If true, the query will return presigned URLs for additional files. Only relevant for the /user_files_v2 endpoint.

  • presigned_url_expiry_time_seconds (Integer) (defaults to: 3600)

    The expiry time for the presigned URLs. Only relevant for the /user_files_v2 endpoint.

  • body (OrganizationUserFilesToSyncQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1027

# User Files V2
#
# Gathers the keyword arguments into a request body, leaving out each one
# that equals SENTINEL, and returns the result of
# +query_user_files_with_http_info_impl+ unchanged.
#
# @param pagination [Pagination]
# @param order_by [OrganizationUserFilesToSyncOrderByTypes]
# @param order_dir [OrderDir]
# @param filters [OrganizationUserFilesToSyncFilters]
# @param include_raw_file [Boolean] If true, the query will return presigned URLs for the raw file.
# @param include_parsed_text_file [Boolean] If true, the query will return presigned URLs for the parsed text file.
# @param include_additional_files [Boolean] If true, the query will return presigned URLs for additional files.
# @param presigned_url_expiry_time_seconds [Integer] The expiry time for the presigned URLs.
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return whatever +query_user_files_with_http_info_impl+ returns
def query_user_files_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {})
  candidates = {
    pagination: pagination,
    order_by: order_by,
    order_dir: order_dir,
    filters: filters,
    include_raw_file: include_raw_file,
    include_parsed_text_file: include_parsed_text_file,
    include_additional_files: include_additional_files,
    presigned_url_expiry_time_seconds: presigned_url_expiry_time_seconds
  }
  payload = candidates.select { |_name, value| value != SENTINEL }
  query_user_files_with_http_info_impl(payload, extra)
end

#resync(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {}) ⇒ Object

Resync File

Parameters:

  • file_id (Integer)
  • chunk_size (Integer) (defaults to: SENTINEL)
  • chunk_overlap (Integer) (defaults to: SENTINEL)
  • force_embedding_generation (Boolean) (defaults to: false)
  • skip_file_processing (Boolean) (defaults to: false)
  • body (ResyncFileQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1245

# Resync File.
#
# Collects the keyword arguments into a request body (dropping any that are
# still SENTINEL), passes it to resync_with_http_info_impl, and returns the
# +data+ of the response.
#
# @param file_id [Integer]
# @param chunk_size [Integer]
# @param chunk_overlap [Integer]
# @param force_embedding_generation [Boolean]
# @param skip_file_processing [Boolean]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return [Object] the +data+ attribute of the API response
def resync(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {})
  candidates = {
    file_id: file_id,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    force_embedding_generation: force_embedding_generation,
    skip_file_processing: skip_file_processing
  }
  resync_file_query_input = candidates.each_with_object({}) do |(field, value), payload|
    payload[field] = value if value != SENTINEL
  end

  resync_with_http_info_impl(resync_file_query_input, extra).data
end

#resync_with_http_info(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {}) ⇒ Object

Resync File

Parameters:

  • file_id (Integer)
  • chunk_size (Integer) (defaults to: SENTINEL)
  • chunk_overlap (Integer) (defaults to: SENTINEL)
  • force_embedding_generation (Boolean) (defaults to: false)
  • skip_file_processing (Boolean) (defaults to: false)
  • body (ResyncFileQueryInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1266

# Resync File, returning the full result of resync_with_http_info_impl
# rather than only its data.
#
# The request body holds one key per keyword argument (other than +extra+);
# arguments still equal to SENTINEL are omitted.
#
# @param file_id [Integer]
# @param chunk_size [Integer]
# @param chunk_overlap [Integer]
# @param force_embedding_generation [Boolean]
# @param skip_file_processing [Boolean]
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return [Object] whatever resync_with_http_info_impl returns
def resync_with_http_info(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {})
  resync_file_query_input = {
    file_id: file_id,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    force_embedding_generation: force_embedding_generation,
    skip_file_processing: skip_file_processing
  }.select { |_field, value| value != SENTINEL }

  resync_with_http_info_impl(resync_file_query_input, extra)
end

#upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object

Create Upload File

This endpoint is used to directly upload local files to Carbon. The `POST` request should be a multipart form request. Note that the `set_page_as_boundary` query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. Additional information can be retrieved for each chunk, however, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters:

  • `chunk_size`: the chunk size (in tokens) applied when splitting the document

  • `chunk_overlap`: the chunk overlap (in tokens) applied when splitting the document

  • `skip_embedding_generation`: whether or not to skip the generation of chunks and embeddings

  • `set_page_as_boundary`: described above

  • `embedding_model`: the model used to generate embeddings for the document chunks

  • `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs

  • `generate_sparse_vectors`: whether or not to generate sparse vectors for the file. Required for hybrid search.

  • `prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text

Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI’s multimodal model; for text, we support OpenAI’s `text-embedding-ada-002` and Cohere’s `embed-multilingual-v3.0`. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query parameter in `/uploadfile`). If no model is supplied, `text-embedding-ada-002` is used by default. When performing embedding queries, embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.

Parameters:

  • file (File)
  • chunk_size (Integer) (defaults to: SENTINEL)

    Chunk size in tiktoken tokens to be used when processing file.

  • chunk_overlap (Integer) (defaults to: SENTINEL)

    Chunk overlap in tiktoken tokens to be used when processing file.

  • skip_embedding_generation (Boolean) (defaults to: false)

    Flag to control whether or not embeddings should be generated and stored when processing file.

  • set_page_as_boundary (Boolean) (defaults to: false)

    Flag to control whether or not to set a page’s worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See the route description for more information.

  • embedding_model (EmbeddingModel) (defaults to: 'OPENAI')

    Embedding model that will be used to embed file chunks.

  • use_ocr (Boolean) (defaults to: false)

    Whether or not to use OCR when processing files. Valid for PDFs, JPEGs, and PNGs. Useful for documents with tables, images, and/or scanned text.

  • generate_sparse_vectors (Boolean) (defaults to: false)

    Whether or not to generate sparse vectors for the file. This is required for the file to be a candidate for hybrid search.

  • prepend_filename_to_chunks (Boolean) (defaults to: false)

    Whether or not to prepend the file’s name to chunks.

  • max_items_per_chunk (Integer) (defaults to: SENTINEL)

    Number of objects per chunk. For csv, tsv, xlsx, and json files only.

  • parse_pdf_tables_with_ocr (Boolean) (defaults to: false)

    Whether to use rich table parsing when `use_ocr` is enabled.

  • detect_audio_language (Boolean) (defaults to: false)

    Whether to automatically detect the language of the uploaded audio file.

  • transcription_service (TranscriptionServiceNullable) (defaults to: SENTINEL)

    The transcription service to use for audio files. If no service is specified, ‘deepgram’ will be used.

  • include_speaker_labels (Boolean) (defaults to: false)

    Detect multiple speakers and label segments of speech by speaker for audio files.

  • media_type (FileContentTypesNullable) (defaults to: SENTINEL)

    The media type of the file. If not provided, it will be inferred from the file extension.

  • split_rows (Boolean) (defaults to: false)

    Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files.

  • enable_cold_storage (Boolean) (defaults to: false)

    Enable cold storage for the file. If set to true, the file will be moved to cold storage after a certain period of inactivity. Default is false.

  • hot_storage_time_to_live (Integer) (defaults to: SENTINEL)

    Time in days after which the file will be moved to cold storage. Must be one of [1, 3, 7, 14, 30].

  • generate_chunks_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be chunked and stored with Carbon, but no embeddings will be generated. This overrides the skip_embedding_generation flag.

  • store_file_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be stored with Carbon, but no processing will be done.

  • body (BodyCreateUploadFileUploadfilePost)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1394

# Create Upload File.
#
# Uploads a local file to Carbon. The file travels in the request body; every
# other keyword argument (except +extra+) is forwarded as a request option
# under its own name, unless it is still equal to SENTINEL.
#
# The options are merged into a *copy* of +extra+, so the caller's hash is
# never modified. This keeps an +extra+ hash that is reused across calls from
# carrying options (e.g. +chunk_size+) from one upload into the next, and lets
# callers pass a frozen hash. When a key appears both in +extra+ and as an
# explicit keyword argument, the keyword argument wins.
#
# @param file [File]
# @param chunk_size [Integer] chunk size in tiktoken tokens used when processing the file
# @param chunk_overlap [Integer] chunk overlap in tiktoken tokens used when processing the file
# @param skip_embedding_generation [Boolean] whether embeddings should be generated and stored
# @param set_page_as_boundary [Boolean] cap a chunk at one page of content (PDFs only)
# @param embedding_model [EmbeddingModel] model used to embed file chunks
# @param use_ocr [Boolean] use OCR when processing (PDFs, JPEGs, PNGs)
# @param generate_sparse_vectors [Boolean] generate sparse vectors (required for hybrid search)
# @param prepend_filename_to_chunks [Boolean] prepend the file's name to chunks
# @param max_items_per_chunk [Integer] objects per chunk (csv, tsv, xlsx, json only)
# @param parse_pdf_tables_with_ocr [Boolean] use rich table parsing when +use_ocr+ is enabled
# @param detect_audio_language [Boolean] auto-detect the language of an uploaded audio file
# @param transcription_service [TranscriptionServiceNullable] transcription service for audio files
# @param include_speaker_labels [Boolean] label segments of speech by speaker for audio files
# @param media_type [FileContentTypesNullable] media type of the file
# @param split_rows [Boolean] split tabular rows into chunks (CSV, TSV, XLSX)
# @param enable_cold_storage [Boolean] enable cold storage for the file
# @param hot_storage_time_to_live [Integer] days before the file moves to cold storage
# @param generate_chunks_only [Boolean] chunk and store the file without generating embeddings
# @param store_file_only [Boolean] store the file without any processing
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return [Object] the +data+ attribute of the API response
def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  body_create_upload_file_uploadfile_post = {}
  body_create_upload_file_uploadfile_post[:file] = file if file != SENTINEL

  upload_options = {
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    set_page_as_boundary: set_page_as_boundary,
    embedding_model: embedding_model,
    use_ocr: use_ocr,
    generate_sparse_vectors: generate_sparse_vectors,
    prepend_filename_to_chunks: prepend_filename_to_chunks,
    max_items_per_chunk: max_items_per_chunk,
    parse_pdf_tables_with_ocr: parse_pdf_tables_with_ocr,
    detect_audio_language: detect_audio_language,
    transcription_service: transcription_service,
    include_speaker_labels: include_speaker_labels,
    media_type: media_type,
    split_rows: split_rows,
    enable_cold_storage: enable_cold_storage,
    hot_storage_time_to_live: hot_storage_time_to_live,
    generate_chunks_only: generate_chunks_only,
    store_file_only: store_file_only
  }.select { |_name, value| value != SENTINEL }

  # Hash#merge returns a new hash: the caller's +extra+ stays untouched while
  # explicit keyword arguments still take precedence over same-named keys.
  request_extra = extra.merge(upload_options)

  api_response = upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, request_extra)
  api_response.data
end

#upload_from_url(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object

Create Upload File From Url

Parameters:

  • url (String)
  • file_name (String) (defaults to: SENTINEL)
  • chunk_size (Integer) (defaults to: SENTINEL)
  • chunk_overlap (Integer) (defaults to: SENTINEL)
  • skip_embedding_generation (Boolean) (defaults to: false)
  • set_page_as_boundary (Boolean) (defaults to: false)
  • embedding_model (EmbeddingGenerators) (defaults to: SENTINEL)
  • generate_sparse_vectors (Boolean) (defaults to: false)
  • use_textract (Boolean) (defaults to: false)
  • prepend_filename_to_chunks (Boolean) (defaults to: false)
  • max_items_per_chunk (Integer) (defaults to: SENTINEL)

    Number of objects per chunk. For csv, tsv, xlsx, and json files only.

  • parse_pdf_tables_with_ocr (Boolean) (defaults to: false)
  • detect_audio_language (Boolean) (defaults to: false)
  • transcription_service (TranscriptionServiceNullable) (defaults to: SENTINEL)
  • include_speaker_labels (Boolean) (defaults to: false)
  • media_type (FileContentTypesNullable) (defaults to: SENTINEL)
  • split_rows (Boolean) (defaults to: false)
  • cold_storage_params (ColdStorageProps) (defaults to: SENTINEL)
  • generate_chunks_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be chunked and stored with Carbon, but no embeddings will be generated. This overrides the skip_embedding_generation flag.

  • store_file_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be stored with Carbon, but no processing will be done.

  • body (UploadFileFromUrlInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1654

# Create Upload File From Url.
#
# Packs the keyword arguments into a request body (skipping any that are still
# SENTINEL), sends it through upload_from_url_with_http_info_impl, and returns
# the +data+ of the response.
#
# @param url [String]
# @param file_name [String]
# @param chunk_size [Integer]
# @param chunk_overlap [Integer]
# @param skip_embedding_generation [Boolean]
# @param set_page_as_boundary [Boolean]
# @param embedding_model [EmbeddingGenerators]
# @param generate_sparse_vectors [Boolean]
# @param use_textract [Boolean]
# @param prepend_filename_to_chunks [Boolean]
# @param max_items_per_chunk [Integer] objects per chunk (csv, tsv, xlsx, json only)
# @param parse_pdf_tables_with_ocr [Boolean]
# @param detect_audio_language [Boolean]
# @param transcription_service [TranscriptionServiceNullable]
# @param include_speaker_labels [Boolean]
# @param media_type [FileContentTypesNullable]
# @param split_rows [Boolean]
# @param cold_storage_params [ColdStorageProps]
# @param generate_chunks_only [Boolean] chunk and store the file without generating embeddings
# @param store_file_only [Boolean] store the file without any processing
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return [Object] the +data+ attribute of the API response
def upload_from_url(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  upload_file_from_url_input = {
    url: url,
    file_name: file_name,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    set_page_as_boundary: set_page_as_boundary,
    embedding_model: embedding_model,
    generate_sparse_vectors: generate_sparse_vectors,
    use_textract: use_textract,
    prepend_filename_to_chunks: prepend_filename_to_chunks,
    max_items_per_chunk: max_items_per_chunk,
    parse_pdf_tables_with_ocr: parse_pdf_tables_with_ocr,
    detect_audio_language: detect_audio_language,
    transcription_service: transcription_service,
    include_speaker_labels: include_speaker_labels,
    media_type: media_type,
    split_rows: split_rows,
    cold_storage_params: cold_storage_params,
    generate_chunks_only: generate_chunks_only,
    store_file_only: store_file_only
  }.select { |_field, value| value != SENTINEL }

  upload_from_url_with_http_info_impl(upload_file_from_url_input, extra).data
end

#upload_from_url_with_http_info(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object

Create Upload File From Url

Parameters:

  • url (String)
  • file_name (String) (defaults to: SENTINEL)
  • chunk_size (Integer) (defaults to: SENTINEL)
  • chunk_overlap (Integer) (defaults to: SENTINEL)
  • skip_embedding_generation (Boolean) (defaults to: false)
  • set_page_as_boundary (Boolean) (defaults to: false)
  • embedding_model (EmbeddingGenerators) (defaults to: SENTINEL)
  • generate_sparse_vectors (Boolean) (defaults to: false)
  • use_textract (Boolean) (defaults to: false)
  • prepend_filename_to_chunks (Boolean) (defaults to: false)
  • max_items_per_chunk (Integer) (defaults to: SENTINEL)

    Number of objects per chunk. For csv, tsv, xlsx, and json files only.

  • parse_pdf_tables_with_ocr (Boolean) (defaults to: false)
  • detect_audio_language (Boolean) (defaults to: false)
  • transcription_service (TranscriptionServiceNullable) (defaults to: SENTINEL)
  • include_speaker_labels (Boolean) (defaults to: false)
  • media_type (FileContentTypesNullable) (defaults to: SENTINEL)
  • split_rows (Boolean) (defaults to: false)
  • cold_storage_params (ColdStorageProps) (defaults to: SENTINEL)
  • generate_chunks_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be chunked and stored with Carbon, but no embeddings will be generated. This overrides the skip_embedding_generation flag.

  • store_file_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be stored with Carbon, but no processing will be done.

  • body (UploadFileFromUrlInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1705

# Create Upload File From Url, returning the full result of
# upload_from_url_with_http_info_impl rather than only its data.
#
# Each keyword argument other than +extra+ is written to the request body
# under its own name; arguments still equal to SENTINEL are skipped.
#
# @param url [String]
# @param file_name [String]
# @param chunk_size [Integer]
# @param chunk_overlap [Integer]
# @param skip_embedding_generation [Boolean]
# @param set_page_as_boundary [Boolean]
# @param embedding_model [EmbeddingGenerators]
# @param generate_sparse_vectors [Boolean]
# @param use_textract [Boolean]
# @param prepend_filename_to_chunks [Boolean]
# @param max_items_per_chunk [Integer] objects per chunk (csv, tsv, xlsx, json only)
# @param parse_pdf_tables_with_ocr [Boolean]
# @param detect_audio_language [Boolean]
# @param transcription_service [TranscriptionServiceNullable]
# @param include_speaker_labels [Boolean]
# @param media_type [FileContentTypesNullable]
# @param split_rows [Boolean]
# @param cold_storage_params [ColdStorageProps]
# @param generate_chunks_only [Boolean] chunk and store the file without generating embeddings
# @param store_file_only [Boolean] store the file without any processing
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return [Object] whatever upload_from_url_with_http_info_impl returns
def upload_from_url_with_http_info(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  fields = [
    [:url, url],
    [:file_name, file_name],
    [:chunk_size, chunk_size],
    [:chunk_overlap, chunk_overlap],
    [:skip_embedding_generation, skip_embedding_generation],
    [:set_page_as_boundary, set_page_as_boundary],
    [:embedding_model, embedding_model],
    [:generate_sparse_vectors, generate_sparse_vectors],
    [:use_textract, use_textract],
    [:prepend_filename_to_chunks, prepend_filename_to_chunks],
    [:max_items_per_chunk, max_items_per_chunk],
    [:parse_pdf_tables_with_ocr, parse_pdf_tables_with_ocr],
    [:detect_audio_language, detect_audio_language],
    [:transcription_service, transcription_service],
    [:include_speaker_labels, include_speaker_labels],
    [:media_type, media_type],
    [:split_rows, split_rows],
    [:cold_storage_params, cold_storage_params],
    [:generate_chunks_only, generate_chunks_only],
    [:store_file_only, store_file_only]
  ]
  upload_file_from_url_input = fields.each_with_object({}) do |(field, value), payload|
    payload[field] = value if value != SENTINEL
  end

  upload_from_url_with_http_info_impl(upload_file_from_url_input, extra)
end

#upload_text(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object

Create Raw Text

Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI’s multimodal model; for text, we support OpenAI’s `text-embedding-ada-002` and Cohere’s `embed-multilingual-v3.0`. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query parameter in `/uploadfile`). If no model is supplied, `text-embedding-ada-002` is used by default. When performing embedding queries, embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.

Parameters:

  • contents (String)
  • name (String) (defaults to: SENTINEL)
  • chunk_size (Integer) (defaults to: SENTINEL)
  • chunk_overlap (Integer) (defaults to: SENTINEL)
  • skip_embedding_generation (Boolean) (defaults to: false)
  • overwrite_file_id (Integer) (defaults to: SENTINEL)
  • embedding_model (EmbeddingGeneratorsNullable) (defaults to: 'OPENAI')
  • generate_sparse_vectors (Boolean) (defaults to: false)
  • cold_storage_params (ColdStorageProps) (defaults to: SENTINEL)
  • generate_chunks_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be chunked and stored with Carbon, but no embeddings will be generated. This overrides the skip_embedding_generation flag.

  • store_file_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be stored with Carbon, but no processing will be done.

  • body (RawTextInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1824

# Create Raw Text.
#
# Gathers the keyword arguments into a request body (leaving out any that are
# still SENTINEL), submits it via upload_text_with_http_info_impl, and returns
# the +data+ of the response.
#
# @param contents [String]
# @param name [String]
# @param chunk_size [Integer]
# @param chunk_overlap [Integer]
# @param skip_embedding_generation [Boolean]
# @param overwrite_file_id [Integer]
# @param embedding_model [EmbeddingGeneratorsNullable]
# @param generate_sparse_vectors [Boolean]
# @param cold_storage_params [ColdStorageProps]
# @param generate_chunks_only [Boolean] chunk and store the file without generating embeddings
# @param store_file_only [Boolean] store the file without any processing
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return [Object] the +data+ attribute of the API response
def upload_text(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  raw_text_input = {
    contents: contents,
    name: name,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    overwrite_file_id: overwrite_file_id,
    embedding_model: embedding_model,
    generate_sparse_vectors: generate_sparse_vectors,
    cold_storage_params: cold_storage_params,
    generate_chunks_only: generate_chunks_only,
    store_file_only: store_file_only
  }.select { |_field, value| value != SENTINEL }

  upload_text_with_http_info_impl(raw_text_input, extra).data
end

#upload_text_with_http_info(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object

Create Raw Text

Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI’s multimodal model; for text, we support OpenAI’s `text-embedding-ada-002` and Cohere’s `embed-multilingual-v3.0`. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query parameter in `/uploadfile`). If no model is supplied, `text-embedding-ada-002` is used by default. When performing embedding queries, embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.

Parameters:

  • contents (String)
  • name (String) (defaults to: SENTINEL)
  • chunk_size (Integer) (defaults to: SENTINEL)
  • chunk_overlap (Integer) (defaults to: SENTINEL)
  • skip_embedding_generation (Boolean) (defaults to: false)
  • overwrite_file_id (Integer) (defaults to: SENTINEL)
  • embedding_model (EmbeddingGeneratorsNullable) (defaults to: 'OPENAI')
  • generate_sparse_vectors (Boolean) (defaults to: false)
  • cold_storage_params (ColdStorageProps) (defaults to: SENTINEL)
  • generate_chunks_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be chunked and stored with Carbon, but no embeddings will be generated. This overrides the skip_embedding_generation flag.

  • store_file_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be stored with Carbon, but no processing will be done.

  • body (RawTextInput)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1868

# Create Raw Text, returning the full result of upload_text_with_http_info_impl
# rather than only its data.
#
# Each keyword argument other than +extra+ is written to the request body
# under its own name; arguments still equal to SENTINEL are skipped.
#
# @param contents [String]
# @param name [String]
# @param chunk_size [Integer]
# @param chunk_overlap [Integer]
# @param skip_embedding_generation [Boolean]
# @param overwrite_file_id [Integer]
# @param embedding_model [EmbeddingGeneratorsNullable]
# @param generate_sparse_vectors [Boolean]
# @param cold_storage_params [ColdStorageProps]
# @param generate_chunks_only [Boolean] chunk and store the file without generating embeddings
# @param store_file_only [Boolean] store the file without any processing
# @param extra [Hash] additional parameters to pass along through :header_params, :query_params, or parameter name
# @return [Object] whatever upload_text_with_http_info_impl returns
def upload_text_with_http_info(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  fields = [
    [:contents, contents],
    [:name, name],
    [:chunk_size, chunk_size],
    [:chunk_overlap, chunk_overlap],
    [:skip_embedding_generation, skip_embedding_generation],
    [:overwrite_file_id, overwrite_file_id],
    [:embedding_model, embedding_model],
    [:generate_sparse_vectors, generate_sparse_vectors],
    [:cold_storage_params, cold_storage_params],
    [:generate_chunks_only, generate_chunks_only],
    [:store_file_only, store_file_only]
  ]
  raw_text_input = fields.each_with_object({}) do |(field, value), payload|
    payload[field] = value if value != SENTINEL
  end

  upload_text_with_http_info_impl(raw_text_input, extra)
end

#upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object

Create Upload File

This endpoint is used to directly upload local files to Carbon. The `POST` request should be a multipart form request. Note that the `set_page_as_boundary` query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. Additional information can be retrieved for each chunk, however, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters:

  • `chunk_size`: the chunk size (in tokens) applied when splitting the document

  • `chunk_overlap`: the chunk overlap (in tokens) applied when splitting the document

  • `skip_embedding_generation`: whether or not to skip the generation of chunks and embeddings

  • `set_page_as_boundary`: described above

  • `embedding_model`: the model used to generate embeddings for the document chunks

  • `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs

  • `generate_sparse_vectors`: whether or not to generate sparse vectors for the file. Required for hybrid search.

  • `prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text

Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI’s multimodal model; for text, we support OpenAI’s `text-embedding-ada-002` and Cohere’s `embed-multilingual-v3.0`. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query parameter in `/uploadfile`). If no model is supplied, `text-embedding-ada-002` is used by default. When performing embedding queries, only embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.

Parameters:

  • file (File)
  • chunk_size (Integer) (defaults to: SENTINEL)

    Chunk size in tiktoken tokens to be used when processing file.

  • chunk_overlap (Integer) (defaults to: SENTINEL)

    Chunk overlap in tiktoken tokens to be used when processing file.

  • skip_embedding_generation (Boolean) (defaults to: false)

    Flag to control whether or not embeddings should be generated and stored when processing file.

  • set_page_as_boundary (Boolean) (defaults to: false)

    Flag to control whether or not to set a page’s worth of content as the maximum amount of content that can appear in a chunk. Only valid for PDFs. See the route description for more information.

  • embedding_model (EmbeddingModel) (defaults to: 'OPENAI')

    Embedding model that will be used to embed file chunks.

  • use_ocr (Boolean) (defaults to: false)

    Whether or not to use OCR when processing files. Valid for PDFs, JPEGs, and PNGs. Useful for documents with tables, images, and/or scanned text.

  • generate_sparse_vectors (Boolean) (defaults to: false)

    Whether or not to generate sparse vectors for the file. This is required for the file to be a candidate for hybrid search.

  • prepend_filename_to_chunks (Boolean) (defaults to: false)

    Whether or not to prepend the file’s name to chunks.

  • max_items_per_chunk (Integer) (defaults to: SENTINEL)

    Number of objects per chunk. For csv, tsv, xlsx, and json files only.

  • parse_pdf_tables_with_ocr (Boolean) (defaults to: false)

    Whether to use rich table parsing when `use_ocr` is enabled.

  • detect_audio_language (Boolean) (defaults to: false)

    Whether to automatically detect the language of the uploaded audio file.

  • transcription_service (TranscriptionServiceNullable) (defaults to: SENTINEL)

    The transcription service to use for audio files. If no service is specified, ‘deepgram’ will be used.

  • include_speaker_labels (Boolean) (defaults to: false)

    Detect multiple speakers and label segments of speech by speaker for audio files.

  • media_type (FileContentTypesNullable) (defaults to: SENTINEL)

    The media type of the file. If not provided, it will be inferred from the file extension.

  • split_rows (Boolean) (defaults to: false)

    Whether to split tabular rows into chunks. Currently only valid for CSV, TSV, and XLSX files.

  • enable_cold_storage (Boolean) (defaults to: false)

    Enable cold storage for the file. If set to true, the file will be moved to cold storage after a certain period of inactivity. Default is false.

  • hot_storage_time_to_live (Integer) (defaults to: SENTINEL)

    Time in days after which the file will be moved to cold storage. Must be one of [1, 3, 7, 14, 30].

  • generate_chunks_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be chunked and stored with Carbon, but no embeddings will be generated. This overrides the skip_embedding_generation flag.

  • store_file_only (Boolean) (defaults to: false)

    If this flag is enabled, the file will be stored with Carbon, but no processing will be done.

  • body (BodyCreateUploadFileUploadfilePost)
  • extra (Hash) (defaults to: {})

    additional parameters to pass along through :header_params, :query_params, or parameter name



1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1471

# Prepares a file upload request and delegates to upload_with_http_info_impl.
#
# +file+ becomes the only entry of the request body (omitted if it equals
# SENTINEL). Every other keyword argument except +extra+ is added to the
# options hash under its own parameter name, unless its value equals
# SENTINEL. An explicitly supplied argument takes precedence over an entry
# with the same key already present in +extra+.
#
# The caller's +extra+ hash is never modified: the options are merged into a
# new hash, so a shared or frozen hash can be passed safely.
#
# Returns whatever upload_with_http_info_impl returns.
def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  body_create_upload_file_uploadfile_post = {}
  body_create_upload_file_uploadfile_post[:file] = file if file != SENTINEL

  # Options supplied through keyword arguments; SENTINEL marks "not provided".
  named_options = {
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    set_page_as_boundary: set_page_as_boundary,
    embedding_model: embedding_model,
    use_ocr: use_ocr,
    generate_sparse_vectors: generate_sparse_vectors,
    prepend_filename_to_chunks: prepend_filename_to_chunks,
    max_items_per_chunk: max_items_per_chunk,
    parse_pdf_tables_with_ocr: parse_pdf_tables_with_ocr,
    detect_audio_language: detect_audio_language,
    transcription_service: transcription_service,
    include_speaker_labels: include_speaker_labels,
    media_type: media_type,
    split_rows: split_rows,
    enable_cold_storage: enable_cold_storage,
    hot_storage_time_to_live: hot_storage_time_to_live,
    generate_chunks_only: generate_chunks_only,
    store_file_only: store_file_only
  }.select { |_name, value| value != SENTINEL }

  # Hash#merge returns a new hash, leaving the caller's +extra+ untouched.
  merged_extra = extra.merge(named_options)

  upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, merged_extra)
end