Class: Carbon::FilesApi
- Inherits:
-
Object
- Object
- Carbon::FilesApi
- Defined in:
- lib/carbon_ruby_sdk/api/files_api.rb
Instance Attribute Summary collapse
-
#api_client ⇒ Object
Returns the value of attribute api_client.
Instance Method Summary collapse
-
#create_user_file_tags(tags:, organization_user_file_id:, extra: {}) ⇒ Object
Create File Tags.
-
#create_user_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {}) ⇒ Object
Create File Tags.
-
#delete(file_id:, extra: {}) ⇒ Object
Delete File Endpoint.
-
#delete_file_tags(tags:, organization_user_file_id:, extra: {}) ⇒ Object
Delete File Tags.
-
#delete_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {}) ⇒ Object
Delete File Tags.
-
#delete_many(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {}) ⇒ Object
Delete Files Endpoint.
-
#delete_many_with_http_info(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {}) ⇒ Object
Delete Files Endpoint.
-
#delete_v2(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {}) ⇒ Object
Delete Files V2 Endpoint.
-
#delete_v2_with_http_info(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {}) ⇒ Object
Delete Files V2 Endpoint.
-
#delete_with_http_info(file_id:, extra: {}) ⇒ Object
Delete File Endpoint.
-
#get_parsed_file(file_id:, extra: {}) ⇒ Object
Parsed File.
-
#get_parsed_file_with_http_info(file_id:, extra: {}) ⇒ Object
Parsed File.
-
#get_raw_file(file_id:, extra: {}) ⇒ Object
Raw File.
-
#get_raw_file_with_http_info(file_id:, extra: {}) ⇒ Object
Raw File.
-
#initialize(api_client = ApiClient.default) ⇒ FilesApi
constructor
A new instance of FilesApi.
-
#modify_cold_storage_parameters(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {}) ⇒ Object
Modify Cold Storage Parameters.
-
#modify_cold_storage_parameters_with_http_info(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {}) ⇒ Object
Modify Cold Storage Parameters.
-
#move_to_hot_storage(filters: SENTINEL, extra: {}) ⇒ Object
Move To Hot Storage.
-
#move_to_hot_storage_with_http_info(filters: SENTINEL, extra: {}) ⇒ Object
Move To Hot Storage.
-
#query_user_files(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object
User Files V2.
-
#query_user_files_deprecated(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object
User Files.
-
#query_user_files_deprecated_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object
User Files.
-
#query_user_files_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object
User Files V2.
-
#resync(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {}) ⇒ Object
Resync File.
-
#resync_with_http_info(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {}) ⇒ Object
Resync File.
-
#upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Upload File.
-
#upload_from_url(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Upload File From Url.
-
#upload_from_url_with_http_info(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Upload File From Url.
-
#upload_text(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Raw Text.
-
#upload_text_with_http_info(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Raw Text.
-
#upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Upload File.
Constructor Details
Instance Attribute Details
#api_client ⇒ Object
Returns the value of attribute api_client.
13 14 15 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 13 def api_client @api_client end |
Instance Method Details
#create_user_file_tags(tags:, organization_user_file_id:, extra: {}) ⇒ Object
Create File Tags
A tag is a key-value pair that can be added to a file. This pair can then be used for searches (e.g. embedding searches) in order to narrow down the scope of the search. A file can have any number of tags. The following are reserved keys that cannot be used:
-
db_embedding_id
-
organization_id
-
user_id
-
organization_user_file_id
Carbon currently supports two data types for tag values: `string` and `list<string>`. Keys can only be `string`. If values other than `string` and `list<string>` are used, they’re automatically converted to strings (e.g. 4 will become “4”).
37 38 39 40 41 42 43 44 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 37 def create_user_file_tags(tags:, organization_user_file_id:, extra: {}) _body = {} _body[:tags] = tags if tags != SENTINEL _body[:organization_user_file_id] = organization_user_file_id if organization_user_file_id != SENTINEL organization_user_file_tag_create = _body api_response = create_user_file_tags_with_http_info_impl(organization_user_file_tag_create, extra) api_response.data end |
#create_user_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {}) ⇒ Object
Create File Tags
A tag is a key-value pair that can be added to a file. This pair can then be used for searches (e.g. embedding searches) in order to narrow down the scope of the search. A file can have any number of tags. The following are reserved keys that cannot be used:
-
db_embedding_id
-
organization_id
-
user_id
-
organization_user_file_id
Carbon currently supports two data types for tag values: `string` and `list<string>`. Keys can only be `string`. If values other than `string` and `list<string>` are used, they’re automatically converted to strings (e.g. 4 will become “4”).
64 65 66 67 68 69 70 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 64 def create_user_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {}) _body = {} _body[:tags] = tags if tags != SENTINEL _body[:organization_user_file_id] = organization_user_file_id if organization_user_file_id != SENTINEL organization_user_file_tag_create = _body create_user_file_tags_with_http_info_impl(organization_user_file_tag_create, extra) end |
#delete(file_id:, extra: {}) ⇒ Object
Delete File Endpoint
145 146 147 148 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 145 def delete(file_id:, extra: {}) api_response = delete_with_http_info_impl(file_id, extra) api_response.data end |
#delete_file_tags(tags:, organization_user_file_id:, extra: {}) ⇒ Object
Delete File Tags
226 227 228 229 230 231 232 233 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 226 def delete_file_tags(tags:, organization_user_file_id:, extra: {}) _body = {} _body[:tags] = tags if tags != SENTINEL _body[:organization_user_file_id] = organization_user_file_id if organization_user_file_id != SENTINEL organization_user_file_tags_remove = _body api_response = delete_file_tags_with_http_info_impl(organization_user_file_tags_remove, extra) api_response.data end |
#delete_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {}) ⇒ Object
Delete File Tags
241 242 243 244 245 246 247 |
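# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 241 def delete_file_tags_with_http_info(tags:, organization_user_file_id:, extra: {}) _body = {} _body[:tags] = tags if tags != SENTINEL _body[:organization_user_file_id] = organization_user_file_id if organization_user_file_id != SENTINEL organization_user_file_tags_remove = _body delete_file_tags_with_http_info_impl(organization_user_file_tags_remove, extra) end |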
#delete_many(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {}) ⇒ Object
Delete Files Endpoint
325 326 327 328 329 330 331 332 333 334 335 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 325 def delete_many(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {}) _body = {} _body[:file_ids] = file_ids if file_ids != SENTINEL _body[:sync_statuses] = sync_statuses if sync_statuses != SENTINEL _body[:delete_non_synced_only] = delete_non_synced_only if delete_non_synced_only != SENTINEL _body[:send_webhook] = send_webhook if send_webhook != SENTINEL _body[:delete_child_files] = delete_child_files if delete_child_files != SENTINEL delete_files_query_input = _body api_response = delete_many_with_http_info_impl(delete_files_query_input, extra) api_response.data end |
#delete_many_with_http_info(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {}) ⇒ Object
Delete Files Endpoint
346 347 348 349 350 351 352 353 354 355 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 346 def delete_many_with_http_info(file_ids: SENTINEL, sync_statuses: SENTINEL, delete_non_synced_only: false, send_webhook: false, delete_child_files: false, extra: {}) _body = {} _body[:file_ids] = file_ids if file_ids != SENTINEL _body[:sync_statuses] = sync_statuses if sync_statuses != SENTINEL _body[:delete_non_synced_only] = delete_non_synced_only if delete_non_synced_only != SENTINEL _body[:send_webhook] = send_webhook if send_webhook != SENTINEL _body[:delete_child_files] = delete_child_files if delete_child_files != SENTINEL delete_files_query_input = _body delete_many_with_http_info_impl(delete_files_query_input, extra) end |
#delete_v2(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {}) ⇒ Object
Delete Files V2 Endpoint
431 432 433 434 435 436 437 438 439 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 431 def delete_v2(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {}) _body = {} _body[:filters] = filters if filters != SENTINEL _body[:send_webhook] = send_webhook if send_webhook != SENTINEL _body[:preserve_file_record] = preserve_file_record if preserve_file_record != SENTINEL delete_files_v2_query_input = _body api_response = delete_v2_with_http_info_impl(delete_files_v2_query_input, extra) api_response.data end |
#delete_v2_with_http_info(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {}) ⇒ Object
Delete Files V2 Endpoint
448 449 450 451 452 453 454 455 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 448 def delete_v2_with_http_info(filters: SENTINEL, send_webhook: false, preserve_file_record: false, extra: {}) _body = {} _body[:filters] = filters if filters != SENTINEL _body[:send_webhook] = send_webhook if send_webhook != SENTINEL _body[:preserve_file_record] = preserve_file_record if preserve_file_record != SENTINEL delete_files_v2_query_input = _body delete_v2_with_http_info_impl(delete_files_v2_query_input, extra) end |
#delete_with_http_info(file_id:, extra: {}) ⇒ Object
Delete File Endpoint
154 155 156 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 154 def delete_with_http_info(file_id:, extra: {}) delete_with_http_info_impl(file_id, extra) end |
#get_parsed_file(file_id:, extra: {}) ⇒ Object
Parsed File
This route is deprecated. Use `/user_files_v2` instead.
530 531 532 533 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 530 def get_parsed_file(file_id:, extra: {}) api_response = get_parsed_file_with_http_info_impl(file_id, extra) api_response.data end |
#get_parsed_file_with_http_info(file_id:, extra: {}) ⇒ Object
Parsed File
This route is deprecated. Use `/user_files_v2` instead.
541 542 543 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 541 def get_parsed_file_with_http_info(file_id:, extra: {}) get_parsed_file_with_http_info_impl(file_id, extra) end |
#get_raw_file(file_id:, extra: {}) ⇒ Object
Raw File
This route is deprecated. Use `/user_files_v2` instead.
615 616 617 618 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 615 def get_raw_file(file_id:, extra: {}) api_response = get_raw_file_with_http_info_impl(file_id, extra) api_response.data end |
#get_raw_file_with_http_info(file_id:, extra: {}) ⇒ Object
Raw File
This route is deprecated. Use `/user_files_v2` instead.
626 627 628 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 626 def get_raw_file_with_http_info(file_id:, extra: {}) get_raw_file_with_http_info_impl(file_id, extra) end |
#modify_cold_storage_parameters(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {}) ⇒ Object
Modify Cold Storage Parameters
701 702 703 704 705 706 707 708 709 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 701 def modify_cold_storage_parameters(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {}) _body = {} _body[:filters] = filters if filters != SENTINEL _body[:enable_cold_storage] = enable_cold_storage if enable_cold_storage != SENTINEL _body[:hot_storage_time_to_live] = hot_storage_time_to_live if hot_storage_time_to_live != SENTINEL modify_cold_storage_parameters_query_input = _body api_response = modify_cold_storage_parameters_with_http_info_impl(modify_cold_storage_parameters_query_input, extra) api_response.data end |
#modify_cold_storage_parameters_with_http_info(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {}) ⇒ Object
Modify Cold Storage Parameters
718 719 720 721 722 723 724 725 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 718 def modify_cold_storage_parameters_with_http_info(filters: SENTINEL, enable_cold_storage: SENTINEL, hot_storage_time_to_live: SENTINEL, extra: {}) _body = {} _body[:filters] = filters if filters != SENTINEL _body[:enable_cold_storage] = enable_cold_storage if enable_cold_storage != SENTINEL _body[:hot_storage_time_to_live] = hot_storage_time_to_live if hot_storage_time_to_live != SENTINEL modify_cold_storage_parameters_query_input = _body modify_cold_storage_parameters_with_http_info_impl(modify_cold_storage_parameters_query_input, extra) end |
#move_to_hot_storage(filters: SENTINEL, extra: {}) ⇒ Object
Move To Hot Storage
799 800 801 802 803 804 805 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 799 def move_to_hot_storage(filters: SENTINEL, extra: {}) _body = {} _body[:filters] = filters if filters != SENTINEL move_to_hot_storage_query_input = _body api_response = move_to_hot_storage_with_http_info_impl(move_to_hot_storage_query_input, extra) api_response.data end |
#move_to_hot_storage_with_http_info(filters: SENTINEL, extra: {}) ⇒ Object
Move To Hot Storage
812 813 814 815 816 817 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 812 def move_to_hot_storage_with_http_info(filters: SENTINEL, extra: {}) _body = {} _body[:filters] = filters if filters != SENTINEL move_to_hot_storage_query_input = _body move_to_hot_storage_with_http_info_impl(move_to_hot_storage_query_input, extra) end |
#query_user_files(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object
User Files V2
For pre-filtering documents, using `tags_v2` is preferred to using `tags` (which is now deprecated). If both `tags_v2` and `tags` are specified, `tags` is ignored. `tags_v2` enables building complex filters through the use of “AND”, “OR”, and negation logic. Take the below input as an example:
```json
{
  "OR": [
    {
      "key": "subject",
      "value": "holy-bible",
      "negate": false
    },
    {
      "key": "person-of-interest",
      "value": "jesus christ",
      "negate": false
    },
    {
      "key": "genre",
      "value": "religion",
      "negate": true
    },
    {
      "AND": [
        {
          "key": "subject",
          "value": "tao-te-ching",
          "negate": false
        },
        {
          "key": "author",
          "value": "lao-tzu",
          "negate": false
        }
      ]
    }
  ]
}
```
In this case, files will be filtered such that:
-
“subject” = “holy-bible” OR
-
“person-of-interest” = “jesus christ” OR
-
“genre” != “religion” OR
-
“subject” = “tao-te-ching” AND “author” = “lao-tzu”
Note that the top level of the query must be either an “OR” or “AND” array. Currently, nesting is limited to 3. For tag blocks (those with “key”, “value”, and “negate” keys), the following typing rules apply:
-
“key” is required and must be a `string`
-
“value” is required and can be `any` or list
-
“negate” is optional and must be `true` or `false`. If present and `true`, then the filter block is negated in the resulting query. It is `false` by default.
949 950 951 952 953 954 955 956 957 958 959 960 961 962 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 949 def query_user_files(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) _body = {} _body[:pagination] = pagination if pagination != SENTINEL _body[:order_by] = order_by if order_by != SENTINEL _body[:order_dir] = order_dir if order_dir != SENTINEL _body[:filters] = filters if filters != SENTINEL _body[:include_raw_file] = include_raw_file if include_raw_file != SENTINEL _body[:include_parsed_text_file] = include_parsed_text_file if include_parsed_text_file != SENTINEL _body[:include_additional_files] = include_additional_files if include_additional_files != SENTINEL _body[:presigned_url_expiry_time_seconds] = presigned_url_expiry_time_seconds if presigned_url_expiry_time_seconds != SENTINEL organization_user_files_to_sync_query_input = _body api_response = query_user_files_with_http_info_impl(organization_user_files_to_sync_query_input, extra) api_response.data end |
#query_user_files_deprecated(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object
User Files
This route is deprecated. Use `/user_files_v2` instead.
1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1124 def query_user_files_deprecated(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) _body = {} _body[:pagination] = pagination if pagination != SENTINEL _body[:order_by] = order_by if order_by != SENTINEL _body[:order_dir] = order_dir if order_dir != SENTINEL _body[:filters] = filters if filters != SENTINEL _body[:include_raw_file] = include_raw_file if include_raw_file != SENTINEL _body[:include_parsed_text_file] = include_parsed_text_file if include_parsed_text_file != SENTINEL _body[:include_additional_files] = include_additional_files if include_additional_files != SENTINEL _body[:presigned_url_expiry_time_seconds] = presigned_url_expiry_time_seconds if presigned_url_expiry_time_seconds != SENTINEL organization_user_files_to_sync_query_input = _body api_response = query_user_files_deprecated_with_http_info_impl(organization_user_files_to_sync_query_input, extra) api_response.data end |
#query_user_files_deprecated_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object
User Files
This route is deprecated. Use `/user_files_v2` instead.
1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1153 def query_user_files_deprecated_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) _body = {} _body[:pagination] = pagination if pagination != SENTINEL _body[:order_by] = order_by if order_by != SENTINEL _body[:order_dir] = order_dir if order_dir != SENTINEL _body[:filters] = filters if filters != SENTINEL _body[:include_raw_file] = include_raw_file if include_raw_file != SENTINEL _body[:include_parsed_text_file] = include_parsed_text_file if include_parsed_text_file != SENTINEL _body[:include_additional_files] = include_additional_files if include_additional_files != SENTINEL _body[:presigned_url_expiry_time_seconds] = presigned_url_expiry_time_seconds if presigned_url_expiry_time_seconds != SENTINEL organization_user_files_to_sync_query_input = _body query_user_files_deprecated_with_http_info_impl(organization_user_files_to_sync_query_input, extra) end |
#query_user_files_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) ⇒ Object
User Files V2
For pre-filtering documents, using `tags_v2` is preferred to using `tags` (which is now deprecated). If both `tags_v2` and `tags` are specified, `tags` is ignored. `tags_v2` enables building complex filters through the use of “AND”, “OR”, and negation logic. Take the below input as an example:
```json
{
  "OR": [
    {
      "key": "subject",
      "value": "holy-bible",
      "negate": false
    },
    {
      "key": "person-of-interest",
      "value": "jesus christ",
      "negate": false
    },
    {
      "key": "genre",
      "value": "religion",
      "negate": true
    },
    {
      "AND": [
        {
          "key": "subject",
          "value": "tao-te-ching",
          "negate": false
        },
        {
          "key": "author",
          "value": "lao-tzu",
          "negate": false
        }
      ]
    }
  ]
}
```
In this case, files will be filtered such that:
-
“subject” = “holy-bible” OR
-
“person-of-interest” = “jesus christ” OR
-
“genre” != “religion” OR
-
“subject” = “tao-te-ching” AND “author” = “lao-tzu”
Note that the top level of the query must be either an “OR” or “AND” array. Currently, nesting is limited to 3. For tag blocks (those with “key”, “value”, and “negate” keys), the following typing rules apply:
-
“key” is required and must be a `string`
-
“value” is required and can be `any` or list
-
“negate” is optional and must be `true` or `false`. If present and `true`, then the filter block is negated in the resulting query. It is `false` by default.
1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1027 def query_user_files_with_http_info(pagination: SENTINEL, order_by: SENTINEL, order_dir: SENTINEL, filters: SENTINEL, include_raw_file: SENTINEL, include_parsed_text_file: SENTINEL, include_additional_files: SENTINEL, presigned_url_expiry_time_seconds: 3600, extra: {}) _body = {} _body[:pagination] = pagination if pagination != SENTINEL _body[:order_by] = order_by if order_by != SENTINEL _body[:order_dir] = order_dir if order_dir != SENTINEL _body[:filters] = filters if filters != SENTINEL _body[:include_raw_file] = include_raw_file if include_raw_file != SENTINEL _body[:include_parsed_text_file] = include_parsed_text_file if include_parsed_text_file != SENTINEL _body[:include_additional_files] = include_additional_files if include_additional_files != SENTINEL _body[:presigned_url_expiry_time_seconds] = presigned_url_expiry_time_seconds if presigned_url_expiry_time_seconds != SENTINEL organization_user_files_to_sync_query_input = _body query_user_files_with_http_info_impl(organization_user_files_to_sync_query_input, extra) end |
#resync(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {}) ⇒ Object
Resync File
1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1245 def resync(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {}) _body = {} _body[:file_id] = file_id if file_id != SENTINEL _body[:chunk_size] = chunk_size if chunk_size != SENTINEL _body[:chunk_overlap] = chunk_overlap if chunk_overlap != SENTINEL _body[:force_embedding_generation] = force_embedding_generation if force_embedding_generation != SENTINEL _body[:skip_file_processing] = skip_file_processing if skip_file_processing != SENTINEL resync_file_query_input = _body api_response = resync_with_http_info_impl(resync_file_query_input, extra) api_response.data end |
#resync_with_http_info(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {}) ⇒ Object
Resync File
1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1266 def resync_with_http_info(file_id:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, force_embedding_generation: false, skip_file_processing: false, extra: {}) _body = {} _body[:file_id] = file_id if file_id != SENTINEL _body[:chunk_size] = chunk_size if chunk_size != SENTINEL _body[:chunk_overlap] = chunk_overlap if chunk_overlap != SENTINEL _body[:force_embedding_generation] = force_embedding_generation if force_embedding_generation != SENTINEL _body[:skip_file_processing] = skip_file_processing if skip_file_processing != SENTINEL resync_file_query_input = _body resync_with_http_info_impl(resync_file_query_input, extra) end |
#upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Upload File
This endpoint is used to directly upload local files to Carbon. The `POST` request should be a multipart form request. Note that the `set_page_as_boundary` query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. However, additional information can then be retrieved for each chunk, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters:
-
`chunk_size`: the chunk size (in tokens) applied when splitting the document
-
`chunk_overlap`: the chunk overlap (in tokens) applied when splitting the document
-
`skip_embedding_generation`: whether or not to skip the generation of chunks and embeddings
-
`set_page_as_boundary`: described above
-
`embedding_model`: the model used to generate embeddings for the document chunks
-
`use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs
-
`generate_sparse_vectors`: whether or not to generate sparse vectors for the file. Required for hybrid search.
-
`prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text
Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI’s multimodal model; for text, we support OpenAI’s `text-embedding-ada-002` and Cohere’s `embed-multilingual-v3.0`. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and as a query parameter in `/uploadfile`). If no model is supplied, `text-embedding-ada-002` is used by default. When performing embedding queries, only embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that the set of all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.
1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1394 def upload(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) _body = {} _body[:file] = file if file != SENTINEL body_create_upload_file_uploadfile_post = _body extra[:chunk_size] = chunk_size if chunk_size != SENTINEL extra[:chunk_overlap] = chunk_overlap if chunk_overlap != SENTINEL extra[:skip_embedding_generation] = skip_embedding_generation if skip_embedding_generation != SENTINEL extra[:set_page_as_boundary] = set_page_as_boundary if set_page_as_boundary != SENTINEL extra[:embedding_model] = embedding_model if embedding_model != SENTINEL extra[:use_ocr] = use_ocr if use_ocr != SENTINEL extra[:generate_sparse_vectors] = generate_sparse_vectors if generate_sparse_vectors != SENTINEL extra[:prepend_filename_to_chunks] = prepend_filename_to_chunks if prepend_filename_to_chunks != SENTINEL extra[:max_items_per_chunk] = max_items_per_chunk if max_items_per_chunk != SENTINEL extra[:parse_pdf_tables_with_ocr] = parse_pdf_tables_with_ocr if parse_pdf_tables_with_ocr != SENTINEL extra[:detect_audio_language] = detect_audio_language if detect_audio_language != SENTINEL extra[:transcription_service] = transcription_service if transcription_service != SENTINEL extra[:include_speaker_labels] = include_speaker_labels if include_speaker_labels != SENTINEL extra[:media_type] = media_type if media_type != SENTINEL extra[:split_rows] = split_rows if split_rows != SENTINEL extra[:enable_cold_storage] = enable_cold_storage if enable_cold_storage != SENTINEL extra[:hot_storage_time_to_live] = hot_storage_time_to_live if hot_storage_time_to_live != SENTINEL extra[:generate_chunks_only] = generate_chunks_only if generate_chunks_only != SENTINEL extra[:store_file_only] = store_file_only if store_file_only != SENTINEL api_response = upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, extra) api_response.data end |
#upload_from_url(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Upload File From Url
1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1654
#
# Create Upload File From Url.
#
# Builds the request body from every keyword option whose value is not
# SENTINEL (keys in declaration order), hands it with +extra+ to
# +upload_from_url_with_http_info_impl+, and returns the +data+ of the
# response.
#
# @param url the URL placed in the body under +:url+
# @param extra [Hash] forwarded unchanged to the underlying implementation
# @return the +data+ attribute of the object returned by
#   +upload_from_url_with_http_info_impl+
def upload_from_url(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  # Hash#select keeps insertion order, so the surviving keys appear in the
  # body in the order listed here. SENTINEL marks "not supplied".
  upload_file_from_url_input = {
    url: url,
    file_name: file_name,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    set_page_as_boundary: set_page_as_boundary,
    embedding_model: embedding_model,
    generate_sparse_vectors: generate_sparse_vectors,
    use_textract: use_textract,
    prepend_filename_to_chunks: prepend_filename_to_chunks,
    max_items_per_chunk: max_items_per_chunk,
    parse_pdf_tables_with_ocr: parse_pdf_tables_with_ocr,
    detect_audio_language: detect_audio_language,
    transcription_service: transcription_service,
    include_speaker_labels: include_speaker_labels,
    media_type: media_type,
    split_rows: split_rows,
    cold_storage_params: cold_storage_params,
    generate_chunks_only: generate_chunks_only,
    store_file_only: store_file_only
  }.select { |_option, value| value != SENTINEL }

  api_response = upload_from_url_with_http_info_impl(upload_file_from_url_input, extra)
  api_response.data
end
#upload_from_url_with_http_info(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Upload File From Url
1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1705
#
# Create Upload File From Url.
#
# Builds the request body from every keyword option whose value is not
# SENTINEL (keys in declaration order) and returns whatever
# +upload_from_url_with_http_info_impl+ returns, without unwrapping it.
#
# @param url the URL placed in the body under +:url+
# @param extra [Hash] forwarded unchanged to the underlying implementation
# @return the object returned by +upload_from_url_with_http_info_impl+
def upload_from_url_with_http_info(url:, file_name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: SENTINEL, generate_sparse_vectors: false, use_textract: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  # Hash#select keeps insertion order, so the surviving keys appear in the
  # body in the order listed here. SENTINEL marks "not supplied".
  upload_file_from_url_input = {
    url: url,
    file_name: file_name,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    set_page_as_boundary: set_page_as_boundary,
    embedding_model: embedding_model,
    generate_sparse_vectors: generate_sparse_vectors,
    use_textract: use_textract,
    prepend_filename_to_chunks: prepend_filename_to_chunks,
    max_items_per_chunk: max_items_per_chunk,
    parse_pdf_tables_with_ocr: parse_pdf_tables_with_ocr,
    detect_audio_language: detect_audio_language,
    transcription_service: transcription_service,
    include_speaker_labels: include_speaker_labels,
    media_type: media_type,
    split_rows: split_rows,
    cold_storage_params: cold_storage_params,
    generate_chunks_only: generate_chunks_only,
    store_file_only: store_file_only
  }.select { |_option, value| value != SENTINEL }

  upload_from_url_with_http_info_impl(upload_file_from_url_input, extra)
end
#upload_text(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Raw Text
Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI’s multimodal model; for text, we support OpenAI’s `text-embedding-ada-002` and Cohere’s `embed-multilingual-v3.0`. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query parameter in `/uploadfile`). If no model is supplied, `text-embedding-ada-002` is used by default. When performing embedding queries, only embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.
1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1824
#
# Create Raw Text.
#
# Builds the request body from every keyword option whose value is not
# SENTINEL (keys in declaration order), hands it with +extra+ to
# +upload_text_with_http_info_impl+, and returns the +data+ of the response.
#
# @param contents the text placed in the body under +:contents+
# @param extra [Hash] forwarded unchanged to the underlying implementation
# @return the +data+ attribute of the object returned by
#   +upload_text_with_http_info_impl+
def upload_text(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  # Hash#select keeps insertion order, so the surviving keys appear in the
  # body in the order listed here. SENTINEL marks "not supplied".
  raw_text_input = {
    contents: contents,
    name: name,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    overwrite_file_id: overwrite_file_id,
    embedding_model: embedding_model,
    generate_sparse_vectors: generate_sparse_vectors,
    cold_storage_params: cold_storage_params,
    generate_chunks_only: generate_chunks_only,
    store_file_only: store_file_only
  }.select { |_option, value| value != SENTINEL }

  api_response = upload_text_with_http_info_impl(raw_text_input, extra)
  api_response.data
end
#upload_text_with_http_info(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Raw Text
Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI’s multimodal model; for text, we support OpenAI’s `text-embedding-ada-002` and Cohere’s `embed-multilingual-v3.0`. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query parameter in `/uploadfile`). If no model is supplied, `text-embedding-ada-002` is used by default. When performing embedding queries, only embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.
1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1868
#
# Create Raw Text.
#
# Builds the request body from every keyword option whose value is not
# SENTINEL (keys in declaration order) and returns whatever
# +upload_text_with_http_info_impl+ returns, without unwrapping it.
#
# @param contents the text placed in the body under +:contents+
# @param extra [Hash] forwarded unchanged to the underlying implementation
# @return the object returned by +upload_text_with_http_info_impl+
def upload_text_with_http_info(contents:, name: SENTINEL, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, overwrite_file_id: SENTINEL, embedding_model: 'OPENAI', generate_sparse_vectors: false, cold_storage_params: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  # Hash#select keeps insertion order, so the surviving keys appear in the
  # body in the order listed here. SENTINEL marks "not supplied".
  raw_text_input = {
    contents: contents,
    name: name,
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    overwrite_file_id: overwrite_file_id,
    embedding_model: embedding_model,
    generate_sparse_vectors: generate_sparse_vectors,
    cold_storage_params: cold_storage_params,
    generate_chunks_only: generate_chunks_only,
    store_file_only: store_file_only
  }.select { |_option, value| value != SENTINEL }

  upload_text_with_http_info_impl(raw_text_input, extra)
end
#upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {}) ⇒ Object
Create Upload File
This endpoint is used to directly upload local files to Carbon. The `POST` request should be a multipart form request. Note that the `set_page_as_boundary` query parameter is applicable only to PDFs for now. When this value is set, PDF chunks are at most one page long. However, additional information can then be retrieved for each chunk, namely the coordinates of the bounding box around the chunk (this can be used for things like text highlighting). Following is a description of all possible query parameters:
- `chunk_size`: the chunk size (in tokens) applied when splitting the document
- `chunk_overlap`: the chunk overlap (in tokens) applied when splitting the document
- `skip_embedding_generation`: whether or not to skip the generation of chunks and embeddings
- `set_page_as_boundary`: described above
- `embedding_model`: the model used to generate embeddings for the document chunks
- `use_ocr`: whether or not to use OCR as a preprocessing step prior to generating chunks. Valid for PDFs, JPEGs, and PNGs
- `generate_sparse_vectors`: whether or not to generate sparse vectors for the file. Required for hybrid search.
- `prepend_filename_to_chunks`: whether or not to prepend the filename to the chunk text
Carbon supports multiple models for use in generating embeddings for files. For images, we support Vertex AI’s multimodal model; for text, we support OpenAI’s `text-embedding-ada-002` and Cohere’s `embed-multilingual-v3.0`. The model can be specified via the `embedding_model` parameter (in the POST body for `/embeddings`, and a query parameter in `/uploadfile`). If no model is supplied, `text-embedding-ada-002` is used by default. When performing embedding queries, only embeddings from files that used the specified model will be considered in the query. For example, if files A and B have embeddings generated with `OPENAI`, and files C and D have embeddings generated with `COHERE_MULTILINGUAL_V3`, then by default, queries will only consider files A and B. If `COHERE_MULTILINGUAL_V3` is specified as the `embedding_model` in `/embeddings`, then only files C and D will be considered. Make sure that all files you want considered for a query have embeddings generated via the same model. For now, **do not** set `VERTEX_MULTIMODAL` as an `embedding_model`. This model is used automatically by Carbon when it detects an image file.
1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 |
# File 'lib/carbon_ruby_sdk/api/files_api.rb', line 1471
#
# Create Upload File.
#
# The request body contains only +:file+. Every other keyword option whose
# value is not SENTINEL is written into +extra+ under its own name, in the
# order the options are declared. The hash passed as +extra+ is modified in
# place. Returns whatever +upload_with_http_info_impl+ returns, without
# unwrapping it.
#
# @param file the file to upload; also passed positionally to the
#   underlying implementation
# @param extra [Hash] receives the non-SENTINEL options and is forwarded to
#   +upload_with_http_info_impl+
# @return the object returned by +upload_with_http_info_impl+
def upload_with_http_info(file:, chunk_size: SENTINEL, chunk_overlap: SENTINEL, skip_embedding_generation: false, set_page_as_boundary: false, embedding_model: 'OPENAI', use_ocr: false, generate_sparse_vectors: false, prepend_filename_to_chunks: false, max_items_per_chunk: SENTINEL, parse_pdf_tables_with_ocr: false, detect_audio_language: false, transcription_service: SENTINEL, include_speaker_labels: false, media_type: SENTINEL, split_rows: false, enable_cold_storage: false, hot_storage_time_to_live: SENTINEL, generate_chunks_only: false, store_file_only: false, extra: {})
  _body = {}
  _body[:file] = file if file != SENTINEL
  body_create_upload_file_uploadfile_post = _body

  # Ordered option table: Ruby hashes keep insertion order, so the keys land
  # in +extra+ in exactly this sequence.
  {
    chunk_size: chunk_size,
    chunk_overlap: chunk_overlap,
    skip_embedding_generation: skip_embedding_generation,
    set_page_as_boundary: set_page_as_boundary,
    embedding_model: embedding_model,
    use_ocr: use_ocr,
    generate_sparse_vectors: generate_sparse_vectors,
    prepend_filename_to_chunks: prepend_filename_to_chunks,
    max_items_per_chunk: max_items_per_chunk,
    parse_pdf_tables_with_ocr: parse_pdf_tables_with_ocr,
    detect_audio_language: detect_audio_language,
    transcription_service: transcription_service,
    include_speaker_labels: include_speaker_labels,
    media_type: media_type,
    split_rows: split_rows,
    enable_cold_storage: enable_cold_storage,
    hot_storage_time_to_live: hot_storage_time_to_live,
    generate_chunks_only: generate_chunks_only,
    store_file_only: store_file_only
  }.each do |option, value|
    # SENTINEL marks "not supplied"; such options are left out entirely.
    extra[option] = value if value != SENTINEL
  end

  upload_with_http_info_impl(file, body_create_upload_file_uploadfile_post, extra)
end