Class: Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig
- Inherits:
-
Object
- Object
- Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig
- Extended by:
- Protobuf::MessageExts::ClassMethods
- Includes:
- Protobuf::MessageExts
- Defined in:
- proto_docs/google/cloud/discoveryengine/v1/document_processing_config.rb
Overview
Related configurations applied to a specific type of document parser.
Defined Under Namespace
Classes: DigitalParsingConfig, LayoutParsingConfig, OcrParsingConfig
Instance Attribute Summary collapse
-
#digital_parsing_config ⇒ ::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::DigitalParsingConfig
Configurations applied to digital parser.
-
#layout_parsing_config ⇒ ::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::LayoutParsingConfig
Configurations applied to layout parser.
-
#ocr_parsing_config ⇒ ::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::OcrParsingConfig
Configurations applied to OCR parser.
Instance Attribute Details
#digital_parsing_config ⇒ ::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::DigitalParsingConfig
Returns Configurations applied to digital parser.
Note: The following fields are mutually exclusive: digital_parsing_config, ocr_parsing_config, layout_parsing_config. If a field in that set is populated, all other fields in the set will automatically be cleared.
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# File 'proto_docs/google/cloud/discoveryengine/v1/document_processing_config.rb', line 109

class ParsingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # The digital parsing configurations for documents.
  class DigitalParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The OCR parsing configurations for documents.
  # @!attribute [rw] enhanced_document_elements
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::Array<::String>]
  #     [DEPRECATED] This field is deprecated. To use the additional enhanced
  #     document elements processing, please switch to `layout_parsing_config`.
  # @!attribute [rw] use_native_text
  #   @return [::Boolean]
  #     If true, will use native text instead of OCR text on pages containing
  #     native text.
  class OcrParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The layout parsing configurations for documents.
  # @!attribute [rw] enable_table_annotation
  #   @return [::Boolean]
  #     Optional. If true, the LLM based annotation is added to the table
  #     during parsing.
  # @!attribute [rw] enable_image_annotation
  #   @return [::Boolean]
  #     Optional. If true, the LLM based annotation is added to the image
  #     during parsing.
  # @!attribute [rw] structured_content_types
  #   @return [::Array<::String>]
  #     Optional. Contains the required structure types to extract from the
  #     document. Supported values:
  #
  #     * `shareholder-structure`
  # @!attribute [rw] exclude_html_elements
  #   @return [::Array<::String>]
  #     Optional. List of HTML elements to exclude from the parsed content.
  # @!attribute [rw] exclude_html_classes
  #   @return [::Array<::String>]
  #     Optional. List of HTML classes to exclude from the parsed content.
  # @!attribute [rw] exclude_html_ids
  #   @return [::Array<::String>]
  #     Optional. List of HTML ids to exclude from the parsed content.
  class LayoutParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end
#layout_parsing_config ⇒ ::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::LayoutParsingConfig
Returns Configurations applied to layout parser.
Note: The following fields are mutually exclusive: layout_parsing_config, digital_parsing_config, ocr_parsing_config. If a field in that set is populated, all other fields in the set will automatically be cleared.
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# File 'proto_docs/google/cloud/discoveryengine/v1/document_processing_config.rb', line 109

class ParsingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # The digital parsing configurations for documents.
  class DigitalParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The OCR parsing configurations for documents.
  # @!attribute [rw] enhanced_document_elements
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::Array<::String>]
  #     [DEPRECATED] This field is deprecated. To use the additional enhanced
  #     document elements processing, please switch to `layout_parsing_config`.
  # @!attribute [rw] use_native_text
  #   @return [::Boolean]
  #     If true, will use native text instead of OCR text on pages containing
  #     native text.
  class OcrParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The layout parsing configurations for documents.
  # @!attribute [rw] enable_table_annotation
  #   @return [::Boolean]
  #     Optional. If true, the LLM based annotation is added to the table
  #     during parsing.
  # @!attribute [rw] enable_image_annotation
  #   @return [::Boolean]
  #     Optional. If true, the LLM based annotation is added to the image
  #     during parsing.
  # @!attribute [rw] structured_content_types
  #   @return [::Array<::String>]
  #     Optional. Contains the required structure types to extract from the
  #     document. Supported values:
  #
  #     * `shareholder-structure`
  # @!attribute [rw] exclude_html_elements
  #   @return [::Array<::String>]
  #     Optional. List of HTML elements to exclude from the parsed content.
  # @!attribute [rw] exclude_html_classes
  #   @return [::Array<::String>]
  #     Optional. List of HTML classes to exclude from the parsed content.
  # @!attribute [rw] exclude_html_ids
  #   @return [::Array<::String>]
  #     Optional. List of HTML ids to exclude from the parsed content.
  class LayoutParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end
#ocr_parsing_config ⇒ ::Google::Cloud::DiscoveryEngine::V1::DocumentProcessingConfig::ParsingConfig::OcrParsingConfig
Returns Configurations applied to OCR parser. Currently it only applies to PDFs.
Note: The following fields are mutually exclusive: ocr_parsing_config, digital_parsing_config, layout_parsing_config. If a field in that set is populated, all other fields in the set will automatically be cleared.
109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
# File 'proto_docs/google/cloud/discoveryengine/v1/document_processing_config.rb', line 109

class ParsingConfig
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # The digital parsing configurations for documents.
  class DigitalParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The OCR parsing configurations for documents.
  # @!attribute [rw] enhanced_document_elements
  #   @deprecated This field is deprecated and may be removed in the next major version update.
  #   @return [::Array<::String>]
  #     [DEPRECATED] This field is deprecated. To use the additional enhanced
  #     document elements processing, please switch to `layout_parsing_config`.
  # @!attribute [rw] use_native_text
  #   @return [::Boolean]
  #     If true, will use native text instead of OCR text on pages containing
  #     native text.
  class OcrParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The layout parsing configurations for documents.
  # @!attribute [rw] enable_table_annotation
  #   @return [::Boolean]
  #     Optional. If true, the LLM based annotation is added to the table
  #     during parsing.
  # @!attribute [rw] enable_image_annotation
  #   @return [::Boolean]
  #     Optional. If true, the LLM based annotation is added to the image
  #     during parsing.
  # @!attribute [rw] structured_content_types
  #   @return [::Array<::String>]
  #     Optional. Contains the required structure types to extract from the
  #     document. Supported values:
  #
  #     * `shareholder-structure`
  # @!attribute [rw] exclude_html_elements
  #   @return [::Array<::String>]
  #     Optional. List of HTML elements to exclude from the parsed content.
  # @!attribute [rw] exclude_html_classes
  #   @return [::Array<::String>]
  #     Optional. List of HTML classes to exclude from the parsed content.
  # @!attribute [rw] exclude_html_ids
  #   @return [::Array<::String>]
  #     Optional. List of HTML ids to exclude from the parsed content.
  class LayoutParsingConfig
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end