Class: Google::Cloud::Vision::V1::TextAnnotation

Inherits:
Object
  • Object
show all
Extended by:
Protobuf::MessageExts::ClassMethods
Includes:
Protobuf::MessageExts
Defined in:
proto_docs/google/cloud/vision/v1/text_annotation.rb

Overview

TextAnnotation contains a structured representation of OCR-extracted text. The hierarchy of an OCR-extracted text structure is: TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol. Each structural component, starting from Page, may further have its own properties. Properties describe detected languages, breaks, etc. Please refer to the TextAnnotation.TextProperty message definition below for more detail.

Defined Under Namespace

Classes: DetectedBreak, DetectedLanguage, TextProperty

Instance Attribute Summary collapse

Instance Attribute Details

#pages ⇒ ::Array<::Google::Cloud::Vision::V1::Page>

Returns the list of pages detected by OCR.

Returns:

  • (::Array<::Google::Cloud::Vision::V1::Page>)

    List of pages detected by OCR.


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'proto_docs/google/cloud/vision/v1/text_annotation.rb', line 37

# TextAnnotation contains a structured representation of OCR extracted text.
# The hierarchy of an OCR extracted text structure is like this:
#     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
# Each structural component, starting from Page, may further have its own
# properties. Properties describe detected languages, breaks etc. See the
# nested TextProperty class for more detail.
#
# Instance attributes:
#   * pages (::Array<::Google::Cloud::Vision::V1::Page>) - List of pages
#     detected by OCR.
#   * text (::String) - UTF-8 text detected on the pages.
class TextAnnotation
  # Mix MessageExts into instances and MessageExts::ClassMethods into the
  # class itself; every nested message class below repeats this pair.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Detected language for a structural component.
  # @!attribute [rw] language_code
  #   @return [::String]
  #     The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  #     information, see
  #     http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  # @!attribute [rw] confidence
  #   @return [::Float]
  #     Confidence of detected language. Range [0, 1].
  class DetectedLanguage
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Detected start or end of a structural component.
  # @!attribute [rw] type
  #   @return [::Google::Cloud::Vision::V1::TextAnnotation::DetectedBreak::BreakType]
  #     Detected break type.
  # @!attribute [rw] is_prefix
  #   @return [::Boolean]
  #     True if break prepends the element.
  class DetectedBreak
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Enum to denote the type of break found (new line, space, etc.).
    # Each constant below maps a break type name to its integer value.
    module BreakType
      # Unknown break label type.
      UNKNOWN = 0

      # Regular space.
      SPACE = 1

      # Sure space (very wide).
      SURE_SPACE = 2

      # Line-wrapping break.
      EOL_SURE_SPACE = 3

      # End-line hyphen that is not present in text; does not co-occur with
      # `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
      # NOTE(review): this module defines no `LEADER_SPACE` constant; confirm
      # which break type the sentence above is meant to name.
      HYPHEN = 4

      # Line break that ends a paragraph.
      LINE_BREAK = 5
    end
  end

  # Additional information detected on the structural component.
  # @!attribute [rw] detected_languages
  #   @return [::Array<::Google::Cloud::Vision::V1::TextAnnotation::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  # @!attribute [rw] detected_break
  #   @return [::Google::Cloud::Vision::V1::TextAnnotation::DetectedBreak]
  #     Detected start or end of a text segment.
  class TextProperty
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#text ⇒ ::String

Returns the UTF-8 text detected on the pages.

Returns:

  • (::String)

    UTF-8 text detected on the pages.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'proto_docs/google/cloud/vision/v1/text_annotation.rb', line 37

# TextAnnotation contains a structured representation of OCR extracted text.
# The hierarchy of an OCR extracted text structure is like this:
#     TextAnnotation -> Page -> Block -> Paragraph -> Word -> Symbol
# Each structural component, starting from Page, may further have its own
# properties. Properties describe detected languages, breaks etc. See the
# nested TextProperty class for more detail.
#
# Instance attributes:
#   * pages (::Array<::Google::Cloud::Vision::V1::Page>) - List of pages
#     detected by OCR.
#   * text (::String) - UTF-8 text detected on the pages.
class TextAnnotation
  # Mix MessageExts into instances and MessageExts::ClassMethods into the
  # class itself; every nested message class below repeats this pair.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Detected language for a structural component.
  # @!attribute [rw] language_code
  #   @return [::String]
  #     The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  #     information, see
  #     http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  # @!attribute [rw] confidence
  #   @return [::Float]
  #     Confidence of detected language. Range [0, 1].
  class DetectedLanguage
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Detected start or end of a structural component.
  # @!attribute [rw] type
  #   @return [::Google::Cloud::Vision::V1::TextAnnotation::DetectedBreak::BreakType]
  #     Detected break type.
  # @!attribute [rw] is_prefix
  #   @return [::Boolean]
  #     True if break prepends the element.
  class DetectedBreak
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods

    # Enum to denote the type of break found (new line, space, etc.).
    # Each constant below maps a break type name to its integer value.
    module BreakType
      # Unknown break label type.
      UNKNOWN = 0

      # Regular space.
      SPACE = 1

      # Sure space (very wide).
      SURE_SPACE = 2

      # Line-wrapping break.
      EOL_SURE_SPACE = 3

      # End-line hyphen that is not present in text; does not co-occur with
      # `SPACE`, `LEADER_SPACE`, or `LINE_BREAK`.
      # NOTE(review): this module defines no `LEADER_SPACE` constant; confirm
      # which break type the sentence above is meant to name.
      HYPHEN = 4

      # Line break that ends a paragraph.
      LINE_BREAK = 5
    end
  end

  # Additional information detected on the structural component.
  # @!attribute [rw] detected_languages
  #   @return [::Array<::Google::Cloud::Vision::V1::TextAnnotation::DetectedLanguage>]
  #     A list of detected languages together with confidence.
  # @!attribute [rw] detected_break
  #   @return [::Google::Cloud::Vision::V1::TextAnnotation::DetectedBreak]
  #     Detected start or end of a text segment.
  class TextProperty
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end