Class: Google::Cloud::Dataplex::V1::StorageFormat

Inherits:
Object
  • Object
show all
Extended by:
Protobuf::MessageExts::ClassMethods
Includes:
Protobuf::MessageExts
Defined in:
proto_docs/google/cloud/dataplex/v1/metadata.rb

Overview

Describes the format of the data within its storage location.

Defined Under Namespace

Modules: CompressionFormat, Format

Classes: CsvOptions, IcebergOptions, JsonOptions

Instance Attribute Summary collapse

Instance Attribute Details

#compression_format::Google::Cloud::Dataplex::V1::StorageFormat::CompressionFormat

Returns Optional. The compression type associated with the stored data. If unspecified, the data is uncompressed.

Returns:



596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
# File 'proto_docs/google/cloud/dataplex/v1/metadata.rb', line 596

# Describes the format of the data within its storage location.
#
# Besides the message class itself, this definition declares the nested option
# messages (CsvOptions, JsonOptions, IcebergOptions) and the Format and
# CompressionFormat enum modules under the StorageFormat namespace.
class StorageFormat
  # Mixes ::Google::Protobuf::MessageExts into instances and extends the class
  # with its ClassMethods; every nested message class below does the same.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Describes CSV and similar semi-structured data formats.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
  # @!attribute [rw] header_rows
  #   @return [::Integer]
  #     Optional. The number of rows to interpret as header rows that should be
  #     skipped when reading data rows. Defaults to 0.
  # @!attribute [rw] delimiter
  #   @return [::String]
  #     Optional. The delimiter used to separate values. Defaults to ','.
  # @!attribute [rw] quote
  #   @return [::String]
  #     Optional. The character used to quote column values. Accepts `"`
  #     (double quotation mark) or `'` (single quotation mark). Defaults to
  #     `"` (double quotation mark) if unspecified.
  class CsvOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes JSON data format.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if not specified.
  class JsonOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes Iceberg data format.
  # @!attribute [rw] metadata_location
  #   @return [::String]
  #     Optional. The location where the Iceberg metadata is stored. Must be
  #     within the table path.
  class IcebergOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The specific file format of the data.
  #
  # Values are grouped by numeric range: 1-3 are structured formats, 100-101
  # are semi-structured formats, 200-204 cover image, audio, video, text and
  # TFRecord data, and 1000-1001 are the fallback values (OTHER, UNKNOWN).
  module Format
    # Format unspecified.
    FORMAT_UNSPECIFIED = 0

    # Parquet-formatted structured data.
    PARQUET = 1

    # Avro-formatted structured data.
    AVRO = 2

    # ORC-formatted structured data.
    ORC = 3

    # CSV-formatted semi-structured data.
    CSV = 100

    # JSON-formatted semi-structured data.
    JSON = 101

    # Image data formats (such as jpg and png).
    IMAGE = 200

    # Audio data formats (such as mp3 and wav).
    AUDIO = 201

    # Video data formats (such as mp4 and mpg).
    VIDEO = 202

    # Textual data formats (such as txt and xml).
    TEXT = 203

    # TensorFlow record format.
    TFRECORD = 204

    # Data that doesn't match a specific format.
    OTHER = 1000

    # Data of an unknown format.
    UNKNOWN = 1001
  end

  # The specific compressed file format of the data.
  #
  # NOTE(review): no constant is declared for value 1 here (the values jump
  # from 0 to 2) - presumably reserved or removed upstream; confirm before
  # reusing it.
  module CompressionFormat
    # CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0

    # GZip compressed set of files.
    GZIP = 2

    # BZip2 compressed set of files.
    BZIP2 = 3
  end
end

#csv::Google::Cloud::Dataplex::V1::StorageFormat::CsvOptions

Returns Optional. Additional information about CSV formatted data.

Returns:



596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
# File 'proto_docs/google/cloud/dataplex/v1/metadata.rb', line 596

# Describes the format of the data within its storage location.
#
# Besides the message class itself, this definition declares the nested option
# messages (CsvOptions, JsonOptions, IcebergOptions) and the Format and
# CompressionFormat enum modules under the StorageFormat namespace.
class StorageFormat
  # Mixes ::Google::Protobuf::MessageExts into instances and extends the class
  # with its ClassMethods; every nested message class below does the same.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Describes CSV and similar semi-structured data formats.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
  # @!attribute [rw] header_rows
  #   @return [::Integer]
  #     Optional. The number of rows to interpret as header rows that should be
  #     skipped when reading data rows. Defaults to 0.
  # @!attribute [rw] delimiter
  #   @return [::String]
  #     Optional. The delimiter used to separate values. Defaults to ','.
  # @!attribute [rw] quote
  #   @return [::String]
  #     Optional. The character used to quote column values. Accepts `"`
  #     (double quotation mark) or `'` (single quotation mark). Defaults to
  #     `"` (double quotation mark) if unspecified.
  class CsvOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes JSON data format.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if not specified.
  class JsonOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes Iceberg data format.
  # @!attribute [rw] metadata_location
  #   @return [::String]
  #     Optional. The location where the Iceberg metadata is stored. Must be
  #     within the table path.
  class IcebergOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The specific file format of the data.
  #
  # Values are grouped by numeric range: 1-3 are structured formats, 100-101
  # are semi-structured formats, 200-204 cover image, audio, video, text and
  # TFRecord data, and 1000-1001 are the fallback values (OTHER, UNKNOWN).
  module Format
    # Format unspecified.
    FORMAT_UNSPECIFIED = 0

    # Parquet-formatted structured data.
    PARQUET = 1

    # Avro-formatted structured data.
    AVRO = 2

    # ORC-formatted structured data.
    ORC = 3

    # CSV-formatted semi-structured data.
    CSV = 100

    # JSON-formatted semi-structured data.
    JSON = 101

    # Image data formats (such as jpg and png).
    IMAGE = 200

    # Audio data formats (such as mp3 and wav).
    AUDIO = 201

    # Video data formats (such as mp4 and mpg).
    VIDEO = 202

    # Textual data formats (such as txt and xml).
    TEXT = 203

    # TensorFlow record format.
    TFRECORD = 204

    # Data that doesn't match a specific format.
    OTHER = 1000

    # Data of an unknown format.
    UNKNOWN = 1001
  end

  # The specific compressed file format of the data.
  #
  # NOTE(review): no constant is declared for value 1 here (the values jump
  # from 0 to 2) - presumably reserved or removed upstream; confirm before
  # reusing it.
  module CompressionFormat
    # CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0

    # GZip compressed set of files.
    GZIP = 2

    # BZip2 compressed set of files.
    BZIP2 = 3
  end
end

#format::Google::Cloud::Dataplex::V1::StorageFormat::Format (readonly)

Returns Output only. The data format associated with the stored data, which represents content type values. The value is inferred from mime type.

Returns:



596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
# File 'proto_docs/google/cloud/dataplex/v1/metadata.rb', line 596

# Describes the format of the data within its storage location.
#
# Besides the message class itself, this definition declares the nested option
# messages (CsvOptions, JsonOptions, IcebergOptions) and the Format and
# CompressionFormat enum modules under the StorageFormat namespace.
class StorageFormat
  # Mixes ::Google::Protobuf::MessageExts into instances and extends the class
  # with its ClassMethods; every nested message class below does the same.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Describes CSV and similar semi-structured data formats.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
  # @!attribute [rw] header_rows
  #   @return [::Integer]
  #     Optional. The number of rows to interpret as header rows that should be
  #     skipped when reading data rows. Defaults to 0.
  # @!attribute [rw] delimiter
  #   @return [::String]
  #     Optional. The delimiter used to separate values. Defaults to ','.
  # @!attribute [rw] quote
  #   @return [::String]
  #     Optional. The character used to quote column values. Accepts `"`
  #     (double quotation mark) or `'` (single quotation mark). Defaults to
  #     `"` (double quotation mark) if unspecified.
  class CsvOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes JSON data format.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if not specified.
  class JsonOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes Iceberg data format.
  # @!attribute [rw] metadata_location
  #   @return [::String]
  #     Optional. The location where the Iceberg metadata is stored. Must be
  #     within the table path.
  class IcebergOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The specific file format of the data.
  #
  # Values are grouped by numeric range: 1-3 are structured formats, 100-101
  # are semi-structured formats, 200-204 cover image, audio, video, text and
  # TFRecord data, and 1000-1001 are the fallback values (OTHER, UNKNOWN).
  module Format
    # Format unspecified.
    FORMAT_UNSPECIFIED = 0

    # Parquet-formatted structured data.
    PARQUET = 1

    # Avro-formatted structured data.
    AVRO = 2

    # ORC-formatted structured data.
    ORC = 3

    # CSV-formatted semi-structured data.
    CSV = 100

    # JSON-formatted semi-structured data.
    JSON = 101

    # Image data formats (such as jpg and png).
    IMAGE = 200

    # Audio data formats (such as mp3 and wav).
    AUDIO = 201

    # Video data formats (such as mp4 and mpg).
    VIDEO = 202

    # Textual data formats (such as txt and xml).
    TEXT = 203

    # TensorFlow record format.
    TFRECORD = 204

    # Data that doesn't match a specific format.
    OTHER = 1000

    # Data of an unknown format.
    UNKNOWN = 1001
  end

  # The specific compressed file format of the data.
  #
  # NOTE(review): no constant is declared for value 1 here (the values jump
  # from 0 to 2) - presumably reserved or removed upstream; confirm before
  # reusing it.
  module CompressionFormat
    # CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0

    # GZip compressed set of files.
    GZIP = 2

    # BZip2 compressed set of files.
    BZIP2 = 3
  end
end

#iceberg::Google::Cloud::Dataplex::V1::StorageFormat::IcebergOptions

Returns Optional. Additional information about iceberg tables.

Returns:



596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
# File 'proto_docs/google/cloud/dataplex/v1/metadata.rb', line 596

# Describes the format of the data within its storage location.
#
# Besides the message class itself, this definition declares the nested option
# messages (CsvOptions, JsonOptions, IcebergOptions) and the Format and
# CompressionFormat enum modules under the StorageFormat namespace.
class StorageFormat
  # Mixes ::Google::Protobuf::MessageExts into instances and extends the class
  # with its ClassMethods; every nested message class below does the same.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Describes CSV and similar semi-structured data formats.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
  # @!attribute [rw] header_rows
  #   @return [::Integer]
  #     Optional. The number of rows to interpret as header rows that should be
  #     skipped when reading data rows. Defaults to 0.
  # @!attribute [rw] delimiter
  #   @return [::String]
  #     Optional. The delimiter used to separate values. Defaults to ','.
  # @!attribute [rw] quote
  #   @return [::String]
  #     Optional. The character used to quote column values. Accepts `"`
  #     (double quotation mark) or `'` (single quotation mark). Defaults to
  #     `"` (double quotation mark) if unspecified.
  class CsvOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes JSON data format.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if not specified.
  class JsonOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes Iceberg data format.
  # @!attribute [rw] metadata_location
  #   @return [::String]
  #     Optional. The location where the Iceberg metadata is stored. Must be
  #     within the table path.
  class IcebergOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The specific file format of the data.
  #
  # Values are grouped by numeric range: 1-3 are structured formats, 100-101
  # are semi-structured formats, 200-204 cover image, audio, video, text and
  # TFRecord data, and 1000-1001 are the fallback values (OTHER, UNKNOWN).
  module Format
    # Format unspecified.
    FORMAT_UNSPECIFIED = 0

    # Parquet-formatted structured data.
    PARQUET = 1

    # Avro-formatted structured data.
    AVRO = 2

    # ORC-formatted structured data.
    ORC = 3

    # CSV-formatted semi-structured data.
    CSV = 100

    # JSON-formatted semi-structured data.
    JSON = 101

    # Image data formats (such as jpg and png).
    IMAGE = 200

    # Audio data formats (such as mp3 and wav).
    AUDIO = 201

    # Video data formats (such as mp4 and mpg).
    VIDEO = 202

    # Textual data formats (such as txt and xml).
    TEXT = 203

    # TensorFlow record format.
    TFRECORD = 204

    # Data that doesn't match a specific format.
    OTHER = 1000

    # Data of an unknown format.
    UNKNOWN = 1001
  end

  # The specific compressed file format of the data.
  #
  # NOTE(review): no constant is declared for value 1 here (the values jump
  # from 0 to 2) - presumably reserved or removed upstream; confirm before
  # reusing it.
  module CompressionFormat
    # CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0

    # GZip compressed set of files.
    GZIP = 2

    # BZip2 compressed set of files.
    BZIP2 = 3
  end
end

#json::Google::Cloud::Dataplex::V1::StorageFormat::JsonOptions

Returns Optional. Additional information about JSON formatted data.

Returns:



596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
# File 'proto_docs/google/cloud/dataplex/v1/metadata.rb', line 596

# Describes the format of the data within its storage location.
#
# Besides the message class itself, this definition declares the nested option
# messages (CsvOptions, JsonOptions, IcebergOptions) and the Format and
# CompressionFormat enum modules under the StorageFormat namespace.
class StorageFormat
  # Mixes ::Google::Protobuf::MessageExts into instances and extends the class
  # with its ClassMethods; every nested message class below does the same.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Describes CSV and similar semi-structured data formats.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
  # @!attribute [rw] header_rows
  #   @return [::Integer]
  #     Optional. The number of rows to interpret as header rows that should be
  #     skipped when reading data rows. Defaults to 0.
  # @!attribute [rw] delimiter
  #   @return [::String]
  #     Optional. The delimiter used to separate values. Defaults to ','.
  # @!attribute [rw] quote
  #   @return [::String]
  #     Optional. The character used to quote column values. Accepts `"`
  #     (double quotation mark) or `'` (single quotation mark). Defaults to
  #     `"` (double quotation mark) if unspecified.
  class CsvOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes JSON data format.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if not specified.
  class JsonOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes Iceberg data format.
  # @!attribute [rw] metadata_location
  #   @return [::String]
  #     Optional. The location where the Iceberg metadata is stored. Must be
  #     within the table path.
  class IcebergOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The specific file format of the data.
  #
  # Values are grouped by numeric range: 1-3 are structured formats, 100-101
  # are semi-structured formats, 200-204 cover image, audio, video, text and
  # TFRecord data, and 1000-1001 are the fallback values (OTHER, UNKNOWN).
  module Format
    # Format unspecified.
    FORMAT_UNSPECIFIED = 0

    # Parquet-formatted structured data.
    PARQUET = 1

    # Avro-formatted structured data.
    AVRO = 2

    # ORC-formatted structured data.
    ORC = 3

    # CSV-formatted semi-structured data.
    CSV = 100

    # JSON-formatted semi-structured data.
    JSON = 101

    # Image data formats (such as jpg and png).
    IMAGE = 200

    # Audio data formats (such as mp3 and wav).
    AUDIO = 201

    # Video data formats (such as mp4 and mpg).
    VIDEO = 202

    # Textual data formats (such as txt and xml).
    TEXT = 203

    # TensorFlow record format.
    TFRECORD = 204

    # Data that doesn't match a specific format.
    OTHER = 1000

    # Data of an unknown format.
    UNKNOWN = 1001
  end

  # The specific compressed file format of the data.
  #
  # NOTE(review): no constant is declared for value 1 here (the values jump
  # from 0 to 2) - presumably reserved or removed upstream; confirm before
  # reusing it.
  module CompressionFormat
    # CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0

    # GZip compressed set of files.
    GZIP = 2

    # BZip2 compressed set of files.
    BZIP2 = 3
  end
end

#mime_type::String

Returns Required. The mime type descriptor for the data. Must match the pattern {type}/{subtype}. Supported values:

  • application/x-parquet
  • application/x-avro
  • application/x-orc
  • application/x-tfrecord
  • application/x-parquet+iceberg
  • application/x-avro+iceberg
  • application/x-orc+iceberg
  • application/json
  • application/{subtypes}
  • text/csv
  • text/{subtypes}
  • image/{image subtype}
  • video/{video subtype}
  • audio/{audio subtype}.

Returns:

  • (::String)

    Required. The mime type descriptor for the data. Must match the pattern {type}/{subtype}. Supported values:

    • application/x-parquet
    • application/x-avro
    • application/x-orc
    • application/x-tfrecord
    • application/x-parquet+iceberg
    • application/x-avro+iceberg
    • application/x-orc+iceberg
    • application/json
    • application/{subtypes}
    • text/csv
    • text/{subtypes}
    • image/{image subtype}
    • video/{video subtype}
    • audio/{audio subtype}


596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
# File 'proto_docs/google/cloud/dataplex/v1/metadata.rb', line 596

# Describes the format of the data within its storage location.
#
# Besides the message class itself, this definition declares the nested option
# messages (CsvOptions, JsonOptions, IcebergOptions) and the Format and
# CompressionFormat enum modules under the StorageFormat namespace.
class StorageFormat
  # Mixes ::Google::Protobuf::MessageExts into instances and extends the class
  # with its ClassMethods; every nested message class below does the same.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Describes CSV and similar semi-structured data formats.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
  # @!attribute [rw] header_rows
  #   @return [::Integer]
  #     Optional. The number of rows to interpret as header rows that should be
  #     skipped when reading data rows. Defaults to 0.
  # @!attribute [rw] delimiter
  #   @return [::String]
  #     Optional. The delimiter used to separate values. Defaults to ','.
  # @!attribute [rw] quote
  #   @return [::String]
  #     Optional. The character used to quote column values. Accepts `"`
  #     (double quotation mark) or `'` (single quotation mark). Defaults to
  #     `"` (double quotation mark) if unspecified.
  class CsvOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes JSON data format.
  # @!attribute [rw] encoding
  #   @return [::String]
  #     Optional. The character encoding of the data. Accepts "US-ASCII",
  #     "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if not specified.
  class JsonOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # Describes Iceberg data format.
  # @!attribute [rw] metadata_location
  #   @return [::String]
  #     Optional. The location where the Iceberg metadata is stored. Must be
  #     within the table path.
  class IcebergOptions
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # The specific file format of the data.
  #
  # Values are grouped by numeric range: 1-3 are structured formats, 100-101
  # are semi-structured formats, 200-204 cover image, audio, video, text and
  # TFRecord data, and 1000-1001 are the fallback values (OTHER, UNKNOWN).
  module Format
    # Format unspecified.
    FORMAT_UNSPECIFIED = 0

    # Parquet-formatted structured data.
    PARQUET = 1

    # Avro-formatted structured data.
    AVRO = 2

    # ORC-formatted structured data.
    ORC = 3

    # CSV-formatted semi-structured data.
    CSV = 100

    # JSON-formatted semi-structured data.
    JSON = 101

    # Image data formats (such as jpg and png).
    IMAGE = 200

    # Audio data formats (such as mp3 and wav).
    AUDIO = 201

    # Video data formats (such as mp4 and mpg).
    VIDEO = 202

    # Textual data formats (such as txt and xml).
    TEXT = 203

    # TensorFlow record format.
    TFRECORD = 204

    # Data that doesn't match a specific format.
    OTHER = 1000

    # Data of an unknown format.
    UNKNOWN = 1001
  end

  # The specific compressed file format of the data.
  #
  # NOTE(review): no constant is declared for value 1 here (the values jump
  # from 0 to 2) - presumably reserved or removed upstream; confirm before
  # reusing it.
  module CompressionFormat
    # CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0

    # GZip compressed set of files.
    GZIP = 2

    # BZip2 compressed set of files.
    BZIP2 = 3
  end
end