Class: Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec

Inherits:

Object → Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec

show all

Extended by: Google::Protobuf::MessageExts::ClassMethods

Includes: Google::Protobuf::MessageExts

Defined in: proto_docs/google/cloud/aiplatform/v1/endpoint.rb

Overview

Configuration for Speculative Decoding.

Defined Under Namespace

Classes: DraftModelSpeculation, NgramSpeculation

Instance Attribute Summary collapse

#draft_model_speculation ⇒ ::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation
Draft model speculation.
#ngram_speculation ⇒ ::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation
N-Gram speculation.
#speculative_token_count ⇒ ::Integer
The number of speculative tokens to generate at each step.

Instance Attribute Details

#draft_model_speculation ⇒ `::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation`

Returns draft model speculation.

Note: The following fields are mutually exclusive: draft_model_speculation, ngram_speculation. If a field in that set is populated, all other fields in the set will automatically be cleared.

Returns:

(::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation) —
draft model speculation.

Note: The following fields are mutually exclusive: draft_model_speculation, ngram_speculation. If a field in that set is populated, all other fields in the set will automatically be cleared.

# File 'proto_docs/google/cloud/aiplatform/v1/endpoint.rb', line 396

# Configuration for Speculative Decoding.
# @!attribute [rw] draft_model_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation]
#     Draft model speculation.
#
#     Note: The following fields are mutually exclusive: `draft_model_speculation`, `ngram_speculation`.
#     If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] ngram_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation]
#     N-Gram speculation.
#
#     Note: The following fields are mutually exclusive: `ngram_speculation`, `draft_model_speculation`.
#     If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] speculative_token_count
#   @return [::Integer]
#     The number of speculative tokens to generate at each step.
class SpeculativeDecodingSpec
  # Protobuf message mixins: instance-level via `include`, class-level via `extend`.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Draft model speculation works by using the smaller model to generate
  # candidate tokens for speculative decoding.
  # @!attribute [rw] draft_model
  #   @return [::String]
  #     Required. The resource name of the draft model.
  class DraftModelSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # N-Gram speculation works by trying to find matching tokens in the
  # previous prompt sequence and use those as speculation for generating
  # new tokens.
  # @!attribute [rw] ngram_size
  #   @return [::Integer]
  #     The number of last N input tokens used as ngram to search/match
  #     against the previous prompt sequence.
  #     This is equal to the N in N-Gram.
  #     The default value is 3 if not specified.
  class NgramSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#ngram_speculation ⇒ `::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation`

Returns N-Gram speculation.

Note: The following fields are mutually exclusive: ngram_speculation, draft_model_speculation. If a field in that set is populated, all other fields in the set will automatically be cleared.

Returns:

(::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation) —
N-Gram speculation.

Note: The following fields are mutually exclusive: ngram_speculation, draft_model_speculation. If a field in that set is populated, all other fields in the set will automatically be cleared.

# File 'proto_docs/google/cloud/aiplatform/v1/endpoint.rb', line 396

# Configuration for Speculative Decoding.
# @!attribute [rw] draft_model_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation]
#     Draft model speculation.
#
#     Note: The following fields are mutually exclusive: `draft_model_speculation`, `ngram_speculation`.
#     If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] ngram_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation]
#     N-Gram speculation.
#
#     Note: The following fields are mutually exclusive: `ngram_speculation`, `draft_model_speculation`.
#     If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] speculative_token_count
#   @return [::Integer]
#     The number of speculative tokens to generate at each step.
class SpeculativeDecodingSpec
  # Protobuf message mixins: instance-level via `include`, class-level via `extend`.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Draft model speculation works by using the smaller model to generate
  # candidate tokens for speculative decoding.
  # @!attribute [rw] draft_model
  #   @return [::String]
  #     Required. The resource name of the draft model.
  class DraftModelSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # N-Gram speculation works by trying to find matching tokens in the
  # previous prompt sequence and use those as speculation for generating
  # new tokens.
  # @!attribute [rw] ngram_size
  #   @return [::Integer]
  #     The number of last N input tokens used as ngram to search/match
  #     against the previous prompt sequence.
  #     This is equal to the N in N-Gram.
  #     The default value is 3 if not specified.
  class NgramSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#speculative_token_count ⇒ `::Integer`

Returns the number of speculative tokens to generate at each step.

Returns:

(::Integer) —
The number of speculative tokens to generate at each step.

# File 'proto_docs/google/cloud/aiplatform/v1/endpoint.rb', line 396

# Configuration for Speculative Decoding.
# @!attribute [rw] draft_model_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation]
#     Draft model speculation.
#
#     Note: The following fields are mutually exclusive: `draft_model_speculation`, `ngram_speculation`.
#     If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] ngram_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation]
#     N-Gram speculation.
#
#     Note: The following fields are mutually exclusive: `ngram_speculation`, `draft_model_speculation`.
#     If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] speculative_token_count
#   @return [::Integer]
#     The number of speculative tokens to generate at each step.
class SpeculativeDecodingSpec
  # Protobuf message mixins: instance-level via `include`, class-level via `extend`.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Draft model speculation works by using the smaller model to generate
  # candidate tokens for speculative decoding.
  # @!attribute [rw] draft_model
  #   @return [::String]
  #     Required. The resource name of the draft model.
  class DraftModelSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # N-Gram speculation works by trying to find matching tokens in the
  # previous prompt sequence and use those as speculation for generating
  # new tokens.
  # @!attribute [rw] ngram_size
  #   @return [::Integer]
  #     The number of last N input tokens used as ngram to search/match
  #     against the previous prompt sequence.
  #     This is equal to the N in N-Gram.
  #     The default value is 3 if not specified.
  class NgramSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

Class: Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec

Overview

Defined Under Namespace

Instance Attribute Summary collapse

Instance Attribute Details

#draft_model_speculation ⇒ ::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation

#ngram_speculation ⇒ ::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation

#speculative_token_count ⇒ ::Integer

#draft_model_speculation ⇒ `::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation`

#ngram_speculation ⇒ `::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation`

#speculative_token_count ⇒ `::Integer`