Class: Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec

Inherits:
Object
  • Object
show all
Extended by:
Protobuf::MessageExts::ClassMethods
Includes:
Protobuf::MessageExts
Defined in:
proto_docs/google/cloud/aiplatform/v1/endpoint.rb

Overview

Configuration for Speculative Decoding.

Defined Under Namespace

Classes: DraftModelSpeculation, NgramSpeculation

Instance Attribute Summary collapse

Instance Attribute Details

#draft_model_speculation::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation

Returns draft model speculation.

Note: The following fields are mutually exclusive: draft_model_speculation, ngram_speculation. If a field in that set is populated, all other fields in the set will automatically be cleared.

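Returns:

  • (::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation)

    Draft model speculation.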

396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
# File 'proto_docs/google/cloud/aiplatform/v1/endpoint.rb', line 396

class SpeculativeDecodingSpec
  # Mix in the protobuf message extensions on the instance side and the
  # matching ClassMethods on the class side.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Draft model speculation: a smaller draft model generates the candidate
  # tokens used for speculative decoding.
  # @!attribute [rw] draft_model
  #   @return [::String]
  #     Required. The resource name of the draft model.
  class DraftModelSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # N-Gram speculation: looks for matching tokens in the previous prompt
  # sequence and uses those matches as the speculation for generating new
  # tokens.
  # @!attribute [rw] ngram_size
  #   @return [::Integer]
  #     The number of last N input tokens used as the ngram to search/match
  #     against the previous prompt sequence.
  #     This is equal to the N in N-Gram.
  #     The default value is 3 if not specified.
  class NgramSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#ngram_speculation::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation

Returns N-Gram speculation.

Note: The following fields are mutually exclusive: ngram_speculation, draft_model_speculation. If a field in that set is populated, all other fields in the set will automatically be cleared.

Returns:

  • (::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation)

    N-Gram speculation.

396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
# File 'proto_docs/google/cloud/aiplatform/v1/endpoint.rb', line 396

class SpeculativeDecodingSpec
  # Mix in the protobuf message extensions on the instance side and the
  # matching ClassMethods on the class side.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Draft model speculation: a smaller draft model generates the candidate
  # tokens used for speculative decoding.
  # @!attribute [rw] draft_model
  #   @return [::String]
  #     Required. The resource name of the draft model.
  class DraftModelSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # N-Gram speculation: looks for matching tokens in the previous prompt
  # sequence and uses those matches as the speculation for generating new
  # tokens.
  # @!attribute [rw] ngram_size
  #   @return [::Integer]
  #     The number of last N input tokens used as the ngram to search/match
  #     against the previous prompt sequence.
  #     This is equal to the N in N-Gram.
  #     The default value is 3 if not specified.
  class NgramSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#speculative_token_count::Integer

Returns the number of speculative tokens to generate at each step.

Returns:

  • (::Integer)

    The number of speculative tokens to generate at each step.



396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
# File 'proto_docs/google/cloud/aiplatform/v1/endpoint.rb', line 396

class SpeculativeDecodingSpec
  # Mix in the protobuf message extensions on the instance side and the
  # matching ClassMethods on the class side.
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Draft model speculation: a smaller draft model generates the candidate
  # tokens used for speculative decoding.
  # @!attribute [rw] draft_model
  #   @return [::String]
  #     Required. The resource name of the draft model.
  class DraftModelSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # N-Gram speculation: looks for matching tokens in the previous prompt
  # sequence and uses those matches as the speculation for generating new
  # tokens.
  # @!attribute [rw] ngram_size
  #   @return [::Integer]
  #     The number of last N input tokens used as the ngram to search/match
  #     against the previous prompt sequence.
  #     This is equal to the N in N-Gram.
  #     The default value is 3 if not specified.
  class NgramSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end