Class: Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec

Inherits:
Object
  • Object
show all
Extended by:
Protobuf::MessageExts::ClassMethods
Includes:
Protobuf::MessageExts
Defined in:
proto_docs/google/cloud/aiplatform/v1/endpoint.rb

Overview

Configuration for Speculative Decoding.

Defined Under Namespace

Classes: DraftModelSpeculation, NgramSpeculation

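Instance Attribute Summary

  • #draft_model_speculation ⇒ ::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation — Draft model speculation.
  • #ngram_speculation ⇒ ::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation — N-Gram speculation.
  • #speculative_token_count ⇒ ::Integer — The number of speculative tokens to generate at each step.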

Instance Attribute Details

#draft_model_speculation ⇒ ::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation

Returns draft model speculation.

Note: The following fields are mutually exclusive: draft_model_speculation, ngram_speculation. If a field in that set is populated, all other fields in the set will automatically be cleared.

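Returns:

  • (::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation)

    Draft model speculation.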



427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
# File 'proto_docs/google/cloud/aiplatform/v1/endpoint.rb', line 427

# Configuration for Speculative Decoding.
# @!attribute [rw] draft_model_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation]
#     Draft model speculation.
#
#     Note: The following fields are mutually exclusive: `draft_model_speculation`, `ngram_speculation`. If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] ngram_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation]
#     N-Gram speculation.
#
#     Note: The following fields are mutually exclusive: `ngram_speculation`, `draft_model_speculation`. If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] speculative_token_count
#   @return [::Integer]
#     The number of speculative tokens to generate at each step.
class SpeculativeDecodingSpec
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Draft model speculation works by using a smaller model to generate
  # candidate tokens for speculative decoding.
  # @!attribute [rw] draft_model
  #   @return [::String]
  #     Required. The resource name of the draft model.
  class DraftModelSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # N-Gram speculation works by trying to find matching tokens in the
  # previous prompt sequence and using those as speculation for generating
  # new tokens.
  # @!attribute [rw] ngram_size
  #   @return [::Integer]
  #     The number of last N input tokens used as an n-gram to search/match
  #     against the previous prompt sequence.
  #     This is equal to the N in N-Gram.
  #     The default value is 3 if not specified.
  class NgramSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#ngram_speculation ⇒ ::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation

Returns N-Gram speculation.

Note: The following fields are mutually exclusive: ngram_speculation, draft_model_speculation. If a field in that set is populated, all other fields in the set will automatically be cleared.

Returns:

  • (::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation)

    N-Gram speculation.



427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
# File 'proto_docs/google/cloud/aiplatform/v1/endpoint.rb', line 427

# Configuration for Speculative Decoding.
# @!attribute [rw] draft_model_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation]
#     Draft model speculation.
#
#     Note: The following fields are mutually exclusive: `draft_model_speculation`, `ngram_speculation`. If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] ngram_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation]
#     N-Gram speculation.
#
#     Note: The following fields are mutually exclusive: `ngram_speculation`, `draft_model_speculation`. If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] speculative_token_count
#   @return [::Integer]
#     The number of speculative tokens to generate at each step.
class SpeculativeDecodingSpec
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Draft model speculation works by using a smaller model to generate
  # candidate tokens for speculative decoding.
  # @!attribute [rw] draft_model
  #   @return [::String]
  #     Required. The resource name of the draft model.
  class DraftModelSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # N-Gram speculation works by trying to find matching tokens in the
  # previous prompt sequence and using those as speculation for generating
  # new tokens.
  # @!attribute [rw] ngram_size
  #   @return [::Integer]
  #     The number of last N input tokens used as an n-gram to search/match
  #     against the previous prompt sequence.
  #     This is equal to the N in N-Gram.
  #     The default value is 3 if not specified.
  class NgramSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end

#speculative_token_count ⇒ ::Integer

Returns the number of speculative tokens to generate at each step.

Returns:

  • (::Integer)

    The number of speculative tokens to generate at each step.



427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
# File 'proto_docs/google/cloud/aiplatform/v1/endpoint.rb', line 427

# Configuration for Speculative Decoding.
# @!attribute [rw] draft_model_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::DraftModelSpeculation]
#     Draft model speculation.
#
#     Note: The following fields are mutually exclusive: `draft_model_speculation`, `ngram_speculation`. If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] ngram_speculation
#   @return [::Google::Cloud::AIPlatform::V1::SpeculativeDecodingSpec::NgramSpeculation]
#     N-Gram speculation.
#
#     Note: The following fields are mutually exclusive: `ngram_speculation`, `draft_model_speculation`. If a field in that set is populated, all other fields in the set will automatically be cleared.
# @!attribute [rw] speculative_token_count
#   @return [::Integer]
#     The number of speculative tokens to generate at each step.
class SpeculativeDecodingSpec
  include ::Google::Protobuf::MessageExts
  extend ::Google::Protobuf::MessageExts::ClassMethods

  # Draft model speculation works by using a smaller model to generate
  # candidate tokens for speculative decoding.
  # @!attribute [rw] draft_model
  #   @return [::String]
  #     Required. The resource name of the draft model.
  class DraftModelSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end

  # N-Gram speculation works by trying to find matching tokens in the
  # previous prompt sequence and using those as speculation for generating
  # new tokens.
  # @!attribute [rw] ngram_size
  #   @return [::Integer]
  #     The number of last N input tokens used as an n-gram to search/match
  #     against the previous prompt sequence.
  #     This is equal to the N in N-Gram.
  #     The default value is 3 if not specified.
  class NgramSpeculation
    include ::Google::Protobuf::MessageExts
    extend ::Google::Protobuf::MessageExts::ClassMethods
  end
end