Class: BxBuilderChain::Utils::TokenLength::BaseValidator

Inherits:
Object
  • Object
show all
Defined in:
lib/bx_builder_chain/utils/token_length/base_validator.rb

Overview

Calculates the `max_tokens:` parameter: the model's maximum context length minus the token length of the given text.

Returns:

  • (Integer)

    The number of tokens left over after subtracting the text's token length from the model's token limit

Raises:

  • (TokenLimitExceeded)

    If the text is too long

Direct Known Subclasses

OpenAiValidator

Class Method Summary collapse

Class Method Details

.token_length(text, model_name = "", options = {}) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
# File 'lib/bx_builder_chain/utils/token_length/base_validator.rb', line 31

# Counts the tokens in +text+ using the cl100k_base byte-pair encoding.
#
# The encoding settings are always taken from
# BxBuilderChain::Utils::Tokenization::OpenAiEncodings.cl100k_base;
# +model_name+ and +options+ are accepted for signature compatibility but
# are not read by this implementation.
#
# This method writes nothing to stdout: it is invoked once per item when an
# Array is validated, so per-call diagnostic output would flood the host
# application's output.
#
# @param text [Object] the text handed to the encoder (presumably a String — confirm against BytePairEncoding#encode)
# @param model_name [String] not used here
# @param options [Hash] not used here
# @return [Integer] the number of elements returned by the encoder for +text+
def self.token_length(text, model_name = "", options = {})
  settings = BxBuilderChain::Utils::Tokenization::OpenAiEncodings.cl100k_base
  encoder = BxBuilderChain::Utils::Tokenization::BytePairEncoding.new(
    pat_str: settings["pat_str"],
    mergeable_ranks: settings["mergeable_ranks"]
  )

  encoder.encode(text).count
end

.validate_max_tokens!(content, model_name, options = {}) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/bx_builder_chain/utils/token_length/base_validator.rb', line 15

# Computes how many tokens remain in the model's context window once
# +content+ has been accounted for.
#
# When +content+ is an Array, each element is serialized with +to_json+ and
# the per-element token counts are added together; any other value is passed
# to +token_length+ unchanged.
#
# @param content [Array, Object] the text, or a list of items, to measure
# @param model_name [String] forwarded to +token_limit+ and +token_length+
# @param options [Hash] forwarded to +token_length+
# @return [Integer] the token limit minus the measured token length
# @raise [RuntimeError] if the measured length exceeds the model's token limit
def self.validate_max_tokens!(content, model_name, options = {})
  used_tokens =
    case content
    when Array
      content.map { |entry| token_length(entry.to_json, model_name, options) }.sum
    else
      token_length(content, model_name, options)
    end

  remaining = token_limit(model_name) - used_tokens
  return remaining unless remaining < 0

  raise "This model's maximum context length is #{token_limit(model_name)} tokens, but the given text is #{used_tokens} tokens long."
end