Class: Roseflow::Tiktoken::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/roseflow/tiktoken/tokenizer.rb

Instance Method Summary collapse

Constructor Details

#initialize(model: nil) ⇒ Tokenizer

Returns a new instance of Tokenizer.



6
7
8
9
# File 'lib/roseflow/tiktoken/tokenizer.rb', line 6

# Builds a tokenizer for the given model.
#
# Stores the model and asks +determine_encoding+ for the encoding that
# later +encode+/+decode+ calls are delegated to.
#
# @param model [Object, nil] model identifier handed to +determine_encoding+
def initialize(model: nil)
  @model = model
  @encoding = determine_encoding(@model)
end

Instance Method Details

#count_tokens(messages) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/roseflow/tiktoken/tokenizer.rb', line 23

# Estimates how many tokens a list of chat messages occupies for this
# tokenizer's model.
#
# Every message contributes the overhead reported by
# +tokens_per_message_for_model+ plus the encoded length of each value it
# holds. A "name" entry (String or Symbol key) adds that overhead once more.
#
# @param messages [Enumerable] messages, each enumerable as key/value pairs
# @return [Integer] estimated token count, including the reply priming tokens
def count_tokens(messages)
  message_tokens = messages.sum do |message|
    message.sum(tokens_per_message_for_model(@model)) do |key, value|
      field_tokens = encode(value).count
      # Compare via to_s so symbol-keyed messages (name: "...") are charged
      # the same surcharge as string-keyed ones.
      # NOTE(review): the surcharge reuses the per-message overhead; OpenAI's
      # reference counting uses a separate per-name constant -- confirm intended.
      field_tokens += tokens_per_message_for_model(@model) if key.to_s == "name"
      field_tokens
    end
  end

  message_tokens + 3 # Every reply is primed with assistant
end

#decode(input) ⇒ Object



17
18
19
20
21
# File 'lib/roseflow/tiktoken/tokenizer.rb', line 17

# Turns tokens back into text by delegating to the tokenizer's encoding.
#
# @param input [Object] passed unchanged to the encoding's +decode+
# @return [Object] whatever the encoding's +decode+ returns
# @raise [Roseflow::Tiktoken::NoEncodingError] when the delegated call raises
#   any StandardError (for example because +@encoding+ is nil)
def decode(input)
  @encoding.decode(input)
rescue StandardError
  # Any failure is reported as a missing encoding; Ruby keeps the original
  # exception reachable through the new error's +cause+.
  failure = "No encoding found for model #{@model}"
  raise ::Roseflow::Tiktoken::NoEncodingError, failure
end

#encode(input) ⇒ Object



11
12
13
14
15
# File 'lib/roseflow/tiktoken/tokenizer.rb', line 11

# Converts input into tokens by delegating to the tokenizer's encoding.
#
# @param input [Object] passed unchanged to the encoding's +encode+
# @return [Object] whatever the encoding's +encode+ returns
# @raise [Roseflow::Tiktoken::NoEncodingError] when the delegated call raises
#   any StandardError (for example because +@encoding+ is nil)
def encode(input)
  @encoding.encode(input)
rescue StandardError
  # Any failure is reported as a missing encoding; Ruby keeps the original
  # exception reachable through the new error's +cause+.
  failure = "No encoding found for model #{@model}"
  raise ::Roseflow::Tiktoken::NoEncodingError, failure
end