Class: AnthropicResponse

Inherits:
GlimResponse show all
Defined in:
lib/anthropic_response.rb

Overview

require 'tiktoken_ruby' # TODO only for token counting while anthropic doesn't support it

Instance Attribute Summary collapse

Attributes inherited from GlimResponse

#cached_response, #params, #req

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from GlimResponse

#[], #_look_for_cached_response, #completion, #context, #err, #initialize, #log_completion, #log_raw_response, #log_request_hash, #log_summary_append, #raw_response, #response_available?, #save_raw_response_to_cache, #total_cost, #total_tokens, #wait_for_response

Constructor Details

This class inherits a constructor from GlimResponse

Instance Attribute Details

#completion_token_countObject (readonly)

Returns the value of attribute completion_token_count.



53
54
55
# File 'lib/anthropic_response.rb', line 53

def completion_token_count
  @completion_token_count
end

#prompt_token_countObject (readonly)

Returns the value of attribute prompt_token_count.



53
54
55
# File 'lib/anthropic_response.rb', line 53

def prompt_token_count
  @prompt_token_count
end

#total_token_countObject (readonly)

Returns the value of attribute total_token_count.



53
54
55
# File 'lib/anthropic_response.rb', line 53

def total_token_count
  @total_token_count
end

Class Method Details

._count_tokens(model, s) ⇒ Object



24
25
26
27
28
29
30
31
# File 'lib/anthropic_response.rb', line 24

def self._count_tokens(model, s)
  # TODO -- not yet support by ruby-antrhopic
  # client = Anthropic::Client.new
  # puts "***** #{client.count_tokens(req.prompt)}"
  
  enc = Tiktoken.encoding_for_model("gpt-3.5-turbo") # this is obviously wrong, should use anthropic
  return enc.encode(s).length
end

._llm_info(llm_name) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/anthropic_response.rb', line 7

def self._llm_info(llm_name)
  if llm_name.start_with?("claude-instant-1")
    prompt_price = 1.63
    completion_price = 5.51
    context_length = 100_000
  elsif llm_name.start_with?("claude-2")
    prompt_price =  11.02
    completion_price = 32.68
    context_length = 100_000
  else
    raise "Unknown model #{llm_name}"
  end
  cost_per_prompt_token = prompt_price / 1_000_000.0
  cost_per_completion_token = completion_price / 1_000_000.0
  return {cost_per_prompt_token:,cost_per_completion_token:,context_length:}
end

.api_limiterObject



49
50
51
# File 'lib/anthropic_response.rb', line 49

def self.api_limiter
  @_api_limiter ||= APILimiter.new(max_concurrent_requests: 1)
end

Instance Method Details

#async_send_request_to_apiObject



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/anthropic_response.rb', line 69

def async_send_request_to_api
  # anthropic API modifies the prompt
  # but no need to params = Marshal.load(Marshal.dump(req.request_hash))
  # since it's only the prompt, we can just do this:
  params = req.request_hash.dup
  params[:prompt] = req.request_hash[:prompt].dup

  raise "request_hash should not have messages for Anthropic" if params[:messages]
  @thread = Thread.new do
    AnthropicResponse.api_limiter.with_limit do
      client = Anthropic::Client.new
      # this is necessary because the Anthropic API modifies the prompt
      _raw_response = client.complete(parameters: params).with_indifferent_access
      if _raw_response[:error]
        if _raw_response[:error][:type] == "rate_limit_error"
          limit = AnthropicResponse.api_limiter.max_concurrent_requests
          raise RateLimitExceededError, "Rate limit (#{limit}) exceeded. Edit config or negotiate with Anthropic to avoid this."
        else
            "Anthropic API error: #{_raw_response[:error]}"
        end
      end
      _raw_response
    end
  end
end

#completion_tokensObject

Anthropic does not report token counts, so we count them ourselves. NOTE: this means there may be some inaccuracies.



40
41
42
43
# File 'lib/anthropic_response.rb', line 40

def completion_tokens
  wait_for_response
  req.count_tokens(completion)
end

#process_response_from_apiObject



55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/anthropic_response.rb', line 55

def process_response_from_api
  @completion = raw_response[:completion] || err("Anthropic API error: No completion in #{raw_response}")
  # puts "JSON: #{JSON.pretty_generate(raw_response)}"
  usage = raw_response[:usage]
  if usage
    @total_token_count = usage[:total_tokens] || err("No total_tokens in #{usage}")
    @prompt_token_count = usage[:prompt_tokens] || err("No prompt_tokens in #{usage}")
    @completion_token_count = usage[:completion_tokens] || err("No completion_tokens in #{usage}")
  else
    #raise err("No usage in #{raw_response}")
  end

end

#prompt_tokensObject

Anthropic does not report token counts so we use the number from the request



34
35
36
# File 'lib/anthropic_response.rb', line 34

def prompt_tokens
  req.prompt_token_count
end

#responding_llm_nameObject



45
46
47
# File 'lib/anthropic_response.rb', line 45

def responding_llm_name
  "TODO FOR ANTHROPIC"
end