Class: ChatResponse
Instance Attribute Summary
Attributes inherited from GlimResponse
#cached_response, #params, #req
Class Method Summary
._count_tokens, ._llm_info, .api_limiter
Instance Method Summary
#_function_call_arguments_from_message, #_function_call_from_message, #async_send_request_to_api, #completion_tokens, #create_request_for_chat, #create_request_with_function_result, #extracted_data, #function_call_message?, #function_name_from_message, #log_function_call_message, #log_json_error, #message, #messages, #messages_sent, #placeholder_anyscale_api_call, #process_response_from_api, #prompt_tokens, #responding_llm_name, #usage
Methods inherited from GlimResponse
#[], #_look_for_cached_response, #completion, #context, #err, #initialize, #log_completion, #log_raw_response, #log_request_hash, #log_summary_append, #raw_response, #response_available?, #save_raw_response_to_cache, #total_cost, #total_tokens, #wait_for_response
Constructor Details
This class inherits a constructor from GlimResponse
Class Method Details
._count_tokens(llm_name, s) ⇒ Object
# File 'lib/chat_response.rb', line 52
def self._count_tokens(llm_name, s)
  # Fall back to the gpt-3.5-turbo tokenizer for models Tiktoken doesn't know.
  enc = Tiktoken.encoding_for_model(llm_name) || Tiktoken.encoding_for_model("gpt-3.5-turbo")
  return enc.encode(s).length
end
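A quick usage sketch (assuming the tiktoken_ruby gem is loaded; exact counts depend on the model's encoding):

ChatResponse._count_tokens("gpt-4", "Hello, world!")          # => 4 under the cl100k_base encoding
ChatResponse._count_tokens("codellama-34b", "Hello, world!")  # unknown to Tiktoken, so it falls back to gpt-3.5-turbo's encoding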
._llm_info(model) ⇒ Object
# File 'lib/chat_response.rb', line 19
def self._llm_info(model)
  # Prices are USD per 1M tokens.
  if model.start_with?("gpt-3.5-turbo-16k")
    prompt_price = 3.0
    completion_price = prompt_price / 3.0 * 4.0
    context_length = 16384
  elsif model.start_with?("gpt-3.5")
    prompt_price = 1.5
    completion_price = prompt_price / 3.0 * 4.0
    context_length = 4096
  elsif model.start_with?("gpt-4-32k")
    prompt_price = 60.0
    completion_price = 2 * prompt_price
    context_length = 32768
  elsif model.start_with?("gpt-4")
    prompt_price = 30.0
    completion_price = 2 * prompt_price
    context_length = 8192
  elsif model.start_with?("meta-llama")
    prompt_price = 1.0
    completion_price = prompt_price
    context_length = 4096
  elsif model.start_with?("codellama")
    prompt_price = 1.0
    completion_price = prompt_price
    context_length = 4096
  else
    raise "Unknown model #{model}"
  end
  cost_per_prompt_token = prompt_price / 1_000_000.0
  cost_per_completion_token = completion_price / 1_000_000.0
  return { cost_per_prompt_token:, cost_per_completion_token:, context_length: }
end
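For example, costing a call against the table above (the arithmetic follows directly from the returned hash):

info = ChatResponse._llm_info("gpt-4")
info[:context_length]   # => 8192
# A call with 1,000 prompt tokens and 200 completion tokens:
1000 * info[:cost_per_prompt_token] + 200 * info[:cost_per_completion_token]   # => 0.042 USD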
.api_limiter ⇒ Object
# File 'lib/chat_response.rb', line 61
def self.api_limiter
  # Memoized at class level, so the whole process shares one limiter.
  @_api_limiter ||= APILimiter.new(max_concurrent_requests: 2)
end
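Because the limiter is memoized at class level, every ChatResponse in the process shares the same concurrency budget:

ChatResponse.api_limiter.with_limit do
  # at most 2 of these blocks run concurrently, process-wide
end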
Instance Method Details
#_function_call_arguments_from_message ⇒ Object
# File 'lib/chat_response.rb', line 147
def _function_call_arguments_from_message
  s = _function_call_from_message[:arguments]
  JSON.parse(s).with_indifferent_access
rescue => e
  puts "JSON parse error:"
  puts s
  log_json_error(s) if s
  err(e)
end
#_function_call_from_message ⇒ Object
# File 'lib/chat_response.rb', line 142
def _function_call_from_message
  wait_for_response
  @message[:function_call] || err("No function call!")
end
#async_send_request_to_api ⇒ Object
# File 'lib/chat_response.rb', line 89
def async_send_request_to_api
  h = req.request_hash
  log_request_hash
  raise "model not set; remember to set request.llm_name = (the model you want to use)" unless h[:model]
  # The API call runs on a background thread; wait_for_response joins it later.
  @thread = Thread.new do
    ChatResponse.api_limiter.with_limit do
      if GlimRequest.openai_llms.include?(req.llm_name)
        client = OpenAI::Client.new
        _raw_response = client.chat(parameters: req.request_hash).with_indifferent_access
      else
        _raw_response = placeholder_anyscale_api_call(req.request_hash).with_indifferent_access
      end
      if _raw_response[:error]
        if _raw_response[:error][:type] == "rate_limit_error"
          # api_limiter is a class method, so it must be called on the class.
          limit = ChatResponse.api_limiter.max_concurrent_requests
          raise RateLimitExceededError, "Rate limit (#{limit}) exceeded. Edit config or negotiate with Anthropic to avoid this."
        else
          puts "-------Error. Prompt:"
          puts req.prompt.class
          raise "OpenAI API error: #{_raw_response[:error]}"
        end
      end
      _raw_response
    end
  end
end
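A sketch of the intended call pattern (assuming wait_for_response, inherited from GlimResponse, joins @thread and feeds the result to process_response_from_api):

response.async_send_request_to_api   # returns immediately; the HTTP call runs on @thread
response.wait_for_response           # blocks until the API replies
puts response.completion             # the generated text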
#completion_tokens ⇒ Object
# File 'lib/chat_response.rb', line 255
def completion_tokens
  usage[:completion_tokens]
end
#create_request_for_chat(message_to_append: nil) ⇒ Object
# File 'lib/chat_response.rb', line 197
def create_request_for_chat(message_to_append: nil)
  wait_for_response
  h = req.generic_params_hash.merge({
    llm_name: req.llm_name,
    context: req.context
  })
  new_request = GlimRequest.new(**h)
  new_request.set_message_history(messages.dup)
  messages.append(message_to_append) if message_to_append
  new_request.request_hash[:messages] = messages
  new_request
end
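For example, continuing the conversation with a follow-up user message (a sketch; the message shape is the OpenAI chat format used throughout):

follow_up = response.create_request_for_chat(
  message_to_append: { role: "user", content: "Please elaborate." }
)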
#create_request_with_function_result ⇒ Object
Returns a new GlimRequest preloaded with the data for sending the result of a function call back to the LLM API.
# File 'lib/chat_response.rb', line 175
def create_request_with_function_result
  wait_for_response
  eval_functions_object = req.functions_object || err("No functions_object")
  raise "functions_object must be ai_callable, is #{eval_functions_object}" unless eval_functions_object.is_a?(AICallable)
  eval_function_name = _function_call_from_message[:name].to_sym
  raise "functions_object does not respond to #{eval_function_name}" unless eval_functions_object.respond_to?(eval_function_name)
  eval_function_arguments = _function_call_arguments_from_message
  putt :functions, "#{eval_functions_object}.#{eval_function_name}(#{eval_function_arguments})"
  eval_function_result = eval_functions_object._perform_ai_call(eval_function_name, eval_function_arguments)
  # Hand the function's result back to the LLM as a role: "function" message.
  return create_request_for_chat(message_to_append: {
    role: "function",
    name: eval_function_name,
    content: eval_function_result.to_json
  })
end
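Together with #function_call_message?, this enables a simple tool-use loop; a sketch, assuming each GlimRequest exposes its response via a response method:

response = request.response
while response.function_call_message?
  # run the requested function and hand the result back to the LLM
  request = response.create_request_with_function_result
  response = request.response
end
puts response.completion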
#extracted_data ⇒ Object
the extracted data generated by GPT
# File 'lib/chat_response.rb', line 228
def extracted_data
  return @extracted_data if @extracted_data
  raise "no output schema specified, can't get extracted_data" unless req.output_schema
  wait_for_response
  args = _function_call_arguments_from_message
  JSON::Validator.validate!(req.output_schema, args)
  if req.expected_output_is_list?
    @extracted_data = args[:list] || raise(err("Expected list"))
  else
    @extracted_data = args
  end
end
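A usage sketch (the schema is hypothetical; JSON::Validator comes from the json-schema gem):

request.output_schema = {
  "type" => "object",
  "properties" => { "name" => { "type" => "string" } },
  "required" => ["name"]
}
data = request.response.extracted_data   # raises unless the arguments validate
data[:name]                              # indifferent access, so symbol keys work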
#function_call_message? ⇒ Boolean
# File 'lib/chat_response.rb', line 137
def function_call_message?
  wait_for_response
  @message[:function_call] != nil
end
#function_name_from_message ⇒ Object
# File 'lib/chat_response.rb', line 159
def function_name_from_message
  _function_call_from_message[:name]
end
#log_function_call_message ⇒ Object
# File 'lib/chat_response.rb', line 167
def log_function_call_message
  s = "LLM requested results of function call to #{req.functions_object}##{function_name_from_message}\n"
  s += JSON.pretty_generate(_function_call_arguments_from_message)
  req.save_log_file("function_call.txt", s)
end
#log_json_error(s) ⇒ Object
# File 'lib/chat_response.rb', line 163
def log_json_error(s)
  req.save_log_file("json_error.json", s)
end
#message ⇒ Object
the message generated by GPT
# File 'lib/chat_response.rb', line 216
def message
  wait_for_response
  @message
end
#messages ⇒ Object
all messages: prior ones, the prompt, GPT’s response, and the function call, if it happened
# File 'lib/chat_response.rb', line 222
def messages
  wait_for_response
  @messages
end
#messages_sent ⇒ Object
# File 'lib/chat_response.rb', line 243
def messages_sent
  req.request_hash[:messages]
end
#placeholder_anyscale_api_call(params) ⇒ Object
# File 'lib/chat_response.rb', line 71
def placeholder_anyscale_api_call(params)
  key = ENV.fetch('ANYSCALE_API_KEY')
  api_base = "https://api.endpoints.anyscale.com/v1"
  uri = URI("#{api_base}/chat/completions")
  r = Net::HTTP::Post.new(uri, 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{key}")
  r.body = params.to_json
  http = Net::HTTP.new(uri.host, uri.port)
  http.read_timeout = 480 # allow up to 8 minutes for long completions
  http.use_ssl = true if uri.scheme == 'https'
  response = http.request(r)
  return JSON.parse(response.body).with_indifferent_access
end
#process_response_from_api ⇒ Object
# File 'lib/chat_response.rb', line 119
def process_response_from_api
  raise "no raw response!" unless @raw_response
  @message = (raw_response.dig(:choices, 0, :message) || err("No message returned!"))
  @messages = messages_sent.dup + [@message]
  @completion = @message[:content]
  log_completion
  if function_call_message?
    log_function_call_message
  end
end
#prompt_tokens ⇒ Object
# File 'lib/chat_response.rb', line 251
def prompt_tokens
  usage[:prompt_tokens]
end
#responding_llm_name ⇒ Object
# File 'lib/chat_response.rb', line 259
def responding_llm_name
  raw_response[:model]
end
#usage ⇒ Object
# File 'lib/chat_response.rb', line 247
def usage
  raw_response[:usage]
end
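The three token accessors all read from this hash, which follows the OpenAI chat completions response shape:

response.usage              # => { prompt_tokens: ..., completion_tokens: ..., total_tokens: ... }
response.prompt_tokens      # usage[:prompt_tokens]
response.completion_tokens  # usage[:completion_tokens]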