Class: ChatResponse
Instance Attribute Summary
Attributes inherited from GlimResponse
#cached_response, #params, #req
Class Method Summary
._count_tokens, ._llm_info, .api_limiter
Instance Method Summary
#_function_call_arguments_from_message, #_function_call_from_message, #async_send_request_to_api, #completion_tokens, #create_request_for_chat, #create_request_with_function_result, #extracted_data, #function_call_message?, #function_name_from_message, #log_function_call_message, #log_json_error, #message, #messages, #messages_sent, #placeholder_anyscale_api_call, #process_response_from_api, #prompt_tokens, #responding_llm_name, #usage
Methods inherited from GlimResponse
#[], #_look_for_cached_response, #completion, #context, #err, #initialize, #log_completion, #log_raw_response, #log_request_hash, #log_summary_append, #raw_response, #response_available?, #save_raw_response_to_cache, #total_cost, #total_tokens, #wait_for_response
Constructor Details
This class inherits a constructor from GlimResponse
Class Method Details
._count_tokens(llm_name, s) ⇒ Object
# File 'lib/chat_response.rb', line 52
def self._count_tokens(llm_name, s)
  # Fall back to the gpt-3.5-turbo tokenizer for models Tiktoken doesn't know.
  enc = Tiktoken.encoding_for_model(llm_name) || Tiktoken.encoding_for_model("gpt-3.5-turbo")
  return enc.encode(s).length
end
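A quick usage sketch (assuming the tiktoken_ruby gem is loaded; exact counts depend on the model's encoding):

ChatResponse._count_tokens("gpt-4", "Hello, world!")          # => 4 under the cl100k_base encoding
ChatResponse._count_tokens("codellama-34b", "Hello, world!")  # unknown to Tiktoken, so it falls back to gpt-3.5-turbo's encoding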
._llm_info(model) ⇒ Object
# File 'lib/chat_response.rb', line 19
def self._llm_info(model)
  # Prices are USD per 1M tokens.
  if model.start_with?("gpt-3.5-turbo-16k")
    prompt_price = 3.0
    completion_price = prompt_price / 3.0 * 4.0
    context_length = 16384
  elsif model.start_with?("gpt-3.5")
    prompt_price = 1.5
    completion_price = prompt_price / 3.0 * 4.0
    context_length = 4096
  elsif model.start_with?("gpt-4-32k")
    prompt_price = 60.0
    completion_price = 2 * prompt_price
    context_length = 32768
  elsif model.start_with?("gpt-4")
    prompt_price = 30.0
    completion_price = 2 * prompt_price
    context_length = 8192
  elsif model.start_with?("meta-llama")
    prompt_price = 1.0
    completion_price = prompt_price
    context_length = 4096
  elsif model.start_with?("codellama")
    prompt_price = 1.0
    completion_price = prompt_price
    context_length = 4096
  else
    raise "Unknown model #{model}"
  end
  cost_per_prompt_token = prompt_price / 1_000_000.0
  cost_per_completion_token = completion_price / 1_000_000.0
  return { cost_per_prompt_token:, cost_per_completion_token:, context_length: }
end
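For example, costing a call against the table above (the arithmetic follows directly from the returned hash):

info = ChatResponse._llm_info("gpt-4")
info[:context_length]   # => 8192
# A call with 1,000 prompt tokens and 200 completion tokens:
1000 * info[:cost_per_prompt_token] + 200 * info[:cost_per_completion_token]   # => 0.042 USD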
.api_limiter ⇒ Object
# File 'lib/chat_response.rb', line 61
def self.api_limiter
  # Memoized at class level, so the whole process shares one limiter.
  @_api_limiter ||= APILimiter.new(max_concurrent_requests: 2)
end
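Because the limiter is memoized at class level, every ChatResponse in the process shares the same concurrency budget:

ChatResponse.api_limiter.with_limit do
  # at most 2 of these blocks run concurrently, process-wide
end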
Instance Method Details
#_function_call_arguments_from_message ⇒ Object
# File 'lib/chat_response.rb', line 147
def _function_call_arguments_from_message
  s = _function_call_from_message[:arguments]
  JSON.parse(s).with_indifferent_access
rescue => e
  puts "JSON parse error:"
  puts s
  log_json_error(s) if s
  err(e)
end
#_function_call_from_message ⇒ Object
# File 'lib/chat_response.rb', line 142
def _function_call_from_message
  wait_for_response
  @message[:function_call] || err("No function call!")
end
#async_send_request_to_api ⇒ Object
# File 'lib/chat_response.rb', line 89
def async_send_request_to_api
  h = req.request_hash
  log_request_hash
  raise "model not set; remember to set request.llm_name = (the model you want to use)" unless h[:model]
  # The API call runs on a background thread; wait_for_response joins it later.
  @thread = Thread.new do
    ChatResponse.api_limiter.with_limit do
      if GlimRequest.openai_llms.include?(req.llm_name)
        client = OpenAI::Client.new
        _raw_response = client.chat(parameters: req.request_hash).with_indifferent_access
      else
        _raw_response = placeholder_anyscale_api_call(req.request_hash).with_indifferent_access
      end
      if _raw_response[:error]
        if _raw_response[:error][:type] == "rate_limit_error"
          # api_limiter is a class method, so it must be called on the class.
          limit = ChatResponse.api_limiter.max_concurrent_requests
          raise RateLimitExceededError, "Rate limit (#{limit}) exceeded. Edit config or negotiate with Anthropic to avoid this."
        else
          puts "-------Error. Prompt:"
          puts req.prompt.class
          raise "OpenAI API error: #{_raw_response[:error]}"
        end
      end
      _raw_response
    end
  end
end
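A sketch of the intended call pattern (assuming wait_for_response, inherited from GlimResponse, joins @thread and feeds the result to process_response_from_api):

response.async_send_request_to_api   # returns immediately; the HTTP call runs on @thread
response.wait_for_response           # blocks until the API replies
puts response.completion             # the generated text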
#completion_tokens ⇒ Object
# File 'lib/chat_response.rb', line 255
def completion_tokens
  usage[:completion_tokens]
end
#create_request_for_chat(message_to_append: nil) ⇒ Object
# File 'lib/chat_response.rb', line 197
def create_request_for_chat(message_to_append: nil)
  wait_for_response
  h = req.generic_params_hash.merge({
    llm_name: req.llm_name,
    context: req.context
  })
  new_request = GlimRequest.new(**h)
  new_request.set_message_history(messages.dup)
  messages.append(message_to_append) if message_to_append
  new_request.request_hash[:messages] = messages
  new_request
end
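For example, continuing the conversation with a follow-up user message (a sketch; the message shape is the OpenAI chat format used throughout):

follow_up = response.create_request_for_chat(
  message_to_append: { role: "user", content: "Please elaborate." }
)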
#create_request_with_function_result ⇒ Object
Returns a new GlimRequest preloaded with the data for sending the result of a function call back to the LLM API.
# File 'lib/chat_response.rb', line 175
def create_request_with_function_result
  wait_for_response
  eval_functions_object = req.functions_object || err("No functions_object")
  raise "functions_object must be ai_callable, is #{eval_functions_object}" unless eval_functions_object.is_a?(AICallable)
  eval_function_name = _function_call_from_message[:name].to_sym
  raise "functions_object does not respond to #{eval_function_name}" unless eval_functions_object.respond_to?(eval_function_name)
  eval_function_arguments = _function_call_arguments_from_message
  putt :functions, "#{eval_functions_object}.#{eval_function_name}(#{eval_function_arguments})"
  eval_function_result = eval_functions_object._perform_ai_call(eval_function_name, eval_function_arguments)
  # Hand the function's result back to the LLM as a role: "function" message.
  return create_request_for_chat(message_to_append: {
    role: "function",
    name: eval_function_name,
    content: eval_function_result.to_json
  })
end
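Together with #function_call_message?, this enables a simple tool-use loop; a sketch, assuming each GlimRequest exposes its response via a response method:

response = request.response
while response.function_call_message?
  # run the requested function and hand the result back to the LLM
  request = response.create_request_with_function_result
  response = request.response
end
puts response.completion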
#extracted_data ⇒ Object
the extracted data generated by GPT
# File 'lib/chat_response.rb', line 228
def extracted_data
  return @extracted_data if @extracted_data
  raise "no output schema specified, can't get extracted_data" unless req.output_schema
  wait_for_response
  args = _function_call_arguments_from_message
  JSON::Validator.validate!(req.output_schema, args)
  if req.expected_output_is_list?
    @extracted_data = args[:list] || raise(err("Expected list"))
  else
    @extracted_data = args
  end
end
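A usage sketch (the schema is hypothetical; JSON::Validator comes from the json-schema gem):

request.output_schema = {
  "type" => "object",
  "properties" => { "name" => { "type" => "string" } },
  "required" => ["name"]
}
data = request.response.extracted_data   # raises unless the arguments validate
data[:name]                              # indifferent access, so symbol keys work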
#function_call_message? ⇒ Boolean
# File 'lib/chat_response.rb', line 137
def function_call_message?
  wait_for_response
  @message[:function_call] != nil
end
#function_name_from_message ⇒ Object
# File 'lib/chat_response.rb', line 159
def function_name_from_message
  _function_call_from_message[:name]
end
#log_function_call_message ⇒ Object
# File 'lib/chat_response.rb', line 167
def log_function_call_message
  s = "LLM requested results of function call to #{req.functions_object}##{function_name_from_message}\n"
  s += JSON.pretty_generate(_function_call_arguments_from_message)
  req.save_log_file("function_call.txt", s)
end
#log_json_error(s) ⇒ Object
# File 'lib/chat_response.rb', line 163
def log_json_error(s)
  req.save_log_file("json_error.json", s)
end
#message ⇒ Object
the message generated by GPT
# File 'lib/chat_response.rb', line 216
def message
  wait_for_response
  @message
end
#messages ⇒ Object
all messages: prior ones, the prompt, GPT’s response, and the function call, if it happened
# File 'lib/chat_response.rb', line 222
def messages
  wait_for_response
  @messages
end
#messages_sent ⇒ Object
# File 'lib/chat_response.rb', line 243
def messages_sent
  req.request_hash[:messages]
end
#placeholder_anyscale_api_call(params) ⇒ Object
# File 'lib/chat_response.rb', line 71
def placeholder_anyscale_api_call(params)
  key = ENV.fetch('ANYSCALE_API_KEY')
  api_base = "https://api.endpoints.anyscale.com/v1"
  uri = URI("#{api_base}/chat/completions")
  r = Net::HTTP::Post.new(uri, 'Content-Type' => 'application/json', 'Authorization' => "Bearer #{key}")
  r.body = params.to_json
  http = Net::HTTP.new(uri.host, uri.port)
  http.read_timeout = 480 # allow up to 8 minutes for long completions
  http.use_ssl = true if uri.scheme == 'https'
  response = http.request(r)
  return JSON.parse(response.body).with_indifferent_access
end
#process_response_from_api ⇒ Object
# File 'lib/chat_response.rb', line 119
def process_response_from_api
  raise "no raw response!" unless @raw_response
  @message = (raw_response.dig(:choices, 0, :message) || err("No message returned!"))
  @messages = messages_sent.dup + [@message]
  @completion = @message[:content]
  log_completion
  if function_call_message?
    log_function_call_message
  end
end
#prompt_tokens ⇒ Object
# File 'lib/chat_response.rb', line 251
def prompt_tokens
  usage[:prompt_tokens]
end
#responding_llm_name ⇒ Object
# File 'lib/chat_response.rb', line 259
def responding_llm_name
  raw_response[:model]
end
#usage ⇒ Object
# File 'lib/chat_response.rb', line 247
def usage
  raw_response[:usage]
end
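The three token accessors all read from this hash, which follows the OpenAI chat completions response shape:

response.usage              # => { prompt_tokens: ..., completion_tokens: ..., total_tokens: ... }
response.prompt_tokens      # usage[:prompt_tokens]
response.completion_tokens  # usage[:completion_tokens]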