Class: LLMBench::Benchmark

Inherits: Object

Defined in: lib/llm_bench/benchmark.rb

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(provider_name:, model_nickname:, config_manager:, print_result: false) ⇒ Benchmark

Returns a new instance of Benchmark.



# File 'lib/llm_bench/benchmark.rb', line 13

def initialize(provider_name:, model_nickname:, config_manager:, print_result: false)
  @provider_name = provider_name
  @model_nickname = model_nickname
  @print_result = print_result

  @config_manager = config_manager
  @config = config_manager.config

  @provider, @model = config_manager.validate_provider_and_model!(
    provider_name:,
    model_nickname:
  )
end
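
A minimal usage sketch. The ConfigManager class name and its zero-argument constructor are assumptions (the benchmark only requires an object that responds to #config and #validate_provider_and_model!), and the provider/model nicknames are placeholders:

# Hypothetical setup: config_manager must respond to #config and
# #validate_provider_and_model!; the nicknames below are placeholders.
config_manager = LLMBench::ConfigManager.new
benchmark = LLMBench::Benchmark.new(
  provider_name: "openai",
  model_nickname: "gpt-4o",
  config_manager: config_manager,
  print_result: true
)
benchmark.run_benchmark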

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



# File 'lib/llm_bench/benchmark.rb', line 11

def config
  @config
end

#end_time ⇒ Object (readonly)

Returns the value of attribute end_time.



# File 'lib/llm_bench/benchmark.rb', line 11

def end_time
  @end_time
end

#model ⇒ Object (readonly)

Returns the value of attribute model.



# File 'lib/llm_bench/benchmark.rb', line 11

def model
  @model
end

#model_nickname ⇒ Object (readonly)

Returns the value of attribute model_nickname.



# File 'lib/llm_bench/benchmark.rb', line 11

def model_nickname
  @model_nickname
end

#print_result ⇒ Object (readonly)

Returns the value of attribute print_result.



# File 'lib/llm_bench/benchmark.rb', line 11

def print_result
  @print_result
end

#provider ⇒ Object (readonly)

Returns the value of attribute provider.



# File 'lib/llm_bench/benchmark.rb', line 11

def provider
  @provider
end

#provider_name ⇒ Object (readonly)

Returns the value of attribute provider_name.



# File 'lib/llm_bench/benchmark.rb', line 11

def provider_name
  @provider_name
end

#start_time ⇒ Object (readonly)

Returns the value of attribute start_time.



# File 'lib/llm_bench/benchmark.rb', line 11

def start_time
  @start_time
end

Instance Method Details

#anthropic_format? ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/llm_bench/benchmark.rb', line 44

def anthropic_format?
  model["api_format"] == "anthropic"
end

#api_endpoint ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 48

def api_endpoint
  anthropic_format? ? "#{provider["base_url"]}/v1/messages" : "#{provider["base_url"]}/chat/completions"
end
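
Illustrative results for the two formats (the base_url values are hypothetical):

# Anthropic-format model with provider["base_url"] = "https://api.anthropic.com"
#   api_endpoint  #=> "https://api.anthropic.com/v1/messages"
# OpenAI-compatible model with provider["base_url"] = "https://api.example.com/v1"
#   api_endpoint  #=> "https://api.example.com/v1/chat/completions"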

#build_request_body ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 63

def build_request_body
  base_body = {
    model: model["id"],
    messages: [{ role: "user", content: config["prompt"] }]
  }

  if anthropic_format?
    base_body.merge(max_tokens: 1000)
  else
    base_body.merge(max_tokens: 1000, temperature: 0.7)
  end
end
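
Sketch of the resulting body, assuming config["prompt"] is "Hello" and an illustrative model id:

# Anthropic format:
#   { model: "claude-x", messages: [{ role: "user", content: "Hello" }], max_tokens: 1000 }
# OpenAI-compatible format:
#   { model: "gpt-x", messages: [{ role: "user", content: "Hello" }], max_tokens: 1000, temperature: 0.7 }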

#build_request_headers ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 52

def build_request_headers
  headers = { "Content-Type" => "application/json" }
  if anthropic_format?
    headers["x-api-key"] = provider["api_key"]
    headers["anthropic-version"] = "2023-06-01"
  else
    headers["Authorization"] = "Bearer #{provider["api_key"]}"
  end
  headers
end
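
Illustrative header sets for the two formats (API keys redacted):

# Anthropic format:
#   { "Content-Type" => "application/json", "x-api-key" => "<api_key>", "anthropic-version" => "2023-06-01" }
# OpenAI-compatible format:
#   { "Content-Type" => "application/json", "Authorization" => "Bearer <api_key>" }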

#calculate_and_display_metrics(response:) ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 141

def calculate_and_display_metrics(response:)
  metrics = calculate_metrics(response:)

  puts "\n#{Colors.header("=== Results ===")}"
  puts Colors.metric("Duration: #{metrics[:duration].round(3)} seconds")

  if metrics[:input_tokens] && metrics[:output_tokens]
    puts Colors.metric("Input tokens: #{metrics[:input_tokens]}")
    puts Colors.metric("Output tokens: #{metrics[:output_tokens]}")
    puts Colors.success("Total tokens: #{metrics[:total_tokens]}")
    puts Colors.success("Tokens per second: #{metrics[:tokens_per_second].round(2)}")
  else
    puts Colors.warning("Token usage data not available in API response")
  end

  return unless print_result

  puts "\n#{Colors.header("=== Message Content ===")}"
  puts Colors.border(metrics[:message_content])
end

#calculate_metrics(response:) ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 122

def calculate_metrics(response:)
  duration = end_time - start_time
  message_content = extract_response_content(response)
  input_tokens, output_tokens = extract_token_counts(response:)

  total_tokens = (input_tokens + output_tokens if input_tokens && output_tokens)

  tokens_per_second = (total_tokens / duration if total_tokens && duration.positive?)

  {
    duration:,
    input_tokens:,
    output_tokens:,
    total_tokens:,
    tokens_per_second:,
    message_content:
  }
end
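
Illustrative return value (the numbers are made up; tokens_per_second is total_tokens / duration, unrounded here):

# {
#   duration: 2.345,
#   input_tokens: 12,
#   output_tokens: 250,
#   total_tokens: 262,
#   tokens_per_second: 111.727...,  # 262 / 2.345
#   message_content: "..."
# }
# When the response carries no usage data, the token fields and
# tokens_per_second are nil.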

#extract_anthropic_content(response:) ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 162

def extract_anthropic_content(response:)
  return "Error: #{response["msg"]}" if response.key?("code") && response.key?("msg") && response.key?("success")

  content_blocks = response["content"]

  if content_blocks.is_a?(Array) && !content_blocks.empty?
    text_block = content_blocks.find { |block| block.is_a?(Hash) && block["type"] == "text" }
    text_block ? text_block["text"] : nil
  elsif response.dig("content", 0, "text")
    response.dig("content", 0, "text")
  end
end
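
A sketch of the response shapes it handles (the hashes are illustrative):

# Text block present:
#   { "content" => [{ "type" => "text", "text" => "Hi there" }] }   #=> "Hi there"
# Gateway-style error body:
#   { "code" => 401, "msg" => "invalid key", "success" => false }   #=> "Error: invalid key"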

#extract_response_content(response) ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 76

def extract_response_content(response)
  if anthropic_format?
    extract_anthropic_content(response:)
  else
    response.dig("choices", 0, "message", "content") || ""
  end
end

#extract_token_counts(response:) ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 84

def extract_token_counts(response:)
  if anthropic_format?
    input_tokens = response.dig("usage", "input_tokens")
    output_tokens = response.dig("usage", "output_tokens")
  else
    input_tokens = response.dig("usage", "prompt_tokens")
    output_tokens = response.dig("usage", "completion_tokens")
  end
  [input_tokens, output_tokens]
end
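
Illustrative usage blocks for each format (values made up):

# Anthropic format:
#   { "usage" => { "input_tokens" => 12, "output_tokens" => 250 } }        #=> [12, 250]
# OpenAI-compatible format:
#   { "usage" => { "prompt_tokens" => 12, "completion_tokens" => 250 } }   #=> [12, 250]
# Missing usage data yields [nil, nil].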

#handle_api_error(response:) ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 113

def handle_api_error(response:)
  error_response = JSON.parse(response.body)
  error_msg = error_response["msg"] || error_response["message"] ||
              error_response.dig("error", "message") || response.message
  raise "API request failed: #{response.code} - #{error_msg}"
rescue JSON::ParserError
  raise "API request failed: #{response.code} #{response.message}"
end
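
Illustrative failures (status codes and error bodies are hypothetical):

# JSON error body:
#   response.code => "401", body => '{"error":{"message":"Invalid API key"}}'
#   raises RuntimeError, "API request failed: 401 - Invalid API key"
# Non-JSON body:
#   raises RuntimeError, "API request failed: 401 Unauthorized"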

#make_api_call ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 95

def make_api_call
  uri = URI.parse(api_endpoint)
  request = Net::HTTP::Post.new(uri)
  request["Content-Type"] = "application/json"

  build_request_headers.each { |key, value| request[key] = value }
  request.body = build_request_body.to_json

  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = uri.scheme == "https"

  response = http.request(request)

  handle_api_error(response:) unless response.is_a?(Net::HTTPSuccess)

  JSON.parse(response.body)
end

#run_benchmark ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 27

def run_benchmark
  puts Colors.header("=== LLM Benchmark ===")
  puts Colors.info("Provider: #{provider_name}")
  puts Colors.info("Model: #{model_nickname} (#{model["id"]})")
  puts Colors.highlight("Starting benchmark...")

  @start_time = Time.now
  puts Colors.border("Start time: #{start_time.strftime("%Y-%m-%d %H:%M:%S.%3N")}")

  response = make_api_call

  @end_time = Time.now
  puts Colors.border("End time: #{end_time.strftime("%Y-%m-%d %H:%M:%S.%3N")}")

  calculate_and_display_metrics(response:)
end

#run_benchmark_for_results ⇒ Object



# File 'lib/llm_bench/benchmark.rb', line 175

def run_benchmark_for_results
  @start_time = Time.now
  response = make_api_call
  @end_time = Time.now

  metrics = calculate_metrics(response:)

  {
    provider: provider_name,
    model: model_nickname,
    total_tokens: metrics[:total_tokens] || 0,
    tokens_per_second: metrics[:tokens_per_second]&.round(2) || 0,
    duration: metrics[:duration].round(3),
    success: true,
    message_content: metrics[:message_content]
  }
rescue StandardError => e
  {
    provider: provider_name,
    model: model_nickname,
    total_tokens: 0,
    tokens_per_second: 0,
    duration: 0,
    success: false,
    error: e.message,
    message_content: ""
  }
end
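
Illustrative return values (provider, model, and numbers are made up):

# On success:
#   { provider: "openai", model: "gpt-4o", total_tokens: 262,
#     tokens_per_second: 111.73, duration: 2.345, success: true,
#     message_content: "..." }
# On any StandardError:
#   { provider: "openai", model: "gpt-4o", total_tokens: 0,
#     tokens_per_second: 0, duration: 0, success: false,
#     error: "<exception message>", message_content: "" }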