Class: LangsmithrbRails::Evals::Checks::LlmGraded

Inherits:
Object
Defined in:
lib/generators/langsmithrb_rails/evals/templates/checks/llm_graded.rb

Overview

LLM-based grading check that evaluates a response against an expected answer using a configured LLM provider (OpenAI or Anthropic).
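
Provider selection is driven entirely by environment variables. As a minimal sketch (the key value is a placeholder and the model name is only illustrative), the check could be configured like this before running an eval:

# Illustrative configuration; the check uses the first provider it finds
# (OpenAI first, then Anthropic) based on these environment variables.
ENV["OPENAI_API_KEY"]       = "sk-..."        # placeholder key; enables the OpenAI path
ENV["LANGSMITH_EVAL_MODEL"] = "gpt-4o-mini"   # optional; the source falls back to "gpt-3.5-turbo"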

Class Method Summary

.call_llm(prompt) ⇒ String
  Call the LLM for grading.

.create_grading_prompt(input, answer, expected_answer) ⇒ String
  Create a prompt for the LLM to grade the response.

.evaluate(input, response, expected) ⇒ Hash
  Check if the response is correct using an LLM.

.extract_answer(response) ⇒ String
  Extract the answer from the response.

.parse_llm_response(response) ⇒ Hash
  Parse the LLM response.

Class Method Details

.call_llm(prompt) ⇒ String

Call the LLM for grading



# File 'lib/generators/langsmithrb_rails/evals/templates/checks/llm_graded.rb', line 82

def self.call_llm(prompt)
  # Check if OpenAI is configured
  if defined?(OpenAI) && ENV["OPENAI_API_KEY"].present?
    client = OpenAI::Client.new(access_token: ENV["OPENAI_API_KEY"])
    response = client.chat(
      parameters: {
        model: ENV.fetch("LANGSMITH_EVAL_MODEL", "gpt-3.5-turbo"),
        messages: [{ role: "user", content: prompt }],
        temperature: 0.0
      }
    )
    return response.dig("choices", 0, "message", "content")
  end
  
  # Check if Anthropic is configured
  if defined?(Anthropic) && ENV["ANTHROPIC_API_KEY"].present?
    client = Anthropic::Client.new(api_key: ENV["ANTHROPIC_API_KEY"])
    response = client.messages.create(
      model: ENV.fetch("LANGSMITH_EVAL_MODEL", "claude-2"),
      max_tokens: 1024,
      messages: [{ role: "user", content: prompt }]
    )
    return response.content.first.text
  end
  
  # Fall back to a simple evaluation
  "Score: 0.5\nReasoning: Unable to perform LLM-based evaluation. Please configure an LLM provider."
end
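
A hypothetical invocation, assuming the OpenAI path is configured (the returned string below is illustrative, not a real model output):

check   = LangsmithrbRails::Evals::Checks::LlmGraded
grading = check.call_llm("Grade this answer...\n\nScore:\nReasoning:")
# => "Score: 0.9\nReasoning: The answer is accurate and complete."   # illustrative
# With no provider configured, the fallback string
# "Score: 0.5\nReasoning: Unable to perform LLM-based evaluation. ..." is returned.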

.create_grading_prompt(input, answer, expected_answer) ⇒ String

Create a prompt for the LLM to grade the response



# File 'lib/generators/langsmithrb_rails/evals/templates/checks/llm_graded.rb', line 57

def self.create_grading_prompt(input, answer, expected_answer)
  "    You are an expert evaluator. Your task is to grade the quality and correctness of a response.\n    \n    Question: \#{input[\"question\"]}\n    \n    Expected Answer: \#{expected_answer}\n    \n    Actual Response: \#{answer}\n    \n    Please evaluate the response based on:\n    1. Correctness: Is the information accurate?\n    2. Completeness: Does it fully address the question?\n    3. Clarity: Is it well-explained and easy to understand?\n    \n    Provide your evaluation in the following format:\n    \n    Score: [a number between 0.0 and 1.0]\n    Reasoning: [your detailed explanation]\n  PROMPT\nend\n"

.evaluate(input, response, expected) ⇒ Hash

Check if the response is correct using an LLM



# File 'lib/generators/langsmithrb_rails/evals/templates/checks/llm_graded.rb', line 13

def self.evaluate(input, response, expected)
  result = {
    score: 0.0,
    reasoning: "",
    passed: false
  }
  
  # Extract the answer from the response
  answer = extract_answer(response)
  expected_answer = extract_answer(expected)
  
  # Create the prompt for the LLM
  prompt = create_grading_prompt(input, answer, expected_answer)
  
  # Call the LLM for grading
  llm_response = call_llm(prompt)
  
  # Parse the LLM response
  parsed_result = parse_llm_response(llm_response)
  
  # Update the result with the parsed data
  result[:score] = parsed_result[:score]
  result[:reasoning] = parsed_result[:reasoning]
  result[:passed] = parsed_result[:score] >= 0.7
  
  result
end
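
A sketch of a direct call (the score and reasoning shown are illustrative; actual values depend on the grading model):

result = LangsmithrbRails::Evals::Checks::LlmGraded.evaluate(
  { "question" => "What is 2 + 2?" },   # dataset input
  { "answer" => "2 + 2 equals 4." },    # response produced by the application
  { "answer" => "4" }                   # reference/expected output
)
# => { score: 1.0, reasoning: "The response is correct and clear.", passed: true }
# A result passes when the parsed score is at least 0.7.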

.extract_answer(response) ⇒ String

Extract the answer from the response



# File 'lib/generators/langsmithrb_rails/evals/templates/checks/llm_graded.rb', line 44

def self.extract_answer(response)
  return response["answer"] if response["answer"]
  return response["text"] if response["text"]
  return response["content"] if response["content"]
  return response["output"] if response["output"]
  return response.to_s
end
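
For example (illustrative values):

check = LangsmithrbRails::Evals::Checks::LlmGraded
check.extract_answer({ "text" => "hi", "answer" => "hello" })  # => "hello" ("answer" takes precedence)
check.extract_answer({ "output" => "42" })                     # => "42"
check.extract_answer("plain string")                           # => "plain string" (falls back to #to_s)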

.parse_llm_response(response) ⇒ Hash

Parse the LLM response



# File 'lib/generators/langsmithrb_rails/evals/templates/checks/llm_graded.rb', line 114

def self.parse_llm_response(response)
  result = {
    score: 0.5,
    reasoning: "Unable to parse LLM response"
  }
  
  # Extract score
  if response =~ /Score:\s*([\d\.]+)/i
    result[:score] = $1.to_f
    # Ensure score is between 0 and 1
    result[:score] = [0.0, [1.0, result[:score]].min].max
  end
  
  # Extract reasoning
  if response =~ /Reasoning:\s*(.+)/im
    result[:reasoning] = $1.strip
  end
  
  result
end
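
Two illustrative calls:

check = LangsmithrbRails::Evals::Checks::LlmGraded
check.parse_llm_response("Score: 1.4\nReasoning: Excellent answer.")
# => { score: 1.0, reasoning: "Excellent answer." }   # out-of-range scores are clamped to 0.0..1.0
check.parse_llm_response("no recognizable format")
# => { score: 0.5, reasoning: "Unable to parse LLM response" }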