Class: LangsmithrbRails::Evaluation::Evaluator

Inherits: Object
Defined in:
lib/langsmithrb_rails/evaluation/evaluator.rb

Overview

Base evaluator class

Direct Known Subclasses

LLMEvaluator, StringEvaluator

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(client: nil, project_name: nil, tags: []) ⇒ Evaluator

Initialize a new evaluator



13
14
15
16
17
# File 'lib/langsmithrb_rails/evaluation/evaluator.rb', line 13

# Build a new evaluator.
#
# @param client [LangsmithrbRails::Client, nil] API client to use; when nil,
#   a default client is constructed
# @param project_name [String, nil] project the evaluations are recorded under
# @param tags [Array<String>] tags attached to evaluation runs
def initialize(client: nil, project_name: nil, tags: [])
  @project_name = project_name
  @tags = tags
  # Fall back to a freshly constructed default client when none is injected.
  @client = client || LangsmithrbRails::Client.new
end

Instance Attribute Details

#clientObject (readonly)

Returns the value of attribute client.



7
8
9
# File 'lib/langsmithrb_rails/evaluation/evaluator.rb', line 7

# @return [LangsmithrbRails::Client] the API client used for LangSmith requests
def client
  @client
end

#project_nameObject (readonly)

Returns the value of attribute project_name.



7
8
9
# File 'lib/langsmithrb_rails/evaluation/evaluator.rb', line 7

# @return [String, nil] project name the evaluations belong to, when set
def project_name
  @project_name
end

#tagsObject (readonly)

Returns the value of attribute tags.



7
8
9
# File 'lib/langsmithrb_rails/evaluation/evaluator.rb', line 7

# @return [Array<String>] tags attached to this evaluator's runs
def tags
  @tags
end

Instance Method Details

#evaluate(prediction, reference = nil, input = nil) ⇒ Hash

Evaluate a prediction against a reference

Raises:

  • (NotImplementedError)


24
25
26
# File 'lib/langsmithrb_rails/evaluation/evaluator.rb', line 24

# Evaluate a prediction against a reference answer.
#
# Abstract hook: subclasses (LLMEvaluator, StringEvaluator) must override
# this with a concrete scoring implementation.
#
# @param prediction [Object] the model output to score
# @param reference [Object, nil] expected answer, when available
# @param input [Object, nil] original input that produced the prediction
# @return [Hash] expected to carry :score and :metadata keys (callers
#   evaluate_run / evaluate_dataset read both)
# @raise [NotImplementedError] always, on this base class
def evaluate(prediction, reference = nil, input = nil)
  raise NotImplementedError, "Subclasses must implement evaluate method"
end

#evaluate_dataset(dataset_id, experiment_name, target_llm = nil) ⇒ Hash

Evaluate a dataset



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/langsmithrb_rails/evaluation/evaluator.rb', line 77

# Run this evaluator over every example in a dataset.
#
# @param dataset_id [String] identifier of the dataset to evaluate
# @param experiment_name [String] label recorded on the result set
# @param target_llm [Object, nil] optional model used to generate fresh
#   predictions; when nil each example's stored outputs act as the prediction
# @return [Hash] report with :experiment_name, :dataset_id, :evaluator_name
#   and a :results array of per-example {example_id, score, metadata} entries
# @raise [RuntimeError] when the dataset examples cannot be fetched
def evaluate_dataset(dataset_id, experiment_name, target_llm = nil)
  response = client.list_examples(dataset_id)

  # Anything outside the 2xx range is treated as a fetch failure.
  unless (200...300).cover?(response[:status])
    raise "Failed to get dataset examples: #{response[:error] || response[:body]}"
  end

  report = {
    experiment_name: experiment_name,
    dataset_id: dataset_id,
    evaluator_name: self.class.name,
    results: []
  }

  response[:body].each do |example|
    # With a target model, generate a fresh prediction from the example's
    # inputs; otherwise score the example's recorded outputs directly.
    prediction =
      if target_llm
        generate_prediction(target_llm, example["inputs"])
      else
        example["outputs"]
      end

    outcome = evaluate(prediction, example["outputs"], example["inputs"])

    report[:results] << {
      example_id: example["id"],
      score: outcome[:score],
      metadata: outcome[:metadata]
    }
  end

  report
end

#evaluate_run(run_id, reference = nil) ⇒ Hash

Evaluate a run



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/langsmithrb_rails/evaluation/evaluator.rb', line 32

# Evaluate a single run and attach the outcome to it as feedback.
#
# @param run_id [String] identifier of the run to evaluate
# @param reference [Object, nil] optional reference answer to compare against
# @return [Hash] the evaluation result, as produced by #evaluate
# @raise [RuntimeError] when the run cannot be fetched
def evaluate_run(run_id, reference = nil)
  response = client.get_run(run_id)

  # Anything outside the 2xx range is treated as a fetch failure.
  unless (200...300).cover?(response[:status])
    raise "Failed to get run: #{response[:error] || response[:body]}"
  end

  run = response[:body]
  prediction = extract_prediction_from_run(run)

  # Score the run's outputs against the reference, then record the
  # outcome on the run itself before returning it.
  result = evaluate(prediction, reference, run["inputs"])
  create_feedback(run_id, result)
  result
end

#evaluate_runs(run_ids, references = {}) ⇒ Hash<String, Hash>

Evaluate multiple runs



61
62
63
64
65
66
67
68
69
70
# File 'lib/langsmithrb_rails/evaluation/evaluator.rb', line 61

# Evaluate several runs in one pass.
#
# @param run_ids [Array<String>] identifiers of the runs to evaluate
# @param references [Hash{String => Object}] optional per-run reference
#   answers, keyed by run id (missing keys yield a nil reference)
# @return [Hash{String => Hash}] evaluation result per run id
def evaluate_runs(run_ids, references = {})
  run_ids.each_with_object({}) do |run_id, outcomes|
    outcomes[run_id] = evaluate_run(run_id, references[run_id])
  end
end