Module: LangsmithrbRails::Evaluation

Defined in:
lib/langsmithrb_rails/evaluation.rb,
lib/langsmithrb_rails/evaluation/evaluator.rb,
lib/langsmithrb_rails/evaluation/llm_evaluator.rb,
lib/langsmithrb_rails/evaluation/string_evaluator.rb

Overview

Evaluation framework for LangSmith

Defined Under Namespace

Classes: Evaluator, LLMEvaluator, StringEvaluator

Class Method Summary collapse

Class Method Details

.create(type, **options) ⇒ Evaluator

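Create a new evaluator of the given type. Supported types are :string (builds a StringEvaluator) and :llm (builds an LLMEvaluator, which requires an :llm option); all options are forwarded to the evaluator's constructor. Raises ArgumentError for an unknown type or when :llm is missing for an LLM evaluator.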



14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/langsmithrb_rails/evaluation.rb', line 14

# Factory for evaluator instances.
#
# @param type [Symbol] which evaluator to build: +:string+ or +:llm+
# @param options [Hash] keyword options passed through to the evaluator's constructor
# @return [Evaluator] a StringEvaluator or LLMEvaluator instance
# @raise [ArgumentError] if +type+ is not recognised, or if +type+ is +:llm+
#   and no truthy +:llm+ option was supplied
def self.create(type, **options)
  case type
  when :llm
    # Reject early: the :llm option must be present before the evaluator is built.
    raise ArgumentError, "LLM evaluator requires an :llm option" unless options[:llm]

    LLMEvaluator.new(**options)
  when :string then StringEvaluator.new(**options)
  else raise ArgumentError, "Unknown evaluator type: #{type}"
  end
end

.evaluate_dataset(dataset_id, evaluators, experiment_name: nil, target_llm: nil) ⇒ Hash

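Run a batch evaluation on a dataset. Each evaluator's evaluate_dataset method is called with the dataset ID, the experiment name and the target LLM. When no experiment name is given, one is generated from the current UTC time. The returned hash contains the experiment name, the dataset ID, the evaluator class names and the list of per-evaluator results.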



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/langsmithrb_rails/evaluation.rb', line 34

# Evaluate a whole dataset with each of the given evaluators.
#
# @param dataset_id [Object] identifier handed to every evaluator
# @param evaluators [Array] objects responding to
#   +evaluate_dataset(dataset_id, experiment_name, target_llm)+
# @param experiment_name [String, nil] label for this evaluation; when omitted,
#   "Evaluation <current UTC time in ISO 8601>" is used
# @param target_llm [Object, nil] passed through to every evaluator unchanged
# @return [Hash] with keys +:experiment_name+, +:dataset_id+, +:evaluators+
#   (class names) and +:results+ (one entry per evaluator, in input order)
def self.evaluate_dataset(dataset_id, evaluators, experiment_name: nil, target_llm: nil)
  experiment_name = "Evaluation #{Time.now.utc.iso8601}" unless experiment_name

  evaluator_names = evaluators.map { |evaluator| evaluator.class.name }
  outcomes = evaluators.map do |evaluator|
    evaluator.evaluate_dataset(dataset_id, experiment_name, target_llm)
  end

  {
    experiment_name: experiment_name,
    dataset_id: dataset_id,
    evaluators: evaluator_names,
    results: outcomes
  }
end

.evaluate_runs(run_ids, evaluators, references = {}) ⇒ Hash

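Run a batch evaluation on a list of runs. Every run is passed to every evaluator's evaluate_run method together with the reference looked up for that run ID in references (nil when there is none). The returned hash contains the run IDs, the evaluator class names and the results, keyed first by run ID and then by evaluator class name.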



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/langsmithrb_rails/evaluation.rb', line 57

# Run every evaluator against every run and collect the outcomes.
#
# @param run_ids [Array] identifiers of the runs to evaluate
# @param evaluators [Array] objects responding to +evaluate_run(run_id, reference)+
# @param references [Hash, nil] optional reference value per run ID; +nil+ is
#   treated the same as an empty hash (no references)
# @return [Hash] with keys +:run_ids+, +:evaluators+ (class names) and
#   +:results+, a hash of run ID => { evaluator class name => evaluate_run result }
def self.evaluate_runs(run_ids, evaluators, references = {})
  # Tolerate an explicit nil so callers forwarding an optional argument do not
  # hit NoMethodError on the lookup below.
  references ||= {}

  evaluator_names = evaluators.map { |evaluator| evaluator.class.name }

  # NOTE: entries are keyed by class name, so two evaluators of the same class
  # overwrite each other's result for a run.
  per_run = run_ids.each_with_object({}) do |run_id, by_run|
    reference = references[run_id] # identical for every evaluator of this run
    by_run[run_id] = evaluators.each_with_object({}) do |evaluator, by_evaluator|
      by_evaluator[evaluator.class.name] = evaluator.evaluate_run(run_id, reference)
    end
  end

  {
    run_ids: run_ids,
    evaluators: evaluator_names,
    results: per_run
  }
end