Class: Prescient::Provider::Ollama

Inherits:
Base
  • Object
show all
Includes:
HTTParty
Defined in:
lib/prescient/provider/ollama.rb

Overview

Since:

  • 1.0.0

Constant Summary collapse

EMBEDDING_DIMENSIONS =

Embedding vector length produced by the nomic-embed-text model

Since:

  • 1.0.0

768

Instance Attribute Summary

Attributes inherited from Base

#options

Instance Method Summary collapse

Methods inherited from Base

#apply_format_template, #available?, #build_format_data, #build_prompt, #calculate_field_match_score, #clean_text, #default_context_configs, #default_prompt_templates, #detect_context_type, #extract_configured_fields, #extract_embedding_text, #extract_text_values, #fallback_format_hash, #find_best_field_match, #format_context_item, #format_hash_item, #handle_errors, #match_context_by_fields, #normalize_embedding, #resolve_context_config

Constructor Details

#initialize(**options) ⇒ Ollama

Returns a new instance of Ollama.

Since:

  • 1.0.0



10
11
12
13
14
# File 'lib/prescient/provider/ollama.rb', line 10

# Builds the provider and applies class-level HTTParty configuration
# (base URI and request timeout) from the options merged by Base#initialize.
# A timeout of 60 seconds is used when none is configured.
def initialize(**options)
  super
  provider_class = self.class
  provider_class.base_uri(@options[:url])
  request_timeout = @options[:timeout] || 60
  provider_class.default_timeout(request_timeout)
end

Instance Method Details

#available_modelsObject

Since:

  • 1.0.0



89
90
91
92
93
94
95
96
97
98
99
# File 'lib/prescient/provider/ollama.rb', line 89

# Lists the models installed on the Ollama server (GET /api/tags),
# flagging which entry matches the configured embedding and chat models.
# The result is memoized per instance; a missing/empty payload yields [].
def available_models
  return @_available_models if defined?(@_available_models)

  handle_errors do
    raw_models = fetch_and_parse('get', '/api/tags', root_key: 'models') || []
    @_available_models = raw_models.map do |model|
      name = model['name']
      { embedding:   name == @options[:embedding_model],
        chat:        name == @options[:chat_model],
        name:        name,
        size:        model['size'],
        modified_at: model['modified_at'],
        digest:      model['digest'] }
    end
  end
end

#fetch_and_parse(htt_verb, endpoint, **options) ⇒ Object (private)

Since:

  • 1.0.0



144
145
146
147
148
149
150
151
152
153
# File 'lib/prescient/provider/ollama.rb', line 144

# Issues an HTTP request through HTTParty (verb given as a string, e.g.
# 'get'/'post'), validates the response, and — when a :root_key option is
# supplied — returns that key from the parsed JSON body. Without
# :root_key the method returns nil after validation.
def fetch_and_parse(htt_verb, endpoint, **options)
  request_options = options.dup
  root_key = request_options.delete(:root_key)

  response = self.class.send(htt_verb, endpoint, **request_options)
  validate_response!(response, "#{htt_verb.upcase} #{endpoint}")

  root_key ? response.parsed_response[root_key] : nil
end

#generate_embedding(text, **_options) ⇒ Object

Since:

  • 1.0.0



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/prescient/provider/ollama.rb', line 16

# Requests an embedding vector for +text+ from Ollama's /api/embeddings
# endpoint and normalizes it to EMBEDDING_DIMENSIONS. Extra options are
# accepted for interface parity but ignored. Raises
# Prescient::InvalidResponseError when the API returns no embedding.
def generate_embedding(text, **_options)
  handle_errors do
    payload = {
      model:  @options[:embedding_model],
      prompt: clean_text(text),
    }

    embedding = fetch_and_parse('post', '/api/embeddings',
                                root_key: 'embedding',
                                headers:  { 'Content-Type' => 'application/json' },
                                body:     payload.to_json)

    raise Prescient::InvalidResponseError, 'No embedding returned' unless embedding

    normalize_embedding(embedding, EMBEDDING_DIMENSIONS)
  end
end

#generate_response(prompt, context_items = [], **options) ⇒ Object

Since:

  • 1.0.0



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/prescient/provider/ollama.rb', line 32

# Generates a chat completion for +prompt+, optionally grounded in
# +context_items+, via Ollama's /api/generate endpoint.
#
# Supported options (forwarded to #prepare_generate_response):
# :max_tokens, :temperature, :top_p.
#
# Returns a hash with the stripped response text, the chat model name,
# the provider id, processing time in seconds, and token-count metadata.
# Raises Prescient::InvalidResponseError when no text is returned.
def generate_response(prompt, context_items = [], **options)
  handle_errors do
    request_options = prepare_generate_response(prompt, context_items, **options)
    # :root_key is a directive for fetch_and_parse, not an HTTParty
    # option; strip it so it does not leak into the HTTP request options.
    request_options.delete(:root_key)

    response = self.class.post('/api/generate', **request_options)
    validate_response!(response, 'POST /api/generate')

    parsed = response.parsed_response
    generated_text = parsed['response']
    raise Prescient::InvalidResponseError, 'No response generated' unless generated_text

    # Ollama reports total_duration in nanoseconds; convert to seconds.
    total_duration = parsed['total_duration']

    {
      response:        generated_text.strip,
      model:           @options[:chat_model],
      provider:        'ollama',
      processing_time: total_duration && total_duration / 1_000_000_000.0,
      metadata:        {
        eval_count:        parsed['eval_count'],
        eval_duration:     parsed['eval_duration'],
        prompt_eval_count: parsed['prompt_eval_count'],
      },
    }
  end
end

#health_checkObject

Since:

  • 1.0.0



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/prescient/provider/ollama.rb', line 57

# Reports provider health: which models are installed and whether the
# configured embedding and chat models are both available. Provider
# errors are converted into an 'unavailable' payload rather than raised.
def health_check
  handle_errors do
    models = available_models
    embedding_ready = models.any? { |model| model[:embedding] }
    chat_ready = models.any? { |model| model[:chat] }

    {
      status:           'healthy',
      provider:         'ollama',
      url:              @options[:url],
      models_available: models.map { |model| model[:name] },
      embedding_model:  { name: @options[:embedding_model], available: embedding_ready },
      chat_model:       { name: @options[:chat_model], available: chat_ready },
      ready:            embedding_ready && chat_ready,
    }
  end
rescue Prescient::Error => e
  {
    status:   'unavailable',
    provider: 'ollama',
    error:    e.class.name,
    message:  e.message,
    url:      @options[:url],
  }
end

#prepare_generate_response(prompt, context_items = [], **options) ⇒ Object (private)

Since:

  • 1.0.0



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/prescient/provider/ollama.rb', line 128

# Assembles the request options for /api/generate: JSON content headers
# and a body carrying the context-formatted prompt plus generation
# parameters (:max_tokens, :temperature, :top_p, with defaults 2000,
# 0.7 and 0.9 respectively). Streaming is always disabled.
def prepare_generate_response(prompt, context_items = [], **options)
  generation_options = {
    num_predict: options[:max_tokens] || 2000,
    temperature: options[:temperature] || 0.7,
    top_p:       options[:top_p] || 0.9,
  }

  request_body = {
    model:   @options[:chat_model],
    prompt:  build_prompt(prompt, context_items),
    stream:  false,
    options: generation_options,
  }

  {
    root_key: 'response',
    headers:  { 'Content-Type' => 'application/json' },
    body:     request_body.to_json,
  }
end

#pull_model(model_name) ⇒ Object

Since:

  • 1.0.0



101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/prescient/provider/ollama.rb', line 101

# Downloads +model_name+ onto the Ollama server via /api/pull, allowing
# up to five minutes for the transfer, and returns a success summary.
def pull_model(model_name)
  handle_errors do
    fetch_and_parse('post', '/api/pull',
                    headers: { 'Content-Type' => 'application/json' },
                    body:    { name: model_name }.to_json,
                    timeout: 300) # model downloads can take a while
    {
      success: true,
      model:   model_name,
      message: "Model #{model_name} pulled successfully",
    }
  end
end

#validate_configuration!Object (protected)

Raises:

  • (Prescient::Error) — when any of the required options (:url, :embedding_model, :chat_model) is missing
Since:

  • 1.0.0



117
118
119
120
121
122
123
124
# File 'lib/prescient/provider/ollama.rb', line 117

# Verifies the provider was configured with :url, :embedding_model and
# :chat_model. Raises Prescient::Error naming every missing option;
# returns nil when the configuration is complete.
def validate_configuration!
  missing = %i[url embedding_model chat_model].select { |key| @options[key].nil? }

  return if missing.empty?

  raise Prescient::Error, "Missing required options: #{missing.join(', ')}"
end

#validate_response!(response, operation) ⇒ Object (private)

Since:

  • 1.0.0



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/prescient/provider/ollama.rb', line 155

# Maps a failed HTTParty response onto the matching Prescient error
# class (404 → model missing, 429 → rate limit, 401/403 → auth,
# 5xx → server error, anything else → generic error). Successful
# responses pass through untouched.
def validate_response!(response, operation)
  return if response.success?

  code = response.code

  case code
  when 404
    raise Prescient::ModelNotAvailableError, "Model not available for #{operation}"
  when 429
    raise Prescient::RateLimitError, "Rate limit exceeded for #{operation}"
  when 401, 403
    raise Prescient::AuthenticationError, "Authentication failed for #{operation}"
  when 500..599
    raise Prescient::Error, "Ollama server error during #{operation}: #{response.body}"
  else
    raise Prescient::Error,
          "Ollama request failed for #{operation}: HTTP #{code} - #{response.message}"
  end
end