Class: Langchain::Vectorsearch::Elasticsearch

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/vectorsearch/elasticsearch.rb

Constant Summary

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary collapse

Attributes inherited from Base

#client, #llm

Instance Method Summary collapse

Methods inherited from Base

#add_data, #destroy_default_schema, #generate_hyde_prompt, #generate_rag_prompt, #get_default_schema, logger_options, #similarity_search_with_hyde

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, index_name:, llm:, api_key: nil, es_options: {}) ⇒ Elasticsearch

Returns a new instance of Elasticsearch.



34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 34

# Builds the Elasticsearch client and remembers which index to work against.
#
# @param url [String] URL handed to the Elasticsearch client
# @param index_name [String] name of the index used by this instance
# @param llm [Object] LLM client, forwarded to the parent initializer
# @param api_key [String, nil] API key handed to the Elasticsearch client when given
# @param es_options [Hash] extra client options; they override the defaults and api_key
def initialize(url:, index_name:, llm:, api_key: nil, es_options: {})
  require "elasticsearch"

  defaults = {
    url: url,
    request_timeout: 20,
    log: false
  }
  # api_key used to be accepted but silently dropped. Only add it when supplied
  # so that keyless setups keep exactly the same option set as before.
  defaults[:api_key] = api_key if api_key

  @options = defaults.merge(es_options)

  @es_client = ::Elasticsearch::Client.new(**options)
  @index_name = index_name

  super(llm: llm)
end

Instance Attribute Details

#es_clientObject

Wrapper around Elasticsearch vector search capabilities.

Setting up Elasticsearch:

  1. Get Elasticsearch up and running with Docker: www.elastic.co/guide/en/elasticsearch/reference/current/docker.html

  2. Copy the HTTP CA certificate SHA-256 fingerprint and set the ELASTICSEARCH_CA_FINGERPRINT environment variable

  3. Set the ELASTICSEARCH_URL environment variable

Gem requirements:

gem "elasticsearch", "~> 8.0.0"

Usage:

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
es = Langchain::Vectorsearch::Elasticsearch.new(
  url: ENV["ELASTICSEARCH_URL"],
  index_name: "docs",
  llm: llm,
  es_options: {
    transport_options: {ssl: {verify: false}},
    ca_fingerprint: ENV["ELASTICSEARCH_CA_FINGERPRINT"]
  }
)

es.create_default_schema
es.add_texts(texts: ["..."])
es.similarity_search(text: "...")


32
33
34
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 32

# Reader for the Elasticsearch client stored in @es_client.
def es_client
  @es_client
end

#index_nameObject

Wrapper around Elasticsearch vector search capabilities.

Setting up Elasticsearch:

  1. Get Elasticsearch up and running with Docker: www.elastic.co/guide/en/elasticsearch/reference/current/docker.html

  2. Copy the HTTP CA certificate SHA-256 fingerprint and set the ELASTICSEARCH_CA_FINGERPRINT environment variable

  3. Set the ELASTICSEARCH_URL environment variable

Gem requirements:

gem "elasticsearch", "~> 8.0.0"

Usage:

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
es = Langchain::Vectorsearch::Elasticsearch.new(
  url: ENV["ELASTICSEARCH_URL"],
  index_name: "docs",
  llm: llm,
  es_options: {
    transport_options: {ssl: {verify: false}},
    ca_fingerprint: ENV["ELASTICSEARCH_CA_FINGERPRINT"]
  }
)

es.create_default_schema
es.add_texts(texts: ["..."])
es.similarity_search(text: "...")


32
33
34
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 32

# Reader for the index name stored in @index_name.
def index_name
  @index_name
end

#optionsObject

Wrapper around Elasticsearch vector search capabilities.

Setting up Elasticsearch:

  1. Get Elasticsearch up and running with Docker: www.elastic.co/guide/en/elasticsearch/reference/current/docker.html

  2. Copy the HTTP CA certificate SHA-256 fingerprint and set the ELASTICSEARCH_CA_FINGERPRINT environment variable

  3. Set the ELASTICSEARCH_URL environment variable

Gem requirements:

gem "elasticsearch", "~> 8.0.0"

Usage:

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
es = Langchain::Vectorsearch::Elasticsearch.new(
  url: ENV["ELASTICSEARCH_URL"],
  index_name: "docs",
  llm: llm,
  es_options: {
    transport_options: {ssl: {verify: false}},
    ca_fingerprint: ENV["ELASTICSEARCH_CA_FINGERPRINT"]
  }
)

es.create_default_schema
es.add_texts(texts: ["..."])
es.similarity_search(text: "...")


32
33
34
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 32

# Reader for the options Hash stored in @options.
def options
  @options
end

Instance Method Details

#add_texts(texts: []) ⇒ Elasticsearch::Response

Add a list of texts to the index

Parameters:

  • texts (Array<String>) (defaults to: [])

    The list of texts to add

Returns:

  • (Elasticsearch::Response)

    from the Elasticsearch server



52
53
54
55
56
57
58
59
60
61
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 52

# Embeds each text with the LLM and sends all of them to the index in a
# single bulk request.
#
# @param texts [Array<String>] The list of texts to add
# @return [Elasticsearch::Response] from the Elasticsearch server
def add_texts(texts: [])
  # The bulk body alternates an action line with the document it applies to.
  operations = texts.flat_map do |text|
    action = {index: {_index: index_name}}
    document = {input: text, input_vector: llm.embed(text: text).embedding}
    [action, document]
  end

  es_client.bulk(body: operations)
end

#ask(question:, k: 4) {|String| ... } ⇒ String

Ask a question and return the answer

Parameters:

  • question (String)

    The question to ask

  • k (Integer) (defaults to: 4)

    The number of results to have in context

Yields:

  • (String)

    Stream responses back one String at a time

Returns:

  • (String)

    The answer to the question



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 146

# Ask a question and return the answer
#
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @yield [String] Stream responses back one String at a time
# @return [Object] the response from llm.chat, with its context set to the retrieved texts
def ask(question:, k: 4, &block)
  # The question is free text, so it goes through `text:` (which embeds it).
  # `query:` is reserved for a ready-made Elasticsearch query Hash.
  search_results = similarity_search(text: question, k: k)

  # similarity_search returns the raw response body, so the stored texts are
  # read from hits.hits[]._source.input rather than by mapping over the body.
  hits = search_results.dig("hits", "hits") || []
  context = hits.map { |hit| hit.dig("_source", "input") }.join("\n---\n")

  prompt = generate_rag_prompt(question: question, context: context)

  messages = [{role: "user", content: prompt}]
  response = llm.chat(messages: messages, &block)

  response.context = context
  response
end

#create_default_schemaElasticsearch::Response

Create the index with the default schema

Returns:

  • (Elasticsearch::Response)

    Index creation



91
92
93
94
95
96
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 91

# Creates the index named by index_name, using default_schema as the body.
#
# @return [Elasticsearch::Response] Index creation
def create_default_schema
  indices = es_client.indices
  indices.create(index: index_name, body: default_schema)
end

#default_query(query_vector) ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 127

# Builds the script_score query used when the caller supplies no query of
# their own: every document is matched and scored by the cosine similarity
# between the given vector and the stored 'input_vector' field, plus 1.0.
#
# @param query_vector [Array<Float>] vector to compare the stored vectors against
# @return [Hash] the Elasticsearch query
def default_query(query_vector)
  similarity_script = {
    source: "cosineSimilarity(params.query_vector, 'input_vector') + 1.0",
    params: {query_vector: query_vector}
  }

  {script_score: {query: {match_all: {}}, script: similarity_script}}
end

#default_schemaObject



114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 114

# Index body with two mapped fields: `input` (text) and `input_vector`,
# whose mapping comes from vector_settings.
#
# @return [Hash] the index definition
def default_schema
  field_mappings = {
    input: {type: "text"},
    input_vector: vector_settings
  }

  {mappings: {properties: field_mappings}}
end

#default_vector_settingsObject



106
107
108
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 106

# Mapping for the vector field: a dense_vector whose dimension count is
# taken from the LLM's default_dimensions.
#
# @return [Hash] the field mapping
def default_vector_settings
  dimensions = llm.default_dimensions
  {type: "dense_vector", dims: dimensions}
end

#delete_default_schemaElasticsearch::Response

Deletes the default schema

Returns:

  • (Elasticsearch::Response)

    Index deletion



100
101
102
103
104
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 100

# Deletes the index named by index_name.
#
# @return [Elasticsearch::Response] Index deletion
def delete_default_schema
  indices = es_client.indices
  indices.delete(index: index_name)
end

#remove_texts(ids: []) ⇒ Elasticsearch::Response

Remove a list of texts from the index

Parameters:

  • ids (Array<Integer>) (defaults to: [])

    The list of ids to delete

Returns:

  • (Elasticsearch::Response)

    from the Elasticsearch server



81
82
83
84
85
86
87
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 81

# Removes documents from the index with one bulk request containing a
# delete action per id.
#
# @param ids [Array<Integer>] The list of ids to delete
# @return [Elasticsearch::Response] from the Elasticsearch server
def remove_texts(ids: [])
  delete_actions = ids.map { |doc_id| {delete: {_index: index_name, _id: doc_id}} }

  es_client.bulk(body: delete_actions)
end

#similarity_search(text: "", k: 10, query: {}) ⇒ Elasticsearch::Response

Search for similar texts

Parameters:

  • text (String) (defaults to: "")

    The text to search for

  • k (Integer) (defaults to: 10)

    The number of results to return

  • query (Hash) (defaults to: {})

    Elasticsearch query that needs to be used while searching (Optional)

Returns:

  • (Elasticsearch::Response)

    The response from the server



167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 167

# Search for similar texts
#
# When no query is given, the text is embedded with the LLM and wrapped in
# default_query.
#
# @param text [String] The text to search for
# @param k [Integer] The number of results to return
# @param query [Hash] Elasticsearch query that needs to be used while searching (Optional)
# @return [Object] the `body` of the search response
# @raise [RuntimeError] when both text and query are empty
def similarity_search(text: "", k: 10, query: {})
  if text.empty? && query.empty?
    raise "Either text or query should pass as an argument"
  end

  query = default_query(llm.embed(text: text).embedding) if query.empty?

  # Scope the request to this instance's index. Without `index:` the search
  # is not restricted to the index the texts were written to.
  es_client.search(index: index_name, body: {query: query, size: k}).body
end

#similarity_search_by_vector(embedding: [], k: 10, query: {}) ⇒ Elasticsearch::Response

Search for similar texts by embedding

Parameters:

  • embedding (Array<Float>) (defaults to: [])

    The embedding to search for

  • k (Integer) (defaults to: 10)

    The number of results to return

  • query (Hash) (defaults to: {})

    Elasticsearch query that needs to be used while searching (Optional)

Returns:

  • (Elasticsearch::Response)

    The response from the server



186
187
188
189
190
191
192
193
194
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 186

# Search for similar texts by embedding
#
# When no query is given, the embedding is wrapped in default_query.
#
# @param embedding [Array<Float>] The embedding to search for
# @param k [Integer] The number of results to return
# @param query [Hash] Elasticsearch query that needs to be used while searching (Optional)
# @return [Object] the `body` of the search response
# @raise [RuntimeError] when both embedding and query are empty
def similarity_search_by_vector(embedding: [], k: 10, query: {})
  if embedding.empty? && query.empty?
    raise "Either embedding or query should pass as an argument"
  end

  query = default_query(embedding) if query.empty?

  # Scope the request to this instance's index. Without `index:` the search
  # is not restricted to the index the texts were written to.
  es_client.search(index: index_name, body: {query: query, size: k}).body
end

#update_texts(texts: [], ids: []) ⇒ Elasticsearch::Response

Update a list of texts in the index

Parameters:

  • texts (Array<String>) (defaults to: [])

    The list of texts to update

  • ids (Array<Integer>) (defaults to: [])

    The list of ids of the texts to update

Returns:

  • (Elasticsearch::Response)

    from the Elasticsearch server



67
68
69
70
71
72
73
74
75
76
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 67

# Re-embeds each text and indexes it under the id found at the same
# position in ids, all in a single bulk request.
#
# @param texts [Array<String>] The list of texts to update
# @param ids [Array<Integer>] ids paired with texts by position
# @return [Elasticsearch::Response] from the Elasticsearch server
def update_texts(texts: [], ids: [])
  # The bulk body alternates an action line with the document it applies to.
  operations = texts.each_with_index.flat_map do |text, position|
    action = {index: {_index: index_name, _id: ids[position]}}
    document = {input: text, input_vector: llm.embed(text: text).embedding}
    [action, document]
  end

  es_client.bulk(body: operations)
end

#vector_settingsObject



110
111
112
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 110

# Mapping for the vector field: the :vector_settings entry of options when
# it is set, otherwise default_vector_settings.
#
# @return [Hash] the field mapping
def vector_settings
  custom = options[:vector_settings]
  return custom if custom

  default_vector_settings
end