Class: Langchain::Vectorsearch::Weaviate

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/vectorsearch/weaviate.rb

Constant Summary

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

Methods inherited from Base

#add_data, #generate_hyde_prompt, #generate_rag_prompt, logger_options, #similarity_search_with_hyde

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, api_key:, index_name:, llm:) ⇒ Weaviate

Initialize the Weaviate adapter

Parameters:

  • url (String)

    The URL of the Weaviate instance

  • api_key (String)

    The API key to use

  • index_name (String)

    The capitalized name of the index to use

  • llm (Object)

    The LLM client to use



20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/langchain/vectorsearch/weaviate.rb', line 20

# Build a Weaviate-backed vector search adapter.
#
# @param url [String] The URL of the Weaviate instance
# @param api_key [String] The API key to use
# @param index_name [String] The capitalized name of the index to use
# @param llm [Object] The LLM client to use
def initialize(url:, api_key:, index_name:, llm:)
  # Declares the gem dependency before the client constant is referenced below.
  depends_on "weaviate-ruby", req: "weaviate"

  @client = ::Weaviate::Client.new(url: url, api_key: api_key)

  # Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
  # TODO: Capitalize index_name
  # The name is currently stored exactly as the caller supplied it.
  @index_name = index_name

  super(llm: llm)
end

Instance Method Details

#add_texts(texts:, ids: []) ⇒ Hash

Add a list of texts to the index

Parameters:

  • texts (Array<String>)

    The list of texts to add

Returns:

  • (Hash)

    The response from the server



38
39
40
41
42
# File 'lib/langchain/vectorsearch/weaviate.rb', line 38

# Add a list of texts to the index in a single batch request.
#
# @param texts [Array<String>] The list of texts to add
# @param ids [Array] Passed through to weaviate_objects together with the texts
# @return [Hash] The response from the server
def add_texts(texts:, ids: [])
  objects_api = client.objects
  objects_api.batch_create(objects: weaviate_objects(texts, ids))
end

#ask(question:, k: 4) {|String| ... } ⇒ Hash

Ask a question and return the answer

Parameters:

  • question (String)

    The question to ask

  • k (Integer) (defaults to: 4)

    The number of results to have in context

Yields:

  • (String)

    Stream responses back one String at a time

Returns:

  • (Hash)

    The answer



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/langchain/vectorsearch/weaviate.rb', line 146

# Ask a question and return the answer, using the k most similar
# documents as context for the LLM.
#
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @yield [String] Stream responses back one String at a time
# @return [Object] The chat response, with its context set to the joined documents
def ask(question:, k: 4, &block)
  # Join the "content" of every hit into one context string, separated by "---" lines.
  context = similarity_search(query: question, k: k)
    .map { |hit| hit.dig("content").to_s }
    .join("\n---\n")

  rag_prompt = generate_rag_prompt(question: question, context: context)

  # The block (if any) is forwarded to the LLM so it can stream chunks back.
  llm.chat(messages: [{role: "user", content: rag_prompt}], &block).tap do |answer|
    answer.context = context
  end
end

#create_default_schema ⇒ Hash

Create default schema

Returns:

  • (Hash)

    The response from the server



92
93
94
95
96
97
98
99
100
101
102
# File 'lib/langchain/vectorsearch/weaviate.rb', line 92

# Create the default schema (class) for this index, with no server-side vectorizer.
#
# @return [Hash] The response from the server
def create_default_schema
  schema_properties = [
    # "__id" is used as a pointer to the original document;
    # "_id" (single underscore) is a reserved property name.
    {dataType: ["string"], name: "__id"},
    {dataType: ["text"], name: "content"}
  ]

  client.schema.create(class_name: index_name, vectorizer: "none", properties: schema_properties)
end

#destroy_default_schema ⇒ Boolean

Delete the index

Returns:

  • (Boolean)

    Whether the index was deleted



112
113
114
# File 'lib/langchain/vectorsearch/weaviate.rb', line 112

# Delete the schema (class) named by index_name.
#
# @return [Boolean] Whether the index was deleted
def destroy_default_schema
  schema_api = client.schema
  schema_api.delete(class_name: index_name)
end

#get_default_schema ⇒ Hash

Get default schema

Returns:

  • (Hash)

    The response from the server



106
107
108
# File 'lib/langchain/vectorsearch/weaviate.rb', line 106

# Fetch the schema (class) named by index_name.
#
# @return [Hash] The response from the server
def get_default_schema
  schema_api = client.schema
  schema_api.get(class_name: index_name)
end

#remove_texts(ids:) ⇒ Hash

Deletes a list of texts in the index

Parameters:

  • ids (Array)

    The ids of texts to delete

Returns:

  • (Hash)

    The response from the server

Raises:

  • (ArgumentError)


77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/langchain/vectorsearch/weaviate.rb', line 77

# Delete every object in the index whose "__id" matches one of the given ids.
#
# @param ids [Array] The ids of texts to delete
# @return [Hash] The response from the server
# @raise [ArgumentError] if ids is not an Array
def remove_texts(ids:)
  raise ArgumentError, "ids must be an array" unless ids.is_a?(Array)

  # Filter matching any object whose "__id" is contained in ids.
  id_filter = {path: ["__id"], operator: "ContainsAny", valueTextArray: ids}

  client.objects.batch_delete(class_name: index_name, where: id_filter)
end

#similarity_search(query:, k: 4) ⇒ Hash

Return documents similar to the query

Parameters:

  • query (String)

    The query to search for

  • k (Integer|String) (defaults to: 4)

    The number of results to return

Returns:

  • (Hash)

    The search results



120
121
122
123
124
# File 'lib/langchain/vectorsearch/weaviate.rb', line 120

# Return documents similar to the query by embedding it with the LLM
# and delegating to similarity_search_by_vector.
#
# @param query [String] The query to search for
# @param k [Integer, String] The number of results to return
# @return [Hash] The search results
def similarity_search(query:, k: 4)
  similarity_search_by_vector(
    embedding: llm.embed(text: query).embedding,
    k: k
  )
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ Hash

Return documents similar to the vector

Parameters:

  • embedding (Array<Float>)

    The vector to search for

  • k (Integer|String) (defaults to: 4)

    The number of results to return

Returns:

  • (Hash)

    The search results



130
131
132
133
134
135
136
137
138
139
# File 'lib/langchain/vectorsearch/weaviate.rb', line 130

# Return documents similar to the given vector.
#
# @param embedding [Array<Float>] The vector to search for
# @param k [Integer, String] The number of results to return
# @return [Hash] The search results
def similarity_search_by_vector(embedding:, k: 4)
  query_args = {
    class_name: index_name,
    # The vector is interpolated into a "{ vector: ... }" clause string.
    near_vector: "{ vector: #{embedding} }",
    # The limit is always sent as a String.
    limit: k.to_s,
    fields: "__id content _additional { id }"
  }

  client.query.get(**query_args)
end

#update_texts(texts:, ids:) ⇒ Hash

Update a list of texts in the index

Parameters:

  • texts (Array<String>)

    The list of texts to update

  • ids (Array)

    The `__id` values of the objects to update, matched to texts by position

Returns:

  • (Hash)

    The response from the server



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/langchain/vectorsearch/weaviate.rb', line 47

# Update a list of texts in the index.
#
# Works in two phases: first the Weaviate UUID of every object is looked up
# by its "__id" property, then each object is rewritten with the new text
# and a freshly computed embedding.
#
# @param texts [Array<String>, String] The text(s) to update
# @param ids [Array, Object] The "__id" value(s) of the objects, matched to texts by position
# @return [Array] One entry per text: the result of each client.objects.update call
def update_texts(texts:, ids:)
  # Normalize once so a single text/id behaves like a one-element list in
  # BOTH phases. Previously only the lookup phase wrapped texts in Array(),
  # so a lone String survived phase 1 and then failed on String#map in phase 2.
  texts = Array(texts)
  ids = Array(ids)

  # Phase 1: retrieve the UUIDs of the objects to update.
  # NOTE(review): the id is interpolated verbatim into the filter string;
  # ids containing double quotes would produce a malformed filter.
  uuids = texts.each_index.map do |i|
    record = client.query.get(
      class_name: index_name,
      fields: "_additional { id }",
      where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{ids[i]}\" }"
    )
    record[0].dig("_additional", "id")
  end

  # Phase 2: update the objects, re-embedding each text.
  texts.each_with_index.map do |text, i|
    client.objects.update(
      class_name: index_name,
      id: uuids[i],
      properties: {
        __id: ids[i].to_s,
        content: text
      },
      vector: llm.embed(text: text).embedding
    )
  end
end