Class: Rllama::Model

Inherits:
Object
Defined in:
lib/rllama/model.rb

Constant Summary

DEFAULT_CONTEXT_LENGTH = 2**13

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(path_or_name, dir: nil) ⇒ Model

Returns a new instance of Model.

Raises:

(Error)

# File 'lib/rllama/model.rb', line 9

def initialize(path_or_name, dir: nil)
  resolved_path = Loader.resolve(path_or_name, dir:)

  model_params = Cpp.llama_model_default_params

  @pointer = Cpp.llama_model_load_from_file(resolved_path, model_params)

  raise Error, "Unable to load model from #{resolved_path}" if @pointer.null?
end
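Example (the file path and model name below are placeholders, not assets shipped with the gem):

model = Rllama::Model.new('models/example-q4_k_m.gguf')

# A bare name is resolved by Loader; the directory here is an assumed download location.
model = Rllama::Model.new('example-model', dir: File.expand_path('~/.rllama'))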

Instance Attribute Details

#pointer ⇒ Object (readonly)

Returns the value of attribute pointer.



# File 'lib/rllama/model.rb', line 7

def pointer
  @pointer
end

Instance Method Details

#build_chat_template(messages) ⇒ Object

Raises:

(Error)

# File 'lib/rllama/model.rb', line 98

def build_chat_template(messages)
  raise Error, 'Model does not provide a chat template' if chat_template.nil? || chat_template.empty?

  count = messages.length
  struct_size = Cpp::LlamaChatMessage.size
  array_ptr = FFI::MemoryPointer.new(struct_size * count)

  messages.each_with_index do |m, i|
    struct_ptr = array_ptr + (i * struct_size)
    msg_struct = Cpp::LlamaChatMessage.new(struct_ptr)
    msg_struct[:role] = FFI::MemoryPointer.from_string(m[:role].to_s)
    msg_struct[:content] = FFI::MemoryPointer.from_string(m[:content].to_s)
  end

  needed = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, nil, 0)

  raise Error, 'Failed to apply chat template' if needed.negative?

  buf = FFI::MemoryPointer.new(:char, needed)
  written = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, buf, needed)

  raise Error, 'Failed to apply chat template' if written.negative?

  buf.read_string(written)
end
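Example: rendering a chat transcript into a single prompt string. This assumes the loaded GGUF ships a chat template; otherwise an Error is raised.

messages = [
  { role: :system, content: 'You are a helpful assistant.' },
  { role: :user, content: 'Hello!' }
]

prompt = model.build_chat_template(messages)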

#chat_template ⇒ Object



# File 'lib/rllama/model.rb', line 19

def chat_template
  @chat_template ||= Cpp.llama_model_chat_template(@pointer, nil)
end

#close ⇒ Object



# File 'lib/rllama/model.rb', line 76

def close
  Cpp.llama_model_free(@pointer)
end
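Example: freeing the underlying native model once it is no longer needed (the path is a placeholder).

model = Rllama::Model.new('models/example.gguf')
begin
  model.generate('Hello')
ensure
  model.close
end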

#embed(prompt, normalize: true, batch_size: 512, &block) ⇒ Object



# File 'lib/rllama/model.rb', line 49

def embed(prompt, normalize: true, batch_size: 512, &block)
  inputs = prompt.is_a?(Array) ? prompt : [prompt]

  tokenized_inputs = inputs.map { |text| tokenize(text, max_tokens: n_ctx_train) }
  max_token_length = tokenized_inputs.map(&:length).max || 0

  effective_batch_size = [batch_size, max_token_length].max
  effective_ctx = [n_ctx_train, max_token_length].min

  init_embedding_context(n_ctx: effective_ctx, n_batch: effective_batch_size) do |ctx|
    inputs = prompt.is_a?(Array) ? tokenized_inputs : tokenized_inputs[0]

    ctx.embed(inputs, normalize:, batch_size: effective_batch_size, &block)
  end
end
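Example: a single string yields one embedding vector, an array yields one vector per input (texts and batch size are illustrative).

vector  = model.embed('The quick brown fox')
vectors = model.embed(['first document', 'second document'], normalize: true, batch_size: 256)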

#generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95, min_p: 0.05, seed: nil, system: nil, &block) ⇒ Object

Also known as: message



# File 'lib/rllama/model.rb', line 39

def generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95, min_p: 0.05,
             seed: nil, system: nil, &block)
  init_context(n_ctx: max_tokens) do |ctx|
    ctx.generate(prompt, max_tokens: ctx.n_ctx,
                         temperature:, top_k:, top_p:, seed:, system:, min_p:,
                 &block)
  end
end
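Example: one-shot generation, plus a streaming variant that assumes the block is invoked with each generated token fragment (prompts and parameters are illustrative).

result = model.generate('Write a haiku about Ruby', temperature: 0.7, seed: 42)

# The :message alias behaves the same way.
model.message('Tell me a short story', system: 'Be concise.') do |token|
  print token
end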

#init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512) ⇒ Object



# File 'lib/rllama/model.rb', line 80

def init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512)
  context = Context.new(self, embeddings:, n_ctx:, n_batch:)

  if block_given?
    result = yield context

    context.close

    return result
  end

  context
end
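Example: with a block the context is closed automatically when the block returns; without one the caller owns the returned Context and must close it.

model.init_context(n_ctx: 4096) do |ctx|
  ctx.generate('Hello', max_tokens: 128)
end

ctx = model.init_context
# ... use ctx ...
ctx.close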

#init_embedding_context(n_ctx: n_ctx_train, n_batch: 512) ⇒ Object



# File 'lib/rllama/model.rb', line 94

def init_embedding_context(n_ctx: n_ctx_train, n_batch: 512, &)
  init_context(embeddings: true, n_ctx:, n_batch:, &)
end
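Example: a convenience wrapper around #init_context with embeddings enabled and n_ctx defaulting to the model's training context length.

model.init_embedding_context(n_batch: 256) do |ctx|
  ctx.embed('Some text to embed')
end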

#n_ctx_train ⇒ Object



# File 'lib/rllama/model.rb', line 35

def n_ctx_train
  @n_ctx_train ||= Cpp.llama_model_n_ctx_train(@pointer)
end

#n_embd ⇒ Object



# File 'lib/rllama/model.rb', line 27

def n_embd
  @n_embd ||= Cpp.llama_model_n_embd(@pointer)
end

#n_seq_max ⇒ Object



# File 'lib/rllama/model.rb', line 31

def n_seq_max
  @n_seq_max ||= Cpp.llama_max_parallel_sequences
end

#tokenize(text, max_tokens: nil) ⇒ Object

Raises:

(Error)

# File 'lib/rllama/model.rb', line 65

def tokenize(text, max_tokens: nil)
  size = text.bytesize + 2

  tokens_ptr = FFI::MemoryPointer.new(:int32, size)
  count = Cpp.llama_tokenize(vocab, text, text.bytesize, tokens_ptr, size, true, false)

  raise Error, "Failed to tokenize text: '#{text}'" if count.negative?

  tokens_ptr.read_array_of_int32([count, max_tokens].compact.min)
end
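Example: converting text to token ids, optionally truncating the result (the input text is a placeholder).

tokens = model.tokenize('Hello, world!')
head   = model.tokenize('Hello, world!', max_tokens: 4)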

#vocab ⇒ Object



# File 'lib/rllama/model.rb', line 23

def vocab
  @vocab ||= Cpp.llama_model_get_vocab(@pointer)
end