Class: Rllama::Model

Inherits:
Object
Defined in:
lib/rllama/model.rb

Constant Summary

DEFAULT_CONTEXT_LENGTH = 2**13

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(path_or_name, dir: nil) ⇒ Model

Returns a new instance of Model.

Raises:

(Error)

# File 'lib/rllama/model.rb', line 9

def initialize(path_or_name, dir: nil)
  resolved_path = Loader.resolve(path_or_name, dir:)

  model_params = Cpp.llama_model_default_params

  @pointer = Cpp.llama_model_load_from_file(resolved_path, model_params)

  raise Error, "Unable to load model from #{resolved_path}" if @pointer.null?
end
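Example (the file path and model name below are placeholders, not assets shipped with the gem):

model = Rllama::Model.new('models/example-q4_k_m.gguf')

# A bare name is resolved by Loader; the directory here is an assumed download location.
model = Rllama::Model.new('example-model', dir: File.expand_path('~/.rllama'))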

Instance Attribute Details

#pointer ⇒ Object (readonly)

Returns the value of attribute pointer.



# File 'lib/rllama/model.rb', line 7

def pointer
  @pointer
end

Instance Method Details

#build_chat_template(messages) ⇒ Object

Raises:

(Error)

# File 'lib/rllama/model.rb', line 98

def build_chat_template(messages)
  raise Error, 'Model does not provide a chat template' if chat_template.nil? || chat_template.empty?

  count = messages.length
  struct_size = Cpp::LlamaChatMessage.size
  array_ptr = FFI::MemoryPointer.new(struct_size * count)

  messages.each_with_index do |m, i|
    struct_ptr = array_ptr + (i * struct_size)
    msg_struct = Cpp::LlamaChatMessage.new(struct_ptr)
    msg_struct[:role] = FFI::MemoryPointer.from_string(m[:role].to_s)
    msg_struct[:content] = FFI::MemoryPointer.from_string(m[:content].to_s)
  end

  needed = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, nil, 0)

  raise Error, 'Failed to apply chat template' if needed.negative?

  buf = FFI::MemoryPointer.new(:char, needed)
  written = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, buf, needed)

  raise Error, 'Failed to apply chat template' if written.negative?

  buf.read_string(written)
end
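Example: rendering a chat transcript into a single prompt string. This assumes the loaded GGUF ships a chat template; otherwise an Error is raised.

messages = [
  { role: :system, content: 'You are a helpful assistant.' },
  { role: :user, content: 'Hello!' }
]

prompt = model.build_chat_template(messages)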

#chat_template ⇒ Object



# File 'lib/rllama/model.rb', line 19

def chat_template
  @chat_template ||= Cpp.llama_model_chat_template(@pointer, nil)
end

#close ⇒ Object



# File 'lib/rllama/model.rb', line 76

def close
  Cpp.llama_model_free(@pointer)
end
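Example: freeing the underlying native model once it is no longer needed (the path is a placeholder).

model = Rllama::Model.new('models/example.gguf')
begin
  model.generate('Hello')
ensure
  model.close
end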

#embed(prompt, normalize: true, batch_size: 512, &block) ⇒ Object



# File 'lib/rllama/model.rb', line 49

def embed(prompt, normalize: true, batch_size: 512, &block)
  inputs = prompt.is_a?(Array) ? prompt : [prompt]

  tokenized_inputs = inputs.map { |text| tokenize(text, max_tokens: n_ctx_train) }
  max_token_length = tokenized_inputs.map(&:length).max || 0

  effective_batch_size = [batch_size, max_token_length].max
  effective_ctx = [n_ctx_train, max_token_length].min

  init_embedding_context(n_ctx: effective_ctx, n_batch: effective_batch_size) do |ctx|
    inputs = prompt.is_a?(Array) ? tokenized_inputs : tokenized_inputs[0]

    ctx.embed(inputs, normalize:, batch_size: effective_batch_size, &block)
  end
end
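Example: a single string yields one embedding vector, an array yields one vector per input (texts and batch size are illustrative).

vector  = model.embed('The quick brown fox')
vectors = model.embed(['first document', 'second document'], normalize: true, batch_size: 256)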

#generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95, min_p: 0.05, seed: nil, system: nil, &block) ⇒ Object

Also known as: message



# File 'lib/rllama/model.rb', line 39

def generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95, min_p: 0.05,
             seed: nil, system: nil, &block)
  init_context(n_ctx: max_tokens) do |ctx|
    ctx.generate(prompt, max_tokens: ctx.n_ctx,
                         temperature:, top_k:, top_p:, seed:, system:, min_p:,
                 &block)
  end
end
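Example: one-shot generation, plus a streaming variant that assumes the block is invoked with each generated token fragment (prompts and parameters are illustrative).

result = model.generate('Write a haiku about Ruby', temperature: 0.7, seed: 42)

# The :message alias behaves the same way.
model.message('Tell me a short story', system: 'Be concise.') do |token|
  print token
end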

#init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512) ⇒ Object



# File 'lib/rllama/model.rb', line 80

def init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512)
  context = Context.new(self, embeddings:, n_ctx:, n_batch:)

  if block_given?
    result = yield context

    context.close

    return result
  end

  context
end
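Example: with a block the context is closed automatically when the block returns; without one the caller owns the returned Context and must close it.

model.init_context(n_ctx: 4096) do |ctx|
  ctx.generate('Hello', max_tokens: 128)
end

ctx = model.init_context
# ... use ctx ...
ctx.close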

#init_embedding_context(n_ctx: n_ctx_train, n_batch: 512) ⇒ Object



# File 'lib/rllama/model.rb', line 94

def init_embedding_context(n_ctx: n_ctx_train, n_batch: 512, &)
  init_context(embeddings: true, n_ctx:, n_batch:, &)
end
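Example: a convenience wrapper around #init_context with embeddings enabled and n_ctx defaulting to the model's training context length.

model.init_embedding_context(n_batch: 256) do |ctx|
  ctx.embed('Some text to embed')
end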

#n_ctx_train ⇒ Object



# File 'lib/rllama/model.rb', line 35

def n_ctx_train
  @n_ctx_train ||= Cpp.llama_model_n_ctx_train(@pointer)
end

#n_embd ⇒ Object



# File 'lib/rllama/model.rb', line 27

def n_embd
  @n_embd ||= Cpp.llama_model_n_embd(@pointer)
end

#n_seq_max ⇒ Object



# File 'lib/rllama/model.rb', line 31

def n_seq_max
  @n_seq_max ||= Cpp.llama_max_parallel_sequences
end

#tokenize(text, max_tokens: nil) ⇒ Object

Raises:

(Error)

# File 'lib/rllama/model.rb', line 65

def tokenize(text, max_tokens: nil)
  size = text.bytesize + 2

  tokens_ptr = FFI::MemoryPointer.new(:int32, size)
  count = Cpp.llama_tokenize(vocab, text, text.bytesize, tokens_ptr, size, true, false)

  raise Error, "Failed to tokenize text: '#{text}'" if count.negative?

  tokens_ptr.read_array_of_int32([count, max_tokens].compact.min)
end
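Example: converting text to token ids, optionally truncating the result (the input text is a placeholder).

tokens = model.tokenize('Hello, world!')
head   = model.tokenize('Hello, world!', max_tokens: 4)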

#vocab ⇒ Object



# File 'lib/rllama/model.rb', line 23

def vocab
  @vocab ||= Cpp.llama_model_get_vocab(@pointer)
end