Class: Rllama::Model

Inherits: Object
Defined in: lib/rllama/model.rb
Constant Summary

- DEFAULT_CONTEXT_LENGTH = 2**13
Instance Attribute Summary

- #pointer ⇒ Object (readonly): Returns the value of attribute pointer.
Instance Method Summary
- #build_chat_template(messages) ⇒ Object
- #chat_template ⇒ Object
- #close ⇒ Object
- #embed(prompt, normalize: true, batch_size: 512, &block) ⇒ Object
- #generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95, min_p: 0.05, seed: nil, system: nil, &block) ⇒ Object (also: #message)
- #init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512) ⇒ Object
- #init_embedding_context(n_ctx: n_ctx_train, n_batch: 512) ⇒ Object
- #initialize(path_or_name, dir: nil) ⇒ Model (constructor): A new instance of Model.
- #n_ctx_train ⇒ Object
- #n_embd ⇒ Object
- #n_seq_max ⇒ Object
- #tokenize(text, max_tokens: nil) ⇒ Object
- #vocab ⇒ Object
Constructor Details
#initialize(path_or_name, dir: nil) ⇒ Model
Returns a new instance of Model.
# File 'lib/rllama/model.rb', line 9

def initialize(path_or_name, dir: nil)
  resolved_path = Loader.resolve(path_or_name, dir:)

  model_params = Cpp.llama_model_default_params

  @pointer = Cpp.llama_model_load_from_file(resolved_path, model_params)

  raise Error, "Unable to load model from #{resolved_path}" if @pointer.null?
end
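A minimal usage sketch. The file path is hypothetical; path_or_name is handed to Loader.resolve, so anything that resolver can locate works the same way:

  # Hypothetical local GGUF file; point this at a model you actually have.
  model = Rllama::Model.new('models/llama-3.2-1b-instruct.gguf')

  # ... generate, embed, tokenize ...

  model.close # frees the native llama.cpp handle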
Instance Attribute Details
#pointer ⇒ Object (readonly)
Returns the value of attribute pointer.
# File 'lib/rllama/model.rb', line 7

def pointer
  @pointer
end
Instance Method Details
#build_chat_template(messages) ⇒ Object
# File 'lib/rllama/model.rb', line 98

def build_chat_template(messages)
  raise Error, 'Model does not provide a chat template' if chat_template.nil? || chat_template.empty?

  count = messages.length
  struct_size = Cpp::LlamaChatMessage.size
  array_ptr = FFI::MemoryPointer.new(struct_size * count)

  messages.each_with_index do |m, i|
    struct_ptr = array_ptr + (i * struct_size)
    msg_struct = Cpp::LlamaChatMessage.new(struct_ptr)
    msg_struct[:role] = FFI::MemoryPointer.from_string(m[:role].to_s)
    msg_struct[:content] = FFI::MemoryPointer.from_string(m[:content].to_s)
  end

  needed = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, nil, 0)
  raise Error, 'Failed to apply chat template' if needed.negative?

  buf = FFI::MemoryPointer.new(:char, needed)
  written = Cpp.llama_chat_apply_template(chat_template, array_ptr, count, true, buf, needed)

  raise Error, 'Failed to apply chat template' if written.negative?

  buf.read_string(written)
end
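An illustrative sketch of rendering a prompt from role/content hashes. It assumes the loaded model ships a chat template in its metadata; otherwise the method raises as shown above:

  messages = [
    { role: :system, content: 'You are a terse assistant.' },
    { role: :user,   content: 'Name three Ruby web servers.' }
  ]

  # Returns the rendered prompt string; the `true` passed to
  # llama_chat_apply_template appends the assistant generation prompt.
  prompt = model.build_chat_template(messages)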
#chat_template ⇒ Object
# File 'lib/rllama/model.rb', line 19

def chat_template
  @chat_template ||= Cpp.llama_model_chat_template(@pointer, nil)
end
#close ⇒ Object
# File 'lib/rllama/model.rb', line 76

def close
  Cpp.llama_model_free(@pointer)
end
#embed(prompt, normalize: true, batch_size: 512, &block) ⇒ Object
# File 'lib/rllama/model.rb', line 49

def embed(prompt, normalize: true, batch_size: 512, &block)
  inputs = prompt.is_a?(Array) ? prompt : [prompt]

  tokenized_inputs = inputs.map { |text| tokenize(text, max_tokens: n_ctx_train) }

  max_token_length = tokenized_inputs.map(&:length).max || 0
  effective_batch_size = [batch_size, max_token_length].max
  effective_ctx = [n_ctx_train, max_token_length].min

  init_embedding_context(n_ctx: effective_ctx, n_batch: effective_batch_size) do |ctx|
    inputs = prompt.is_a?(Array) ? tokenized_inputs : tokenized_inputs[0]

    ctx.embed(inputs, normalize:, batch_size: effective_batch_size, &block)
  end
end
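A usage sketch. The exact return shape is an assumption inferred from the string/array handling above: one vector for a single string, one vector per element for an array:

  # Single string input.
  vector = model.embed('Ruby bindings for llama.cpp')

  # Batch of strings, normalized embeddings.
  vectors = model.embed(['first document', 'second document'], normalize: true)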
#generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95, min_p: 0.05, seed: nil, system: nil, &block) ⇒ Object Also known as: message
# File 'lib/rllama/model.rb', line 39

def generate(prompt, max_tokens: DEFAULT_CONTEXT_LENGTH, temperature: 0.8, top_k: 40, top_p: 0.95,
             min_p: 0.05, seed: nil, system: nil, &block)
  init_context(n_ctx: max_tokens) do |ctx|
    ctx.generate(prompt, max_tokens: ctx.n_ctx, temperature:, top_k:, top_p:, seed:, system:, min_p:, &block)
  end
end
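A usage sketch. The block is simply forwarded to Context#generate, so what it yields (assumed here to be each streamed chunk of text) is defined by that class:

  # One-shot generation; #message is an alias for #generate.
  reply = model.generate('Write a haiku about Ruby.', temperature: 0.7, seed: 42)

  # Streaming with a system prompt.
  model.generate('Explain FFI briefly.', system: 'Answer in one short paragraph.') do |chunk|
    print chunk
  end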
#init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512) ⇒ Object
# File 'lib/rllama/model.rb', line 80

def init_context(embeddings: false, n_ctx: DEFAULT_CONTEXT_LENGTH, n_batch: 512)
  context = Context.new(self, embeddings:, n_ctx:, n_batch:)

  if block_given?
    result = yield context
    context.close
    return result
  end

  context
end
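The block form closes the context automatically; without a block the caller owns it. A sketch of both styles:

  # Block form: the context is closed after the block returns,
  # and the block's return value is passed through.
  result = model.init_context(n_ctx: 2048) do |ctx|
    ctx.generate('Hello')
  end

  # Bare form: close the context yourself when done.
  ctx = model.init_context
  # ... use ctx ...
  ctx.close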
#init_embedding_context(n_ctx: n_ctx_train, n_batch: 512) ⇒ Object
# File 'lib/rllama/model.rb', line 94

def init_embedding_context(n_ctx: n_ctx_train, n_batch: 512, &)
  init_context(embeddings: true, n_ctx:, n_batch:, &)
end
#n_ctx_train ⇒ Object
# File 'lib/rllama/model.rb', line 35

def n_ctx_train
  @n_ctx_train ||= Cpp.llama_model_n_ctx_train(@pointer)
end
#n_embd ⇒ Object
# File 'lib/rllama/model.rb', line 27

def n_embd
  @n_embd ||= Cpp.llama_model_n_embd(@pointer)
end
#n_seq_max ⇒ Object
# File 'lib/rllama/model.rb', line 31

def n_seq_max
  @n_seq_max ||= Cpp.llama_max_parallel_sequences
end
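These three accessors memoize values read from the native model; a quick illustrative sketch (the comments are examples, not guarantees):

  model.n_embd       # embedding width, e.g. 4096 for many 7B models
  model.n_ctx_train  # context length the model was trained with
  model.n_seq_max    # maximum parallel sequences supported by llama.cpp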
#tokenize(text, max_tokens: nil) ⇒ Object
# File 'lib/rllama/model.rb', line 65

def tokenize(text, max_tokens: nil)
  size = text.bytesize + 2
  tokens_ptr = FFI::MemoryPointer.new(:int32, size)

  count = Cpp.llama_tokenize(vocab, text, text.bytesize, tokens_ptr, size, true, false)

  raise Error, "Failed to tokenize text: '#{text}'" if count.negative?

  tokens_ptr.read_array_of_int32([count, max_tokens].compact.min)
end
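A usage sketch; the result is an array of int32 llama.cpp token ids, optionally truncated to max_tokens:

  tokens = model.tokenize('Hello, world!')
  tokens.length # => token count under this model's vocab

  # Cap the result, e.g. at the model's training context length.
  long_text = 'word ' * 10_000
  model.tokenize(long_text, max_tokens: model.n_ctx_train)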
#vocab ⇒ Object
# File 'lib/rllama/model.rb', line 23

def vocab
  @vocab ||= Cpp.llama_model_get_vocab(@pointer)
end