Module: Riktoken
- Defined in:
lib/riktoken.rb,
lib/riktoken/bpe.rb,
lib/riktoken/version.rb,
lib/riktoken/encoding.rb,
lib/riktoken/encodings.rb,
lib/riktoken/tiktoken_file.rb,
lib/riktoken/encodings/p50k_base.rb,
lib/riktoken/encodings/p50k_edit.rb,
lib/riktoken/encodings/r50k_base.rb,
lib/riktoken/encodings/o200k_base.rb,
lib/riktoken/encodings/cl100k_base.rb
Defined Under Namespace
Modules: Encodings
Classes: BPE, Encoding, TiktokenFile, UnknownEncodingError, UnknownModelError
Constant Summary
collapse
- VERSION =
"0.0.1"
Class Method Summary
collapse
Class Method Details
.default_tiktoken_base_dir ⇒ Object
151
152
153
|
# File 'lib/riktoken.rb', line 151
# Resolves the base directory used when no explicit one is supplied.
#
# Reads the environment variable whose name is held in
# TIKTOKEN_BASE_DIR_ENV_KEY; when that variable is not set, falls back to
# DEFAULT_TIKTOKEN_BASE_DIR.
#
# @return the environment variable's value if set, otherwise
#   DEFAULT_TIKTOKEN_BASE_DIR
def default_tiktoken_base_dir
  ENV.fetch(TIKTOKEN_BASE_DIR_ENV_KEY, DEFAULT_TIKTOKEN_BASE_DIR)
end
|
.encoding_for_model(model_name, tiktoken_base_dir: default_tiktoken_base_dir) ⇒ Object
98
99
100
101
102
103
|
# File 'lib/riktoken.rb', line 98
# Returns the encoding associated with a model name.
#
# The model name is looked up in MODEL_TO_ENCODING and the resulting
# encoding name is handed to get_encoding together with the base directory.
#
# @param model_name the key to look up in MODEL_TO_ENCODING
# @param tiktoken_base_dir forwarded unchanged to get_encoding; defaults to
#   default_tiktoken_base_dir
# @return whatever get_encoding returns for the resolved encoding name
# @raise [UnknownModelError] when MODEL_TO_ENCODING has no usable entry for
#   model_name
def encoding_for_model(model_name, tiktoken_base_dir: default_tiktoken_base_dir)
  resolved_name = MODEL_TO_ENCODING[model_name] ||
    raise(UnknownModelError, "Unknown model: #{model_name}")

  get_encoding(resolved_name, tiktoken_base_dir: tiktoken_base_dir)
end
|
.encoding_from_file(path:, name:, pattern:, special_tokens: {}) ⇒ Object
124
125
126
127
128
129
130
131
132
133
134
|
# File 'lib/riktoken.rb', line 124
# Builds an Encoding whose ranks are loaded from a file.
#
# A fresh TiktokenFile instance loads the ranks from +path+; the result is
# passed to Encoding.new along with the remaining arguments.
#
# @param path passed to TiktokenFile#load
# @param name passed to Encoding.new as +name:+
# @param pattern passed to Encoding.new as +pattern:+
# @param special_tokens passed to Encoding.new as +special_tokens:+
#   (defaults to an empty Hash)
# @return the object produced by Encoding.new
def encoding_from_file(path:, name:, pattern:, special_tokens: {})
  loaded_ranks = TiktokenFile.new.load(path)

  Encoding.new(name: name, ranks: loaded_ranks, special_tokens: special_tokens, pattern: pattern)
end
|
.get_encoding(encoding_name, tiktoken_base_dir: default_tiktoken_base_dir) ⇒ Object
Get the encoding by name (like “cl100k_base”).
.list_encoding_names ⇒ Object
137
138
139
|
# File 'lib/riktoken.rb', line 137
# Lists the encoding names returned by this module.
#
# A new Array is built on every call.
#
# @return [Array<String>] the five encoding names, in the order written below
def list_encoding_names
  %w[
    cl100k_base
    p50k_base
    p50k_edit
    r50k_base
    o200k_base
  ]
end
|
.list_model_names ⇒ Object
142
143
144
|
# File 'lib/riktoken.rb', line 142
# Lists every model name present as a key in MODEL_TO_ENCODING.
#
# @return the keys of MODEL_TO_ENCODING
def list_model_names = MODEL_TO_ENCODING.keys
|
.make_encoding(name:, ranks:, pattern:, special_tokens: {}) ⇒ Object
110
111
112
113
114
115
116
117
|
# File 'lib/riktoken.rb', line 110
# Builds an Encoding directly from already-loaded ranks.
#
# All arguments are forwarded to Encoding.new under the same keyword names.
#
# @param name passed to Encoding.new as +name:+
# @param ranks passed to Encoding.new as +ranks:+
# @param pattern passed to Encoding.new as +pattern:+
# @param special_tokens passed to Encoding.new as +special_tokens:+
#   (defaults to an empty Hash)
# @return the object produced by Encoding.new
def make_encoding(name:, ranks:, pattern:, special_tokens: {})
  Encoding.new(name: name, ranks: ranks, special_tokens: special_tokens, pattern: pattern)
end
|