Module: Riktoken::Encodings::R50kBase
- Includes:
- Riktoken::Encodings
- Defined in:
- lib/riktoken/encodings/r50k_base.rb
Class Method Summary
Methods included from Riktoken::Encodings
Class Method Details
.load_encoding(tiktoken_base_dir:) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/riktoken/encodings/r50k_base.rb', line 15

# Builds the Riktoken::Encoding for ENCODING_NAME.
#
# The rank table is read with TiktokenFile#load from the path returned by
# find_tiktoken_file. The encoding is given a single special token,
# "<|endoftext|>" (id 50256), and the split pattern defined below.
#
# @param tiktoken_base_dir forwarded as +base_dir+ to find_tiktoken_file
# @return [Riktoken::Encoding] the value produced by Riktoken::Encoding.new
def self.load_encoding(tiktoken_base_dir:)
  loader = TiktokenFile.new
  tiktoken_path = find_tiktoken_file(name: ENCODING_NAME, base_dir: tiktoken_base_dir)
  rank_table = loader.load(tiktoken_path)

  Riktoken::Encoding.new(
    name: ENCODING_NAME,
    ranks: rank_table,
    special_tokens: { "<|endoftext|>" => 50256 },
    pattern: /'(?:[sdmt]|ll|ve|re)| ?\p{L}++| ?\p{N}++| ?[^\s\p{L}\p{N}]++|\s++$|\s+(?!\S)|\s/
  )
end