Class: Riktoken::TiktokenFile

Inherits:
Object
  • Object
show all
Defined in:
lib/riktoken/tiktoken_file.rb

Defined Under Namespace

Classes: ParseError

Instance Method Summary collapse

Instance Method Details

#load(path) ⇒ Object



37
38
39
40
# File 'lib/riktoken/tiktoken_file.rb', line 37

# Reads the file at +path+ as UTF-8 text and hands that text to #parse.
#
# @param path [String] location of the .tiktoken file to read
# @return [Object] whatever #parse returns for the file's text
def load(path)
  parse(File.read(path, encoding: "UTF-8"))
end

#parse(content) ⇒ Object

Parses the content of a .tiktoken file and returns a hash mapping each decoded token (stored in the file as base64) to its integer rank.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/riktoken/tiktoken_file.rb', line 12

# Parses the text of a .tiktoken file into a rank table.
#
# Each meaningful line holds two whitespace-separated fields: a
# base64-encoded token followed by its integer rank. Blank lines and lines
# whose first non-blank character is "#" are skipped.
#
# @param content [String] full text of a .tiktoken file
# @return [Hash{String => Integer}] decoded token mapped to its rank
# @raise [ParseError] if a line does not have exactly two fields, the token
#   is not valid strict base64, or the rank is not a decimal integer
def parse(content)
  ranks = {}

  content.each_line do |raw_line|
    line = raw_line.strip
    next if line.empty? || line.start_with?("#")

    fields = line.split(/\s+/)
    raise ParseError, "Invalid line format: #{line}" unless fields.length == 2

    encoded_token, rank_text = fields
    begin
      token = Base64.strict_decode64(encoded_token)
      # Force base 10: without an explicit base, Integer("010") is read as
      # octal (8) and "0x1F" as hex, silently yielding the wrong rank.
      ranks[token] = Integer(rank_text, 10)
    rescue ArgumentError => e
      # Both strict_decode64 and Integer signal bad input with ArgumentError.
      raise ParseError, "Failed to parse line: #{line} - #{e.message}"
    end
  end

  ranks
end