Class: Tokenizers::CharBPETokenizer

Inherits:
Object
Defined in:
lib/tokenizers/char_bpe_tokenizer.rb

Instance Method Summary

Constructor Details

#initialize(vocab, merges) ⇒ CharBPETokenizer

Returns a new instance of CharBPETokenizer.



3
4
5
6
7
8
9
# File 'lib/tokenizers/char_bpe_tokenizer.rb', line 3

# Builds the wrapped tokenizer around a BPE model and configures it.
#
# The BPE model is created from +vocab+ and +merges+, "<unk>" is added as a
# special token, and the Bert normalizer, Bert pre-tokenizer and BPE decoder
# are attached, in that order.
#
# @param vocab forwarded unchanged to +BPE.new+ (presumably the vocabulary
#   source -- TODO confirm the expected type against BPE)
# @param merges forwarded unchanged to +BPE.new+ (presumably the merge rules
#   source -- TODO confirm the expected type against BPE)
def initialize(vocab, merges)
  model = BPE.new(vocab, merges)
  @tokenizer = Tokenizer.new(model)

  # Configuration happens after @tokenizer is assigned, one step at a time,
  # so the order of calls on the wrapped tokenizer is fixed.
  @tokenizer.add_special_tokens(["<unk>"])
  @tokenizer.normalizer = BertNormalizer.new
  @tokenizer.pre_tokenizer = BertPreTokenizer.new
  @tokenizer.decoder = BPEDecoder.new
end

Instance Method Details

#decode(ids) ⇒ Object



15
16
17
# File 'lib/tokenizers/char_bpe_tokenizer.rb', line 15

# Forwards +ids+ to the wrapped tokenizer's +decode+ and returns its result
# unchanged.
#
# @param ids passed through as-is (presumably token ids -- TODO confirm)
# @return [Object] whatever the wrapped tokenizer's +decode+ returns
def decode(ids)
  backend = @tokenizer
  backend.decode(ids)
end

#encode(text) ⇒ Object



11
12
13
# File 'lib/tokenizers/char_bpe_tokenizer.rb', line 11

# Forwards +text+ to the wrapped tokenizer's +encode+ and returns its result
# unchanged.
#
# @param text passed through as-is (presumably the input string -- TODO confirm)
# @return [Object] whatever the wrapped tokenizer's +encode+ returns
def encode(text)
  backend = @tokenizer
  backend.encode(text)
end