Class: Tokenizers::Tokenizer

Inherits: Object
Extended by:
FromPretrained
Defined in:
lib/tokenizers/tokenizer.rb

Constant Summary

Constants included from FromPretrained

FromPretrained::TOKENIZERS_VERSION

Instance Method Summary collapse

Methods included from FromPretrained

from_pretrained

Instance Method Details

#decode(ids, skip_special_tokens: true) ⇒ Object



21
22
23
# File 'lib/tokenizers/tokenizer.rb', line 21

# Decodes token ids back into a string.
#
# Thin wrapper over the native +_decode+ binding (defined outside this
# file, presumably in the Rust extension — verify there for exact types).
#
# @param ids [Object] token ids to decode (assumed Array<Integer> — TODO confirm against native binding)
# @param skip_special_tokens [Boolean] passed through to +_decode+; when true, special tokens are presumably omitted from the output
# @return [Object] whatever +_decode+ returns (assumed String)
def decode(ids, skip_special_tokens: true)
  _decode(ids, skip_special_tokens)
end

#decode_batch(sequences, skip_special_tokens: true) ⇒ Object



25
26
27
# File 'lib/tokenizers/tokenizer.rb', line 25

# Decodes multiple sequences of token ids in one call.
#
# Thin wrapper over the native +_decode_batch+ binding (defined outside
# this file).
#
# @param sequences [Object] batch of id sequences (assumed Array<Array<Integer>> — TODO confirm against native binding)
# @param skip_special_tokens [Boolean] passed through to +_decode_batch+
# @return [Object] whatever +_decode_batch+ returns (assumed Array<String>)
def decode_batch(sequences, skip_special_tokens: true)
  _decode_batch(sequences, skip_special_tokens)
end

#enable_padding(**options) ⇒ Object



29
30
31
# File 'lib/tokenizers/tokenizer.rb', line 29

# Enables padding on the tokenizer.
#
# Collects all keyword arguments into a Hash and forwards it as a single
# positional argument to the native +_enable_padding+ binding; the
# accepted option keys are defined by the native side (not visible here).
#
# @param options [Hash] padding options, forwarded verbatim
# @return [Object] whatever +_enable_padding+ returns
def enable_padding(**options)
  _enable_padding(options)
end

#enable_truncation(max_length, **options) ⇒ Object



33
34
35
# File 'lib/tokenizers/tokenizer.rb', line 33

# Enables truncation on the tokenizer.
#
# Forwards +max_length+ plus the remaining keyword arguments (as a Hash)
# to the native +_enable_truncation+ binding; valid option keys are
# defined by the native side (not visible here).
#
# @param max_length [Object] maximum sequence length (assumed Integer — TODO confirm against native binding)
# @param options [Hash] additional truncation options, forwarded verbatim
# @return [Object] whatever +_enable_truncation+ returns
def enable_truncation(max_length, **options)
  _enable_truncation(max_length, options)
end

#encode(sequence, pair = nil, is_pretokenized: false, add_special_tokens: true) ⇒ Object



13
14
15
# File 'lib/tokenizers/tokenizer.rb', line 13

# Encodes a single sequence (optionally with a paired sequence).
#
# Thin wrapper over the native +_encode+ binding: keyword arguments are
# flattened into positional booleans for the native call.
#
# @param sequence [Object] the input to encode (assumed String, or pre-tokenized Array when +is_pretokenized+ — TODO confirm)
# @param pair [Object, nil] optional second sequence for pair encoding
# @param is_pretokenized [Boolean] whether +sequence+ is already split into tokens
# @param add_special_tokens [Boolean] whether special tokens are presumably added by the native side
# @return [Object] whatever +_encode+ returns (assumed an Encoding object)
def encode(sequence, pair = nil, is_pretokenized: false, add_special_tokens: true)
  _encode(sequence, pair, is_pretokenized, add_special_tokens)
end

#encode_batch(input, is_pretokenized: false, add_special_tokens: true) ⇒ Object



17
18
19
# File 'lib/tokenizers/tokenizer.rb', line 17

# Encodes multiple inputs in one call.
#
# Thin wrapper over the native +_encode_batch+ binding; mirrors #encode
# but without a +pair+ parameter (pairs are presumably expressed inside
# +input+ itself — verify against native binding).
#
# @param input [Object] batch of inputs to encode (assumed Array)
# @param is_pretokenized [Boolean] whether the inputs are already split into tokens
# @param add_special_tokens [Boolean] forwarded to the native side
# @return [Object] whatever +_encode_batch+ returns (assumed Array of Encoding objects)
def encode_batch(input, is_pretokenized: false, add_special_tokens: true)
  _encode_batch(input, is_pretokenized, add_special_tokens)
end

#save(path, pretty: false) ⇒ Object



9
10
11
# File 'lib/tokenizers/tokenizer.rb', line 9

# Saves the tokenizer to disk.
#
# Thin wrapper over the native +_save+ binding; the on-disk format is
# determined by the native side (presumably JSON — TODO confirm).
#
# @param path [Object] destination file path (assumed String)
# @param pretty [Boolean] forwarded to +_save+; presumably controls pretty-printing of the output
# @return [Object] whatever +_save+ returns
def save(path, pretty: false)
  _save(path, pretty)
end

#to_s(pretty: false) ⇒ Object



5
6
7
# File 'lib/tokenizers/tokenizer.rb', line 5

# Serializes the tokenizer to a string.
#
# NOTE(review): overrides Object#to_s with an added keyword argument —
# zero-argument callers still work, so the override is call-compatible.
# Delegates to the native +_to_s+ binding (presumably a JSON
# serialization — TODO confirm).
#
# @param pretty [Boolean] forwarded to +_to_s+; presumably controls pretty-printing
# @return [Object] whatever +_to_s+ returns (assumed String)
def to_s(pretty: false)
  _to_s(pretty)
end

#vocab(with_added_tokens: true) ⇒ Object



37
38
39
# File 'lib/tokenizers/tokenizer.rb', line 37

# Returns the tokenizer's vocabulary.
#
# Thin wrapper over the native +_vocab+ binding.
#
# @param with_added_tokens [Boolean] forwarded to +_vocab+; presumably includes user-added tokens when true
# @return [Object] whatever +_vocab+ returns (assumed Hash of token => id — TODO confirm against native binding)
def vocab(with_added_tokens: true)
  _vocab(with_added_tokens)
end

#vocab_size(with_added_tokens: true) ⇒ Object



41
42
43
# File 'lib/tokenizers/tokenizer.rb', line 41

# Returns the size of the tokenizer's vocabulary.
#
# Thin wrapper over the native +_vocab_size+ binding; companion to #vocab
# with the same +with_added_tokens+ semantics.
#
# @param with_added_tokens [Boolean] forwarded to +_vocab_size+; presumably counts user-added tokens when true
# @return [Object] whatever +_vocab_size+ returns (assumed Integer)
def vocab_size(with_added_tokens: true)
  _vocab_size(with_added_tokens)
end