Class: Transformers::Bert::BertTokenizerFast

Inherits:
PreTrainedTokenizerFast
Defined in:
lib/transformers/models/bert/tokenization_bert_fast.rb

Constant Summary

VOCAB_FILES_NAMES =
{vocab_file: "vocab.txt", tokenizer_file: "tokenizer.json"}

Constants included from SpecialTokensMixin

SpecialTokensMixin::SPECIAL_TOKENS_ATTRIBUTES

Instance Attribute Summary

Attributes inherited from PreTrainedTokenizerBase

#init_kwargs, #model_max_length

Instance Method Summary

Methods inherited from PreTrainedTokenizerFast

#_convert_token_to_id_with_added_voc, #backend_tokenizer, #convert_ids_to_tokens, #convert_tokens_to_ids, #convert_tokens_to_string, #get_vocab, #is_fast, #vocab

Methods inherited from PreTrainedTokenizerBase

#_eventual_warn_about_too_long_sequence, _from_pretrained, #call, from_pretrained

Methods included from ClassAttribute

#class_attribute

Methods included from SpecialTokensMixin

#bos_token_id, #cls_token_id, #eos_token_id, #pad_token_id, #sep_token_id, #special_tokens_map, #unk_token_id

Constructor Details

#initialize(vocab_file: nil, tokenizer_file: nil, do_lower_case: true, unk_token: "[UNK]", sep_token: "[SEP]", pad_token: "[PAD]", cls_token: "[CLS]", mask_token: "[MASK]", tokenize_chinese_chars: true, strip_accents: nil, **kwargs) ⇒ BertTokenizerFast

Returns a new instance of BertTokenizerFast.



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/transformers/models/bert/tokenization_bert_fast.rb', line 23

def initialize(
  vocab_file: nil,
  tokenizer_file: nil,
  do_lower_case: true,
  unk_token: "[UNK]",
  sep_token: "[SEP]",
  pad_token: "[PAD]",
  cls_token: "[CLS]",
  mask_token: "[MASK]",
  tokenize_chinese_chars: true,
  strip_accents: nil,
  **kwargs
)
  super(
    vocab_file,
    tokenizer_file: tokenizer_file,
    do_lower_case: do_lower_case,
    unk_token: unk_token,
    sep_token: sep_token,
    pad_token: pad_token,
    cls_token: cls_token,
    mask_token: mask_token,
    tokenize_chinese_chars: tokenize_chinese_chars,
    strip_accents: strip_accents,
    **kwargs
  )
end