Method: Transformers::XlmRoberta::XLMRobertaTokenizerFast#initialize
- Defined in:
- lib/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.rb
#initialize(vocab_file: nil, tokenizer_file: nil, bos_token: "<s>", eos_token: "</s>", sep_token: "</s>", cls_token: "<s>", unk_token: "<unk>", pad_token: "<pad>", mask_token: "<mask>", **kwargs) ⇒ XLMRobertaTokenizerFast
The corresponding slow tokenizer class is XLMRobertaTokenizer (set via self.slow_tokenizer_class).
# File 'lib/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.rb', line 24

def initialize(
  vocab_file: nil,
  tokenizer_file: nil,
  bos_token: "<s>",
  eos_token: "</s>",
  sep_token: "</s>",
  cls_token: "<s>",
  unk_token: "<unk>",
  pad_token: "<pad>",
  mask_token: "<mask>",
  **kwargs
)
  # The mask token behaves like a normal word, i.e. it includes the space before it
  mask_token = mask_token.is_a?(String) ? Tokenizers::AddedToken.new(mask_token, lstrip: true, rstrip: false) : mask_token

  super(vocab_file, tokenizer_file: tokenizer_file, bos_token: bos_token, eos_token: eos_token, sep_token: sep_token, cls_token: cls_token, unk_token: unk_token, pad_token: pad_token, mask_token: mask_token, **kwargs)

  @vocab_file = vocab_file
end
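For context, the snippet below is a minimal sketch of calling the constructor directly with a locally exported tokenizer.json. The require line and the file path are illustrative assumptions, not taken from the gem's documentation; in typical use the tokenizer is loaded for you by the library's higher-level loading machinery rather than instantiated by hand.

  require "transformers-rb"  # gem name assumed; adjust to your setup

  # Hypothetical local file exported by Hugging Face tokenizers.
  tokenizer_path = "path/to/tokenizer.json"

  # All special tokens fall back to the defaults shown in the signature above
  # ("<s>", "</s>", "<unk>", "<pad>", "<mask>").
  tokenizer = Transformers::XlmRoberta::XLMRobertaTokenizerFast.new(
    tokenizer_file: tokenizer_path
  )

Note that when mask_token is given as a plain String (the default "<mask>"), the constructor wraps it in Tokenizers::AddedToken with lstrip: true and rstrip: false, so the space preceding "<mask>" is treated as part of the mask token rather than as separate text.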