Class: Tokenizers::Trainers::WordPieceTrainer

Inherits:
Object
  • Object
show all
Defined in:
lib/tokenizers/trainers/word_piece_trainer.rb

Class Method Summary collapse

Class Method Details

.new(vocab_size: 30000, min_frequency: 0, show_progress: true, special_tokens: [], limit_alphabet: nil, initial_alphabet: [], continuing_subword_prefix: "##", end_of_word_suffix: nil) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/tokenizers/trainers/word_piece_trainer.rb', line 4

# Builds a WordPiece trainer from keyword options.
#
# Each keyword is stored under its own name in a single Hash, and that Hash
# is handed to +_new+ as one positional argument. +_new+ is defined outside
# this method (presumably by the native extension -- TODO confirm).
#
# @param vocab_size defaults to 30000
# @param min_frequency defaults to 0
# @param show_progress defaults to true
# @param special_tokens defaults to an empty Array
# @param limit_alphabet defaults to nil
# @param initial_alphabet defaults to an empty Array
# @param continuing_subword_prefix defaults to "##"
# @param end_of_word_suffix defaults to nil
# @return [Object] whatever +_new+ returns
def self.new(
  vocab_size: 30000,
  min_frequency: 0,
  show_progress: true,
  special_tokens: [],
  limit_alphabet: nil,
  initial_alphabet: [],
  continuing_subword_prefix: "##",
  end_of_word_suffix: nil
)
  # Kept as an explicit Hash (not bare keywords) so +_new+ receives exactly
  # one positional argument.
  options = {
    vocab_size: vocab_size,
    min_frequency: min_frequency,
    show_progress: show_progress,
    special_tokens: special_tokens,
    limit_alphabet: limit_alphabet,
    initial_alphabet: initial_alphabet,
    continuing_subword_prefix: continuing_subword_prefix,
    end_of_word_suffix: end_of_word_suffix
  }

  _new(options)
end