Class: Tokenizers::Trainers::UnigramTrainer

Inherits:
Object
  • Object
show all
Defined in:
lib/tokenizers/trainers/unigram_trainer.rb

Class Method Summary collapse

Class Method Details

.new(vocab_size: 8000, show_progress: true, special_tokens: [], initial_alphabet: [], shrinking_factor: 0.75, unk_token: nil, max_piece_length: 16, n_sub_iterations: 2) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/tokenizers/trainers/unigram_trainer.rb', line 4

# Collects every keyword option into a single Hash (keys in the order of the
# signature) and hands that Hash to `_new` as one positional argument.
#
# NOTE(review): `_new` is not defined in this listing; presumably it is the
# low-level constructor supplied by the native extension -- confirm.
#
# @param vocab_size forwarded as :vocab_size; defaults to 8000
# @param show_progress forwarded as :show_progress; defaults to true
# @param special_tokens forwarded as :special_tokens; defaults to a new empty Array
# @param initial_alphabet forwarded as :initial_alphabet; defaults to a new empty Array
# @param shrinking_factor forwarded as :shrinking_factor; defaults to 0.75
# @param unk_token forwarded as :unk_token; defaults to nil
# @param max_piece_length forwarded as :max_piece_length; defaults to 16
# @param n_sub_iterations forwarded as :n_sub_iterations; defaults to 2
# @return [Object] whatever `_new` returns
def self.new(vocab_size: 8000, show_progress: true, special_tokens: [],
             initial_alphabet: [], shrinking_factor: 0.75, unk_token: nil,
             max_piece_length: 16, n_sub_iterations: 2)
  options = {}
  options[:vocab_size] = vocab_size
  options[:show_progress] = show_progress
  options[:special_tokens] = special_tokens
  options[:initial_alphabet] = initial_alphabet
  options[:shrinking_factor] = shrinking_factor
  options[:unk_token] = unk_token
  options[:max_piece_length] = max_piece_length
  options[:n_sub_iterations] = n_sub_iterations

  _new(options)
end