Module: ModelTokenizer::Base

Defined in:
lib/model_tokenizer/base.rb

Defined Under Namespace

Modules: InstanceMethods

Constant Summary collapse

# The 56 single-character strings that make up the tokenizer's character set:
# lowercase letters except "l", uppercase letters except "I", "O", "Q", "U"
# and "V", the digits 2-9, and the symbols "-" and "_".
# NOTE(review): the omitted characters look like the visually ambiguous ones
# (l/I/1, O/Q/0, U/V) -- confirm the intent with the author.
# Frozen so that the shared constant cannot be mutated in place.
CHARSET = %w[
  a b c d e f g h i j k   m n o p q r s t u v w x y z
  A B C D E F G H   J K L M N   P   R S T     W X Y Z
  2 3 4 5 6 7 8 9
  - _
].freeze
@@model_tokenizer_token_length =

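The default token length is 14 characters. With the 56-character CHARSET, the formula 56!/(14!*(56-14)!) — the number of ways to choose 14 distinct characters out of 56 when order is ignored — gives 5,804,731,963,800 unique tokens.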

14

Instance Method Summary collapse

Instance Method Details

#has_token(*attributes) ⇒ Object



18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/model_tokenizer/base.rb', line 18

# Macro-style method that switches on token support for the receiver.
#
# If the last argument is a Hash it is removed from +attributes+ and treated
# as options; the only option read here is :length. A :length that is not an
# Integer, or that is smaller than 8, is replaced by the current value of
# @@model_tokenizer_token_length. The resulting length is then written back
# to @@model_tokenizer_token_length and InstanceMethods is included into the
# receiver.
def has_token(*attributes)
  overrides = attributes.last.is_a?(Hash) ? attributes.pop : {}
  options = { :length => @@model_tokenizer_token_length }
  options.merge!(overrides)

  requested = options[:length]
  unless requested.is_a?(Integer) && requested >= 8
    # Unusable length: fall back to the currently stored length.
    options[:length] = @@model_tokenizer_token_length
  end

  # Note that this overwrites the shared class variable on every call.
  @@model_tokenizer_token_length = options[:length]

  include InstanceMethods
end

#model_tokenizer_token_length ⇒ Object



14
15
16
# File 'lib/model_tokenizer/base.rb', line 14

# Returns the current value of the @@model_tokenizer_token_length class
# variable. That variable is reassigned by each call to has_token, so the
# value returned here reflects the most recent has_token call rather than a
# fixed default.
def model_tokenizer_token_length
  @@model_tokenizer_token_length
end