Method: TorchText::Data::Utils#tokenizer

Defined in:
lib/torchtext/data/utils.rb

#tokenizer(tokenizer, language: "en") ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
# File 'lib/torchtext/data/utils.rb', line 4

# Resolves a tokenizer specification into a callable object.
#
# @param tokenizer [nil, String, #call] what to resolve:
#   * +nil+ - a Method object for +split_tokenizer+ is returned
#   * "basic_english" - a Method object for +basic_english_normalize+ is returned
#   * any object responding to +call+ - returned unchanged
# @param language [String] language code; only consulted when +tokenizer+ is
#   "basic_english", which accepts "en" only
# @return [Method, #call] the resolved tokenizer
# @raise [ArgumentError] if "basic_english" is requested with a language other than "en"
# @raise [RuntimeError] if +tokenizer+ is none of the supported values
def tokenizer(tokenizer, language: "en")
  return method(:split_tokenizer) if tokenizer.nil?

  if tokenizer == "basic_english"
    unless language == "en"
      raise ArgumentError, "Basic normalization is only available for English(en)"
    end

    return method(:basic_english_normalize)
  end

  # A caller-supplied callable (lambda, proc, Method, ...) is already a
  # tokenizer; hand it back as-is instead of rejecting it.
  return tokenizer if tokenizer.respond_to?(:call)

  raise "Not implemented yet"
end