Module: PragmaticTokenizer::Languages

Defined in:
lib/pragmatic_tokenizer/languages.rb,
lib/pragmatic_tokenizer/languages/czech.rb,
lib/pragmatic_tokenizer/languages/dutch.rb,
lib/pragmatic_tokenizer/languages/greek.rb,
lib/pragmatic_tokenizer/languages/arabic.rb,
lib/pragmatic_tokenizer/languages/common.rb,
lib/pragmatic_tokenizer/languages/danish.rb,
lib/pragmatic_tokenizer/languages/french.rb,
lib/pragmatic_tokenizer/languages/polish.rb,
lib/pragmatic_tokenizer/languages/slovak.rb,
lib/pragmatic_tokenizer/languages/catalan.rb,
lib/pragmatic_tokenizer/languages/deutsch.rb,
lib/pragmatic_tokenizer/languages/english.rb,
lib/pragmatic_tokenizer/languages/finnish.rb,
lib/pragmatic_tokenizer/languages/italian.rb,
lib/pragmatic_tokenizer/languages/latvian.rb,
lib/pragmatic_tokenizer/languages/persian.rb,
lib/pragmatic_tokenizer/languages/russian.rb,
lib/pragmatic_tokenizer/languages/spanish.rb,
lib/pragmatic_tokenizer/languages/swedish.rb,
lib/pragmatic_tokenizer/languages/turkish.rb,
lib/pragmatic_tokenizer/languages/romanian.rb,
lib/pragmatic_tokenizer/languages/bulgarian.rb,
lib/pragmatic_tokenizer/languages/norwegian.rb,
lib/pragmatic_tokenizer/languages/indonesian.rb,
lib/pragmatic_tokenizer/languages/portuguese.rb

Defined Under Namespace

Modules: Arabic, Bulgarian, Catalan, Common, Czech, Danish, Deutsch, Dutch, English, Finnish, French, Greek, Indonesian, Italian, Latvian, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Slovak, Spanish, Swedish, Turkish

Constant Summary

# Lookup table from a language-code symbol to the language module
# registered for it. The keys look like ISO 639-1 two-letter codes
# (NOTE(review): confirm against the project's documentation).
# The table is frozen, so entries cannot be added or changed in place.
LANGUAGE_CODES =
{
    en: English,
    ar: Arabic,
    bg: Bulgarian,
    ca: Catalan,
    cs: Czech,
    da: Danish,
    de: Deutsch,
    el: Greek,
    es: Spanish,
    fa: Persian,
    fi: Finnish,
    fr: French,
    id: Indonesian,
    it: Italian,
    lv: Latvian,
    nl: Dutch,
    # :nn, :nb and :no all resolve to the same Norwegian module.
    nn: Norwegian,
    nb: Norwegian,
    no: Norwegian,
    pl: Polish,
    pt: Portuguese,
    ro: Romanian,
    ru: Russian,
    sk: Slovak,
    sv: Swedish,
    tr: Turkish
}.freeze

Class Method Summary

Class Method Details

.get_language_by_code(code) ⇒ Object



59
60
61
62
# File 'lib/pragmatic_tokenizer/languages.rb', line 59

# Resolves a language code to the module registered for it in LANGUAGE_CODES.
#
# @param code [#to_sym, nil] language code such as "de" or :de; nil or false
#   selects the :en entry
# @return [Object] the LANGUAGE_CODES entry for the code, or Common when the
#   code has no entry (no case normalisation is applied, so "EN" is unknown)
def self.get_language_by_code(code)
  key = (code || :en).to_sym
  # Every table value is a module (truthy), so a missing key is the only
  # situation in which the Common fallback is used.
  LANGUAGE_CODES.fetch(key) { Common }
end