Module: PragmaticTokenizer::Languages::Common

Included in:
Arabic, Bulgarian, Catalan, Czech, Danish, Deutsch, Dutch, English, Finnish, French, Greek, Indonesian, Italian, Latvian, Norwegian, Persian, Polish, Portuguese, Romanian, Russian, Slovak, Spanish, Swedish, Turkish
Defined in:
lib/pragmatic_tokenizer/languages/common.rb

Defined Under Namespace

Classes: SingleQuotes

Constant Summary

PUNCTUATION_MAP =
{ "。" => "♳", "．" => "♴", "." => "♵", "！" => "♶", "!" => "♷", "?" => "♸", "？" => "♹", "、" => "♺", "¡" => "⚀", "¿" => "⚁", "„" => "⚂", "“" => "⚃", "[" => "⚄", "]" => "⚅", "\"" => "☇", "#" => "☈", "$" => "☉", "%" => "☊", "&" => "☋", "(" => "☌", ")" => "☍", "*" => "☠", "+" => "☢", "," => "☣", ":" => "☤", ";" => "☥", "<" => "☦", "=" => "☧", ">" => "☀", "@" => "☁", "^" => "☂", "_" => "☃", "`" => "☄", "'" => "☮", "{" => "♔", "|" => "♕", "}" => "♖", "~" => "♗", "-" => "♘", "«" => "♙", "»" => "♚", "”" => "⚘", "‘" => "⚭" }.freeze
ABBREVIATIONS =
Set.new([]).freeze
STOP_WORDS =
Set.new([]).freeze
CONTRACTIONS =
{}.freeze