Class: PragmaticTokenizer::Languages::Common::SingleQuotes

Inherits:
Object
  • Object
show all
Defined in:
lib/pragmatic_tokenizer/languages/common.rb

Constant Summary collapse

ALNUM_QUOTE =
/(\w|\D)'(?!')(?=\W|$)/
QUOTE_WORD =
/(\W|^)'(?=\w)/
QUOTE_NOT_TWAS1 =
/(\W|^)'(?!twas)/i
QUOTE_NOT_TWAS2 =
/(\W|^)‘(?!twas)/i

Instance Method Summary collapse

Instance Method Details

#handle_single_quotes(text) ⇒ Object

This ‘special treatment’ of "'twas" also affects many other tests. TODO: alter the core regular expressions instead of special-casing it here.



17
18
19
20
21
22
23
24
25
# File 'lib/pragmatic_tokenizer/languages/common.rb', line 17

# Replaces single-quote characters in +text+ with their placeholder symbols
# from PUNCTUATION_MAP, padding them with spaces.
#
# The passes run in a fixed order:
# 1. QUOTE_NOT_TWAS1 / QUOTE_NOT_TWAS2 handle a straight (') or left curly (‘)
#    quote that follows a non-word character or a line start, unless the
#    quote is immediately followed by "twas" (case-insensitive).
# 2. QUOTE_WORD handles the straight quotes skipped above that directly
#    precede a word character (e.g. "'twas"); no space is inserted between
#    the placeholder and the word.
# 3. ALNUM_QUOTE handles the remaining straight quotes that are followed by
#    a non-word character or a line end.
#
# @param text [String] the text to process; it is modified in place
# @return [String] the same +text+ object
def handle_single_quotes(text)
  straight_symbol = PUNCTUATION_MAP["'"]
  # QUOTE_NOT_TWAS2 matches the left curly quote, so its placeholder must be
  # looked up under that same character (the key used to be an empty string).
  # NOTE(review): assumes PUNCTUATION_MAP has an entry for "‘" - confirm.
  curly_symbol = PUNCTUATION_MAP["‘"]

  # Replacement strings are built with + instead of << so that no string
  # literal is mutated; a missing map entry still raises TypeError.
  padded_straight = '\1 ' + straight_symbol + ' '
  padded_curly    = '\1 ' + curly_symbol + ' '

  # special treatment for "'twas"
  text.gsub!(QUOTE_NOT_TWAS1, padded_straight)
  text.gsub!(QUOTE_NOT_TWAS2, padded_curly)

  # NOTE(review): this replacement does not re-insert \1, so the character
  # matched before the quote is replaced by a single space.
  text.gsub!(QUOTE_WORD,  ' ' + straight_symbol)
  text.gsub!(ALNUM_QUOTE, padded_straight)
  text
end