Class: PragmaticTokenizer::Languages::French::SingleQuotes

Inherits:
Object
  • Object
show all
Defined in:
lib/pragmatic_tokenizer/languages/french.rb

Constant Summary collapse

# Matches an apostrophe together with the single character before it, when the
# apostrophe is not immediately followed by another apostrophe and is followed
# by a non-word character or the end of a line. The preceding character is
# captured as group 1; since `\w|\D` covers digits and non-digits alike, any
# character qualifies. Used by #handle_single_quotes, which re-inserts group 1.
REGEXP_UNKNOWN1 =
/(\w|\D)'(?!')(?=\W|$)/o
# Matches an apostrophe that sits at the start of a line or right after a
# non-word character, provided at least one word character appears later on
# the same line (`.` does not cross newlines). The preceding non-word
# character, when there is one, is part of the match and captured as group 1.
# Used by #handle_single_quotes, whose replacement does not re-insert group 1.
REGEXP_UNKNOWN2 =
/(\W|^)'(?=.*\w)/o

Instance Method Summary collapse

Instance Method Details

#handle_single_quotes(text) ⇒ Object



14
15
16
17
18
19
20
21
# File 'lib/pragmatic_tokenizer/languages/french.rb', line 14

# Rewrites apostrophes / single quotes in +text+ as placeholder glyphs,
# modifying +text+ in place.
#
# Three substitutions run in order:
# 1. REGEXP_UNKNOWN1 matches: the character before the apostrophe is kept and
#    the apostrophe becomes the placeholder surrounded by spaces.
# 2. REGEXP_UNKNOWN2 matches: the whole match (apostrophe plus any leading
#    non-word character) becomes a space followed by the placeholder.
# 3. Any remaining "l'" or "L'" becomes " l☮ " / " L☮ ".
#
# @param text [String] string to rewrite; must be unfrozen because it is
#   edited with gsub!
# @return [String] the same +text+ object that was passed in
def handle_single_quotes(text)
  replacement = PragmaticTokenizer::Languages::Common::PUNCTUATION_MAP["'"]

  text.gsub!(REGEXP_UNKNOWN1, "\\1 #{replacement} ")

  # Interpolation builds a fresh string rather than appending to a literal,
  # so this line keeps working under `# frozen_string_literal: true`.
  text.gsub!(REGEXP_UNKNOWN2, " #{replacement}")

  # One pass handles both letter cases; group 1 carries the matched letter
  # through to the output.
  # NOTE(review): the glyph is hard-coded here instead of using `replacement`;
  # presumably both are the same character -- confirm against PUNCTUATION_MAP.
  text.gsub!(/([lL])'/, ' \1☮ ')

  # gsub! returns nil when nothing matched, so return the receiver explicitly.
  text
end