Class: PragmaticTokenizer::Languages::English::SingleQuotes

Inherits:
Object
  • Object
show all
Defined in:
lib/pragmatic_tokenizer/languages/english.rb

Instance Method Summary collapse

Instance Method Details

#handle_single_quotes(text) ⇒ Object



99
100
101
102
103
104
105
106
# File 'lib/pragmatic_tokenizer/languages/english.rb', line 99

# Replaces single-quote characters in +text+ with their placeholder symbols
# from PragmaticTokenizer::Languages::Common::PUNCTUATION_MAP, padding them
# with spaces so they are separated from the neighbouring characters.
#
# The string is modified in place (via gsub!) and the same object is returned.
#
# @param text [String] mutable string to rewrite
# @return [String] the same +text+ object, after substitution
def handle_single_quotes(text)
  punctuation_map = PragmaticTokenizer::Languages::Common::PUNCTUATION_MAP
  apostrophe = punctuation_map.fetch("'")

  # Convert left quotes to special character except for 'Twas or 'twas.
  # Group 1 (a non-word character or the start of a line) always takes part
  # in the match, so it is never nil and can be re-emitted unconditionally.
  text.gsub!(/(\W|^)'(?=.*\w)(?![Tt]was)/) { "#{Regexp.last_match(1)} #{apostrophe} " }

  # Same rule for the typographic left quote. The lookup key must be the
  # quote character itself: an empty-string key yields nil and makes the
  # replacement raise as soon as a left curly quote is matched.
  # NOTE(review): assumes PUNCTUATION_MAP has an entry for "‘"; fetch raises
  # KeyError (only when this rule fires) if it does not.
  text.gsub!(/(\W|^)‘(?=.*\w)(?![Tt]was)/) do
    "#{Regexp.last_match(1)} #{punctuation_map.fetch('‘')} "
  end

  # Whatever opening apostrophes remain are the 'twas / 'Twas cases skipped
  # above: keep the placeholder attached to the following word.
  # NOTE(review): the character matched before the apostrophe is replaced by
  # a single space here rather than preserved, unlike the rules above.
  text.gsub!(/(\W|^)'(?=.*\w)/, " #{apostrophe}")

  # Separate right single quotes
  text.gsub!(/(\w|\D)'(?!')(?=\W|$)/) { "#{Regexp.last_match(1)} #{apostrophe} " }

  # gsub! returns nil when nothing matched, so return the receiver explicitly.
  text
end