Class: PragmaticTokenizer::Languages::Common::SingleQuotes

Inherits:
Object
  • Object
show all
Defined in:
lib/pragmatic_tokenizer/languages/common.rb

Constant Summary collapse

# Matches an ASCII apostrophe (') that is preceded by a non-word character or
# the start of a line (captured as group 1), has at least one word character
# somewhere later on the same line, and is not immediately followed by
# "twas" or "Twas". The /o flag only affects #{} interpolation, and none of
# these patterns interpolate, so it has no effect here.
REGEXP_LEFT_QUOTES1 =
/(\W|^)'(?=.*\w)(?!twas)(?!Twas)/o
# Same shape as REGEXP_LEFT_QUOTES1, but for the left curly quote (‘) instead
# of the ASCII apostrophe.
REGEXP_LEFT_QUOTES2 =
/(\W|^)‘(?=.*\w)(?!twas)(?!Twas)/o
# Same as REGEXP_LEFT_QUOTES1 without the "twas"/"Twas" exclusion, so it also
# matches the apostrophes that REGEXP_LEFT_QUOTES1 deliberately skips.
REGEXP_LEFT_QUOTES3 =
/(\W|^)'(?=.*\w)/o
# Matches an ASCII apostrophe that is not followed by another apostrophe and
# is followed by a non-word character or the end of a line. Group 1 captures
# the single preceding character; (\w|\D) accepts any character, because
# digits fall under \w and everything else falls under \D.
REGEXP_RIGHT_SIDE_QUOTES =
/(\w|\D)'(?!')(?=\W|$)/o

Instance Method Summary collapse

Instance Method Details

#handle_single_quotes(text) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/pragmatic_tokenizer/languages/common.rb', line 24

# Replaces single-quote characters in +text+ with the placeholder strings
# registered for them in PUNCTUATION_MAP, padding each placeholder with
# spaces so that it is separated from the neighbouring word.
#
# Four in-place substitutions run in this order:
# 1. REGEXP_LEFT_QUOTES1 - opening apostrophes, except before "twas"/"Twas".
# 2. REGEXP_LEFT_QUOTES3 - the opening apostrophes that step 1 skipped; the
#    placeholder is left attached to the following word (no trailing space).
# 3. REGEXP_RIGHT_SIDE_QUOTES - closing apostrophes.
# 4. REGEXP_LEFT_QUOTES2 - opening left curly quotes (‘).
#
# @param text [String] the string to process; it is modified in place
# @return [String] the same +text+ object that was passed in
def handle_single_quotes(text)
  punctuation = PragmaticTokenizer::Languages::Common::PUNCTUATION_MAP

  apostrophe = punctuation["'"]
  text.gsub!(REGEXP_LEFT_QUOTES1, "\\1 #{apostrophe} ")
  # NOTE(review): this replacement has no \1, so the character captured in
  # front of the apostrophe is replaced by a single space - confirm intended.
  # Interpolation (rather than ' ' << ...) avoids mutating a string literal,
  # which raises FrozenError under `# frozen_string_literal: true`.
  text.gsub!(REGEXP_LEFT_QUOTES3, " #{apostrophe}")
  text.gsub!(REGEXP_RIGHT_SIDE_QUOTES, "\\1 #{apostrophe} ")

  # Look up the placeholder for the same character REGEXP_LEFT_QUOTES2
  # matches; an empty-string key would not select the curly-quote entry.
  left_curly_quote = punctuation['‘']
  text.gsub!(REGEXP_LEFT_QUOTES2, "\\1 #{left_curly_quote} ")

  # gsub! returns nil when nothing matched, so return the string explicitly.
  text
end