Module: PragmaticTokenizer::PreProcessor

Defined in:
lib/pragmatic_tokenizer/pre_processor.rb

Instance Method Summary

Instance Method Details

#pre_process(language: Languages::Common) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
# File 'lib/pragmatic_tokenizer/pre_processor.rb', line 4

# Runs the pre-processing steps on the receiver in a fixed order and returns
# the result of collapsing runs of consecutive spaces.
#
# Every step is a helper defined outside this excerpt, so its exact effect is
# not visible here.
# NOTE(review): the bang-suffixed names suggest each helper mutates the
# receiver in place, and the bare `squeeze` call suggests the receiver is a
# String (i.e. this module is mixed into or refines String) -- confirm.
#
# @param language [Object] language definition forwarded only to
#   `convert_single_quotes!`; defaults to `Languages::Common`.
#   NOTE(review): presumably a module from the `Languages` namespace -- confirm.
# @return [Object] whatever `squeeze(' ')` returns for the receiver
#   (presumably a new String with repeated spaces reduced to one -- confirm).
def pre_process(language: Languages::Common)
  remove_non_breaking_space!
  shift_various_characters!
  # The URL-colon step runs before the generic colon step.
  # NOTE(review): presumably so colons inside URLs are handled before the
  # remaining colons are shifted -- verify against the helper definitions.
  replace_colon_in_url!
  shift_remaining_colons!
  shift_hashtag!
  convert_double_quotes!
  # The only step that receives the language argument.
  convert_single_quotes!(language)
  convert_acute_accent_s!
  shift_hyphens!
  # Non-bang call: its return value is the return value of this method.
  squeeze(' '.freeze)
end