Module: PragmaticTokenizer::PreProcessor

Defined in:
lib/pragmatic_tokenizer/pre_processor.rb

Instance Method Summary

Instance Method Details

#pre_process(language: Languages::Common) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/pragmatic_tokenizer/pre_processor.rb', line 4

# Runs the full pre-processing pipeline on the receiver.
#
# Each helper is invoked for its side effect only (return values are
# discarded); the value returned to the caller is the result of the final
# `squeeze` call, which collapses runs of spaces.
# NOTE(review): presumably the receiver is a String and the helpers mutate
# it in place -- confirm against the helper definitions.
#
# @param language [Object] forwarded only to `convert_sgl_quotes!`;
#   defaults to Languages::Common
# @return [Object] whatever `squeeze(' ')` returns for the receiver
def pre_process(language: Languages::Common)
  # Zero-argument helpers that run before single-quote conversion.
  # The sequence is kept exactly as originally written, since later
  # helpers may rely on what earlier ones did.
  before_single_quotes = %i[
    remove_non_breaking_space!
    shift_comma!
    shift_multiple_dash!
    shift_inverted_question_mark!
    shift_inverted_exclamation!
    shift_exclamation!
    shift_ellipse_three_dots!
    shift_ellipse_two_dots!
    shift_horizontal_ellipsis!
    shift_no_space_mention!
    shift_not_equals!
    shift_special_quotes!
    shift_colon!
    shift_bracket!
    shift_semicolon!
    shift_percent!
    shift_caret!
    shift_hashtag!
    shift_ampersand!
    shift_vertical_bar!
    convert_dbl_quotes!
  ]

  # Zero-argument helpers that run after single-quote conversion.
  after_single_quotes = %i[
    convert_apostrophe_s!
    shift_beginning_hyphen!
    shift_ending_hyphen!
  ]

  # `send` (not `public_send`) so the helpers may be private.
  before_single_quotes.each { |step| send(step) }

  # The only step that takes the language argument.
  convert_sgl_quotes!(language)

  after_single_quotes.each { |step| send(step) }

  squeeze(' '.freeze)
end