5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
# File 'lib/rb_lib_text.rb', line 5
# Builds the tokenizer regular expression: a single alternation of every
# token pattern listed below.
#
# Regexp.union keeps the alternatives in hash order and the regex engine takes
# the first alternative that matches at a position, so the specific patterns
# must stay ahead of the generic single-character fallbacks at the bottom.
#
# The sources are deliberately single-quoted strings: backslash sequences
# (other than \' and \\) reach Regexp.new untouched and are interpreted by the
# regex engine, including the \uXXXX / \u{...} code point escapes.
#
# @return [Regexp] union of all token patterns; compiled once and reused on
#   later calls
def self.or_pattern
  @or_pattern ||= begin
    patterns = {
      html_chars: '&\w+;',                               # HTML entities, e.g. &amp;
      numbers_commas: '[\-\$]?\d{1,3}(?:,\d{3})+',       # 1,000 / $12,345 / -1,000
      times: '\d?\d:\d{2}',                              # 5:30 / 12:45
      money: '-?\$?\d+[.]\d+%?',                         # decimals: 3.50 / $3.50 / 2.5%
      acronyms: '(?:\w{1}\.{1})+',                       # U.S.A.
      possessive_mentions: '@\w+',
      possessive_hashtags: '#\w+',
      tags_contractions: '[\w]+[\'‘’][\w]+',             # don't / it’s
      emails: '[\w\.\d]+@[\w\.\d]+\.[\w]+',
      urls: 'https?://[-_/~%\w\d\.]*[_/~\w\d]',          # last char may not be '.', '-' or '%'
      # Duplicate "D" removed from the mouth class; the set of matches is the same.
      sideways_text_emoji: '>?[:;=8][\'\-D\)\(3dPpOo\*\/]+',
      ellipses: '\.{3}',
      en_em_dash: '-{2,3}',                              # ASCII stand-ins: -- / ---
      slashes: '[\w]+(?:[/\-][\w]+)+',                   # and/or, well-known
      punct: '[\"“”‘’\'\\.\\?!…,:;»«\(\)]',
      tags_mentions: '[\w#@\d%$\u00B0]+',                # plain word-like runs
      hearts: '<+\/?3',                                  # <3 / </3
      # Ruby has no Python-style \UXXXXXXXX escape (it is read as a literal "U"),
      # so the code point ranges are spelled with \u{...}.
      emoji_block0: '[\u{2600}-\u{27BF}]',
      emoji_block1: '[\u{1F300}-\u{1F64F}]',
      emoji_block2: '[\u{1F680}-\u{1F6FF}]',
      other_punct: '[\u2014\u2013]',                     # em dash, en dash
      all_other: '[^\s]',                                # any other single non-space char
    }
    Regexp.union(patterns.values.map { |source| Regexp.new(source) })
  end
end
|