Module: Piola::SpecialChars

Defined in:
lib/piola/special_chars.rb

Constant Summary collapse

# Unicode code points of the accented letters accepted by normal_char? and
# spanish_char?: Á É Í Ñ Ó Ú Ü á é í ñ ó ú ü ö Ö.
# Frozen so the shared lookup table cannot be mutated by accident.
ACCENTS =
[193, 201, 205, 209, 211, 218, 220, 225, 233, 237, 241, 243, 250, 252, 246, 214].freeze

Instance Method Summary collapse

Instance Method Details

#clean_chars(options = {}) ⇒ Object

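Replaces commas and periods with spaces (unless kept via options), removes every character that is not a "normal" character (see #normal_char?), and collapses repeated spaces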



36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/piola/special_chars.rb', line 36

# Returns a copy of the string reduced to "normal" characters.
#
# Commas and periods are replaced by spaces unless the matching option is
# truthy, every character for which normal_char? is false is dropped, and
# runs of spaces are collapsed with surrounding whitespace stripped.
#
# @param options [Hash] :keep_commas and :keep_periods leave the respective
#   punctuation in place when truthy
# @return [String] the cleaned string
def clean_chars(options = {})
  punctuation = []
  punctuation << ',' unless options[:keep_commas]
  punctuation << '.' unless options[:keep_periods]

  spaced = punctuation.inject(self) { |text, mark| text.gsub(mark, ' ') }
  tidy = spaced.gsub(/ +/, ' ').strip

  # Keep only the characters accepted by normal_char?.
  kept = tidy.each_char.select { |char| char.normal_char? }.join

  # Dropping characters can leave doubled or dangling spaces behind.
  kept.gsub(/ +/, ' ').strip
end

#clean_text(remove_parens = true) ⇒ Object

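Cleans up text: applies html_decode, optionally remove_all_parenthesis, then turns newlines and tabs into spaces, collapses repeated spaces, and strips surrounding whitespace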



93
94
95
96
97
98
99
100
# File 'lib/piola/special_chars.rb', line 93

# Normalizes free text into a single, trimmed line.
#
# Applies html_decode, optionally remove_all_parenthesis, then converts
# newlines and tabs to spaces, collapses runs of spaces, and strips the ends.
# NOTE(review): html_decode and remove_all_parenthesis are defined outside
# this listing; their exact behavior is not visible here.
#
# @param remove_parens [Boolean] when truthy, remove_all_parenthesis is applied
# @return [String] the cleaned text
def clean_text(remove_parens = true)
  decoded = html_decode
  decoded = decoded.remove_all_parenthesis if remove_parens
  decoded.gsub(/\n|\t/, ' ').gsub(/ +/, ' ').strip
end

#clean_url ⇒ Object

Cleans a URL string by applying remove_enters and then remove_tabs



103
104
105
106
107
108
# File 'lib/piola/special_chars.rb', line 103

# Cleans a URL string by applying remove_enters followed by remove_tabs.
# NOTE(review): remove_enters is defined outside this listing; presumably it
# strips line breaks — confirm against its definition.
#
# @return [Object] whatever remove_tabs returns for the intermediate value
def clean_url
  remove_enters.remove_tabs
end

#downcase_special_chars ⇒ Object

Converts the uppercase Spanish accented characters (Á, É, Í, Ó, Ú, Ñ, Ü) to lowercase



59
60
61
62
63
64
65
66
67
68
69
# File 'lib/piola/special_chars.rb', line 59

# Returns a copy of the string with the uppercase Spanish accented letters
# (Á É Í Ó Ú Ñ Ü) replaced by their lowercase forms. All other characters,
# including plain ASCII capitals, are left untouched.
#
# @return [String] the converted string
def downcase_special_chars
  lowercase_for = {
    "Á" => "á",
    "É" => "é",
    "Í" => "í",
    "Ó" => "ó",
    "Ú" => "ú",
    "Ñ" => "ñ",
    "Ü" => "ü"
  }

  lowercase_for.inject(self) { |text, (upper, lower)| text.gsub(upper, lower) }
end

#normal_char? ⇒ Boolean

Returns:

  • (Boolean)


51
52
53
54
55
56
# File 'lib/piola/special_chars.rb', line 51

# Tells whether the string's first character is "normal": a printable ASCII
# character (code points 32 through 126) or one of the accented letters
# listed in ACCENTS.
#
# @return [Boolean] true when the character is accepted, false otherwise
def normal_char?
  code = ord
  code.between?(32, 126) || ACCENTS.include?(code)
end

#only_letters ⇒ Object

Removes every character that is not a letter (A–Z, a–z, or an accented letter in ACCENTS) or a space, then collapses repeated spaces



20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/piola/special_chars.rb', line 20

# Returns a copy of the string containing only the characters accepted by
# spanish_char?, separated by single spaces.
#
# Commas and periods are first turned into spaces so the words they separate
# do not fuse together once the punctuation is gone.
#
# @return [String] the filtered string
def only_letters
  spaced = gsub(',', ' ').gsub('.', ' ').gsub(/ +/, ' ').strip

  letters = spaced.each_char.select { |char| char.spanish_char? }.join

  # Removed characters may leave doubled or dangling spaces behind.
  letters.gsub(/ +/, ' ').strip
end

#remove_special_chars ⇒ Object

Replaces Spanish accented characters with their unaccented equivalents (e.g. Á → A, ñ → n, ü → u)



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/piola/special_chars.rb', line 72

# Returns a copy of the string with Spanish accented letters replaced by
# their unaccented counterparts (Á → A, ñ → n, ü → u, ...). Characters not
# listed below are left as they are.
#
# @return [String] the transliterated string
def remove_special_chars
  plain = self

  # Uppercase pairs first, then lowercase, applied one after another.
  [
    %w[Á A], %w[É E], %w[Í I], %w[Ó O], %w[Ú U], %w[Ñ N], %w[Ü U],
    %w[á a], %w[é e], %w[í i], %w[ó o], %w[ú u], %w[ñ n], %w[ü u]
  ].each do |accented, unaccented|
    plain = plain.gsub(accented, unaccented)
  end

  plain
end

#remove_tabs ⇒ Object

Removes tabs, collapses repeated spaces, and strips surrounding whitespace



111
112
113
114
115
116
117
# File 'lib/piola/special_chars.rb', line 111

# Returns a copy of the string with every tab deleted (not replaced by a
# space), runs of spaces collapsed to one, and surrounding whitespace
# stripped.
#
# @return [String] the string without tabs
def remove_tabs
  gsub(/\t/, "").gsub(/ +/, ' ').strip
end

#spanish_char? ⇒ Boolean

Determines whether a character is a Spanish letter (A–Z, a–z, or an accented letter in ACCENTS) or a space

Returns:

  • (Boolean)


10
11
12
13
14
15
16
17
# File 'lib/piola/special_chars.rb', line 10

# Tells whether the string's first character is a space, an ASCII letter
# (A-Z or a-z), or one of the accented letters listed in ACCENTS.
#
# @return [Boolean] true when the character is accepted, false otherwise
def spanish_char?
  code = ord

  case code
  when 32, 65..90, 97..122 then true
  else ACCENTS.include?(code)
  end
end