Module: Sanitizer

Defined in:
lib/sanitizer/version.rb,
lib/sanitizer/sanitizer.rb

Constant Summary collapse

VERSION =
"0.1.7"
@@htmle =

HTMLEntities

HTMLEntities.new

Class Method Summary collapse

Class Method Details

.clean_spaces(text) ⇒ Object



15
16
17
18
19
# File 'lib/sanitizer/sanitizer.rb', line 15

# Collapses every run of whitespace (spaces, tabs, line breaks) in +text+
# into a single space character.
#
# @param text [String] the string to normalise
# @return [String] a modified copy; +text+ itself is left untouched
def clean_spaces(text)
  text.dup.tap { |collapsed| collapsed.gsub!(/\s+/, " ") }
end

.entities_to_chars(text) ⇒ Object

Alguns feeds retornam tags “escapadas” dentro do conteúdo (ex: <br/>). Este método deve ser utilizado após o stripping e a sanitização, para não deixar que essas tags sejam exibidas como conteúdo.



74
75
76
77
78
79
# File 'lib/sanitizer/sanitizer.rb', line 74

# Turns the escaped angle brackets +&lt;+ and +&gt;+ (matched
# case-insensitively) back into literal "<" and ">" characters.
#
# Some feeds deliver "escaped" tags inside their content; this is meant to be
# called after stripping and sanitising.
#
# @param text [String] the string holding escaped angle brackets
# @return [String] a modified copy; +text+ itself is left untouched
def entities_to_chars(text)
  replacements = [
    [/\&lt;/uim, "<"],
    [/\&gt;/uim, ">"]
  ]
  replacements.each_with_object(text.dup) do |(pattern, char), decoded|
    decoded.gsub!(pattern, char)
  end
end

.html_decode(text) ⇒ Object

Convert HTML entities back to the characters they represent



67
68
69
70
# File 'lib/sanitizer/sanitizer.rb', line 67

# Decodes the HTML entities in +text+ using the shared @@htmle coder.
#
# @param text [#to_s] the value to decode; it is converted with +to_s+ first
# @return [String] the decoded text, as returned by the coder
def html_decode(text)
  # HTMLEntities#decode takes only the source string; instructions such as
  # :named are an option of #encode, so none is passed here.
  @@htmle.decode(text.to_s)
end

.html_encode(text) ⇒ Object

Convert invalid chars to HTML Entities



61
62
63
64
# File 'lib/sanitizer/sanitizer.rb', line 61

# Encodes +text+ with the shared @@htmle coder, passing it the :named
# instruction.
#
# @param text [#to_s] the value to encode; it is converted with +to_s+ first
# @return [Object] whatever the coder's +encode+ call returns
def html_encode(text)
  @@htmle.encode(text.to_s, :named)
end

.sanitize(text) ⇒ Object



8
9
10
11
12
13
# File 'lib/sanitizer/sanitizer.rb', line 8

# Runs the full clean-up pipeline on +text+: strip_tags first, then
# clean_spaces, and finally html_encode.
#
# @param text [String] the raw text to sanitise
# @return [Object] the result of the final html_encode step
def sanitize(text)
  without_tags = strip_tags(text)
  single_spaced = clean_spaces(without_tags)
  html_encode(single_spaced)
end

.strip_comments(text) ⇒ Object



21
22
23
24
25
26
# File 'lib/sanitizer/sanitizer.rb', line 21

# Removes HTML comments from +text+, both in raw form (<!-- ... -->) and in
# entity-escaped form (&lt;!-- ... --&gt;).
#
# @param text [String] the markup to clean
# @return [String] a modified copy; +text+ itself is left untouched
def strip_comments(text)
  output = text.dup
  # Lazy `.*?` stops at the first terminator, so text sitting between two
  # separate comments survives; /m lets a comment span several lines.
  output.gsub!(/<!--.*?-->/um, "")
  output.gsub!(/&lt;\s?!--.*?--&gt;/uim, "")
  output
end

.strip_disallowed_tags(text) ⇒ Object

Remove all <script>, <link> and <style> tags



29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/sanitizer/sanitizer.rb', line 29

# Removes <script>...</script> and <style>...</style> blocks together with
# their contents, plus any stray <script ...> or <link ...> tag. The same
# clean-up is applied to the entity-escaped form (&lt;script&gt; and so on).
#
# @param text [String] the markup to clean
# @return [String] a modified copy; +text+ itself is left untouched
def strip_disallowed_tags(text)
  # Work on a copy so the caller's string is not mutated and frozen input is
  # accepted.
  output = text.dup

  # Lazy `.*?` and `[^>]*` keep each match inside one tag or one block, so
  # markup that follows a disallowed tag is not swallowed.
  output.gsub!(%r{<script\b[^>]*>.*?</script\s*>}uim, "")
  output.gsub!(%r{<style\b[^>]*>.*?</style\s*>}uim, "")
  output.gsub!(%r{<(?:script|link)\b[^>]*>}uim, "")

  # Entity-escaped variants of the same tags.
  output.gsub!(%r{&lt;script\b.*?&gt;.*?&lt;/script\s*&gt;}uim, "")
  output.gsub!(%r{&lt;style\b.*?&gt;.*?&lt;/style\s*&gt;}uim, "")
  output.gsub!(%r{&lt;(?:script|link)\b.*?&gt;}uim, "")
  output
end

.strip_tags(text, *tags) ⇒ Object

Remove all tags from text, or only the given tags when any are passed



45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/sanitizer/sanitizer.rb', line 45

# Removes markup tags from +text+, in raw form (<b>) and in entity-escaped
# form (&lt;b&gt;). Only the tags themselves are removed; the text between an
# opening and a closing tag is kept.
#
# @param text [String] the markup to clean
# @param tags [Array<String, Symbol>] tag names to remove; when none are
#   given, every tag is removed
# @return [String] a modified copy; +text+ itself is left untouched
def strip_tags(text, *tags)
  output = text.dup
  if tags.empty? # clear all tags by default
    output.gsub!(/<\/?[^>]*>/uim, "")
    # Lazy match up to the first "&gt;". A character class cannot express
    # "anything but the sequence &gt;"; it would instead reject the single
    # letters g and t inside tag names.
    output.gsub!(/&lt;\/?.*?&gt;/uim, "")
  else # clean only selected tags
    # Escape each name so it is matched literally.
    names = tags.map { |tag| Regexp.escape(tag.to_s) }.join("|")
    # \b ends the tag name, so asking for "b" leaves <br> and <body> alone.
    output.gsub!(/<\/?(?:#{names})\b[^>]*>/uim, "")
    output.gsub!(/&lt;\/?(?:#{names})\b.*?&gt;/uim, "")
  end
  output
end