Module: Softcover::Sanitizer

Extended by:
Sanitizer
Included in:
Sanitizer
Defined in:
lib/softcover/sanitizer.rb

Instance Method Summary collapse

Instance Method Details

#clean(html) ⇒ Object

Sanitizes untrusted generated HTML so that it is safe to display, while retaining useful tags and attributes.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/softcover/sanitizer.rb', line 10

# Sanitizes untrusted generated HTML for display, keeping only a whitelist
# of elements, attributes, CSS properties and URL protocols.
#
# The caller's string is left untouched: a copy is re-tagged as UTF-8 before
# being handed to Sanitize, so frozen strings are accepted as well.
#
# @param html [String, nil] markup to sanitize
# @return [Object, nil] the result of Sanitize.clean, or nil when +html+ is
#   nil or false
def clean(html)
  return unless html

  # Make a whitelist of acceptable elements and attributes.
  sanitize_options = {
    elements: %w{div span p a ul ol li h1 h2 h3 h4
                 pre em sup table tbody thead tr td img code strong
                 blockquote small},
    remove_contents: %w{script},
    attributes: {
      'div' => %w{id class data-tralics-id data-number data-chapter},
      'a'    => %w{id class href target rel},
      'span' => %w{id class style},
      'ol'   => %w{id class},
      'ul'   => %w{id class},
      'li'   => %w{id class},
      'sup'  => %w{id class},
      'h1'   => %w{id class},
      'h2'   => %w{id class},
      'h3'   => %w{id class},
      'h4'   => %w{id class},
      'img'  => %w{id class src alt},
      'em'   => %w{id class},
      'code' => %w{id class},
      'blockquote' => %w{id class},
      'strong' => %w{id class},
      'table'   => %w{id class},
      'tbody'   => %w{id class},
      'tr'   => %w{id class},
      'td'   => %w{id class colspan}
    },
    # Only these CSS properties are listed for style attributes.
    css: {
      properties: %w{color height width}
    },
    # Link and image URLs are limited to relative paths and these schemes.
    protocols: {
      'a'   => {'href' => [:relative, 'http', 'https', 'mailto']},
      'img' => {'src'  => [:relative, 'http', 'https']}
    },
    output: :xhtml
  }

  # String#force_encoding mutates its receiver and raises on a frozen
  # string, so re-tag a copy instead of the caller's object.
  utf8_html = html.dup.force_encoding("UTF-8")
  Sanitize.clean(utf8_html, sanitize_options)
end