Module: Softcover::Sanitizer

Extended by:
Sanitizer
Included in:
Sanitizer
Defined in:
lib/softcover/sanitizer.rb

Instance Method Summary collapse

Instance Method Details

#clean(html) ⇒ Object

Sanitizes untrusted generated HTML so that it is safe to display, while retaining a whitelist of useful tags and attributes.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/softcover/sanitizer.rb', line 10

def clean(html)
  return unless html

  sanitize_options = {
    elements: %w{div span p a ul ol li h1 h2 h3 h4
      pre em sup table tbody thead tr td img code strong blockquote},
    remove_contents: %w{script},
    attributes: {
      'div' => %w{id class data-tralics-id data-number data-chapter},
      'a'    => %w{id class href target},
      'span' => %w{id class style},
      'ol'   => %w{id class},
      'ul'   => %w{id class},
      'li'   => %w{id class},
      'sup'  => %w{id class},
      'h1'   => %w{id class},
      'h2'   => %w{id class},
      'h3'   => %w{id class},
      'h4'   => %w{id class},
      'img'  => %w{id class src alt},
      'em'   => %w{id class},
      'code' => %w{id class},
      'blockquote' => %w{id class},
      'strong' => %w{id class},
      'table'   => %w{id class},
      'tbody'   => %w{id class},
      'tr'   => %w{id class},
      'td'   => %w{id class colspan}
    },
    css: {
      properties: %w{color height width}
    },
    protocols: {
      'a'   => {'href' => [:relative, 'http', 'https', 'mailto']},
      'img' => {'src'  => [:relative, 'http', 'https']}
    },
    output: :xhtml
  }

  Sanitize.clean(html.force_encoding("UTF-8"), sanitize_options)
end