Module: Selma::Sanitizer::Config

Defined in:
lib/selma/sanitizer/config.rb,
lib/selma/sanitizer/config/basic.rb,
lib/selma/sanitizer/config/default.rb,
lib/selma/sanitizer/config/relaxed.rb,
lib/selma/sanitizer/config/restricted.rb

Constant Summary collapse

BASIC =
freeze_config(
  # Inline and simple block-level elements that are allowed.
  elements: %w[
    a abbr blockquote b br cite code dd dfn dl dt em i kbd li mark ol p pre
    q s samp small strike strong sub sup time u ul var
  ],

  # Attributes allowed per element; elements not listed here get none.
  attributes: {
    "a" => %w[href],
    "abbr" => %w[title],
    "blockquote" => %w[cite],
    "dfn" => %w[title],
    "q" => %w[cite],
    "time" => %w[datetime pubdate],
  },

  # URL protocols allowed per element/attribute pair. The :relative symbol
  # stands for relative URLs that carry no protocol.
  protocols: {
    "a" => {
      "href" => ["ftp", "http", "https", "mailto", :relative],
    },
    "blockquote" => {
      "cite" => ["http", "https", :relative],
    },
    "q" => {
      "cite" => ["http", "https", :relative],
    },
  },
)
VALID_PROTOCOLS =

Although there are many more protocol types (e.g., ftp, xmpp), these are the only ones that are allowed by default.

["http", "https", "mailto", :relative]
DEFAULT =
freeze_config(
  # Controls whether HTML comments survive sanitization. Leaving this off is
  # strongly recommended, because IE allows script execution within
  # conditional comments.
  allow_comments: false,

  # Controls whether well-formed doctype declarations (for example
  # "<!DOCTYPE html>") are kept when sanitizing a document.
  allow_doctype: false,

  # Per-element allow-list of HTML attributes. Empty by default, so no
  # attributes are allowed. The symbol :data indicates that arbitrary HTML5
  # data-* attributes should be allowed.
  attributes: {},

  # Allow-list of HTML elements. Empty by default, which means that all HTML
  # will be stripped.
  elements: [],

  # Per-attribute allow-list of URL protocols. Empty by default, so no
  # protocols are allowed. Use :relative in place of a protocol to allow
  # relative URLs sans protocol. Set to `:all` to allow any protocol.
  protocols: {},

  # Names of elements whose contents are removed. The contents of all other
  # filtered elements are left behind.
  remove_contents: %w[
    iframe math noembed noframes noscript plaintext script style svg xmp
  ],

  # Names of elements whose contents should be surrounded by whitespace when
  # the element itself is removed.
  whitespace_elements: %w[
    address article aside blockquote br dd div dl dt footer h1 h2 h3 h4 h5
    h6 header hgroup hr li nav ol p pre section ul
  ],
)
RELAXED =
freeze_config(
  # Everything BASIC allows plus the structural, tabular and media elements
  # below. Array union (`|`) is used instead of `+` so that an element that
  # is already part of BASIC (such as "sup") can never end up in the
  # allow-list twice.
  elements: BASIC[:elements] | [
    "address",
    "article",
    "aside",
    "bdi",
    "bdo",
    "body",
    "caption",
    "col",
    "colgroup",
    "data",
    "del",
    "details",
    "div",
    "figcaption",
    "figure",
    "footer",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "head",
    "header",
    "hgroup",
    "hr",
    "html",
    "img",
    "ins",
    "main",
    "nav",
    "rp",
    "rt",
    "ruby",
    "section",
    "span",
    "style",
    "summary",
    "table",
    "tbody",
    "td",
    "tfoot",
    "th",
    "thead",
    "title",
    "tr",
    "wbr",
  ],

  # Doctype declarations are kept in this configuration.
  allow_doctype: true,

  # BASIC's attribute lists merged with the entries below. An entry for an
  # element that BASIC also lists (e.g. "a") replaces BASIC's list rather
  # than extending it, which is why "href" is repeated here.
  attributes: merge(
    BASIC[:attributes],
    :all => ["class", "dir", "hidden", "id", "lang", "style", "tabindex", "title", "translate"],
    "a" => ["href", "hreflang", "name", "rel"],
    "col" => ["span", "width"],
    "colgroup" => ["span", "width"],
    "data" => ["value"],
    "del" => ["cite", "datetime"],
    "img" => ["align", "alt", "border", "height", "src", "srcset", "width"],
    "ins" => ["cite", "datetime"],
    "li" => ["value"],
    "ol" => ["reversed", "start", "type"],
    "style" => ["media", "scoped", "type"],
    "table" => [
      "align",
      "bgcolor",
      "border",
      "cellpadding",
      "cellspacing",
      "frame",
      "rules",
      "sortable",
      "summary",
      "width",
    ],
    "td" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "valign", "width"],
    "th" => ["abbr", "align", "axis", "colspan", "headers", "rowspan", "scope", "sorted", "valign", "width"],
    "ul" => ["type"],
  ),

  # BASIC's protocol rules plus rules for the URL-carrying attributes of the
  # additional elements.
  protocols: merge(
    BASIC[:protocols],
    "del" => { "cite" => ["http", "https", :relative] },
    "img" => { "src"  => ["http", "https", :relative] },
    "ins" => { "cite" => ["http", "https", :relative] },
  ),
)
RESTRICTED =
freeze_config(
  # Only a handful of inline text-formatting elements are allowed.
  elements: %w[b em i strong u],

  # Reuses the whitespace-element list from DEFAULT.
  whitespace_elements: DEFAULT[:whitespace_elements],
)

Class Method Summary collapse

Class Method Details

.can_dupe?(value) ⇒ Boolean

Returns `true` if `dup` may be safely called on value, `false` otherwise.

Returns:

  • (Boolean)


56
57
58
# File 'lib/selma/sanitizer/config.rb', line 56

# Reports whether `dup` may be called on +value+.
#
# Booleans, nil, Method objects, numbers and symbols are treated as not
# dupable; every other value is.
#
# @param value [Object] the value to inspect
# @return [Boolean] false for the kinds listed above, true otherwise
def can_dupe?(value)
  return false if value == true || value == false || value.nil?

  [Method, Numeric, Symbol].none? { |klass| value.is_a?(klass) }
end

.freeze_config(config) ⇒ Object

Deeply freezes and returns the given configuration Hash.



10
11
12
13
14
15
16
17
18
19
# File 'lib/selma/sanitizer/config.rb', line 10

# Recursively freezes +config+ and everything nested inside it.
#
# For a Hash the values are frozen recursively; for an Array or Set the
# members are. Any other object is simply frozen itself.
#
# @param config [Object] the configuration (or nested value) to freeze
# @return [Object] the same object that was passed in, now frozen
def freeze_config(config)
  nested =
    case config
    when Hash then config.values
    when Array, Set then config.to_a
    else []
    end

  nested.each { |child| freeze_config(child) }
  config.freeze
end

.merge(config, other_config = {}) ⇒ Object

Returns a new Hash containing the result of deeply merging other_config into config. Does not modify config or other_config.

This is the safest way to use a built-in config as the basis for your own custom config.

Raises:

  • (ArgumentError)


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/selma/sanitizer/config.rb', line 26

# Builds a new Hash by merging +other_config+ into +config+ without
# modifying either argument.
#
# For each key present in either Hash:
# * if both values are Hashes they are merged recursively (when the value in
#   +config+ is an empty Hash, a shallow copy of the other value is used);
# * otherwise an Array from +other_config+ replaces the old value with its
#   duplicate entries removed, unless the key is :transformers;
# * any other value from +other_config+ replaces the old one, as a shallow
#   copy when `can_dupe?` allows it;
# * keys found only in +config+ are carried over, again as a shallow copy
#   when `can_dupe?` allows it.
#
# @param config [Hash] the base configuration
# @param other_config [Hash] the overrides to apply on top of +config+
# @return [Hash] the merged configuration
# @raise [ArgumentError] if either argument is not a Hash
def merge(config, other_config = {})
  raise ArgumentError, "config must be a Hash" unless config.is_a?(Hash)
  raise ArgumentError, "other_config must be a Hash" unless other_config.is_a?(Hash)

  # Union of both key lists, in first-seen order.
  all_keys = Set.new(config.keys + other_config.keys)

  all_keys.each_with_object({}) do |key, result|
    base = config[key]

    unless other_config.key?(key)
      result[key] = can_dupe?(base) ? base.dup : base
      next
    end

    override = other_config[key]

    result[key] =
      if base.is_a?(Hash) && override.is_a?(Hash)
        base.empty? ? override.dup : merge(base, override)
      elsif override.is_a?(Array) && key != :transformers
        # Round-trip through a Set to drop duplicate entries.
        Set.new(override).to_a
      elsif can_dupe?(override)
        override.dup
      else
        override
      end
  end
end