Module: Card::Content::Clean

Included in:
Card::Content
Defined in:
lib/card/content/clean.rb

Overview

Tools for cleaning content, especially for restricting unwanted HTML.

Constant Summary collapse

ALLOWED_TAGS =
allowed_tags.freeze
ATTR_VALUE_RE =
[/(?<=^')[^']+(?=')/, /(?<=^")[^"]+(?=")/, /\S+/].freeze

Instance Method Summary collapse

Instance Method Details

#clean!(string, tags = ALLOWED_TAGS) ⇒ Object

Method that cleans the String of HTML tags and attributes outside of the allowed list.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/card/content/clean.rb', line 35

# Returns a copy of +string+ with HTML tags and attributes outside the
# allow-list removed, and with HTML comments removed.
#
# A tag whose downcased name is a key of +tags+ is rebuilt from the tag name
# plus only those listed attributes for which #process_attribute yields a
# non-blank value. Any other tag is replaced by a single space. HTML comments
# are stripped afterwards, including comments that span several lines.
#
# @param string [String] the markup to clean
# @param tags [Hash] allowed tag names (lowercase Strings) mapped to a
#   collection of the attribute names permitted on that tag
# @return [String] the cleaned markup
def clean! string, tags=ALLOWED_TAGS
  without_tags = string.gsub(%r{<(/*)(\w+)([^>]*)>}) do
    # capture the MatchData now: #process_attribute runs its own regexps
    raw = Regexp.last_match
    tag = raw[2].downcase
    if (attrs = tags[tag])
      html_attribs =
        attrs.each_with_object([tag]) do |attr, pcs|
          q, rest_value = process_attribute attr, raw[3]
          pcs << "#{attr}=#{q}#{rest_value}#{q}" unless rest_value.blank?
        end * " "
      # raw[1] holds the slash(es) of a closing tag, or "" for an opening tag
      "<#{raw[1]}#{html_attribs}>"
    else
      " "
    end
  end
  # /m lets "." cross newlines, so multi-line comments are stripped too
  without_tags.gsub(/<!--.*?-->/m, "")
end

#clean_with_space_last!(string, tags = ALLOWED_TAGS) ⇒ Object



53
54
55
56
# File 'lib/card/content/clean.rb', line 53

# Runs clean_without_space_last! on the arguments and then, wherever a single
# space (at the start of a line or at a word boundary) is followed by a run of
# "&nbsp;" entities, moves that space to the end of the run.
#
# NOTE(review): clean_without_space_last! is defined outside this view;
# presumably it is the original clean! — confirm.
def clean_with_space_last! string, tags=ALLOWED_TAGS
  clean_without_space_last!(string, tags)
    .gsub(/(?:^|\b) ((?:&nbsp;)+)/, '\1 ')
end

#process_attribute(attrib, all_attributes) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
# File 'lib/card/content/clean.rb', line 60

# Looks up +attrib+ in the raw attribute text of a tag and extracts its value.
#
# @param attrib [String] attribute name to look for (matched
#   case-insensitively)
# @param all_attributes [String] everything between the tag name and ">"
# @return [Array] a pair of the quote character to wrap the value in and the
#   value returned by #process_attribute_match; the value is nil when the
#   attribute is not present
def process_attribute attrib, all_attributes
  found = /\b#{attrib}\s*=\s*(?=(.))/i.match all_attributes
  return ['"', nil] unless found

  # the lookahead captures the first character of the value without
  # consuming it, so post_match still starts with any opening quote
  first_char = found[1]
  quote_idx = %w(' ").index first_char
  quote = quote_idx ? first_char : '"'
  # index 2 is the pattern used for values that do not start with a quote
  pattern = ATTR_VALUE_RE[quote_idx || 2]
  [quote, process_attribute_match(found.post_match, pattern, attrib)]
end

#process_attribute_match(rest_value, reg_exp, attrib) ⇒ Object

NOTE: for the "class" attribute, only classes beginning with "w-" are kept (deprecated)



73
74
75
76
77
78
79
80
81
# File 'lib/card/content/clean.rb', line 73

# Narrows +rest_value+ to the part matched by +reg_exp+.
#
# For the "class" attribute, only the whitespace-separated names that start
# with "w-" (case-insensitive) are kept, joined by single spaces.
#
# @param rest_value [String] text following "attrib=" in the tag
# @param reg_exp [Regexp] pattern that picks the attribute value out of it
# @param attrib [String] name of the attribute being processed
# @return [String] the extracted value, or +rest_value+ unchanged when
#   +reg_exp+ does not match
def process_attribute_match rest_value, reg_exp, attrib
  found = rest_value.match reg_exp
  return rest_value unless found

  value = found[0]
  return value unless attrib == "class"

  value.split(/\s+/).grep(/^w-/i).join(" ")
end