Class: Spider::WhiteListSanitizer

Inherits:
Sanitizer show all
Defined in:
lib/spiderfw/utils/sanitizer.rb

Instance Method Summary collapse

Methods inherited from Sanitizer

#sanitize, #sanitizeable?

Instance Method Details

#sanitize_css(style) ⇒ Object

Sanitizes a block of CSS code. Used by #sanitize when it comes across a style attribute.



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/spiderfw/utils/sanitizer.rb', line 108

# Sanitizes a block of CSS declarations (e.g. the value of a +style+
# attribute) and returns only the declarations considered safe.
#
# The input is processed in three steps:
# 1. every <tt>url(...)</tt> expression is replaced by a single space;
# 2. the whole string must pass two "gauntlet" patterns, otherwise the
#    result is an empty string;
# 3. each <tt>property: value</tt> pair is kept when the property is listed
#    in +allowed_css_properties+, or when the part of the property name
#    before the first dash is listed in +shorthand_css_properties+ and every
#    whitespace-separated token of the value is either listed in
#    +allowed_css_keywords+ or looks like a colour / length literal.
#
# The gauntlet patterns are anchored with <tt>\A</tt> and <tt>\z</tt>. In
# Ruby <tt>^</tt> and <tt>$</tt> match at every line boundary, so a check
# anchored with them succeeds as soon as a single line of a multi-line value
# is well-formed, letting arbitrary content on the other lines through.
#
# @param style [#to_s] the raw CSS text; +nil+ is treated as an empty string
# @return [String] the kept declarations, each rendered as
#   <tt>"prop: value;"</tt> and joined with single spaces; <tt>''</tt> when
#   the input is rejected or nothing is kept
def sanitize_css(style)
    # disallow urls
    style = style.to_s.gsub(/url\s*\(\s*[^\s)]+?\s*\)\s*/, ' ')

    # gauntlet: the *entire* string must consist of harmless tokens ...
    harmless_tokens = /\A([:,;#%.\sa-zA-Z0-9!]|\w-\w|\'[\s\w]+\'|\"[\s\w]+\"|\([\d,\s]+\))*\z/
    # ... and must be a plain sequence of "name: value" declarations. The
    # inner `$` is deliberate: it lets a declaration end at a line break.
    declaration_list = /\A(\s*[-\w]+\s*:\s*[^:;]*(;|$)\s*)*\z/
    return '' if style !~ harmless_tokens || style !~ declaration_list

    # Colour and length literals accepted as values of shorthand properties.
    # Tokens come from a whitespace split, so they never contain newlines.
    safe_literal = /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/

    clean = []
    style.scan(/([-\w]+)\s*:\s*([^:;]*)/) do |prop, val|
        if allowed_css_properties.include?(prop.downcase)
            clean << prop + ': ' + val + ';'
        elsif shorthand_css_properties.include?(prop.split('-')[0].downcase)
            # A single unknown token is enough to drop the whole declaration.
            suspicious = val.split.any? do |keyword|
                !allowed_css_keywords.include?(keyword) && keyword !~ safe_literal
            end
            clean << prop + ': ' + val + ';' unless suspicious
        end
    end
    clean.join(' ')
end