Class: Spider::FullSanitizer

Inherits:
Sanitizer show all
Defined in:
lib/spiderfw/utils/sanitizer.rb

Instance Method Summary collapse

Methods inherited from Sanitizer

#sanitizeable?

Instance Method Details

#process_node(node, result, options) ⇒ Object



42
43
44
# File 'lib/spiderfw/utils/sanitizer.rb', line 42

# Appends the string form of +node+ to +result+, but only when +node+ is
# exactly an HTML::Text instance; any other node is skipped.
#
# node    - the node to inspect.
# result  - a collector responding to <<.
# options - accepted for interface compatibility; not read here.
#
# Returns whatever +result <<+ returns when the node is kept, nil otherwise.
def process_node(node, result, options)
    return unless node.instance_of?(HTML::Text)

    result << node.to_s
end

#sanitize(text, options = {}) ⇒ Object



33
34
35
36
37
38
39
40
# File 'lib/spiderfw/utils/sanitizer.rb', line 33

# Sanitizes +text+ via the parent implementation, then removes HTML comments,
# repeating until a pass leaves the text unchanged.
#
# text    - the String to sanitize (a nil result from the parent is passed
#           through untouched).
# options - options Hash forwarded to the parent implementation.
#
# Returns the sanitized String, or nil when the parent returns nil.
def sanitize(text, options = {})
    result = super
    # Strip any comments, and if a comment is followed by a newline (i.e. a
    # line holding only a comment) strip that newline too.
    # Non-destructive gsub on purpose: the parent may hand back the very
    # object it was given, and editing it in place would alter the caller's
    # string (or raise on a frozen one).
    result = result.gsub(/<!--(.*?)-->\n?/m, "") if result
    # Recurse - removing a comment can expose new markup (dirty nested tags),
    # so keep going until the output stops changing.
    result == text ? result : sanitize(result, options)
end