Class: FeedParserUtilities::SanitizerDoc

Inherits:
Hpricot::Doc
  • Object
show all
Defined in:
lib/rfeedparser/scrub.rb

Instance Method Summary collapse

Instance Method Details

#scrub ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/rfeedparser/scrub.rb', line 160

# Sanitizes this document's child nodes and returns the result as one String.
#
# Each child is handled according to its kind:
#
# * Elements whose name is listed in Acceptable_Elements are kept:
#   +strip_attributes+ is called on them and their inner HTML is replaced
#   with the scrubbed output of their own children.
# * Elements whose name is listed in Unacceptable_Elements_With_End_Tag are
#   dropped together with everything inside them.
# * Any other element is dropped, but the scrubbed output of its children
#   is kept in its place.
# * Doctype nodes are dropped.
# * Text nodes are emitted with the <tt>&apos;</tt> and <tt>&quot;</tt>
#   entities decoded to literal quote characters and carriage returns removed.
# * Nodes of any other kind are dropped.
#
# @return [String] the concatenation of everything that was kept
def scrub
  others = children.map do |e|
    if e.elem?
      if Acceptable_Elements.include?(e.name)
        e.strip_attributes
        e.inner_html = SanitizerDoc.new(e.children).scrub
        e
      elsif !Unacceptable_Elements_With_End_Tag.include?(e.name)
        # The important part: the unknown tag itself disappears, but its
        # (recursively scrubbed) content survives.
        SanitizerDoc.new(e.children).scrub
      end
    elsif e.doctype?
      nil
    elsif e.text?
      # Non-mutating calls: the string handed back by to_html is left intact.
      e.to_html.gsub('&apos;', "'").gsub('&quot;', '"').delete("\r")
    end
  end

  unless $compatible # FIXME nonworking
    # NOTE(review): `tag` is not defined anywhere in this method, so this
    # branch presumably only works (or is skipped) thanks to state set up
    # elsewhere -- confirm before relying on it.
    # yes, that '/' should be there. It's a search method. See the Hpricot docs.
    (self/tag).strip_style(@config[:allow_css_properties], @config[:allow_css_keywords])
  end

  # compact discards the nil placeholders left by dropped nodes.
  others.compact.join
end