Module: NewsFetcher::Scrubber
- Defined in: lib/newsfetcher/scrubber.rb
Constant Summary collapse
# Scrubber that strips a trailing promotional section. When a node's text is
# exactly 'Help keep Vox free for all', it removes the nearest preceding
# sibling named 'hr', every following sibling, and the node itself, then
# returns Loofah::Scrubber::STOP. Any other node is left untouched.
# NOTE(review): the constant this scrubber is assigned to is not visible in
# this listing - confirm against lib/newsfetcher/scrubber.rb.
Loofah::Scrubber.new do |node|
  next unless node.text == 'Help keep Vox free for all'

  # Walk backwards through the preceding siblings and drop the first 'hr'
  # found (the rule that introduces the section).
  sibling = node.previous
  while sibling
    if sibling.name == 'hr'
      sibling.remove
      break
    end
    sibling = sibling.previous
  end

  # Everything after the matched node goes too.
  node.next.remove while node.next

  node.remove
  Loofah::Scrubber::STOP
end
- RemoveExtras =
# Scrubber that drops feed clutter:
# * <div class="feedflare"> elements are removed,
# * <img> elements whose height and width attributes are both '1' are removed,
# * <form> elements are replaced by their own children.
Loofah::Scrubber.new do |node|
  case node.name
  when 'div'
    node.remove if node['class'] == 'feedflare'
  when 'img'
    node.remove if node['height'] == '1' && node['width'] == '1'
  when 'form'
    # Keep the form's contents, discard only the wrapper element.
    node.replace(node.children)
  end
end
- RemoveStyling =
# Scrubber that strips presentational markup: <font>, <big> and <small>
# elements are replaced by their children; on every other node the 'style',
# 'class' and 'id' attributes are removed when present.
Loofah::Scrubber.new do |node|
  next node.replace(node.children) if %w[font big small].include?(node.name)

  %w[style class id].each do |attribute|
    node.remove_attribute(attribute) if node[attribute]
  end
end
- ReplaceBlockquote =
# Scrubber that rewrites each <blockquote> element as <div class="blockquote">.
Loofah::Scrubber.new do |node|
  next unless node.name == 'blockquote'

  node.name = 'div'
  node['class'] = 'blockquote'
end
Class Method Summary collapse
Class Method Details
.scrub_html(html) ⇒ Object
5 6 7 8 9 10 11 12 13 |
# File 'lib/newsfetcher/scrubber.rb', line 5 def self.scrub_html(html) Loofah.fragment(html). scrub!(:prune). scrub!(RemoveExtras). scrub!(). scrub!(RemoveStyling). scrub!(ReplaceBlockquote). to_html end |
.text_to_html(text) ⇒ Object
15 16 17 18 19 20 21 22 |
# File 'lib/newsfetcher/scrubber.rb', line 15
#
# Converts plain text to HTML via Simple::Builder.build_html: +text+ is split
# on "\n", each piece is emitted with +html.text+, and +html.br+ is called
# before every piece except the first. Returns +to_html+ of the built value.
#
# @param text the plain text to convert; must respond to +split+
# @return the value of +to_html+ on the object returned by build_html
def self.text_to_html(text)
  document = Simple::Builder.build_html do |html|
    text.split("\n").each_with_index do |line, index|
      html.br if index.positive?
      html.text(line)
    end
  end
  document.to_html
end