Class: Webtractor::Filters::RemoveSmallest

Inherits:
Object
  • Object
show all
Defined in:
lib/webtractor/filters/remove_smallest.rb

Instance Method Summary collapse

Instance Method Details

#explore(path, node) ⇒ Object



8
9
10
11
12
13
14
15
16
17
# File 'lib/webtractor/filters/remove_smallest.rb', line 8

# Walks +node+ and its descendants depth-first, detaching every node whose
# whitespace-separated word count is lower than the number of children it
# still has once those children have themselves been explored.
#
# @param path [String] slash-separated names of the ancestors visited so far;
#   it is extended with this node's name and handed to the recursive calls,
#   but is not otherwise consulted here
# @param node [#name, #text, #children, #remove] the node to examine
#   (presumably a Nokogiri node -- confirm against the caller)
# @return [Object, nil] whatever +node.remove+ returns when the node is
#   detached, otherwise nil
def explore(path, node)
  trail = path + "/#{node.name}"
  # Count the words before recursing: pruning descendants may change the text.
  word_total = (node.text || '').split.size

  node.children.each { |child| explore(trail, child) }

  # Children are re-read here so that already-pruned ones are not counted.
  return unless word_total < node.children.count

  node.remove
end

#process(page) ⇒ Object



3
4
5
6
# File 'lib/webtractor/filters/remove_smallest.rb', line 3

# Runs the filter over +page+, pruning sparse nodes beneath its body element.
#
# @param page [#name, #at] the parsed document to filter (presumably a
#   Nokogiri document -- confirm against the caller)
# @return [Object] the same +page+ object, pruned in place
def process(page)
  body = page.at('body')
  # A document without a body has nothing to prune; skip the walk rather
  # than handing nil to #explore, which would fail on +node.name+.
  explore(page.name, body) if body
  page
end