Class: Webtractor::Filters::RemoveMenus

Inherits:
Object
  • Object
show all
Defined in:
lib/webtractor/filters/remove_menus.rb

Instance Method Summary collapse

Instance Method Details

#explore(path, node) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/webtractor/filters/remove_menus.rb', line 19

# Walks the subtree rooted at +node+ depth-first (children before the node
# itself) and removes nodes whose direct children are dominated by links.
#
# A node is removed when it has at least one direct +a+ child and those
# anchors make up more than 30% of its child nodes. +p+ elements are never
# removed themselves, although their descendants are still visited.
#
# @param path [String] slash-separated names of the ancestors visited so far;
#   it is only extended and handed down the recursion, never inspected here
# @param node [#name, #children, #xpath, #remove] node to inspect
#   (presumably a Nokogiri node -- confirm against the caller)
# @return [Object, nil] whatever +node.remove+ returns when the node is
#   removed, +nil+ otherwise
def explore(path, node)
  node_path = path + "/#{node.name}"

  # Descend first, so the ratio below is computed on the children that
  # survived their own check.
  node.children.each { |child| explore(node_path, child) }

  return if node.name == 'p'

  anchor_total = node.xpath('./a').size
  return if anchor_total.zero?

  node.remove if anchor_total.to_f / node.children.count > 0.3
end

#process(page) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/webtractor/filters/remove_menus.rb', line 3

# Strips navigation-like markup from +page+ in place.
#
# Steps, in order:
# 1. Remove every +nav+, +.pane+ and +.carousel+ element.
# 2. Remove each +ul+ whose number of linked anchors is at least half its
#    number of +li+ items (both counted over all descendants of the list).
# 3. Run #explore over the +body+ subtree to drop link-dominated containers.
#
# @param page [#css, #at, #name] parsed document
#   (presumably a Nokogiri document -- confirm against the caller)
# @return [Object] the same +page+ object, mutated
def process(page)
  %w[nav .pane .carousel].each { |selector| page.css(selector).remove }

  page.css('ul').each do |ul|
    li_count = ul.css('li').count
    # Anchors normally sit inside the <li> items, not directly under the
    # <ul>, so count them over all descendants -- the same scope as li_count.
    a_count = ul.xpath('.//a[@href]').count
    ul.remove if a_count >= li_count / 2.0
  end

  # A document without a <body> (e.g. empty input or a fragment) has nothing
  # left to explore; skip instead of passing nil down.
  body = page.at('body')
  explore(page.name, body) if body

  page
end