Class: Webtractor::Filters::RemoveMenus

Inherits:
Object
  • Object
    • Webtractor::Filters::RemoveMenus
Defined in:
lib/webtractor/filters/remove_menus.rb

Instance Method Summary collapse

Instance Method Details

#explore(path, node) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/webtractor/filters/remove_menus.rb', line 23

# Depth-first sweep that removes link-dense elements below (and including) +node+.
#
# Every child is processed before +node+ itself, so the density check runs
# against whatever children remain after the recursive pass. A node is
# removed when it is not a <p>, has at least one direct <a> child, and those
# anchors make up 30% or more of its direct children.
#
# @param path [String] slash-separated trail of ancestor node names; it is
#   extended and handed down the recursion but not otherwise read here
# @param node [#name, #children, #xpath, #remove] node to inspect
#   (API matches Nokogiri's nodes; confirm against callers)
# @return [Object, nil] whatever +node.remove+ returns when the node is
#   removed, otherwise nil
def explore(path, node)
  trail = path + "/#{node.name}"

  node.children.each { |child| explore(trail, child) }

  # Paragraphs are never removed, however many links they hold.
  return if node.name == 'p'

  anchors = node.xpath('./a').size
  return unless anchors > 0

  # Share of direct children that are anchors (text nodes count as children).
  density = anchors.to_f / node.children.count
  node.remove if density >= 0.3
end

#process(page) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/webtractor/filters/remove_menus.rb', line 3

# Strips menu-like markup from +page+ in place and returns the same object.
#
# Three passes, in this order:
# 1. remove everything matched by a fixed list of navigation/sidebar selectors;
# 2. remove every <ul> whose linked anchors number at least half its <li> count;
# 3. hand the <body> element to #explore for the link-density sweep.
#
# @param page [#css, #at, #name] parsed document (API matches Nokogiri's;
#   confirm against callers)
# @return [Object] the +page+ that was passed in, mutated
def process(page)
  %w[nav #pane #carousel #sidebar .pane .carousel .sidebar].each do |selector|
    page.css(selector).remove
  end

  page.css('ul').each do |ul|
    li_count = ul.css('li').count
    # Anchors live inside <li> elements, not directly under <ul>, so search
    # descendants; a child-only query would almost never match and the
    # heuristic would only ever fire for lists without items.
    a_count = ul.xpath('.//a[@href]').count
    # A list with no items at all is removed as well (0 >= 0.0).
    ul.remove if a_count >= li_count / 2.0
  end

  # Fragments and malformed documents may have no <body>; skip the sweep
  # instead of calling #explore with nil.
  body = page.at('body')
  explore(page.name, body) if body

  page
end