Class: Webtractor::Filters::BiggestBlock

Inherits:
Object
  • Object
show all
Defined in:
lib/webtractor/filters/biggest_block.rb

Instance Method Summary collapse

Constructor Details

#initialize(threshold = 50.0) ⇒ BiggestBlock

Returns a new instance of BiggestBlock.



3
4
5
# File 'lib/webtractor/filters/biggest_block.rb', line 3

# Builds a filter with the given cut-off.
#
# @param threshold [Numeric] stored as-is in @threshold; +process+ compares
#   it against the drop, in percentage points, between consecutive block
#   sizes (defaults to 50.0)
def initialize(threshold = 50.0)
  @threshold = threshold
end

Instance Method Details

#explore(path, node) ⇒ Object



26
27
28
29
30
31
32
33
34
35
# File 'lib/webtractor/filters/biggest_block.rb', line 26

# Recursively records +node+ and all of its descendants in @nodes, keyed by
# the length of each node's text (0 when the node has no text).
#
# Nodes whose text has the same length share a key, so the node visited last
# (depth-first, children in document order) replaces earlier ones.
#
# @param path [String] slash-separated trail of ancestor names; it is only
#   extended with this node's name and handed down to the recursive calls
# @param node [#name, #text, #children] node to record
# @return [Object] whatever +node.children.each+ returns
def explore(path, node)
  child_path = "#{path}/#{node.name}"

  # Read the text once: it is needed both for the nil check and for its
  # length, and computing it may be costly on large subtrees (depends on the
  # node implementation).
  text = node.text
  @nodes[text ? text.size : 0] = node

  node.children.each { |child| explore(child_path, child) }
end

#process(page) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/webtractor/filters/biggest_block.rb', line 7

# Picks the last "big" text block found under the page's <body>.
#
# Every node below <body> is recorded by +explore+ under its text length.
# The entries are then walked from largest to smallest, each size expressed
# as a percentage of the largest one. As soon as the percentage drops by more
# than @threshold points from one entry to the next, the node just before
# the drop is returned.
#
# @param page [#name, #at] parsed page; +at('body')+ must return the body
#   node or nil
# @return [Object] the node preceding the first drop larger than @threshold,
#   or +page+ itself when there is no <body> or no such drop
def process(page)
  @nodes = {}

  body = page.at('body')
  # No <body>, nothing to explore: fall back to the page, the same result
  # as when no cut-off point is found below.
  return page unless body

  explore(page.name, body)
  # Largest text size first.
  @nodes = Hash[@nodes.sort.reverse]

  max = @nodes.keys.first
  last_percents = 100.0
  last_node = @nodes.values.first

  @nodes.each do |size, node|
    # When max is 0 this is 0.0 / 0, i.e. NaN; comparisons with NaN are
    # false, so the loop falls through and the page is returned.
    percents = size.to_f / max * 100
    return last_node if last_percents - percents > @threshold

    last_percents = percents
    last_node = node
  end

  page
end