Class: Boilerpipe::Filters::ListAtEndFilter

Inherits:
Object
  • Object
show all
Defined in:
lib/boilerpipe/filters/list_at_end_filter.rb

Constant Summary collapse

MAX =
99999999

Class Method Summary collapse

Class Method Details

.process(doc) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/boilerpipe/filters/list_at_end_filter.rb', line 8

# Promotes list-item blocks that directly follow a very-likely-content block.
#
# Walks doc.text_blocks in order while tracking an "anchor" tag level:
# * a block that is content and labelled :VERY_LIKELY_CONTENT sets the
#   anchor to its own tag level;
# * a following block whose tag level is greater than the anchor, that is
#   labelled both :MIGHT_BE_CONTENT and :LI and has a link density of 0,
#   is flagged as content (the anchor is left unchanged);
# * any other block resets the anchor to MAX, so nothing after it is
#   promoted until a new anchor block is seen.
#
# @param doc [#text_blocks] document whose text blocks are updated in place
# @return [Object] the same doc that was passed in
def self.process(doc)
  anchor_level = MAX

  doc.text_blocks.each do |block|
    # A confirmed content block becomes the new anchor for what follows.
    if block.is_content? && block.has_label?(:VERY_LIKELY_CONTENT)
      anchor_level = block.tag_level
      next
    end

    # Checks run in this order and short-circuit on the first failure.
    trailing_list_item = block.tag_level > anchor_level &&
                         block.has_label?(:MIGHT_BE_CONTENT) &&
                         block.has_label?(:LI) &&
                         block.link_density == 0

    if trailing_list_item
      block.content = true
    else
      # Run of list items is broken; no block can exceed MAX, so
      # promotion stays off until the next anchor.
      anchor_level = MAX
    end
  end

  doc
end