Class: Boilerpipe::Filters::BoilerplateBlockFilter

Inherits:
Object
  • Object
show all
Defined in:
lib/boilerpipe/filters/boilerplate_block_filter.rb

Constant Summary collapse

# Shared filter instance constructed with the :TITLE label; its #process
# drops non-content text blocks except those labelled :TITLE.
INSTANCE_KEEP_TITLE =
BoilerplateBlockFilter.new(:TITLE)

Instance Method Summary collapse

Constructor Details

#initialize(label) ⇒ BoilerplateBlockFilter

Returns a new instance of BoilerplateBlockFilter.



5
6
7
# File 'lib/boilerpipe/filters/boilerplate_block_filter.rb', line 5

# Builds a filter and remembers which label, if any, #process should consult
# when deciding whether a non-content block is retained.
#
# @param label [Symbol, nil] label stored as +@label_to_keep+; +nil+ means
#   #process removes every non-content block
def initialize(label)
  @label_to_keep = label
end

Instance Method Details

#process(doc) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/boilerpipe/filters/boilerplate_block_filter.rb', line 10

# Strips non-content text blocks from +doc+, optionally sparing those that
# carry the label given to the constructor.
#
# A block is removed when +is_not_content?+ is true and either no label was
# configured (+@label_to_keep+ is nil) or the block lacks that label.
# Previously the check was hard-coded to +:TITLE+, so any other configured
# label was ignored; for +:TITLE+ the behaviour is unchanged.
#
# @param doc [#text_blocks, #replace_text_blocks!] document whose blocks are
#   filtered
# @return [Object] the same +doc+ that was passed in
def process(doc)
  # delete_if filters the receiver in place and returns it (assuming
  # text_blocks is an Array), so the same collection is handed back below.
  remaining = doc.text_blocks.delete_if do |tb|
    tb.is_not_content? &&
      (@label_to_keep.nil? || !tb.has_label?(@label_to_keep))
  end
  doc.replace_text_blocks!(remaining)
  doc
end