Class: Boilerpipe::Filters::SimpleBlockFusionProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/boilerpipe/filters/simple_block_fusion_processor.rb

Class Method Summary collapse

Class Method Details

.process(doc) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/boilerpipe/filters/simple_block_fusion_processor.rb', line 5

# Fuses runs of consecutive text blocks that share the same text density.
#
# Walks the document's text blocks in order while tracking a "running"
# block. Each following block whose +text_density+ equals the running
# block's is merged into it via +merge_next+ and scheduled for removal;
# the first block with a different density becomes the new running block.
# Documents with fewer than two blocks are returned untouched.
#
# @param doc [#text_blocks, #replace_text_blocks!] document to process
# @return [Object] the same +doc+ that was passed in
def self.process(doc)
  blocks = doc.text_blocks
  return doc if blocks.size < 2

  anchor = blocks.first
  absorbed = blocks.drop(1).each_with_object([]) do |candidate, merged|
    # The anchor's density is re-read on every step, so any change that
    # merge_next makes to it is taken into account for the next comparison.
    if anchor.text_density == candidate.text_density
      anchor.merge_next(candidate)
      merged << candidate
    else
      anchor = candidate
    end
  end

  doc.replace_text_blocks!(blocks - absorbed)
  doc
end