Class: Boilerpipe::Filters::DensityRulesClassifier

Inherits:
Object
  • Object
show all
Defined in:
lib/boilerpipe/filters/density_rules_classifier.rb

Class Method Summary collapse

Class Method Details

.classify(prev, current, nxt) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/boilerpipe/filters/density_rules_classifier.rb', line 22

# Decides the flag for +current+ by comparing its link density and text
# density, and those of its two neighbours, against fixed thresholds.
#
# @param prev [#link_density, #text_density] block preceding +current+
# @param current [#link_density, #text_density] block being judged
# @param nxt [#text_density] block following +current+
# @return [Boolean] the decision; +process+ assigns it to +current.content+
def self.classify(prev, current, nxt)
  # A current block whose link density exceeds 0.333333 is always rejected.
  return false if current.link_density > 0.333333

  # Predecessor link density above 0.555556: accept only when the next
  # block's text density is not <= 11. The negated `<=` is deliberate so
  # that values for which every comparison is false (e.g. NaN) still yield
  # true here.
  return !(nxt.text_density <= 11) unless prev.link_density <= 0.555556

  if current.text_density <= 9
    # Sparse current block: accept if the next block is denser than 10,
    # otherwise fall back to whether the previous block is not <= 4.
    nxt.text_density > 10 || !(prev.text_density <= 4)
  else
    # Dense current block: accept unless the next block's density is zero.
    nxt.text_density != 0
  end
end

.process(doc) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/boilerpipe/filters/density_rules_classifier.rb', line 8

# Runs +classify+ over every text block of +doc+ together with its immediate
# neighbours and writes each result into that block's +content+ attribute.
#
# No early return is needed for short documents: with the padding below, a
# single block still forms one window and an empty list forms none.
#
# @param doc [#text_blocks] document whose blocks are updated in place
# @return [Object] the same +doc+ that was passed in
def self.process(doc)
  # One object from TextBlock.empty_start is placed at both ends so the
  # first and last real blocks have a neighbour on each side.
  # NOTE(review): presumably an empty placeholder block; confirm in TextBlock.
  boundary = Boilerpipe::Document::TextBlock.empty_start
  padded = [boundary].concat(doc.text_blocks).push(boundary)

  # Slide a three-wide window; the middle element is the one being updated.
  padded.each_cons(3) do |prev, current, nxt|
    current.content = classify(prev, current, nxt)
  end

  doc
end