Class: Boilerpipe::Extractors::LargestContentExtractor

Inherits:
Object
  • Object
show all
Defined in:
lib/boilerpipe/extractors/largest_content_extractor.rb

Class Method Summary collapse

Class Method Details

.process(doc) ⇒ Object



9
10
11
12
13
14
15
16
# File 'lib/boilerpipe/extractors/largest_content_extractor.rb', line 9

def self.process(doc)
  # Hand +doc+ to each filter in turn; filter return values are ignored.
  ::Boilerpipe::Filters::NumWordsRulesClassifier.process(doc)
  ::Boilerpipe::Filters::BlockProximityFusion::MAX_DISTANCE_1.process(doc)
  ::Boilerpipe::Filters::KeepLargestBlockFilter::INSTANCE.process(doc)

  doc # the same object that was passed in
end

.text(contents) ⇒ Object



3
4
5
6
7
# File 'lib/boilerpipe/extractors/largest_content_extractor.rb', line 3

def self.text(contents)
  # Parse +contents+, pass the parsed document to LargestContentExtractor.process,
  # then return that document's +content+ (the return value of process is unused).
  parsed = ::Boilerpipe::SAX::BoilerpipeHTMLParser.parse(contents)
  parsed.tap { |d| ::Boilerpipe::Extractors::LargestContentExtractor.process(d) }.content
end