Class: Boilerpipe::Filters::TerminatingBlocksFinder
- Inherits: Object
- Inheritance hierarchy: Object → Boilerpipe::Filters::TerminatingBlocksFinder
- Defined in:
- lib/boilerpipe/filters/terminating_blocks_finder.rb
Class Method Summary
Class Method Details
.finds_match?(text) ⇒ Boolean
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/boilerpipe/filters/terminating_blocks_finder.rb', line 21

# Phrases that flag a block as terminating when the text begins with them.
TERMINATING_PREFIXES = [
  'comments',
  '© reuters',
  'please rate this',
  'post a comment'
].freeze

# Phrases that flag a block as terminating when they occur anywhere in the text.
TERMINATING_PHRASES = [
  'what you think...',
  'add your comment',
  'add comment',
  # TODO add this and test
  # 'leave a reply',
  # 'leave a comment',
  # 'show comments',
  # 'Share this:' (would have to be lowercase to match downcased input)
  'reader views',
  'have your say',
  'reader comments',
  'rätta artikeln'
].freeze

# A leading count followed by a comment/response phrase, e.g. "12 comments".
# Anchored with \A so only the very start of the text counts, not the start
# of any later line.
COMMENT_COUNT_PATTERN = /\A\d+ (comments|users responded in)/

# Matched only when it is the entire text of the block.
CLOSED_FEEDBACK_NOTICE = 'thanks for your comments - this feedback is now closed'.freeze

# Tells whether +text+ contains one of the known phrases that indicate the
# end of the main content (comment sections, rating prompts, copyright lines).
#
# All phrases are lowercase and compared case-sensitively, so the caller is
# expected to pass already-downcased text.
#
# @param text [String] the (lowercased) text of a block
# @return [Boolean] true when a terminating phrase is found, false otherwise
def self.finds_match?(text)
  text.start_with?(*TERMINATING_PREFIXES) ||
    text.match?(COMMENT_COUNT_PATTERN) || # starts with number
    TERMINATING_PHRASES.any? { |phrase| text.include?(phrase) } ||
    text == CLOSED_FEEDBACK_NOTICE
end
.process(doc) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 |
# File 'lib/boilerpipe/filters/terminating_blocks_finder.rb', line 7

# Labels short text blocks that signal the end of the main text with
# :INDICATES_END_OF_TEXT.
#
# Only blocks with fewer than 15 words are examined. A block is labelled when
# either:
# * its text is at least 8 characters long and its downcased form is accepted
#   by finds_match?, or
# * it consists entirely of link text (link density of 1.0) and reads
#   "comment" in any letter case.
#
# @param doc [#text_blocks] document whose text blocks are inspected; each
#   block must respond to num_words, text, link_density and labels
# @return [Object] the same +doc+, with labels appended in place
def self.process(doc)
  doc.text_blocks.each do |tb|
    next unless tb.num_words < 15

    # Downcase once so both checks are case-insensitive; the length test
    # stays on the original text.
    lowered = tb.text.downcase

    if tb.text.length >= 8 && finds_match?(lowered)
      tb.labels << :INDICATES_END_OF_TEXT
    elsif tb.link_density == 1.0 && lowered == 'comment'
      tb.labels << :INDICATES_END_OF_TEXT
    end
  end

  doc
end