Module: ClassifierReborn::Summarizer
Instance Method Summary
- #paragraph_summary(str, count = 1, separator = " [...] ") ⇒ Object
- #perform_lsi(chunks, count, separator) ⇒ Object
- #split_paragraphs(str) ⇒ Object
- #split_sentences(str) ⇒ Object
- #summary(str, count = 10, separator = " [...] ") ⇒ Object
Instance Method Details
#paragraph_summary(str, count = 1, separator = " [...] ") ⇒ Object
13 14 15 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 13
#
# Builds a summary of +str+ at paragraph granularity: the text is cut into
# paragraphs by #split_paragraphs and those chunks are passed to #perform_lsi.
#
# @param str [String] the text to summarize
# @param count [Integer] number of chunks requested from #perform_lsi (default 1)
# @param separator [String] text placed between the selected chunks
# @return [Object] whatever #perform_lsi returns for the paragraph chunks
def paragraph_summary(str, count = 1, separator = " [...] ")
  paragraphs = split_paragraphs(str)
  perform_lsi(paragraphs, count, separator)
end
#perform_lsi(chunks, count, separator) ⇒ Object
25 26 27 28 29 30 31 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 25
#
# Indexes the given text chunks in a fresh ClassifierReborn::LSI and joins the
# chunks it reports via LSI#highest_relative_content into one string.
#
# Chunks that are blank, or that contain only a single whitespace-delimited
# token once stripped, are not added to the index.
#
# @param chunks [Array<String>] candidate pieces of text (sentences, paragraphs, ...)
# @param count [Integer] passed straight to LSI#highest_relative_content
# @param separator [String] text inserted between the selected chunks
# @return [String] the selected chunks, each stripped, joined by +separator+
def perform_lsi(chunks, count, separator)
  # auto_rebuild is disabled so the index is built once, explicitly, below.
  lsi = ClassifierReborn::LSI.new(auto_rebuild: false)

  chunks.each do |chunk|
    stripped = chunk.strip
    next if stripped.empty? || stripped.split.size == 1

    lsi << chunk
  end
  lsi.build_index

  # The previous implementation additionally ran
  # `summaries.reject { |c| !summaries.include?(c) }`, which compares the array
  # against itself and therefore never removed anything; it has been dropped.
  # Ordering is whatever LSI#highest_relative_content returns.
  lsi.highest_relative_content(count).map(&:strip).join(separator)
end
#split_paragraphs(str) ⇒ Object
21 22 23 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 21
#
# Splits +str+ into paragraphs on a blank line, i.e. a doubled line ending
# written as "\n\n", "\r\r" or "\r\n\r\n".
#
# The separator pattern is deliberately non-capturing: String#split inserts
# the text of every capture group into its result, so the earlier grouped
# pattern returned the blank-line delimiters as if they were paragraphs.
#
# @param str [String] the text to split
# @return [Array<String>] the paragraphs, without the separating blank lines
def split_paragraphs(str)
  str.split(/\n\n|\r\r|\r\n\r\n/) # TODO: make this less primitive
end
#split_sentences(str) ⇒ Object
17 18 19 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 17
#
# Splits +str+ into sentence-like pieces at every ".", "!" or "?".
# The punctuation itself is consumed and does not appear in the result.
#
# The separator is a character class rather than a capture group because
# String#split inserts the text of every capture group into its result; the
# earlier grouped pattern returned bare "."/"!"/"?" elements as "sentences".
#
# @param str [String] the text to split
# @return [Array<String>] the pieces between terminators (not stripped)
def split_sentences(str)
  str.split(/[.!?]/) # TODO: make this less primitive
end
#summary(str, count = 10, separator = " [...] ") ⇒ Object
9 10 11 |
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 9
#
# Builds a summary of +str+ at sentence granularity: the text is cut into
# sentences by #split_sentences and those chunks are passed to #perform_lsi.
#
# @param str [String] the text to summarize
# @param count [Integer] number of chunks requested from #perform_lsi (default 10)
# @param separator [String] text placed between the selected chunks
# @return [Object] whatever #perform_lsi returns for the sentence chunks
def summary(str, count = 10, separator = " [...] ")
  sentences = split_sentences(str)
  perform_lsi(sentences, count, separator)
end