Module: ClassifierReborn::Summarizer

Extended by:
Summarizer
Included in:
Summarizer
Defined in:
lib/classifier-reborn/lsi/summarizer.rb

Instance Method Summary collapse

Instance Method Details

#paragraph_summary(str, count = 1, separator = " [...] ") ⇒ Object



13
14
15
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 13

# Summarizes +str+ at paragraph granularity.
#
# The text is cut into chunks by split_paragraphs, and those chunks are
# handed to perform_lsi together with +count+ and +separator+.
#
# str       - String to summarize.
# count     - forwarded unchanged to perform_lsi (defaults to 1).
# separator - String forwarded to perform_lsi, which uses it to join the
#             selected chunks (defaults to " [...] ").
#
# Returns whatever perform_lsi returns for the paragraph chunks.
def paragraph_summary(str, count = 1, separator = " [...] ")
  paragraphs = split_paragraphs(str)
  perform_lsi(paragraphs, count, separator)
end

#perform_lsi(chunks, count, separator) ⇒ Object



25
26
27
28
29
30
31
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 25

# Feeds +chunks+ into a fresh LSI index and joins the entries the index
# reports as having the highest relative content.
#
# chunks    - collection of Strings (e.g. sentences or paragraphs).
# count     - passed straight to LSI#highest_relative_content; presumably
#             the maximum number of chunks to keep (confirm against LSI).
# separator - String inserted between the selected chunks.
#
# Returns the selected chunks, each stripped of surrounding whitespace and
# joined with +separator+.
def perform_lsi(chunks, count, separator)
  # auto_rebuild is disabled here; the index is built explicitly below,
  # after every chunk has been added.
  lsi = ClassifierReborn::LSI.new :auto_rebuild => false

  chunks.each do |chunk|
    # Chunks that are blank or consist of a single word are not indexed.
    next if chunk.strip.split.size < 2

    lsi << chunk
  end
  lsi.build_index

  # NOTE(review): this method used to filter the result against itself
  # (summaries.reject { |c| !summaries.include? c }), which never removed
  # anything. If the intent was to emit chunks in their original order,
  # filter +chunks+ by membership instead -- confirm before changing output.
  lsi.highest_relative_content(count).map(&:strip).join(separator)
end

#split_paragraphs(str) ⇒ Object



21
22
23
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 21

# Cuts +str+ at blank-line boundaries ("\n\n", "\r\r" or "\r\n\r\n").
#
# The pattern wraps the delimiter in a capture group, so String#split keeps
# each matched delimiter as its own element between the paragraphs.
#
# Returns an Array of Strings: paragraphs interleaved with the delimiters.
def split_paragraphs(str)
  paragraph_break = /(\n\n|\r\r|\r\n\r\n)/
  str.split(paragraph_break) # TODO: make this less primitive
end

#split_sentences(str) ⇒ Object



17
18
19
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 17

# Cuts +str+ at every ".", "!" or "?" character.
#
# The pattern wraps the punctuation in a capture group, so String#split
# keeps each punctuation mark as its own element between the pieces.
#
# Returns an Array of Strings: text pieces interleaved with the punctuation.
def split_sentences(str)
  sentence_end = /(\.|\!|\?)/
  str.split(sentence_end) # TODO: make this less primitive
end

#summary(str, count = 10, separator = " [...] ") ⇒ Object



9
10
11
# File 'lib/classifier-reborn/lsi/summarizer.rb', line 9

# Summarizes +str+ at sentence granularity.
#
# The text is cut into chunks by split_sentences, and those chunks are
# handed to perform_lsi together with +count+ and +separator+.
#
# str       - String to summarize.
# count     - forwarded unchanged to perform_lsi (defaults to 10).
# separator - String forwarded to perform_lsi, which uses it to join the
#             selected chunks (defaults to " [...] ").
#
# Returns whatever perform_lsi returns for the sentence chunks.
def summary(str, count = 10, separator = " [...] ")
  sentences = split_sentences(str)
  perform_lsi(sentences, count, separator)
end