Module: Shiner

Defined in:
lib/shiner.rb,
lib/shiner/version.rb

Constant Summary collapse

VERSION =
"0.0.3"

Class Method Summary collapse

Class Method Details

.classifier ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/shiner.rb', line 40

# Returns the shared Bayes classifier, building and training it on first use.
#
# The classifier is created with the categories 'Interesting' and
# 'Uninteresting'. Every line of data/interesting.txt is fed to
# +train_interesting+ and every line of data/uninteresting.txt to
# +train_uninteresting+ (both paths are resolved relative to this file).
# The trained instance is cached in @classifier, so the files are only read
# the first time this is called.
#
# @return [Classifier::Bayes] the trained, memoised classifier
def self.classifier
  @classifier ||= begin
    bayes = Classifier::Bayes.new 'Interesting', 'Uninteresting'
    here = File.dirname(__FILE__)

    File.read(here + '/../data/interesting.txt').split("\n").each do |line|
      bayes.train_interesting line
    end

    File.read(here + '/../data/uninteresting.txt').split("\n").each do |line|
      bayes.train_uninteresting line
    end

    bayes
  end
end

.shine(string, options = {:max_length => 188}) ⇒ Object



11
12
13
14
# File 'lib/shiner.rb', line 11

# Picks the best-scoring run of consecutive sentences from +string+ and
# returns it as a single space-joined string.
#
# @param string [String] the text to summarise
# @param options [Hash] passed through to +string_to_best_sentences+
#   (+:max_length+, +:max_sentences+). The default of
#   <tt>{:max_length => 188}</tt> only applies when +options+ is omitted
#   entirely; a caller-supplied hash is used as given.
# @return [String] the selected sentences joined by ' ', or '' when no
#   sentence could be selected
def self.shine(string, options={:max_length => 188})
  best = string_to_best_sentences(string, options)
  # string_to_best_sentences returns nil when the text yields no sentences;
  # previously that surfaced here as a NoMethodError on nil.
  return '' unless best

  best[:sentences].map { |sentence| sentence[:sentence] }.join(' ')
end

.string_to_best_sentences(string, options = {}) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/shiner.rb', line 16

# Finds the run of consecutive sentences with the highest average score.
#
# For every starting sentence a window of up to +:max_sentences+ sentences
# is taken, trimmed from the end until its space-joined text fits within
# +:max_length+ characters, and scored by the mean of its sentence scores.
# The highest-scoring window wins.
#
# @param string [String] the text to analyse
# @param options [Hash] tuning options; the hash is not modified
# @option options [Integer] :max_sentences maximum sentences per window
#   (defaults to the total number of sentences)
# @option options [Integer] :max_length maximum length of the joined text;
#   no limit when absent
# @return [Hash, nil] <tt>{:sentences => [...], :score => Float}</tt> for the
#   best window, or nil when the text yields no sentences. If no window fits
#   within +:max_length+ the result has an empty +:sentences+ array.
def self.string_to_best_sentences(string, options={})
  sentences = string_to_scored_sentences(string)
  # Read the options into locals instead of writing the default back into
  # the caller's hash.
  max_sentences = options[:max_sentences] || sentences.size
  max_length = options[:max_length]

  batches = sentences.each_index.map do |index|
    picked = sentences[index, max_sentences]
    if max_length
      # Drop trailing sentences until the joined text fits. The emptiness
      # guard stops the loop when nothing is left to drop.
      picked.pop while !picked.empty? &&
                       picked.map { |sentence| sentence[:sentence] }.join(' ').length > max_length
    end

    # An empty window used to score 0.0 / 0 = NaN, which makes the comparison
    # below raise. Rank it below every real window instead.
    score = if picked.empty?
              -Float::INFINITY
            else
              picked.sum { |sentence| sentence[:score] }.to_f / picked.size
            end

    {:sentences => picked, :score => score}
  end

  batches.max_by { |batch| batch[:score] }
end

.string_to_scored_sentences(string) ⇒ Object



29
30
31
32
33
34
35
36
37
38
# File 'lib/shiner.rb', line 29

# Splits +string+ into sentences and scores each one with the classifier.
#
# A sentence's score is <tt>1 - Interesting / Uninteresting</tt>, computed
# from the values the classifier reports for those two categories. The
# result keeps the sentences in the order the tokenizer produced them; it is
# not sorted by score.
#
# @param string [String] the text to analyse
# @return [Array<Hash>] one hash per sentence with the keys +:sentence+,
#   +:classifications+ and +:score+
def self.string_to_scored_sentences(string)
  string_to_sentences(string).map do |text|
    ratings = classifier.classifications(text)
    ratio = ratings['Interesting'] / ratings['Uninteresting']
    {:sentence => text, :classifications => ratings, :score => 1 - ratio}
  end
end

.string_to_sentences(string) ⇒ Object



53
54
55
56
# File 'lib/shiner.rb', line 53

# Tokenizes +string+ with a TactfulTokenizer model.
#
# The model is created on first use and cached in @tactful_tokenizer, so
# later calls reuse the same instance.
#
# @param string [String] the text to split
# @return [Object] whatever the model's +tokenize_text+ returns for +string+
def self.string_to_sentences(string)
  tokenizer = (@tactful_tokenizer ||= TactfulTokenizer::Model.new)
  tokenizer.tokenize_text(string)
end