Class: Tefil::EachSentence

Inherits:
TextFilterBase show all
Defined in:
lib/tefil/eachsentence.rb

Constant Summary collapse

END_CHAR =
%w(. ? . 。)
NOT_END_WORDS =
["Fig.", "FIG."]

Instance Method Summary collapse

Methods inherited from TextFilterBase

#filter

Constructor Details

#initialize(options = {}) ⇒ EachSentence

Returns a new instance of EachSentence.



7
8
9
10
11
# File 'lib/tefil/eachsentence.rb', line 7

# Builds an EachSentence filter.
#
# @param options [Hash] filter options. +:minimum+ is remembered in
#   +@minimum+; +:smart_filename+ is always forced to +true+ before the
#   options are passed on to the superclass constructor.
def initialize(options = {})
  # Merge into a fresh hash rather than assigning into the argument, so the
  # caller's hash is left untouched and a frozen hash is accepted.
  options = options.merge(smart_filename: true)
  @minimum = options[:minimum]
  super(options)
end

Instance Method Details

#process_stream(in_io, out_io) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/tefil/eachsentence.rb', line 13

# Rewrites the text from +in_io+ so that every sentence is on its own line
# and writes the result to +out_io+.
#
# Each input line is handled independently:
# 1. a newline is inserted after every character listed in END_CHAR;
# 2. a break that directly follows one of NOT_END_WORDS (e.g. "Fig.") is
#    removed again, because such words do not end a sentence;
# 3. spaces following a break, surrounding whitespace, and runs of spaces
#    are collapsed.
#
# @param in_io [#read] source of the whole input text
# @param out_io [#puts] destination for the reformatted text
def process_stream(in_io, out_io)
  results = in_io.read.strip.split("\n").map do |line|
    # Unary plus yields a mutable string even under frozen_string_literal.
    new_line = +''
    line.each_char do |char|
      new_line << char
      new_line << "\n" if END_CHAR.include?(char)
    end

    NOT_END_WORDS.each do |word|
      # Escape the word so its "." is matched literally; unescaped, "Fig."
      # would also match "Fig?" and silently rewrite it to "Fig.".
      # The block form keeps the replacement text literal as well.
      new_line.gsub!(/#{Regexp.escape(word)}\n/) { word }
    end

    new_line.gsub!(/\n +/, "\n") # drop spaces that now start a sentence line
    new_line.strip!
    new_line.squeeze!(' ')
    new_line
  end

  out_io.puts results.join("\n")
end