Class: Tefil::EachSentence
- Inherits:
-
TextFilterBase
- Object
- TextFilterBase
- Tefil::EachSentence
- Defined in:
- lib/tefil/eachsentence.rb
Constant Summary collapse
- END_CHAR =
%w(. ? . 。)
- NOT_END_WORDS =
["Fig.", "FIG."]
Instance Method Summary collapse
-
#initialize(options = {}) ⇒ EachSentence
constructor
A new instance of EachSentence.
- #process_stream(in_io, out_io) ⇒ Object
Methods inherited from TextFilterBase
Constructor Details
#initialize(options = {}) ⇒ EachSentence
Returns a new instance of EachSentence.
7 8 9 10 11 |
# File 'lib/tefil/eachsentence.rb', line 7
#
# Builds the filter. Always sets +options[:smart_filename]+ to true (note that
# this writes into the hash passed by the caller) and remembers
# +options[:minimum]+ in +@minimum+.
#
# NOTE(review): the extracted listing lost the identifier +options+; it is
# restored here from the documented signature. Forwarding +options+ to +super+
# is presumed from the same pattern -- confirm against the real source file.
#
# @param options [Hash] filter options; +:minimum+ is read here
def initialize(options = {})
  options[:smart_filename] = true
  @minimum = options[:minimum]
  super(options)
end
Instance Method Details
#process_stream(in_io, out_io) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/tefil/eachsentence.rb', line 13
#
# Reads all of +in_io+, and rewrites each input line so that every sentence
# sits on its own output line. A sentence ends at any character listed in
# END_CHAR, except when that character is the tail of a word listed in
# NOT_END_WORDS (e.g. "Fig."), in which case no break is made.
#
# @param in_io [#read] source stream; it is read in full
# @param out_io [#puts] destination stream
# @return [Object] whatever +out_io.puts+ returns
def process_stream(in_io, out_io)
  results = in_io.read.strip.split("\n").map do |line|
    # Insert a line break right after every sentence-ending character.
    broken = line.each_char.map { |char| END_CHAR.include?(char) ? "#{char}\n" : char }.join

    # Undo the break after abbreviations. A literal string pattern is used so
    # the "." in "Fig." is not treated as a regexp wildcard, and the block form
    # keeps the replacement text free of backslash interpretation.
    NOT_END_WORDS.each do |word|
      broken.gsub!("#{word}\n") { word }
    end

    # Drop spaces that now lead a sentence, trim the ends, and collapse any
    # remaining runs of spaces to a single space.
    broken.gsub(/\n */, "\n").strip.gsub(/ +/, ' ')
  end

  out_io.puts results.join("\n")
end