29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
# File 'lib/rbbt/nlp/open_nlp/sentence_splitter.rb', line 29
# Splits +text+ into sentences using the sentence detector returned by
# +self.sentence_split_detector+.
#
# Detection runs in a forked child process, which writes the detected
# sentences (joined by the "#OpenNLP:SENTENCE#" marker) to a temporary file.
# The parent polls for the child's exit and kills it if it has been running
# for more than MAX seconds.
#
# text - the String to split; may be nil.
#
# Returns an Array of sentence Strings. Each one is passed to Segment.setup
# together with the index at which it was found in +text+. Returns [] when
# +text+ is nil or empty.
#
# Raises RuntimeError when detection takes longer than MAX seconds. Any other
# exception raised while splitting propagates unchanged.
def self.sentence_splitter(text)
  return [] if text.nil? || text.empty?

  text = Misc.to_utf8(text)
  sentence_split_detector = self.sentence_split_detector
  sentences = nil

  TmpFile.with_file do |tmpfile|
    start_time = Time.now

    pid = Process.fork do
      sent = sentence_split_detector.sentDetect(text)
      Open.write(tmpfile, sent * "#OpenNLP:SENTENCE#")
    end

    begin
      # WNOHANG makes waitpid return nil while the child is still running;
      # a plain waitpid would block here and the timeout could never fire.
      until Process.waitpid(pid, Process::WNOHANG)
        if Time.now - start_time > MAX
          Process.kill(9, pid)
          # Reap the killed child so it does not linger as a zombie.
          begin
            Process.waitpid(pid)
          rescue Errno::ECHILD
            # Already reaped; nothing left to collect.
          end
          raise "Taking to long (> #{MAX} seconds)"
        end
        sleep 0.1
      end
    rescue Errno::ECHILD
      # The child was already reaped; fall through and read its output.
    end

    sentences = Open.read(tmpfile).split("#OpenNLP:SENTENCE#")
  end

  # Locate each sentence in the original text, searching forward from the
  # end of the previous match so repeated sentences get distinct offsets.
  last = 0
  sentences.collect do |sentence|
    sentence = Misc.to_utf8(sentence)
    start = text.index(sentence, last)
    Segment.setup sentence, start
    last = start + sentence.length - 1
    sentence
  end
end
|