39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
|
# File 'lib/rbbt/ner/g_norm_plus.rb', line 39
# Runs the GNormPlus jar over a collection of texts and gathers the
# annotation lines it writes for each one.
#
# Each text is written to `input/<name>.txt` inside a temporary working
# directory as a `<name>|a|<text>` line followed by a blank line; the jar is
# then invoked with the `input`, `output` and `config` paths, and every
# `output/*.txt` file is read back.
#
# @param texts [#each] collection yielding `name, text` pairs. If it responds
#   to `key_field`, that value is used as the key field of the result;
#   otherwise "ID" is used.
# @return [TSV] flat TSV with a single "Entities" field, keyed by the output
#   file name without its `.txt` extension. Each entity is one output line
#   (after the first) with ':' replaced by '.', its first tab-separated
#   column dropped and the remaining columns joined with ':'.
def self.process(texts)
  TmpFile.with_file do |tmpdir|
    Open.mkdir tmpdir
    Misc.in_dir tmpdir do
      # Link the GNormPlus resources into the working directory
      # (presumably the jar looks them up relative to the current directory
      # -- confirm against the GNormPlus distribution).
      Open.ln_s Rbbt.software.opt.GNormPlus.Dictionary.find, '.'
      Open.ln_s Rbbt.software.opt.GNormPlus["BioC.dtd"].find, '.'
      Open.ln_s Rbbt.software.opt.GNormPlus["Ab3P"].find, '.'
      Open.ln_s Rbbt.software.opt.GNormPlus["CRF"].find, '.'

      Open.mkdir 'input'
      Open.mkdir 'output'
      Open.mkdir 'tmp'

      texts.each do |name, text|
        Open.write("input/#{name}.txt") do |f|
          f.puts "#{name}|a|" << text
          f.puts
        end
      end

      Open.write('config', CONFIG)

      CMD.cmd_log("java -Xmx20G -Xms20G -jar '#{Rbbt.software.opt.GNormPlus.find}/GNormPlus.jar' 'input' 'output' 'config'")

      key_field = texts.respond_to?(:key_field) ? texts.key_field : "ID"
      tsv = TSV.setup({}, :key_field => key_field, :fields => ["Entities"], :type => :flat)

      Dir.glob("output/*.txt").each do |file|
        # Strip only the trailing extension: a name that itself contains
        # ".txt" must map back to the same key it was submitted under.
        name = File.basename(file, ".txt")

        # The first line is skipped (presumably the echoed "<name>|a|<text>"
        # header -- confirm against GNormPlus output). `drop(1)` yields []
        # for an empty file where `[1..-1]` would yield nil, and blank lines
        # are ignored so they cannot produce bogus entities.
        annotation_lines = Open.read(file).split("\n").drop(1).reject(&:empty?)

        # ':' is the separator used in the entity string, so any literal ':'
        # in the line is replaced with '.' before the columns are joined.
        tsv[name] = annotation_lines.map do |line|
          line.tr(':', '.').split("\t").drop(1).join(":")
        end
      end

      tsv
    end
  end
end
|