Class: FreelingClient::Analyzer
- Inherits:
-
Base
- Object
- Base
- FreelingClient::Analyzer
show all
- Defined in:
- lib/freeling_client/analyzer.rb
Instance Attribute Summary
Attributes inherited from Base
#config, #ident, #port, #server
Instance Method Summary
collapse
Constructor Details
#initialize(opt = {}) ⇒ Analyzer
Returns a new instance of Analyzer.
13
14
15
16
|
# File 'lib/freeling_client/analyzer.rb', line 13
# Creates an analyzer from an options hash.
#
# @param opt [Hash] recognised keys:
#   :config  - path to the FreeLing configuration file
#              (defaults to 'config/freeling/analyzer.cfg')
#   :timeout - limit passed to Timeout.timeout when running the analyzer
#              (defaults to 60)
def initialize(opt={})
  @config = opt.fetch(:config) { 'config/freeling/analyzer.cfg' }
  @timeout = opt.fetch(:timeout) { 60 }
end
|
Instance Method Details
#build_regexp(ne_text) ⇒ Object
109
110
111
112
113
114
115
116
117
118
119
|
# File 'lib/freeling_client/analyzer.rb', line 109
# Builds a case-insensitive regexp used to locate a token form inside the
# original text.
#
# Underscores in the form are treated as separators between words: the
# fragments around them may be separated in the text by any run of non-word
# characters (\W+). Every fragment is escaped, so characters such as ".",
# "(" or "+" in the form are matched literally instead of being interpreted
# as regexp syntax.
#
# @param ne_text [String] token form, possibly containing "_" separators
# @return [Regexp] pattern matching the form; /./ if no pattern can be built
def build_regexp(ne_text)
  fragments = ne_text.split('_').map { |fragment| Regexp.escape(fragment) }
  # A form made only of underscores splits into nothing; match it literally
  # rather than producing an empty pattern that matches everywhere.
  return /#{Regexp.escape(ne_text)}/i if fragments.empty?
  /#{fragments.join('\W+')}/i
rescue RegexpError
  # Safety net kept from the original contract: fall back to "any character".
  /./
end
|
#call(cmd, text) ⇒ Object
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
# File 'lib/freeling_client/analyzer.rb', line 71
# Runs the FreeLing analyzer for +cmd+ over +text+ and returns its output.
#
# The text is written to a temporary file whose path is handed to the
# command builder; the resulting command line is executed through
# Open3.popen3. The block form is used so that the pipes are closed and the
# child process is reaped on every exit path.
#
# @param cmd [Symbol] command name accepted by valide_command!
# @param text [String] text to analyze
# @return [Array<String>] stdout lines of the analyzer, without line endings
# @raise [CommandError] if +cmd+ is not a known command
# @raise [ExtractionError] if the analyzer writes anything to stderr or does
#   not finish within @timeout
def call(cmd, text)
  valide_command!(cmd)
  output = []
  file = Tempfile.new('foo', encoding: 'utf-8')
  begin
    file.write(text)
    file.close
    Open3.popen3(command(cmd, file.path)) do |stdin, stdout, stderr, wait_thr|
      # Input reaches the analyzer through the "< file" redirection in the
      # command line, so the stdin pipe is not needed.
      stdin.close
      begin
        Timeout.timeout(@timeout) do
          # NOTE(review): stderr is only read after stdout reaches EOF; a child
          # that writes a lot to stderr first could stall until the timeout.
          until (line = stdout.gets).nil?
            output << line.chomp
          end
          message = stderr.readlines
          raise ExtractionError, message.join("\n") unless message.empty?
        end
      rescue Timeout::Error
        # Stop the spawned process so the block form's final wait cannot hang.
        # NOTE(review): the pid belongs to whatever popen3 spawned for the
        # command string (possibly a shell) - confirm the analyzer itself dies.
        begin
          Process.kill('KILL', wait_thr.pid)
        rescue Errno::ESRCH
          # The process already exited; nothing to kill.
        end
        raise ExtractionError, "Timeout"
      end
    end
  ensure
    file.close
    file.unlink
  end
  output
end
|
#command(cmd, file_path) ⇒ Object
121
122
123
|
# File 'lib/freeling_client/analyzer.rb', line 121
# Dispatches to the command builder named "command_<cmd>".
#
# @param cmd [Symbol, String] suffix of the builder method to invoke
# @param file_path [String] path forwarded to the builder
# @return [Object] whatever the selected builder returns
def command(cmd, file_path)
  builder = "command_#{cmd}"
  send(builder, file_path)
end
|
#command_morfo(file_path) ⇒ Object
125
126
127
|
# File 'lib/freeling_client/analyzer.rb', line 125
# Builds the shell command line that runs the analyzer with plain input and
# "morfo" output, reading its input from +file_path+.
#
# @param file_path [String] path of the file redirected into the analyzer
# @return [String] the command line
def command_morfo(file_path)
  format('%s %s -f %s --inpf plain --outf morfo < %s',
         freeling_share, freeling_bin, config, file_path)
end
|
#command_tagged(file_path) ⇒ Object
129
130
131
|
# File 'lib/freeling_client/analyzer.rb', line 129
# Builds the shell command line that runs the analyzer with plain input and
# "tagged" output, reading its input from +file_path+.
#
# @param file_path [String] path of the file redirected into the analyzer
# @return [String] the command line
def command_tagged(file_path)
  environment = freeling_share
  binary = freeling_bin
  settings = config
  "#{environment} #{binary} -f #{settings} --inpf plain --outf tagged < #{file_path}"
end
|
#command_tagged_nec(file_path) ⇒ Object
133
134
135
|
# File 'lib/freeling_client/analyzer.rb', line 133
# Builds the shell command line that runs the analyzer with plain input,
# "tagged" output and the --nec and --noflush switches, reading its input
# from +file_path+.
#
# @param file_path [String] path of the file redirected into the analyzer
# @return [String] the command line
def command_tagged_nec(file_path)
  format('%s %s -f %s --inpf plain --outf tagged --nec --noflush < %s',
         freeling_share, freeling_bin, config, file_path)
end
|
#command_tagged_sense(file_path) ⇒ Object
137
138
139
|
# File 'lib/freeling_client/analyzer.rb', line 137
# Builds the shell command line that runs the analyzer with plain input,
# "sense" output and "--sense all", reading its input from +file_path+.
#
# @param file_path [String] path of the file redirected into the analyzer
# @return [String] the command line
def command_tagged_sense(file_path)
  executable = "#{freeling_share} #{freeling_bin}"
  switches = "-f #{config} --inpf plain --outf sense --sense all"
  "#{executable} #{switches} < #{file_path}"
end
|
#freeling_bin ⇒ Object
145
146
147
|
# File 'lib/freeling_client/analyzer.rb', line 145
# Path of the analyzer executable placed in every command line built by the
# command_* methods.
#
# @return [String] the hard-coded executable path
def freeling_bin
  '/usr/local/bin/analyzer'
end
|
#freeling_share ⇒ Object
141
142
143
|
# File 'lib/freeling_client/analyzer.rb', line 141
# Environment assignment prefixed to every command line built by the
# command_* methods.
#
# @return [String] the hard-coded FREELINGSHARE assignment
def freeling_share
  'FREELINGSHARE=/usr/local/share/freeling/'
end
|
#parse_token_line(str) ⇒ Object
99
100
101
102
103
104
105
106
107
|
# File 'lib/freeling_client/analyzer.rb', line 99
# Turns one whitespace-separated analyzer output line into a token.
#
# The first four fields are read as form, lemma, tag and probability; any
# further fields are ignored. Fields that are missing from the line are left
# out of the attributes handed to the token. The probability is converted
# with String#to_f.
#
# @param str [String] a single output line
# @return [FreelingClient::Token] token built from the fields that are present
def parse_token_line(str)
  form, lemma, tag, prob = str.split(' ').first(4)
  attributes = {}
  attributes[:form] = form unless form.nil?
  attributes[:lemma] = lemma unless lemma.nil?
  attributes[:tag] = tag unless tag.nil?
  attributes[:prob] = prob.to_f unless prob.nil?
  FreelingClient::Token.new(attributes)
end
|
#ptokens(cmd, text) ⇒ Object
Generate ptokens for a given text. ptokens: tokens with position.
Example:
>> analyzer = FreelingClient::Analyzer.new
>> analyzer.ptokens(:morfo, "Este texto está en español.")
Arguments:
cmd: (Symbol)
text: (String)
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
# File 'lib/freeling_client/analyzer.rb', line 50
# Lazily yields the tokens of +text+ annotated with their position in it.
#
# Each token form is searched in +text+ starting at a running cursor. A token
# is yielded (with its +pos+ set) only when the match starts fewer than five
# characters after the cursor; otherwise it is dropped. In both cases the
# cursor advances by the length of the token form.
#
# @param cmd [Symbol] command forwarded to #tokens
# @param text [String] text to analyze
# @return [Enumerator] enumerator over the positioned tokens
def ptokens(cmd, text)
  Enumerator.new do |yielder|
    cursor = 0
    tokens(cmd, text).each do |token|
      form = token['form'].dup
      found_at = text.index(build_regexp(form), cursor)

      # No match, or a match too far from where the token was expected.
      if found_at.nil? || found_at >= cursor + 5
        cursor += form.length
        next
      end

      token.pos = found_at
      yielder << token
      cursor = found_at + form.length
    end
  end
end
|
#tokens(cmd, text) ⇒ Object
Generate tokens for a given text
Example:
>> analyzer = FreelingClient::Analyzer.new
>> analyzer.tokens(:morfo, "Este texto está en español.")
Arguments:
cmd: (Symbol)
text: (String)
29
30
31
32
33
34
35
36
|
# File 'lib/freeling_client/analyzer.rb', line 29
# Lazily yields one parsed token per non-empty analyzer output line.
#
# The command is validated immediately; the analyzer itself only runs once
# the returned enumerator is consumed.
#
# @param cmd [Symbol] command name accepted by valide_command!
# @param text [String] text to analyze
# @return [Enumerator] enumerator over the values of #parse_token_line
def tokens(cmd, text)
  valide_command!(cmd)
  Enumerator.new do |yielder|
    call(cmd, text).each do |freeling_line|
      next if freeling_line.empty?
      yielder << parse_token_line(freeling_line)
    end
  end
end
|
#valide_command!(cmd) ⇒ Object
149
150
151
152
153
|
# File 'lib/freeling_client/analyzer.rb', line 149
# Ensures +cmd+ is one of the supported analyzer commands.
#
# @param cmd [Symbol] candidate command
# @return [nil] when the command is supported
# @raise [CommandError] when the command is not supported
def valide_command!(cmd)
  case cmd
  when :morfo, :tagged, :tagged_nec, :tagged_sense
    nil
  else
    raise CommandError, "#{cmd} does not exist"
  end
end
|