Class: FreelingClient::Analyzer

Inherits:
Base
  • Object
show all
Defined in:
lib/freeling_client/analyzer.rb

Instance Attribute Summary

Attributes inherited from Base

#config, #ident, #port, #server

Instance Method Summary collapse

Constructor Details

#initialize(opt = {}) ⇒ Analyzer

Returns a new instance of Analyzer.



13
14
15
16
# File 'lib/freeling_client/analyzer.rb', line 13

# Build an analyzer from an options hash.
#
# Arguments:
#   opt: (Hash) optional settings
#     :config  - value stored in @config;
#                defaults to 'config/freeling/analyzer.cfg'
#     :timeout - value stored in @timeout and later handed to
#                Timeout::timeout by #call, i.e. a number of seconds;
#                defaults to 60
def initialize(opt = {})
  @config = opt.fetch(:config, 'config/freeling/analyzer.cfg')
  @timeout = opt.fetch(:timeout, 60) # seconds (one minute by default)
end

Instance Method Details

#build_regexp(ne_text) ⇒ Object



109
110
111
112
113
114
115
116
117
118
119
# File 'lib/freeling_client/analyzer.rb', line 109

# Build a case-insensitive regexp that locates a token form inside the
# original text.
#
# The form is matched literally: every regexp metacharacter it contains
# ("." "(" "?" ...) is escaped, so punctuation tokens match themselves
# instead of matching arbitrary characters or producing an invalid pattern.
#
# When the form contains underscores it is split on them and the pieces
# are joined with \W+, so "Nueva_York" matches "Nueva York".
# NOTE(review): presumably underscores are how the analyzer joins
# multi-word tokens - confirm against the analyzer output format.
#
# Arguments:
#   ne_text: (String) token form as reported by the analyzer
#
# Returns a Regexp. Falls back to /./ if a pattern still cannot be built.
def build_regexp(ne_text)
  parts = ne_text.split('_')
  pattern =
    if ne_text.include?('_') && !parts.empty?
      parts.map { |part| Regexp.escape(part) }.join('\W+')
    else
      # No underscore, or nothing but underscores: match the form verbatim
      # rather than building an empty (match-anywhere) pattern.
      Regexp.escape(ne_text)
    end
  Regexp.new(pattern, Regexp::IGNORECASE)
rescue RegexpError
  /./
end

#call(cmd, text) ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/freeling_client/analyzer.rb', line 71

# Run the analyzer command for +cmd+ over +text+ and collect its output.
#
# The text is written to a temporary file whose path is handed to
# #command; the resulting command line is executed and its standard
# output is returned line by line.
#
# Arguments:
#   cmd: (Symbol) command name, checked by #valide_command!
#   text: (String) text to analyze
#
# Returns an Array of output lines with their line terminators removed.
#
# Raises ExtractionError when the process writes anything to standard
# error, or (with the message "Timeout") when it does not finish within
# @timeout seconds.
def call(cmd, text)
  valide_command!(cmd)

  output = []
  file = Tempfile.new('foo', encoding: 'utf-8')
  begin
    file.write(text)
    file.close

    # Block form: the pipes are closed and the child is reaped when the
    # block exits, including when it exits through an exception.
    Open3.popen3(command(cmd, file.path)) do |stdin, stdout, stderr, wait_thr|
      stdin.close
      # Drain stderr concurrently so a child that fills the stderr pipe
      # cannot stall while stdout is still being read.
      stderr_reader = Thread.new { stderr.readlines }
      begin
        Timeout.timeout(@timeout) do
          stdout.each_line { |line| output << line.chomp }

          message = stderr_reader.value
          raise ExtractionError, message.join("\n") unless message.empty?
        end
      rescue Timeout::Error
        begin
          # Stop the child so the reaping done by popen3 cannot block.
          Process.kill('KILL', wait_thr.pid)
        rescue Errno::ESRCH
          # The process already exited on its own.
        end
        raise ExtractionError, "Timeout"
      ensure
        # No-op when the reader already finished.
        stderr_reader.kill
      end
    end
  ensure
    file.close
    file.unlink
  end
  output
end

#command(cmd, file_path) ⇒ Object



121
122
123
# File 'lib/freeling_client/analyzer.rb', line 121

# Build the command line for +cmd+ by dispatching to the matching
# command_<cmd> method.
#
# Arguments:
#   cmd: command name; interpolated into the method name "command_<cmd>"
#   file_path: passed through unchanged to that method
#
# Returns whatever the selected command_<cmd> method returns.
def command(cmd, file_path)
  builder = "command_#{cmd}"
  send(builder, file_path)
end

#command_morfo(file_path) ⇒ Object



125
126
127
# File 'lib/freeling_client/analyzer.rb', line 125

# Command line that runs the analyzer with "--outf morfo", reading its
# input from +file_path+ through a shell redirection.
#
# Arguments:
#   file_path: path of the file redirected into the command
#
# Returns the command line as a String.
def command_morfo(file_path)
  environment = freeling_share
  executable = freeling_bin
  "#{environment} #{executable} -f #{config} --inpf plain --outf morfo < #{file_path}"
end

#command_tagged(file_path) ⇒ Object



129
130
131
# File 'lib/freeling_client/analyzer.rb', line 129

# Command line that runs the analyzer with "--outf tagged", reading its
# input from +file_path+ through a shell redirection.
#
# Arguments:
#   file_path: path of the file redirected into the command
#
# Returns the command line as a String.
def command_tagged(file_path)
  environment = freeling_share
  executable = freeling_bin
  "#{environment} #{executable} -f #{config} --inpf plain --outf tagged < #{file_path}"
end

#command_tagged_nec(file_path) ⇒ Object



133
134
135
# File 'lib/freeling_client/analyzer.rb', line 133

# Command line that runs the analyzer with "--outf tagged --nec --noflush",
# reading its input from +file_path+ through a shell redirection.
#
# Arguments:
#   file_path: path of the file redirected into the command
#
# Returns the command line as a String.
def command_tagged_nec(file_path)
  environment = freeling_share
  executable = freeling_bin
  "#{environment} #{executable} -f #{config} --inpf plain --outf tagged --nec --noflush < #{file_path}"
end

#command_tagged_sense(file_path) ⇒ Object



137
138
139
# File 'lib/freeling_client/analyzer.rb', line 137

# Command line that runs the analyzer with "--outf sense --sense all",
# reading its input from +file_path+ through a shell redirection.
#
# Arguments:
#   file_path: path of the file redirected into the command
#
# Returns the command line as a String.
def command_tagged_sense(file_path)
  environment = freeling_share
  executable = freeling_bin
  "#{environment} #{executable} -f #{config} --inpf plain --outf sense --sense all < #{file_path}"
end

#freeling_bin ⇒ Object



145
146
147
# File 'lib/freeling_client/analyzer.rb', line 145

# Path of the analyzer executable.
#
# Returns the fixed String '/usr/local/bin/analyzer'.
def freeling_bin
  '/usr/local/bin/analyzer'
end

#freeling_share ⇒ Object



141
142
143
# File 'lib/freeling_client/analyzer.rb', line 141

# Environment assignment for the FREELINGSHARE variable, written in
# NAME=value form.
#
# Returns the fixed String 'FREELINGSHARE=/usr/local/share/freeling/'.
def freeling_share
  'FREELINGSHARE=/usr/local/share/freeling/'
end

#parse_token_line(str) ⇒ Object



99
100
101
102
103
104
105
106
107
# File 'lib/freeling_client/analyzer.rb', line 99

# Turn one whitespace-separated output line into a FreelingClient::Token.
#
# Only the first four columns are used, in the order form, lemma, tag,
# prob. The prob column is converted with String#to_f. Columns that are
# absent from the line are left out of the attribute hash entirely.
#
# Arguments:
#   str: (String) one line of analyzer output
#
# Returns a FreelingClient::Token built from the extracted attributes.
def parse_token_line(str)
  columns = str.split(' ').first(4)
  attributes = {}
  [:form, :lemma, :tag, :prob].zip(columns) do |key, value|
    next if value.nil?

    attributes[key] = key == :prob ? value.to_f : value
  end
  FreelingClient::Token.new(attributes)
end

#ptokens(cmd, text) ⇒ Object

Generate ptokens for a given text. A ptoken is a token annotated with its position in the text.

Example:

>> analyzer = FreelingClient::Analyzer.new
>> analyzer.ptokens(:morfo, "Este texto está en español.")

Arguments:

cmd: (Symbol)
text: (String)


50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/freeling_client/analyzer.rb', line 50

# Generate ptokens (tokens annotated with their position) for a text.
#
# Each token produced by #tokens is searched for in +text+ starting at a
# running cursor, using the regexp from #build_regexp. A token is yielded,
# with its +pos+ set to the match offset, only when the match starts fewer
# than 5 characters past the cursor; otherwise it is skipped. Either way
# the cursor then advances by the length of the token form.
#
# Arguments:
#   cmd: (Symbol)
#   text: (String)
#
# Returns an Enumerator; no work is done until it is iterated.
def ptokens(cmd, text)
  Enumerator.new do |yielder|
    cursor = 0
    tokens(cmd, text).each do |token|
      form = token['form'].dup
      found_at = text.index(build_regexp(form), cursor)
      located = found_at && found_at < (cursor + 5)

      if located
        token.pos = found_at
        yielder << token
      end

      # Resume after the match when there was one, else skip ahead blindly.
      cursor = (located ? found_at : cursor) + form.length
    end
  end
end

#tokens(cmd, text) ⇒ Object

Generate tokens for a given text

Example:

>> analyzer = FreelingClient::Analyzer.new
>> analyzer.tokens(:morfo, "Este texto está en español.")

Arguments:

cmd: (Symbol)
text: (String)


29
30
31
32
33
34
35
36
# File 'lib/freeling_client/analyzer.rb', line 29

# Generate tokens for a given text.
#
# The command name is checked immediately. The analyzer itself is only
# run (through #call) once the returned Enumerator is iterated. Each
# non-empty output line is converted with #parse_token_line; empty lines
# are skipped.
#
# Arguments:
#   cmd: (Symbol)
#   text: (String)
#
# Returns an Enumerator over the parsed tokens.
def tokens(cmd, text)
  valide_command!(cmd)

  Enumerator.new do |yielder|
    call(cmd, text).each do |line|
      next if line.empty?

      yielder << parse_token_line(line)
    end
  end
end

#valide_command!(cmd) ⇒ Object



149
150
151
152
153
# File 'lib/freeling_client/analyzer.rb', line 149

# Check that +cmd+ is one of the supported command names.
#
# Supported names are the symbols :morfo, :tagged, :tagged_nec and
# :tagged_sense.
#
# Arguments:
#   cmd: command name to check
#
# Returns nil when the command is supported.
# Raises CommandError ("<cmd> does not exist") otherwise.
def valide_command!(cmd)
  supported = [:morfo, :tagged, :tagged_nec, :tagged_sense]
  return if supported.include?(cmd)

  raise CommandError, "#{cmd} does not exist"
end