Class: Opener::Scorer::OutputProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/scorer/output_processor.rb

Overview

Given raw XML input that is a valid KAF document, this class calculates the overall sentiment score and the sentiment score per topic.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ OutputProcessor

Returns a new instance of OutputProcessor.

Parameters:

  • options (Hash) (defaults to: {})

Options Hash (options):

  • :request_id (Symbol)


18
19
20
# File 'lib/opener/scorer/output_processor.rb', line 18

##
# Stores the request id taken from the options, generating a random hex
# string when none (or a falsy one) is supplied.
#
# @param [Hash] options
#
def initialize(options = {})
  @request_id = options[:request_id]
  @request_id ||= SecureRandom.hex
end

Instance Attribute Details

#request_id ⇒ String (readonly)

Returns:

  • (String)


10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/opener/scorer/output_processor.rb', line 10

##
# Calculates sentiment scores from a raw KAF XML document: one overall score
# plus one score per topic (lemma) found in the document's properties.
#
# Scores are computed as (positive - negative) / (positive + negative) over
# the absolute opinion strengths, so they range from -1.0 to 1.0.
#
class OutputProcessor
  attr_reader :request_id

  ##
  # @param [Hash] options
  #
  # @option options [String] :request_id Identifier stored alongside the
  #  output. A random hex string is generated when it is omitted.
  #
  def initialize(options = {})
    @request_id = options[:request_id] || SecureRandom.hex
  end

  ##
  # Runs the processor, saves the JSON encoded scores in a new Output record
  # (keyed by the request id) and returns them as a String.
  #
  # @param [String] input
  # @return [String]
  #
  def run(input)
    output      = Output.new
    output.uuid = request_id
    output.text = JSON.dump(process(input))

    output.save!

    output.text
  end

  ##
  # Process the document and return the scores for the available topics.
  #
  # The `:overall` key and the individual topic keys are only present when a
  # score could be calculated for them (see {#score_for}).
  #
  # @param [String] input
  # @return [Hash]
  #
  def process(input)
    document = Nokogiri::XML(input)
    scores   = {}

    lemmas_hash     = build_lemmas_hash(document)
    polarities_hash = build_polarities_hash(document)
    overall_score   = get_overall_score(document)

    scores[:overall] = overall_score if overall_score

    lemmas_hash.each_key do |topic|
      score = get_topic_score(topic, lemmas_hash, polarities_hash)

      scores[topic] = score if score
    end

    scores
  end

  protected

  ##
  # Maps every property lemma (as a Symbol) to the target IDs referenced by
  # that property. Properties without a `lemma` attribute are skipped.
  #
  # @param [Nokogiri::XML::Document] document
  # @return [Hash]
  #
  def build_lemmas_hash(document)
    lemmas_hash = Hash.new { |hash, key| hash[key] = [] }

    document.css('features properties property').each do |property|
      lemma = property.attr('lemma')

      # Without a lemma there is no topic name to file the targets under.
      next unless lemma

      lemma = lemma.to_sym

      property.css('references target').each do |target|
        lemmas_hash[lemma] << target.attr('id')
      end
    end

    lemmas_hash
  end

  ##
  # Maps every target ID that occurs in an opinion target or an opinion
  # expression to a Hash of `polarity => strength`, where strength is the
  # absolute value of the expression's `strength` attribute.
  #
  # Opinions lacking an `opinion_expression` element or a `polarity`
  # attribute carry no usable sentiment and are skipped.
  #
  # @param [Nokogiri::XML::Document] document
  # @return [Hash]
  #
  def build_polarities_hash(document)
    polarities_hash = {}
    opinions        = document.at('opinions')

    return polarities_hash unless opinions

    opinions.css('opinion').each do |opinion|
      op_expr = opinion.at('opinion_expression')
      next unless op_expr

      polarity = op_expr.attr('polarity')
      next unless polarity

      polarity  = polarity.to_sym
      strength  = op_expr.attr('strength').to_i.abs
      op_target = opinion.at('opinion_target')

      # The opinion target is optional, the expression is always present here.
      [op_target, op_expr].compact.each do |node|
        node.css('span target').each do |target|
          id = target.attr('id')

          polarities_hash[id] ||= {}
          polarities_hash[id][polarity] = strength
        end
      end
    end

    polarities_hash
  end

  ##
  # Get the score for all opinions in the document.
  #
  # Returns 0.0 when the document has no `opinions` element at all and `nil`
  # when the positive and negative strengths add up to zero. Polarities other
  # than positive/negative do not contribute to the score.
  #
  # @param [Nokogiri::XML::Document] document
  # @return [Float, nil]
  #
  def get_overall_score(document)
    opinions = document.at('opinions')

    return 0.0 unless opinions

    totals = Hash.new(0)

    opinions.css('opinion').each do |opinion|
      op_expr = opinion.at('opinion_expression')
      next unless op_expr

      polarity = op_expr.attr('polarity')
      next unless polarity

      totals[polarity.to_sym] += op_expr.attr('strength').to_i.abs
    end

    score_for(totals[:positive], totals[:negative])
  end

  ##
  # Given a topic, return the sentiment score of the lemmas of this topic.
  # A topic without any lemma IDs scores 0.0.
  #
  # @param [Symbol] topic
  # @param [Hash] lemmas_hash
  # @param [Hash] polarities_hash
  # @return [Float, nil]
  #
  def get_topic_score(topic, lemmas_hash, polarities_hash)
    lemma_ids = lemmas_hash[topic]

    return 0.0 if lemma_ids.empty?

    calculate_score(lemma_ids, polarities_hash)
  end

  ##
  # Given an array of lemma ids, calculate the sentiment score. IDs that have
  # no entry in the polarities hash (no opinion refers to them) are ignored.
  #
  # @param [Array] lemma_ids
  # @param [Hash] polarities_hash
  # @return [Float, nil]
  #
  def calculate_score(lemma_ids, polarities_hash)
    positive = 0
    negative = 0

    lemma_ids.each do |id|
      polarities = polarities_hash[id]
      next unless polarities

      positive += polarities[:positive] || 0
      negative += polarities[:negative] || 0
    end

    score_for(positive, negative)
  end

  ##
  # Turns summed positive and negative strengths into a score between -1.0
  # and 1.0. Returns `nil` when both sums add up to zero, as there is nothing
  # to base a score on.
  #
  # @param [Numeric] positive
  # @param [Numeric] negative
  # @return [Float, nil]
  #
  def score_for(positive, negative)
    total = positive + negative

    return if total == 0

    (positive - negative).to_f / total
  end

end

Instance Method Details

#process(input) ⇒ Hash

Process the document and return the scores for the available topics.

Returns:

  • (Hash)


43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/opener/scorer/output_processor.rb', line 43

##
# Parses the input as XML and gathers the overall score plus one score per
# topic. Entries are only added for scores that are not nil/false.
#
# @param [String] input
# @return [Hash]
#
def process(input)
  xml    = Nokogiri::XML(input)
  result = {}

  topics     = build_lemmas_hash(xml)
  polarities = build_polarities_hash(xml)
  overall    = get_overall_score(xml)

  result[:overall] = overall if overall

  topics.keys.each do |name|
    topic_score = get_topic_score(name, topics, polarities)

    result[name] = topic_score if topic_score
  end

  result
end

#run(input) ⇒ String

Runs the processor and returns the results as a String.

Parameters:

  • input (String)

Returns:

  • (String)


28
29
30
31
32
33
34
35
36
# File 'lib/opener/scorer/output_processor.rb', line 28

##
# Processes the input, stores the JSON encoded result in a new Output record
# tagged with the request id, saves it and returns the stored text.
#
# @param [String] input
# @return [String]
#
def run(input)
  record = Output.new.tap do |out|
    out.uuid = request_id
    out.text = JSON.dump(process(input))
  end

  record.save!
  record.text
end