Class: Opener::Scorer::OutputProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/scorer/output_processor.rb

Overview

Given a raw XML input, this class calculates the overall sentiment score and the score per topic, provided that the input is a valid KAF document.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input) ⇒ OutputProcessor

Returns a new instance of OutputProcessor.

Parameters:

  • input (String)


27
28
29
30
31
32
# File 'lib/opener/scorer/output_processor.rb', line 27

##
# Parses the given raw XML and prepares the empty containers that are
# filled in later while processing the document.
#
# @param [String] input
#
def initialize(input)
  @input = Nokogiri::XML::Document.parse(input)
  @lemmas_array, @lemmas_hash = [], {}
  @polarities_hash = {}
end

Instance Attribute Details

#input ⇒ String

Returns:

  • (String)


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/opener/scorer/output_processor.rb', line 21

class OutputProcessor
  attr_accessor :input, :lemmas_array, :lemmas_hash, :polarities_hash

  ##
  # @param [String] input Raw XML; it is parsed with Nokogiri.
  #
  def initialize(input)
    @input = Nokogiri::XML::Document.parse(input)
    @lemmas_array = []
    @lemmas_hash = {}
    @polarities_hash = {}
  end

  ##
  # Process the document and return the scores for the available topics.
  #
  # The result contains an +:overall+ entry plus one entry per property
  # lemma. Entries for which no positive or negative polarity was found are
  # left out, so the hash may be empty.
  #
  # @return [Hash]
  #
  def process
    scores = {}

    build_lemmas_hash
    build_polarities_hash

    overall_score = get_overall_score
    scores[:overall] = overall_score if overall_score

    lemmas_hash.each_key do |topic|
      score = get_topic_score(topic)
      scores[topic] = score if score
    end

    scores
  end

  protected

  ##
  # Create a hash with all lemma ids per property and also an array with
  # all lemma ids.
  #
  def build_lemmas_hash
    input.css('features properties property').each do |property|
      lemma = property.attr('lemma')
      # A property without a lemma cannot name a topic, so it is skipped
      # instead of failing on nil.
      next unless lemma

      ids = (lemmas_hash[lemma.to_sym] ||= [])
      property.css('references target').each do |target|
        lemma_id = target.attr('id')
        lemmas_array << lemma_id
        ids << lemma_id
      end
    end
  end

  ##
  # Create a hash with all lemma ids that have a polarity.
  #
  # The polarity of an opinion is recorded for every target id found in its
  # opinion_target (when present) and in its opinion_expression. Opinions
  # without a polarity are skipped.
  #
  def build_polarities_hash
    opinion_nodes.each do |opinion|
      polarity = polarity_of(opinion)
      next unless polarity

      # Same order as before: ids of the opinion target first, then the ids
      # of the opinion expression.
      holders = [opinion.at('opinion_target'), opinion.at('opinion_expression')]
      holders.compact.each do |holder|
        holder.css('span target').each do |target|
          (polarities_hash[target.attr('id')] ||= []) << polarity
        end
      end
    end
  end

  ##
  # Get the score over the polarities of all opinions in the document.
  #
  # @return [Float] || [NilClass] nil when the document has no positive or
  #  negative opinion (including when it has no opinions element at all).
  #
  def get_overall_score
    score_from(opinion_nodes.map { |opinion| polarity_of(opinion) })
  end

  ##
  # Given a topic, return the sentiment score of the lemmas of this topic.
  #
  # @return [Float] || [NilClass]
  #
  def get_topic_score(topic)
    return calculate_score(lemmas_hash[topic]) if lemmas_hash[topic]
  end

  ##
  # Given an array of lemma ids, calculate the sentiment score.
  #
  # @param [Array] lemma_ids
  # @return [Float] || [NilClass] nil when none of the ids has a positive or
  #  negative polarity.
  #
  def calculate_score(lemma_ids)
    # Ids without a recorded polarity yield nil, which is never counted.
    score_from(lemma_ids.map { |id| polarities_hash[id] }.flatten)
  end

  ##
  # All opinion nodes below the first opinions element, or an empty list
  # when the document has no opinions element.
  #
  def opinion_nodes
    opinions = input.at('opinions')
    opinions ? opinions.css('opinion') : []
  end

  ##
  # Polarity of an opinion as a Symbol, or nil when the opinion has no
  # opinion_expression or the expression has no polarity attribute.
  #
  def polarity_of(opinion)
    expression = opinion.at('opinion_expression')
    polarity = expression && expression.attr('polarity')
    polarity && polarity.to_sym
  end

  ##
  # (positive - negative) / (positive + negative) over the given polarities.
  # Anything other than :positive and :negative is ignored.
  #
  # @param [Array] polarities
  # @return [Float] || [NilClass] nil when nothing could be counted.
  #
  def score_from(polarities)
    positive = polarities.count(:positive)
    negative = polarities.count(:negative)
    total = positive + negative

    return if total.zero?

    (positive - negative).to_f / total
  end

end

#lemmas_array ⇒ Array

Returns:

  • (Array)


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/opener/scorer/output_processor.rb', line 21

class OutputProcessor
  attr_accessor :input, :lemmas_array, :lemmas_hash, :polarities_hash

  ##
  # @param [String] input Raw XML; it is parsed with Nokogiri.
  #
  def initialize(input)
    @input = Nokogiri::XML::Document.parse(input)
    @lemmas_array = []
    @lemmas_hash = {}
    @polarities_hash = {}
  end

  ##
  # Process the document and return the scores for the available topics.
  #
  # The result contains an +:overall+ entry plus one entry per property
  # lemma. Entries for which no positive or negative polarity was found are
  # left out, so the hash may be empty.
  #
  # @return [Hash]
  #
  def process
    scores = {}

    build_lemmas_hash
    build_polarities_hash

    overall_score = get_overall_score
    scores[:overall] = overall_score if overall_score

    lemmas_hash.each_key do |topic|
      score = get_topic_score(topic)
      scores[topic] = score if score
    end

    scores
  end

  protected

  ##
  # Create a hash with all lemma ids per property and also an array with
  # all lemma ids.
  #
  def build_lemmas_hash
    input.css('features properties property').each do |property|
      lemma = property.attr('lemma')
      # A property without a lemma cannot name a topic, so it is skipped
      # instead of failing on nil.
      next unless lemma

      ids = (lemmas_hash[lemma.to_sym] ||= [])
      property.css('references target').each do |target|
        lemma_id = target.attr('id')
        lemmas_array << lemma_id
        ids << lemma_id
      end
    end
  end

  ##
  # Create a hash with all lemma ids that have a polarity.
  #
  # The polarity of an opinion is recorded for every target id found in its
  # opinion_target (when present) and in its opinion_expression. Opinions
  # without a polarity are skipped.
  #
  def build_polarities_hash
    opinion_nodes.each do |opinion|
      polarity = polarity_of(opinion)
      next unless polarity

      # Same order as before: ids of the opinion target first, then the ids
      # of the opinion expression.
      holders = [opinion.at('opinion_target'), opinion.at('opinion_expression')]
      holders.compact.each do |holder|
        holder.css('span target').each do |target|
          (polarities_hash[target.attr('id')] ||= []) << polarity
        end
      end
    end
  end

  ##
  # Get the score over the polarities of all opinions in the document.
  #
  # @return [Float] || [NilClass] nil when the document has no positive or
  #  negative opinion (including when it has no opinions element at all).
  #
  def get_overall_score
    score_from(opinion_nodes.map { |opinion| polarity_of(opinion) })
  end

  ##
  # Given a topic, return the sentiment score of the lemmas of this topic.
  #
  # @return [Float] || [NilClass]
  #
  def get_topic_score(topic)
    return calculate_score(lemmas_hash[topic]) if lemmas_hash[topic]
  end

  ##
  # Given an array of lemma ids, calculate the sentiment score.
  #
  # @param [Array] lemma_ids
  # @return [Float] || [NilClass] nil when none of the ids has a positive or
  #  negative polarity.
  #
  def calculate_score(lemma_ids)
    # Ids without a recorded polarity yield nil, which is never counted.
    score_from(lemma_ids.map { |id| polarities_hash[id] }.flatten)
  end

  ##
  # All opinion nodes below the first opinions element, or an empty list
  # when the document has no opinions element.
  #
  def opinion_nodes
    opinions = input.at('opinions')
    opinions ? opinions.css('opinion') : []
  end

  ##
  # Polarity of an opinion as a Symbol, or nil when the opinion has no
  # opinion_expression or the expression has no polarity attribute.
  #
  def polarity_of(opinion)
    expression = opinion.at('opinion_expression')
    polarity = expression && expression.attr('polarity')
    polarity && polarity.to_sym
  end

  ##
  # (positive - negative) / (positive + negative) over the given polarities.
  # Anything other than :positive and :negative is ignored.
  #
  # @param [Array] polarities
  # @return [Float] || [NilClass] nil when nothing could be counted.
  #
  def score_from(polarities)
    positive = polarities.count(:positive)
    negative = polarities.count(:negative)
    total = positive + negative

    return if total.zero?

    (positive - negative).to_f / total
  end

end

#lemmas_hash ⇒ Hash

Returns:

  • (Hash)


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/opener/scorer/output_processor.rb', line 21

class OutputProcessor
  attr_accessor :input, :lemmas_array, :lemmas_hash, :polarities_hash

  ##
  # @param [String] input Raw XML; it is parsed with Nokogiri.
  #
  def initialize(input)
    @input = Nokogiri::XML::Document.parse(input)
    @lemmas_array = []
    @lemmas_hash = {}
    @polarities_hash = {}
  end

  ##
  # Process the document and return the scores for the available topics.
  #
  # The result contains an +:overall+ entry plus one entry per property
  # lemma. Entries for which no positive or negative polarity was found are
  # left out, so the hash may be empty.
  #
  # @return [Hash]
  #
  def process
    scores = {}

    build_lemmas_hash
    build_polarities_hash

    overall_score = get_overall_score
    scores[:overall] = overall_score if overall_score

    lemmas_hash.each_key do |topic|
      score = get_topic_score(topic)
      scores[topic] = score if score
    end

    scores
  end

  protected

  ##
  # Create a hash with all lemma ids per property and also an array with
  # all lemma ids.
  #
  def build_lemmas_hash
    input.css('features properties property').each do |property|
      lemma = property.attr('lemma')
      # A property without a lemma cannot name a topic, so it is skipped
      # instead of failing on nil.
      next unless lemma

      ids = (lemmas_hash[lemma.to_sym] ||= [])
      property.css('references target').each do |target|
        lemma_id = target.attr('id')
        lemmas_array << lemma_id
        ids << lemma_id
      end
    end
  end

  ##
  # Create a hash with all lemma ids that have a polarity.
  #
  # The polarity of an opinion is recorded for every target id found in its
  # opinion_target (when present) and in its opinion_expression. Opinions
  # without a polarity are skipped.
  #
  def build_polarities_hash
    opinion_nodes.each do |opinion|
      polarity = polarity_of(opinion)
      next unless polarity

      # Same order as before: ids of the opinion target first, then the ids
      # of the opinion expression.
      holders = [opinion.at('opinion_target'), opinion.at('opinion_expression')]
      holders.compact.each do |holder|
        holder.css('span target').each do |target|
          (polarities_hash[target.attr('id')] ||= []) << polarity
        end
      end
    end
  end

  ##
  # Get the score over the polarities of all opinions in the document.
  #
  # @return [Float] || [NilClass] nil when the document has no positive or
  #  negative opinion (including when it has no opinions element at all).
  #
  def get_overall_score
    score_from(opinion_nodes.map { |opinion| polarity_of(opinion) })
  end

  ##
  # Given a topic, return the sentiment score of the lemmas of this topic.
  #
  # @return [Float] || [NilClass]
  #
  def get_topic_score(topic)
    return calculate_score(lemmas_hash[topic]) if lemmas_hash[topic]
  end

  ##
  # Given an array of lemma ids, calculate the sentiment score.
  #
  # @param [Array] lemma_ids
  # @return [Float] || [NilClass] nil when none of the ids has a positive or
  #  negative polarity.
  #
  def calculate_score(lemma_ids)
    # Ids without a recorded polarity yield nil, which is never counted.
    score_from(lemma_ids.map { |id| polarities_hash[id] }.flatten)
  end

  ##
  # All opinion nodes below the first opinions element, or an empty list
  # when the document has no opinions element.
  #
  def opinion_nodes
    opinions = input.at('opinions')
    opinions ? opinions.css('opinion') : []
  end

  ##
  # Polarity of an opinion as a Symbol, or nil when the opinion has no
  # opinion_expression or the expression has no polarity attribute.
  #
  def polarity_of(opinion)
    expression = opinion.at('opinion_expression')
    polarity = expression && expression.attr('polarity')
    polarity && polarity.to_sym
  end

  ##
  # (positive - negative) / (positive + negative) over the given polarities.
  # Anything other than :positive and :negative is ignored.
  #
  # @param [Array] polarities
  # @return [Float] || [NilClass] nil when nothing could be counted.
  #
  def score_from(polarities)
    positive = polarities.count(:positive)
    negative = polarities.count(:negative)
    total = positive + negative

    return if total.zero?

    (positive - negative).to_f / total
  end

end

#polarities_hash ⇒ Hash

Returns:

  • (Hash)


21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/opener/scorer/output_processor.rb', line 21

class OutputProcessor
  attr_accessor :input, :lemmas_array, :lemmas_hash, :polarities_hash

  ##
  # @param [String] input Raw XML; it is parsed with Nokogiri.
  #
  def initialize(input)
    @input = Nokogiri::XML::Document.parse(input)
    @lemmas_array = []
    @lemmas_hash = {}
    @polarities_hash = {}
  end

  ##
  # Process the document and return the scores for the available topics.
  #
  # The result contains an +:overall+ entry plus one entry per property
  # lemma. Entries for which no positive or negative polarity was found are
  # left out, so the hash may be empty.
  #
  # @return [Hash]
  #
  def process
    scores = {}

    build_lemmas_hash
    build_polarities_hash

    overall_score = get_overall_score
    scores[:overall] = overall_score if overall_score

    lemmas_hash.each_key do |topic|
      score = get_topic_score(topic)
      scores[topic] = score if score
    end

    scores
  end

  protected

  ##
  # Create a hash with all lemma ids per property and also an array with
  # all lemma ids.
  #
  def build_lemmas_hash
    input.css('features properties property').each do |property|
      lemma = property.attr('lemma')
      # A property without a lemma cannot name a topic, so it is skipped
      # instead of failing on nil.
      next unless lemma

      ids = (lemmas_hash[lemma.to_sym] ||= [])
      property.css('references target').each do |target|
        lemma_id = target.attr('id')
        lemmas_array << lemma_id
        ids << lemma_id
      end
    end
  end

  ##
  # Create a hash with all lemma ids that have a polarity.
  #
  # The polarity of an opinion is recorded for every target id found in its
  # opinion_target (when present) and in its opinion_expression. Opinions
  # without a polarity are skipped.
  #
  def build_polarities_hash
    opinion_nodes.each do |opinion|
      polarity = polarity_of(opinion)
      next unless polarity

      # Same order as before: ids of the opinion target first, then the ids
      # of the opinion expression.
      holders = [opinion.at('opinion_target'), opinion.at('opinion_expression')]
      holders.compact.each do |holder|
        holder.css('span target').each do |target|
          (polarities_hash[target.attr('id')] ||= []) << polarity
        end
      end
    end
  end

  ##
  # Get the score over the polarities of all opinions in the document.
  #
  # @return [Float] || [NilClass] nil when the document has no positive or
  #  negative opinion (including when it has no opinions element at all).
  #
  def get_overall_score
    score_from(opinion_nodes.map { |opinion| polarity_of(opinion) })
  end

  ##
  # Given a topic, return the sentiment score of the lemmas of this topic.
  #
  # @return [Float] || [NilClass]
  #
  def get_topic_score(topic)
    return calculate_score(lemmas_hash[topic]) if lemmas_hash[topic]
  end

  ##
  # Given an array of lemma ids, calculate the sentiment score.
  #
  # @param [Array] lemma_ids
  # @return [Float] || [NilClass] nil when none of the ids has a positive or
  #  negative polarity.
  #
  def calculate_score(lemma_ids)
    # Ids without a recorded polarity yield nil, which is never counted.
    score_from(lemma_ids.map { |id| polarities_hash[id] }.flatten)
  end

  ##
  # All opinion nodes below the first opinions element, or an empty list
  # when the document has no opinions element.
  #
  def opinion_nodes
    opinions = input.at('opinions')
    opinions ? opinions.css('opinion') : []
  end

  ##
  # Polarity of an opinion as a Symbol, or nil when the opinion has no
  # opinion_expression or the expression has no polarity attribute.
  #
  def polarity_of(opinion)
    expression = opinion.at('opinion_expression')
    polarity = expression && expression.attr('polarity')
    polarity && polarity.to_sym
  end

  ##
  # (positive - negative) / (positive + negative) over the given polarities.
  # Anything other than :positive and :negative is ignored.
  #
  # @param [Array] polarities
  # @return [Float] || [NilClass] nil when nothing could be counted.
  #
  def score_from(polarities)
    positive = polarities.count(:positive)
    negative = polarities.count(:negative)
    total = positive + negative

    return if total.zero?

    (positive - negative).to_f / total
  end

end

Instance Method Details

#process ⇒ Hash

Process the document and return the scores for the available topics.

Returns:

  • (Hash)


39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/opener/scorer/output_processor.rb', line 39

##
# Builds the lemma and polarity lookups, then collects the overall score
# and the score of every topic into a single hash. Scores that come back
# as nil (or false) are not added to the result.
#
# @return [Hash]
#
def process
  build_lemmas_hash
  build_polarities_hash

  result = {}

  overall = get_overall_score
  result[:overall] = overall if overall

  lemmas_hash.each_key do |name|
    topic_score = get_topic_score(name)
    result[name] = topic_score if topic_score
  end

  result
end