Class: SentenceState

Inherits:
Object
  • Object
show all
Defined in:
lib/automated_metareview/sentence_state.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#broken_sentencesObject

Returns the value of attribute broken_sentences.



5
6
7
# File 'lib/automated_metareview/sentence_state.rb', line 5

# Reader for the @broken_sentences instance variable, which
# break_at_coordinating_conjunctions fills with sentence segments.
# Returns nil when nothing has been stored yet.
def broken_sentences
  return @broken_sentences
end

Instance Method Details

#break_at_coordinating_conjunctions(str_with_pos_tags) ⇒ Object

——————————————#——————————————



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/automated_metareview/sentence_state.rb', line 25

# Splits a POS-tagged sentence into segments at coordinating conjunctions and
# stores the segments in @broken_sentences.
#
# The input is split on whitespace and every token is expected to look like
# "word/TAG" (e.g. "run/NN on/IN"). A token counts as a conjunction only when
# the text after its last "/" starts with "CC"; a word that merely contains
# the letters "CC" (e.g. "ACCESS/NN") does not split the sentence.
#
# When no conjunction is present the original string is stored unchanged as
# the single segment. Otherwise each segment is rebuilt from the tokens with
# the tag dropped but the "/" kept ("word/"), the conjunction starts the
# segment that follows it, and the first segment starts with a space (or is
# empty when the sentence begins with the conjunction).
#
# @param str_with_pos_tags [String] whitespace-separated "word/TAG" tokens
# @return [Integer] number of segments stored in @broken_sentences
def break_at_coordinating_conjunctions(str_with_pos_tags)
  tagged_tokens = str_with_pos_tags.split(" ")
  @broken_sentences = []

  # Look at the tag only, never at the word, when detecting a conjunction.
  is_conjunction = lambda do |token|
    slash = token.rindex("/")
    !slash.nil? && token[(slash + 1)..-1].start_with?("CC")
  end

  # Nothing to split on: keep the tagged sentence intact as the only segment.
  unless tagged_tokens.any? { |token| is_conjunction.call(token) }
    @broken_sentences << str_with_pos_tags
    return @broken_sentences.length
  end

  segment = ""
  tagged_tokens.each do |token|
    slash = token.index("/")
    # Keep everything up to and including the first "/"; a token without a
    # tag is kept whole.
    word = slash.nil? ? token : token[0..slash]
    if is_conjunction.call(token)
      @broken_sentences << segment #for "run/NN on/IN..."
      segment = word #the CC goes as part of the following sentence
    else
      segment = segment + " " + word
    end
  end
  @broken_sentences << segment unless segment.empty? #setting the last sentence segment

  return @broken_sentences.length
end

#identify_sentence_state(str_with_pos_tags) ⇒ Object



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/automated_metareview/sentence_state.rb', line 6

# Computes one state per sentence segment.
#
# The sentence is first split with break_at_coordinating_conjunctions, which
# fills @broken_sentences and returns the segment count; sentence_state is
# then applied to every non-nil segment and the results are stored at the
# same index as the segment. If @broken_sentences is nil the whole sentence is
# evaluated as a single segment.
#
# @param str_with_pos_tags [String] POS-tagged sentence
# @return [Array] states, one per segment
def identify_sentence_state(str_with_pos_tags)
  segment_count = break_at_coordinating_conjunctions(str_with_pos_tags)
  states = []

  if @broken_sentences.nil?
    states[0] = sentence_state(str_with_pos_tags)
    return states
  end

  # The upper bound is inclusive, so the last index may be past the end of
  # the segment list; the nil check skips it.
  0.upto(segment_count) do |idx|
    segment = @broken_sentences[idx]
    next if segment.nil?
    states[idx] = sentence_state(segment)
  end
  return states
end

#is_negative_descriptor(word) ⇒ Object

Checking if the token is a negative descriptor (compared case-insensitively against NEGATIVE_DESCRIPTORS)



241
242
243
244
245
246
247
248
249
250
# File 'lib/automated_metareview/sentence_state.rb', line 241

# Checks whether the word equals, ignoring case, any entry of
# NEGATIVE_DESCRIPTORS.
#
# @param word [String] token to look up
# @return NEGATED when a match exists, POSITIVE otherwise
def is_negative_descriptor(word)
  matched = NEGATIVE_DESCRIPTORS.any? { |descriptor| word.casecmp(descriptor) == 0 }
  matched ? NEGATED : POSITIVE
end

#is_negative_phrase(phrase) ⇒ Object

Checking if the phrase is negative



255
256
257
258
259
260
261
262
263
264
# File 'lib/automated_metareview/sentence_state.rb', line 255

# Checks whether the phrase equals, ignoring case, any entry of
# NEGATIVE_PHRASES.
#
# @param phrase [String] phrase to look up
# @return NEGATED when a match exists, POSITIVE otherwise
def is_negative_phrase(phrase)
  NEGATIVE_PHRASES.each do |candidate|
    # stop at the first matching phrase
    return NEGATED if phrase.casecmp(candidate) == 0
  end
  POSITIVE
end

#is_negative_word(word) ⇒ Object

Checking if the token is a negative token



228
229
230
231
232
233
234
235
236
237
# File 'lib/automated_metareview/sentence_state.rb', line 228

# Checks whether the word equals, ignoring case, any entry of NEGATED_WORDS.
#
# @param word [String] token to look up
# @return NEGATED when a match exists, POSITIVE otherwise
def is_negative_word(word)
  hit = NEGATED_WORDS.index { |negation| word.casecmp(negation) == 0 }
  hit.nil? ? POSITIVE : NEGATED
end

#is_suggestive(word) ⇒ Object

——————————————#——————————————

Checking if the token is a suggestive token



268
269
270
271
272
273
274
275
276
277
278
# File 'lib/automated_metareview/sentence_state.rb', line 268

# Checks whether the word equals, ignoring case, any entry of
# SUGGESTIVE_WORDS.
#
# @param word [String] token to look up
# @return SUGGESTIVE when a match exists, POSITIVE otherwise
def is_suggestive(word)
  # a single case-insensitive hit is enough to mark the word as suggestive
  return SUGGESTIVE if SUGGESTIVE_WORDS.any? { |candidate| word.casecmp(candidate) == 0 }
  POSITIVE
end

#is_suggestive_phrase(phrase) ⇒ Object

Checking if the PHRASE is suggestive



282
283
284
285
286
287
288
289
290
291
# File 'lib/automated_metareview/sentence_state.rb', line 282

# Checks whether the phrase equals, ignoring case, any entry of
# SUGGESTIVE_PHRASES.
#
# @param phrase [String] phrase to look up
# @return SUGGESTIVE when a match exists, POSITIVE otherwise
def is_suggestive_phrase(phrase)
  unmatched = SUGGESTIVE_PHRASES.none? { |candidate| phrase.casecmp(candidate) == 0 }
  unmatched ? POSITIVE : SUGGESTIVE
end

#sentence_state(str_with_pos_tags) ⇒ Object

Determining the state (positive, negated or suggestive) of a POS-tagged sentence segment by classifying each of its tokens



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/automated_metareview/sentence_state.rb', line 59

# Determines the overall state of one POS-tagged sentence segment.
#
# The segment is split on whitespace; a token may be "word/TAG" or a bare
# word. Each word (tag removed, punctuation removed) is classified as a
# negative word, negative descriptor, negative phrase, suggestive word/phrase
# or positive, using is_negative_word, is_negative_descriptor,
# is_negative_phrase, is_suggestive and is_suggestive_phrase. The
# classifications are folded left to right into a running state so that, for
# example, a negation followed by a negative descriptor cancels out
# ("not bad").
#
# Every token contributes exactly one entry to the word list, including
# tokens that carry punctuation, and the second word of a matched two-word
# phrase is consumed together with the first instead of being classified
# again on its own.
#
# @param str_with_pos_tags [String] whitespace-separated tokens
# @return POSITIVE, SUGGESTIVE, or NEGATED (any of the three negative states
#   is reported as NEGATED)
def sentence_state(str_with_pos_tags)
  state = POSITIVE
  tokens = []        # bare words: tag and punctuation removed
  tagged_tokens = [] # raw tokens, index-aligned with tokens
  # true once a noun/pronoun/verb/modal has been seen after a negation
  interim_noun_verb = false

  #fetching all the tokens
  str_with_pos_tags.split(" ").each do |ps|
    #setting the tagged string
    tagged_tokens << ps
    ps = ps[0..ps.index("/")-1] if ps.include?("/")
    #removing punctuations
    if ps.include?(".")
      tokens << ps[0..ps.index(".")-1] # keep only the text before the first period
    elsif ps.include?(",")
      tokens << ps.gsub(",", "")
    elsif ps.include?("!")
      tokens << ps.gsub("!", "")
    elsif ps.include?(";")
      tokens << ps.gsub(";", "")
    else
      tokens << ps
    end
  end

  #iterating through the tokens to determine state
  prev_negative_word = ""
  # A while loop is used because matching a 2-gram must advance the index by
  # two; reassigning the index of a `for` loop has no effect on iteration.
  j = 0
  while j < tokens.length
    #checking type of the word
    #checking for negated words
    if is_negative_word(tokens[j]) == NEGATED
      returned_type = NEGATIVE_WORD
    #checking for a negative descriptor (indirect indicators of negation)
    elsif is_negative_descriptor(tokens[j]) == NEGATED
      returned_type = NEGATIVE_DESCRIPTOR
    #2-gram phrases of negative phrases
    elsif j + 1 < tokens.length && is_negative_phrase(tokens[j] + " " + tokens[j + 1]) == NEGATED
      returned_type = NEGATIVE_PHRASE
      j += 1 # consume the second word of the phrase
    #if suggestion word is found
    elsif is_suggestive(tokens[j]) == SUGGESTIVE
      returned_type = SUGGESTIVE
    #2-gram phrases suggestion phrases
    elsif j + 1 < tokens.length && is_suggestive_phrase(tokens[j] + " " + tokens[j + 1]) == SUGGESTIVE
      returned_type = SUGGESTIVE
      j += 1 # consume the second word of the phrase
    #else set to positive
    else
      returned_type = POSITIVE
    end

    #----------------------------------------------------------------------
    #comparing 'returned_type' with the existing STATE of the sentence clause
    #if present state is negative and an interim non-negative or non-suggestive word was found, set the flag to true
    if (state == NEGATIVE_WORD || state == NEGATIVE_DESCRIPTOR || state == NEGATIVE_PHRASE) && returned_type == POSITIVE
      if !interim_noun_verb && %w[NN PR VB MD].any? { |tag| tagged_tokens[j].include?(tag) }
        interim_noun_verb = true
      end
    end

    if state == POSITIVE && returned_type != POSITIVE
      state = returned_type
    #when state is a negative word
    elsif state == NEGATIVE_WORD #previous state
      if returned_type == NEGATIVE_WORD
        #these words embellish the negation, so only if the previous word was not one of them you make it positive
        if prev_negative_word.casecmp("NO") != 0 && prev_negative_word.casecmp("NEVER") != 0 && prev_negative_word.casecmp("NONE") != 0
          state = POSITIVE #e.g: "not had no work..", "doesn't have no work..", "its not that it doesn't bother me..."
        else
          state = NEGATIVE_WORD #e.g: "no it doesn't help", "no there is no use for ..."
        end
        interim_noun_verb = false #resetting
      elsif returned_type == NEGATIVE_DESCRIPTOR || returned_type == NEGATIVE_PHRASE
        state = POSITIVE #e.g.: "not bad", "not taken from", "I don't want nothing", "no code duplication" ("It couldn't be more confusing.." is an anomaly that is not handled)
        interim_noun_verb = false #resetting
      elsif returned_type == SUGGESTIVE
        #e.g. " it is not too useful as people could...", what about this one?
        if interim_noun_verb #there are some words in between
          state = NEGATIVE_WORD
        else
          state = SUGGESTIVE #e.g.:"I do not(-) suggest(S) ..."
        end
        interim_noun_verb = false #resetting
      end
    #when state is a negative descriptor
    elsif state == NEGATIVE_DESCRIPTOR
      if returned_type == NEGATIVE_WORD
        if interim_noun_verb #there are some words in between
          state = NEGATIVE_WORD #e.g: "hard(-) to understand none(-) of the comments"
        else
          state = POSITIVE #e.g."He hardly not...."
        end
        interim_noun_verb = false #resetting
      elsif returned_type == NEGATIVE_DESCRIPTOR
        if interim_noun_verb #there are some words in between
          state = NEGATIVE_DESCRIPTOR #e.g:"there is barely any code duplication"
        else
          state = POSITIVE #e.g."It is hardly confusing..", but what about "it is a little confusing.."
        end
        interim_noun_verb = false #resetting
      elsif returned_type == NEGATIVE_PHRASE
        if interim_noun_verb #there are some words in between
          state = NEGATIVE_PHRASE #e.g:"there is barely any code duplication"
        else
          state = POSITIVE #e.g.:"it is hard and appears to be taken from"
        end
        interim_noun_verb = false #resetting
      elsif returned_type == SUGGESTIVE
        state = SUGGESTIVE #e.g.:"I hardly(-) suggested(S) ..."
        interim_noun_verb = false #resetting
      end
    #when state is a negative phrase
    elsif state == NEGATIVE_PHRASE
      if returned_type == NEGATIVE_WORD
        if interim_noun_verb #there are some words in between
          state = NEGATIVE_WORD #e.g."It is too short the text and doesn't"
        else
          state = POSITIVE #e.g."It is too short not to contain.."
        end
        interim_noun_verb = false #resetting
      elsif returned_type == NEGATIVE_DESCRIPTOR
        state = NEGATIVE_DESCRIPTOR #e.g."It is too short barely covering..."
        interim_noun_verb = false #resetting
      elsif returned_type == NEGATIVE_PHRASE
        state = NEGATIVE_PHRASE #e.g.:"it is too short, taken from ..."
        interim_noun_verb = false #resetting
      elsif returned_type == SUGGESTIVE
        state = SUGGESTIVE #e.g.:"I too short and I suggest ..."
        interim_noun_verb = false #resetting
      end
    #when state is suggestive
    elsif state == SUGGESTIVE #e.g.:"I might(S) not(-) suggest(S) ..."
      if returned_type == NEGATIVE_DESCRIPTOR
        state = NEGATIVE_DESCRIPTOR
      elsif returned_type == NEGATIVE_PHRASE
        state = NEGATIVE_PHRASE
      end
      #e.g.:"I suggest you don't.." -> suggestive
      interim_noun_verb = false #resetting
    end

    #setting the prev_negative_word (after a 2-gram match, tokens[j] is the second word of the phrase)
    if tokens[j].casecmp("NO") == 0 || tokens[j].casecmp("NEVER") == 0 || tokens[j].casecmp("NONE") == 0
      prev_negative_word = tokens[j]
    end

    j += 1
  end #end of the token loop

  #callers only distinguish negated from positive/suggestive
  if state == NEGATIVE_DESCRIPTOR || state == NEGATIVE_WORD || state == NEGATIVE_PHRASE
    state = NEGATED
  end

  return state
end