Class: Tone
- Inherits:
-
Object
- Object
- Tone
- Defined in:
- lib/automated_metareview/tone.rb
Instance Method Summary collapse
-
#get_feature_vector(vertex, positive, negative, speller) ⇒ Object
——–.
-
#get_synonyms(vertex, threshold, speller) ⇒ Object
getSynonyms - gets synonyms for vertex - upto ‘threshold’ levels of synonyms level 1 = token level 2 = token’s synonyms …
- #identify_tone(pos_tagger, core_NLP_tagger, review_text, review_graph) ⇒ Object
Instance Method Details
#get_feature_vector(vertex, positive, negative, speller) ⇒ Object
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/automated_metareview/tone.rb', line 91 def get_feature_vector(vertex, positive, negative, speller) threshold = THRESHOLD #max distance at which synonyms can be searched feature_vector = Array.new #size of the array depends on th number of tone dimensions e.g.[positive, negative, netural] feature_vector = [0, 0] #initializing #look for the presence of token in positive set if(positive.include?(vertex.name.downcase)) feature_vector[0] = 1 # else #recursively check for synonyms of token in the positive set distance = 1 flag = 0 synonym_sets = get_synonyms(vertex, threshold, speller) #gets upto 'threshold' levels of synonms in a double dimensional array synonym_sets.each{ |set| if(positive.length - (positive - set).length > 0) feature_vector[0] = 1/distance flag = 1 end if(flag == 1) break #break out of the loop end distance+=1 #incrementing to check synonyms in the next level } end # repeat above with negative set if(negative.include?(vertex.name.downcase)) feature_vector[1] = 1 # else #recursively check for synonyms of token in the positive set distance = 1 flag = 0 synonym_sets = get_synonyms(vertex, threshold, speller) #gets upto 'threshold' levels of synonms in a double dimensional array if(!synonym_sets[1].empty?)#i.e. if there were no synonyms identified for the token avoid rechecking for [0] - since that contains the original token synonym_sets.each{ |set| if(negative.length - (negative - set).length > 0) feature_vector[1] = 1/distance flag = 1 end if(flag == 1) break #break out of the loop end distance+=1 #incrementing to check synonyms in the next level } #end of loop for synonym sets end end #end of if condition return feature_vector end |
#get_synonyms(vertex, threshold, speller) ⇒ Object
getSynonyms - gets synonyms for vertex - upto ‘threshold’ levels of synonyms
level 1 = token
level 2 = token's synonyms
...
level 'threshold' = synonyms of tokens in threshold - 1 level
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
# File 'lib/automated_metareview/tone.rb', line 152 def get_synonyms(vertex, threshold, speller) wbsim = WordnetBasedSimilarity.new if(vertex.pos_tag.nil?) pos = wbsim.determine_POS(vertex) else pos = vertex.pos_tag end revSyn = Array.new(threshold+1){Array.new} #contains synonyms for the different levels revSyn[0] << vertex.name.downcase.split(" ")[0] #holds the array of tokens whose synonyms are to be identified, # and what if the vertex had a long phrase #at first level '0' is the token itself i = 0 while i < threshold do list_new = Array.new revSyn[i].each{ |token| lemmas = WordNet::WordNetDB.find(token) #reviewLemma = revIndex.find(revToken) # if(lemmas.nil?) lemmas = WordNet::WordNetDB.find(wbsim.findStemWord(token, speller)) #revIndex.find(revStem[0]) end #select the lemma corresponding to the token's POS lemma = lemmas[0] #set the first one as the default lemma, later if one with exact POS is found, set that as the lemma lemmas.each do |l| #puts "lemma's POS :: #{l.pos} and reviewPOS :: #{pos}" if(l.pos.casecmp(pos) == 0) lemma = l end end #error handling for lemmas's without synsets that throw errors! (likely due to the dictionary file we are using) #if selected reviewLemma is not nil or empty if(!lemma.nil? and lemma != "" and !lemma.synsets.nil?) #creating arrays of all the values for synonyms, hyponyms etc. for the review token for g in 0..lemma.synsets.length - 1 #fetching the first review synset review_lemma_synset = lemma.synsets[g] #synonyms begin #error handling rev_lemma_syns = review_lemma_synset.get_relation("&") #for each synset get the values and add them to the array for h in 0..rev_lemma_syns.length - 1 #incrementing the array with new synonym words list_new = list_new + rev_lemma_syns[h].words end rescue list_new = nil end end end #end of checking if the lemma is nil or empty } #end of iterating through revSyn[level]'s tokens if(list_new.nil? or list_new.empty?) break end i+=1 #level is incremented revSyn[i] = list_new #setting synonyms end return revSyn end |
#identify_tone(pos_tagger, core_NLP_tagger, review_text, review_graph) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/automated_metareview/tone.rb', line 6 def identify_tone(pos_tagger, core_NLP_tagger, review_text, review_graph) speller = Aspell.new("en_US") speller.suggestion_mode = Aspell::NORMAL cumulative_edge_feature = Array.new cumulative_review_tone = Array.new cumulative_review_tone = [-1, -1, -1] #sum of all edge tones #extracting positive and negative words from files into arrays positive_file = "app/models/automated_metareview/positive-words.csv" negative_file = "app/models/automated_metareview/negative-words.csv" positive = Array.new negative = Array.new FasterCSV.foreach(positive_file) do |text| positive << text[0] end FasterCSV.foreach(negative_file) do |text| negative << text[0] end negative = negative + NEGATIVE_DESCRIPTORS review_edges = review_graph.edges #if the edges are nil if(review_edges.nil?) return cumulative_review_tone end wbsim = WordnetBasedSimilarity.new in_feature = Array.new out_feature = Array.new review_edges.each{ |edge| if(!edge.nil?) if(!edge.in_vertex.nil?) # puts "#### Checking for edge #{edge.in_vertex.name}" in_feature = get_feature_vector(edge.in_vertex, positive, negative, speller) end if(!edge.out_vertex.nil?) # puts "#### with outvertex #{edge.out_vertex.name}" out_feature = get_feature_vector(edge.out_vertex, positive, negative, speller) end # puts "in_feature :: [#{in_feature[0]}, #{in_feature[1]}]" # puts "out_feature :: [#{out_feature[0]}, #{out_feature[1]}]" #making sure that we don't include frequent tokens' tones while calculating cumulative edge tone (both + and -) if(!wbsim.is_frequent_word(edge.in_vertex.name) and !wbsim.is_frequent_word(edge.out_vertex.name)) cumulative_edge_feature[0] = (in_feature[0].to_f + out_feature[0].to_f)/2.to_f cumulative_edge_feature[1] = (in_feature[1].to_f + out_feature[1].to_f)/2.to_f elsif(wbsim.is_frequent_word(edge.in_vertex.name) and !wbsim.is_frequent_word(edge.out_vertex.name)) cumulative_edge_feature[0] = out_feature[0].to_f cumulative_edge_feature[1] = out_feature[1].to_f elsif(!wbsim.is_frequent_word(edge.in_vertex.name) and wbsim.is_frequent_word(edge.out_vertex.name)) cumulative_edge_feature[0] = in_feature[0].to_f cumulative_edge_feature[1] = in_feature[1].to_f else cumulative_edge_feature[0] = 0 cumulative_edge_feature[1] = 0 end # puts "cumulative_edge_feature :: [#{cumulative_edge_feature[0]}, #{cumulative_edge_feature[1]}]" if((cumulative_review_tone[0] == -1 and cumulative_review_tone[1] == -1) or (cumulative_review_tone[0] == 0 and cumulative_review_tone[1] == 0)) #has not been initialized as yet cumulative_review_tone[0] = cumulative_edge_feature[0].to_f cumulative_review_tone[1] = cumulative_edge_feature[1].to_f elsif(cumulative_edge_feature[0] > 0 or cumulative_edge_feature[1] > 0) #only edges with some tone (either vertices) are taken into consideration during cumulative edge calculation #else all edges will be considered, which may adversely affect the net tone of the review text cumulative_review_tone[0] = (cumulative_review_tone[0].to_f + cumulative_edge_feature[0].to_f)/2.to_f cumulative_review_tone[1] = (cumulative_review_tone[1].to_f + cumulative_edge_feature[1].to_f)/2.to_f end # puts "cumulative_review_tone :: [#{cumulative_review_tone[0]}, #{cumulative_review_tone[1]}]" end } # puts "cumulative tone :: positive - #{cumulative_review_tone[0]}, negative - #{cumulative_review_tone[1]}" if(cumulative_review_tone[0] == 0 and cumulative_review_tone[1] == 0) cumulative_review_tone[2] = 1 #setting neutrality value else cumulative_review_tone[2] = 0 end return cumulative_review_tone end |