Class: GraphGenerator
- Inherits:
-
Object
- Object
- GraphGenerator
- Defined in:
- lib/automated_metareview/graph_generator.rb
Instance Attribute Summary collapse
-
#edges ⇒ Object
creating accessors for the instance variables.
-
#num_edges ⇒ Object
creating accessors for the instance variables.
-
#num_vertices ⇒ Object
creating accessors for the instance variables.
-
#pipeline ⇒ Object
creating accessors for the instance variables.
-
#pos_tagger ⇒ Object
creating accessors for the instance variables.
-
#vertices ⇒ Object
creating accessors for the instance variables.
Instance Method Summary collapse
-
#find_labels(t) ⇒ Object
Identifies the labels for the vertices: looks up the dependency-relation label of each token of the given text.
-
#find_parents(t) ⇒ Object
Identifies the parents for the vertices: looks up the parent word of each token of the given text in its dependency parse.
-
#generate_graph(text, pos_tagger, coreNLPTagger, forRelevance, forPatternIdentify) ⇒ Object
-
Generates the graph for the given review text. INPUT: an array of sentences for a review or a submission.
-
-
#print_graph(edges, vertices) ⇒ Object
Prints all vertices and edges of the graph to standard output.
-
#remove_redundant_edges(in_vertex, out, index) ⇒ Object
NULLIFY ALL EDGES CONTAINING “ONLY SUBSTRINGS” (and not exact matches) OF EITHER IN/OUT VERTICES IN THE SAME SENTENCE (verts.index == index) And reset the @edges array with non-null elements.
-
#remove_redundant_vertices(s, index) ⇒ Object
NULLIFY ALL VERTICES CONTAINING “ONLY SUBSTRINGS” (and not exact matches) OF THIS VERTEX IN THE SAME SENTENCE (verts.index == index) And reset the @vertices array with non-null elements.
-
#search_edges(list, in_vertex, out, index) ⇒ Object
Checks to see if an edge between vertices “in” and “out” exists.
-
#search_edges_to_set_null(list, in_vertex, out, index) ⇒ Object
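Searches for an edge that exactly connects vertices “in” and “out” (in either direction) so that it can be set to null; returns its position, or -1.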
-
#search_vertices(list, s, index) ⇒ Object
Searches the list for a vertex named “s” (ignoring case) in the sentence with the given index.
-
#set_semantic_labels_for_edges ⇒ Object
-
Setting semantic labels for edges based on the labels vertices have with their parents.
-
Instance Attribute Details
#edges ⇒ Object
creating accessors for the instance variables
8 9 10 |
# File 'lib/automated_metareview/graph_generator.rb', line 8
# Reader for the @edges instance variable.
#
# @return [Object] whatever is currently stored in @edges
def edges
  @edges
end
#num_edges ⇒ Object
creating accessors for the instance variables
8 9 10 |
# File 'lib/automated_metareview/graph_generator.rb', line 8
# Reader for the @num_edges instance variable.
#
# @return [Object] whatever is currently stored in @num_edges
def num_edges
  @num_edges
end
#num_vertices ⇒ Object
creating accessors for the instance variables
8 9 10 |
# File 'lib/automated_metareview/graph_generator.rb', line 8
# Reader for the @num_vertices instance variable.
#
# @return [Object] whatever is currently stored in @num_vertices
def num_vertices
  @num_vertices
end
#pipeline ⇒ Object
creating accessors for the instance variables
8 9 10 |
# File 'lib/automated_metareview/graph_generator.rb', line 8
# Reader for the @pipeline instance variable.
#
# @return [Object] whatever is currently stored in @pipeline
def pipeline
  @pipeline
end
#pos_tagger ⇒ Object
creating accessors for the instance variables
8 9 10 |
# File 'lib/automated_metareview/graph_generator.rb', line 8
# Reader for the @pos_tagger instance variable.
#
# @return [Object] whatever is currently stored in @pos_tagger
def pos_tagger
  @pos_tagger
end
#vertices ⇒ Object
creating accessors for the instance variables
8 9 10 |
# File 'lib/automated_metareview/graph_generator.rb', line 8
# Reader for the @vertices instance variable.
#
# @return [Object] whatever is currently stored in @vertices
def vertices
  @vertices
end
Instance Method Details
#find_labels(t) ⇒ Object
Identifies the labels for the vertices: looks up the dependency-relation label of each token of the given text
588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 |
# File 'lib/automated_metareview/graph_generator.rb', line 588
# Looks up the dependency-relation label of every whitespace-separated token of +t+.
#
# The text is annotated with @pipeline and only the first sentence of the
# annotation is inspected. Each token has "." and "," removed and is then
# compared with the dependent word of every typed dependency; the short name
# of the first matching relation becomes the token's label.
#
# @param t [String] the sentence text
# @return [Array] labels positioned by token index; tokens without a matching
#   dependency are left nil (trailing unmatched tokens are simply absent).
#   An empty array is returned when the annotation yields no sentence.
def find_labels(t)
  tokens = t.split(" ")
  annotated = StanfordCoreNLP::Text.new(t)
  @pipeline.annotate(annotated)

  labels = []
  first_sentence = annotated.get(:sentences).toArray[0]
  # nothing to label when the annotator produced no sentence at all
  return labels if first_sentence.nil?

  parsed_sentence = first_sentence.get(:collapsed_c_c_processed_dependencies)
  dependencies = parsed_sentence.typedDependencies.toArray

  tokens.each_with_index do |token, j|
    word = token.delete(".,")
    # only #length and #[] are used on the dependency list, as in the original loop
    position = (0...dependencies.length).find { |k| dependencies[k].dep.value == word }
    labels[j] = dependencies[position].reln.getShortName unless position.nil?
  end
  labels
end
#find_parents(t) ⇒ Object
Identifies the parents for the vertices: looks up the parent word of each token of the given text in its dependency parse
548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 |
# File 'lib/automated_metareview/graph_generator.rb', line 548
# Looks up, for every whitespace-separated token of +t+, the word that is its
# parent in the dependency parse of the first annotated sentence.
#
# Tokens that TextPreprocessing#is_punct reports as punctuation are skipped
# (their slot is not written). Other tokens are first passed through
# TextPreprocessing#contains_punct; a token that contains_punct_bool still
# reports as containing punctuation is not looked up and gets a nil parent.
#
# The parent node is rendered with to_s and everything before its first "-"
# is taken as the parent's name (the original comment describes the format as
# "name-POS").
#
# @param t [String] the sentence text
# @return [Array] parent names positioned by token index; nil where no parent
#   was found. An empty array is returned when the annotation yields no sentence.
def find_parents(t)
  tp = TextPreprocessing.new
  tokens = t.split(" ")
  parents = []

  annotated = StanfordCoreNLP::Text.new(t)
  @pipeline.annotate(annotated)
  first_sentence = annotated.get(:sentences).toArray[0]
  return parents if first_sentence.nil?

  parsed_sentence = first_sentence.get(:collapsed_c_c_processed_dependencies)

  tokens.each_index do |j|
    next if tp.is_punct(tokens[j])

    cleaned = tp.contains_punct(tokens[j])
    tokens[j] = cleaned if cleaned
    token = tokens[j]

    # Reset for every token: previously the lookup result leaked from one
    # iteration to the next, so a token that skipped the lookup inherited the
    # parent of the token before it.
    parent_name = nil
    if !token.nil? && !tp.contains_punct_bool(token)
      nodes = parsed_sentence.getAllNodesByWordPattern(token).toArray
      # no node matched the token: leave the parent nil instead of querying with nil
      unless nodes[0].nil?
        first_parent = parsed_sentence.getParents(nodes[0]).toArray[0]
        unless first_parent.nil?
          rendered = first_parent.to_s
          dash = rendered.index("-")
          # keep the whole string when there is no "-" separator to cut at
          parent_name = dash.nil? ? rendered : rendered[0..dash - 1]
        end
      end
    end
    parents[j] = parent_name
  end
  parents
end
#generate_graph(text, pos_tagger, coreNLPTagger, forRelevance, forPatternIdentify) ⇒ Object
-
Generates the graph for the given review text.
* INPUT: an array of sentences for a review or a submission. Every row in 'text' contains one sentence. * type - tells you if it was a review or a submission * type = 1 - submission/past review * type = 2 - new review (note: the method signature shown here has no 'type' parameter)
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 |
# File 'lib/automated_metareview/graph_generator.rb', line 21
# Builds the word-order graph for a text and stores it in @vertices / @edges.
#
# For every non-blank sentence the method POS-tags it, fetches per-token
# parents (find_parents) and labels (find_labels) and the sentence states
# (SentenceState#identify_sentence_state), then walks the tagged tokens:
# * noun-like, adjective, verb/modal and adverb tokens become vertices;
#   consecutive tokens of the same type are merged into one vertex by
#   appending to the previous vertex's name;
# * edges are added noun<->adjective ("noun-property"), adverb<->verb
#   ("verb-property") and noun<->verb ("verb");
# * after each insertion redundant (substring) vertices/edges are pruned.
# Finally set_semantic_labels_for_edges is invoked.
#
# text               - array of sentence strings (one sentence per element)
# pos_tagger         - object responding to get_readable(sentence); the result
#                      is split on spaces into "word/TAG" tokens
# coreNLPTagger      - stored in @pipeline (used by find_parents/find_labels)
# forRelevance       - not referenced anywhere in this method body
# forPatternIdentify - not referenced anywhere in this method body
#
# Returns @num_edges after it has been decremented by one at the end.
def generate_graph(text, pos_tagger, coreNLPTagger, forRelevance, forPatternIdentify)
  # initializing common arrays
  @vertices = Array.new
  @num_vertices = 0
  @edges = Array.new
  @num_edges = 0

  @pos_tagger = pos_tagger # part of speech tagger
  @pipeline = coreNLPTagger # dependency parsing
  # iterate through the sentences in the text
  for i in (0..text.length-1)
    # skip blank sentences
    if(text[i].empty? or text[i] == "" or text[i].split(" ").empty?)
      next
    end
    unTaggedString = text[i].split(" ")
    # puts "UnTagged String:: #{unTaggedString}"
    taggedString = @pos_tagger.get_readable(text[i])
    # puts "taggedString:: #{taggedString}"

    # Per-sentence bookkeeping: the names of the vertices created so far for
    # each type, with a counter pointing one past the latest entry.
    nouns = Array.new
    nCount = 0
    verbs = Array.new
    vCount = 0
    adjectives = Array.new
    adjCount = 0
    adverbs = Array.new
    advCount = 0

    parents = Array.new
    labels = Array.new

    #------------------------------------------#------------------------------------------
    # finding parents
    parents = find_parents(text[i])
    parentCounter = 0
    #------------------------------------------#------------------------------------------
    # finding labels
    labels = find_labels(text[i])
    labelCounter = 0
    #------------------------------------------#------------------------------------------
    # find state
    sstate = SentenceState.new
    states_array = sstate.identify_sentence_state(taggedString)
    states_counter = 0
    state = states_array[states_counter]
    states_counter += 1
    #------------------------------------------#------------------------------------------

    taggedString = taggedString.split(" ")
    prevType = nil # type of the previously processed token; nil at the start of a sentence

    # iterate through the tokens
    for j in (0..taggedString.length-1)
      taggedToken = taggedString[j]
      # a tagged token has the form "word/TAG": split it at the first "/"
      plainToken = taggedToken[0...taggedToken.index("/")].to_s
      posTag = taggedToken[taggedToken.index("/")+1..taggedToken.length].to_s
      # ignore periods, "/POS" tokens, and tokens whose tag part is empty or a single character
      # NOTE(review): this `next` also skips the labelCounter/parentCounter
      # increments at the bottom of the loop, so after a skipped token the
      # counters no longer line up with the token position j - confirm intended.
      if(plainToken == "." or taggedToken.include?("/POS") or (taggedToken.index("/") == taggedToken.length()-1) or (taggedToken.index("/") == taggedToken.length()-2)) # this is for strings containing "'s" or without POS
        next
      end

      # SETTING STATE
      # since the CC or IN are part of the following sentence segment, we set the STATE for that segment when we see a CC or IN
      if(taggedToken.include?("/CC")) # (a check for "/IN" is left commented out in the original)
        state = states_array[states_counter]
        states_counter+=1
      end

      # puts("**Value:: #{plainToken} LabelCounter:: #{labelCounter} ParentCounter:: #{parentCounter} POStag:: #{posTag} .. state = #{state}")
      #------------------------------------------
      # if the token is a noun
      # (these are substring checks on the whole "word/TAG" token; only "/EX" is anchored to the tag separator)
      if(taggedToken.include?("NN") or taggedToken.include?("PRP") or taggedToken.include?("IN") or taggedToken.include?("/EX") or taggedToken.include?("WP"))
        # either add on to a previous vertex or create a brand new noun vertex
        if(prevType == NOUN) # adding to a previous noun vertex
          nCount -= 1 # decrement, since we are accessing a previous noun vertex
          prevVertex = search_vertices(@vertices, nouns[nCount], i) # fetching the previous vertex
          nouns[nCount] = nouns[nCount].to_s + " " + plainToken # concatenating with contents of the previous noun vertex
          # checking if the concatenated noun already exists among the vertices
          if((nounVertex = search_vertices(@vertices, nouns[nCount], i)) == nil)
            # NOTE(review): prevVertex is dereferenced without a nil check
            prevVertex.name = prevVertex.name.to_s + " " + plainToken # concatenating the nouns
            nounVertex = prevVertex # the current concatenated vertex will be considered
            # NOTE(review): `x != "NMOD" or x != "PMOD"` is true for every x, so the label is always reset
            if(labels[labelCounter] != "NMOD" or labels[labelCounter] != "PMOD") # resetting labels for the concatenated vertex
              nounVertex.label = labels[labelCounter]
            end
            #fAppendedVertex = 1
          end # if the vertex already exists, just use nounVertex - the returned vertex for ops.
        else # if the previous token is not a noun, create a brand new vertex
          nouns[nCount] = plainToken # this is checked for later on
          nounVertex = search_vertices(@vertices, plainToken, i)
          if(nounVertex == nil) # the string doesn't already exist
            @vertices[@num_vertices] = Vertex.new(nouns[nCount], NOUN, i, state, labels[labelCounter], parents[parentCounter], posTag)
            nounVertex = @vertices[@num_vertices] # the newly formed vertex will be considered
            @num_vertices+=1
          end
        end # end of if prevType was noun
        remove_redundant_vertices(nouns[nCount], i)
        nCount+=1 # increment nCount for a new noun vertex just created (or existing previous vertex appended with new text)

        # checking if a noun existed before this one and if the adjective was attached to that noun.
        # if an adjective was found earlier, we add a new edge
        if(prevType == ADJ)
          # set previous noun's property to null, if it was set, if there is a noun before the adjective
          if(nCount > 1)
            v1 = search_vertices(@vertices, nouns[nCount-2], i) # fetching the previous noun, the one before the current noun (therefore -2)
            v2 = search_vertices(@vertices, adjectives[adjCount-1], i) # fetching the previous adjective
            # if such an edge exists - DELETE IT - search_edges_to_set_null() returns the position in the array at which such an edge exists
            if(!v1.nil? and !v2.nil? and (e = search_edges_to_set_null(@edges, v1, v2, i)) != -1) # -1 is when no such edge exists
              @edges[e] = nil # setting the edge to null
              # if @num_edges had been previously incremented, decrement it
              # NOTE(review): the next edge is written at @edges[@num_edges]; when e
              # was not the last slot this index may already hold a live edge - verify.
              if(@num_edges > 0)
                @num_edges-=1 # deducting an edge count
              end
            end
          end
          # if this noun vertex was encountered for the first time, nCount < 1,
          # so do adding of edge outside the if condition
          # add a new edge with v1 as the adjective and v2 as the new noun
          v1 = search_vertices(@vertices, adjectives[adjCount-1], i)
          v2 = nounVertex # the noun vertex that was just created
          # if such an edge did not already exist
          if(!v1.nil? and !v2.nil? and (e = search_edges(@edges, v1, v2, i)) == -1)
            @edges[@num_edges] = Edge.new("noun-property",VERB)
            @edges[@num_edges].in_vertex = v1
            @edges[@num_edges].out_vertex = v2
            @edges[@num_edges].index = i
            @num_edges+=1
            # since an edge was just added we try to check if there exist any redundant edges that can be removed
            remove_redundant_edges(v1, v2, i)
          end
        end
        # a noun has been found and has established a verb as an in_vertex and such an edge doesn't already previously exist
        if(vCount > 0) #and fAppendedVertex == 0
          # add edge only when a fresh vertex is created not when existing vertex is appended to
          v1 = search_vertices(@vertices, verbs[vCount-1], i)
          v2 = nounVertex
          # if such an edge does not already exist add it
          if(!v1.nil? and !v2.nil? and (e = search_edges(@edges,v1, v2, i)) == -1)
            @edges[@num_edges] = Edge.new("verb", VERB)
            @edges[@num_edges].in_vertex = v1 # for vCount = 0
            @edges[@num_edges].out_vertex = v2
            @edges[@num_edges].index = i
            @num_edges+=1
            # since an edge was just added we try to check if there exist any redundant edges that can be removed
            remove_redundant_edges(v1, v2, i)
          end
        end
        prevType = NOUN
      #------------------------------------------
      # if the string is an adjective
      # adjectives are vertices but they are not connected by an edge to the nouns, instead they are the noun's properties
      elsif(taggedToken.include?("/JJ"))
        adjective = nil
        if(prevType == ADJ) # combine the adjectives
          # puts("PREV ADJ here:: #{plainToken}")
          if(adjCount >= 1)
            adjCount = adjCount - 1
            prevVertex = search_vertices(@vertices, adjectives[adjCount], i) # fetching the previous vertex
            adjectives[adjCount] = adjectives[adjCount] + " " + plainToken
            # if the concatenated vertex didn't already exist
            if((adjective = search_vertices(@vertices, adjectives[adjCount], i)).nil?)
              prevVertex.name = prevVertex.name+" "+plainToken
              adjective = prevVertex # set it as "adjective" for further execution
              # NOTE(review): condition is always true (see the noun branch)
              if(labels[labelCounter] != "NMOD" or labels[labelCounter] != "PMOD") # resetting labels for the concatenated vertex
                adjective.label = labels[labelCounter]
              end
            end
          end
        else # new adjective vertex
          adjectives[adjCount] = plainToken
          if((adjective = search_vertices(@vertices, plainToken, i)).nil?) # the string doesn't already exist
            @vertices[@num_vertices] = Vertex.new(adjectives[adjCount], ADJ, i, state, labels[labelCounter], parents[parentCounter], posTag)
            adjective = @vertices[@num_vertices]
            @num_vertices+=1
          end
        end
        remove_redundant_vertices(adjectives[adjCount], i)
        adjCount+=1 # incrementing, since a new adjective was created or an existing one updated.

        # by default associate the adjective with the previous/latest noun and if there is a noun following it immediately, then remove the property from the older noun (done under noun condition)
        if(nCount > 0) # gets the previous noun to form the edge
          v1 = search_vertices(@vertices, nouns[nCount-1], i)
          v2 = adjective # the current adjective vertex
          # if such an edge does not already exist add it
          if(!v1.nil? and !v2.nil? and (e = search_edges(@edges, v1, v2, i)) == -1)
            # puts "** Adding noun-adj edge .. #{v1.name} - #{v2.name}"
            @edges[@num_edges] = Edge.new("noun-property",VERB)
            @edges[@num_edges].in_vertex = v1
            @edges[@num_edges].out_vertex = v2
            @edges[@num_edges].index = i
            @num_edges+=1
            # since an edge was just added we try to check if there exist any redundant edges that can be removed
            remove_redundant_edges(v1, v2, i)
          end
        end
        prevType = ADJ
        # end of if condition for adjective
      #------------------------------------------
      # if the string is a verb or a modal
      elsif(taggedToken.include?("/VB") or taggedToken.include?("MD"))
        verbVertex = nil
        if(prevType == VERB) # combine the verbs
          vCount = vCount - 1
          prevVertex = search_vertices(@vertices, verbs[vCount], i) # fetching the previous vertex
          verbs[vCount] = verbs[vCount] + " " + plainToken
          # if the concatenated vertex didn't already exist
          if((verbVertex = search_vertices(@vertices, verbs[vCount], i)) == nil)
            prevVertex.name = prevVertex.name + " " + plainToken
            verbVertex = prevVertex # concatenated vertex becomes the new verb vertex
            # NOTE(review): condition is always true (see the noun branch)
            if(labels[labelCounter] != "NMOD" or labels[labelCounter] != "PMOD") # resetting labels for the concatenated vertex
              verbVertex.label = labels[labelCounter]
            end
          end
        else
          verbs[vCount] = plainToken
          if((verbVertex = search_vertices(@vertices, plainToken, i)) == nil)
            @vertices[@num_vertices] = Vertex.new(plainToken, VERB, i, state, labels[labelCounter], parents[parentCounter], posTag)
            verbVertex = @vertices[@num_vertices] # newly created verb vertex will be considered in the future
            @num_vertices+=1
          end
        end
        remove_redundant_vertices(verbs[vCount], i)
        vCount+=1

        # if an adverb was found earlier, we set that as the verb's property
        if(prevType == ADV)
          # set previous verb's property to null, if it was set, if there is a verb following the adverb
          if(vCount > 1)
            v1 = search_vertices(@vertices, verbs[vCount-2], i) # fetching the previous verb, the one before the current one (hence -2)
            v2 = search_vertices(@vertices, adverbs[advCount-1], i) # fetching the previous adverb
            # if such an edge exists - DELETE IT
            if(!v1.nil? and !v2.nil? and (e = search_edges_to_set_null(@edges, v1, v2, i)) != -1)
              @edges[e] = nil # setting the edge to null
              if(@num_edges > 0)
                @num_edges-=1 # deducting an edge count
              end
            end
          end
          # if this verb vertex was encountered for the first time, vCount < 1,
          # so do adding of edge outside the if condition
          # add a new edge with v1 as the adverb and v2 as the new verb
          v1 = search_vertices(@vertices, adverbs[advCount-1], i)
          v2 = verbVertex
          # if such an edge did not already exist
          if(!v1.nil? and !v2.nil? and (e = search_edges(@edges, v1, v2, i)) == -1)
            @edges[@num_edges] = Edge.new("verb-property",VERB)
            @edges[@num_edges].in_vertex = v1
            @edges[@num_edges].out_vertex = v2
            @edges[@num_edges].index = i
            @num_edges+=1
            # since an edge was just added we try to check if there exist any redundant edges that can be removed
            remove_redundant_edges(v1, v2, i)
          end
        end

        # making the previous noun, one of the vertices of the verb edge
        if(nCount > 0) #and fAppendedVertex == 0
          # gets the previous noun to form the edge
          v1 = search_vertices(@vertices, nouns[nCount-1], i)
          v2 = verbVertex
          # if such an edge does not already exist add it
          if(!v1.nil? and !v2.nil? and (e = search_edges(@edges, v1, v2, i)) == -1)
            @edges[@num_edges] = Edge.new("verb",VERB)
            @edges[@num_edges].in_vertex = v1 # the previous noun
            @edges[@num_edges].out_vertex = v2 # the verb
            @edges[@num_edges].index = i
            @num_edges+=1
            # since an edge was just added we try to check if there exist any redundant edges that can be removed
            remove_redundant_edges(v1, v2, i)
          end
        end
        prevType = VERB
      #------------------------------------------
      # if the string is an adverb
      elsif(taggedToken.include?("RB"))
        adverb = nil
        if(prevType == ADV) # appending to existing adverb
          if(advCount >= 1)
            advCount = advCount - 1
          end
          prevVertex = search_vertices(@vertices, adverbs[advCount], i) # fetching the previous vertex
          adverbs[advCount] = adverbs[advCount] + " " + plainToken
          # if the concatenated vertex didn't already exist
          if((adverb = search_vertices(@vertices, adverbs[advCount], i)) == nil)
            prevVertex.name = prevVertex.name + " " + plainToken
            adverb = prevVertex # setting it as "adverb" for further computation
            # NOTE(review): condition is always true (see the noun branch)
            if(labels[labelCounter] != "NMOD" or labels[labelCounter] != "PMOD") # resetting labels for the concatenated vertex
              adverb.label = labels[labelCounter]
            end
          end
        else # else creating a new vertex
          adverbs[advCount] = plainToken
          if((adverb = search_vertices(@vertices, plainToken, i)) == nil)
            @vertices[@num_vertices] = Vertex.new(adverbs[advCount], ADV, i, state, labels[labelCounter], parents[parentCounter], posTag);
            adverb = @vertices[@num_vertices]
            @num_vertices+=1
          end
        end
        remove_redundant_vertices(adverbs[advCount], i)
        advCount+=1

        # by default associate it with the previous/latest verb and if there is a verb following it immediately, then remove the property from the verb
        if(vCount > 0) # gets the previous verb to form a verb-adverb edge
          v1 = search_vertices(@vertices, verbs[vCount-1], i)
          v2 = adverb
          # if such an edge does not already exist add it
          if(!v1.nil? and !v2.nil? && (e = search_edges(@edges, v1, v2, i)) == -1)
            @edges[@num_edges] = Edge.new("verb-property",VERB)
            @edges[@num_edges].in_vertex = v1 # the previous verb
            @edges[@num_edges].out_vertex = v2 # the adverb
            @edges[@num_edges].index = i
            @num_edges+=1
            # since an edge was just added we try to check if there exist any redundant edges that can be removed
            remove_redundant_edges(v1, v2, i)
          end
        end
        prevType = ADV
        # end of if condition for adverb
      end # end of if condition
      #------------------------------------------
      # incrementing counters for labels and parents
      labelCounter+=1
      parentCounter+=1
    end # end of the for loop for the tokens
    #puts "here outside the for loop for tokens"
    nouns = nil
    verbs = nil
    adjectives = nil
    adverbs = nil
  end # end of number of sentences in the text

  @num_vertices = @num_vertices - 1 # since as a counter it was 1 ahead of the array's contents
  @num_edges = @num_edges - 1 # same reason as for num_vertices
  set_semantic_labels_for_edges
  #print_graph(@edges, @vertices)
  # puts("Number of edges:: #{@num_edges}")
  # puts("Number of vertices:: #{@num_vertices}")
  return @num_edges
end
#print_graph(edges, vertices) ⇒ Object
Prints all vertices and edges of the graph to standard output
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 |
# File 'lib/automated_metareview/graph_generator.rb', line 525
# Prints the graph to standard output.
#
# Every non-nil vertex is listed with its name, frequency, state, label and
# parent; every non-nil edge that has both an in- and an out-vertex is listed
# with the two vertex names, its frequency, the two vertex states and its label.
#
# @param edges [Array] edges (nil entries are skipped)
# @param vertices [Array] vertices (nil entries are skipped)
def print_graph(edges, vertices)
  puts("*** List of vertices::")
  vertices.each do |vertex|
    next if vertex.nil?

    puts("@@@ Vertex:: #{vertex.name}")
    puts("*** Frequency:: #{vertex.frequency} State:: #{vertex.state}")
    puts("*** Label:: #{vertex.label} Parent:: #{vertex.parent}")
  end
  puts("*******")
  puts("*** List of edges::")
  edges.each do |edge|
    # an edge is only printable when both of its endpoints are present
    next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?

    puts("@@@ Edge:: #{edge.in_vertex.name} & #{edge.out_vertex.name}")
    puts("*** Frequency:: #{edge.frequency} State:: #{edge.in_vertex.state} & #{edge.out_vertex.state}")
    puts("*** Label:: #{edge.label}")
  end
  puts("--------------")
end
#remove_redundant_edges(in_vertex, out, index) ⇒ Object
NULLIFY ALL EDGES CONTAINING “ONLY SUBSTRINGS” (and not exact matches) OF EITHER IN/OUT VERTICES IN THE SAME SENTENCE (verts.index == index) And reset the @edges array with non-null elements.
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 |
# File 'lib/automated_metareview/graph_generator.rb', line 488
# Drops edges of sentence +index+ that are made redundant by the edge
# in_vertex -> out, then rebuilds @edges without nil entries.
#
# An edge is redundant when, ignoring case, either
# * its in-vertex name equals in_vertex's name and its out-vertex name is a
#   proper substring of out's name (contained in it, but not equal), or
# * its out-vertex name equals out's name and its in-vertex name is a proper
#   substring of in_vertex's name.
#
# Only the first @num_edges slots are examined for redundancy; nil entries are
# removed from the whole array.
#
# @param in_vertex [#name] in-vertex of the edge that was just added
# @param out [#name] out-vertex of the edge that was just added
# @param index [Object] sentence index; only edges with this index are considered
#
# Side effects: replaces @edges with the compacted array and sets @num_edges
# to the number of remaining edges plus one.
def remove_redundant_edges(in_vertex, out, index)
  in_name = in_vertex.name
  out_name = out.name
  scan_limit = @num_edges

  kept = []
  @edges.each_with_index do |edge, position|
    next if edge.nil?

    # Edges lacking an endpoint cannot be compared by name; keep them untouched
    # (other methods in this class apply the same nil guards).
    comparable = position < scan_limit && edge.index == index &&
                 !edge.in_vertex.nil? && !edge.out_vertex.nil?
    if comparable
      same_in = in_name.casecmp(edge.in_vertex.name) == 0
      same_out = out_name.casecmp(edge.out_vertex.name) == 0
      out_contained = out_name.downcase.include?(edge.out_vertex.name.downcase)
      in_contained = in_name.downcase.include?(edge.in_vertex.name.downcase)
      next if (same_in && !same_out && out_contained) ||
              (!same_in && in_contained && same_out)
    end
    kept << edge
  end

  @edges = kept
  # generate_graph subtracts 1 from @num_edges when it finishes, so the
  # "count + 1" convention of the original is kept as is.
  @num_edges = kept.length + 1
end
#remove_redundant_vertices(s, index) ⇒ Object
NULLIFY ALL VERTICES CONTAINING “ONLY SUBSTRINGS” (and not exact matches) OF THIS VERTEX IN THE SAME SENTENCE (verts.index == index) And reset the @vertices array with non-null elements.
385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 |
# File 'lib/automated_metareview/graph_generator.rb', line 385
# Drops vertices of sentence +index+ whose name is a proper substring of +s+
# (contained in it ignoring case, but not equal to it), nils out every edge
# touching such a vertex, and rebuilds @vertices without nil entries.
#
# Vertices with a one-character name are never removed (the original comment
# gives "I" as the example to protect). Only the first @num_vertices slots are
# examined for redundancy; nil entries are removed from the whole array.
#
# @param s [String, nil] name of the vertex that was just created or extended;
#   with nil nothing is considered redundant and the array is only compacted
# @param index [Object] sentence index; only vertices with this index are considered
#
# Side effects: replaces @vertices with the compacted array, sets
# @num_vertices to the number of remaining vertices plus one, and sets matching
# entries of @edges to nil in place (@edges is not compacted and @num_edges is
# not changed).
def remove_redundant_vertices(s, index)
  scan_limit = @num_vertices
  lowered = s.nil? ? nil : s.downcase

  kept = []
  @vertices.each_with_index do |vertex, position|
    next if vertex.nil?

    redundant = !s.nil? && position < scan_limit && vertex.index == index &&
                s.casecmp(vertex.name) != 0 &&
                lowered.include?(vertex.name.downcase) && vertex.name.length > 1
    unless redundant
      kept << vertex
      next
    end

    # detach every edge that starts or ends at the vertex being removed
    unless @edges.nil?
      @edges.map! do |edge|
        touches = !edge.nil? && (edge.in_vertex == vertex || edge.out_vertex == vertex)
        touches ? nil : edge
      end
    end
  end

  @vertices = kept
  # generate_graph subtracts 1 from @num_vertices when it finishes, so the
  # "count + 1" convention of the original is kept as is.
  @num_vertices = kept.length + 1
end
#search_edges(list, in_vertex, out, index) ⇒ Object
Checks to see if an edge between vertices “in” and “out” exists.
Returns the position of the matching edge in the list if an edge exists, and -1 if an edge doesn't exist
edge[] list, vertex in, vertex out, int index
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 |
# File 'lib/automated_metareview/graph_generator.rb', line 433
# Looks for an edge connecting +in_vertex+ and +out+, in either direction.
#
# An edge endpoint matches a vertex when its name equals the vertex's name
# ignoring case, or contains the vertex's name (case-sensitive substring).
# Every matching edge that belongs to a different sentence than +index+ has
# its frequency incremented.
#
# @param list [Array, nil] edges to search (nil entries and edges lacking an
#   endpoint are skipped)
# @param in_vertex [#name] one endpoint
# @param out [#name] the other endpoint
# @param index [Object] sentence index of the caller
# @return [Integer] position of the LAST matching edge in +list+, or -1 when
#   there is none or +list+ is nil
def search_edges(list, in_vertex, out, index)
  edge_pos = -1
  return edge_pos if list.nil?

  name_matches = lambda do |edge_name, wanted|
    edge_name.casecmp(wanted) == 0 || edge_name.include?(wanted)
  end

  list.each_with_index do |edge, position|
    next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?

    edge_in = edge.in_vertex.name
    edge_out = edge.out_vertex.name
    forward = name_matches.call(edge_in, in_vertex.name) && name_matches.call(edge_out, out.name)
    backward = forward ||
               (name_matches.call(edge_in, out.name) && name_matches.call(edge_out, in_vertex.name))
    next unless backward

    edge_pos = position
    # the same edge seen again in another sentence counts as one more occurrence
    edge.frequency += 1 if index != edge.index
  end
  edge_pos
end
#search_edges_to_set_null(list, in_vertex, out, index) ⇒ Object
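Searches for an edge that exactly connects vertices “in” and “out” (in either direction) so that it can be set to null; returns its position, or -1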
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 |
# File 'lib/automated_metareview/graph_generator.rb', line 460
# Looks for an edge whose endpoint names are exactly (ignoring case) the names
# of +in_vertex+ and +out+, in either direction.
#
# Unlike search_edges, substring matches do not count. Only the first
# @num_edges slots of +list+ are examined. Every matching edge that belongs to
# a different sentence than +index+ has its frequency incremented.
#
# @param list [Array, nil] edges to search (nil entries and edges lacking an
#   endpoint are skipped)
# @param in_vertex [#name, nil] one endpoint
# @param out [#name, nil] the other endpoint
# @param index [Object] sentence index of the caller
# @return [Integer] position of the LAST matching edge, or -1 when there is
#   none (also when +list+ or either vertex is nil)
def search_edges_to_set_null(list, in_vertex, out, index)
  edge_pos = -1
  return edge_pos if list.nil? || in_vertex.nil? || out.nil?

  wanted_in = in_vertex.name.downcase
  wanted_out = out.name.downcase

  (0...@num_edges).each do |position|
    edge = list[position]
    next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?

    edge_in = edge.in_vertex.name.downcase
    edge_out = edge.out_vertex.name.downcase
    same_direction = edge_in == wanted_in && edge_out == wanted_out
    reversed = edge_in == wanted_out && edge_out == wanted_in
    next unless same_direction || reversed

    edge_pos = position
    edge.frequency += 1 if index != edge.index
  end
  edge_pos
end
#search_vertices(list, s, index) ⇒ Object
Searches the list for a vertex named “s” (ignoring case) in the sentence with the given index
365 366 367 368 369 370 371 372 373 374 375 376 377 |
# File 'lib/automated_metareview/graph_generator.rb', line 365
# Finds the first vertex in +list+ whose name equals +s+ ignoring case and
# whose index equals +index+.
#
# @param list [Array, nil] vertices to search (nil entries are skipped)
# @param s [String, nil] vertex name to look for
# @param index [Object] sentence index the vertex must belong to
# @return [Object, nil] the matching vertex, or nil when there is none or
#   when +list+ or +s+ is nil
def search_vertices(list, s, index)
  return nil if list.nil? || s.nil?

  list.find do |vertex|
    !vertex.nil? && vertex.name.casecmp(s) == 0 && vertex.index == index
  end
end
#set_semantic_labels_for_edges ⇒ Object
-
Setting semantic labels for edges based on the labels vertices have with their parents
621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 |
# File 'lib/automated_metareview/graph_generator.rb', line 621
# Assigns semantic labels to edges based on the label each vertex has with
# its parent.
#
# For every vertex that names a parent, the parent vertex is the first vertex
# whose name equals the parent name ignoring case or contains it. Every edge
# that connects the vertex and that parent by name (in either direction)
# takes the vertex's label when
# * the edge has no label yet, or
# * the edge's label is "NMOD" or "PMOD" and the vertex's label is neither.
#
# Fixes relative to the earlier implementation:
# * the parent is looked up afresh for every vertex; before, a vertex whose
#   parent could not be found silently reused the parent of an earlier vertex;
# * names are compared by content (==) instead of object identity (equal?);
# * the replacement test `label != "NMOD" or label != "PMOD"` was always
#   true; it now requires the vertex label to be neither of the two.
#
# Reads @vertices and @edges; mutates the label of matching edges.
def set_semantic_labels_for_edges
  weak_labels = ["NMOD", "PMOD"]

  @vertices.each do |vertex|
    next if vertex.nil? || vertex.parent.nil? # parent is nil for ROOT

    parent_name = vertex.parent
    parent = @vertices.find do |candidate|
      !candidate.nil? &&
        (candidate.name.casecmp(parent_name) == 0 ||
         candidate.name.downcase.include?(parent_name.downcase))
    end
    next if parent.nil?

    @edges.each do |edge|
      next if edge.nil? || edge.in_vertex.nil? || edge.out_vertex.nil?

      child_to_parent = edge.in_vertex.name == vertex.name && edge.out_vertex.name == parent.name
      parent_to_child = edge.in_vertex.name == parent.name && edge.out_vertex.name == vertex.name
      next unless child_to_parent || parent_to_child

      replaceable = edge.label.nil? ||
                    (weak_labels.include?(edge.label) && !weak_labels.include?(vertex.label))
      edge.label = vertex.label if replaceable
    end
  end
end