Class: PROIEL::Converter::CoNLLU::Token
- Inherits:
-
Object
- Object
- PROIEL::Converter::CoNLLU::Token
- Defined in:
- lib/proiel/cli/converters/conll-u.rb
Constant Summary collapse
- MORPHOLOGY_POSITIONAL_TAG_SEQUENCE =
[ :person, :number, :tense, :mood, :voice, :gender, :case, :degree, :strength, :inflection ]
Instance Attribute Summary collapse
-
#citation_part ⇒ Object
readonly
Returns the value of attribute citation_part.
-
#empty_token_sort ⇒ Object
readonly
Returns the value of attribute empty_token_sort.
-
#form ⇒ Object
readonly
Returns the value of attribute form.
-
#head_id ⇒ Object
Returns the value of attribute head_id.
-
#id ⇒ Object
readonly
Returns the value of attribute id.
-
#language ⇒ Object
readonly
Returns the value of attribute language.
-
#lemma ⇒ Object
readonly
Returns the value of attribute lemma.
-
#part_of_speech ⇒ Object
readonly
Returns the value of attribute part_of_speech.
-
#relation ⇒ Object
Returns the value of attribute relation.
-
#upos ⇒ Object
Returns the value of attribute upos.
Instance Method Summary collapse
- #add_slash!(slash) ⇒ Object
-
#adjectival? ⇒ Boolean
returns true if the node is an adjective or an ordinal.
- #adverb? ⇒ Boolean
- #auxiliary? ⇒ Boolean
- #cardinal? ⇒ Boolean
-
#change_coordinations! ⇒ Object
Changes coordinations recursively from the bottom of the graph.
-
#clausal? ⇒ Boolean
A node is clausal if it is a verb and not nominalized; or it has a copula dependent; or it has a subject (e.g. in an absolute construction without a verb); or it has a subjunction dependent; or it is a relative pronoun/adverb or has a relative pronoun/adverb dependent; or if it is the root (e.g. in a nominal clause).
- #comparison_word? ⇒ Boolean
- #conj_head ⇒ Object
- #conjunction? ⇒ Boolean
- #coordinated? ⇒ Boolean
-
#copula? ⇒ Boolean
Returns true if the node has an xobj dependent and either 1) the lemma is copular or 2) the node is empty and has no pid slash or a pid slash to a node with a copular lemma.
- #count_subgraph ⇒ Object
- #dependents ⇒ Object
- #deponent? ⇒ Boolean
- #determiner? ⇒ Boolean
- #distribute_shared_modifiers! ⇒ Object
- #ellipsis? ⇒ Boolean
- #find_appositive_head ⇒ Object
- #find_highest_daughter ⇒ Object
- #find_postag(possible_postags) ⇒ Object
- #find_relation(possible_relations) ⇒ Object
- #find_remnant ⇒ Object
- #foreign? ⇒ Boolean
- #format_features(features) ⇒ Object
- #genitive? ⇒ Boolean
- #has_conjunct? ⇒ Boolean
- #has_content? ⇒ Boolean
- #has_copula? ⇒ Boolean
- #has_preposition? ⇒ Boolean
- #has_subject? ⇒ Boolean
- #head ⇒ Object
-
#initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence) ⇒ Token
constructor
A new instance of Token.
- #interjection? ⇒ Boolean
-
#invert!(new_dependent_relation = nil, new_head_relation = nil) ⇒ Object
Inverts the direction of a dependency relation.
- #is_empty? ⇒ Boolean
- #left_corner ⇒ Object
- #long? ⇒ Boolean
- #map_morphology(morph) ⇒ Object
- #map_part_of_speech! ⇒ Object
- #map_relation ⇒ Object
- #mediopassive? ⇒ Boolean
- #miscellaneous ⇒ Object
- #negation? ⇒ Boolean
- #nominal? ⇒ Boolean
- #nominalized? ⇒ Boolean
- #orphan? ⇒ Boolean
- #particle? ⇒ Boolean
- #passive? ⇒ Boolean
- #pid ⇒ Object
- #preposition? ⇒ Boolean
- #process_comparison! ⇒ Object
- #process_coordination! ⇒ Object
- #process_copula! ⇒ Object
- #process_dislocation! ⇒ Object
- #process_ellipsis! ⇒ Object
- #process_preposition! ⇒ Object
-
#process_subjunction! ⇒ Object
attach subjunctions with ‘mark’ under their verbs and promote the verb to take over the subjunction’s relation.
-
#promote!(new_sibling_relation = nil, new_dependent_relation = 'aux') ⇒ Object
promotes a node to its head’s place.
- #pronominal? ⇒ Boolean
- #proper_noun? ⇒ Boolean
- #relabel_graph! ⇒ Object
- #relative? ⇒ Boolean
- #remove_empties! ⇒ Object
- #root? ⇒ Boolean
- #siblings ⇒ Object
- #subgraph_set ⇒ Object
- #subjunction? ⇒ Boolean
- #tam_particle? ⇒ Boolean
- #to_conll ⇒ Object
- #to_graph(indents = 0) ⇒ Object
- #to_n ⇒ Object
- #to_s ⇒ Object
- #verb? ⇒ Boolean
Constructor Details
#initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence) ⇒ Token
Returns a new instance of Token.
245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 245
# Builds a token from its raw annotation values.
#
# The lemma is split on '#' into a base lemma and an optional variant.
# Morphology, when given, is converted to a feature string by
# map_morphology; otherwise the feature string is empty. The citation part
# is stored as "ref=<citation>" with every whitespace character replaced by
# an underscore (a nil citation yields just "ref="). upos starts out unset.
def initialize(id, head_id, form, lemma, part_of_speech, language, morphology, relation, empty_token_sort, slashes, citation_part, sentence)
  @id, @head_id = id, head_id
  @form, @lemma = form, lemma
  @baselemma, @variant = @lemma.split('#')
  @part_of_speech, @language = part_of_speech, language
  @morphology, @relation = morphology, relation
  @empty_token_sort = empty_token_sort
  @slashes = slashes
  @sentence = sentence
  # map_morphology reads @language and @lemma, so it has to run after them.
  @features = morphology ? map_morphology(morphology) : ''
  @citation_part = 'ref=' + (citation_part || '').gsub(/\s/, '_')
  @upos = nil
end
Instance Attribute Details
#citation_part ⇒ Object (readonly)
Returns the value of attribute citation_part.
243 244 245 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 243 def citation_part @citation_part end |
#empty_token_sort ⇒ Object (readonly)
Returns the value of attribute empty_token_sort.
241 242 243 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 241 def empty_token_sort @empty_token_sort end |
#form ⇒ Object (readonly)
Returns the value of attribute form.
242 243 244 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 242 def form @form end |
#head_id ⇒ Object
Returns the value of attribute head_id.
234 235 236 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 234 def head_id @head_id end |
#id ⇒ Object (readonly)
Returns the value of attribute id.
238 239 240 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 238 def id @id end |
#language ⇒ Object (readonly)
Returns the value of attribute language.
240 241 242 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 240 def language @language end |
#lemma ⇒ Object (readonly)
Returns the value of attribute lemma.
239 240 241 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 239 def lemma @lemma end |
#part_of_speech ⇒ Object (readonly)
Returns the value of attribute part_of_speech.
237 238 239 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 237 def part_of_speech @part_of_speech end |
#relation ⇒ Object
Returns the value of attribute relation.
236 237 238 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 236 def relation @relation end |
#upos ⇒ Object
Returns the value of attribute upos.
235 236 237 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 235 def upos @upos end |
Instance Method Details
#add_slash!(slash) ⇒ Object
727 728 729 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 727
# Appends a slash to this token's slash list and returns that list.
def add_slash!(slash)
  @slashes.push(slash)
end
#adjectival? ⇒ Boolean
returns true if the node is an adjective or an ordinal
285 286 287 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 285
# True when the part of speech is 'A-' (adjective) or 'Mo' (ordinal).
def adjectival?
  ['A-', 'Mo'].include?(@part_of_speech)
end
#adverb? ⇒ Boolean
293 294 295 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 293
# Truthy (match index 0) when the part-of-speech tag starts with 'D',
# nil otherwise.
def adverb?
  /\AD/ =~ @part_of_speech
end
#auxiliary? ⇒ Boolean
351 352 353 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 351
# True when the "lemma,part_of_speech,language" key is listed in
# AUXILIARIES, or when the token is a 'V-' bearing the 'aux' relation.
def auxiliary?
  key = [lemma, part_of_speech, language].join(',')
  AUXILIARIES.include?(key) || (part_of_speech == "V-" && relation == 'aux')
end
#cardinal? ⇒ Boolean
297 298 299 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 297 def cardinal? @part_of_speech == 'Ma' end |
#change_coordinations! ⇒ Object
Changes coordinations recursively from the bottom of the graph
703 704 705 706 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 703
# Rewrites coordinations bottom-up: every dependent subtree is handled
# first, then this node itself if it is a conjunction.
def change_coordinations!
  dependents.each { |child| child.change_coordinations! }
  if conjunction?
    process_coordination!
  end
end
#clausal? ⇒ Boolean
A node is clausal if it is a verb and not nominalized; or it has a copula dependent; or it has a subject (e.g. in an absolute construction without a verb); or it has a subjunction dependent; or it is a relative pronoun/adverb or has a relative pronoun/adverb dependent; or if it is the root (e.g. in a nominal clause)
314 315 316 317 318 319 320 321 322 323 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 314
# Decides whether this node heads a clause. It does when any of these hold:
# it is a 'V-' that is neither nominalized nor governed by a preposition;
# it has a copula, subject, subjunction, relative or orphan dependent;
# it is itself a relative; or it is the root.
def clausal?
  subject_relations = ['sub', 'nsubj', 'nsubj:outer', 'nsubj:pass', 'csubj', 'csubj:pass']
  plain_verb = @part_of_speech == 'V-' && !nominalized? && !has_preposition?

  plain_verb ||
    dependents.any? { |d| d.copula? } ||
    dependents.any? { |d| subject_relations.include? d.relation } ||
    dependents.any? { |d| d.subjunction? } ||
    relative? ||
    dependents.any? { |d| d.relative? } ||
    dependents.any? { |d| d.orphan? } ||
    root?
end
#comparison_word? ⇒ Boolean
355 356 357 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 355
# True when the "lemma,part_of_speech,language" key is listed in
# COMPARISON_LEMMATA.
def comparison_word?
  key = [lemma, part_of_speech, language].join(',')
  COMPARISON_LEMMATA.include?(key)
end
#conj_head ⇒ Object
462 463 464 465 466 467 468 469 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 462
# Climbs a chain of 'conj' relations and returns the first head that is
# not itself a conjunct.
#
# Raises RuntimeError ('Not a conjunct') when this token's relation is
# not 'conj'.
def conj_head
  raise 'Not a conjunct' unless @relation == 'conj'

  parent = head
  parent.relation == 'conj' ? parent.conj_head : parent
end
#conjunction? ⇒ Boolean
325 326 327 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 325
# True for overt conjunctions (part of speech 'C-') and for empty tokens
# of sort 'C'.
def conjunction?
  return true if part_of_speech == 'C-'

  @empty_token_sort == 'C'
end
#coordinated? ⇒ Boolean
329 330 331 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 329
# Truthy when this token has a head, that head is a conjunction, and the
# head carries the same relation as this token. Returns nil when there is
# no head.
def coordinated?
  parent = head
  parent && parent.conjunction? && parent.relation == @relation
end
#copula? ⇒ Boolean
Returns true if the node has an xobj dependent and either 1) the lemma is copular or 2) the node is empty and has no pid slash or a pid slash to a node with a copular lemma
340 341 342 343 344 345 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 340
# True when the token already bears the 'cop' relation, or when it has an
# 'xobj' dependent and either
#   1) its "lemma,part_of_speech,language" key is in COPULAR_LEMMATA, or
#   2) it is an empty 'V' token whose pid slash is missing, points to an
#      empty token, or points to a token with a copular lemma.
def copula?
  return true if @relation == 'cop'

  lexical_copula = COPULAR_LEMMATA.include?([lemma, part_of_speech, language].join(','))
  # Grouping made explicit: (lexical OR elided copula) AND has an xobj.
  copular = lexical_copula ||
            (@empty_token_sort == 'V' &&
             (pid.nil? ||
              pid.is_empty? ||
              COPULAR_LEMMATA.include?([pid.lemma, pid.part_of_speech, pid.language].join(','))))

  copular && dependents.any? { |d| d.relation == 'xobj' }
end
#count_subgraph ⇒ Object
450 451 452 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 450
# Counts the non-empty tokens in the subgraph rooted at this token
# (this token included when it has content).
def count_subgraph
  own = is_empty? ? 0 : 1
  below = dependents.inject(0) { |total, child| total + child.count_subgraph }
  below + own
end
#dependents ⇒ Object
526 527 528 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 526
# Returns the tokens of the sentence whose head is this token, ordered by
# id. The list is recomputed from the sentence on every call.
def dependents
  children = @sentence.tokens.select { |token| token.head_id == @id }
  children.sort_by { |token| token.id }
end
#deponent? ⇒ Boolean
387 388 389 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 387
# Looks up the DEPONENTS entry for this token's language and matches the
# lemma against it. Returns the match result, or nil when the language
# has no entry.
def deponent?
  pattern = DEPONENTS[@language]
  pattern && pattern.match(@lemma)
end
#determiner? ⇒ Boolean
359 360 361 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 359 def determiner? DETERMINERS.include? @part_of_speech end |
#distribute_shared_modifiers! ⇒ Object
715 716 717 718 719 720 721 722 723 724 725 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 715
# Moves modifiers shared by a coordination under its first conjunct and
# records them as slashes on every other conjunct.
#
# Among the non-'aux' dependents, conjuncts are those carrying this
# conjunction's relation (or 'adv' when the conjunction is 'xadv');
# everything else is a shared modifier.
#
# Raises RuntimeError when called on a non-conjunction, when there is no
# first conjunct, or when the first conjunct is a childless conjunction.
def distribute_shared_modifiers!
  raise 'Can only distribute over a conjunction!' unless conjunction?

  candidates = dependents.reject { |d| d.relation == 'aux' }
  conjuncts, modifiers = candidates.partition do |d|
    d.relation == @relation || (d.relation == 'adv' && @relation == 'xadv')
  end

  first_conjunct = conjuncts.shift
  raise "No first conjunct under #{to_n}\n#{to_graph}" unless first_conjunct
  if first_conjunct.conjunction? && first_conjunct.dependents.empty?
    raise "The first conjunct is a misannotated conjunction in #{to_n}\n#{to_graph}"
  end

  modifiers.each do |m|
    m.head_id = first_conjunct.id
    conjuncts.each { |c| c.add_slash! [m.id, m.relation] }
  end
end
#ellipsis? ⇒ Boolean
363 364 365 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 363 def ellipsis? @empty_token_sort == 'V' end |
#find_appositive_head ⇒ Object
530 531 532 533 534 535 536 537 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 530
# Returns the head of an apposition, climbing through any heads that are
# conjunctions themselves bearing the 'apos' relation.
#
# Raises RuntimeError ('Not an apposition') when this token's relation is
# not 'apos'.
def find_appositive_head
  raise 'Not an apposition' unless @relation == 'apos'

  parent = head
  return parent unless parent.conjunction? && parent.relation == 'apos'

  parent.find_appositive_head
end
#find_highest_daughter ⇒ Object
664 665 666 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 664
# Picks the dependent whose mapped relation (the part before any ':')
# comes earliest in OBLIQUENESS_HIERARCHY. Relations missing from the
# hierarchy rank last (1000). Returns nil when there are no dependents.
def find_highest_daughter
  dependents.min_by do |child|
    base_relation = child.map_relation[/[^:]*/]
    OBLIQUENESS_HIERARCHY.find_index(base_relation) || 1000
  end
end
#find_postag(possible_postags) ⇒ Object
539 540 541 542 543 544 545 546 547 548 549 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 539
# Walks a list of [tag, criterion, features] candidates and applies the
# first one whose criterion accepts this token: @upos is set to the tag
# and the optional feature string is appended to @features (separated by
# '|' when @features is not empty).
#
# possible_postags is consumed destructively (entries are shifted off), so
# callers pass a copy. Nothing is changed and nil is returned when no
# candidate matches.
def find_postag(possible_postags)
  tag, crit, feats = possible_postags.shift
  if tag.nil?
    # raise "Found no postag"
  elsif crit.call self
    @upos = tag
    @features += ((@features.empty? ? '' : '|') + feats) if feats
  else
    find_postag possible_postags
  end
end
#find_relation(possible_relations) ⇒ Object
551 552 553 554 555 556 557 558 559 560 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 551
# Returns the relation of the first [relation, criterion] candidate whose
# criterion accepts this token, or nil when none does.
#
# possible_relations is consumed destructively: entries are shifted off
# up to and including the matching one.
def find_relation(possible_relations)
  loop do
    rel, crit = possible_relations.shift
    # raise "Found no relation"
    return nil if rel.nil?
    return rel if crit.call(self)
  end
end
#find_remnant ⇒ Object
656 657 658 659 660 661 662 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 656
# Follows the chain of first 'remnant' dependents downwards and returns
# the last token on it (this token when it has no 'remnant' dependent).
def find_remnant
  remnant = dependents.find { |d| d.relation == 'remnant' }
  remnant ? remnant.find_remnant : self
end
#foreign? ⇒ Boolean
367 368 369 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 367 def foreign? @part_of_speech == 'F-' end |
#format_features(features) ⇒ Object
479 480 481 482 483 484 485 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 479
# Renders a '|'-separated feature string for output: '_' for the empty
# string, otherwise the same features sorted alphabetically.
def format_features(features)
  return '_' if features == ''

  features.split('|').sort.join('|')
end
#genitive? ⇒ Boolean
280 281 282 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 280
# Truthy (match index 0) when the case slot of the positional morphology
# tag is 'g'; nil otherwise, including when there is no morphology.
#
# Case is the seventh field of MORPHOLOGY_POSITIONAL_TAG_SEQUENCE, i.e.
# string index 6. The pattern is anchored at the start so that a 'g' in a
# later slot cannot be mistaken for the case value.
def genitive?
  @morphology =~ /\A.{6}g/
end
#has_conjunct? ⇒ Boolean
333 334 335 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 333
# True when at least one dependent bears the 'conj' relation.
def has_conjunct?
  dependents.map { |child| child.relation }.include?('conj')
end
#has_content? ⇒ Boolean
371 372 373 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 371
# True when the token is not an empty token, i.e. its empty-token sort is
# nil or the empty string.
def has_content?
  [nil, ''].include?(@empty_token_sort)
end
#has_copula? ⇒ Boolean
347 348 349 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 347
# True when at least one dependent is a copula.
def has_copula?
  dependents.any? { |child| child.copula? }
end
#has_preposition? ⇒ Boolean
678 679 680 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 678
# True when at least one dependent is a preposition attached with the
# 'case' relation.
def has_preposition?
  dependents.any? do |child|
    child.preposition? && child.relation == 'case'
  end
end
#has_subject? ⇒ Boolean
375 376 377 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 375
# True when at least one dependent bears a subject relation (the source
# 'sub' or any of the nsubj/csubj variants listed below).
def has_subject?
  subject_relations = ['sub', 'nsubj', 'nsubj:pass', 'csubj', 'csubj:pass', 'nsubj:outer']
  dependents.any? { |child| subject_relations.include?(child.relation) }
end
#head ⇒ Object
522 523 524 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 522
# Returns the token of the sentence whose id equals this token's head id,
# or nil when there is none.
def head
  @sentence.tokens.find { |token| token.id == @head_id }
end
#interjection? ⇒ Boolean
379 380 381 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 379 def interjection? @part_of_speech == 'I-' end |
#invert!(new_dependent_relation = nil, new_head_relation = nil) ⇒ Object
Inverts the direction of a dependency relation. By default the labels are also swapped, but new relations can be specified for both the new dependent and the new head.
734 735 736 737 738 739 740 741 742 743 744 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 734
# Inverts the dependency between this token and its head: the former head
# becomes a dependent of this token, and this token attaches where the
# former head was attached.
#
# new_dependent_relation - relation for the former head (defaults to this
#                          token's current relation).
# new_head_relation      - relation for this token (defaults to the former
#                          head's relation).
#
# Raises RuntimeError when this token is attached to the root (head id 0).
def invert!(new_dependent_relation = nil, new_head_relation = nil)
  raise 'Cannot promote a token under root!' if @head_id == 0

  former_head = head
  new_dependent_relation ||= @relation
  new_head_relation ||= former_head.relation
  grandparent_id = former_head.head_id

  former_head.head_id = @id
  former_head.relation = new_dependent_relation

  @head_id = grandparent_id
  self.relation = new_head_relation
end
#is_empty? ⇒ Boolean
383 384 385 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 383 def is_empty? !has_content? end |
#left_corner ⇒ Object
458 459 460 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 458
# Returns whichever of this token and its direct dependents has the
# lowest id.
def left_corner
  candidates = [self] + dependents
  candidates.min_by { |token| token.id }
end
#long? ⇒ Boolean
407 408 409 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 407
# True when the strength slot of the positional morphology tag (string
# index 8) is 'w'.
#
# Returns false for tokens without morphology instead of raising, in line
# with passive? and mediopassive?, which also tolerate a nil @morphology.
def long?
  @morphology ? @morphology[8] == 'w' : false
end
#map_morphology(morph) ⇒ Object
268 269 270 271 272 273 274 275 276 277 278 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 268
# Converts a positional morphology tag into a '|'-separated feature string.
#
# Each character is looked up in MORPHOLOGY_MAP under the field name found
# at the same position of MORPHOLOGY_POSITIONAL_TAG_SEQUENCE; values with
# no mapping are dropped. Adjustments applied afterwards:
# - 'VerbForm=Part' is removed when 'VerbForm=PartRes|Tense=Past' is present;
# - outside Gothic ('got'), 'Strength=Weak' is removed and 'Strength=Strong'
#   becomes 'Variant=Short';
# - 'Polarity=Neg' is added for the two negated lemmata listed below.
def map_morphology(morph)
  feats = morph.each_char.each_with_index.map do |value, position|
    MORPHOLOGY_MAP[MORPHOLOGY_POSITIONAL_TAG_SEQUENCE[position]][value]
  end

  feats.delete('VerbForm=Part') if feats.include?('VerbForm=PartRes|Tense=Past')
  unless @language == 'got'
    feats.delete('Strength=Weak')
    feats.map! { |f| f == 'Strength=Strong' ? 'Variant=Short' : f }
  end
  feats << 'Polarity=Neg' if ['не.быти','не.бꙑти'].include?(@lemma)

  feats.compact.join('|')
end
#map_part_of_speech! ⇒ Object
562 563 564 565 566 567 568 569 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 562
# Assigns a universal POS tag to every token of this subgraph, dependents
# first.
#
# The candidates for this token come from POS_MAP, keyed by the source
# part of speech. find_postag consumes its argument, so it is given a copy
# to leave the shared table intact.
def map_part_of_speech!
  dependents.each(&:map_part_of_speech!)
  possible_postags = POS_MAP[@part_of_speech]
  find_postag possible_postags.dup
  # ugly, but the ugliness comes from UDEP
  @upos = 'PRON' if @upos == 'DET' and @relation != 'det'
  @upos = REL_TO_POS[@relation] if @upos == 'X'
end
#map_relation ⇒ Object
578 579 580 581 582 583 584 585 586 587 588 589 590 591 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 578
# Returns the mapped relation for this token's current relation, based on
# its RELATION_MAPPING entry:
# - a String entry is the mapped relation itself;
# - an Array entry is a candidate list resolved by find_relation (a copy is
#   passed because find_relation consumes its argument);
# - no entry means the relation has already been changed and is kept.
#
# Raises RuntimeError for any other kind of entry.
def map_relation
  possible_relations = RELATION_MAPPING[@relation]
  case possible_relations
  when String
    possible_relations
  when Array
    find_relation possible_relations.dup
  when nil
    # do nothing: the token has already changed its relation
    @relation
  else
    raise "Unknown value #{possible_relations.inspect} for #{@relation}"
  end
end
#mediopassive? ⇒ Boolean
391 392 393 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 391
# Truthy when the voice slot of the morphology tag (string index 4) is
# 'm', 'p' or 'e'. Always false for deponent lemmata and for tokens
# without morphology.
def mediopassive?
  return false if deponent? || !@morphology

  @morphology[4] =~ /[mpe]/
end
#miscellaneous ⇒ Object
487 488 489 490 491 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 487
# Builds the miscellaneous column: the citation part, followed by
# "|LId=<variant>" when the lemma has a variant.
def miscellaneous
  return @citation_part unless @variant

  @citation_part + "|LId=#{@variant}"
end
#negation? ⇒ Boolean
399 400 401 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 399
# True when the "lemma,part_of_speech,language" key is listed in
# NEGATION_LEMMATA.
def negation?
  key = [lemma, part_of_speech, language].join(',')
  NEGATION_LEMMATA.include?(key)
end
#nominal? ⇒ Boolean
403 404 405 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 403
# Truthy when the part-of-speech tag starts with 'N', 'P' or 'M', or when
# the token is nominalized.
def nominal?
  nominal_tag = /\A[NPM]/ =~ @part_of_speech
  nominal_tag || nominalized?
end
#nominalized? ⇒ Boolean
411 412 413 414 415 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 411
# True when the token has a determiner dependent attached with 'atr',
# 'aux' or 'det'.
def nominalized?
  determiner_relations = ['atr', 'aux', 'det']
  dependents.any? { |d| d.determiner? && determiner_relations.include?(d.relation) }
end
#orphan? ⇒ Boolean
309 310 311 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 309 def orphan? relation == 'orphan' end |
#particle? ⇒ Boolean
421 422 423 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 421
# True when the token bears the 'aux' relation and its
# "lemma,part_of_speech,language" key is listed in PARTICLE_LEMMATA.
def particle?
  return false unless @relation == 'aux'

  PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
end
#passive? ⇒ Boolean
395 396 397 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 395
# True when the voice slot of the morphology tag (string index 4) is 'p'.
# Always false for deponent lemmata and for tokens without morphology.
def passive?
  return false if deponent? || !@morphology

  @morphology[4] == 'p'
end
#pid ⇒ Object
471 472 473 474 475 476 477 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 471
# Returns the token targeted by this token's first 'pid' slash, or nil
# when there is no such slash (or no token with the target id).
# Slashes are [target_id, relation] pairs.
def pid
  slash = @slashes.find { |_target, label| label == 'pid' }
  return nil unless slash

  @sentence.tokens.find { |token| slash.first == token.id }
end
#preposition? ⇒ Boolean
429 430 431 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 429 def preposition? @part_of_speech == 'R-' end |
#process_comparison! ⇒ Object
615 616 617 618 619 620 621 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 615
# Restructures a comparison: the first dependent bearing 'sub', 'obj',
# 'obl', 'comp' or 'adv' is inverted with this token, so that this token
# becomes its 'mark' dependent and it takes over as 'advcl:cmp'. The
# remaining dependents of this token are then moved under it.
#
# A 'C-' head carrying the same relation as this token is relabelled
# 'advcl:cmp' first.
def process_comparison!
  clause_relations = ['sub','obj','obl','comp','adv']
  # Selected before any relabelling, as in the original statement order.
  comp = dependents.find { |d| clause_relations.include?(d.relation) }

  if head and head.part_of_speech == 'C-' and head.relation == relation
    head.relation = 'advcl:cmp'
  end

  comp.invert!('mark','advcl:cmp')
  dependents.each { |d| d.head_id = comp.id }
end
#process_coordination! ⇒ Object
708 709 710 711 712 713 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 708
# Converts a conjunction-headed coordination: shared modifiers are
# distributed, then the non-'aux' dependent with the leftmost left corner
# is promoted to head, its siblings becoming 'conj' and the conjunction
# 'cc'. Does nothing when every dependent is 'aux' (or there are none).
#
# Raises RuntimeError when called on a non-conjunction.
def process_coordination!
  raise 'Only coordinations can be processed this way!' unless conjunction?
  return if dependents.all? { |d| d.relation == 'aux' }

  distribute_shared_modifiers!
  # Dependents are re-read here because distribution re-attaches modifiers.
  conjuncts = dependents.reject { |d| d.relation == 'aux' }
  leftmost = conjuncts.sort_by { |d| d.left_corner.id }.first
  leftmost.promote!('conj', 'cc')
end
#process_copula! ⇒ Object
668 669 670 671 672 673 674 675 676 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 668
# Promotes the single 'xobj' predicate over this copula, which becomes its
# 'cop' dependent. When both the copula and the predicate have a 'sub'
# dependent, the copula's subject is relabelled 'nsubj:outer' first.
#
# Raises RuntimeError unless there is exactly one 'xobj' dependent.
def process_copula!
  predicates = dependents.select { |d| d.relation == 'xobj' }
  raise "#{predicates.size} predicates under #{to_n}\n#{to_graph}" unless predicates.size == 1

  predicate = predicates.first
  own_subject = dependents.find { |d| d.relation == 'sub' }
  predicate_subject = predicate.dependents.find { |d| d.relation == 'sub' }
  own_subject.relation = 'nsubj:outer' if own_subject && predicate_subject

  predicate.promote!(nil, 'cop')
end
#process_dislocation! ⇒ Object
623 624 625 626 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 623
# Relabels this token as "dislocated" and, unless its head is a root,
# re-attaches it one level up (to the head's own head).
def process_dislocation!
  parent = head
  self.head_id = parent.head_id unless parent.root?
  self.relation = "dislocated"
end
#process_ellipsis! ⇒ Object
628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 628 def process_ellipsis! aux = dependents.select(&:auxiliary?).first if aux aux.promote! return end sub = dependents.select { |d| d.relation == 'sub' }.first new_head = find_highest_daughter new_head_sub = new_head.dependents.select { |d| d.relation == 'sub' }.first sub.relation = 'nsubj:outer' if sub and new_head_sub new_head.promote!('orphan') # dependents.each do |d| # check if there's a partner with the same relation under the overt node. # TODO: this isn't really very convincing when it comes to ADVs # if partner = overt.dependents.select { |p| p != self and p.relation == d.relation }.first #inserted p != self # partner = partner.find_remnant # d.head_id = partner.id # d.relation = 'remnant' # if there's no partner, just attach under the overt node, preserving the relation # else # d.head_id = overt.id # end # end @sentence.remove_token!(self) end |
#process_preposition! ⇒ Object
682 683 684 685 686 687 688 689 690 691 692 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 682
# Demotes a preposition below its complement: the single 'obl' dependent
# is inverted with this token, which becomes its 'case' dependent. Stacked
# prepositions ('aux' dependents that are prepositions) are re-attached to
# the complement as 'case' too, and all other dependents are moved under
# the complement with their relations unchanged.
#
# Does nothing when there is no 'obl' dependent.
#
# Raises RuntimeError when the token is not a preposition or when it has
# more than one 'obl' dependent.
def process_preposition!
  raise 'Only prepositions can be processed this way!' unless part_of_speech == 'R-'

  # Classify the dependents up front; the inversion below changes the graph.
  children = dependents
  obliques = children.select { |d| d.relation == 'obl' }
  non_obliques = children.reject { |d| d.relation == 'obl' }
  double_preps, mods = non_obliques.partition { |d| d.relation == 'aux' && d.preposition? }

  raise "#{obliques.size} oblique dependents under #{to_n}\n#{to_graph}" if obliques.size > 1
  return if obliques.empty? # shouldn't really happen, but in practice

  complement = obliques.first
  complement.invert!('case') # , "adv")

  # Two plain statements: the relabel must not depend on the truthiness of
  # the assigned head id, as it did with `head_id = ... and relation = ...`.
  double_preps.each do |prep|
    prep.head_id = complement.id
    prep.relation = 'case'
  end
  mods.each { |m| m.head_id = complement.id }
end
#process_subjunction! ⇒ Object
attach subjunctions with ‘mark’ under their verbs and promote the verb to take over the subjunction’s relation. If the verb is empty, the subjunction stays as head.
596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 596
# Attaches a subjunction as 'mark' under its predicate and lets the
# predicate take over the subjunction's relation. When the predicate is an
# empty token, the subjunction stays as head instead: the predicate's
# dependents are moved under the subjunction and the predicate is removed.
#
# Raises RuntimeError unless there is exactly one 'pred' dependent.
def process_subjunction!
  # ignore if the subjunction has no dependents or only conj dependents.
  # NB: this requires that the function is called *after* processing conjunctions
  non_coordination = dependents.reject { |d| ['conj', 'cc'].include? d.relation }
  return if non_coordination.empty?

  preds = dependents.select { |d| d.relation == 'pred' }
  raise "#{preds.size} PREDs under the subjunction #{to_n}:\n#{@sentence.to_graph}" unless preds.one?
  pred = preds.first

  if pred.is_empty?
    # promote the subjunction if the verb is empty
    pred.dependents.each { |d| d.head_id = id }
    @sentence.remove_token! pred
  else
    # else demote the subjunction
    pred.invert!('mark')
    # move any remaining discourse children to the new head (note that we
    # need to keep some aux'es to get them as "fixed" dependents
    dependents.each do |d|
      kept_aux = d.relation == 'aux' && ['Px', 'Pr'].include?(d.part_of_speech)
      next if kept_aux || d.relation == 'fixed'

      d.head_id = pred.id
    end
  end
end
#promote!(new_sibling_relation = nil, new_dependent_relation = 'aux') ⇒ Object
promotes a node to its head’s place. The node takes over its former head’s relation and all dependents. The new relation for these dependents can be specified; if it is not, they will keep their former relation. The former head is made a dependent of the node (with a specified relation) or, if it is an empty node, destroyed.
753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 753 def promote!(new_sibling_relation = nil, new_dependent_relation = 'aux') raise 'Cannot promote a token under root!' if @head_id == 0 new_head_relation = head.relation new_head_id = head.head_id # move all dependents of the former head to the new one siblings.each do |t| t.head_id = @id # ugly hack to avoid overwriting the aux relation here (aux siblings aren't really siblings), now also includes conj, cc t.relation = new_sibling_relation if (new_sibling_relation and !['aux','conj','cc'].include?(t.relation)) end # remove the former head if it was empty if head.is_empty? @sentence.remove_token!(head) # else make it a dependent of the new head else head.head_id = @id head.relation = new_dependent_relation end @head_id = new_head_id # don't use relation=, as we don't want this relation to be # copied down a tree of conjunctions @relation = new_head_relation end |
#pronominal? ⇒ Boolean
425 426 427 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 425
# Truthy (match index 0) when the part-of-speech tag is 'P' followed by
# at least one more character; nil otherwise.
def pronominal?
  # no evidence that possessives are pronoun/determiner-like
  /\AP./ =~ @part_of_speech
end
#proper_noun? ⇒ Boolean
433 434 435 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 433 def proper_noun? @part_of_speech == 'Ne' end |
#relabel_graph! ⇒ Object
571 572 573 574 575 576 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 571
# Replaces the relation of every token in this subgraph with its mapped
# relation, dependents first.
#
# Raises RuntimeError when map_relation yields no relation for this token.
def relabel_graph!
  dependents.each { |child| child.relabel_graph! }
  # TODO: if there are iobjs without an obj among the dependents, one of them should be promoted to obj
  @relation = map_relation
  return if @relation

  raise "No relation for #{form}"
end
#relative? ⇒ Boolean
301 302 303 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 301
# True when the part of speech is 'Pr' or 'Dq'.
def relative?
  ['Pr', 'Dq'].include?(@part_of_speech)
end
#remove_empties! ⇒ Object
694 695 696 697 698 699 700 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 694
# Removes every empty token from this subgraph, dependents first. The
# dependents of a removed token are re-attached to that token's head and
# relabelled 'remnant'.
def remove_empties!
  dependents.each { |child| child.remove_empties! }
  return unless is_empty?

  dependents.each do |child|
    child.head_id = head_id
    child.relation = 'remnant'
  end
  @sentence.remove_token! self
end
#root? ⇒ Boolean
437 438 439 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 437 def root? @head_id == 0 end |
#siblings ⇒ Object
518 519 520 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 518
# Returns the other tokens of the sentence that share this token's head.
def siblings
  same_head = @sentence.tokens.select { |token| token.head_id == @head_id }
  same_head - [self]
end
#subgraph_set ⇒ Object
454 455 456 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 454
# Returns a flat list of this token followed by every token below it in
# the graph.
def subgraph_set
  [self] + dependents.flat_map { |child| child.subgraph_set }
end
#subjunction? ⇒ Boolean
289 290 291 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 289 def subjunction? @part_of_speech == 'G-' end |
#tam_particle? ⇒ Boolean
417 418 419 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 417
# True when the token bears the 'aux' relation and its
# "lemma,part_of_speech,language" key is listed in TAM_PARTICLE_LEMMATA.
def tam_particle?
  return false unless @relation == 'aux'

  TAM_PARTICLE_LEMMATA.include?([lemma, part_of_speech, language].join(','))
end
#to_conll ⇒ Object
493 494 495 496 497 498 499 500 501 502 503 504 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 493
# Renders the token as one tab-separated output line with ten columns:
# id, form, base lemma (with any "не." removed), universal POS tag, source
# part of speech, features, head id, relation, a '_' placeholder and the
# miscellaneous column.
def to_conll
  # override non-root relations on root until we've found out how to
  # handle unembedded reports etc
  deprel = @head_id == 0 ? 'root' : @relation

  columns = [
    @id,
    @form,
    @baselemma.gsub(/не\./,''),
    @upos,
    @part_of_speech,
    format_features(@features),
    @head_id,
    deprel,
    '_', # slashes here
    miscellaneous
  ]
  columns.join("\t")
end
#to_graph(indents = 0) ⇒ Object
514 515 516 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 514
# Renders the subgraph as an indented outline: one to_n label per line,
# each level of depth adding one leading tab.
def to_graph(indents = 0)
  own_line = ("\t" * indents) + to_n
  child_lines = dependents.map { |d| d.to_graph(indents + 1) }
  ([own_line] + child_lines).join("\n")
end
#to_n ⇒ Object
510 511 512 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 510
# Short label for the token: "relation-id-form-pos". The empty-token sort
# stands in for a missing form, and the source part of speech for a
# missing universal POS tag.
def to_n
  surface = @form || @empty_token_sort
  tag = @upos || @part_of_speech
  "#{@relation}-#{@id}-#{surface}-#{tag}"
end
#to_s ⇒ Object
506 507 508 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 506
# Tab-separated summary of the token: id, form, head id and relation.
def to_s
  "#{@id}\t#{@form}\t#{@head_id}\t#{@relation}"
end
#verb? ⇒ Boolean
305 306 307 |
# File 'lib/proiel/cli/converters/conll-u.rb', line 305
# True for overt verbs (part of speech 'V-') and for empty tokens of
# sort 'V'.
def verb?
  return true if @part_of_speech == 'V-'

  @empty_token_sort == 'V'
end