Class: PROIEL::Token

Inherits:
TreebankObject show all
Extended by:
Memoist
Defined in:
lib/proiel/token.rb

Overview

A token object in a treebank.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from TreebankObject

#inspect

Constructor Details

#initialize(parent, id, head_id, form, lemma, part_of_speech, morphology, relation, empty_token_sort, citation_part, presentation_before, presentation_after, antecedent_id, information_status, contrast_group, foreign_ids, slashes, alignment_id) ⇒ Token

Creates a new token object.

Raises:

  • (ArgumentError)


70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/proiel/token.rb', line 70

# Creates a new token object.
#
# Every argument except +parent+ is type-checked before it is stored in the
# instance variable of the same name (+parent+ is stored as +@sentence+).
# String arguments are frozen in place before being stored.
#
# @param parent [Object] the owning sentence; stored without validation
# @param id [Integer]
# @param head_id [nil, Integer]
# @param form [nil, String]
# @param lemma [nil, String]
# @param part_of_speech [nil, String]
# @param morphology [nil, String]
# @param relation [nil, String]
# @param empty_token_sort [nil, String]
# @param citation_part [nil, String]
# @param presentation_before [nil, String]
# @param presentation_after [nil, String]
# @param antecedent_id [nil, Integer]
# @param information_status [nil, String]
# @param contrast_group [nil, String]
# @param foreign_ids [nil, String]
# @param slashes [Array<#relation, #target_id>] stored as
#   +[relation, target_id]+ pairs with the relation frozen
# @param alignment_id [nil, Integer]
# @raise [ArgumentError] if an argument is not of the expected type
def initialize(parent, id, head_id, form, lemma, part_of_speech,
               morphology, relation, empty_token_sort, citation_part,
               presentation_before, presentation_after, antecedent_id,
               information_status, contrast_group, foreign_ids, slashes,
               alignment_id)
  @sentence = parent

  # The token ID is the only scalar argument that may not be nil.
  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
  @id = id

  # Optional scalar arguments, listed in the order in which they are checked
  # and assigned: the first invalid one raises, leaving the later instance
  # variables unset.
  [
    [:@head_id, head_id, Integer],
    [:@form, form, String],
    [:@lemma, lemma, String],
    [:@part_of_speech, part_of_speech, String],
    [:@morphology, morphology, String],
    [:@relation, relation, String],
    [:@empty_token_sort, empty_token_sort, String],
    [:@citation_part, citation_part, String],
    [:@presentation_before, presentation_before, String],
    [:@presentation_after, presentation_after, String],
    [:@antecedent_id, antecedent_id, Integer],
    [:@information_status, information_status, String],
    [:@contrast_group, contrast_group, String],
    [:@foreign_ids, foreign_ids, String]
  ].each do |ivar, value, type|
    textual = type.equal?(String)

    unless value.nil? || value.is_a?(type)
      raise ArgumentError, (textual ? 'string or nil expected' : 'integer or nil expected')
    end

    # Only string values are frozen; integers are stored untouched.
    instance_variable_set(ivar, textual ? value.freeze : value)
  end

  raise ArgumentError, 'array expected' unless slashes.is_a?(Array)
  @slashes = slashes.map { |slash| [slash.relation.freeze, slash.target_id] }

  unless alignment_id.nil? || alignment_id.is_a?(Integer)
    raise ArgumentError, 'integer or nil expected'
  end
  @alignment_id = alignment_id
end

Instance Attribute Details

#alignment_idnil, Integer (readonly)



67
68
69
# File 'lib/proiel/token.rb', line 67

# Reader for the alignment ID stored by the constructor.
#
# @return [nil, Integer] the value of +@alignment_id+
def alignment_id
  @alignment_id
end

#antecedent_id ⇒ nil, Integer (readonly)



52
53
54
# File 'lib/proiel/token.rb', line 52

# Reader for the antecedent ID stored by the constructor.
#
# @return [nil, Integer] the value of +@antecedent_id+
def antecedent_id
  @antecedent_id
end

#citation_partnil, String (readonly)



43
44
45
# File 'lib/proiel/token.rb', line 43

# Reader for the citation part stored by the constructor.
#
# @return [nil, String] the frozen value of +@citation_part+
def citation_part
  @citation_part
end

#contrast_groupnil, String (readonly)



58
59
60
# File 'lib/proiel/token.rb', line 58

# Reader for the contrast group stored by the constructor.
#
# @return [nil, String] the frozen value of +@contrast_group+
def contrast_group
  @contrast_group
end

#empty_token_sortnil, String (readonly)



40
41
42
# File 'lib/proiel/token.rb', line 40

# Reader for the empty token sort stored by the constructor.
#
# @return [nil, String] the frozen value of +@empty_token_sort+
def empty_token_sort
  @empty_token_sort
end

#foreign_idsnil, String (readonly)



61
62
63
# File 'lib/proiel/token.rb', line 61

# Reader for the foreign IDs stored by the constructor.
#
# @return [nil, String] the frozen value of +@foreign_ids+
def foreign_ids
  @foreign_ids
end

#formnil, String (readonly)



22
23
24
# File 'lib/proiel/token.rb', line 22

# Reader for the token form stored by the constructor.
#
# @return [nil, String] the frozen value of +@form+
def form
  @form
end

#head_id ⇒ nil, Integer (readonly)



19
20
21
# File 'lib/proiel/token.rb', line 19

# Reader for the head ID stored by the constructor.
#
# @return [nil, Integer] the value of +@head_id+
def head_id
  @head_id
end

#id ⇒ Integer (readonly)



13
14
15
# File 'lib/proiel/token.rb', line 13

# Reader for the token ID stored by the constructor.
#
# @return [Integer] the value of +@id+
def id
  @id
end

#information_statusnil, String (readonly)



55
56
57
# File 'lib/proiel/token.rb', line 55

# Reader for the information status stored by the constructor.
#
# @return [nil, String] the frozen value of +@information_status+
def information_status
  @information_status
end

#lemmanil, String (readonly)



25
26
27
# File 'lib/proiel/token.rb', line 25

# Reader for the lemma stored by the constructor.
#
# @return [nil, String] the frozen value of +@lemma+
def lemma
  @lemma
end

#morphologynil, String (readonly)



34
35
36
# File 'lib/proiel/token.rb', line 34

# Reader for the morphology tag stored by the constructor.
#
# @return [nil, String] the frozen value of +@morphology+
def morphology
  @morphology
end

#part_of_speechnil, String (readonly) Also known as: pos



28
29
30
# File 'lib/proiel/token.rb', line 28

# Reader for the part of speech tag stored by the constructor.
#
# @return [nil, String] the frozen value of +@part_of_speech+
def part_of_speech
  @part_of_speech
end

#presentation_afternil, String (readonly)



49
50
51
# File 'lib/proiel/token.rb', line 49

# Reader for the presentation text stored by the constructor as
# +@presentation_after+.
#
# @return [nil, String] the frozen value of +@presentation_after+
def presentation_after
  @presentation_after
end

#presentation_beforenil, String (readonly)



46
47
48
# File 'lib/proiel/token.rb', line 46

# Reader for the presentation text stored by the constructor as
# +@presentation_before+.
#
# @return [nil, String] the frozen value of +@presentation_before+
def presentation_before
  @presentation_before
end

#relationnil, String (readonly)



37
38
39
# File 'lib/proiel/token.rb', line 37

# Reader for the relation tag stored by the constructor.
#
# @return [nil, String] the frozen value of +@relation+
def relation
  @relation
end

#sentenceSentence



16
17
18
# File 'lib/proiel/token.rb', line 16

# Reader for the parent object passed to the constructor.
#
# @return [Sentence] the value of +@sentence+ (the constructor stores it
#   without type validation)
def sentence
  @sentence
end

#slashes ⇒ Array<Array<String,Integer>> (readonly)



64
65
66
# File 'lib/proiel/token.rb', line 64

# Reader for the slashes stored by the constructor.
#
# @return [Array<Array>] +[relation, target_id]+ pairs built by the
#   constructor from the +slashes+ argument
def slashes
  @slashes
end

Instance Method Details

#alignment(aligned_source) ⇒ Token, NilClass

Returns the aligned token if any.



400
401
402
# File 'lib/proiel/token.rb', line 400

# Looks up the token that this token is aligned to.
#
# @param aligned_source [#treebank] source whose treebank is searched for
#   the token with ID +alignment_id+
# @return [Token, nil] the result of the lookup, or +nil+ when this token has
#   no +alignment_id+
def alignment(aligned_source)
  return nil unless alignment_id

  aligned_source.treebank.find_token(alignment_id)
end

#ancestorsArray<Token>

Finds ancestors of this token in the dependency graph.

The ancestors are the ancestors of this token in the tree that has tokens as nodes and primary relations as edges.

The order of the returned ancestors is as follows: The first ancestor is the head of this token, the next ancestor is the head of the previous token, and so on.



266
267
268
269
270
271
272
# File 'lib/proiel/token.rb', line 266

# Finds the ancestors of this token in the dependency graph.
#
# @return [Array<Token>] the head of this token followed by the ancestors of
#   that head; empty when this token is a root
def ancestors
  return [] if is_root?

  # The nearest head comes first, followed by whatever lies above it.
  nearest = head
  [nearest].concat(nearest.ancestors)
end

#citationnil, String



152
153
154
155
156
157
158
# File 'lib/proiel/token.rb', line 152

# Builds the citation for this token.
#
# @return [nil, String] the citation part of the source (if any) and the
#   citation part of this token joined by a single space, or +nil+ when this
#   token has no citation part
def citation
  return nil unless citation_part

  parts = [source.citation_part, citation_part]
  parts.compact.join(' ')
end

#common_ancestors(other_token, inclusive: false) ⇒ Array<Token>

Finds the common ancestors that this token and another token share in the dependency graph.

If inclusive is false, a common ancestor is defined strictly as a common ancestor of both tokens. If inclusive is true, one of the tokens can be a common ancestor of the other.

Ancestors are returned in the same order as #ancestors.

Examples:

x.head # => w
w.head # => z
y.head # => z
z.head # => u

x.common_ancestors(y, inclusive: false) # => [z, u]
x.common_ancestors(w, inclusive: false) # => [z, u]
x.common_ancestors(x, inclusive: false) # => [w, z, u]

x.common_ancestors(y, inclusive: true)  # => [z, u]
x.common_ancestors(w, inclusive: true)  # => [w, z, u]
x.common_ancestors(x, inclusive: true)  # => [x, w, z, u]

See Also:



358
359
360
361
362
363
364
365
366
# File 'lib/proiel/token.rb', line 358

# Finds the ancestors that this token and +other_token+ have in common.
#
# @param other_token [Token] the token to compare with
# @param inclusive [true, false] when true, each token is counted as an
#   ancestor of itself, so one token may be a common ancestor of the other
# @return [Array<Token>] the shared ancestors, in the order in which they
#   occur among the ancestors of this token
def common_ancestors(other_token, inclusive: false)
  own_chain = ancestors
  other_chain = other_token.ancestors

  if inclusive
    own_chain = [self] + own_chain
    other_chain = [other_token] + other_chain
  end

  own_chain & other_chain
end

#dependentsArray<Token> Also known as: children

Finds the dependents of this token in the dependency graph.

The dependents are the children of this token in the tree that has tokens as nodes and primary relations as edges.

The order of the returned dependents is indeterminate.



248
249
250
# File 'lib/proiel/token.rb', line 248

# Finds the dependents of this token.
#
# @return [Array<Token>] the tokens of the sentence whose +head_id+ equals
#   the ID of this token
def dependents
  own_id = @id

  @sentence.tokens.select do |candidate|
    candidate.head_id == own_id
  end
end

#descendentsArray<Token> Also known as: descendants

Finds descendents of this token in the dependency graph.

The descendents are the tokens below this token (its dependents, their dependents, and so on) in the tree that has tokens as nodes and primary relations as edges.

The order of the returned descendents is indeterminate.



284
285
286
# File 'lib/proiel/token.rb', line 284

# Finds the descendents of this token in the dependency graph.
#
# @return [Array<Token>] each dependent of this token, immediately followed
#   by the descendents of that dependent
def descendents
  dependents.flat_map do |child|
    [child] + child.descendents
  end
end

#divDiv



130
131
132
# File 'lib/proiel/token.rb', line 130

# Returns the div that the sentence of this token reports.
#
# @return [Div] the result of calling +div+ on +@sentence+
def div
  @sentence.div
end

#first_common_ancestor(other_token, inclusive: false) ⇒ nil, Token

Finds the first common ancestor that this token and another token share in the dependency graph.

If inclusive is false, a common ancestor is defined strictly as a common ancestor of both tokens. If inclusive is true, one of the tokens can be a common ancestor of the other.

Examples:

x.head # => w
w.head # => z
y.head # => z
z.head # => u

x.first_common_ancestor(y, inclusive: false) # => z
x.first_common_ancestor(w, inclusive: false) # => z
x.first_common_ancestor(x, inclusive: false) # => w

x.first_common_ancestor(y, inclusive: true)  # => z
x.first_common_ancestor(w, inclusive: true)  # => w
x.first_common_ancestor(x, inclusive: true)  # => x

See Also:



393
394
395
# File 'lib/proiel/token.rb', line 393

# Finds the first ancestor that this token and +other_token+ have in common.
#
# @param other_token [Token] the token to compare with
# @param inclusive [true, false] passed on to +common_ancestors+
# @return [nil, Token] the first element returned by +common_ancestors+, or
#   +nil+ when there is none
def first_common_ancestor(other_token, inclusive: false)
  shared = common_ancestors(other_token, inclusive: inclusive)
  shared.first
end

#has_citation?true, false

Tests if the token has a citation.

A token has a citation if citation_part is not nil.



320
321
322
# File 'lib/proiel/token.rb', line 320

# Tests if the token has a citation.
#
# @return [true, false] +true+ unless +citation_part+ is +nil+
def has_citation?
  !citation_part.nil?
end

#has_content?true, false

Tests if the token has content.

A token has content if it has a form.

See Also:



311
312
313
# File 'lib/proiel/token.rb', line 311

# Tests if the token has content.
#
# @return [true, false] +true+ if +empty_token_sort+ is +nil+
def has_content?
  empty_token_sort.nil?
end

#head ⇒ nil, Token Also known as: parent

Finds the head of this token.

The head is the parent of this token in the tree that has tokens as nodes and primary relations as edges. A root token has no head.



228
229
230
231
232
233
234
# File 'lib/proiel/token.rb', line 228

# Finds the head of this token.
#
# @return [nil, Token] the token that the treebank finds for +head_id+, or
#   +nil+ when this token is a root
def head
  return nil if is_root?

  treebank.find_token(head_id)
end

#is_empty?true, false

Tests if the token is empty.

A token is empty if it does not have a form. If the token is empty, #empty_token_sort explains its function.

See Also:



300
301
302
# File 'lib/proiel/token.rb', line 300

# Tests if the token is empty.
#
# @return [true, false] +true+ unless +empty_token_sort+ is +nil+
def is_empty?
  !empty_token_sort.nil?
end

#is_root?true, false

Checks if the token is the root of its dependency graph.

If the token belongs to a sentence that lacks dependency annotation, all tokens are treated as roots. If a sentence has partial or complete dependency annotation there may still be multiple root tokens.



218
219
220
# File 'lib/proiel/token.rb', line 218

# Checks if the token is the root of its dependency graph.
#
# @return [true, false] +true+ if +head_id+ is +nil+
def is_root?
  head_id.nil?
end

#languageString



145
146
147
# File 'lib/proiel/token.rb', line 145

# Returns the language reported by the source of this token.
#
# @return [String] the result of calling +language+ on +source+
def language
  source.language
end

#morphology_hashHash<Symbol,String>



201
202
203
204
205
206
207
# File 'lib/proiel/token.rb', line 201

# Returns the morphology tag as a hash of set fields.
#
# The characters of +morphology+ are paired, position by position, with the
# entries of +MORPHOLOGY_POSITIONAL_TAG_SEQUENCE+ (defined elsewhere in the
# library). Positions holding '-' are left out, and so are positions that
# the tag does not reach when it is shorter than the sequence, so the hash
# never contains +nil+ values.
#
# @return [Hash<Symbol,String>] the set fields, or an empty hash when the
#   token has no morphology tag
def morphology_hash
  return {} unless morphology

  MORPHOLOGY_POSITIONAL_TAG_SEQUENCE
    .zip(morphology.chars)
    # zip pads with nil when the tag is shorter than the sequence.
    .reject { |_, value| value.nil? || value == '-' }
    .to_h
end

#part_of_speech_hashHash<Symbol,String> Also known as: pos_hash



178
179
180
181
182
183
184
# File 'lib/proiel/token.rb', line 178

# Returns the part of speech tag as a hash of set fields.
#
# The characters of +part_of_speech+ are paired, position by position, with
# the entries of +POS_POSITIONAL_TAG_SEQUENCE+ (defined elsewhere in the
# library). Positions holding '-' are left out, and so are positions that
# the tag does not reach when it is shorter than the sequence, so the hash
# never contains +nil+ values.
#
# @return [Hash<Symbol,String>] the set fields, or an empty hash when the
#   token has no part of speech tag
def part_of_speech_hash
  return {} unless part_of_speech

  POS_POSITIONAL_TAG_SEQUENCE
    .zip(part_of_speech.chars)
    # zip pads with nil when the tag is shorter than the sequence.
    .reject { |_, value| value.nil? || value == '-' }
    .to_h
end

#part_of_speech_with_nullsString Also known as: pos_with_nulls

Returns the part of speech tag if set, but also provides a suitable part of speech tag for empty elements.



194
195
196
# File 'lib/proiel/token.rb', line 194

# Returns the part of speech tag if set, otherwise the entry that
# +NULL_PARTS_OF_SPEECH+ (defined elsewhere in the library) holds for the
# empty token sort of this token.
#
# @return [String, nil] the tag of this token, or the result of the lookup
#   when the token has no tag
def part_of_speech_with_nulls
  tag = part_of_speech
  return tag if tag

  NULL_PARTS_OF_SPEECH[empty_token_sort]
end

#printable_form(custom_token_formatter: nil) ⇒ String

Returns the printable form of the token with any presentation data.

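The optional custom_token_formatter is a callable which is passed the token as its sole argument; its return value is used in place of the token form.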



166
167
168
169
170
171
172
173
174
175
# File 'lib/proiel/token.rb', line 166

# Returns the printable form of the token with any presentation data.
#
# @param custom_token_formatter [#call, nil] optional callable that receives
#   this token as its sole argument; its result replaces the token form
# @return [String] +presentation_before+, the (formatted) form and
#   +presentation_after+ concatenated, with +nil+ parts left out
def printable_form(custom_token_formatter: nil)
  core = custom_token_formatter ? custom_token_formatter.call(self) : form

  [presentation_before, core, presentation_after].compact.join
end

#pro?true, false

Checks if the token is a PRO token.



327
328
329
# File 'lib/proiel/token.rb', line 327

# Checks if the token is a PRO token.
#
# @return [true, false] +true+ if +empty_token_sort+ equals 'P'
def pro?
  empty_token_sort == 'P'
end

#sourceSource



135
136
137
# File 'lib/proiel/token.rb', line 135

# Returns the source that this token belongs to, reached through the
# sentence and its div.
#
# @return [Source] the result of +@sentence.div.source+
def source
  @sentence.div.source
end

#treebankTreebank



140
141
142
# File 'lib/proiel/token.rb', line 140

# Returns the treebank that this token belongs to, reached through the
# sentence, its div and the source of that div.
#
# @return [Treebank] the result of +@sentence.div.source.treebank+
def treebank
  @sentence.div.source.treebank
end