Class: PROIEL::Token

Inherits:
TreebankObject show all
Extended by:
Memoist
Defined in:
lib/proiel/token.rb

Overview

A token object in a treebank.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from TreebankObject

#inspect

Constructor Details

#initialize(parent, id, head_id, form, lemma, part_of_speech, morphology, relation, empty_token_sort, citation_part, presentation_before, presentation_after, antecedent_id, information_status, contrast_group, foreign_ids, slashes, alignment_id) ⇒ Token

Creates a new token object.

Raises:

  • (ArgumentError)

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/proiel/token.rb', line 70

# Creates a new token object.
#
# Arguments are type-checked in signature order. The first argument that
# fails its check raises, so instance variables assigned before that point
# remain set. String arguments are frozen in place (the caller's object is
# frozen, not a copy).
#
# @param parent [Object] parent sentence object (stored without validation)
# @param id [Integer] ID of the token
# @param head_id [nil, Integer] ID of the head token
# @param form [nil, String] token form
# @param lemma [nil, String] token lemma
# @param part_of_speech [nil, String] part of speech tag
# @param morphology [nil, String] morphological tag
# @param relation [nil, String] relation tag
# @param empty_token_sort [nil, String] empty token sort tag
# @param citation_part [nil, String] citation part
# @param presentation_before [nil, String] presentation material before form
# @param presentation_after [nil, String] presentation material after form
# @param antecedent_id [nil, Integer] ID of the antecedent token
# @param information_status [nil, String] information status tag
# @param contrast_group [nil, String] contrast group tag
# @param foreign_ids [nil, String] free-form foreign IDs
# @param slashes [Array<#relation, #target_id>] secondary edges; stored as
#   pairs of relation tag and target token ID
# @param alignment_id [nil, Integer] ID of the aligned token
# @raise [ArgumentError] if an argument does not have the expected type
def initialize(parent, id, head_id, form, lemma, part_of_speech,
               morphology, relation, empty_token_sort, citation_part,
               presentation_before, presentation_after, antecedent_id,
               information_status, contrast_group, foreign_ids, slashes,
               alignment_id)
  # Both validators hand back the checked value so that every assignment
  # below is a single expression.
  optional_integer = lambda do |value|
    raise ArgumentError, 'integer or nil expected' unless value.nil? || value.is_a?(Integer)
    value
  end

  optional_string = lambda do |value|
    raise ArgumentError, 'string or nil expected' unless value.nil? || value.is_a?(String)
    value.freeze
  end

  @sentence = parent

  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
  @id = id

  @head_id             = optional_integer.call(head_id)
  @form                = optional_string.call(form)
  @lemma               = optional_string.call(lemma)
  @part_of_speech      = optional_string.call(part_of_speech)
  @morphology          = optional_string.call(morphology)
  @relation            = optional_string.call(relation)
  @empty_token_sort    = optional_string.call(empty_token_sort)
  @citation_part       = optional_string.call(citation_part)
  @presentation_before = optional_string.call(presentation_before)
  @presentation_after  = optional_string.call(presentation_after)
  @antecedent_id       = optional_integer.call(antecedent_id)
  @information_status  = optional_string.call(information_status)
  @contrast_group      = optional_string.call(contrast_group)
  @foreign_ids         = optional_string.call(foreign_ids)

  raise ArgumentError, 'array expected' unless slashes.is_a?(Array)
  @slashes = slashes.map { |slash| [slash.relation.freeze, slash.target_id] }

  @alignment_id = optional_integer.call(alignment_id)
end

Instance Attribute Details

#alignment_idnil, Integer (readonly)

Returns ID of the token that this token is aligned to.

Returns:

  • (nil, Integer)

    ID of the token that this token is aligned to


67
68
69
# File 'lib/proiel/token.rb', line 67

# Reader for the alignment ID assigned by the constructor.
#
# @return [nil, Integer] ID of the token that this token is aligned to
def alignment_id
  @alignment_id
end

#antecedent_id ⇒ nil, Integer (readonly)

Returns ID of antecedent token.

Returns:

  • (nil, Integer)

    ID of antecedent token


52
53
54
# File 'lib/proiel/token.rb', line 52

# Reader for the antecedent ID assigned by the constructor.
#
# @return [nil, Integer] ID of antecedent token
def antecedent_id
  @antecedent_id
end

#citation_partnil, String (readonly)

Returns citation part.

Returns:

  • (nil, String)

    citation part


43
44
45
# File 'lib/proiel/token.rb', line 43

# Reader for the citation part assigned by the constructor.
#
# @return [nil, String] citation part (frozen when present)
def citation_part
  @citation_part
end

#contrast_groupnil, String (readonly)

Returns contrast group tag.

Returns:

  • (nil, String)

    contrast group tag


58
59
60
# File 'lib/proiel/token.rb', line 58

# Reader for the contrast group tag assigned by the constructor.
#
# @return [nil, String] contrast group tag (frozen when present)
def contrast_group
  @contrast_group
end

#empty_token_sortnil, String (readonly)

Returns token empty token sort tag.

Returns:

  • (nil, String)

    token empty token sort tag


40
41
42
# File 'lib/proiel/token.rb', line 40

# Reader for the empty token sort tag assigned by the constructor.
#
# @return [nil, String] empty token sort tag (frozen when present); nil
#   for tokens that are not empty
def empty_token_sort
  @empty_token_sort
end

#foreign_idsnil, String (readonly)

Returns free-form foreign IDs.

Returns:

  • (nil, String)

    free-form foreign IDs


61
62
63
# File 'lib/proiel/token.rb', line 61

# Reader for the foreign IDs assigned by the constructor.
#
# @return [nil, String] free-form foreign IDs (frozen when present)
def foreign_ids
  @foreign_ids
end

#formnil, String (readonly)

Returns token form.

Returns:

  • (nil, String)

    token form


22
23
24
# File 'lib/proiel/token.rb', line 22

# Reader for the token form assigned by the constructor.
#
# @return [nil, String] token form (frozen when present)
def form
  @form
end

#head_id ⇒ nil, Integer (readonly)

Returns ID of head token.

Returns:

  • (nil, Integer)

    ID of head token


19
20
21
# File 'lib/proiel/token.rb', line 19

# Reader for the head ID assigned by the constructor.
#
# @return [nil, Integer] ID of head token; nil when the token has no head
def head_id
  @head_id
end

#id ⇒ Integer (readonly)

Returns ID of the token.

Returns:

  • (Integer)

    ID of the token


13
14
15
# File 'lib/proiel/token.rb', line 13

# Reader for the token ID assigned by the constructor.
#
# @return [Integer] ID of the token
def id
  @id
end

#information_statusnil, String (readonly)

Returns information status tag.

Returns:

  • (nil, String)

    information status tag


55
56
57
# File 'lib/proiel/token.rb', line 55

# Reader for the information status tag assigned by the constructor.
#
# @return [nil, String] information status tag (frozen when present)
def information_status
  @information_status
end

#lemmanil, String (readonly)

Returns token lemma.

Returns:

  • (nil, String)

    token lemma


25
26
27
# File 'lib/proiel/token.rb', line 25

# Reader for the lemma assigned by the constructor.
#
# @return [nil, String] token lemma (frozen when present)
def lemma
  @lemma
end

#morphologynil, String (readonly)

Returns token morphological tag.

Returns:

  • (nil, String)

    token morphological tag


34
35
36
# File 'lib/proiel/token.rb', line 34

# Reader for the morphological tag assigned by the constructor.
#
# @return [nil, String] token morphological tag (frozen when present)
def morphology
  @morphology
end

#part_of_speechnil, String (readonly) Also known as: pos

Returns token part of speech tag.

Returns:

  • (nil, String)

    token part of speech tag


28
29
30
# File 'lib/proiel/token.rb', line 28

# Reader for the part of speech tag assigned by the constructor.
#
# @return [nil, String] token part of speech tag (frozen when present)
def part_of_speech
  @part_of_speech
end

#presentation_afternil, String (readonly)

Returns presentation material after form.

Returns:

  • (nil, String)

    presentation material after form


49
50
51
# File 'lib/proiel/token.rb', line 49

# Reader for the trailing presentation material assigned by the constructor.
#
# @return [nil, String] presentation material after form (frozen when present)
def presentation_after
  @presentation_after
end

#presentation_beforenil, String (readonly)

Returns presentation material before form.

Returns:

  • (nil, String)

    presentation material before form


46
47
48
# File 'lib/proiel/token.rb', line 46

# Reader for the leading presentation material assigned by the constructor.
#
# @return [nil, String] presentation material before form (frozen when present)
def presentation_before
  @presentation_before
end

#relationnil, String (readonly)

Returns token relation tag.

Returns:

  • (nil, String)

    token relation tag


37
38
39
# File 'lib/proiel/token.rb', line 37

# Reader for the relation tag assigned by the constructor.
#
# @return [nil, String] token relation tag (frozen when present)
def relation
  @relation
end

#sentenceSentence

Returns parent sentence object.

Returns:


16
17
18
# File 'lib/proiel/token.rb', line 16

# Reader for the parent object passed as the constructor's first argument.
#
# @return [Sentence] parent sentence object
def sentence
  @sentence
end

#slashes ⇒ Array<Array<String,Integer>> (readonly)

Returns secondary edges as an array of pairs of relation tag and target token ID.

Returns:

  • (Array<Array<String,Integer>>)

    secondary edges as an array of pairs of relation tag and target token ID


64
65
66
# File 'lib/proiel/token.rb', line 64

# Reader for the secondary edges built by the constructor.
#
# Each entry is a two-element array: the (frozen) relation tag followed by
# the target token ID.
#
# @return [Array<Array>] secondary edges as pairs of relation tag and
#   target token ID
def slashes
  @slashes
end

Instance Method Details

#alignment(aligned_source) ⇒ Token, NilClass

Returns the aligned token if any.

Returns:

  • (Token, NilClass)

    aligned token


400
401
402
# File 'lib/proiel/token.rb', line 400

# Looks up the token that this token is aligned to.
#
# The lookup goes through `aligned_source.treebank.find_token`; the argument
# is not touched at all when this token has no alignment ID.
#
# @param aligned_source [#treebank] source whose treebank is searched
# @return [Token, NilClass] aligned token, or nil when there is no alignment ID
def alignment(aligned_source)
  return nil unless alignment_id

  aligned_source.treebank.find_token(alignment_id)
end

#ancestorsArray<Token>

Finds ancestors of this token in the dependency graph.

The ancestors are the ancestors of this token in the tree that has tokens as nodes and primary relations as edges.

The order of the returned ancestors is as follows: The first ancestor is the head of this token, the next ancestor is the head of the previous token, and so on.

Returns:

  • (Array<Token>)

    ancestors


266
267
268
269
270
271
272
# File 'lib/proiel/token.rb', line 266

# Collects the chain of heads above this token.
#
# The first element is this token's head, followed recursively by that
# head's own ancestors. A root token has no ancestors.
#
# @return [Array<Token>] ancestors, nearest first
def ancestors
  return [] if is_root?

  [head, *head.ancestors]
end

#citationnil, String

Returns a complete citation for the token.

Returns:

  • (nil, String)

    a complete citation for the token


152
153
154
155
156
157
158
# File 'lib/proiel/token.rb', line 152

# Builds a complete citation for the token.
#
# The source's citation part and the token's own citation part are joined
# with a single space; a nil source part is skipped.
#
# @return [nil, String] complete citation, or nil when the token has no
#   citation part
def citation
  return nil unless citation_part

  [source.citation_part, citation_part].compact.join(' ')
end

#common_ancestors(other_token, inclusive: false) ⇒ Array<Token>

Finds the common ancestors that this token and another token share in the dependency graph.

If `inclusive` is `false`, a common ancestor is defined strictly as a common ancestor of both tokens. If `inclusive` is `true`, one of the tokens can be a common ancestor of the other.

Ancestors are returned in the same order as #ancestors.

Examples:

x.head # => w
w.head # => z
y.head # => z
z.head # => u

x.common_ancestors(y, inclusive: false) # => [z, u]
x.common_ancestors(w, inclusive: false) # => [z, u]
x.common_ancestors(x, inclusive: false) # => [w, z, u]

x.common_ancestors(y, inclusive: true)  # => [z, u]
x.common_ancestors(w, inclusive: true)  # => [w, z, u]
x.common_ancestors(x, inclusive: true)  # => [x, w, z, u]

Returns:

  • (Array<Token>)

    common ancestors

See Also:


358
359
360
361
362
363
364
365
366
# File 'lib/proiel/token.rb', line 358

# Finds the ancestors shared by this token and another token.
#
# With `inclusive: true` each token is counted as the first member of its own
# ancestor chain, so one token may be a common ancestor of the other. The
# result keeps the order of this token's chain.
#
# @param other_token [Token] token to compare against
# @param inclusive [true, false] whether the tokens themselves may be
#   counted as common ancestors
# @return [Array<Token>] common ancestors
def common_ancestors(other_token, inclusive: false)
  own_chain = ancestors
  other_chain = other_token.ancestors

  if inclusive
    own_chain = [self, *own_chain]
    other_chain = [other_token, *other_chain]
  end

  own_chain & other_chain
end

#dependentsArray<Token> Also known as: children

Finds dependents of this token in the dependency graph.

The dependents are the children of this token in the tree that has tokens as nodes and primary relations as edges.

The order of the returned dependents is indeterminate.

Returns:

  • (Array<Token>)

    dependents


248
249
250
# File 'lib/proiel/token.rb', line 248

# Finds the tokens in the parent sentence whose head is this token.
#
# Scans every token of the sentence and keeps those whose `head_id` equals
# this token's ID.
#
# @return [Array<Token>] dependents
def dependents
  siblings = @sentence.tokens
  siblings.select { |candidate| candidate.head_id == @id }
end

#descendentsArray<Token> Also known as: descendants

Finds descendents of this token in the dependency graph.

The descendents are all tokens below this token (its dependents, their dependents, and so on) in the tree that has tokens as nodes and primary relations as edges.

The order of the returned descendents is indeterminate.

Returns:

  • (Array<Token>)

    descendents


284
285
286
# File 'lib/proiel/token.rb', line 284

# Collects every token below this token in the dependency graph.
#
# Each dependent is followed immediately by its own descendents, so the
# result is a flat list.
#
# @return [Array<Token>] descendents
def descendents
  dependents.flat_map { |child| [child, *child.descendents] }
end

#divDiv

Returns parent div object.

Returns:

  • (Div)

    parent div object


130
131
132
# File 'lib/proiel/token.rb', line 130

# Returns the div that the parent sentence belongs to.
#
# Delegates to the `div` method of the parent sentence.
#
# @return [Div] parent div object
def div
  @sentence.div
end

#first_common_ancestor(other_token, inclusive: false) ⇒ nil, Token

Finds the first common ancestor that this token and another token share in the dependency graph.

If `inclusive` is `false`, a common ancestor is defined strictly as a common ancestor of both tokens. If `inclusive` is `true`, one of the tokens can be a common ancestor of the other.

Examples:

x.head # => w
w.head # => z
y.head # => z
z.head # => u

x.first_common_ancestor(y, inclusive: false) # => z
x.first_common_ancestor(w, inclusive: false) # => z
x.first_common_ancestor(x, inclusive: false) # => w

x.first_common_ancestor(y, inclusive: true)  # => z
x.first_common_ancestor(w, inclusive: true)  # => w
x.first_common_ancestor(x, inclusive: true)  # => x

Returns:

  • (nil, Token)

    first common ancestor

See Also:


393
394
395
# File 'lib/proiel/token.rb', line 393

# Finds the nearest ancestor shared by this token and another token.
#
# This is the first element of {#common_ancestors} called with the same
# arguments.
#
# @param other_token [Token] token to compare against
# @param inclusive [true, false] whether the tokens themselves may be
#   counted as common ancestors
# @return [nil, Token] first common ancestor, or nil when there is none
def first_common_ancestor(other_token, inclusive: false)
  shared = common_ancestors(other_token, inclusive: inclusive)
  shared.first
end

#has_citation?true, false

Tests if the token has a citation.

A token has a citation if `citation_part` is not `nil`.

Returns:

  • (true, false)

320
321
322
# File 'lib/proiel/token.rb', line 320

# Tests if the token has a citation.
#
# @return [true, false] true when `citation_part` is not nil
def has_citation?
  !citation_part.nil?
end

#has_content?true, false

Tests if the token has content.

A token has content if it has a form.

Returns:

  • (true, false)

See Also:


311
312
313
# File 'lib/proiel/token.rb', line 311

# Tests if the token has content.
#
# The check is made on the empty token sort rather than on the form itself:
# a token without an empty token sort is treated as having content.
#
# @return [true, false] true when `empty_token_sort` is nil
def has_content?
  empty_token_sort.nil?
end

#headToken Also known as: parent

Finds the head of this token.

The head is the parent of this token in the tree that has tokens as nodes and primary relations as edges.

Returns:


228
229
230
231
232
233
234
# File 'lib/proiel/token.rb', line 228

# Finds the head of this token.
#
# The head is looked up by `head_id` in the treebank; root tokens have no
# head.
#
# @return [nil, Token] head token, or nil when the token is a root
def head
  return nil if is_root?

  treebank.find_token(head_id)
end

#is_empty?true, false

Tests if the token is empty.

A token is empty if it does not have a form. If the token is empty, #empty_token_sort explains its function.

Returns:

  • (true, false)

See Also:


300
301
302
# File 'lib/proiel/token.rb', line 300

# Tests if the token is empty.
#
# A token counts as empty when it carries an empty token sort.
#
# @return [true, false] true when `empty_token_sort` is not nil
def is_empty?
  !empty_token_sort.nil?
end

#is_root?true, false

Checks if the token is the root of its dependency graph.

If the token belongs to a sentence that lacks dependency annotation, all tokens are treated as roots. If a sentence has partial or complete dependency annotation there may still be multiple root tokens.

Returns:

  • (true, false)

218
219
220
# File 'lib/proiel/token.rb', line 218

# Checks if the token is a root of its dependency graph.
#
# A token is a root when it has no head ID, so several tokens of one
# sentence may be roots.
#
# @return [true, false] true when `head_id` is nil
def is_root?
  head_id.nil?
end

#languageString

Returns language of the token as an ISO 639-3 language tag.

Returns:

  • (String)

    language of the token as an ISO 639-3 language tag


145
146
147
# File 'lib/proiel/token.rb', line 145

# Returns the language of the token.
#
# Delegates to the `language` method of the parent source.
#
# @return [String] language of the token as an ISO 639-3 language tag
def language
  source.language
end

#morphology_hashHash<Symbol,String>

Returns token morphology tag as a hash.

Returns:

  • (Hash<Symbol,String>)

    token morphology tag as a hash


201
202
203
204
205
206
207
# File 'lib/proiel/token.rb', line 201

# Returns the token morphology tag as a hash.
#
# Each character of the positional tag is paired with the field at the same
# position in MORPHOLOGY_POSITIONAL_TAG_SEQUENCE. Positions holding '-' are
# left out. Positions missing because the tag is shorter than the sequence
# are left out as well, so the hash never contains nil values.
#
# @return [Hash<Symbol,String>] token morphology tag as a hash; empty when
#   the token has no morphology tag
def morphology_hash
  return {} unless morphology

  MORPHOLOGY_POSITIONAL_TAG_SEQUENCE
    .zip(morphology.chars)
    # zip pads with nil when the tag is shorter than the field sequence
    .reject { |_, value| value.nil? || value == '-' }
    .to_h
end

#part_of_speech_hashHash<Symbol,String> Also known as: pos_hash

Returns token part of speech tag as a hash.

Returns:

  • (Hash<Symbol,String>)

    token part of speech tag as a hash


178
179
180
181
182
183
184
# File 'lib/proiel/token.rb', line 178

# Returns the token part of speech tag as a hash.
#
# Each character of the positional tag is paired with the field at the same
# position in POS_POSITIONAL_TAG_SEQUENCE. Positions holding '-' are left
# out. Positions missing because the tag is shorter than the sequence are
# left out as well, so the hash never contains nil values.
#
# @return [Hash<Symbol,String>] token part of speech tag as a hash; empty
#   when the token has no part of speech tag
def part_of_speech_hash
  return {} unless part_of_speech

  POS_POSITIONAL_TAG_SEQUENCE
    .zip(part_of_speech.chars)
    # zip pads with nil when the tag is shorter than the field sequence
    .reject { |_, value| value.nil? || value == '-' }
    .to_h
end

#part_of_speech_with_nullsString Also known as: pos_with_nulls

Returns the part of speech tag if set, but also provides a suitable part of speech tag for empty elements.

Returns:

  • (String)

    part of speech tag


194
195
196
# File 'lib/proiel/token.rb', line 194

# Returns the part of speech tag if set, otherwise a substitute tag for
# empty tokens.
#
# When `part_of_speech` is nil, the result is whatever NULL_PARTS_OF_SPEECH
# holds for this token's `empty_token_sort`.
# NOTE(review): NULL_PARTS_OF_SPEECH is defined outside this excerpt;
# presumably it maps empty token sorts to part of speech tags - confirm.
#
# @return [String] part of speech tag
def part_of_speech_with_nulls
  part_of_speech || NULL_PARTS_OF_SPEECH[empty_token_sort]
end

#printable_form(custom_token_formatter: nil) ⇒ String

Returns the printable form of the token with any presentation data.

If given, `custom_token_formatter` is a function which is passed the token as its sole argument.

Parameters:

  • custom_token_formatter (Lambda) (defaults to: nil)

    formatting function for tokens

Returns:

  • (String)

    the printable form of the token


166
167
168
169
170
171
172
173
174
175
# File 'lib/proiel/token.rb', line 166

# Returns the printable form of the token with any presentation data.
#
# The token text is the result of `custom_token_formatter` when one is
# given, and the plain form otherwise. It is wrapped in the presentation
# material before and after the form; nil parts are skipped.
#
# @param custom_token_formatter [#call, nil] formatting function that is
#   passed the token as its sole argument
# @return [String] the printable form of the token
def printable_form(custom_token_formatter: nil)
  rendered = custom_token_formatter ? custom_token_formatter.call(self) : form

  [presentation_before, rendered, presentation_after].compact.join
end

#pro?true, false

Checks if the token is a PRO token.

Returns:

  • (true, false)

327
328
329
# File 'lib/proiel/token.rb', line 327

# Checks if the token is a PRO token.
#
# @return [true, false] true when `empty_token_sort` equals 'P'
def pro?
  empty_token_sort == 'P'
end

#sourceSource

Returns parent source object.

Returns:

  • (Source)

    parent source object


135
136
137
# File 'lib/proiel/token.rb', line 135

# Returns the source that the token belongs to.
#
# The source is reached through the parent sentence and its div.
#
# @return [Source] parent source object
def source
  parent_div = @sentence.div
  parent_div.source
end

#treebankTreebank

Returns parent treebank object.

Returns:


140
141
142
# File 'lib/proiel/token.rb', line 140

# Returns the treebank that the token belongs to.
#
# The treebank is reached through the parent sentence, its div and the
# div's source.
#
# @return [Treebank] parent treebank object
def treebank
  parent_source = @sentence.div.source
  parent_source.treebank
end