Class: PROIEL::Sentence

Inherits:
TreebankObject show all
Extended by:
Memoist
Defined in:
lib/proiel/sentence.rb

Overview

A sentence object in a treebank.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from TreebankObject

#inspect

Constructor Details

#initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block) ⇒ Sentence

Creates a new sentence object.

Raises:

  • (ArgumentError)


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/proiel/sentence.rb', line 42

# Creates a new sentence object.
#
# Every argument except +parent+ is type-checked before it is stored.
# String arguments are frozen in place (the object passed by the caller is
# frozen; no copy is made). The two timestamps are parsed with
# DateTime.xmlschema and the resulting objects are frozen.
#
# @param parent [Div] parent div object (stored as given, not validated)
# @param id [Integer] ID of the sentence
# @param status [String, Symbol] annotation status, stored as a Symbol
# @param presentation_before [nil, String] presentation material before the sentence
# @param presentation_after [nil, String] presentation material after the sentence
# @param alignment_id [nil, Integer] ID of the sentence this sentence is aligned to
# @param annotated_by [nil, String] annotator of the sentence
# @param reviewed_by [nil, String] reviewer of the sentence
# @param annotated_at [nil, String] time of annotation in XML schema date time format
# @param reviewed_at [nil, String] time of review in XML schema date time format
# @yieldparam sentence [Sentence] the sentence under construction
# @yieldreturn the child collection of the sentence, stored as returned
# @raise [ArgumentError] if an argument fails its type check
def initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block)
  @div = parent

  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
  @id = id

  raise ArgumentError, 'string or symbol expected' unless status.is_a?(String) || status.is_a?(Symbol)
  @status = status.to_sym

  raise ArgumentError, 'string or nil expected' unless presentation_before.nil? || presentation_before.is_a?(String)
  @presentation_before = presentation_before.freeze

  raise ArgumentError, 'string or nil expected' unless presentation_after.nil? || presentation_after.is_a?(String)
  @presentation_after = presentation_after.freeze

  raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? || alignment_id.is_a?(Integer)
  @alignment_id = alignment_id

  raise ArgumentError, 'XML schema date time or nil expected' unless annotated_at.nil? || PROIEL::Utilities.xmlschema_datetime?(annotated_at)
  @annotated_at = annotated_at ? DateTime.xmlschema(annotated_at).freeze : nil

  raise ArgumentError, 'XML schema date time or nil expected' unless reviewed_at.nil? || PROIEL::Utilities.xmlschema_datetime?(reviewed_at)
  @reviewed_at = reviewed_at ? DateTime.xmlschema(reviewed_at).freeze : nil

  raise ArgumentError, 'string or nil expected' unless annotated_by.nil? || annotated_by.is_a?(String)
  @annotated_by = annotated_by.freeze

  raise ArgumentError, 'string or nil expected' unless reviewed_by.nil? || reviewed_by.is_a?(String)
  @reviewed_by = reviewed_by.freeze

  # A sentence built without a block has no children. Use an empty list
  # instead of leaving @children nil, because the methods that read
  # @children call collection methods on it directly.
  @children = block ? block.call(self) : []
end

Instance Attribute Details

#alignment_idnil, Integer (readonly)

Returns ID of the sentence that this sentence is aligned to.

Returns:

  • (nil, Integer)

    ID of the sentence that this sentence is aligned to



27
28
29
# File 'lib/proiel/sentence.rb', line 27

# Reader for the alignment ID stored by the constructor.
#
# @return [nil, Integer] ID of the sentence that this sentence is aligned to
def alignment_id
  @alignment_id
end

#annotated_atnil, DateTime (readonly)

Returns time of annotation.

Returns:

  • (nil, DateTime)

    time of annotation



36
37
38
# File 'lib/proiel/sentence.rb', line 36

# Reader for the annotation timestamp stored by the constructor.
#
# @return [nil, DateTime] time of annotation
def annotated_at
  @annotated_at
end

#annotated_bynil, String (readonly)

Returns annotator of sentence.

Returns:

  • (nil, String)

    annotator of sentence



30
31
32
# File 'lib/proiel/sentence.rb', line 30

# Reader for the annotator stored by the constructor.
#
# @return [nil, String] annotator of sentence
def annotated_by
  @annotated_by
end

#divDiv (readonly)

Returns parent div object.

Returns:

  • (Div)

    parent div object



15
16
17
# File 'lib/proiel/sentence.rb', line 15

# Reader for the parent object passed to the constructor.
#
# @return [Div] parent div object
def div
  @div
end

#id ⇒ Integer (readonly)

Returns ID of the sentence.

Returns:

  • (Integer)

    ID of the sentence



12
13
14
# File 'lib/proiel/sentence.rb', line 12

# Reader for the sentence ID; the constructor only accepts an Integer.
#
# @return [Integer] ID of the sentence
def id
  @id
end

#presentation_afternil, String (readonly)

Returns presentation material after sentence.

Returns:

  • (nil, String)

    presentation material after sentence



24
25
26
# File 'lib/proiel/sentence.rb', line 24

# Reader for the trailing presentation material (frozen by the constructor).
#
# @return [nil, String] presentation material after sentence
def presentation_after
  @presentation_after
end

#presentation_beforenil, String (readonly)

Returns presentation material before sentence.

Returns:

  • (nil, String)

    presentation material before sentence



21
22
23
# File 'lib/proiel/sentence.rb', line 21

# Reader for the leading presentation material (frozen by the constructor).
#
# @return [nil, String] presentation material before sentence
def presentation_before
  @presentation_before
end

#reviewed_atnil, DateTime (readonly)

Returns time of review.

Returns:

  • (nil, DateTime)

    time of review



39
40
41
# File 'lib/proiel/sentence.rb', line 39

# Reader for the review timestamp stored by the constructor.
#
# @return [nil, DateTime] time of review
def reviewed_at
  @reviewed_at
end

#reviewed_bynil, String (readonly)

Returns reviewer of sentence.

Returns:

  • (nil, String)

    reviewer of sentence



33
34
35
# File 'lib/proiel/sentence.rb', line 33

# Reader for the reviewer stored by the constructor.
#
# @return [nil, String] reviewer of sentence
def reviewed_by
  @reviewed_by
end

#statusSymbol (readonly)

Returns annotation status of sentence.

Returns:

  • (Symbol)

    annotation status of sentence



18
19
20
# File 'lib/proiel/sentence.rb', line 18

# Reader for the annotation status; the constructor converts it to a Symbol.
#
# @return [Symbol] annotation status of sentence
def status
  @status
end

Instance Method Details

#alignment(aligned_source) ⇒ Sentence, NilClass

Returns the aligned sentence if any.

Returns:

  • (Sentence, NilClass)

    aligned sentence



227
228
229
# File 'lib/proiel/sentence.rb', line 227

# Looks up the sentence this sentence is aligned to.
#
# @param aligned_source [Source] source whose treebank is searched
# @return [Sentence, NilClass] the aligned sentence, or nil when this
#   sentence has no alignment ID
def alignment(aligned_source)
  return nil unless alignment_id

  aligned_source.treebank.find_sentence(alignment_id)
end

#annotated?true, false

Checks if the sentence is annotated.

Since only annotated sentences can be reviewed, a sentence is annotated if its `status` is either `:reviewed` or `:annotated`.

Returns:

  • (true, false)


141
142
143
# File 'lib/proiel/sentence.rb', line 141

# Checks if the sentence is annotated, i.e. whether its status is either
# :reviewed or :annotated.
#
# @return [true, false]
def annotated?
  %i[reviewed annotated].include?(@status)
end

#citationString

Returns the complete citation for the sentence.

Returns:

  • (String)

    the complete citation for the sentence



93
94
95
# File 'lib/proiel/sentence.rb', line 93

# Builds the complete citation by joining the source's citation component
# and this sentence's citation component with a single space.
#
# @return [String] the complete citation for the sentence
def citation
  components = [source.citation_part, citation_part]
  components.join(' ')
end

#citation_partString

Computes an appropriate citation component for the sentence.

The computed citation component must be concatenated with the citation component provided by the source to produce a complete citation.

Returns:

  • (String)

    the citation component

See Also:



105
106
107
108
109
110
111
# File 'lib/proiel/sentence.rb', line 105

# Computes the citation component for the sentence from the first and last
# child that report having a citation, combined through
# Citations.citation_make_range.
#
# @return [String] the citation component
def citation_part
  cited = @children.select(&:has_citation?)
  range_start = cited.first&.citation_part
  range_end = cited.last&.citation_part

  Citations.citation_make_range(range_start, range_end)
end

#inferred_alignment(aligned_source) ⇒ Array<Sentence>

Returns inferred aligned sentences if any.

Returns:

  • (Array<Sentence>)

    inferred aligned sentences



234
235
236
237
238
# File 'lib/proiel/sentence.rb', line 234

# Infers aligned sentences from the alignments of this sentence's tokens.
#
# Only tokens with an alignment ID are considered. Their alignments are
# flattened, nils are dropped, and the distinct sentences they belong to
# are returned.
#
# @param aligned_source [Source] source passed on to each token's alignment lookup
# @return [Array<Sentence>] inferred aligned sentences
def inferred_alignment(aligned_source)
  aligned_tokens = tokens.select(&:alignment_id)
  targets = aligned_tokens.map { |token| token.alignment(aligned_source) }
  resolved = targets.flatten.compact
  resolved.map(&:sentence).uniq
end

#languageString

Returns language of the sentence as an ISO 639-3 language tag.

Returns:

  • (String)

    language of the sentence as an ISO 639-3 language tag



86
87
88
# File 'lib/proiel/sentence.rb', line 86

# Returns the language of the sentence by delegating to the parent source.
#
# @return [String] language of the sentence (documented as an ISO 639-3
#   language tag)
def language
  source.language
end

#printable_form(custom_token_formatter: nil) ⇒ String

Returns the printable form of the sentence with all token forms and any presentation data.

The custom token formatter, if given, is a lambda which is passed the token as its sole argument.

Parameters:

  • custom_token_formatter (Lambda) (defaults to: nil)

    formatting function for tokens

Returns:

  • (String)

    the printable form of the sentence



120
121
122
123
124
# File 'lib/proiel/sentence.rb', line 120

# Returns the printable form of the sentence: the presentation material
# before the sentence, the printable form of every non-empty token, and the
# presentation material after the sentence, concatenated without separator.
# Missing (nil) presentation material is skipped.
#
# @param custom_token_formatter [Lambda] formatting function for tokens,
#   passed through unchanged to each token's printable_form
# @return [String] the printable form of the sentence
def printable_form(custom_token_formatter: nil)
  visible_tokens = @children.reject(&:is_empty?)
  token_forms = visible_tokens.map do |token|
    token.printable_form(custom_token_formatter: custom_token_formatter)
  end

  [presentation_before, token_forms, presentation_after].compact.join
end

#reviewed?true, false

Checks if the sentence is reviewed.

A sentence has been reviewed if its `status` is `:reviewed`.

Returns:

  • (true, false)


131
132
133
# File 'lib/proiel/sentence.rb', line 131

# Checks if the sentence is reviewed, i.e. whether its status is :reviewed.
#
# @return [true, false]
def reviewed?
  @status == :reviewed
end

#sourceSource

Returns parent source object.

Returns:

  • (Source)

    parent source object



76
77
78
# File 'lib/proiel/sentence.rb', line 76

# Returns the parent source object by asking the parent div for its source.
#
# @return [Source] parent source object
def source
  @div.source
end

#syntax_graphHash

Builds a syntax graph for the dependency annotation of the sentence and inserts a dummy root node. The graph is represented as a hash of hashes. Each hash contains the ID of the token, its relation (to its syntactically dominating token) and a list of secondary edges.

Examples:


sentence.syntax_graph # => { id: nil, relation: nil, children: [{ id: 1000, relation: "pred", children: [ { id: 1001, relation: "xcomp", children: [], slashes: [["xsub", 1000]]}]}], slashes: [] }

Returns:

  • (Hash)

    a single graph with a dummy root node represented as a hash



165
166
167
# File 'lib/proiel/sentence.rb', line 165

# Builds a single syntax graph for the sentence by placing the graphs from
# syntax_graphs under a dummy root node whose id and relation are nil and
# whose list of slashes is empty.
#
# @return [Hash] a single graph with a dummy root node represented as a hash
def syntax_graph
  roots = syntax_graphs

  {
    id: nil,
    relation: nil,
    children: roots,
    slashes: []
  }
end

#syntax_graphsArray

Builds syntax graphs for the dependency annotation of the sentence. Multiple graphs may be returned as the function does not insert an empty dummy root node. Each graph is represented as a hash of hashes. Each hash contains the ID of the token, its relation (to its syntactically dominating token) and a list of secondary edges.

Examples:

Get the syntax graphs without a dummy root node


sentence.syntax_graphs # => [{ id: 1000, relation: "pred", children: [ { id: 1001, relation: "xcomp", children: [], slashes: [["xsub", 1000]]}]}]

Returns:

  • (Array)

    zero or more syntax graphs represented as hashes



181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/proiel/sentence.rb', line 181

# Builds syntax graphs for the dependency annotation of the sentence.
#
# No dummy root node is inserted, so several graphs may be returned. Each
# node is a hash holding the token's id, relation, slashes and a list of
# child nodes.
#
# @return [Array] zero or more syntax graphs represented as hashes
def syntax_graphs
  # Pass 1: build one node per token, indexed by token ID, so that heads can
  # be looked up regardless of the order in which tokens are listed.
  nodes_by_id = @children.each_with_object({}) do |token, index|
    index[token.id] = {
      id: token.id,
      relation: token.relation,
      children: [],
      slashes: token.slashes,
    }
  end

  # Pass 2: attach each node to its head's children list, or to the list of
  # root graphs when the token has no head ID.
  @children.each_with_object([]) do |token, roots|
    siblings = token.head_id ? nodes_by_id[token.head_id][:children] : roots
    siblings << nodes_by_id[token.id]
  end
end

#tokensEnumerator

Finds all tokens in the sentence.

Examples:

Iterating tokens

tokens.each { |t| puts t.id }

Create an array with only empty tokens

tokens.select(&:is_empty?)

Counting tokens

puts tokens.count #=> 200

Returns:

  • (Enumerator)

    tokens in the sentence



220
221
222
# File 'lib/proiel/sentence.rb', line 220

# Finds all tokens in the sentence by exposing the sentence's children as an
# enumerator.
#
# @return [Enumerator] tokens in the sentence
def tokens
  @children.to_enum
end

#treebankTreebank

Returns parent treebank object.

Returns:

  • (Treebank)

    parent treebank object



81
82
83
# File 'lib/proiel/sentence.rb', line 81

# Returns the parent treebank object, reached through the parent div's
# source.
#
# @return [Treebank] parent treebank object
def treebank
  @div.source.treebank
end

#unannotated?true, false

Checks if the sentence is unannotated.

A sentence is unannotated if its `status` is `:unannotated`.

Returns:

  • (true, false)


150
151
152
# File 'lib/proiel/sentence.rb', line 150

# Checks if the sentence is unannotated, i.e. whether its status is
# :unannotated.
#
# @return [true, false]
def unannotated?
  @status == :unannotated
end