Class: PROIEL::Sentence

Inherits:
TreebankObject show all
Extended by:
Memoist
Defined in:
lib/proiel/sentence.rb

Overview

A sentence object in a treebank.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from TreebankObject

#inspect

Constructor Details

#initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block) ⇒ Sentence

Creates a new sentence object.

Raises:

  • (ArgumentError)


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/proiel/sentence.rb', line 42

# Creates a new sentence object.
#
# Each argument is type-checked before it is stored. String arguments are
# frozen in place (no copy is made), so the caller's string objects become
# immutable as well.
#
# @param parent [Div] parent div object (stored as-is, not validated)
# @param id [Integer] ID of the sentence
# @param status [String, Symbol] annotation status; stored as a symbol
# @param presentation_before [nil, String] presentation material before the sentence
# @param presentation_after [nil, String] presentation material after the sentence
# @param alignment_id [nil, Integer] ID of the sentence this sentence is aligned to
# @param annotated_by [nil, String] annotator of the sentence
# @param reviewed_by [nil, String] reviewer of the sentence
# @param annotated_at [nil, String] time of annotation as an XML schema date time
# @param reviewed_at [nil, String] time of review as an XML schema date time
# @yieldparam sentence [Sentence] the sentence being constructed
# @yieldreturn [Array] the children of the sentence (presumably its tokens)
# @raise [ArgumentError] if any argument fails its type or format check
def initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block)
  @div = parent

  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
  @id = id

  raise ArgumentError, 'string or symbol expected' unless status.is_a?(String) || status.is_a?(Symbol)
  @status = status.to_sym

  raise ArgumentError, 'string or nil expected' unless presentation_before.nil? || presentation_before.is_a?(String)
  @presentation_before = presentation_before.freeze

  raise ArgumentError, 'string or nil expected' unless presentation_after.nil? || presentation_after.is_a?(String)
  @presentation_after = presentation_after.freeze

  raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? || alignment_id.is_a?(Integer)
  @alignment_id = alignment_id

  # Timestamps arrive as strings and are parsed into frozen DateTime objects.
  raise ArgumentError, 'XML schema date time or nil expected' unless annotated_at.nil? || PROIEL::Utilities.xmlschema_datetime?(annotated_at)
  @annotated_at = annotated_at ? DateTime.xmlschema(annotated_at).freeze : nil

  raise ArgumentError, 'XML schema date time or nil expected' unless reviewed_at.nil? || PROIEL::Utilities.xmlschema_datetime?(reviewed_at)
  @reviewed_at = reviewed_at ? DateTime.xmlschema(reviewed_at).freeze : nil

  raise ArgumentError, 'string or nil expected' unless annotated_by.nil? || annotated_by.is_a?(String)
  @annotated_by = annotated_by.freeze

  raise ArgumentError, 'string or nil expected' unless reviewed_by.nil? || reviewed_by.is_a?(String)
  @reviewed_by = reviewed_by.freeze

  # Without a block there are no children; use an empty list rather than nil
  # so that methods iterating over @children keep working.
  @children = block_given? ? block.call(self) : []
end

Instance Attribute Details

#alignment_id ⇒ nil, Integer (readonly)

Returns ID of the sentence that this sentence is aligned to.

Returns:

  • (nil, Integer)

    ID of the sentence that this sentence is aligned to



27
28
29
# File 'lib/proiel/sentence.rb', line 27

# @return [nil, Integer] ID of the sentence that this sentence is aligned to
def alignment_id
  @alignment_id
end

#annotated_at ⇒ nil, DateTime (readonly)

Returns time of annotation.

Returns:

  • (nil, DateTime)

    time of annotation



36
37
38
# File 'lib/proiel/sentence.rb', line 36

# @return [nil, DateTime] time of annotation
def annotated_at
  @annotated_at
end

#annotated_by ⇒ nil, String (readonly)

Returns annotator of sentence.

Returns:

  • (nil, String)

    annotator of sentence



30
31
32
# File 'lib/proiel/sentence.rb', line 30

# @return [nil, String] annotator of sentence
def annotated_by
  @annotated_by
end

#div ⇒ Div (readonly)

Returns parent div object.

Returns:

  • (Div)

    parent div object



15
16
17
# File 'lib/proiel/sentence.rb', line 15

# @return [Div] parent div object
def div
  @div
end

#id ⇒ Integer (readonly)

Returns ID of the sentence.

Returns:

  • (Integer)

    ID of the sentence



12
13
14
# File 'lib/proiel/sentence.rb', line 12

# @return [Integer] ID of the sentence
def id
  @id
end

#presentation_after ⇒ nil, String (readonly)

Returns presentation material after sentence.

Returns:

  • (nil, String)

    presentation material after sentence



24
25
26
# File 'lib/proiel/sentence.rb', line 24

# @return [nil, String] presentation material after sentence
def presentation_after
  @presentation_after
end

#presentation_before ⇒ nil, String (readonly)

Returns presentation material before sentence.

Returns:

  • (nil, String)

    presentation material before sentence



21
22
23
# File 'lib/proiel/sentence.rb', line 21

# @return [nil, String] presentation material before sentence
def presentation_before
  @presentation_before
end

#reviewed_at ⇒ nil, DateTime (readonly)

Returns time of review.

Returns:

  • (nil, DateTime)

    time of review



39
40
41
# File 'lib/proiel/sentence.rb', line 39

# @return [nil, DateTime] time of review
def reviewed_at
  @reviewed_at
end

#reviewed_by ⇒ nil, String (readonly)

Returns reviewer of sentence.

Returns:

  • (nil, String)

    reviewer of sentence



33
34
35
# File 'lib/proiel/sentence.rb', line 33

# @return [nil, String] reviewer of sentence
def reviewed_by
  @reviewed_by
end

#status ⇒ Symbol (readonly)

Returns annotation status of sentence.

Returns:

  • (Symbol)

    annotation status of sentence



18
19
20
# File 'lib/proiel/sentence.rb', line 18

# @return [Symbol] annotation status of sentence
def status
  @status
end

Instance Method Details

#alignment(aligned_source) ⇒ Sentence, NilClass

Returns the aligned sentence if any.

Returns:

  • (Sentence, NilClass)

    aligned sentence



224
225
226
# File 'lib/proiel/sentence.rb', line 224

# Looks up the sentence this sentence is aligned to.
#
# @param aligned_source [Source] source whose treebank is searched for the
#   aligned sentence
# @return [Sentence, NilClass] the aligned sentence, or nil when this
#   sentence has no alignment ID
def alignment(aligned_source)
  target_id = alignment_id
  return nil unless target_id

  aligned_source.treebank.find_sentence(target_id)
end

#annotated? ⇒ true, false

Checks if the sentence is annotated.

Since only annotated sentences can be reviewed, a sentence is annotated if its `status` is either `:reviewed` or `:annotated`.

Returns:

  • (true, false)


138
139
140
# File 'lib/proiel/sentence.rb', line 138

# Checks if the sentence is annotated.
#
# A reviewed sentence counts as annotated too, so both the `:reviewed` and
# the `:annotated` status qualify.
#
# @return [true, false]
def annotated?
  %i[reviewed annotated].include?(@status)
end

#citation ⇒ String

Returns the complete citation for the sentence.

Returns:

  • (String)

    the complete citation for the sentence



93
94
95
# File 'lib/proiel/sentence.rb', line 93

# Builds the complete citation for the sentence by joining the source's
# citation component and the sentence's own component with a single space.
#
# @return [String] the complete citation for the sentence
def citation
  source_part = source.citation_part
  sentence_part = citation_part

  [source_part, sentence_part].join(' ')
end

#citation_part ⇒ String

Computes an appropriate citation component for the sentence.

The computed citation component must be concatenated with the citation component provided by the source to produce a complete citation.

Returns:

  • (String)

    the citation component

See Also:



105
106
107
108
109
110
111
# File 'lib/proiel/sentence.rb', line 105

# Computes the citation component for the sentence.
#
# The range runs from the citation component of the first child that has a
# citation to that of the last such child. Either end is nil when no child
# has a citation.
#
# @return [String] the citation component
def citation_part
  cited = @children.select(&:has_citation?)
  range_start = cited.first&.citation_part
  range_end = cited.last&.citation_part

  Citations.citation_make_range(range_start, range_end)
end

#inferred_alignment(aligned_source) ⇒ Array<Sentence>

Returns inferred aligned sentences if any.

Returns:

  • (Array<Sentence>)

    inferred aligned sentences



231
232
233
234
235
# File 'lib/proiel/sentence.rb', line 231

# Infers aligned sentences from the alignments of the sentence's tokens.
#
# Only tokens with an alignment ID are considered. Their alignments are
# flattened, nils are dropped, and each remaining target is mapped to its
# sentence; duplicates are removed.
#
# @param aligned_source [Source] source passed on to each token's alignment lookup
# @return [Array<Sentence>] inferred aligned sentences
def inferred_alignment(aligned_source)
  aligned_tokens = tokens.select(&:alignment_id)
  targets = aligned_tokens.map { |token| token.alignment(aligned_source) }
  found = targets.flatten.compact

  found.map(&:sentence).uniq
end

#language ⇒ String

Returns language of the sentence as an ISO 639-3 language tag.

Returns:

  • (String)

    language of the sentence as an ISO 639-3 language tag



86
87
88
# File 'lib/proiel/sentence.rb', line 86

# Returns the language of the sentence, which is taken from the parent source.
#
# @return [String] language of the sentence as an ISO 639-3 language tag
def language
  source.language
end

#printable_form(options = {}) ⇒ String

Returns the printable form of the sentence with all token forms and any presentation data.

Returns:

  • (String)

    the printable form of the sentence



117
118
119
120
121
# File 'lib/proiel/sentence.rb', line 117

# Returns the printable form of the sentence: the presentation material
# before it, the printable form of every non-empty child, and the
# presentation material after it, concatenated without a separator.
#
# @param options [Hash] passed unchanged to each child's `printable_form`
# @return [String] the printable form of the sentence
def printable_form(options = {})
  parts = [presentation_before]

  @children.each do |token|
    next if token.is_empty?

    parts << token.printable_form(options)
  end

  parts << presentation_after
  # Missing presentation material is nil and is dropped before joining.
  parts.compact.join
end

#reviewed? ⇒ true, false

Checks if the sentence is reviewed.

A sentence has been reviewed if its `status` is `:reviewed`.

Returns:

  • (true, false)


128
129
130
# File 'lib/proiel/sentence.rb', line 128

# Checks if the sentence is reviewed, i.e. its `status` is `:reviewed`.
#
# @return [true, false]
def reviewed?
  @status == :reviewed
end

#source ⇒ Source

Returns parent source object.

Returns:

  • (Source)

    parent source object



76
77
78
# File 'lib/proiel/sentence.rb', line 76

# Returns the parent source object, obtained from the parent div.
#
# @return [Source] parent source object
def source
  @div.source
end

#syntax_graph ⇒ Hash

Builds a syntax graph for the dependency annotation of the sentence and inserts a dummy root node. The graph is represented as a hash of hashes. Each hash contains the ID of the token, its relation (to its syntactically dominating token), a list of its children and a list of secondary edges.

Examples:


sentence.syntax_graph # => { id: nil, relation: nil, children: [{ id: 1000, relation: "pred", children: [{ id: 1001, relation: "xcomp", children: [], slashes: [["xsub", 1000]] }], slashes: [] }], slashes: [] }

Returns:

  • (Hash)

    a single graph with a dummy root node represented as a hash



162
163
164
# File 'lib/proiel/sentence.rb', line 162

# Builds a single syntax graph for the sentence by wrapping the graphs
# returned by `syntax_graphs` in a dummy root node whose ID and relation
# are nil and which has no secondary edges.
#
# @return [Hash] a single graph with a dummy root node represented as a hash
def syntax_graph
  { id: nil, relation: nil, children: syntax_graphs, slashes: [] }
end

#syntax_graphs ⇒ Array

Builds syntax graphs for the dependency annotation of the sentence. Multiple graphs may be returned as the function does not insert an empty dummy root node. Each graph is represented as a hash of hashes. Each hash contains the ID of the token, its relation (to its syntactically dominating token), a list of its children and a list of secondary edges.

Examples:

Get the syntax graphs without a dummy root node


sentence.syntax_graphs # => [{ id: 1000, relation: "pred", children: [{ id: 1001, relation: "xcomp", children: [], slashes: [["xsub", 1000]] }], slashes: [] }]

Returns:

  • (Array)

    zero or more syntax graphs represented as hashes



178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/proiel/sentence.rb', line 178

# Builds syntax graphs for the dependency annotation of the sentence.
#
# Every child gets a node hash with the keys `:id`, `:relation`,
# `:children` and `:slashes`. Nodes whose token has a head ID are attached
# to the head's `:children` list; the remaining nodes are returned as the
# roots. No dummy root node is inserted.
#
# @return [Array] zero or more syntax graphs represented as hashes
def syntax_graphs
  # First pass: one node hash per token, indexed by token ID so that a
  # dependent can find its head regardless of token order.
  nodes = @children.each_with_object({}) do |token, index|
    index[token.id] = {
      id: token.id,
      relation: token.relation,
      children: [],
      slashes: token.slashes,
    }
  end

  # Second pass: hang each node under its head, or collect it as a root
  # when the token has no head ID.
  roots = []

  @children.each do |token|
    node = nodes[token.id]
    parent_id = token.head_id

    if parent_id
      nodes[parent_id][:children] << node
    else
      roots << node
    end
  end

  roots
end

#tokens ⇒ Enumerator

Finds all tokens in the sentence.

Examples:

Iterating tokens

tokens.each { |t| puts t.id }

Create an array with only empty tokens

tokens.select(&:is_empty?)

Counting tokens

puts tokens.count #=> 200

Returns:

  • (Enumerator)

    tokens in the sentence



217
218
219
# File 'lib/proiel/sentence.rb', line 217

# Finds all tokens in the sentence by returning an enumerator over the
# sentence's children.
#
# @return [Enumerator] tokens in the sentence
def tokens
  @children.to_enum
end

#treebank ⇒ Treebank

Returns parent treebank object.

Returns:

  • (Treebank)

    parent treebank object



81
82
83
# File 'lib/proiel/sentence.rb', line 81

# Returns the parent treebank object, reached through the parent source.
#
# @return [Treebank] parent treebank object
def treebank
  source.treebank
end

#unannotated? ⇒ true, false

Checks if the sentence is unannotated.

A sentence is unannotated if its `status` is `:unannotated`.

Returns:

  • (true, false)


147
148
149
# File 'lib/proiel/sentence.rb', line 147

# Checks if the sentence is unannotated, i.e. its `status` is `:unannotated`.
#
# @return [true, false]
def unannotated?
  @status == :unannotated
end