Class: PROIEL::Sentence

Inherits:
TreebankObject show all
Extended by:
Memoist
Defined in:
lib/proiel/sentence.rb

Overview

A sentence object in a treebank.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from TreebankObject

#inspect

Constructor Details

#initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block) ⇒ Sentence

Creates a new sentence object.

Raises:

  • (ArgumentError)


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/proiel/sentence.rb', line 42

# Creates a new sentence object.
#
# Every argument is type-checked before it is stored. String arguments are
# frozen in place, i.e. the caller's string object itself becomes frozen.
#
# @param parent [Div] parent div object
# @param id [Integer] ID of the sentence
# @param status [String, Symbol] annotation status; stored as a symbol
# @param presentation_before [nil, String] presentation material before the sentence
# @param presentation_after [nil, String] presentation material after the sentence
# @param alignment_id [nil, Integer] ID of the sentence this sentence is aligned to
# @param annotated_by [nil, String] annotator of the sentence
# @param reviewed_by [nil, String] reviewer of the sentence
# @param annotated_at [nil, String] time of annotation in XML schema date time
#   format (presumably a String; it is parsed with `DateTime.xmlschema`)
# @param reviewed_at [nil, String] time of review in XML schema date time
#   format (presumably a String; it is parsed with `DateTime.xmlschema`)
# @yield [sentence] the new sentence; the block's return value is stored as
#   the sentence's children
# @raise [ArgumentError] if an argument does not have the expected type
def initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block)
  @div = parent

  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
  @id = id

  raise ArgumentError, 'string or symbol expected' unless status.is_a?(String) || status.is_a?(Symbol)
  @status = status.to_sym

  raise ArgumentError, 'string or nil expected' unless presentation_before.nil? || presentation_before.is_a?(String)
  @presentation_before = presentation_before.freeze

  raise ArgumentError, 'string or nil expected' unless presentation_after.nil? || presentation_after.is_a?(String)
  @presentation_after = presentation_after.freeze

  raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? || alignment_id.is_a?(Integer)
  @alignment_id = alignment_id

  unless annotated_at.nil? || PROIEL::Utilities.xmlschema_datetime?(annotated_at)
    raise ArgumentError, 'XML schema date time or nil expected'
  end
  @annotated_at = annotated_at ? DateTime.xmlschema(annotated_at).freeze : nil

  unless reviewed_at.nil? || PROIEL::Utilities.xmlschema_datetime?(reviewed_at)
    raise ArgumentError, 'XML schema date time or nil expected'
  end
  @reviewed_at = reviewed_at ? DateTime.xmlschema(reviewed_at).freeze : nil

  raise ArgumentError, 'string or nil expected' unless annotated_by.nil? || annotated_by.is_a?(String)
  @annotated_by = annotated_by.freeze

  raise ArgumentError, 'string or nil expected' unless reviewed_by.nil? || reviewed_by.is_a?(String)
  @reviewed_by = reviewed_by.freeze

  # Without a block there are no children to build. Fall back to an empty
  # list so that the methods iterating over @children work on an empty
  # collection instead of failing on nil.
  @children = block ? block.call(self) : []
end

Instance Attribute Details

#alignment_id ⇒ nil, Integer (readonly)

Returns ID of the sentence that this sentence is aligned to.

Returns:

  • (nil, Integer)

    ID of the sentence that this sentence is aligned to



27
28
29
# File 'lib/proiel/sentence.rb', line 27

# Reader for the alignment ID stored by the constructor.
#
# @return [nil, Integer] ID of the sentence that this sentence is aligned to
def alignment_id
  @alignment_id
end

#annotated_at ⇒ nil, DateTime (readonly)

Returns time of annotation.

Returns:

  • (nil, DateTime)

    time of annotation



36
37
38
# File 'lib/proiel/sentence.rb', line 36

# Reader for the annotation time stored by the constructor.
#
# @return [nil, DateTime] time of annotation
def annotated_at
  @annotated_at
end

#annotated_by ⇒ nil, String (readonly)

Returns annotator of sentence.

Returns:

  • (nil, String)

    annotator of sentence



30
31
32
# File 'lib/proiel/sentence.rb', line 30

# Reader for the annotator stored by the constructor.
#
# @return [nil, String] annotator of sentence
def annotated_by
  @annotated_by
end

#div ⇒ Div (readonly)

Returns parent div object.

Returns:

  • (Div)

    parent div object



15
16
17
# File 'lib/proiel/sentence.rb', line 15

# Reader for the parent object passed to the constructor.
#
# @return [Div] parent div object
def div
  @div
end

#id ⇒ Integer (readonly)

Returns ID of the sentence.

Returns:

  • (Integer)

    ID of the sentence



12
13
14
# File 'lib/proiel/sentence.rb', line 12

# Reader for the sentence ID stored by the constructor.
#
# @return [Integer] ID of the sentence
def id
  @id
end

#presentation_after ⇒ nil, String (readonly)

Returns presentation material after sentence.

Returns:

  • (nil, String)

    presentation material after sentence



24
25
26
# File 'lib/proiel/sentence.rb', line 24

# Reader for the trailing presentation material stored by the constructor.
#
# @return [nil, String] presentation material after sentence
def presentation_after
  @presentation_after
end

#presentation_before ⇒ nil, String (readonly)

Returns presentation material before sentence.

Returns:

  • (nil, String)

    presentation material before sentence



21
22
23
# File 'lib/proiel/sentence.rb', line 21

# Reader for the leading presentation material stored by the constructor.
#
# @return [nil, String] presentation material before sentence
def presentation_before
  @presentation_before
end

#reviewed_at ⇒ nil, DateTime (readonly)

Returns time of review.

Returns:

  • (nil, DateTime)

    time of review



39
40
41
# File 'lib/proiel/sentence.rb', line 39

# Reader for the review time stored by the constructor.
#
# @return [nil, DateTime] time of review
def reviewed_at
  @reviewed_at
end

#reviewed_by ⇒ nil, String (readonly)

Returns reviewer of sentence.

Returns:

  • (nil, String)

    reviewer of sentence



33
34
35
# File 'lib/proiel/sentence.rb', line 33

# Reader for the reviewer stored by the constructor.
#
# @return [nil, String] reviewer of sentence
def reviewed_by
  @reviewed_by
end

#status ⇒ Symbol (readonly)

Returns annotation status of sentence.

Returns:

  • (Symbol)

    annotation status of sentence



18
19
20
# File 'lib/proiel/sentence.rb', line 18

# Reader for the annotation status, which the constructor stores as a symbol.
#
# @return [Symbol] annotation status of sentence
def status
  @status
end

Instance Method Details

#alignment(aligned_source) ⇒ Sentence, NilClass

Returns the aligned sentence if any.

Returns:

  • (Sentence, NilClass)

    aligned sentence



231
232
233
# File 'lib/proiel/sentence.rb', line 231

# Looks up the sentence this sentence is aligned to.
#
# @param aligned_source [Source] source whose treebank is searched for the
#   aligned sentence
# @return [Sentence, NilClass] the aligned sentence, or nil when this
#   sentence has no alignment ID
def alignment(aligned_source)
  # No alignment ID means there is nothing to look up.
  return nil unless alignment_id

  aligned_source.treebank.find_sentence(alignment_id)
end

#annotated? ⇒ true, false

Checks if the sentence is annotated.

Since only annotated sentences can be reviewed, a sentence is annotated if its `status` is either `:reviewed` or `:annotated`.

Returns:

  • (true, false)


145
146
147
# File 'lib/proiel/sentence.rb', line 145

# Checks if the sentence is annotated.
#
# A reviewed sentence also counts as annotated, so both the `:reviewed` and
# the `:annotated` status qualify.
#
# @return [true, false]
def annotated?
  %i[reviewed annotated].include?(@status)
end

#citation ⇒ String

Returns the complete citation for the sentence.

Returns:

  • (String)

    the complete citation for the sentence



97
98
99
# File 'lib/proiel/sentence.rb', line 97

# Builds the complete citation for the sentence by joining the source's
# citation component and the sentence's own citation component with a space.
#
# @return [String] the complete citation for the sentence
def citation
  source_part = source.citation_part
  sentence_part = citation_part

  [source_part, sentence_part].join(' ')
end

#citation_part ⇒ String

Computes an appropriate citation component for the sentence.

The computed citation component must be concatenated with the citation component provided by the source to produce a complete citation.

Returns:

  • (String)

    the citation component

See Also:



109
110
111
112
113
114
115
# File 'lib/proiel/sentence.rb', line 109

# Computes the citation component for the sentence.
#
# Only children that report having a citation are considered. The citation
# components of the first and the last of them (nil when there are none) are
# handed to `Citations.citation_make_range`.
#
# @return [String] the citation component
def citation_part
  cited = @children.select(&:has_citation?)
  first_cited = cited.first
  last_cited = cited.last

  range_start = first_cited&.citation_part
  range_end = last_cited&.citation_part

  Citations.citation_make_range(range_start, range_end)
end

#inferred_alignment(aligned_source) ⇒ Array<Sentence>

Returns inferred aligned sentences if any.

Returns:

  • (Array<Sentence>)

    inferred aligned sentences



238
239
240
241
242
# File 'lib/proiel/sentence.rb', line 238

# Infers aligned sentences from the alignments of the sentence's tokens.
#
# @param aligned_source [Source] source passed on to each token's
#   `alignment` lookup
# @return [Array<Sentence>] the distinct sentences of all aligned targets
def inferred_alignment(aligned_source)
  aligned_tokens = tokens.select(&:alignment_id)
  targets = aligned_tokens.map { |token| token.alignment(aligned_source) }

  # A lookup may yield a single target, a list of targets or nil.
  targets.flatten.compact.map(&:sentence).uniq
end

#language ⇒ String

Returns language of the sentence as an ISO 639-3 language tag.

Returns:

  • (String)

    language of the sentence as an ISO 639-3 language tag



90
91
92
# File 'lib/proiel/sentence.rb', line 90

# Returns the language of the sentence, which is the language of its source.
#
# @return [String] language of the sentence as an ISO 639-3 language tag
def language
  source.language
end

#printable_form(custom_token_formatter: nil) ⇒ String

Returns the printable form of the sentence with all token forms and any presentation data.

The optional custom token formatter is passed the token as its sole argument.

Parameters:

  • custom_token_formatter (Lambda) (defaults to: nil)

    formatting function for tokens

Returns:

  • (String)

    the printable form of the sentence



124
125
126
127
128
# File 'lib/proiel/sentence.rb', line 124

# Returns the printable form of the sentence: the presentation material
# before the sentence, the printable forms of all non-empty tokens and the
# presentation material after the sentence, concatenated in that order.
#
# @param custom_token_formatter [Lambda] formatting function for tokens,
#   handed on unchanged to each token's `printable_form`
# @return [String] the printable form of the sentence
def printable_form(custom_token_formatter: nil)
  visible_tokens = @children.reject(&:is_empty?)
  token_forms = visible_tokens.map do |token|
    token.printable_form(custom_token_formatter: custom_token_formatter)
  end

  # compact drops missing presentation material before joining.
  [presentation_before, token_forms, presentation_after].compact.join
end

#reviewed? ⇒ true, false

Checks if the sentence is reviewed.

A sentence has been reviewed if its `status` is `:reviewed`.

Returns:

  • (true, false)


135
136
137
# File 'lib/proiel/sentence.rb', line 135

# Checks if the sentence is reviewed, i.e. if its status is `:reviewed`.
#
# @return [true, false]
def reviewed?
  @status == :reviewed
end

#source ⇒ Source

Returns parent source object.

Returns:

  • (Source)

    parent source object



80
81
82
# File 'lib/proiel/sentence.rb', line 80

# Returns the source that the sentence's parent div belongs to.
#
# @return [Source] parent source object
def source
  @div.source
end

#syntax_graph ⇒ Hash

Builds a syntax graph for the dependency annotation of the sentence and inserts a dummy root node. The graph is represented as a hash of hashes. Each hash contains the ID of the token, its relation (to its syntactically dominating token), a list of its children and a list of secondary edges.

Examples:


sentence.syntax_graph # => { id: nil, relation: nil, children: [{ id: 1000, relation: "pred", children: [ { id: 1001, relation: "xcomp", children: [], slashes: [["xsub", 1000]]}]}], slashes: [] }

Returns:

  • (Hash)

    a single graph with a dummy root node represented as a hash



169
170
171
# File 'lib/proiel/sentence.rb', line 169

# Builds a single syntax graph for the sentence by placing all graphs from
# `syntax_graphs` under a dummy root node whose ID and relation are nil.
#
# @return [Hash] a single graph with a dummy root node represented as a hash
def syntax_graph
  subgraphs = syntax_graphs

  { id: nil, relation: nil, children: subgraphs, slashes: [] }
end

#syntax_graphs ⇒ Array

Builds syntax graphs for the dependency annotation of the sentence. Multiple graphs may be returned as the function does not insert an empty dummy root node. Each graph is represented as a hash of hashes. Each hash contains the ID of the token, its relation (to its syntactically dominating token), a list of its children and a list of secondary edges.

Examples:

Get the syntax graphs without a dummy root node


sentence.syntax_graphs # => [{ id: 1000, relation: "pred", children: [ { id: 1001, relation: "xcomp", children: [], slashes: [["xsub", 1000]]}]}]

Returns:

  • (Array)

    zero or more syntax graphs represented as hashes



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/proiel/sentence.rb', line 185

# Builds syntax graphs for the dependency annotation of the sentence.
#
# Each token becomes a hash with the keys `:id`, `:relation`, `:children`
# and `:slashes`. Tokens with a head ID are nested under their head's
# `:children`; tokens without one become the roots that are returned.
#
# @return [Array] zero or more syntax graphs represented as hashes
def syntax_graphs
  # First pass: one node hash per token, indexed by token ID, so that heads
  # can be found regardless of the order in which tokens appear.
  nodes = @children.each_with_object({}) do |token, index|
    index[token.id] = {
      id: token.id,
      relation: token.relation,
      children: [],
      slashes: token.slashes
    }
  end

  # Second pass: hang each node under its head, or collect it as a root.
  roots = []
  @children.each do |token|
    if token.head_id
      nodes[token.head_id][:children] << nodes[token.id]
    else
      roots << nodes[token.id]
    end
  end

  roots
end

#tokens ⇒ Enumerator

Finds all tokens in the sentence.

Examples:

Iterating tokens

tokens.each { |t| puts t.id }

Create an array with only empty tokens

tokens.select(&:is_empty?)

Counting tokens

puts tokens.count #=> 200

Returns:

  • (Enumerator)

    tokens in the sentence



224
225
226
# File 'lib/proiel/sentence.rb', line 224

# Finds all tokens in the sentence.
#
# @return [Enumerator] enumerator over the sentence's children
def tokens
  @children.to_enum
end

#treebank ⇒ Treebank

Returns parent treebank object.

Returns:

  • (Treebank)

    parent treebank object



85
86
87
# File 'lib/proiel/sentence.rb', line 85

# Returns the treebank reached through the parent div's source.
#
# @return [Treebank] parent treebank object
def treebank
  @div.source.treebank
end

#unannotated? ⇒ true, false

Checks if the sentence is unannotated.

A sentence is unannotated if its `status` is `:unannotated`.

Returns:

  • (true, false)


154
155
156
# File 'lib/proiel/sentence.rb', line 154

# Checks if the sentence is unannotated, i.e. if its status is `:unannotated`.
#
# @return [true, false]
def unannotated?
  @status == :unannotated
end