Class: PROIEL::Sentence

Inherits:
TreebankObject show all
Extended by:
Memoist
Defined in:
lib/proiel/sentence.rb

Overview

A sentence object in a treebank.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from TreebankObject

#inspect

Constructor Details

#initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block) ⇒ Sentence

Creates a new sentence object.

Raises:

  • (ArgumentError)


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/proiel/sentence.rb', line 42

# Creates a new sentence object.
#
# Every argument is type-checked before it is stored. String arguments are
# frozen in place, so the caller's string objects become immutable as well.
#
# @param parent [Div] the div that owns the sentence
# @param id [Integer] ID of the sentence
# @param status [String, Symbol] annotation status; stored as a symbol
# @param presentation_before [nil, String] presentation material before the sentence
# @param presentation_after [nil, String] presentation material after the sentence
# @param alignment_id [nil, Integer] ID of the sentence this sentence is aligned to
# @param annotated_by [nil, String] annotator of the sentence
# @param reviewed_by [nil, String] reviewer of the sentence
# @param annotated_at [nil, String] XML schema date time of annotation
# @param reviewed_at [nil, String] XML schema date time of review
#
# @yield [sentence] called with the new sentence; the block's return value
#   becomes the sentence's list of children (tokens)
#
# @raise [ArgumentError] if any argument has an unexpected type or format
def initialize(parent, id, status, presentation_before, presentation_after, alignment_id, annotated_by, reviewed_by, annotated_at, reviewed_at, &block)
  @div = parent

  raise ArgumentError, 'integer expected' unless id.is_a?(Integer)
  @id = id

  raise ArgumentError, 'string or symbol expected' unless status.is_a?(String) || status.is_a?(Symbol)
  @status = status.to_sym

  raise ArgumentError, 'string or nil expected' unless presentation_before.nil? || presentation_before.is_a?(String)
  @presentation_before = presentation_before.freeze

  raise ArgumentError, 'string or nil expected' unless presentation_after.nil? || presentation_after.is_a?(String)
  @presentation_after = presentation_after.freeze

  raise ArgumentError, 'integer or nil expected' unless alignment_id.nil? || alignment_id.is_a?(Integer)
  @alignment_id = alignment_id

  unless annotated_at.nil? || PROIEL::Utilities.xmlschema_datetime?(annotated_at)
    raise ArgumentError, 'XML schema date time or nil expected'
  end
  @annotated_at = annotated_at ? DateTime.xmlschema(annotated_at).freeze : nil

  unless reviewed_at.nil? || PROIEL::Utilities.xmlschema_datetime?(reviewed_at)
    raise ArgumentError, 'XML schema date time or nil expected'
  end
  @reviewed_at = reviewed_at ? DateTime.xmlschema(reviewed_at).freeze : nil

  raise ArgumentError, 'string or nil expected' unless annotated_by.nil? || annotated_by.is_a?(String)
  @annotated_by = annotated_by.freeze

  raise ArgumentError, 'string or nil expected' unless reviewed_by.nil? || reviewed_by.is_a?(String)
  @reviewed_by = reviewed_by.freeze

  # A sentence built without a block has no tokens. Default to an empty list
  # so that the token-based methods work instead of failing on nil.
  @children = block ? block.call(self) : []
end

Instance Attribute Details

#alignment_id ⇒ nil, Integer (readonly)



27
28
29
# File 'lib/proiel/sentence.rb', line 27

# @return [nil, Integer] the alignment ID stored by the constructor, or nil
#   if the sentence was created without one
def alignment_id
  @alignment_id
end

#annotated_at ⇒ nil, DateTime (readonly)



36
37
38
# File 'lib/proiel/sentence.rb', line 36

# @return [nil, DateTime] the frozen DateTime the constructor parsed from its
#   `annotated_at` argument, or nil if none was given
def annotated_at
  @annotated_at
end

#annotated_by ⇒ nil, String (readonly)



30
31
32
# File 'lib/proiel/sentence.rb', line 30

# @return [nil, String] the frozen `annotated_by` value stored by the
#   constructor, or nil if none was given
def annotated_by
  @annotated_by
end

#div ⇒ Div (readonly)



15
16
17
# File 'lib/proiel/sentence.rb', line 15

# @return [Div] the parent object the sentence was constructed with
def div
  @div
end

#id ⇒ Integer (readonly)



12
13
14
# File 'lib/proiel/sentence.rb', line 12

# @return [Integer] the ID the sentence was constructed with
def id
  @id
end

#presentation_after ⇒ nil, String (readonly)



24
25
26
# File 'lib/proiel/sentence.rb', line 24

# @return [nil, String] the frozen `presentation_after` value stored by the
#   constructor, or nil if none was given
def presentation_after
  @presentation_after
end

#presentation_before ⇒ nil, String (readonly)



21
22
23
# File 'lib/proiel/sentence.rb', line 21

# @return [nil, String] the frozen `presentation_before` value stored by the
#   constructor, or nil if none was given
def presentation_before
  @presentation_before
end

#reviewed_at ⇒ nil, DateTime (readonly)



39
40
41
# File 'lib/proiel/sentence.rb', line 39

# @return [nil, DateTime] the frozen DateTime the constructor parsed from its
#   `reviewed_at` argument, or nil if none was given
def reviewed_at
  @reviewed_at
end

#reviewed_by ⇒ nil, String (readonly)



33
34
35
# File 'lib/proiel/sentence.rb', line 33

# @return [nil, String] the frozen `reviewed_by` value stored by the
#   constructor, or nil if none was given
def reviewed_by
  @reviewed_by
end

#status ⇒ Symbol (readonly)



18
19
20
# File 'lib/proiel/sentence.rb', line 18

# @return [Symbol] the status the sentence was constructed with, converted
#   to a symbol
def status
  @status
end

Instance Method Details

#alignment(aligned_source) ⇒ Sentence, NilClass

Returns the aligned sentence if any.



231
232
233
# File 'lib/proiel/sentence.rb', line 231

# Returns the aligned sentence if any.
#
# @param aligned_source [Source] the source whose treebank is searched for
#   the sentence with this sentence's alignment ID
#
# @return [Sentence, NilClass] the result of the lookup, or nil when the
#   sentence has no alignment ID
def alignment(aligned_source)
  return nil unless alignment_id

  aligned_source.treebank.find_sentence(alignment_id)
end

#annotated? ⇒ true, false

Checks if the sentence is annotated.

Since only annotated sentences can be reviewed, a sentence is annotated if its `status` is either `:reviewed` or `:annotated`.



145
146
147
# File 'lib/proiel/sentence.rb', line 145

# Checks if the sentence is annotated.
#
# A reviewed sentence counts as annotated too, so both the `:reviewed` and
# the `:annotated` status qualify.
#
# @return [true, false]
def annotated?
  %i[reviewed annotated].include?(@status)
end

#citation ⇒ String



97
98
99
# File 'lib/proiel/sentence.rb', line 97

# Builds the citation for the sentence by joining the source's citation
# component and the sentence's own citation component with a single space.
#
# @return [String]
def citation
  source_component = source.citation_part
  sentence_component = citation_part

  [source_component, sentence_component].join(' ')
end

#citation_part ⇒ String

Computes an appropriate citation component for the sentence.

The computed citation component must be concatenated with the citation component provided by the source to produce a complete citation.

See Also:



109
110
111
112
113
114
115
# File 'lib/proiel/sentence.rb', line 109

# Computes the citation component for the sentence.
#
# Only children that report having a citation are considered. The citation
# components of the first and the last of these (nil for both when there
# are none) are handed to `Citations.citation_make_range`.
#
# @return [String]
def citation_part
  cited = @children.select(&:has_citation?)

  range_start, range_end = [cited.first, cited.last].map do |token|
    token ? token.citation_part : nil
  end

  Citations.citation_make_range(range_start, range_end)
end

#inferred_alignment(aligned_source) ⇒ Array<Sentence>

Returns inferred aligned sentences if any.



238
239
240
241
242
# File 'lib/proiel/sentence.rb', line 238

# Returns inferred aligned sentences if any.
#
# The alignment is inferred from the tokens: each token with an alignment
# ID is resolved against `aligned_source`, and the distinct sentences of
# the resolved tokens are collected.
#
# @param aligned_source [Source] the source passed on to each token's
#   alignment lookup
#
# @return [Array<Sentence>]
def inferred_alignment(aligned_source)
  aligned_tokens = tokens.select(&:alignment_id)
  counterparts = aligned_tokens.map { |token| token.alignment(aligned_source) }

  # A lookup may yield nil, one token or a list of tokens
  counterparts.flatten.compact.map(&:sentence).uniq
end

#language ⇒ String



90
91
92
# File 'lib/proiel/sentence.rb', line 90

# @return [String] the language reported by the sentence's source
def language
  source.language
end

#printable_form(custom_token_formatter: nil) ⇒ String

Returns the printable form of the sentence with all token forms and any presentation data.

If given, `custom_token_formatter` is a formatting function which is passed the token as its sole argument.



124
125
126
127
128
# File 'lib/proiel/sentence.rb', line 124

# Returns the printable form of the sentence with all token forms and any
# presentation data.
#
# Children that report being empty are skipped. Missing (nil) presentation
# data is dropped before the parts are concatenated.
#
# @param custom_token_formatter [Object, nil] passed on unchanged to each
#   token's own `printable_form`
#
# @return [String]
def printable_form(custom_token_formatter: nil)
  token_forms = @children.reject(&:is_empty?).map do |token|
    token.printable_form(custom_token_formatter: custom_token_formatter)
  end

  [presentation_before, token_forms, presentation_after].compact.join
end

#reviewed? ⇒ true, false

Checks if the sentence is reviewed.

A sentence has been reviewed if its `status` is `:reviewed`.



135
136
137
# File 'lib/proiel/sentence.rb', line 135

# Checks if the sentence is reviewed.
#
# @return [true, false] true if the status is `:reviewed`
def reviewed?
  @status == :reviewed
end

#source ⇒ Source



80
81
82
# File 'lib/proiel/sentence.rb', line 80

# @return [Source] the source reported by the sentence's parent div
def source
  @div.source
end

#syntax_graph ⇒ Hash

Builds a syntax graph for the dependency annotation of the sentence and inserts a dummy root node. The graph is represented as a hash of hashes. Each hash contains the ID of the token, its relation (to its syntactically dominating token) and a list of secondary edges.

Examples:


sentence.syntax_graph # => { id: nil, relation: nil, children: [{ id: 1000, relation: "pred", children: [ { id: 1001, relation: "xcomp", children: [], slashes: [["xsub", 1000]]}]}], slashes: [] }


169
170
171
# File 'lib/proiel/sentence.rb', line 169

# Builds a single syntax graph for the sentence by placing the graphs from
# `syntax_graphs` under a dummy root node. The root has no ID, no relation
# and no secondary edges.
#
# @return [Hash]
def syntax_graph
  subgraphs = syntax_graphs

  { id: nil, relation: nil, children: subgraphs, slashes: [] }
end

#syntax_graphs ⇒ Array

Builds syntax graphs for the dependency annotation of the sentence. Multiple graphs may be returned as the function does not insert an empty dummy root node. Each graph is represented as a hash of hashes. Each hash contains the ID of the token, its relation (to its syntactically dominating token) and a list of secondary edges.

Examples:

Get the syntax graphs without a dummy root node


sentence.syntax_graphs # => [{ id: 1000, relation: "pred", children: [ { id: 1001, relation: "xcomp", children: [], slashes: [["xsub", 1000]]}]}]


185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/proiel/sentence.rb', line 185

# Builds syntax graphs for the dependency annotation of the sentence.
#
# No dummy root node is inserted, so one graph is returned for each token
# without a head. Each node is a hash with the token's ID, its relation,
# a list of child nodes and the token's slashes.
#
# @return [Array<Hash>]
def syntax_graphs
  # Index one node per token by token ID so that heads can be looked up
  # regardless of the order of the tokens
  nodes = @children.each_with_object({}) do |token, index|
    index[token.id] = {
      id: token.id,
      relation: token.relation,
      children: [],
      slashes: token.slashes,
    }
  end

  # Hang each node under its head's node; nodes without a head are roots
  roots = []

  @children.each do |token|
    node = nodes[token.id]

    if token.head_id
      nodes[token.head_id][:children] << node
    else
      roots << node
    end
  end

  roots
end

#tokens ⇒ Enumerator

Finds all tokens in the sentence.

Examples:

Iterating tokens

tokens.each { |t| puts t.id }

Create an array with only empty tokens

tokens.select(&:is_empty?)

Counting tokens

puts tokens.count #=> 200


224
225
226
# File 'lib/proiel/sentence.rb', line 224

# Finds all tokens in the sentence.
#
# @return [Enumerator] an enumerator over the sentence's children
def tokens
  @children.enum_for(:each)
end

#treebank ⇒ Treebank



85
86
87
# File 'lib/proiel/sentence.rb', line 85

# @return [Treebank] the treebank reported by the source of the sentence's
#   parent div
def treebank
  @div.source.treebank
end

#unannotated? ⇒ true, false

Checks if the sentence is unannotated.

A sentence is unannotated if its `status` is `:unannotated`.



154
155
156
# File 'lib/proiel/sentence.rb', line 154

# Checks if the sentence is unannotated.
#
# @return [true, false] true if the status is `:unannotated`
def unannotated?
  @status == :unannotated
end