Class: PROIEL::Valency::Lexicon

Inherits:
Object
  • Object
show all
Defined in:
lib/proiel/valency/lexicon.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Lexicon

Returns a new instance of Lexicon.



6
7
8
9
10
# File 'lib/proiel/valency/lexicon.rb', line 6

# Builds an empty lexicon: no sources recorded and no frames collected.
def initialize
  # Ids and languages of the sources added so far; Sets, so repeats collapse.
  @source_ids = Set[]
  @source_languages = Set[]

  # Frame table, filled in by #add_source!.
  @frames = {}
end

Instance Attribute Details

#frames ⇒ Object (readonly)

Returns the value of attribute frames.



4
5
6
# File 'lib/proiel/valency/lexicon.rb', line 4

# Returns the lexicon's frame table.
#
# The table is the nested Hash that #add_source! fills in:
#
#   lemma => part of speech => argument frame => { a: [token ids], r: [token ids] }
#
# The Hash itself is returned, not a copy, so mutating the result mutates
# the lexicon.
def frames
  @frames
end

Instance Method Details

#add_source!(source) ⇒ Object

Adds the verbal tokens of the given source to the valency lexicon; call it once per source to generate a lexicon from several sources. In practice the sources should be in the same language, but this is not enforced. This makes it possible to generate a lexicon from sources in closely related languages or dialects.



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/proiel/valency/lexicon.rb', line 16

# Adds every verbal token of +source+ to the lexicon.
#
# The source's id and language are recorded, then each token returned by
# +find_verbal_nodes+ for each sentence is filed under
#
#   @frames[lemma][part_of_speech][frame][partition]
#
# where +frame+ is the result of
# PROIEL::Valency::Arguments.get_argument_frame(token) and +partition+ is
# :r or :a (see below). Only the token's id is stored.
#
# @param source [#id, #language, #sentences] the source to add
# @return [Object] the result of +source.sentences.each+
def add_source!(source)
  @source_ids << source.id
  @source_languages << source.language

  source.sentences.each do |sentence|
    find_verbal_nodes(sentence).each do |token|
      frame = PROIEL::Valency::Arguments.get_argument_frame(token)

      # A token goes into :r when one of its dependents has relation 'aux'
      # AND part of speech 'Pk'; every other token goes into :a.
      # NOTE(review): 'Pk' is presumably the reflexive-pronoun tag - confirm
      # against the PROIEL tagset.
      has_pk_aux = token.dependents.any? do |dependent|
        dependent.relation == 'aux' && dependent.part_of_speech == 'Pk'
      end
      partition = has_pk_aux ? :r : :a

      # Create each level of the nested table on first use.
      by_part_of_speech = (@frames[token.lemma] ||= {})
      by_frame = (by_part_of_speech[token.part_of_speech] ||= {})
      token_ids = (by_frame[frame] ||= { a: [], r: [] })
      token_ids[partition] << token.id
    end
  end
end

#lookup(lemma, part_of_speech) ⇒ Object



40
41
42
43
44
45
46
# File 'lib/proiel/valency/lexicon.rb', line 40

# Looks up the frames recorded for a lemma with a given part of speech.
#
# Each recorded frame is turned into a Hash of the form
# <tt>{ arguments: frame, tokens: { a: [...], r: [...] } }</tt> and the list
# is handed to PROIEL::Valency::Obliqueness.sort_frames.
#
# A lemma or part of speech that is not in the lexicon is treated as having
# no frames, so an empty list is passed to +sort_frames+ instead of raising
# NoMethodError on nil.
#
# @param lemma [Object] lemma key, as stored by #add_source!
# @param part_of_speech [Object] part-of-speech key, as stored by #add_source!
# @return [Object] whatever +sort_frames+ returns for the collected frames
def lookup(lemma, part_of_speech)
  # fetch with a default keeps an unknown key from yielding nil mid-chain.
  recorded = @frames.fetch(lemma, {}).fetch(part_of_speech, {})

  entries = recorded.map do |arguments, token_ids|
    { arguments: arguments, tokens: token_ids }
  end

  PROIEL::Valency::Obliqueness.sort_frames(entries)
end