Class: Lingo::Language::Word

Inherits:
WordForm show all
Defined in:
lib/lingo/language/word.rb

Overview

Die Klasse Word bündelt spezifische Eigenschaften eines Wortes mit den dazu notwendigen Methoden.

Instance Attribute Summary collapse

Attributes inherited from WordForm

#attr, #form, #gender, #head, #src

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from WordForm

#eql?, #hash, #identified?, #inspect, #to_a, #unknown?

Constructor Details

#initialize(form, attr = WA_UNSET, token = nil) ⇒ Word

Exakte Repräsentation der originären Zeichenkette, so wie sie im Satz gefunden wurde, z.B. form = "RubyLing"

Ergebnis der Wörterbuch-Suche. Sie stellt die Grundform des Wortes dar. Dabei kann es mehrere mögliche Grundformen geben, z.B. kann abgeschoben als Grundform das Adjektiv abgeschoben sein, oder aber das Verb abschieben.

lemma = [['abgeschoben', '#a'], ['abschieben', '#v']].

Achtung: Lemma wird nicht durch die Word-Klasse bestückt, sondern extern durch die Klasse Dictionary.



79
80
81
82
# File 'lib/lingo/language/word.rb', line 79

# Builds a word for +form+ with word attribute +attr+ (WA_UNSET by default).
# The optional +token+ is remembered and the list of lexicals starts empty.
def initialize(form, attr = WA_UNSET, token = nil)
  @token = token
  @lexicals = []

  # Bare +super+ forwards form, attr and token unchanged to the parent class.
  super
end

Instance Attribute Details

#lexicals(compound_parts = true) ⇒ Object



88
89
90
91
92
93
94
# File 'lib/lingo/language/word.rb', line 88

# Returns the lexicals attached to this word.
#
# When +compound_parts+ is falsy and the word attribute equals WA_COMPOUND,
# only the lexicals whose attribute equals LA_COMPOUND are returned (as a new
# array). In every other case the internal list itself is returned, not a copy.
def lexicals(compound_parts = true)
  return @lexicals if compound_parts || attr != WA_COMPOUND

  @lexicals.select { |entry| entry.attr == LA_COMPOUND }
end

#token ⇒ Object (readonly)

Returns the value of attribute token



84
85
86
# File 'lib/lingo/language/word.rb', line 84

# Read-only accessor: returns the value stored in @token.
def token
  @token
end

Class Method Details

.new_compound_head(lex, attr = WA_UNSET) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/lingo/language/word.rb', line 48

# Builds a word from the trailing run of +lex+ entries that share one source.
#
# Walking +lex+ from the end, each entry's +src+ is defaulted to its +form+
# (this writes back to the entry passed in). The first source seen becomes
# the form of the new word; copies of the entries are collected until an
# entry with a different source shows up. The copies get their attribute
# reduced to its first run of word characters and duplicates are dropped.
#
# Returns the result of +new_lexicals+ for that form, +attr+ and the
# collected copies, or +nil+ when no form was found (e.g. +lex+ is empty).
def new_compound_head(lex, attr = WA_UNSET)
  head_form = nil
  head_parts = []

  lex.reverse_each do |candidate|
    source = (candidate.src ||= candidate.form)
    head_form ||= source
    break if head_form != source

    # Work on a copy so the attribute rewrite below leaves the input alone.
    head_parts.unshift(candidate.dup)
  end

  head_parts.each { |part| part.attr = part.attr[/\w+/] }
  head_parts.uniq!

  return unless head_form

  new_lexicals(head_form, attr, head_parts)
end

.new_lexical(form, attr, lex_attr) ⇒ Object



44
45
46
# File 'lib/lingo/language/word.rb', line 44

# Creates a word via +new_lexicals+ that carries a single Lexical built from
# +form+ and +lex_attr+; +attr+ is passed on as the word attribute.
def new_lexical(form, attr, lex_attr)
  lexical = Lexical.new(form, lex_attr)
  new_lexicals(form, attr, lexical)
end

.new_lexicals(form, attr, lex) ⇒ Object



40
41
42
# File 'lib/lingo/language/word.rb', line 40

# Instantiates a word from +form+ and +attr+ and appends +lex+ to it with
# <<, returning whatever << returns.
def new_lexicals(form, attr, lex)
  word = new(form, attr)
  word << lex
end

Instance Method Details

#<<(*lex) ⇒ Object



149
150
151
152
153
# File 'lib/lingo/language/word.rb', line 149

# Appends the given lexicals to this word. Arguments may be single items or
# (nested) arrays; everything is flattened before being added.
# Returns +self+.
def <<(*lex)
  @lexicals.concat(lex.flatten)
  self
end

#<=>(other) ⇒ Object



155
156
157
# File 'lib/lingo/language/word.rb', line 155

# Orders words by their +to_a+ values followed by their lexicals.
# Comparing against +nil+ always yields 1.
def <=>(other)
  return 1 if other.nil?

  own_key = to_a << lexicals
  other_key = other.to_a << other.lexicals
  own_key <=> other_key
end

#add_lexicals(lex) ⇒ Object



96
97
98
# File 'lib/lingo/language/word.rb', line 96

# Adds those entries of +lex+ that are not already among this word's
# lexicals and returns the (mutated) internal list.
def add_lexicals(lex)
  missing = lex - @lexicals
  @lexicals.concat(missing)
end

#attr?(*attr) ⇒ Boolean

Returns:

  • (Boolean)


100
101
102
# File 'lib/lingo/language/word.rb', line 100

# True when at least one of the given attributes occurs among +attrs+.
def attr?(*attr)
  shared = attrs & attr
  !shared.empty?
end

#attrs(compound_parts = true) ⇒ Object



104
105
106
# File 'lib/lingo/language/word.rb', line 104

# Collects the +attr+ of every lexical returned by
# +lexicals(compound_parts)+, in order.
def attrs(compound_parts = true)
  lexicals(compound_parts).map(&:attr)
end

#compo_form ⇒ Object



133
134
135
# File 'lib/lingo/language/word.rb', line 133

# For a word whose attribute equals WA_COMPOUND, returns the first entry
# that +get_class+ finds for LA_COMPOUND; otherwise +nil+.
def compo_form
  return unless attr == WA_COMPOUND

  get_class(LA_COMPOUND).first
end

#full_compound? ⇒ Boolean

Returns:

  • (Boolean)


137
138
139
# File 'lib/lingo/language/word.rb', line 137

# True when the word attribute equals WA_COMPOUND and +get_class+ finds
# nothing for 'x+'.
#
# NOTE(review): get_class compiles a String argument into a Regexp, so 'x+'
# acts as the pattern /x+/ ("one or more x"), not as the literal text "x+".
def full_compound?
  is_compound = (attr == WA_COMPOUND)
  return is_compound unless is_compound

  get_class('x+').empty?
end

#genders(compound_parts = true) ⇒ Object



108
109
110
# File 'lib/lingo/language/word.rb', line 108

# Collects the +gender+ of every lexical returned by
# +lexicals(compound_parts)+, in order.
def genders(compound_parts = true)
  lexicals(compound_parts).map(&:gender)
end

#get_class(wc_re) ⇒ Object



122
123
124
125
126
127
# File 'lib/lingo/language/word.rb', line 122

# Selects entries whose attribute matches +wc_re+.
#
# +wc_re+ may be a Regexp or anything Regexp.new accepts. If the word has
# lexicals, the matching lexicals are returned. Without lexicals, the word's
# own attribute is tested instead and the result is [self] or [].
def get_class(wc_re)
  pattern = wc_re.is_a?(Regexp) ? wc_re : Regexp.new(wc_re)

  if lexicals.empty?
    attr =~ pattern ? [self] : []
  else
    lexicals.select { |lex| lex.attr =~ pattern }
  end
end

#identify(lex, wc = nil) ⇒ Object



112
113
114
115
116
117
118
119
120
# File 'lib/lingo/language/word.rb', line 112

# Marks this word as identified by the lexicals in +lex+ and returns +self+.
#
# An empty +lex+ leaves the word untouched. Otherwise the lexicals are
# replaced by +lex+ and the word attribute is set to +wc+; when +wc+ is not
# given it becomes WA_COMPOUND if one of the new lexicals has attribute
# LA_COMPOUND, else WA_IDENTIFIED. For WA_COMPOUND the head is additionally
# derived from +lex+ through the class-level +new_compound_head+.
def identify(lex, wc = nil)
  unless lex.empty?
    # The lexicals must be in place first: attr? below inspects them.
    self.lexicals = lex

    wc ||= attr?(LA_COMPOUND) ? WA_COMPOUND : WA_IDENTIFIED
    self.attr = wc

    self.head = self.class.new_compound_head(lex) if wc == WA_COMPOUND
  end

  self
end

#multiword_size(wc_re = LA_MULTIWORD) ⇒ Object



141
142
143
# File 'lib/lingo/language/word.rb', line 141

# Number of space-separated parts in the form of the first entry that
# +get_class+ returns for +wc_re+ (LA_MULTIWORD by default). Yields +nil+
# when there is no such entry.
def multiword_size(wc_re = LA_MULTIWORD)
  first_match = get_class(wc_re).first
  first_match && first_match.form.count(' ') + 1
end

#norm ⇒ Object



129
130
131
# File 'lib/lingo/language/word.rb', line 129

# Normalised form: the form of the first lexical when the word is
# identified, otherwise the word's own form.
def norm
  return form unless identified?

  lexicals.first.form
end

#position_and_offset ⇒ Object



145
146
147
# File 'lib/lingo/language/word.rb', line 145

# Delegates to the token's +position_and_offset+; +nil+ when the word has
# no token.
def position_and_offset
  return unless token

  token.position_and_offset
end

#to_s ⇒ Object



159
160
161
162
163
164
# File 'lib/lingo/language/word.rb', line 159

def to_s
  s =  "<#{form}"
  s << "|#{attr}" unless identified?
  s << " = #{lexicals.inspect}" unless lexicals.empty?
  s << '>'
end