Class: LexM::Lemma

Inherits:
Object
  • Object
show all
Defined in:
lib/lexm/lemma.rb

Overview

Represents a lemma, the main entry in a lexicon

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(input = nil, source_file = nil, source_line = nil, source_column = nil) ⇒ Lemma

Initialize from either a string or direct components

Parameters:

  • input (String, nil) (defaults to: nil)

    input string in LexM format to parse

  • source_file (String, nil) (defaults to: nil)

    source file path

  • source_line (Integer, nil) (defaults to: nil)

    source line number

  • source_column (Integer, nil) (defaults to: nil)

    source column number



23
24
25
26
27
28
29
30
31
32
33
# File 'lib/lexm/lemma.rb', line 23

# Build a lemma, optionally parsing it from a LexM-formatted string.
#
# @param input [String, nil] LexM text to parse; non-String input is not parsed
# @param source_file [String, nil] source file path
# @param source_line [Integer, nil] source line number
# @param source_column [Integer, nil] source column number
def initialize(input = nil, source_file = nil, source_line = nil, source_column = nil)
    # Start out as an empty lemma without a redirect
    @text, @annotations, @sublemmas, @redirect = nil, {}, [], nil

    # Remember where the entry came from
    @source_file, @source_line, @source_column = source_file, source_line, source_column

    # Only String input is handed to the parser; anything else leaves the lemma empty
    return unless input.is_a?(String)

    parse(input)
end

Instance Attribute Details

#annotations ⇒ Object

Returns the value of attribute annotations.



14
15
16
# File 'lib/lexm/lemma.rb', line 14

# Reader for the lemma's annotations.
#
# @return [Object] the stored annotations (the constructor initialises this to an empty Hash)
def annotations
    @annotations
end

#redirect ⇒ Object

Returns the value of attribute redirect.



14
15
16
# File 'lib/lexm/lemma.rb', line 14

# Reader for the lemma-level redirect.
#
# @return [Object] the stored redirect, or nil when none is set
def redirect
    @redirect
end

#source_column ⇒ Object

Source location information



16
17
18
# File 'lib/lexm/lemma.rb', line 16

# Source location: column number.
#
# @return [Object] the column number given to the constructor (nil by default)
def source_column
    @source_column
end

#source_file ⇒ Object

Source location information



16
17
18
# File 'lib/lexm/lemma.rb', line 16

# Source location: file path.
#
# @return [Object] the file path given to the constructor (nil by default)
def source_file
    @source_file
end

#source_line ⇒ Object

Source location information



16
17
18
# File 'lib/lexm/lemma.rb', line 16

# Source location: line number.
#
# @return [Object] the line number given to the constructor (nil by default)
def source_line
    @source_line
end

#sublemmas ⇒ Object

Returns the value of attribute sublemmas.



14
15
16
# File 'lib/lexm/lemma.rb', line 14

# Reader for the lemma's sublemmas.
#
# @return [Object] the stored sublemmas (the constructor initialises this to an empty Array)
def sublemmas
    @sublemmas
end

#text ⇒ Object

Returns the value of attribute text.



14
15
16
# File 'lib/lexm/lemma.rb', line 14

# Reader for the main lemma text.
#
# @return [Object] the stored text, or nil when the lemma is empty
def text
    @text
end

Instance Method Details

#addRedirect(target, types = []) ⇒ Lemma

Add a pure redirect sublemma

Parameters:

  • target (String)

    target to redirect to

  • types (Array<String>) (defaults to: [])

    relation types

Returns:

  • (Lemma)

    self, so calls can be chained



239
240
241
242
243
244
245
246
# File 'lib/lexm/lemma.rb', line 239

# Append a sublemma that has no text of its own and only carries a redirect.
#
# @param target [String] target to redirect to
# @param types [Array<String>] relation types
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this lemma is a redirection lemma
def addRedirect(target, types = [])
    raise "Cannot add sublemmas to a redirection lemma" if redirected?

    # nil text: the sublemma consists of the redirect only
    @sublemmas << Sublemma.new(nil, LemmaRedirect.new(target, types))
    self
end

#addSublemma(text) ⇒ Lemma

Add a standard sublemma

Parameters:

  • text (String)

    text of the sublemma

Returns:

  • (Lemma)

    self, so calls can be chained



214
215
216
217
218
219
220
# File 'lib/lexm/lemma.rb', line 214

# Append one standard (text-only) sublemma.
#
# @param text [String] text of the sublemma
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this lemma is a redirection lemma
def addSublemma(text)
    raise "Cannot add sublemmas to a redirection lemma" if redirected?

    @sublemmas.push(Sublemma.new(text))
    self
end

#addSublemmas(texts) ⇒ Lemma

Add multiple sublemmas at once

Parameters:

  • texts (Array<String>)

    array of sublemma texts

Returns:

  • (Lemma)

    self, so calls can be chained



225
226
227
228
229
230
231
232
233
# File 'lib/lexm/lemma.rb', line 225

# Append several standard (text-only) sublemmas, in the order given.
#
# @param texts [Array<String>] array of sublemma texts
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this lemma is a redirection lemma
def addSublemmas(texts)
    # The check runs once, before anything is added
    raise "Cannot add sublemmas to a redirection lemma" if redirected?

    texts.each { |entry| @sublemmas << Sublemma.new(entry) }
    self
end

#clear ⇒ Lemma

Clear all annotations, sublemmas, and any redirect, but keep the main lemma text

Returns:

  • (Lemma)

    self, so calls can be chained



330
331
332
333
334
335
# File 'lib/lexm/lemma.rb', line 330

# Drop annotations, sublemmas and the redirect; the main lemma text is left untouched.
#
# @return [Lemma] self, so calls can be chained
def clear
    # Fresh containers are assigned; the previous objects are not mutated
    @annotations, @sublemmas, @redirect = {}, [], nil
    self
end

#clearAll ⇒ Lemma

Clear everything including the main lemma

Returns:

  • (Lemma)

    self, so calls can be chained



339
340
341
342
343
344
345
# File 'lib/lexm/lemma.rb', line 339

# Reset the lemma completely: text, annotations, sublemmas and redirect.
#
# @return [Lemma] self, so calls can be chained
def clearAll
    # Fresh containers are assigned; the previous objects are not mutated
    @text, @annotations, @sublemmas, @redirect = nil, {}, [], nil
    self
end

#clearAnnotations ⇒ Lemma

Clear all annotations

Returns:

  • (Lemma)

    self, so calls can be chained



309
310
311
312
# File 'lib/lexm/lemma.rb', line 309

# Replace the annotations with a new empty Hash.
#
# @return [Lemma] self, so calls can be chained
def clearAnnotations
    # tap yields and then returns the receiver, i.e. self
    tap { @annotations = {} }
end

#clearRedirect ⇒ Lemma

Clear redirect

Returns:

  • (Lemma)

    self, so calls can be chained



323
324
325
326
# File 'lib/lexm/lemma.rb', line 323

# Remove the lemma-level redirect.
#
# @return [Lemma] self, so calls can be chained
def clearRedirect
    # tap yields and then returns the receiver, i.e. self
    tap { @redirect = nil }
end

#clearSublemmas ⇒ Lemma

Clear all sublemmas

Returns:

  • (Lemma)

    self, so calls can be chained



316
317
318
319
# File 'lib/lexm/lemma.rb', line 316

# Replace the sublemmas with a new empty Array.
#
# @return [Lemma] self, so calls can be chained
def clearSublemmas
    # tap yields and then returns the receiver, i.e. self
    tap { @sublemmas = [] }
end

#parse(input) ⇒ Lemma

Parse a lemma string

Parameters:

  • input (String)

    lemma string in LexM format

Returns:

  • (Lemma)

    self, so calls can be chained



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/lexm/lemma.rb', line 38

# Parse a lemma string in LexM format into this object.
#
# Input containing ">>" is handed as a whole to parseRedirectionLemma. Any
# other input is split at the first '|' into a lemma part and an optional
# sublemmas part.
#
# @param input [String] lemma string in LexM format
# @return [Lemma] self
# @raise [RuntimeError] on nil/blank input, unequal '[' / ']' counts, or a leading '|'
def parse(input)
    # Reject nil, empty and whitespace-only input
    if input.nil? || input.strip.empty?
        raise "Empty lemma input!"
    end

    # Cheap sanity check: only the bracket counts are compared, not their nesting
    unless input.count('[') == input.count(']')
        raise "Malformed input: mismatched brackets in '#{input}'"
    end

    # A leading pipe would leave no lemma text in front of the sublemmas
    raise "Malformed input: lemma starts with pipe character in '#{input}'" if input.start_with?("|")

    if input.include?(">>")
        parseRedirectionLemma(input)
    else
        # Limit 2: everything after the first pipe belongs to the sublemmas part
        head, tail = input.split('|', 2)
        parseLemma(head)
        parseSublemmas(tail) if tail
    end

    self
end

#parseAnnotations(annotationsText) ⇒ void

This method returns an undefined value.

Parse annotations like sp:past,pp:participle or pl:oxen

Parameters:

  • annotationsText (String)

    annotations string



176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/lexm/lemma.rb', line 176

# Parse a comma-separated annotation list such as "sp:past,pp:participle" or
# "pl:oxen" and store the result in @annotations.
#
# "type:value" entries are stored as type => value (both stripped); entries
# without a colon are stored as name => true. Annotations parsed before a
# failing entry stay stored.
#
# @param annotationsText [String] annotations string (without the surrounding brackets)
# @return [void]
# @raise [RuntimeError] if the block is blank, or an entry, its type or its value is empty
def parseAnnotations(annotationsText)
    raise "Empty annotations block" if annotationsText.strip.empty?

    # A negative limit keeps trailing empty fields, so "a," and "," are rejected
    # exactly like "a,,b" instead of being silently accepted.
    annotationsText.split(',', -1).each do |annotation|
        raise "Empty annotation in comma-separated list" if annotation.strip.empty?

        if annotation.include?(':')
            # Limit 2: only the first colon separates type and value
            type, value = annotation.split(':', 2).map(&:strip)

            raise "Empty annotation type in '#{annotation}'" if type.empty?
            raise "Empty annotation value for type '#{type}'" if value.empty?

            @annotations[type] = value
        else
            # Flag-style annotation without a value
            @annotations[annotation.strip] = true
        end
    end
end

#parseLemma(lemmaPart) ⇒ void

This method returns an undefined value.

Parse just the lemma part (before any pipe)

Parameters:

  • lemmaPart (String)

    lemma part string



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/lexm/lemma.rb', line 92

# Parse just the lemma part (the text before any pipe): plain text, optionally
# followed by one bracketed annotation block, e.g. "ox" or "ox[pl:oxen]".
#
# Sets @text to the stripped lemma text and forwards the content between the
# first '[' and the final ']' to parseAnnotations.
#
# @param lemmaPart [String] lemma part string
# @return [void]
# @raise [RuntimeError] if the text is empty or the part does not end with ']' after a '['
def parseLemma(lemmaPart)
    if lemmaPart.include?('[')
        # Limit 2: everything after the first '[' is the annotation block
        baseLemma, annotationsPart = lemmaPart.split('[', 2)

        # Check for malformed annotation syntax
        raise "Malformed annotation: missing closing ']' in '#{lemmaPart}'" unless annotationsPart.end_with?(']')

        # Ensure there's actual lemma text before annotations
        raise "Missing lemma text before annotations in '#{lemmaPart}'" if baseLemma.strip.empty?

        @text = baseLemma.strip

        # Remove exactly the final ']' verified above. A /\]$/ substitution would
        # hit the first line end instead when the text contains a newline.
        parseAnnotations(annotationsPart.chomp(']'))
    else
        # Simple lemma: ensure there's actual text
        raise "Empty lemma text in '#{lemmaPart}'" if lemmaPart.strip.empty?

        @text = lemmaPart.strip
    end
end

#parseRedirectionLemma(input) ⇒ void

This method returns an undefined value.

Parse a redirection lemma (with >> syntax)

Parameters:

  • input (String)

    redirection lemma string



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/lexm/lemma.rb', line 68

# Parse a redirection lemma, i.e. "word>>target" or "word>>(relation)target".
#
# Relation types inside the parentheses are comma-separated and stripped.
# @text and @redirect are only assigned once the whole input has been
# validated, so a failed parse leaves the previous state untouched.
#
# @param input [String] redirection lemma string
# @return [void]
# @raise [RuntimeError] if the syntax is malformed, or the lemma text or the target is empty
def parseRedirectionLemma(input)
    usage = "Malformed redirection syntax in '#{input}'. Should be 'word>>target' or 'word>>(relation)target'"

    # Nothing but whitespace after '>>' means there is no target at all
    raise usage if input =~ />>[\s]*$/

    if (match = input.match(/(.+?)>>\((.+?)\)(.+)/))
        # word>>(relation[,relation...])target
        text = match[1].strip
        types = match[2].split(',').map(&:strip)
        target = match[3].strip
    elsif (match = input.match(/(.+?)>>(.+)/))
        # word>>target
        text = match[1].strip
        types = nil
        target = match[2].strip
    else
        raise usage
    end

    # Both forms need a non-blank target; "word>>(rel) " used to slip through
    raise "Malformed redirection syntax in '#{input}'. Missing target after '>>'" if target.empty?

    # Whitespace in front of '>>' is not a lemma
    raise "Malformed redirection syntax in '#{input}'. Missing lemma text before '>>'" if text.empty?

    @text = text
    @redirect = types ? LemmaRedirect.new(target, types) : LemmaRedirect.new(target)
end

#parseSublemmas(sublemmasPart) ⇒ void

This method returns an undefined value.

Parse sublemmas part (after the pipe)

Parameters:

  • sublemmasPart (String)

    sublemmas part string



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/lexm/lemma.rb', line 121

# Parse the sublemmas part (the text after the first pipe) and append the
# resulting Sublemma objects to @sublemmas.
#
# Recognised entry forms:
#   text                 plain sublemma
#   text>target          sublemma with a redirect
#   text>(rel,...)target sublemma with a typed redirect
#   >target              pure redirect (no text)
#   >(rel,...)target     pure typed redirect (no text)
#
# When the whole part starts with '>' it is treated as ONE pure redirect and
# is not split on commas. Otherwise the part is split on every comma first.
# NOTE(review): that split also cuts through commas inside a "(rel,rel)"
# list of a later entry - confirm whether that is intended.
#
# @param sublemmasPart [String] sublemmas part string
# @return [void]
def parseSublemmas(sublemmasPart)
    # Appends a text-less sublemma for a chunk that begins with '>'.
    # A chunk matching neither pattern (e.g. a lone ">") adds nothing.
    appendPureRedirect = lambda do |chunk|
        if (found = chunk.match(/>\((.+?)\)(.+)/))
            relations = found[1].split(',').map(&:strip)
            @sublemmas << Sublemma.new(nil, LemmaRedirect.new(found[2].strip, relations))
        elsif (found = chunk.match(/>(.+)/))
            @sublemmas << Sublemma.new(nil, LemmaRedirect.new(found[1].strip))
        end
    end

    # Format: word|>(relation)target - the whole part is a single redirect
    return appendPureRedirect.call(sublemmasPart) if sublemmasPart.start_with?('>')

    sublemmasPart.split(',').each do |raw|
        entry = raw.strip

        if entry.start_with?('>')
            appendPureRedirect.call(entry)
        elsif (found = entry.match(/(.+?)>\((.+?)\)(.+)/))
            # Format: word>(relation)target
            relations = found[2].split(',').map(&:strip)
            @sublemmas << Sublemma.new(found[1].strip, LemmaRedirect.new(found[3].strip, relations))
        elsif (found = entry.match(/(.+?)>(.+)/))
            # Simple redirection without relation type
            @sublemmas << Sublemma.new(found[1].strip, LemmaRedirect.new(found[2].strip))
        else
            # Plain sublemma, including text with a '>' that fits no redirect pattern
            @sublemmas << Sublemma.new(entry)
        end
    end
end

#redirected? ⇒ Boolean

Is this a redirection lemma (no sublemmas, just a redirect)?

Returns:

  • (Boolean)

    true if this is a redirection lemma



349
350
351
# File 'lib/lexm/lemma.rb', line 349

# Is this a redirection lemma, i.e. one with a redirect set and no sublemmas?
#
# @return [Boolean] true if this is a redirection lemma
def redirected?
    # Without a redirect the lemma can never be a redirection lemma
    return false if @redirect.nil?

    @sublemmas.empty?
end

#setAnnotation(type, value = true) ⇒ Lemma

Set an annotation

Parameters:

  • type (String)

    annotation type

  • value (Object) (defaults to: true)

    annotation value

Returns:

  • (Lemma)

    self, so calls can be chained



285
286
287
288
289
290
291
292
# File 'lib/lexm/lemma.rb', line 285

# Set (or overwrite) a single annotation after validating it.
#
# @param type [String] annotation type
# @param value [Object] annotation value; defaults to true
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this lemma is a redirection lemma, or validateAnnotation rejects the pair
def setAnnotation(type, value = true)
    raise "Cannot add annotations to a redirection lemma" if redirected?

    # Validation runs before the store, so a rejected pair is never stored
    validateAnnotation(type, value)
    @annotations.store(type, value)
    self
end

#setAnnotations(annotations) ⇒ Lemma

Add multiple annotations at once

Parameters:

  • annotations (Hash)

    hash of annotation type => value pairs

Returns:

  • (Lemma)

    self, so calls can be chained



297
298
299
300
301
302
303
304
305
# File 'lib/lexm/lemma.rb', line 297

# Set several annotations at once.
#
# Every pair goes through validateAnnotation, the same check setAnnotation
# applies, and all pairs are validated before any is stored, so a rejected
# pair leaves the existing annotations unchanged.
#
# @param annotations [Hash] hash of annotation type => value pairs
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this lemma is a redirection lemma, or validateAnnotation rejects a pair
def setAnnotations(annotations)
    raise "Cannot add annotations to a redirection lemma" if redirected?

    # First pass: validation only, nothing is stored yet
    annotations.each { |key, value| validateAnnotation(key, value) }

    # Second pass: every pair is known to be valid
    annotations.each { |key, value| @annotations[key] = value }
    self
end

#setRedirect(target, types = []) ⇒ Lemma

Set the lemma’s redirection

Parameters:

  • target (String)

    target to redirect to

  • types (Array<String>) (defaults to: [])

    relation types

Returns:

  • (Lemma)

    self, so calls can be chained



252
253
254
255
256
257
258
# File 'lib/lexm/lemma.rb', line 252

# Set (or replace) the lemma-level redirect.
#
# @param target [String] target to redirect to
# @param types [Array<String>] relation types
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if the lemma already has sublemmas
def setRedirect(target, types = [])
    raise "Cannot set redirect on a lemma with sublemmas" unless @sublemmas.empty?

    @redirect = LemmaRedirect.new(target, types)
    self
end

#to_s ⇒ String

Convert to string format

Returns:

  • (String)

    the string representation of this lemma



355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# File 'lib/lexm/lemma.rb', line 355

# Convert to string format.
#
# A redirection lemma renders as "text>>target", built from the redirect's own
# string form with its first '>' removed. Any other lemma renders as
# "text[annotations]|sublemmas", where the bracket block and the pipe section
# are omitted when empty. A non-redirection lemma without text renders as "".
#
# @return [String] the string representation of this lemma
def to_s
    # Redirection lemma format (with double >>)
    if redirected?
        target = @redirect.to_s.sub('>', '')
        return "#{@text}>>#{target}"
    end

    return "" if @text.nil?

    # Collect the pieces and join at the end; the result is always a new string
    pieces = [@text]

    unless @annotations.empty?
        # Flag-style annotations (value true) are rendered by name only
        rendered = @annotations.map { |type, value| value == true ? type : "#{type}:#{value}" }
        pieces << "[#{rendered.join(',')}]"
    end

    pieces << "|#{@sublemmas.map(&:to_s).join(',')}" unless @sublemmas.empty?

    pieces.join
end

#validateAnnotation(key, value) ⇒ Boolean

Validate annotation key and value format. Ensures keys and values follow the expected format.

Parameters:

  • key (String)

    annotation key to validate

  • value (String, Boolean)

    annotation value to validate

Returns:

  • (Boolean)

    true if validation passes

Raises:

  • (StandardError)

    with detailed message if validation fails



266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/lexm/lemma.rb', line 266

# Validate annotation key and value format.
#
# Keys must consist solely of ASCII letters, digits and underscores. String
# values must not contain square brackets; non-String values (such as true)
# are not inspected.
#
# @param key [String] annotation key to validate
# @param value [String, Boolean] annotation value to validate
# @return [Boolean] true if validation passes
# @raise [StandardError] with detailed message if validation fails
def validateAnnotation(key, value)
    # \A and \z anchor the whole string. ^ and $ only anchor a line, so a key
    # such as "ok\nbad key!" would otherwise pass on its first line.
    unless key =~ /\A[a-zA-Z0-9_]+\z/
        raise "Invalid annotation key: '#{key}' (must contain only letters, numbers, and underscores)"
    end

    # Additional validation for values: only String values are checked
    if value.is_a?(String) && (value.include?(']') || value.include?('['))
        raise "Invalid annotation value for '#{key}': cannot contain square brackets"
    end

    # Explicit success value, as documented
    true
end