Class: LexM::Lemma
- Inherits:
-
Object
- Object
- LexM::Lemma
- Defined in:
- lib/lexm/lemma.rb
Overview
Represents a lemma, the main entry in a lexicon
Instance Attribute Summary collapse
-
#annotations ⇒ Object
Returns the value of attribute annotations.
-
#redirect ⇒ Object
Returns the value of attribute redirect.
-
#source_column ⇒ Object
Source location information.
-
#source_file ⇒ Object
Source location information.
-
#source_line ⇒ Object
Source location information.
-
#sublemmas ⇒ Object
Returns the value of attribute sublemmas.
-
#text ⇒ Object
Returns the value of attribute text.
Instance Method Summary collapse
-
#addRedirect(target, types = []) ⇒ Lemma
Add a pure redirect sublemma.
-
#addSublemma(text) ⇒ Lemma
Add a standard sublemma.
-
#addSublemmas(texts) ⇒ Lemma
Add multiple sublemmas at once.
-
#clear ⇒ Lemma
Clear all annotations and sublemmas but keep the main lemma.
-
#clearAll ⇒ Lemma
Clear everything including the main lemma.
-
#clearAnnotations ⇒ Lemma
Clear all annotations.
-
#clearRedirect ⇒ Lemma
Clear redirect.
-
#clearSublemmas ⇒ Lemma
Clear all sublemmas.
-
#initialize(input = nil, source_file = nil, source_line = nil, source_column = nil) ⇒ Lemma
constructor
Initialize from either a string or direct components.
-
#parse(input) ⇒ Lemma
Parse a lemma string.
-
#parseAnnotations(annotationsText) ⇒ void
Parse annotations like sp:past,pp:participle or pl:oxen.
-
#parseLemma(lemmaPart) ⇒ void
Parse just the lemma part (before any pipe).
-
#parseRedirectionLemma(input) ⇒ void
Parse a redirection lemma (with >> syntax).
-
#parseSublemmas(sublemmasPart) ⇒ void
Parse sublemmas part (after the pipe).
-
#redirected? ⇒ Boolean
Is this a redirection lemma (no sublemmas, just a redirect)?
-
#setAnnotation(type, value = true) ⇒ Lemma
Set an annotation.
-
#setAnnotations(annotations) ⇒ Lemma
Add multiple annotations at once.
-
#setRedirect(target, types = []) ⇒ Lemma
Set the lemma’s redirection.
-
#shortcuts(placeholder = "~") ⇒ Hash<String, String>
Returns a hash mapping each sublemma to its shortcut.
-
#smart_split_sublemmas(text) ⇒ Array<String>
Helper method to split sublemmas while respecting parentheses. This ensures we don’t split inside relation type lists like (sp,pp).
-
#to_s ⇒ String
Convert to string format.
-
#validateAnnotation(key, value) ⇒ Boolean
Validate annotation key and value format. Ensures keys and values follow the expected format.
Constructor Details
#initialize(input = nil, source_file = nil, source_line = nil, source_column = nil) ⇒ Lemma
Initialize from either a string or direct components
23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/lexm/lemma.rb', line 23

# Build a lemma, optionally parsing it from its textual form.
#
# @param input [String, nil] lemma string handed to #parse; any non-String
#   value leaves the lemma empty
# @param source_file [Object, nil] stored unchanged in @source_file
# @param source_line [Object, nil] stored unchanged in @source_line
# @param source_column [Object, nil] stored unchanged in @source_column
def initialize(input = nil, source_file = nil, source_line = nil, source_column = nil)
  # Record the source location first.
  @source_file, @source_line, @source_column = source_file, source_line, source_column

  # Start from an empty lemma; #parse fills these in when given a String.
  @text = nil
  @redirect = nil
  @annotations = {}
  @sublemmas = []

  parse(input) if String === input
end
Instance Attribute Details
#annotations ⇒ Object
Returns the value of attribute annotations.
14 15 16 |
# File 'lib/lexm/lemma.rb', line 14

# Reader for the annotations attribute.
#
# @return [Object] the current value of @annotations
def annotations
  @annotations
end
#redirect ⇒ Object
Returns the value of attribute redirect.
14 15 16 |
# File 'lib/lexm/lemma.rb', line 14

# Reader for the redirect attribute.
#
# @return [Object] the current value of @redirect
def redirect
  @redirect
end
#source_column ⇒ Object
Source location information
16 17 18 |
# File 'lib/lexm/lemma.rb', line 16

# Source location information: reader for the source_column attribute.
#
# @return [Object] the current value of @source_column
def source_column
  @source_column
end
#source_file ⇒ Object
Source location information
16 17 18 |
# File 'lib/lexm/lemma.rb', line 16

# Source location information: reader for the source_file attribute.
#
# @return [Object] the current value of @source_file
def source_file
  @source_file
end
#source_line ⇒ Object
Source location information
16 17 18 |
# File 'lib/lexm/lemma.rb', line 16

# Source location information: reader for the source_line attribute.
#
# @return [Object] the current value of @source_line
def source_line
  @source_line
end
#sublemmas ⇒ Object
Returns the value of attribute sublemmas.
14 15 16 |
# File 'lib/lexm/lemma.rb', line 14

# Reader for the sublemmas attribute.
#
# @return [Object] the current value of @sublemmas
def sublemmas
  @sublemmas
end
#text ⇒ Object
Returns the value of attribute text.
14 15 16 |
# File 'lib/lexm/lemma.rb', line 14

# Reader for the text attribute.
#
# @return [Object] the current value of @text
def text
  @text
end
Instance Method Details
#addRedirect(target, types = []) ⇒ Lemma
Add a pure redirect sublemma
257 258 259 260 261 262 263 264 |
# File 'lib/lexm/lemma.rb', line 257

# Add a pure redirect sublemma: a Sublemma with no text of its own that
# carries a LemmaRedirect built from +target+ and +types+.
#
# @param target [Object] passed to LemmaRedirect.new as the target
# @param types [Array] passed to LemmaRedirect.new as the relation types
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this lemma is itself a redirection lemma
def addRedirect(target, types = [])
  raise "Cannot add sublemmas to a redirection lemma" if redirected?

  @sublemmas << Sublemma.new(nil, LemmaRedirect.new(target, types), self)
  self
end
#addSublemma(text) ⇒ Lemma
Add a standard sublemma
232 233 234 235 236 237 238 |
# File 'lib/lexm/lemma.rb', line 232

# Add a standard sublemma (text only, no redirect).
#
# @param text [Object] passed to Sublemma.new as the sublemma text
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this lemma is a redirection lemma
def addSublemma(text)
  raise "Cannot add sublemmas to a redirection lemma" if redirected?

  @sublemmas.push(Sublemma.new(text, nil, self))
  self
end
#addSublemmas(texts) ⇒ Lemma
Add multiple sublemmas at once
243 244 245 246 247 248 249 250 251 |
# File 'lib/lexm/lemma.rb', line 243

# Add multiple standard sublemmas at once, in the order given.
#
# @param texts [#each] collection whose elements are each passed to
#   Sublemma.new as the sublemma text
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this lemma is a redirection lemma
def addSublemmas(texts)
  raise "Cannot add sublemmas to a redirection lemma" if redirected?

  texts.each { |entry| @sublemmas.push(Sublemma.new(entry, nil, self)) }
  self
end
#clear ⇒ Lemma
Clear all annotations and sublemmas but keep the main lemma
363 364 365 366 367 368 |
# File 'lib/lexm/lemma.rb', line 363

# Reset annotations, sublemmas and redirect while keeping the lemma text.
# Fresh containers are assigned rather than emptying the existing ones.
#
# @return [Lemma] self, so calls can be chained
def clear
  @annotations, @sublemmas, @redirect = {}, [], nil
  self
end
#clearAll ⇒ Lemma
Clear everything including the main lemma
372 373 374 375 376 377 378 |
# File 'lib/lexm/lemma.rb', line 372

# Reset the lemma completely: text, annotations, sublemmas and redirect.
# Fresh containers are assigned rather than emptying the existing ones.
#
# @return [Lemma] self, so calls can be chained
def clearAll
  @text, @redirect = nil, nil
  @annotations, @sublemmas = {}, []
  self
end
#clearAnnotations ⇒ Lemma
Clear all annotations
342 343 344 345 |
# File 'lib/lexm/lemma.rb', line 342

# Drop every annotation by assigning a fresh, empty hash.
#
# @return [Lemma] self, so calls can be chained
def clearAnnotations
  @annotations = Hash.new
  self
end
#clearRedirect ⇒ Lemma
Clear redirect
356 357 358 359 |
# File 'lib/lexm/lemma.rb', line 356

# Remove the lemma-level redirect, if any.
#
# @return [Lemma] self, so calls can be chained
def clearRedirect
  @redirect = nil
  self
end
#clearSublemmas ⇒ Lemma
Clear all sublemmas
349 350 351 352 |
# File 'lib/lexm/lemma.rb', line 349

# Drop every sublemma by assigning a fresh, empty array.
#
# @return [Lemma] self, so calls can be chained
def clearSublemmas
  @sublemmas = Array.new
  self
end
#parse(input) ⇒ Lemma
Parse a lemma string
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/lexm/lemma.rb', line 38

# Parse a lemma string and populate this object from it.
#
# Input containing ">>" is handed whole to #parseRedirectionLemma.
# Otherwise the input is split at the first "|": the left part goes to
# #parseLemma and the right part, when present, to #parseSublemmas.
#
# @param input [String] the lemma in its textual form
# @return [Lemma] self
# @raise [RuntimeError] for nil/blank input, unequal counts of "[" and
#   "]", or input that starts with "|"
def parse(input)
  raise "Empty lemma input!" if input.nil? || input.strip.empty?

  unless input.count('[') == input.count(']')
    raise "Malformed input: mismatched brackets in '#{input}'"
  end

  # A string that starts with "|" necessarily contains one.
  raise "Malformed input: lemma starts with pipe character in '#{input}'" if input.start_with?("|")

  if input.include?(">>")
    parseRedirectionLemma(input)
  else
    head, tail = input.split('|', 2)
    parseLemma(head)
    parseSublemmas(tail) unless tail.nil?
  end

  self
end
#parseAnnotations(annotationsText) ⇒ void
This method returns an undefined value.
Parse annotations like sp:past,pp:participle or pl:oxen
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/lexm/lemma.rb', line 194

# Parse annotations like "sp:past,pp:participle" or "pl:oxen" and store
# them in @annotations.
#
# Each comma-separated item is either "type:value" (stored as a stripped
# String value) or a bare "type" (stored with the value true). Items are
# stored as they are read, so annotations preceding a malformed item
# remain set when an error is raised.
#
# @param annotationsText [String] the text between the square brackets
# @return [void]
# @raise [RuntimeError] if the block is blank, if any item is blank
#   (including one produced by a leading, doubled or trailing comma), or
#   if a "type:value" item has a blank type or a blank value
def parseAnnotations(annotationsText)
  raise "Empty annotations block" if annotationsText.strip.empty?

  # A negative limit keeps trailing empty fields, so "a," is rejected the
  # same way ",a" and "a,,b" are.
  annotationsText.split(',', -1).each do |annotation|
    raise "Empty annotation in comma-separated list" if annotation.strip.empty?

    type, value = annotation.split(':', 2)

    # No colon: a simple flag annotation.
    if value.nil?
      @annotations[type.strip] = true
      next
    end

    raise "Empty annotation type in '#{annotation}'" if type.strip.empty?
    raise "Empty annotation value for type '#{type.strip}'" if value.strip.empty?

    @annotations[type.strip] = value.strip
  end
end
#parseLemma(lemmaPart) ⇒ void
This method returns an undefined value.
Parse just the lemma part (before any pipe)
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/lexm/lemma.rb', line 92

# Parse just the lemma part (everything before any pipe).
#
# Without a "[" the stripped input becomes @text. With one, the text
# before the first "[" becomes @text and the remainder, minus its closing
# "]", is handed to #parseAnnotations.
#
# @param lemmaPart [String] headword with optional "[...]" annotations
# @return [void]
# @raise [RuntimeError] if the headword is blank or the annotation block
#   does not end with "]"
def parseLemma(lemmaPart)
  if lemmaPart.include?('[')
    headword, bracketed = lemmaPart.split('[', 2)

    # Anything after the closing bracket counts as malformed.
    raise "Malformed annotation: missing closing ']' in '#{lemmaPart}'" unless bracketed.end_with?(']')
    raise "Missing lemma text before annotations in '#{lemmaPart}'" if headword.strip.empty?

    @text = headword.strip
    parseAnnotations(bracketed.sub(/\]$/, ''))
  else
    raise "Empty lemma text in '#{lemmaPart}'" if lemmaPart.strip.empty?

    @text = lemmaPart.strip
  end
end
#parseRedirectionLemma(input) ⇒ void
This method returns an undefined value.
Parse a redirection lemma (with >> syntax)
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/lexm/lemma.rb', line 68

# Parse a redirection lemma (with >> syntax).
#
# Two forms are accepted: "word>>target" and "word>>(rel1,rel2)target".
# @text and @redirect are only assigned once both the headword and the
# target are known to be non-blank, so a failed parse leaves them as they
# were.
#
# @param input [String] the full lemma string containing ">>"
# @return [void]
# @raise [RuntimeError] if nothing follows ">>", if the headword before
#   ">>" is blank, if the target is blank, or if the input matches neither
#   accepted form
def parseRedirectionLemma(input)
  usage = "Malformed redirection syntax in '#{input}'. Should be 'word>>target' or 'word>>(relation)target'"

  # Needs a target after >>.
  raise usage if input =~ />>[\s]*$/

  if (typed = input.match(/(.+?)>>\((.+?)\)(.+)/))
    headword = typed[1].strip
    relations = typed[2].split(',').map(&:strip)
    target = typed[3].strip
  elsif (plain = input.match(/(.+?)>>(.+)/))
    headword = plain[1].strip
    relations = nil
    target = plain[2].strip
  else
    raise usage
  end

  # The patterns only require *some* character on each side of ">>";
  # whitespace alone is not a usable headword or target.
  raise "Malformed redirection syntax in '#{input}'. Missing target after '>>'" if target.empty?
  raise "Malformed redirection syntax in '#{input}'. Missing lemma text before '>>'" if headword.empty?

  @text = headword
  @redirect = relations ? LemmaRedirect.new(target, relations) : LemmaRedirect.new(target)
end
#parseSublemmas(sublemmasPart) ⇒ void
This method returns an undefined value.
Parse sublemmas part (after the pipe)
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/lexm/lemma.rb', line 121

# Parse the sublemmas part (everything after the pipe) and append one
# Sublemma per item to @sublemmas.
#
# The text is split with #smart_split_sublemmas so that commas inside a
# relation list such as ">(sp,pp)wring" do not separate items. Each
# stripped item takes one of these shapes:
#   ">(rels)target" / ">target"           - pure redirection, no text
#   "word>(rels)target" / "word>target"   - text plus redirection
#   anything else                         - plain text sublemma
#
# @param sublemmasPart [String] comma-separated sublemma list
# @return [void]
def parseSublemmas(sublemmasPart)
  smart_split_sublemmas(sublemmasPart).each do |raw|
    entry = raw.strip

    if entry.start_with?('>')
      # Pure redirection sublemma.
      if (typed = entry.match(/>\((.+?)\)(.+)/))
        link = LemmaRedirect.new(typed[2].strip, typed[1].split(',').map(&:strip))
        @sublemmas << Sublemma.new(nil, link, self)
      elsif (plain = entry.match(/>(.+)/))
        @sublemmas << Sublemma.new(nil, LemmaRedirect.new(plain[1].strip), self)
      end
      # An item that is only ">" matches neither pattern and adds nothing.
      next
    end

    unless entry.include?('>')
      # Simple sublemma.
      @sublemmas << Sublemma.new(entry, nil, self)
      next
    end

    # Normal sublemma that may carry a redirection.
    if (typed = entry.match(/(.+?)>\((.+?)\)(.+)/))
      # Format: word>(relation)target
      link = LemmaRedirect.new(typed[3].strip, typed[2].split(',').map(&:strip))
      @sublemmas << Sublemma.new(typed[1].strip, link, self)
    elsif (plain = entry.match(/(.+?)>(.+)/))
      # Simple redirection without relation types.
      @sublemmas << Sublemma.new(plain[1].strip, LemmaRedirect.new(plain[2].strip), self)
    else
      # Contains ">" but nothing after it: kept verbatim as text.
      @sublemmas << Sublemma.new(entry, nil, self)
    end
  end
end
#redirected? ⇒ Boolean
Is this a redirection lemma (no sublemmas, just a redirect)?
382 383 384 |
# File 'lib/lexm/lemma.rb', line 382

# Is this a redirection lemma (no sublemmas, just a redirect)?
#
# @return [Boolean] true only when @redirect is set and @sublemmas is empty
def redirected?
  return false if @redirect.nil?

  @sublemmas.empty?
end
#setAnnotation(type, value = true) ⇒ Lemma
Set an annotation
318 319 320 321 322 323 324 325 |
# File 'lib/lexm/lemma.rb', line 318

# Set a single annotation after checking it with #validateAnnotation.
#
# @param type [Object] annotation key
# @param value [Object] annotation value; defaults to true (a bare flag)
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this is a redirection lemma; errors raised by
#   #validateAnnotation propagate unchanged
def setAnnotation(type, value = true)
  raise "Cannot add annotations to a redirection lemma" if redirected?

  validateAnnotation(type, value)
  @annotations.store(type, value)
  self
end
#setAnnotations(annotations) ⇒ Lemma
Add multiple annotations at once
330 331 332 333 334 335 336 337 338 |
# File 'lib/lexm/lemma.rb', line 330

# Add multiple annotations at once.
#
# Every pair is checked with #validateAnnotation, the same check
# #setAnnotation applies, before anything is stored, so an invalid pair
# leaves @annotations untouched.
#
# @param annotations [#each] key/value pairs to add (e.g. a Hash)
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if this is a redirection lemma; errors raised by
#   #validateAnnotation propagate unchanged
def setAnnotations(annotations)
  raise "Cannot add annotations to a redirection lemma" if redirected?

  # Validate everything first so a failure cannot leave a partial update.
  annotations.each { |key, value| validateAnnotation(key, value) }
  annotations.each { |key, value| @annotations[key] = value }
  self
end
#setRedirect(target, types = []) ⇒ Lemma
Set the lemma’s redirection
270 271 272 273 274 275 276 |
# File 'lib/lexm/lemma.rb', line 270

# Set the lemma's redirection, replacing any existing one.
#
# @param target [Object] passed to LemmaRedirect.new as the target
# @param types [Array] passed to LemmaRedirect.new as the relation types
# @return [Lemma] self, so calls can be chained
# @raise [RuntimeError] if the lemma already has sublemmas
def setRedirect(target, types = [])
  raise "Cannot set redirect on a lemma with sublemmas" unless @sublemmas.empty?

  @redirect = LemmaRedirect.new(target, types)
  self
end
#shortcuts(placeholder = "~") ⇒ Hash<String, String>
Returns a hash mapping each sublemma to its shortcut
281 282 283 284 285 286 287 288 289 290 291 |
# File 'lib/lexm/lemma.rb', line 281

# Returns a hash mapping each sublemma's text to its shortcut.
#
# Sublemmas that report redirected? or have no text are left out. The
# result is empty when the lemma has no text, is a redirection lemma, or
# has no sublemmas.
#
# @param placeholder [String] forwarded to each sublemma's #shortcut
# @return [Hash<String, String>] sublemma text => shortcut
def shortcuts(placeholder = "~")
  return {} if @text.nil? || redirected? || @sublemmas.empty?

  @sublemmas.each_with_object({}) do |entry, mapping|
    # Only text-bearing, non-redirecting sublemmas have a shortcut.
    next if entry.redirected? || entry.text.nil?

    mapping[entry.text] = entry.shortcut(placeholder)
  end
end
#smart_split_sublemmas(text) ⇒ Array<String>
Helper method to split sublemmas while respecting parentheses. This ensures we don’t split inside relation type lists like (sp,pp).
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# File 'lib/lexm/lemma.rb', line 166

# Split a sublemma list on commas while respecting parentheses, so that a
# relation type list such as "(sp,pp)" stays inside its item.
#
# Empty items (from leading, trailing or doubled commas) are dropped. An
# unmatched ")" is kept in the item and does not affect nesting; an
# unclosed "(" suppresses splitting for the rest of the text.
#
# @param text [String] comma-separated sublemma list
# @return [Array<String>] the items, unstripped, in order
def smart_split_sublemmas(text)
  pieces = []
  buffer = ""
  depth = 0

  text.each_char do |ch|
    # A top-level comma ends the current item.
    if depth.zero? && ch == ','
      pieces << buffer unless buffer.empty?
      buffer = ""
      next
    end

    buffer << ch
    case ch
    when '(' then depth += 1
    when ')' then depth -= 1 if depth > 0
    end
  end

  pieces << buffer unless buffer.empty?
  pieces
end
#to_s ⇒ String
Convert to string format
388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 |
# File 'lib/lexm/lemma.rb', line 388 def to_s # Redirection lemma format (with double >>) if redirected? return "#{@text}>>#{@redirect.to_s.sub('>', '')}" end # Normal lemma format result = "" # Format the lemma part with any annotations if @text.nil? return "" elsif @annotations.empty? result << @text else annotationsStr = @annotations.map do |type, value| value == true ? type : "#{type}:#{value}" end.join(',') result << "#{@text}[#{annotationsStr}]" end # Add sublemmas if present if !@sublemmas.empty? result << "|" result << @sublemmas.map(&:to_s).join(',') end result end |
#validateAnnotation(key, value) ⇒ Boolean
Validate annotation key and value format. Ensures keys and values follow the expected format.
299 300 301 302 303 304 305 306 307 308 309 310 311 312 |
# File 'lib/lexm/lemma.rb', line 299

# Validate annotation key and value format.
#
# The key must consist solely of letters, digits and underscores. The
# pattern is anchored with \A and \z so the whole key is checked; with
# ^ and $ a multi-line key would pass as soon as one of its lines was
# valid. String values must not contain square brackets; non-String
# values are not inspected.
#
# @param key [String, Symbol] annotation key
# @param value [Object] annotation value
# @return [Boolean] true when the pair is valid (invalid pairs raise)
# @raise [RuntimeError] if the key or a String value is malformed
def validateAnnotation(key, value)
  unless key =~ /\A[a-zA-Z0-9_]+\z/
    raise "Invalid annotation key: '#{key}' (must contain only letters, numbers, and underscores)"
  end

  if value.is_a?(String) && (value.include?(']') || value.include?('['))
    raise "Invalid annotation value for '#{key}': cannot contain square brackets"
  end

  true
end