Class: Myasorubka::AOT::Dictionary

Inherits:
Object
  • Object
show all
Defined in:
lib/myasorubka/aot/dictionary.rb

Overview

An MRD file is a text file that contains a morphological dictionary of a natural language. MRD is an abbreviation of “morphological dictionary”.

All words in an MRD file are written in UPPERCASE. An MRD file has the following sections: section of flexion and prefix models, section of accentual models, section of user sessions, section of prefix sets, and section of lemmas.

Defined Under Namespace

Classes: Section

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(filename, language = nil, ee = nil, ie = Encoding.default_external) ⇒ Dictionary

The parser should be initialized by passing filename and language parameters.



19
20
21
22
23
24
25
26
27
28
29
# File 'lib/myasorubka/aot/dictionary.rb', line 19

# Reads the whole MRD file into memory and computes the starting offset of
# every section.
#
# @param filename [String] path to the MRD file
# @param language [Symbol, nil] dictionary language; the rules and lemmas
#   parsers fold "Ё" into "Е" when it is +:russian+
# @param ee [Encoding, String, nil] external encoding handed to File.readlines
# @param ie [Encoding, String, nil] internal encoding handed to File.readlines
def initialize(filename, language = nil, ee = nil, ie = Encoding.default_external)
  @filename = filename
  @language = language

  # The encoding options must be passed as real keyword arguments: since
  # Ruby 3.0 a trailing positional Hash is no longer converted to keywords,
  # so it would be taken for the +limit+ argument of File.readlines.
  @lines = File.readlines(filename, $/, internal_encoding: ie, external_encoding: ee)

  # Each section begins right after the previous one: previous offset plus
  # the previous section's length plus one extra line.
  # NOTE(review): the extra line is presumably the section's size header --
  # confirm against Section.
  @rules_offset = 0
  @accents_offset = rules_offset + rules.length + 1
  @logs_offset = accents_offset + accents.length + 1
  @prefixes_offset = logs_offset + logs.length + 1
  @lemmas_offset = prefixes_offset + prefixes.length + 1
end

Instance Attribute Details

#accents_offsetObject (readonly)

Returns the value of attribute accents_offset.



13
14
15
# File 'lib/myasorubka/aot/dictionary.rb', line 13

# Reader for the @accents_offset instance variable.
#
# @return [Object, nil] the stored value, or nil when it has not been set
def accents_offset; @accents_offset; end

#filenameObject (readonly)

Returns the value of attribute filename.



12
13
14
# File 'lib/myasorubka/aot/dictionary.rb', line 12

# Reader for the @filename instance variable.
#
# @return [Object, nil] the stored value, or nil when it has not been set
def filename; @filename; end

#languageObject (readonly)

Returns the value of attribute language.



12
13
14
# File 'lib/myasorubka/aot/dictionary.rb', line 12

# Reader for the @language instance variable.
#
# @return [Object, nil] the stored value, or nil when it has not been set
def language; @language; end

#lemmas_offsetObject (readonly)

Returns the value of attribute lemmas_offset.



13
14
15
# File 'lib/myasorubka/aot/dictionary.rb', line 13

# Reader for the @lemmas_offset instance variable.
#
# @return [Object, nil] the stored value, or nil when it has not been set
def lemmas_offset; @lemmas_offset; end

#linesObject (readonly)

Returns the value of attribute lines.



12
13
14
# File 'lib/myasorubka/aot/dictionary.rb', line 12

# Reader for the @lines instance variable.
#
# @return [Object, nil] the stored value, or nil when it has not been set
def lines; @lines; end

#logs_offsetObject (readonly)

Returns the value of attribute logs_offset.



13
14
15
# File 'lib/myasorubka/aot/dictionary.rb', line 13

# Reader for the @logs_offset instance variable.
#
# @return [Object, nil] the stored value, or nil when it has not been set
def logs_offset; @logs_offset; end

#prefixes_offsetObject (readonly)

Returns the value of attribute prefixes_offset.



13
14
15
# File 'lib/myasorubka/aot/dictionary.rb', line 13

# Reader for the @prefixes_offset instance variable.
#
# @return [Object, nil] the stored value, or nil when it has not been set
def prefixes_offset; @prefixes_offset; end

#rules_offsetObject (readonly)

Returns the value of attribute rules_offset.



13
14
15
# File 'lib/myasorubka/aot/dictionary.rb', line 13

# Reader for the @rules_offset instance variable.
#
# @return [Object, nil] the stored value, or nil when it has not been set
def rules_offset; @rules_offset; end

Instance Method Details

#accentsObject

Accents section accessor.



93
94
95
# File 'lib/myasorubka/aot/dictionary.rb', line 93

# Accents section accessor.
#
# Builds the Section from #lines and #accents_offset on first use and caches
# it in @accents; later calls return the cached object.
def accents
  return @accents if @accents

  @accents = Section.new(lines, accents_offset)
end

#inspectObject

:nodoc:



133
134
135
136
# File 'lib/myasorubka/aot/dictionary.rb', line 133

# Short human-readable representation: the class name followed by the
# inspected filename and language.
#
# @return [String]
def inspect
  fields = [self.class.name, filename.inspect, language.inspect]
  format('#<%s filename=%s language=%s>', *fields)
end

#lemmasObject

Lemmas section accessor.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/myasorubka/aot/dictionary.rb', line 111

# Lemmas section accessor.
#
# Builds the Section from #lines and #lemmas_offset on first use and caches
# it in @lemmas. The block given to Section.new turns one whitespace-separated
# line into a six-element array:
#
#   [stem, rule_id, accent_id, session_id, ancode, prefix_id]
#
# where a "#" stem and a "-" ancode or prefix id become nil, the numeric ids
# are converted with to_i, and only the first two characters of the ancode
# are kept.
def lemmas
  return @lemmas if @lemmas

  @lemmas = Section.new(lines, lemmas_offset) do |line|
    stem, rule_id, accent_id, session_id, ancode, prefix_id = line.split

    # Russian dictionaries are normalised so that "Ё" and "Е" coincide.
    stem = stem.tr('Ёё', 'Ее') if language == :russian && stem

    [
      (stem unless stem == '#'),
      rule_id.to_i,
      accent_id.to_i,
      session_id.to_i,
      (ancode[0..1] unless ancode == '-'),
      (prefix_id.to_i unless prefix_id == '-')
    ]
  end
end

#logsObject

Logs section accessor.



99
100
101
# File 'lib/myasorubka/aot/dictionary.rb', line 99

# Logs section accessor.
#
# Builds the Section from #lines and #logs_offset on first use and caches it
# in @logs; later calls return the cached object.
def logs
  return @logs if @logs

  @logs = Section.new(lines, logs_offset)
end

#prefixesObject

Prefixes section accessor.



105
106
107
# File 'lib/myasorubka/aot/dictionary.rb', line 105

# Prefixes section accessor.
#
# Builds the Section from #lines and #prefixes_offset on first use and caches
# it in @prefixes; later calls return the cached object.
def prefixes
  return @prefixes if @prefixes

  @prefixes = Section.new(lines, prefixes_offset)
end

#rulesObject

Rules section accessor.



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/myasorubka/aot/dictionary.rb', line 73

# Rules section accessor.
#
# Builds the Section from #lines and #rules_offset on first use and caches it
# in @rules. The block given to Section.new splits one line on "%" into rule
# entries, ignores the empty ones, and turns each "suffix*ancode*prefix" entry
# into
#
#   [suffix, ancode, prefix]
#
# keeping only the first two characters of the ancode; prefix is nil when the
# entry has no third field.
def rules
  return @rules if @rules

  @rules = Section.new(lines, rules_offset) do |line|
    line.split('%').reject(&:empty?).map do |rule_line|
      suffix, ancode, prefix = rule_line.split('*')

      # Russian dictionaries are normalised so that "Ё" and "Е" coincide.
      if language == :russian
        suffix &&= suffix.tr('Ёё', 'Ее')
        prefix &&= prefix.tr('Ёё', 'Ее')
      end

      [suffix, ancode[0..1], prefix]
    end
  end
end