Class: Lingo::Database::Source

Inherits:
Object
  • Object
show all
Defined in:
lib/lingo/database/source.rb,
lib/lingo/database/source/key_value.rb,
lib/lingo/database/source/multi_key.rb,
lib/lingo/database/source/word_class.rb,
lib/lingo/database/source/multi_value.rb,
lib/lingo/database/source/single_word.rb

Overview

Die Klasse Source stellt eine einheitliche Schnittstelle auf die unterschiedlichen Formate von Wörterbuch-Quelldateien bereit. Die Identifizierung der Quelldatei erfolgt über die ID der Datei, so wie sie in der Sprachkonfigurationsdatei de.lang unter language/dictionary/databases hinterlegt ist.

Die Verarbeitung der Wörterbücher erfolgt mittels des Iterators each, der für jede Zeile der Quelldatei ein Array bereitstellt in der Form [ key, [val1, val2, ...] ].

Nicht korrekt erkannte Zeilen werden abgewiesen und in eine Revoke-Datei gespeichert, die an der Dateiendung .rev zu erkennen ist.

Direct Known Subclasses

KeyValue, MultiKey, MultiValue, SingleWord, WordClass

Defined Under Namespace

Classes: KeyValue, MultiKey, MultiValue, SingleWord, WordClass

Constant Summary collapse

# Default +sep+ argument of .lexicals: the string each value is split on
# before the parts are handed to Language::Lexical.new.
LEXICAL_SEPARATOR =
'#'.freeze
# Fallback for the 'separator' config entry read in #initialize.
DEFAULT_SEPARATOR =
nil
# Fallback for the 'def-wc' config entry read in #initialize.
DEFAULT_DEF_WC =
nil
# Byte-size limit used by #each_line: a line is only accepted when its
# byte size is strictly below this value.
MAX_LENGTH =
4096

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(name = nil, config = {}, id = nil) ⇒ Source

Returns a new instance of Source.



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/lingo/database/source.rb', line 84

# Sets up a source for the dictionary file +name+.
#
# @param name [String, nil] dictionary name, resolved through
#   Lingo.find(:dict, name, relax: true); when nil no file is attached
# @param config [Hash] source configuration; the keys 'separator' and
#   'def-wc' are read here, falling back to the class constants
#   DEFAULT_SEPARATOR and DEFAULT_DEF_WC
# @param id [Object, nil] database id; a reject (.rev) file is only set up
#   when an id is given and a source path was resolved
# @raise [SourceFileNotFoundError] when +name+ and +id+ are given and the
#   resolved source does not exist
# @raise [FileNotFoundError] when +name+ is given without +id+ and the
#   resolved source does not exist
def initialize(name = nil, config = {}, id = nil)
  @config = config

  source_path = name ? Lingo.find(:dict, name, relax: true) : nil

  # The reject file is optional: failing to locate a store location for it
  # is swallowed and simply leaves the source without one.
  reject_path =
    if id && source_path
      begin
        Lingo.find(:store, source_path) << '.rev'
      rescue NoWritableStoreError, SourceFileNotFoundError
        nil
      end
    end

  @src = Pathname.new(source_path) if source_path
  @rej = Pathname.new(reject_path) if reject_path

  if name && !@src.exist?
    raise id ? SourceFileNotFoundError.new(name, id) : FileNotFoundError.new(name)
  end

  # Constants are looked up on the concrete class so that subclasses can
  # provide their own defaults.
  klass = self.class
  @sep = config.fetch('separator', klass::DEFAULT_SEPARATOR)

  default_wc = config.fetch('def-wc', klass::DEFAULT_DEF_WC)
  @def = default_wc ? default_wc.downcase : default_wc

  @wrd = "(?:#{Language::Char::ANY})+"
  @pat = /^#{@wrd}$/

  @pos = 0
  @rej_cnt = 0
end

Instance Attribute Details

#pos ⇒ Object (readonly)

Returns the value of attribute pos.



82
83
84
# File 'lib/lingo/database/source.rb', line 82

# Reader for @pos.
#
# @pos is set to 0 in #initialize and increased by the byte size of every
# line read in #each_line, so it is the number of source bytes consumed so
# far (accumulated over all #each_line runs on this instance).
#
# @return [Integer] the current value of @pos
def pos
  @pos
end

Class Method Details

.from_config(config, id = nil) ⇒ Object



66
67
68
69
# File 'lib/lingo/database/source.rb', line 66

# Builds a source from a database configuration hash.
#
# The concrete class is chosen by the 'txt-format' entry (default
# 'key_value'), resolved via Lingo.get_const relative to this class, and is
# instantiated with the 'name' entry, the whole config and +id+.
#
# @param config [Hash] database configuration ('txt-format', 'name', ...)
# @param id [Object, nil] database id, passed through to the constructor
# @return [Object] the newly created source instance
def from_config(config, id = nil)
  source_class = Lingo.get_const(config.fetch('txt-format', 'key_value'), self)
  source_class.new(config['name'], config, id)
end

.from_id(id, lingo) ⇒ Object



62
63
64
# File 'lib/lingo/database/source.rb', line 62

# Builds a source for the database +id+.
#
# The configuration is obtained from lingo.database_config(id) and handed,
# together with +id+, to .from_config.
#
# @param id [Object] database id
# @param lingo [#database_config] object providing the database configuration
# @return [Object] whatever .from_config returns
def from_id(id, lingo)
  config = lingo.database_config(id)
  from_config(config, id)
end

.lexicals(val, sep = LEXICAL_SEPARATOR, ref = KEY_REF_RE) ⇒ Object



71
72
73
74
75
76
77
78
# File 'lib/lingo/database/source.rb', line 71

# Converts raw value strings into lexical entries.
#
# Each string matching +ref+ is replaced by the integer value of the
# pattern's first capture group. Every other string is split on +sep+: the
# stripped first part and the list of remaining parts are passed to
# Language::Lexical.new. Duplicates are removed from the result.
#
# @param val [Array<String>, nil] raw values
# @param sep [String, Regexp] separator used to split a value
# @param ref [Regexp] pattern identifying key references
# @return [Array, nil] unique converted entries, or nil when +val+ is nil
def lexicals(val, sep = LEXICAL_SEPARATOR, ref = KEY_REF_RE)
  return unless val

  entries = val.map do |item|
    if item =~ ref
      Regexp.last_match(1).to_i
    else
      base, *rest = item.split(sep)
      Language::Lexical.new(base.strip, rest)
    end
  end

  entries.uniq
end

Instance Method Details

#each ⇒ Object



114
115
116
117
# File 'lib/lingo/database/source.rb', line 114

# Iterates over the parsed entries of the source.
#
# Every accepted line delivered by #each_line is run through parse_line
# and the result is yielded to the block.
#
# @yield [Object] the value returned by parse_line for one line
# @return [Enumerator] when called without a block
# @return [Object] otherwise the return value of #each_line
def each
  return enum_for(:each) unless block_given?

  each_line do |line, key, val|
    parsed = parse_line(line, key, val)
    yield parsed
  end
end

#each_dump(*args) ⇒ Object



151
152
153
154
# File 'lib/lingo/database/source.rb', line 151

# Iterates over the dumped representation of every entry.
#
# Each key/value pair from #each_lexical is passed, together with +args+,
# to dump_line; the result is yielded to the block.
#
# @param args [Array] extra arguments forwarded to dump_line
# @yield [Object] the value returned by dump_line for one entry
# @return [Enumerator] when called without a block
# @return [Object] otherwise the return value of #each_lexical
def each_dump(*args)
  return enum_for(:each_dump, *args) unless block_given?

  each_lexical do |key, val|
    dumped = dump_line(key, val, *args)
    yield dumped
  end
end

#each_lexical ⇒ Object



146
147
148
149
# File 'lib/lingo/database/source.rb', line 146

# Iterates over the entries with their values converted to lexicals.
#
# For every key/value pair produced by #each, the values are converted
# with the class method .lexicals and yielded together with the key.
#
# @yield [key, lexicals] the entry key and its converted values
# @return [Enumerator] when called without a block
# @return [Object] otherwise the return value of #each
def each_lexical
  return enum_for(:each_lexical) unless block_given?

  each do |key, val|
    converted = self.class.lexicals(val)
    yield key, converted
  end
end

#each_line ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'lib/lingo/database/source.rb', line 119

# Reads the source file line by line and yields every accepted line.
#
# For each line the byte size is added to @pos, then the line is stripped.
# Empty lines and lines starting with '#' are skipped. A line is accepted
# when its original byte size is below MAX_LENGTH and its downcased form
# matches @pat; it is then yielded with the pattern's first two capture
# groups. Every other line increments @rej_cnt and, if a reject file is
# configured, is written to it. The reject file is removed again when it
# ends up empty.
#
# Note that @pos and @rej_cnt are not reset here, so they accumulate over
# repeated runs on the same instance.
#
# @yield [line, key, val] the normalized line and captures 1 and 2 of @pat
# @return [Enumerator] when called without a block
# @return [self] otherwise
def each_line
  return enum_for(:each_line) unless block_given?

  reject_io = @rej ? @rej.open('w', encoding: ENCODING) : nil

  @src.each_line($/, encoding: ENCODING) do |text|
    # Measure before stripping: position and length limit refer to the raw line.
    bytes = text.bytesize
    @pos += bytes

    text.strip!
    next if text.empty? || text.start_with?('#')

    # The line is downcased in place only once the length check has passed,
    # so over-long rejects are written with their original case.
    accepted = bytes < MAX_LENGTH && text.replace(Unicode.downcase(text)) =~ @pat

    if accepted
      yield text, Regexp.last_match(1), Regexp.last_match(2)
    else
      @rej_cnt += 1
      reject_io.puts(text) if reject_io
    end
  end

  self
ensure
  if reject_io
    reject_io.close
    # Do not leave an empty reject file behind.
    @rej.delete if @rej.size.zero?
  end
end

#rejected ⇒ Object



160
161
162
# File 'lib/lingo/database/source.rb', line 160

# Reports what has been rejected so far.
#
# @return [Array] a two-element array: @rej_cnt, the number of lines
#   rejected by #each_line since construction, and @rej, the Pathname of the
#   reject (.rev) file, or nil when none was set up in #initialize. The
#   file itself may not exist, since #each_line deletes it when it is empty.
def rejected
  [@rej_cnt, @rej]
end

#set(db, key, val) ⇒ Object



156
157
158
# File 'lib/lingo/database/source.rb', line 156

# Stores +val+ under +key+ in +db+ using db[key] = val.
#
# NOTE(review): presumably a hook that subclasses override to change how
# entries are written - confirm against the subclasses.
#
# @param db [#[]=] target store
# @param key [Object] entry key
# @param val [Object] entry value
# @return [Object] +val+ (the value of the assignment expression)
def set(db, key, val)
  db[key] = val
end

#size ⇒ Object



110
111
112
# File 'lib/lingo/database/source.rb', line 110

# Size of the source file, as reported by @src.size.
#
# @src is the Pathname assigned in #initialize; it is only set when a
# source path was resolved, so calling this on a source created without a
# name raises NoMethodError.
#
# @return [Integer] the value of @src.size
def size
  @src.size
end