Class: Lingo::Database::Source
- Inherits:
-
Object
- Object
- Lingo::Database::Source
show all
- Defined in:
- lib/lingo/database/source.rb,
lib/lingo/database/source/key_value.rb,
lib/lingo/database/source/multi_key.rb,
lib/lingo/database/source/word_class.rb,
lib/lingo/database/source/multi_value.rb,
lib/lingo/database/source/single_word.rb
Overview
Die Klasse Source stellt eine einheitliche Schnittstelle für die unterschiedlichen Formate von Wörterbuch-Quelldateien bereit. Die Identifizierung der Quelldatei erfolgt über die ID der Datei, so wie sie in der Sprachkonfigurationsdatei de.lang unter language/dictionary/databases hinterlegt ist.
Die Verarbeitung der Wörterbücher erfolgt mittels des Iterators each, der für jede Zeile der Quelldatei ein Array in der Form [ key, [val1, val2, ...] ] bereitstellt.
Nicht korrekt erkannte Zeilen werden abgewiesen und in einer Revoke-Datei gespeichert, die an der Dateiendung .rev zu erkennen ist.
Defined Under Namespace
Classes: KeyValue, MultiKey, MultiValue, SingleWord, WordClass
Constant Summary
collapse
- LEXICAL_SEPARATOR =
'#'.freeze
- DEFAULT_SEPARATOR =
nil
- DEFAULT_DEF_WC =
nil
- MAX_LENGTH =
4096
Instance Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
Constructor Details
#initialize(name = nil, config = {}, id = nil) ⇒ Source
Returns a new instance of Source.
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
# File 'lib/lingo/database/source.rb', line 84
# Builds a source for the dictionary file +name+.
#
# @param name [String, nil] dictionary name, resolved through
#   +Lingo.find(:dict, name, relax: true)+; nothing is resolved when nil
# @param config [Hash] source configuration; the keys 'separator' and
#   'def-wc' are read here, falling back to the class constants
#   DEFAULT_SEPARATOR and DEFAULT_DEF_WC
# @param id [Object, nil] dictionary id; a reject file (".rev") is only
#   set up when an id is given and a source file was resolved
# @raise [SourceFileNotFoundError] if +name+ and +id+ are given but the
#   resolved source file does not exist
# @raise [FileNotFoundError] if only +name+ is given and the resolved
#   source file does not exist
def initialize(name = nil, config = {}, id = nil)
  @config = config

  src_file = name ? Lingo.find(:dict, name, relax: true) : nil

  rej_file = nil
  if id && src_file
    begin
      # The path returned by Lingo.find is extended in place.
      rej_file = Lingo.find(:store, src_file) << '.rev'
    rescue NoWritableStoreError, SourceFileNotFoundError
      # No store location available: carry on without a reject file.
      rej_file = nil
    end
  end

  @src = Pathname.new(src_file) if src_file
  @rej = Pathname.new(rej_file) if rej_file

  if name && !@src.exist?
    raise id ? SourceFileNotFoundError.new(name, id) : FileNotFoundError.new(name)
  end

  @sep = config.fetch('separator', self.class::DEFAULT_SEPARATOR)

  # The default word class is kept in lower case when one is configured.
  default_wc = config.fetch('def-wc', self.class::DEFAULT_DEF_WC)
  @def = default_wc && default_wc.downcase

  # Base pattern: a line made up solely of word characters.
  @wrd = "(?:#{Language::Char::ANY})+"
  @pat = /^#{@wrd}$/

  @pos = 0
  @rej_cnt = 0
end
|
Instance Attribute Details
#pos ⇒ Object
Returns the value of attribute pos.
82
83
84
|
# File 'lib/lingo/database/source.rb', line 82
# Reader for @pos.
#
# @pos starts at 0 in #initialize and is advanced by the byte size of
# every line read in #each_line.
#
# @return [Integer] number of bytes read from the source so far
def pos
@pos
end
|
Class Method Details
.from_config(config, id = nil) ⇒ Object
66
67
68
69
|
# File 'lib/lingo/database/source.rb', line 66
# Instantiates the source class selected by a database configuration.
#
# The class is looked up with +Lingo.get_const+ from the 'txt-format'
# entry of +config+ ('key_value' when the entry is missing) and is
# constructed with the configured 'name', the full +config+ and +id+.
#
# @param config [Hash] database configuration ('txt-format', 'name')
# @param id [Object, nil] dictionary id handed on to the constructor
# @return [Object] the newly created source instance
def from_config(config, id = nil)
  source_class = Lingo.get_const(config.fetch('txt-format', 'key_value'), self)
  source_class.new(config['name'], config, id)
end
|
.from_id(id, lingo) ⇒ Object
62
63
64
|
# File 'lib/lingo/database/source.rb', line 62
# Creates the source for the dictionary registered under +id+.
#
# The configuration is obtained from +lingo.database_config(id)+ and
# handed to .from_config together with the id.
#
# @param id [Object] dictionary id
# @param lingo [#database_config] object providing the configuration
# @return [Object] whatever .from_config returns
def from_id(id, lingo)
  config = lingo.database_config(id)
  from_config(config, id)
end
|
.lexicals(val, sep = LEXICAL_SEPARATOR, ref = KEY_REF_RE) ⇒ Object
71
72
73
74
75
76
77
78
|
# File 'lib/lingo/database/source.rb', line 71
# Converts raw value strings into lexical entries.
#
# A string matching +ref+ is replaced by its first capture group
# converted with +to_i+. Any other string is split on +sep+: the first
# piece (stripped) and the list of remaining pieces are passed to
# +Language::Lexical.new+. Duplicate results are dropped.
#
# @param val [Array<String>, nil] raw values
# @param sep [String, Regexp] separator used to split a value
# @param ref [Regexp] pattern recognising key references
# @return [Array, nil] converted entries, or nil when +val+ is nil
def lexicals(val, sep = LEXICAL_SEPARATOR, ref = KEY_REF_RE)
  return unless val

  entries = val.map do |str|
    if str =~ ref
      Regexp.last_match(1).to_i
    else
      form, *attrs = str.split(sep)
      Language::Lexical.new(form.strip, attrs)
    end
  end

  entries.uniq
end
|
Instance Method Details
#each ⇒ Object
114
115
116
117
|
# File 'lib/lingo/database/source.rb', line 114
# Iterates over the parsed entries of the source.
#
# Every line accepted by #each_line is run through +parse_line+ and the
# result is yielded. Without a block an Enumerator is returned.
#
# @yield [Object] the value returned by +parse_line+ for one line
# @return [Object, Enumerator] the result of #each_line, or an
#   Enumerator when no block is given
def each
  if block_given?
    each_line do |line, key, val|
      yield parse_line(line, key, val)
    end
  else
    to_enum(__method__)
  end
end
|
#each_dump(*args) ⇒ Object
151
152
153
154
|
# File 'lib/lingo/database/source.rb', line 151
# Iterates over the dumped form of every lexical entry.
#
# Each key/value pair produced by #each_lexical is passed, together
# with +args+, to +dump_line+ and the result is yielded. Without a
# block an Enumerator (bound to the same +args+) is returned.
#
# @param args [Array] extra arguments forwarded to +dump_line+
# @yield [Object] the value returned by +dump_line+
# @return [Object, Enumerator] the result of #each_lexical, or an
#   Enumerator when no block is given
def each_dump(*args)
  return to_enum(__method__, *args) unless block_given?

  each_lexical do |key, val|
    yield dump_line(key, val, *args)
  end
end
|
#each_lexical ⇒ Object
146
147
148
149
|
# File 'lib/lingo/database/source.rb', line 146
# Iterates over the entries of the source with their values converted
# by the class-level +lexicals+ method.
#
# Without a block an Enumerator is returned.
#
# @yield [key, lexicals] the entry key and its converted values
# @return [Object, Enumerator] the result of #each, or an Enumerator
#   when no block is given
def each_lexical
  return to_enum(__method__) unless block_given?

  converter = self.class
  each do |key, val|
    yield key, converter.lexicals(val)
  end
end
|
#each_line ⇒ Object
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
|
# File 'lib/lingo/database/source.rb', line 119
# Reads the source file line by line and yields every accepted line.
#
# For each raw line @pos is advanced by its byte size; the line is then
# stripped, and blank lines as well as lines starting with '#' are
# skipped. A line is accepted when its raw byte size is below
# MAX_LENGTH and, after lower-casing through +Unicode.downcase+, it
# matches @pat. Everything else is counted in @rej_cnt and, if a reject
# file is configured, written to it. The reject file is always closed
# afterwards and removed again when it stayed empty.
#
# @yield [line, key, val] the normalised line and the first two capture
#   groups of the match against @pat
# @return [self, Enumerator] self, or an Enumerator when no block is
#   given
def each_line
  return to_enum(__method__) unless block_given?

  rejects = @rej ? @rej.open('w', encoding: ENCODING) : nil

  @src.each_line($/, encoding: ENCODING) do |raw|
    bytes = raw.bytesize
    @pos += bytes

    raw.strip!
    next if raw.empty? || raw.start_with?('#')

    # The match must happen right here so that the capture groups read
    # below belong to this line.
    if bytes < MAX_LENGTH && raw.replace(Unicode.downcase(raw)) =~ @pat
      yield raw, Regexp.last_match(1), Regexp.last_match(2)
    else
      @rej_cnt += 1
      rejects.puts(raw) if rejects
    end
  end

  self
ensure
  if rejects
    rejects.close
    # Do not leave an empty reject file behind.
    @rej.delete if @rej.size.zero?
  end
end
|
#rejected ⇒ Object
160
161
162
|
# File 'lib/lingo/database/source.rb', line 160
# Reports the rejected lines of this source.
#
# @return [Array] a two-element array: @rej_cnt (number of lines
#   rejected in #each_line) and @rej (the reject file path set in
#   #initialize, nil when none was set up)
def rejected
[@rej_cnt, @rej]
end
|
#set(db, key, val) ⇒ Object
156
157
158
|
# File 'lib/lingo/database/source.rb', line 156
# Stores +val+ under +key+ in +db+ via +db[key] = val+.
#
# @param db [#[]=] target store
# @param key [Object] entry key
# @param val [Object] entry value
# @return [Object] +val+ (the value of the assignment expression)
def set(db, key, val)
db[key] = val
end
|
#size ⇒ Object
110
111
112
|
# File 'lib/lingo/database/source.rb', line 110
# Size of the source file, delegated to @src.size.
#
# @src is the Pathname assigned in #initialize when a source file was
# resolved; it is left unset when no name was given, in which case this
# call fails on nil.
#
# @return [Integer] whatever @src.size reports (bytes for a Pathname)
def size
@src.size
end
|