Class: Lingo::Database::Source::WordClass

Inherits:
Lingo::Database::Source show all
Includes:
ArrayUtils
Defined in:
lib/lingo/database/source/word_class.rb

Overview

Abgeleitet von Source, behandelt die Klasse Dateien mit dem Format WordClass. Eine Zeile "essen,essen #v essen #o esse #s\n" wird gewandelt in [ 'essen', ['esse#s', 'essen#v', 'essen#o'] ]. Der Trenner zwischen Schlüssel und Projektion kann über den Parameter separator geändert werden.

Constant Summary collapse

DEFAULT_SEPARATOR =
','.freeze
GENDER_SEPARATOR =
'.'.freeze
VALUE_SEPARATOR =
'|'.freeze
WC_SEPARATOR =
'#'.freeze
SCAN_RE =
/(\S.*?)\s*#{WC_SEPARATOR}(\S+)/o

Constants inherited from Lingo::Database::Source

DEFAULT_DEF_WC, LEXICAL_SEPARATOR, MAX_LENGTH

Instance Attribute Summary

Attributes inherited from Lingo::Database::Source

#pos

Instance Method Summary collapse

Methods inherited from Lingo::Database::Source

#each, #each_dump, #each_lexical, #each_line, from_config, from_id, lexicals, #rejected, #set, #size

Constructor Details

#initialize ⇒ WordClass

Returns a new instance of WordClass.



53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/lingo/database/source/word_class.rb', line 53

# Builds the line-validation pattern (@pat) after the parent constructor
# has run. All constructor arguments are forwarded unchanged to +super+.
#
# NOTE(review): @sep (key separator) and @wrd (word pattern source) are read
# here but not assigned in this method - presumably set by the parent
# constructor; confirm against Source#initialize.
def initialize(*)
  super

  gender_sep = Regexp.escape(GENDER_SEPARATOR)
  value_sep  = Regexp.escape(VALUE_SEPARATOR)
  key_sep    = Regexp.escape(@sep)

  # One single-character token (optionally followed by +suffix+), repeated
  # any number of times with the value separator in between.
  token_list = lambda { |suffix|
    "\\w#{suffix}(?:#{value_sep}\\w#{suffix})*"
  }

  attr_part   = token_list['[+]?']
  gender_part = token_list['']

  # Word class marker, attribute list, then an optional gender list.
  word_class = "#{WC_SEPARATOR}#{attr_part}(?:#{gender_sep}#{gender_part})?"

  @pat = /^(#{@wrd})#{key_sep}((?:#{@wrd}#{word_class})+)$/
end

Instance Method Details

#dump_line(key, val, key_sep = nil, val_sep = nil, compact = true) ⇒ Object



81
82
83
# File 'lib/lingo/database/source/word_class.rb', line 81

# Formats one dump line: the key, a key separator, and the dumped values
# joined by a value separator.
#
# key     - the entry key (interpolated via #to_s)
# val     - values handed on to #dump_values
# key_sep - separator between key and values; falls back to @sep when nil
# val_sep - separator between individual values; falls back to a single space
# compact - forwarded to #dump_values
#
# Any further positional arguments are accepted and ignored.
# Returns the formatted String.
def dump_line(key, val, key_sep = nil, val_sep = nil, compact = true, *)
  key_delimiter   = key_sep || @sep
  value_delimiter = val_sep || ' '
  rendered_values = dump_values(val, compact).join(value_delimiter)

  "#{key}#{key_delimiter}#{rendered_values}"
end

#dump_values(val, compact = true) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/lingo/database/source/word_class.rb', line 85

# Renders lexicals as a sorted list of "form #attrs[.genders]" strings.
#
# val     - enumerable of objects responding to #form, #attr and #gender
# compact - when true, all lexicals sharing a form are merged into a single
#           entry whose attributes and genders are each de-duplicated,
#           sorted and joined with VALUE_SEPARATOR; when false, every
#           lexical yields its own entry
#
# Returns an Array of Strings. The gender part (GENDER_SEPARATOR followed by
# the genders) is only appended when at least one gender is present.
def dump_values(val, compact = true)
  # Non-destructive: drops nils, de-duplicates, sorts and joins.
  join = lambda { |items| items.compact.uniq.sort.join(VALUE_SEPARATOR) }

  if compact
    values = Hash.new { |h, k| h[k] = [[], []] }

    val.each { |lex|
      attrs, genders = values[lex.form]
      attrs << lex.attr
      genders << lex.gender
    }
  else
    values = val.map { |lex| [lex.form, [[lex.attr], [lex.gender]]] }
  end

  # Sort on a nil-safe key. Comparing the raw tuples raises ArgumentError as
  # soon as two entries share form and attribute but only one of them has a
  # gender (nil <=> String is nil). Missing values sort before present ones.
  ordered = values.sort_by { |form, (attrs, genders)|
    [form, attrs.map(&:to_s), genders.map(&:to_s)]
  }

  ordered.map { |form, (attrs, genders)|
    res = "#{form} #{WC_SEPARATOR}#{join[attrs]}"
    genders.any? ? "#{res}#{GENDER_SEPARATOR}#{join[genders]}" : res
  }
end

#parse_line(line, key, val) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/lingo/database/source/word_class.rb', line 66

# Splits the value part of a line into lexicals.
#
# line - the full source line (not used here)
# key  - the raw key; returned stripped
# val  - the raw value part, scanned with SCAN_RE for "form #spec" pairs,
#        where spec is "attrs" or "attrs.genders" and both lists are
#        separated by VALUE_SEPARATOR
#
# Every attribute/gender pair yielded by #combinations is turned into one
# lexical via #lexical; a missing gender part is passed on as nil.
#
# Returns a two-element Array: [stripped key, Array of lexicals].
def parse_line(line, key, val)
  pairs = val.strip.scan(SCAN_RE)

  lexicals = pairs.each_with_object([]) { |(form, spec), collected|
    attr_part, gender_part = spec.split(GENDER_SEPARATOR)

    attrs   = attr_part.split(VALUE_SEPARATOR)
    genders = gender_part ? gender_part.split(VALUE_SEPARATOR) : [nil]

    combinations(attrs, genders) { |attr, gender|
      collected << lexical(form, attr, gender)
    }
  }

  [key.strip, lexicals]
end