Class: PROIEL::DictionaryBuilder

Inherits:
Object
  • Object
show all
Defined in:
lib/proiel/dictionary/builder.rb

Constant Summary collapse

# Schema version written to the +schema-version+ attribute of exported XML.
CURRENT_SCHEMA_VERSION = '3.0'.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ DictionaryBuilder

Returns a new instance of DictionaryBuilder.



15
16
17
18
19
20
21
# File 'lib/proiel/dictionary/builder.rb', line 15

# Sets up an empty builder: no language or license yet, no sources, no
# lemmata, and a fresh valency lexicon.
def initialize
  # Both stay nil until add_source! stores a value for them.
  @language = @license = nil

  @lemmata = {}
  @sources = []
  @valency = PROIEL::Valency::Lexicon.new
end

Instance Attribute Details

#language ⇒ Object (readonly)

Returns the value of attribute language.



11
12
13
# File 'lib/proiel/dictionary/builder.rb', line 11

# Language of the dictionary.
#
# Starts out as nil and is taken from +source.language+ by add_source!;
# sources added afterwards must report the same language.
#
# @return [Object, nil] the stored language (presumably a language tag — TODO confirm)
def language
  @language
end

#lemmata ⇒ Object (readonly)

Returns the value of attribute lemmata.



13
14
15
# File 'lib/proiel/dictionary/builder.rb', line 13

# Lemma index built up by the builder.
#
# Starts out as an empty Hash. to_xml treats each key as a String and uses
# the part before the first comma as the lemma form — presumably the keys are
# "form,part_of_speech" strings; TODO confirm against initialize_lemma!.
#
# @return [Hash] the lemma index
def lemmata
  @lemmata
end

#license ⇒ Object (readonly)

Returns the value of attribute license.



10
11
12
# File 'lib/proiel/dictionary/builder.rb', line 10

# License of the dictionary.
#
# Starts out as nil and is taken from +source.license+ by add_source!;
# sources added afterwards must report the same license.
#
# @return [Object, nil] the stored license
def license
  @license
end

#sources ⇒ Object (readonly)

Returns the value of attribute sources.



12
13
14
# File 'lib/proiel/dictionary/builder.rb', line 12

# Sources added so far, in the order add_source! received them.
#
# @return [Array] the accumulated sources (empty for a new builder)
def sources
  @sources
end

Instance Method Details

#add_external_glosses!(filename, languages = %i(eng)) ⇒ Object

Raises:

  • (ArgumentError)


60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/proiel/dictionary/builder.rb', line 60

# Merges glosses from a tab-separated file into the builder's lemmata.
#
# The file needs a header row. Its +lemma+ and +part_of_speech+ columns
# identify the lemma (via initialize_lemma!), and the column named after each
# requested language supplies that language's gloss.
#
# @param filename [String] path to the tab-separated gloss file
# @param languages [Array<Symbol>] header names of the gloss columns to import;
#   a language whose column is missing from a row is stored with a nil gloss
# @raise [ArgumentError] if +filename+ is not a String or names no existing file
def add_external_glosses!(filename, languages = %i(eng))
  raise ArgumentError, 'filename expected' unless filename.is_a?(String)
  # File.exists? was deprecated and removed in Ruby 3.2; calling it there
  # raises NoMethodError instead of the ArgumentError callers expect.
  raise ArgumentError, 'file not found' unless File.exist?(filename)

  # NOTE(review): the "\b" quote character presumably exists to keep literal
  # double quotes in glosses from being parsed as CSV quoting — confirm.
  CSV.foreach(filename, headers: true, encoding: 'utf-8', col_sep: "\t",
                        header_converters: :symbol, quote_char: "\b") do |row|
    h = row.to_h
    # Keep only the requested language columns.
    data = languages.map { |l| [l, h[l]] }.to_h

    lemma = initialize_lemma!(row[:lemma], row[:part_of_speech])
    lemma[:glosses] ||= {}
    lemma[:glosses].merge!(data)
  end
end

#add_source!(source) ⇒ Object

Raises:

  • (ArgumentError)


23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/proiel/dictionary/builder.rb', line 23

# Adds a source to the dictionary and indexes all of its tokens.
#
# While the builder has no language or license yet, they are taken from the
# source being added; once set, every further source has to match them.
# Homographs are re-indexed after the tokens have been indexed.
#
# @param source [PROIEL::Source] the source to add
# @raise [ArgumentError] if +source+ is not a PROIEL::Source, or if its
#   language or license differs from the one already stored
def add_source!(source)
  raise ArgumentError, 'source expected' unless source.is_a?(PROIEL::Source)

  same_language = @language.nil? || @language == source.language
  raise ArgumentError, 'incompatible language' unless same_language

  same_license = @license.nil? || @license == source.license
  raise ArgumentError, 'incompatible license' unless same_license

  @language ||= source.language
  @license ||= source.license
  @sources << source

  source.tokens.each do |tok|
    index_token!(tok)
  end

  index_homographs!
end

#to_xml(io) ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/proiel/dictionary/builder.rb', line 39

# Writes the dictionary as an indented XML document to +io+.
#
# The root +proiel+ element carries the export time and schema version. It
# wraps a +dictionary+ element that lists one +source+ reference per added
# source, followed by the lemmata ordered case-insensitively by key.
#
# @param io [Object] output target handed to Builder::XmlMarkup
#   (presumably an IO or String — TODO confirm)
def to_xml(io)
  xml = ::Builder::XmlMarkup.new(target: io, indent: 2)
  xml.instruct! :xml, version: '1.0', encoding: 'UTF-8'

  xml.proiel('export-time': DateTime.now.xmlschema, 'schema-version': CURRENT_SCHEMA_VERSION) do
    xml.dictionary(language: @language) do
      xml.sources do
        @sources.each { |src| xml.source(idref: src.id, license: src.license) }
      end

      xml.lemmata do
        ordered = @lemmata.sort_by { |key, _| key.downcase }

        ordered.each do |key, entry|
          # Only the text before the first comma of the key is the lemma form.
          form = key.split(',').first
          lemma_to_xml(xml, form, entry)
        end
      end
    end
  end
end