Class: Mabmapper::ElasticSearchWriter

Inherits:
Object
  • Object
show all
Defined in:
lib/mabmapper/elasticsearch_writer.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(io) ⇒ ElasticSearchWriter

Returns a new instance of ElasticSearchWriter.



20
21
22
23
24
25
# File 'lib/mabmapper/elasticsearch_writer.rb', line 20

# Creates a writer that emits its output to the given IO-like object.
#
# As a side effect the process-wide ActiveSupport XmlMini backend is
# switched to LibXML, which is done to improve XML parsing performance.
#
# @param io [#write, #close, #closed?] sink that receives the bulk lines
def initialize(io)
  # Global setting: affects every XmlMini user in the process.
  ActiveSupport::XmlMini.backend = 'LibXML'

  @io = io
end

Class Method Details

.out_file(output_dir_name, file_name, options = {}) ⇒ Object



13
14
15
16
17
18
# File 'lib/mabmapper/elasticsearch_writer.rb', line 13

# Builds the path of the Elasticsearch bulk file that belongs to an input
# archive.
#
# A trailing +.tar.gz+, +.tar+ or +.tgz+ extension of the archive's basename
# is replaced by +.es_bulk+. Names without such an extension are kept as
# they are.
#
# @param output_dir_name [String] directory the bulk file is placed in
# @param file_name [String] path or name of the input archive
# @param options [Hash] optional settings
# @option options [Boolean] :will_be_gziped append +.gz+ to the resulting
#   name; only honoured when the value is exactly +true+
# @return [String] +output_dir_name+ joined with the derived file name
def self.out_file(output_dir_name, file_name, options = {})
  # The pattern is anchored to the end of the name and replaced only once, so
  # an archive-like fragment in the middle of a name (e.g. "my.tarball.tar.gz")
  # is left untouched instead of being rewritten as well.
  file_basename = File.basename(file_name).sub(/\.(?:tar\.gz|tar|tgz)\z/, '.es_bulk')
  file_basename += '.gz' if options[:will_be_gziped] == true

  File.join(output_dir_name, file_basename)
end

Instance Method Details

#add_file(name, mode) {|_self| ... } ⇒ Object

:yields: io

Yields:

  • (_self)

Yield Parameters:



27
28
29
# File 'lib/mabmapper/elasticsearch_writer.rb', line 27

# Hands the writer itself to the given block.
#
# +name+ and +mode+ are accepted but not used by this method.
# NOTE(review): the signature presumably mirrors a tar-writer style
# interface so this class can stand in for one - confirm against callers.
#
# @param name [Object] ignored
# @param mode [Object] ignored
# @yieldparam writer [ElasticSearchWriter] this writer (+self+)
# @return [Object] whatever the block returns
def add_file(name, mode) # :yields: io
  yield(self)
end

#add_file_simple(name, mode, size) {|_self| ... } ⇒ Object

:yields: io

Yields:

  • (_self)

Yield Parameters:



31
32
33
# File 'lib/mabmapper/elasticsearch_writer.rb', line 31

# Hands the writer itself to the given block.
#
# +name+, +mode+ and +size+ are accepted but not used by this method.
# NOTE(review): the signature presumably mirrors a tar-writer style
# interface so this class can stand in for one - confirm against callers.
#
# @param name [Object] ignored
# @param mode [Object] ignored
# @param size [Object] ignored
# @yieldparam writer [ElasticSearchWriter] this writer (+self+)
# @return [Object] whatever the block returns
def add_file_simple(name, mode, size) # :yields: io
  yield(self)
end

#close ⇒ Object



35
36
37
# File 'lib/mabmapper/elasticsearch_writer.rb', line 35

# Closes the underlying IO unless it has been closed already, so calling
# this method repeatedly is safe.
#
# @return [Object, nil] the result of +@io.close+, or +nil+ when the IO was
#   already closed
def close
  return if @io.closed?

  @io.close
end

#write(xml) ⇒ Object

Assumes that the given data is an XML document serialized as a string.



40
41
42
43
44
45
46
47
48
49
# File 'lib/mabmapper/elasticsearch_writer.rb', line 40

# Converts one XML document into an Elasticsearch bulk "index" entry and
# writes it to the underlying IO.
#
# The XML is parsed with +Hash.from_xml+; the content of its +document+
# root becomes the source line and its +id+ (as a string) the +_id+ of the
# action line. Both lines are serialized with Oj in +:compat+ mode.
#
# @param xml [String] string-serialized XML with a +document+ root element
# @return [Object] the result of +@io.write+
def write(xml)
  document = Hash.from_xml(xml)['document']

  action_line = Oj.dump({ index: { _id: document['id'].to_s } }, mode: :compat)
  source_line = Oj.dump(document, mode: :compat)

  # The bulk format is newline-delimited: each line, including the last one,
  # has to be terminated by exactly one "\n".
  @io.write("#{action_line}\n#{source_line}\n")
end