Class: Mabmapper::ElasticSearchWriter
- Inherits:
-
Object
- Object
- Mabmapper::ElasticSearchWriter
- Defined in:
- lib/mabmapper/elasticsearch_writer.rb
Class Method Summary collapse
Instance Method Summary collapse
-
#add_file(name, mode) {|_self| ... } ⇒ Object
:yields: io.
-
#add_file_simple(name, mode, size) {|_self| ... } ⇒ Object
:yields: io.
- #close ⇒ Object
-
#initialize(io) ⇒ ElasticSearchWriter
constructor
A new instance of ElasticSearchWriter.
-
#write(xml) ⇒ Object
Assumes that `xml` is an XML document serialized as a string.
Constructor Details
#initialize(io) ⇒ ElasticSearchWriter
Returns a new instance of ElasticSearchWriter.
20 21 22 23 24 25 |
# File 'lib/mabmapper/elasticsearch_writer.rb', line 20
#
# Creates a writer around the given output object.
#
# @param io [#write, #close, #closed?] the object that #write and #close
#   operate on; it is stored as-is in @io
def initialize(io)
  @io = io

  # Select LibXML as the ActiveSupport XmlMini backend; per the original
  # author's note this is done to improve XML parsing performance.
  # NOTE(review): this assignment is process-wide, not per-instance.
  ActiveSupport::XmlMini.backend = 'LibXML'
end
Class Method Details
.out_file(output_dir_name, file_name, options = {}) ⇒ Object
13 14 15 16 17 18 |
# File 'lib/mabmapper/elasticsearch_writer.rb', line 13
#
# Derives the path of the bulk output file that belongs to an input archive.
#
# Only the basename of +file_name+ is used. Every ".tar.gz", ".tar" or ".tgz"
# occurrence in it is replaced by ".es_bulk"; names without such an extension
# are kept unchanged.
#
# @param output_dir_name [String] directory the resulting path is placed in
# @param file_name [String] name or path of the input file
# @param options [Hash] optional settings
# @option options [Boolean] :will_be_gziped when exactly +true+, ".gz" is
#   appended to the resulting file name
# @return [String] +output_dir_name+ joined with the derived file name
def self.out_file(output_dir_name, file_name, options = {})
  # The dot between "tar" and "gz" is escaped so that only a literal
  # ".tar.gz" matches (an unescaped dot matches any character).
  file_basename = File.basename(file_name).gsub(/\.tar\.gz|\.tar|\.tgz/, '.es_bulk')
  # Strict comparison on purpose: merely truthy values do not enable the suffix.
  file_basename << '.gz' if options[:will_be_gziped] == true
  File.join(output_dir_name, file_basename)
end
Instance Method Details
#add_file(name, mode) {|_self| ... } ⇒ Object
:yields: io
27 28 29 |
# File 'lib/mabmapper/elasticsearch_writer.rb', line 27
#
# Yields this writer to the given block, so the block can call #write on it.
# Neither argument is used by this implementation.
# NOTE(review): the signature presumably mirrors an archive-writer interface
# so this class can stand in for one - confirm against the callers.
#
# @param name [Object] ignored
# @param mode [Object] ignored
# @yieldparam io [ElasticSearchWriter] the writer itself
# @return [Object] whatever the block returns
def add_file(name, mode) # :yields: io
  yield(self)
end
#add_file_simple(name, mode, size) {|_self| ... } ⇒ Object
:yields: io
31 32 33 |
# File 'lib/mabmapper/elasticsearch_writer.rb', line 31
#
# Yields this writer to the given block, so the block can call #write on it.
# None of the arguments is used by this implementation.
# NOTE(review): the signature presumably mirrors an archive-writer interface
# so this class can stand in for one - confirm against the callers.
#
# @param name [Object] ignored
# @param mode [Object] ignored
# @param size [Object] ignored
# @yieldparam io [ElasticSearchWriter] the writer itself
# @return [Object] whatever the block returns
def add_file_simple(name, mode, size) # :yields: io
  yield(self)
end
#close ⇒ Object
35 36 37 |
# File 'lib/mabmapper/elasticsearch_writer.rb', line 35
#
# Closes the underlying output object unless it is already closed, so the
# method can be called more than once.
#
# @return [Object, nil] nil when @io was already closed, otherwise the
#   result of @io.close
def close
  return if @io.closed?

  @io.close
end
#write(xml) ⇒ Object
Assumes that `xml` is an XML document serialized as a string.
40 41 42 43 44 45 46 47 48 49 |
# File 'lib/mabmapper/elasticsearch_writer.rb', line 40
#
# Turns one XML record into an Elasticsearch bulk "index" entry and writes it
# to @io. The input is assumed to be an XML document serialized as a string
# whose parsed form has a top-level "document" key containing an "id".
#
# Two JSON lines are emitted: the action line carrying the document id as
# _id, followed by the document itself.
#
# @param xml [String] serialized XML of a single record
# @return [Object] the result of @io.write
def write(xml)
  document = Hash.from_xml(xml)['document']

  action_line = Oj.dump({ index: { _id: document['id'].to_s } }, mode: :compat)
  source_line = Oj.dump(document, mode: :compat)

  # Beware: the bulk import depends on the exact newline placement - one
  # newline after the action line and one after the document line.
  @io.write("#{action_line}\n#{source_line}\n")
end