Class: Traject::NDJReader

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/traject/ndj_reader.rb

Overview

Reads a newline-delimited JSON file in which each line is a MARC-in-JSON string. UTF-8 encoding is required.

Instance Method Summary collapse

Constructor Details

#initialize(input_stream, settings) ⇒ NDJReader

Returns a new instance of NDJReader.


12
13
14
15
16
17
18
# File 'lib/traject/ndj_reader.rb', line 12

def initialize(input_stream, settings)
  @settings = settings
  @input_stream = input_stream
  if input_stream.respond_to?(:path) && /\.gz\Z/.match(input_stream.path)
    @input_stream = Zlib::GzipReader.new(@input_stream, :external_encoding => "UTF-8")
  end
end

Instance Method Details

#each ⇒ Object


24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/traject/ndj_reader.rb', line 24

# Iterates over the input stream, parsing each line as MARC-in-JSON and
# yielding the resulting MARC::Record.
#
# A line that cannot be parsed or converted is reported through
# +logger.error+ (with its one-based line number) and skipped; iteration then
# continues with the next line.
#
# Only StandardError raised while building the record is rescued, so
# Interrupt/SystemExit still terminate the run, and errors raised by the
# caller's block propagate to the caller instead of being logged as a
# JSON problem.
#
# @yield [record] each successfully parsed record
# @yieldparam record [MARC::Record]
# @return [Enumerator] when no block is given
def each
  return enum_for(:each) unless block_given?

  @input_stream.each_with_index do |json, i|
    begin
      record = MARC::Record.new_from_hash(JSON.parse(json))
    rescue StandardError => e
      # each_with_index is zero-based; humans count lines from 1.
      logger.error("Problem with JSON record on line #{i + 1}: #{e.message}")
      next
    end

    # Yield outside the rescue so consumer failures are not misattributed.
    yield record
  end
end

#logger ⇒ Object


20
21
22
# File 'lib/traject/ndj_reader.rb', line 20

# Returns the logger for this reader, memoized in @logger.
#
# Uses the :logger entry from the settings when present; otherwise builds a
# Yell logger on STDERR at level "gt.fatal", which serves as a null logger.
#
# @return [Object] the configured or fallback logger
def logger
  return @logger if @logger

  configured = @settings[:logger]
  @logger = configured || Yell.new(STDERR, :level => "gt.fatal")
end