Class: Marc4R::MarcStreamReader

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/marc4r/marc_stream_reader.rb

Instance Method Summary collapse

Constructor Details

#initialize(io, enc = nil) ⇒ MarcStreamReader

Returns a new instance of MarcStreamReader.



3
4
5
6
# File 'lib/marc4r/marc_stream_reader.rb', line 3

# Creates a reader on top of the given stream object.
#
# @param io the object records will be read from; it is stored untouched
# @param enc an encoding name handed to Encoding.find; when it is omitted
#   (or otherwise falsy) the reader starts out with no encoding recorded
def initialize(io, enc = nil)
  @io = io
  @encoding = (Encoding.find(enc) if enc)
end

Instance Method Details

#close ⇒ Object



7
8
9
# File 'lib/marc4r/marc_stream_reader.rb', line 7

# Closes the stream object this reader was constructed with by delegating
# to its #close method.
#
# @return whatever the underlying object's #close returns
def close
  @io.close
end

#data_field(tag, field_content) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/marc4r/marc_stream_reader.rb', line 67

# Builds a data field from the raw content of one variable field.
#
# The first two characters of +field_content+ are taken as the indicators.
# If the third character is the subfield delimiter, the remainder is split
# on that delimiter; in every chunk the first character is the subfield code
# and the rest is its data. When the third character is anything else the
# field is built without subfields.
#
# @param tag the field tag, passed through to the DataField constructor
# @param field_content [String] field content without its trailing field
#   terminator
# @return [Marc4R::Fields::DataField] built from tag, both indicators and a
#   flat code, data, code, data, ... argument list
# @raise [Marc4R::MarcException] if the content is shorter than the two
#   indicators, or if a subfield chunk is empty (two adjacent delimiters)
def data_field(tag, field_content)
  raise Marc4R::MarcException, "malformed data field content" if field_content.size < 2

  ind1 = field_content[0]
  ind2 = field_content[1]
  delimiter = Marc4R::Terminators::SUBFIELD_TERMINATOR

  subfields = []
  if field_content[2] == delimiter
    field_content[3..-1].split(delimiter).each do |chunk|
      # An empty chunk has neither a code nor data; report it as a format
      # error instead of failing later with a NoMethodError on nil.
      raise Marc4R::MarcException, "empty subfield in data field #{tag}" if chunk.empty?

      value = chunk[1..-1]
      # Only relabel the bytes when an encoding is known; force_encoding(nil)
      # would raise a TypeError.
      value = value.force_encoding(@encoding) if @encoding
      subfields << chunk[0] << value
    end
  end

  Marc4R::Fields::DataField.new(tag, ind1, ind2, *subfields)
end

#each ⇒ Object



10
11
12
13
14
15
16
17
18
# File 'lib/marc4r/marc_stream_reader.rb', line 10

# Reads records from the underlying stream until it reports end-of-file,
# yielding each one after it has been populated by #load_record.
#
# Every record starts with a 24-byte leader; the leader's record_length
# minus those 24 bytes is the number of further bytes read for the record.
#
# @yieldparam record [Marc4R::Record] the record just loaded
# @return [Enumerator] when called without a block, so the reader can be
#   iterated lazily instead of failing on +yield+ after consuming a record
# @raise [Marc4R::MarcException] if the stream ends inside a leader or inside
#   a record body
def each
  return to_enum(:each) unless block_given?

  until @io.eof?
    leader = @io.read(24)
    if leader.nil? || leader.bytesize < 24
      raise Marc4R::MarcException, "truncated leader: expected 24 bytes"
    end

    record = Marc4R::Record.new(leader)
    body_length = record.leader.record_length - 24
    record_body = @io.read(body_length)
    if record_body.nil? || record_body.bytesize < body_length
      raise Marc4R::MarcException, "truncated record: expected #{body_length} bytes after the leader"
    end

    load_record(record, record_body)
    yield record
  end
end

#load_record(record, content) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/marc4r/marc_stream_reader.rb', line 19

# Parses the directory and variable fields found in +content+ and appends
# the resulting fields to +record+.
#
# +content+ is expected to start right after the leader: a directory of
# 12-character entries, a field terminator, the fields themselves (each
# ending in a field terminator) and finally a record terminator.
#
# If the reader has no encoding yet, one is derived from the leader's
# char_coding_scheme (' ' => ISO-8859-1, 'a' => UTF-8) and kept for later
# records. For any other value the encoding stays unset and control field
# data is stored without being relabelled.
#
# @param record [Marc4R::Record] record whose leader is read and whose
#   control_fields / data_fields collections are appended to
# @param content [String] everything of the record that follows the leader
# @return [nil]
# @raise [Marc4R::MarcException] on a negative or non-multiple-of-12
#   directory length, a missing field or record terminator, or content that
#   ends before all fields listed in the directory were read
def load_record(record, content)
  leader = record.leader
  # data_base_address counts from the start of the record; subtract the
  # 24-byte leader (not part of content) and the directory's terminator.
  dir_length = leader.data_base_address - 25
  if dir_length < 0 || (dir_length % 12) != 0
    raise Marc4R::MarcException, "invalid directory length #{dir_length}\n#{leader}\n#{content}"
  end
  size = dir_length / 12

  # if MARC 21 then check encoding
  @encoding ||= case leader.char_coding_scheme
                when ' ' then Encoding.find("ISO-8859-1")
                when 'a' then Encoding.find("UTF-8")
                end

  # Each directory entry: 3-character tag, then a 4-character field length.
  # The remaining 5 characters of the entry are not consulted; fields are
  # read back to back in directory order.
  # NOTE(review): those 5 characters are presumably the field's starting
  # position — confirm whether out-of-order fields need to be supported.
  entries = Array.new(size) do |i|
    base = i * 12
    [content.slice(base, 3), content.slice(base + 3, 4).to_i]
  end
  offset = size * 12

  unless content[offset] == Marc4R::Terminators::FIELD_TERMINATOR
    raise Marc4R::MarcException, "expected field terminator at end of directory"
  end
  offset += 1

  entries.each do |tag, length|
    field_content = content.slice(offset, length)
    offset += length
    # slice returns nil once offset runs past the end of content; treat that
    # like any other missing terminator instead of failing on nil[-1].
    if field_content.nil? || field_content[-1] != Marc4R::Terminators::FIELD_TERMINATOR
      raise Marc4R::MarcException, "expected field terminator at end of field #{field_content}"
    end

    payload = field_content[0...-1]
    if Marc4R::Fields::ControlField.accepts_tag(tag)
      # force_encoding(nil) raises TypeError, so only relabel when known.
      payload = payload.force_encoding(@encoding) if @encoding
      record.control_fields << Marc4R::Fields::ControlField.new(tag, payload)
    else # read a data field
      record.data_fields << data_field(tag, payload)
    end
  end

  unless content[offset] == Marc4R::Terminators::RECORD_TERMINATOR
    raise Marc4R::MarcException, "expected record terminator"
  end
end