Class: Metacrunch::Mab2::Document::AlephMabXmlParser

Inherits:
Ox::Sax
  • Object
show all
Defined in:
lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.parse(aleph_mab_xml) ⇒ Object



9
10
11
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 9

# Convenience entry point: builds a fresh parser and runs it on the input.
#
# @param aleph_mab_xml the input handed unchanged to the instance-level #parse
# @return [Object] whatever the instance-level #parse returns
def self.parse(aleph_mab_xml)
  parser = new
  parser.parse(aleph_mab_xml)
end

Instance Method Details

#attr(name, value) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 56

# SAX attribute callback: stores an attribute on the field that is
# currently open. The innermost open field wins (subfield, then datafield,
# then controlfield); attributes with any other name are ignored.
#
# @param name [Symbol] attribute name (compared against :code, :tag, :ind1, :ind2)
# @param value [Object] attribute value, assigned as-is
def attr(name, value)
  if @in_subfield
    # A subfield only carries its code.
    @subfield.code = value if name == :code
  elsif @in_datafield
    case name
    when :tag  then @datafield.tag = value
    when :ind1 then @datafield.ind1 = value
    when :ind2 then @datafield.ind2 = value
    end
  elsif @in_controlfield
    # A controlfield only carries its tag.
    @controlfield.tag = value if name == :tag
  end
end

#end_element(name) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 43

# SAX end-tag callback: closes the field named by the tag and attaches it
# to its parent (subfield -> current datafield, datafield/controlfield ->
# document).
#
# The element name is checked so that the closing tag of an unrelated
# element cannot close the innermost open field early; such tags are
# ignored, mirroring how #start_element only reacts to the three known
# element names.
#
# @param name [Symbol] name of the element being closed
def end_element(name)
  case name
  when :subfield
    return unless @in_subfield

    @in_subfield = false
    @datafield.add_subfield(@subfield)
  when :datafield
    return unless @in_datafield

    @in_datafield = false
    @document.add_datafield(@datafield)
  when :controlfield
    return unless @in_controlfield

    @in_controlfield = false
    @document.add_controlfield(@controlfield)
  end
end

#parse(io_or_string) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 13

# Parses Aleph MAB XML into a document by running this object as the SAX
# handler.
#
# @param io_or_string [#read, String] a readable IO-like object (IO, File,
#   StringIO, ...) which is used directly, or a String which is wrapped in
#   a StringIO
# @return [Document] the document populated by the SAX callbacks
def parse(io_or_string)
  # Reset the state machine so every run starts from a clean slate.
  @in_controlfield = @in_datafield = @in_subfield = false

  @controlfield = @datafield = @subfield = nil
  @document = Document.new
  @html_entities_coder = HTMLEntities.new

  # Duck-type on #read rather than checking for IO: IO-like objects such as
  # StringIO are not IO subclasses and cannot be wrapped in a StringIO.
  io = io_or_string.respond_to?(:read) ? io_or_string : StringIO.new(io_or_string)

  # convert_special tells Ox to convert some HTML entities during parsing
  # already, which minimizes the number of entities we have to decode
  # ourselves.
  Ox.sax_parse(self, io, convert_special: true)

  @document
end

#start_element(name) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 30

# SAX start-tag callback: opens a new subfield, datafield or controlfield.
# The matching "in_*" flag is raised and a fresh, empty field object is
# stored for the following callbacks to fill. Any other element name is
# ignored.
#
# @param name [Symbol] name of the element being opened
def start_element(name)
  case name
  when :subfield
    @in_subfield = true
    @subfield = Metacrunch::Mab2::Document::Subfield.new
  when :datafield
    @in_datafield = true
    @datafield = Metacrunch::Mab2::Document::Datafield.new
  when :controlfield
    @in_controlfield = true
    @controlfield = Metacrunch::Mab2::Document::Controlfield.new
  end
end

#text(value) ⇒ Object



72
73
74
75
76
77
78
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 72

# SAX text callback: stores character data on the field that is currently
# open. Inside a subfield the text becomes its value, run through the HTML
# entity decoder only when it contains an ampersand; inside a controlfield
# it is assigned to the field's values. Text anywhere else is dropped.
#
# @param value [String] the text content reported by the parser
def text(value)
  if @in_subfield
    content = value
    # Only pay for decoding when an entity can actually be present.
    content = @html_entities_coder.decode(value) if value.include?("&")
    @subfield.value = content
  elsif @in_controlfield
    @controlfield.values = value
  end
end