Class: Metacrunch::Mab2::AlephMabXmlDocumentFactory::Parser

Inherits:
Ox::Sax
  • Object
show all
Defined in:
lib/metacrunch/mab2/aleph_mab_xml_document_factory.rb

Instance Method Summary collapse

Instance Method Details

#attr(name, value) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/metacrunch/mab2/aleph_mab_xml_document_factory.rb', line 63

# Ox::Sax attribute callback. Stores the attribute on whichever field is
# currently open; the innermost open field (subfield, then datafield, then
# controlfield) wins. Attributes that are not relevant for that field are
# ignored.
#
# @param name [Symbol] attribute name (compared against :code, :tag, :ind1, :ind2)
# @param value [Object] attribute value, assigned unchanged
def attr(name, value)
  if @in_subfield
    # A subfield only carries its code.
    return unless name == :code

    @subfield.code = value
  elsif @in_datafield
    case name
    when :tag  then @datafield.tag = value
    when :ind1 then @datafield.ind1 = value
    when :ind2 then @datafield.ind2 = value
    end
  elsif @in_controlfield && name == :tag
    @controlfield.tag = value
  end
end

#end_element(name) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/metacrunch/mab2/aleph_mab_xml_document_factory.rb', line 50

# Ox::Sax callback for a closing tag. Finishes the field that +name+ closes
# and hands it over to its parent: a subfield is added to the current
# datafield, a datafield or controlfield is added to the document.
#
# The element name is checked in addition to the state flags, so that the
# closing tag of an unrelated element nested inside a field does not end that
# field prematurely (which would in turn make the field's own closing tag
# close its parent instead).
#
# @param name [Symbol] name of the element being closed
def end_element(name)
  case name
  when :subfield
    if @in_subfield
      @in_subfield = false
      @datafield.add_subfield(@subfield)
    end
  when :datafield
    if @in_datafield
      @in_datafield = false
      @document.add_datafield(@datafield)
    end
  when :controlfield
    if @in_controlfield
      @in_controlfield = false
      @document.add_controlfield(@controlfield)
    end
  end
end

#parse(io_or_string) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/metacrunch/mab2/aleph_mab_xml_document_factory.rb', line 20

# Parses the given XML into a new Document, using this object as the SAX
# handler.
#
# Any object that responds to +read+ is handed to Ox as is; everything else
# is treated as a String and wrapped in a StringIO. Checking for +read+
# instead of +is_a?(IO)+ lets IO-like objects that do not inherit from IO
# (e.g. StringIO) pass through instead of being wrapped a second time,
# which would raise a TypeError.
#
# @param io_or_string [#read, String] the XML source
# @return [Document] the document built while parsing
def parse(io_or_string)
  # Reset the state machine so the parser can be reused.
  @in_controlfield = @in_datafield = @in_subfield = false
  @controlfield = @datafield = @subfield = nil

  @document = Document.new
  @html_entities_coder = HTMLEntities.new

  io = io_or_string.respond_to?(:read) ? io_or_string : StringIO.new(io_or_string)

  # convert_special tells Ox to convert some HTML entities already during
  # parsing, which reduces the number of entities we have to decode ourselves.
  Ox.sax_parse(self, io, convert_special: true)

  @document
end

#start_element(name) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/metacrunch/mab2/aleph_mab_xml_document_factory.rb', line 37

# Ox::Sax callback for an opening tag. For the three element names the
# parser cares about, marks the matching field as open and creates a fresh
# object for it; every other element is ignored.
#
# @param name [Symbol] name of the element being opened
def start_element(name)
  case name
  when :subfield
    @in_subfield = true
    @subfield = Document::Datafield::Subfield.new
  when :datafield
    @in_datafield = true
    @datafield = Document::Datafield.new
  when :controlfield
    @in_controlfield = true
    @controlfield = Document::Controlfield.new
  end
end

#text(value) ⇒ Object



79
80
81
82
83
84
85
# File 'lib/metacrunch/mab2/aleph_mab_xml_document_factory.rb', line 79

# Ox::Sax callback for character data. Inside a subfield the text becomes
# the subfield's value; inside a controlfield it is assigned to the
# controlfield's values. Text anywhere else is ignored.
#
# @param value [String] the text content
def text(value)
  if @in_subfield
    # Only run the entity decoder when the text can contain an entity at all.
    contains_entity = value.include?("&")
    content = contains_entity ? @html_entities_coder.decode(value) : value
    @subfield.value = content
  elsif @in_controlfield
    @controlfield.values = value
  end
end