Class: Metacrunch::Mab2::Document::MabXmlParser

Inherits:
Ox::Sax
  • Object
show all
Defined in:
lib/metacrunch/mab2/document/mab_xml_parser.rb,
lib/metacrunch/mab2/document/jruby/mab_xml_parser.rb

Instance Method Summary collapse

Instance Method Details

#attr(name, value) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/metacrunch/mab2/document/mab_xml_parser.rb', line 49

# SAX attribute callback. Copies a recognised attribute onto whichever field
# object is currently open, giving precedence to an open subfield, then an
# open datafield, then an open controlfield. Unrecognised attributes, and
# attributes seen while no field is open, are ignored.
#
# @param name [Symbol] attribute name; only :code (subfield), :tag / :ind1 /
#   :ind2 (datafield) and :tag (controlfield) are acted upon
# @param value [Object] attribute value, stored unchanged
# @return [Object, nil] the assigned value, or nil when nothing was assigned
def attr(name, value)
  if @in_subfield
    # A subfield only carries its code.
    @subfield.code = value if name == :code
  elsif @in_datafield
    case name
    when :tag  then @datafield.tag = value
    when :ind1 then @datafield.ind1 = value
    when :ind2 then @datafield.ind2 = value
    end
  elsif @in_controlfield
    # A controlfield only carries its tag.
    @controlfield.tag = value if name == :tag
  end
end

#end_element(name) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/metacrunch/mab2/document/mab_xml_parser.rb', line 36

# SAX end-of-element callback. Closes the innermost open field and hands it
# to its parent: an open subfield is added to the current datafield, an open
# datafield or controlfield is added to the document. The element name is not
# inspected; the decision is driven purely by the open-field flags.
#
# @param name [Symbol] name of the element being closed (unused)
# @return [Object, nil] whatever the parent's add_* method returns, or nil
#   when no field was open
def end_element(name)
  if @in_subfield
    @in_subfield = false
    return @datafield.add_subfield(@subfield)
  end

  if @in_datafield
    @in_datafield = false
    return @document.add_datafield(@datafield)
  end

  return unless @in_controlfield

  @in_controlfield = false
  @document.add_controlfield(@controlfield)
end

#parse(mab_xml) ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/metacrunch/mab2/document/mab_xml_parser.rb', line 8

# Parses a MAB XML input into a document by running the Ox SAX parser with
# this object as the handler. All parser state is reset on every call.
#
# @param mab_xml [Object] the XML input, passed straight through to
#   Ox.sax_parse
# @return [Document] the freshly built document
def parse(mab_xml)
  # Reset the state machine: no element is open ...
  @in_controlfield = false
  @in_datafield = false
  @in_subfield = false

  # ... and no field object is pending.
  @controlfield = nil
  @datafield = nil
  @subfield = nil

  @document = Document.new
  @html_entities_coder = HTMLEntities.new

  # convert_special tells Ox to convert some HTML entities during parsing
  # already, which minimizes the number of entities we have to decode
  # ourselves.
  Ox.sax_parse(self, mab_xml, convert_special: true)

  @document
end

#start_element(name) ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/metacrunch/mab2/document/mab_xml_parser.rb', line 23

# SAX start-of-element callback. For the three field elements it marks the
# corresponding field as open and creates a fresh, empty field object to be
# filled by later callbacks. Any other element is ignored.
#
# @param name [Symbol] element name; :subfield, :datafield and :controlfield
#   are acted upon
# @return [Object, nil] the newly created field object, or nil for other
#   elements
def start_element(name)
  case name
  when :subfield
    @in_subfield = true
    @subfield = Metacrunch::Mab2::Document::Subfield.new
  when :datafield
    @in_datafield = true
    @datafield = Metacrunch::Mab2::Document::Datafield.new
  when :controlfield
    @in_controlfield = true
    @controlfield = Metacrunch::Mab2::Document::Controlfield.new
  end
end

#text(value) ⇒ Object



65
66
67
68
69
70
71
# File 'lib/metacrunch/mab2/document/mab_xml_parser.rb', line 65

# SAX text callback. Stores the text on the open subfield (as its value) or,
# failing that, on the open controlfield (as its values). Text seen in any
# other position is ignored.
#
# @param value [String] the text content reported by the parser
# @return [Object, nil] the stored value, or nil when the text was ignored
def text(value)
  if @in_subfield
    # Only run the entity decoder when the text can contain an entity at all.
    decoded =
      if value.include?("&")
        @html_entities_coder.decode(value)
      else
        value
      end
    @subfield.value = decoded
  elsif @in_controlfield
    # Controlfield text is stored as-is, without entity decoding.
    @controlfield.values = value
  end
end