Class: Metacrunch::Mab2::Document::AlephMabXmlParser
- Inherits:
- Ox::Sax
- Object
- Ox::Sax
- Metacrunch::Mab2::Document::AlephMabXmlParser
- Defined in:
- lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb
Class Method Summary collapse
- .parse(aleph_mab_xml) ⇒ Object
Instance Method Summary collapse
- #attr(name, value) ⇒ Object
- #end_element(name) ⇒ Object
- #parse(io_or_string) ⇒ Object
- #start_element(name) ⇒ Object
- #text(value) ⇒ Object
Class Method Details
.parse(aleph_mab_xml) ⇒ Object
9 10 11 |
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 9
#
# Class-level convenience wrapper: builds a new parser instance and hands the
# input to its instance-level #parse.
#
# @param aleph_mab_xml [Object] forwarded unchanged to the instance method #parse
# @return [Object] whatever the instance method #parse returns
def self.parse(aleph_mab_xml)
  parser = new
  parser.parse(aleph_mab_xml)
end
Instance Method Details
#attr(name, value) ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 56
#
# SAX callback receiving a single attribute. The value is stored on the field
# object that is currently open; the checks run innermost-first (subfield,
# then datafield, then controlfield). Attribute names not listed below are
# ignored.
#
# @param name [Symbol] attribute name (:code, :tag, :ind1 or :ind2 are handled)
# @param value [Object] attribute value, stored as given
def attr(name, value)
  if @in_subfield
    # A subfield only carries its code.
    @subfield.code = value if name == :code
  elsif @in_datafield
    case name
    when :tag  then @datafield.tag = value
    when :ind1 then @datafield.ind1 = value
    when :ind2 then @datafield.ind2 = value
    end
  elsif @in_controlfield
    # A controlfield only carries its tag.
    @controlfield.tag = value if name == :tag
  end
end
#end_element(name) ⇒ Object
43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 43
#
# SAX callback fired when an element is closed. Finishes the field whose
# closing tag was seen and attaches it to its parent: a subfield to the
# current datafield, a datafield or controlfield to the document.
#
# The dispatch is keyed on the element name (the same symbols #start_element
# reacts to), so closing tags of unrelated elements never close a field that
# is still open.
#
# @param name [Symbol] name of the element being closed
def end_element(name)
  case name
  when :subfield
    if @in_subfield
      @in_subfield = false
      @datafield.add_subfield(@subfield)
    end
  when :datafield
    if @in_datafield
      @in_datafield = false
      @document.add_datafield(@datafield)
    end
  when :controlfield
    if @in_controlfield
      @in_controlfield = false
      @document.add_controlfield(@controlfield)
    end
  end
end
#parse(io_or_string) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 13
#
# Parses the given XML and returns the resulting document. All parser state is
# reset first, so one instance can be used for several consecutive parses.
#
# @param io_or_string [#read, String] a readable object (IO, StringIO, ...)
#   or the XML source as a string
# @return [Document] the document created for this parse run
def parse(io_or_string)
  # initialize state machine
  @in_controlfield = @in_datafield = @in_subfield = false
  @controlfield = @datafield = @subfield = nil
  @document = Document.new
  @html_entities_coder = HTMLEntities.new

  # Duck-type the input instead of checking for IO: a StringIO is not an IO,
  # and wrapping it in another StringIO raises a TypeError.
  io = io_or_string.respond_to?(:read) ? io_or_string : StringIO.new(io_or_string)

  # convert_special tells ox to convert some html entities already during
  # parsing, which reduces the number of entities we have to decode ourselves
  Ox.sax_parse(self, io, convert_special: true)

  @document
end
#start_element(name) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 30
#
# SAX callback fired when an element is opened. For the three element names
# handled here it raises the matching "inside this element" flag and creates
# an empty field object for the following callbacks to fill. Any other element
# name is ignored.
#
# @param name [Symbol] name of the element being opened
def start_element(name)
  case name
  when :subfield
    @in_subfield = true
    @subfield = Metacrunch::Mab2::Document::Subfield.new
  when :datafield
    @in_datafield = true
    @datafield = Metacrunch::Mab2::Document::Datafield.new
  when :controlfield
    @in_controlfield = true
    @controlfield = Metacrunch::Mab2::Document::Controlfield.new
  end
end
#text(value) ⇒ Object
72 73 74 75 76 77 78 |
# File 'lib/metacrunch/mab2/document/aleph_mab_xml_parser.rb', line 72
#
# SAX callback receiving character data. Inside a subfield the text becomes
# the subfield's value; the entity decoder is only invoked when the text
# contains an ampersand. Inside a controlfield the text is stored unchanged
# as the field's values. Text anywhere else is ignored.
#
# @param value [String] the character data
def text(value)
  if @in_subfield
    @subfield.value =
      if value.include?("&")
        @html_entities_coder.decode(value)
      else
        value
      end
  elsif @in_controlfield
    @controlfield.values = value
  end
end