Method: Libis::Tools::MetsFile.parse
- Defined in:
- lib/libis/tools/mets_file.rb
.parse(xml) ⇒ Hash
Reads an existing METS XML file and parses into a large Hash structure for inspection.
It will not immediately allow you to create a Libis::Tools::MetsFile instance from it, but with some inspection and knowledge of METS file structure it should be possible to recreate a similar file using the result.
The returned Hash has the following structure:
- :amd - the general AMD section with subsections
- :dmd - the general DMD section with the DC record(s)
Each amd section has one or more subsections with keys :tech, :rights, :source or :digiprov. Each subsection is a Hash with section id as key and an array as value. For each <record> element a Hash is added to the array with <key@id> as key and <key> content as value.
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
# File 'lib/libis/tools/mets_file.rb', line 79

# Parses an existing METS XML document into a nested Hash for inspection.
#
# The result contains the IE-level sections under :amd and :dmd (the sections
# with ID 'ie-amd' and 'ie-dmd'), merged with one entry per PHYSICAL structMap.
# Each structMap entry is keyed by the LABEL of the structMap's first mets:div.
#
# @param xml [String, Hash, Libis::Tools::XmlDocument, Nokogiri::XML::Document]
#   the METS document; a String is handed to Libis::Tools::XmlDocument.parse and
#   a Hash to Libis::Tools::XmlDocument.from_hash
# @return [Hash] the parsed structure
# @raise [ArgumentError] if xml is of any other type
def self.parse(xml)
  xml_doc = case xml
            when String
              Libis::Tools::XmlDocument.parse(xml).document
            when Hash
              Libis::Tools::XmlDocument.from_hash(xml).document
            when Libis::Tools::XmlDocument
              xml.document
            when Nokogiri::XML::Document
              xml
            else
              raise ArgumentError, "Libis::Tools::MetsFile#parse does not accept input of type #{xml.class}"
            end
  root = xml_doc.root

  # dmdSec ID => { child element name => content } taken from the first dc:record.
  dmd_sec = root.xpath('mets:dmdSec', NS).each_with_object({}) do |dmd, hash_dmd|
    hash_dmd[dmd[:ID]] = dmd.xpath('.//dc:record', NS).first.children.each_with_object({}) do |c, h|
      # Nodes named 'text' (text nodes between the elements) are skipped.
      h[c.name] = c.content if c.name != 'text'
    end
  end

  # amdSec ID => { :tech/:rights/:source/:digiprov => { section id => [ { key id => content } ] } }
  amd_sec = root.xpath('mets:amdSec', NS).each_with_object({}) do |amd, hash_amd|
    hash_amd[amd[:ID]] = [:tech, :rights, :source, :digiprov].each_with_object({}) do |sec, hash_sec|
      md = amd.xpath("mets:#{sec}MD", NS).first
      # Must be `next`: a `return` inside this block would leave parse itself and
      # hand back a partial section hash as soon as one subsection is absent.
      next unless md
      # dnx:section is matched at any depth below the MD element.
      hash_sec[sec] = md.xpath('.//dnx:section', NS).each_with_object({}) do |dnx_sec, hash_md|
        hash_md[dnx_sec[:id]] = dnx_sec.xpath('dnx:record', NS).map do |dnx_record|
          dnx_record.xpath('dnx:key', NS).each_with_object({}) do |key, record_hash|
            record_hash[key[:id]] = key.content
          end
        end
      end
    end
  end

  # fileGrp ID => { :amd, :dmd, file ID => { :group, :amd, :dmd } }
  # Hash#cleanup is a project extension; presumably it drops empty entries - confirm.
  rep_sec = root.xpath('.//mets:fileGrp', NS).each_with_object({}) do |rep, hash_rep|
    files = rep.xpath('mets:file', NS).each_with_object({}) do |file, hash_file|
      hash_file[file[:ID]] = {
        group: file[:GROUPID],
        amd: amd_sec[file[:ADMID]],
        dmd: dmd_sec[file[:DMDID]]
      }.cleanup
    end
    hash_rep[rep[:ID]] = {
      amd: amd_sec[rep[:ADMID]],
      # A DMDID refers to a dmdSec, so it is resolved against dmd_sec (not amd_sec).
      dmd: dmd_sec[rep[:DMDID]]
    }.cleanup.merge(files)
  end

  struct_maps = root.xpath('.//mets:structMap[@TYPE="PHYSICAL"]', NS).each_with_object({}) do |map, hash_map|
    # The structMap ID is the fileGrp ID followed by a numeric '-N' suffix.
    rep_id = map[:ID].gsub(/-\d+$/, '')
    rep = rep_sec[rep_id]

    # Recursively converts a div: a FILE div becomes the file's entry from the
    # fileGrp (plus its id); any other div becomes { child LABEL => parsed child }.
    div_parser = lambda do |div|
      if div[:TYPE] == 'FILE'
        file_id = div.xpath('mets:fptr', NS).first[:FILEID]
        { id: file_id }.merge rep[file_id]
      else
        div.children.each_with_object({}) do |child, hash|
          # noinspection RubyScope
          hash[child[:LABEL]] = div_parser.call(child)
        end
      end
    end

    hash_map[map.xpath('mets:div', NS).first[:LABEL]] = {
      id: rep_id,
      amd: rep[:amd],
      dmd: rep[:dmd]
    }.cleanup.merge(
      # NOTE(review): this block yields the assigned value, not the accumulator,
      # so with a single top-level div its parsed content is merged directly into
      # the entry. Kept as-is to preserve the output shape - confirm whether a
      # structMap can hold more than one top-level div.
      map.xpath('mets:div', NS).inject({}) do |hash, div|
        hash[div[:LABEL]] = div_parser.call(div)
      end
    )
  end

  { amd: amd_sec['ie-amd'], dmd: dmd_sec['ie-dmd'] }.cleanup.merge(struct_maps)
end