Method: Libis::Tools::MetsFile.parse

Defined in:
lib/libis/tools/mets_file.rb

.parse(xml) ⇒ Hash

Reads an existing METS XML file and parses it into a large Hash structure for inspection.

It will not immediately allow you to create a Libis::Tools::MetsFile instance from it, but with some inspection and knowledge of METS file structure it should be possible to recreate a similar file using the result.

The returned Hash has the following structure:

  • :amd - the general AMD section with subsections

  • :dmd - the general DMD section with the DC record(s)

Each amd section has one or more subsections with keys :tech, :rights, :source or :digiprov. Each subsection is a Hash with section id as key and an array as value. For each <record> element a Hash is added to the array with <key@id> as key and <key> content as value.



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/libis/tools/mets_file.rb', line 79

def self.parse(xml)
  xml_doc = case xml
              when String
                Libis::Tools::XmlDocument.parse(xml).document
              when Hash
                Libis::Tools::XmlDocument.from_hash(xml).document
              when Libis::Tools::XmlDocument
                xml.document
              when Nokogiri::XML::Document
                xml
              else
                raise ArgumentError, "Libis::Tools::MetsFile#parse does not accept input of type #{xml.class}"
            end

  dmd_sec = xml_doc.root.xpath('mets:dmdSec', NS).inject({}) do |hash_dmd, dmd|
    hash_dmd[dmd[:ID]] = dmd.xpath('.//dc:record', NS).first.children.inject({}) do |h, c|
      h[c.name] = c.content if c.name != 'text'
      h
    end
    hash_dmd
  end
  amd_sec = xml_doc.root.xpath('mets:amdSec', NS).inject({}) do |hash_amd, amd|
    hash_amd[amd[:ID]] = [:tech, :rights, :source, :digiprov].inject({}) do |hash_sec, sec|
      md = amd.xpath("mets:#{sec}MD", NS).first
      return hash_sec unless md
      # hash_sec[sec] = md.xpath('mets:mdWrap/dnx:dnx/dnx:section', NS).inject({}) do |hash_md, dnx_sec|
      hash_sec[sec] = md.xpath('.//dnx:section', NS).inject({}) do |hash_md, dnx_sec|
        hash_md[dnx_sec[:id]] = dnx_sec.xpath('dnx:record', NS).inject([]) do |records, dnx_record|
          records << dnx_record.xpath('dnx:key', NS).inject({}) do |record_hash, key|
            record_hash[key[:id]] = key.content
            record_hash
          end
          records
        end
        hash_md
      end
      hash_sec
    end
    hash_amd
  end
  rep_sec = xml_doc.root.xpath('.//mets:fileGrp', NS).inject({}) do |hash_rep, rep|
    hash_rep[rep[:ID]] = {
        amd: amd_sec[rep[:ADMID]],
        dmd: amd_sec[rep[:DMDID]]
    }.cleanup.merge(
        rep.xpath('mets:file', NS).inject({}) do |hash_file, file|
          hash_file[file[:ID]] = {
              group: file[:GROUPID],
              amd: amd_sec[file[:ADMID]],
              dmd: dmd_sec[file[:DMDID]],
          }.cleanup
          hash_file
        end
    )
    hash_rep
  end
  {amd: amd_sec['ie-amd'],
   dmd: dmd_sec['ie-dmd'],
  }.cleanup.merge(
      xml_doc.root.xpath('.//mets:structMap[@TYPE="PHYSICAL"]', NS).inject({}) do |hash_map, map|
        rep_id = map[:ID].gsub(/-\d+$/, '')
        rep = rep_sec[rep_id]
        div_parser = lambda do |div|
          if div[:TYPE] == 'FILE'
            file_id = div.xpath('mets:fptr').first[:FILEID]
            {
                id: file_id
            }.merge rep[file_id]
          else
            div.children.inject({}) do |hash, child|
              # noinspection RubyScope
              hash[child[:LABEL]] = div_parser.call(child)
              hash
            end
          end
        end
        hash_map[map.xpath('mets:div').first[:LABEL]] = {
            id: rep_id,
            amd: rep_sec[rep_id][:amd],
            dmd: rep_sec[rep_id][:dmd],
        }.cleanup.merge(
            map.xpath('mets:div', NS).inject({}) do |hash, div|
              hash[div[:LABEL]] = div_parser.call(div)
            end
        )
        hash_map
      end
  )
end