Class: JhoveService
- Inherits:
-
Object
- Object
- JhoveService
- Defined in:
- lib/jhove_service.rb
Instance Attribute Summary collapse
-
#bin_pathname ⇒ Pathname
The directory in which program files are located.
-
#digital_object_id ⇒ String
The druid of the object, which gets inserted in the root element of the output.
-
#target_pathname ⇒ Pathname
The directory in which output should be generated.
Instance Method Summary collapse
-
#cleanup ⇒ void
Cleanup the temporary workspace used to hold the metadata outputs.
-
#create_technical_metadata(jhove_pathname = jhove_output) ⇒ String
Convert the JHOVE output to technicalMetadata, returning the output file path.
- #exec_command(command) ⇒ Object
-
#get_jhove_command(input_path, output_file = jhove_output) ⇒ String
The jhove-toolkit command to be executed in a system call.
-
#initialize(target_dir = nil) ⇒ JhoveService
constructor
A new instance of JhoveService.
-
#jhove_output ⇒ String
The output file from the JHOVE run.
- #remove_path_from_file_nodes(jhove_output_xml_ng, path) ⇒ Object
-
#run_jhove(content_dir, fileset_file = nil) ⇒ String
Run JHOVE to characterize all content files, returning the output file path.
-
#tech_md_output ⇒ String
The technicalMetadata.xml output file path.
-
#upgrade_technical_metadata(old_tm) ⇒ String
Convert old techMD data to the new technicalMetadata format.
Constructor Details
#initialize(target_dir = nil) ⇒ JhoveService
Returns a new instance of JhoveService.
21 22 23 24 |
# File 'lib/jhove_service.rb', line 21
#
# Builds a new JhoveService.
#
# @param target_dir [String, Pathname, nil] directory in which output should be
#   generated; when nil, @target_pathname is left unset
def initialize(target_dir = nil)
  @target_pathname = Pathname.new(target_dir) unless target_dir.nil?
  # Resolve the bin directory relative to this file (<this file's dir>/../bin)
  # into an absolute path. The listing previously read `File.(...)`, which is
  # `File.call(...)` and raises NoMethodError.
  @bin_pathname = Pathname.new(File.expand_path(File.dirname(__FILE__) + '/../bin'))
end
Instance Attribute Details
#bin_pathname ⇒ Pathname
Returns The directory in which program files are located.
12 13 14 |
# File 'lib/jhove_service.rb', line 12
#
# Reader for the directory in which program files are located.
#
# @return [Pathname] the current value of @bin_pathname
def bin_pathname
  @bin_pathname
end
#digital_object_id ⇒ String
Returns The druid of the object, which gets inserted in the root element of the output.
18 19 20 |
# File 'lib/jhove_service.rb', line 18
#
# Reader for the druid of the object, which gets inserted in the root element
# of the output.
#
# @return [String] the current value of @digital_object_id
def digital_object_id
  @digital_object_id
end
#target_pathname ⇒ Pathname
Returns The directory in which output should be generated.
15 16 17 |
# File 'lib/jhove_service.rb', line 15
#
# Reader for the directory in which output should be generated.
#
# @return [Pathname] the current value of @target_pathname
def target_pathname
  @target_pathname
end
Instance Method Details
#cleanup ⇒ void
This method returns an undefined value.
Returns Cleanup the temporary workspace used to hold the metadata outputs.
122 123 124 125 |
# File 'lib/jhove_service.rb', line 122
#
# Removes the metadata output files from the workspace: first the JHOVE output
# file, then the technicalMetadata file. A file that does not exist is skipped.
#
# @return [void]
def cleanup
  raw_output = jhove_output
  raw_output.delete if raw_output.exist?

  metadata_output = tech_md_output
  metadata_output.delete if metadata_output.exist?
end
#create_technical_metadata(jhove_pathname = jhove_output) ⇒ String
Returns Convert the JHOVE output to technicalMetadata, returning the output file path.
94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/jhove_service.rb', line 94
#
# Converts JHOVE output to technicalMetadata by streaming it through a
# JhoveTechnicalMetadata SAX handler.
#
# @param jhove_pathname [Pathname, String] location of the JHOVE output to
#   convert; defaults to #jhove_output
# @return [String] the technicalMetadata output file path (#tech_md_output as a String)
def create_technical_metadata(jhove_pathname = jhove_output)
  jhove_pathname = Pathname.new(jhove_pathname)

  jhovetm = JhoveTechnicalMetadata.new
  jhovetm.digital_object_id = digital_object_id
  jhovetm.output_file = tech_md_output # presumably where the handler writes its result — confirm in JhoveTechnicalMetadata

  # Create a SAX parser that reports parse events to the handler
  parser = Nokogiri::XML::SAX::Parser.new(jhovetm)

  # Feed the parser the XML. The block form closes the input file as soon as
  # parsing finishes (or fails) instead of leaving the handle open.
  jhove_pathname.open('rb') { |io| parser.parse(io) }

  tech_md_output.to_s
end
#exec_command(command) ⇒ Object
68 69 70 71 |
# File 'lib/jhove_service.rb', line 68
#
# Runs a command with @bin_pathname as its working directory and fails loudly
# when it exits unsuccessfully.
#
# @param command [String] the command line to run
# @return [nil] when the command succeeds
# @raise [RuntimeError] with the command and its captured stderr when the exit
#   status is not successful
def exec_command(command)
  # stdout is captured but not used
  _stdout, stderr, status = Open3.capture3(command, chdir: @bin_pathname)
  return if status.success?

  raise "Error when running JHOVE #{command}:\n#{stderr}"
end
#get_jhove_command(input_path, output_file = jhove_output) ⇒ String
Returns The jhove-toolkit command to be executed in a system call.
76 77 78 79 80 81 82 |
# File 'lib/jhove_service.rb', line 76
#
# Builds the jhove-toolkit command line to be executed in a system call.
#
# @param input_path [String, Pathname] the path handed to the toolkit for analysis
# @param output_file [String, Pathname] where the XML output is written;
#   defaults to #jhove_output
# @return [String] the command line, relative to the directory holding jhoveToolkit.sh
def get_jhove_command(input_path, output_file = jhove_output)
  # Escape any special characters in the path
  escaped_input = Shellwords.escape(input_path)
  # NOTE(review): the input path is preceded by a single escaped double quote
  # (\") with no closing counterpart — confirm jhoveToolkit.sh expects this.
  arguments = "-h xml -o \"#{output_file}\" \\\"#{escaped_input}"
  ['./jhoveToolkit.sh', arguments].join(' ')
end
#jhove_output ⇒ String
Returns The output file from the JHOVE run.
27 28 29 |
# File 'lib/jhove_service.rb', line 27
#
# Location of the output file from the JHOVE run: 'jhove_output.xml' inside
# the target directory.
#
# @return [Pathname] the path built by joining onto @target_pathname (other
#   methods call #exist? and #delete on the result)
def jhove_output
  @target_pathname.join('jhove_output.xml')
end
#remove_path_from_file_nodes(jhove_output_xml_ng, path) ⇒ Object
86 87 88 89 90 |
# File 'lib/jhove_service.rb', line 86
#
# Rewrites the `uri` attribute of every jhove:repInfo node in place: the given
# path is removed, one leading slash is dropped, and percent-escapes are decoded.
#
# @param jhove_output_xml_ng [Nokogiri::XML::Document] parsed JHOVE output (modified in place)
# @param path [String, Pathname] the path to strip from each uri
# @return [Object] whatever iterating the matched node set returns
def remove_path_from_file_nodes(jhove_output_xml_ng, path)
  namespaces = { 'jhove' => 'http://schema.openpreservation.org/ois/xml/ns/jhove' }
  path_text = path.to_s

  jhove_output_xml_ng.xpath('//jhove:repInfo', namespaces).each do |filename_node|
    uri_attribute = filename_node.attributes['uri']
    # remove path and any leading /
    relative_uri = uri_attribute.value.gsub(path_text, '').sub(%r{^/}, '')
    # URI.decode was removed in Ruby 3.0; the default parser's unescape performs
    # the same percent-decoding (and, unlike form decoding, leaves '+' untouched).
    uri_attribute.value = URI::DEFAULT_PARSER.unescape(relative_uri)
  end
end
#run_jhove(content_dir, fileset_file = nil) ⇒ String
Returns Run JHOVE to characterize all content files, returning the output file path.
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/jhove_service.rb', line 39
#
# Runs JHOVE to characterize content files and writes the result to
# #jhove_output.
#
# Without a file list, the content directory is handed to JHOVE in one call.
# With a file list, JHOVE is run once per listed file and the per-file results
# are assembled into a single XML document.
#
# @param content_dir [String, Pathname] directory holding the content files
# @param fileset_file [String, Pathname, nil] optional text file listing, one
#   per line, the files (relative to content_dir) to characterize
# @return [String] the output file path
# @raise [RuntimeError] when content_dir is not a directory, when the file list
#   is missing, or when the file list names no files
def run_jhove(content_dir, fileset_file = nil)
  raise "Content #{content_dir} not found" unless File.directory? content_dir

  if fileset_file.nil? # a simple directory gets called directly
    exec_command(get_jhove_command(content_dir))
    jhove_output_xml_ng = File.open(jhove_output) { |f| Nokogiri::XML(f) }
  else
    # a filelist gets run one by one; jhove cannot do this out of the box, so we
    # run jhove file by file and then assemble the results into a single XML
    # File.exists? was removed in Ruby 3.2
    raise "File list #{fileset_file} not found" unless File.exist? fileset_file

    # File.readlines closes the list after reading. Blank lines are dropped:
    # joined onto content_dir they would name the directory itself.
    files = File.readlines(fileset_file).map(&:strip).reject(&:empty?)
    raise "File list #{fileset_file} empty" if files.empty?

    combined_xml_output = +''
    jhove_output_xml_ng = nil # set by every iteration; the last document is reused as the template below
    files.each_with_index do |filename, i|
      # generate jhove output for each file in a separate xml file
      full_path_to_file = File.join(content_dir, filename)
      output_file = @target_pathname.join("jhove_output_#{i}.xml")
      exec_command(get_jhove_command(full_path_to_file, output_file))
      jhove_output_xml_ng = File.open(output_file) { |f| Nokogiri::XML(f) }
      combined_xml_output << jhove_output_xml_ng.css("//repInfo").to_xml # build up an XML string with all output
      output_file.delete
    end
    # strip all the children of the last document to get a bare root jhove node...
    jhove_output_xml_ng.root.children.each(&:remove)
    # ...then add the combined xml for all files
    jhove_output_xml_ng.root << combined_xml_output
  end

  remove_path_from_file_nodes(jhove_output_xml_ng, content_dir)
  File.write(jhove_output, jhove_output_xml_ng.to_xml)
  jhove_output.to_s
end
#tech_md_output ⇒ String
Returns The technicalMetadata.xml output file path.
32 33 34 |
# File 'lib/jhove_service.rb', line 32
#
# Location of the technicalMetadata.xml output file inside the target directory.
#
# @return [Pathname] the path built by joining onto @target_pathname (other
#   methods call #exist?, #delete and #to_s on the result)
def tech_md_output
  @target_pathname.join('technicalMetadata.xml')
end
#upgrade_technical_metadata(old_tm) ⇒ String
Returns Convert old techMD data to the new technicalMetadata format.
108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/jhove_service.rb', line 108
#
# Converts old techMD data to the new technicalMetadata format by streaming it
# through a JhoveTechnicalMetadata SAX handler that writes into an in-memory
# buffer.
#
# @param old_tm [String, IO] the old technical metadata XML (anything the
#   Nokogiri SAX parser's #parse accepts — TODO confirm the types callers pass)
# @return [String] the contents of the buffer after parsing
def upgrade_technical_metadata(old_tm)
  new_tm = StringIO.new

  upgrade_sax_handler = JhoveTechnicalMetadata.new
  upgrade_sax_handler.digital_object_id = digital_object_id
  upgrade_sax_handler.ios = new_tm # the handler is given the buffer as its output stream

  # Create a SAX parser that reports parse events to the handler
  parser = Nokogiri::XML::SAX::Parser.new(upgrade_sax_handler)
  # Feed the parser the old XML
  parser.parse(old_tm)

  new_tm.string
end