Class: JhoveService

Inherits:
Object
  • Object
show all
Defined in:
lib/jhove_service.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(target_dir = nil) ⇒ JhoveService

Returns a new instance of JhoveService.

Parameters:

  • target_dir (String) (defaults to: nil)

    The directory into which output should be generated



21
22
23
24
# File 'lib/jhove_service.rb', line 21

# Set up the service: remember where output goes and where the bundled programs live.
# @param [String] target_dir The directory into which output should be generated.
#   When nil, @target_pathname is left unset.
def initialize(target_dir=nil)
  unless target_dir.nil?
    @target_pathname = Pathname.new(target_dir)
  end
  # The program files are expected in a "bin" directory that is a sibling of this file's directory.
  this_dir = File.dirname(__FILE__)
  @bin_pathname = Pathname.new(File.expand_path(this_dir + '/../bin'))
end

Instance Attribute Details

#bin_pathname ⇒ Pathname

Returns The directory in which program files are located.

Returns:

  • (Pathname)

    The directory in which program files are located



12
13
14
# File 'lib/jhove_service.rb', line 12

# @return [Pathname] The directory in which program files are located
def bin_pathname
  @bin_pathname
end

#digital_object_id ⇒ String

Returns The druid of the object, which gets inserted in the root element of the output.

Returns:

  • (String)

    The druid of the object, which gets inserted in the root element of the output



18
19
20
# File 'lib/jhove_service.rb', line 18

# @return [String] The druid of the object, which gets inserted in the root element of the output
def digital_object_id
  @digital_object_id
end

#target_pathname ⇒ Pathname

Returns The directory in which output should be generated.

Returns:

  • (Pathname)

    The directory in which output should be generated



15
16
17
# File 'lib/jhove_service.rb', line 15

# @return [Pathname] The directory in which output should be generated
def target_pathname
  @target_pathname
end

Instance Method Details

#cleanup ⇒ void

This method returns an undefined value.

Cleans up the temporary workspace used to hold the metadata outputs.



122
123
124
125
# File 'lib/jhove_service.rb', line 122

# Remove the files produced by a run (the JHOVE output and the technical metadata output),
# skipping whichever of them does not exist.
# @return [void]
def cleanup()
  jhove_file = jhove_output
  jhove_file.delete if jhove_file.exist?

  tech_md_file = tech_md_output
  tech_md_file.delete if tech_md_file.exist?
end

#create_technical_metadata(jhove_pathname = jhove_output) ⇒ String

Converts JHOVE output to technicalMetadata, returning the output file path.

Parameters:

  • jhove_pathname (Pathname, String) (defaults to: jhove_output)

    The full path of the file containing JHOVE output to be transformed to technical metadata

Returns:

  • (String)

    The path of the technicalMetadata output file produced from the JHOVE output



94
95
96
97
98
99
100
101
102
103
104
# File 'lib/jhove_service.rb', line 94

# Convert JHOVE output to technicalMetadata by streaming it through a SAX handler.
# @param [Pathname,String] jhove_pathname The full path of the file containing JHOVE output
#   to be transformed to technical metadata (defaults to jhove_output)
# @return [String] the path of the technicalMetadata output file (tech_md_output as a String)
def create_technical_metadata(jhove_pathname=jhove_output)
  jhove_pathname = Pathname.new(jhove_pathname)
  # The handler receives the SAX events and is told where to write its result
  jhovetm = JhoveTechnicalMetadata.new
  jhovetm.digital_object_id = digital_object_id
  jhovetm.output_file = tech_md_output
  # Create a SAX parser
  parser = Nokogiri::XML::SAX::Parser.new(jhovetm)
  # Feed the parser the JHOVE XML; the block form closes the input file even if parsing raises
  jhove_pathname.open('rb') { |jhove_io| parser.parse(jhove_io) }
  tech_md_output.to_s
end

#exec_command(command) ⇒ Object

Parameters:

  • command (String)

    the command to execute on the command line



68
69
70
71
# File 'lib/jhove_service.rb', line 68

# Execute a command line with the bin directory as its working directory.
# @param [String] command the command to execute on the command line
# @return [nil] when the command exits successfully
# @raise [RuntimeError] when the command does not exit successfully; the message
#   contains the command and whatever it wrote to stderr
def exec_command(command)
  _out_text, err_text, exit_status = Open3.capture3(command, chdir: @bin_pathname)
  return if exit_status.success?

  raise "Error when running JHOVE #{command}:\n#{err_text}"
end

#get_jhove_command(input_path, output_file = jhove_output) ⇒ String

Returns the jhove-toolkit command to be executed in a system call.

Parameters:

  • input_path (Pathname, String)

    the directory path or filename containing the folder or file to be analyzed by JHOVE

  • output_file (Pathname, String) (defaults to: jhove_output)

    the output file to write the XML to, defaults to filename specified in jhove_output

Returns:

  • (String)

    The jhove-toolkit command to be executed in a system call



76
77
78
79
80
81
82
# File 'lib/jhove_service.rb', line 76

# Assemble the command line that invokes the JHOVE toolkit script with XML output.
# @param [Pathname,String] input_path the directory path or filename containing the folder or file to be analyzed by JHOVE
# @param [Pathname,String] output_file the output file to write the XML to, defaults to filename specified in jhove_output
# @return [String] The jhove-toolkit command to be executed in a system call
def get_jhove_command(input_path,output_file = jhove_output)
  # Shell-escape any special characters in the input path
  escaped_input = Shellwords.escape(input_path)
  # NOTE(review): the input path is preceded by an escaped double quote (\") that has no
  # closing counterpart -- presumably something jhoveToolkit.sh relies on; confirm before changing.
  format('%s -h xml -o "%s" \\"%s', './jhoveToolkit.sh', output_file, escaped_input)
end

#jhove_output ⇒ Pathname

Returns the output file from the JHOVE run.

Returns:

  • (Pathname)

    The output file from the JHOVE run



27
28
29
# File 'lib/jhove_service.rb', line 27

# Location of the raw JHOVE output file inside the target directory.
# @return [Pathname] the target directory joined with "jhove_output.xml"
def jhove_output
  output_name = 'jhove_output.xml'
  @target_pathname.join(output_name)
end

#remove_path_from_file_nodes(jhove_output_xml_ng, path) ⇒ Object

Parameters:

  • jhove_output_xml_ng (ng_xml_obj)

    the nokogiri xml output from jhove

  • path (String)

    the shared path that will be removed from each file name to ensure the file nodes are relative



86
87
88
89
90
# File 'lib/jhove_service.rb', line 86

# Make the uri attribute of every JHOVE repInfo node relative to the given path and
# percent-decode it. The document is modified in place.
# @param [ng_xml_obj] jhove_output_xml_ng the nokogiri xml output from jhove
# @param [String] path the shared path that will be removed from each file name to ensure the file nodes are relative
def remove_path_from_file_nodes(jhove_output_xml_ng,path)
  jhove_namespace = { 'jhove' => 'http://schema.openpreservation.org/ois/xml/ns/jhove' }
  # URI.decode no longer exists in Ruby 3.0+; the RFC 2396 parser's unescape performs the
  # same percent-decoding (and, like URI.decode, leaves "+" untouched).
  uri_parser = URI::RFC2396_Parser.new
  jhove_output_xml_ng.xpath('//jhove:repInfo', jhove_namespace).each do |filename_node|
    uri_attribute = filename_node.attributes['uri']
    next if uri_attribute.nil? # nothing to rewrite on a node without a uri attribute

    # Remove the path and any leading /, then decode
    relative_uri = uri_attribute.value.gsub(path.to_s, '').sub(%r{\A/}, '')
    uri_attribute.value = uri_parser.unescape(relative_uri)
  end
end

#run_jhove(content_dir, fileset_file = nil) ⇒ String

Runs JHOVE to characterize all content files, returning the output file path.

Parameters:

  • content_dir (Pathname, String)

    the directory path containing the files to be analyzed by JHOVE

  • fileset_file (Pathname, String) (defaults to: nil)

    the pathname of the file listing which files should be processed. If nil, process all files.

Returns:

  • (String)

    The path of the output file produced by running JHOVE on the content files



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/jhove_service.rb', line 39

# Run JHOVE to characterize content files and write a single combined XML output file.
# @param [Pathname,String] content_dir the directory path containing the files to be analyzed by JHOVE
# @param [Pathname,String] fileset_file the pathname of the file listing which files should be processed.
#   If nil, process all files. Blank lines in the list are ignored.
# @return [String] the path of the JHOVE output file
# @raise [RuntimeError] if content_dir is not a directory, if fileset_file does not exist,
#   or if fileset_file names no files
def run_jhove(content_dir, fileset_file=nil)
  raise "Content #{content_dir} not found" unless File.directory? content_dir
  if fileset_file.nil? # a simple directory gets called directly
    exec_command(get_jhove_command(content_dir))
    jhove_output_xml_ng = File.open(jhove_output) { |f| Nokogiri::XML(f) }
  else # a filelist gets run one by one, jhove cannot do this out of the box, so we need to run jhove file by file and then assemble the results ourselves into a single XML
    raise "File list #{fileset_file} not found" unless File.exist? fileset_file
    # File.readlines closes the list file for us; a blank entry would otherwise resolve to
    # content_dir itself, so blank lines are dropped
    files = File.readlines(fileset_file).map(&:strip).reject(&:empty?)
    raise "File list #{fileset_file} empty" if files.empty?
    rep_info_fragments = []
    jhove_output_xml_ng = nil
    files.each_with_index do |filename,i| # generate jhove output for each file in a separate xml file
      full_path_to_file = File.join(content_dir,filename)
      output_file = @target_pathname.join("jhove_output_#{i}.xml")
      exec_command(get_jhove_command(full_path_to_file,output_file))
      jhove_output_xml_ng = File.open(output_file) { |f| Nokogiri::XML(f) }
      rep_info_fragments << jhove_output_xml_ng.css("//repInfo").to_xml # collect the repInfo XML of every file
      output_file.delete
    end
    # The document of the last processed file serves as the template: strip all the children
    # to get the bare root jhove node, then add the combined xml for all files
    jhove_output_xml_ng.root.children.each {|n| n.remove}
    jhove_output_xml_ng.root << rep_info_fragments.join
  end
  remove_path_from_file_nodes(jhove_output_xml_ng,content_dir)
  File.write(jhove_output, jhove_output_xml_ng.to_xml)
  jhove_output.to_s
end

#tech_md_output ⇒ Pathname

Returns the technicalMetadata.xml output file path.

Returns:

  • (Pathname)

    The technicalMetadata.xml output file path



32
33
34
# File 'lib/jhove_service.rb', line 32

# Location of the technical metadata output file inside the target directory.
# @return [Pathname] the target directory joined with "technicalMetadata.xml"
def tech_md_output
  output_name = 'technicalMetadata.xml'
  @target_pathname.join(output_name)
end

#upgrade_technical_metadata(old_tm) ⇒ String

Converts old techMD data to the new technicalMetadata format.

Parameters:

  • old_tm (String)

    the old techMD xml to be transformed to new technical metadata format

Returns:

  • (String)

    The old techMD data converted to the new technicalMetadata format



108
109
110
111
112
113
114
115
116
117
118
# File 'lib/jhove_service.rb', line 108

# Convert old techMD data to the new technicalMetadata format by streaming it through
# a SAX handler that writes into an in-memory buffer.
# @param [String] old_tm the old techMD xml to be transformed to new technical metadata format
# @return [String] the contents of the buffer the handler wrote to
def upgrade_technical_metadata(old_tm)
  new_tm = StringIO.new
  upgrade_sax_handler = JhoveTechnicalMetadata.new
  upgrade_sax_handler.digital_object_id = digital_object_id
  # Hand the handler the in-memory buffer as its output stream
  upgrade_sax_handler.ios = new_tm
  # Create a SAX parser
  parser = Nokogiri::XML::SAX::Parser.new(upgrade_sax_handler)
  # Feed the parser the old XML
  parser.parse(old_tm)
  new_tm.string
end