Class: JhoveService

Inherits:
Object
  • Object
show all
Defined in:
lib/jhove_service.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(target_dir = nil) ⇒ JhoveService

Returns a new instance of JhoveService.

Parameters:

  • target_dir (String) (defaults to: nil)

    The directory into which output should be generated



18
19
20
21
# File 'lib/jhove_service.rb', line 18

# Sets up the output location and the location of the bundled program files.
#
# @param target_dir [String, nil] the directory into which output should be generated;
#   when nil, @target_pathname is left unset
def initialize(target_dir=nil)
  # The program files live in the 'bin' directory that is a sibling of this file's directory.
  program_dir = File.expand_path('../bin', File.dirname(__FILE__))
  @target_pathname = Pathname.new(target_dir) unless target_dir.nil?
  @bin_pathname = Pathname.new(program_dir)
end

Instance Attribute Details

#bin_pathname ⇒ Pathname

Returns The directory in which program files are located.

Returns:

  • (Pathname)

    The directory in which program files are located



9
10
11
# File 'lib/jhove_service.rb', line 9

# Reader for the directory in which program files are located.
#
# @return [Pathname] the value held in @bin_pathname
def bin_pathname
  @bin_pathname
end

#digital_object_id ⇒ String

Returns The druid of the object, which gets inserted in the root element of the output.

Returns:

  • (String)

    The druid of the object, which gets inserted in the root element of the output



15
16
17
# File 'lib/jhove_service.rb', line 15

# Reader for the identifier (druid) of the object being processed.
#
# NOTE(review): no assignment to @digital_object_id is visible in this listing;
# presumably it is set through a writer defined elsewhere in the class - confirm.
#
# @return [String] the value held in @digital_object_id
def digital_object_id
  @digital_object_id
end

#target_pathname ⇒ Pathname

Returns The directory in which output should be generated.

Returns:

  • (Pathname)

    The directory in which output should be generated



12
13
14
# File 'lib/jhove_service.rb', line 12

# Reader for the directory in which output should be generated.
#
# @return [Pathname, nil] the value held in @target_pathname
def target_pathname
  @target_pathname
end

Instance Method Details

#cleanup ⇒ void

This method returns an undefined value.

Cleans up the temporary workspace used to hold the metadata outputs.



110
111
112
113
# File 'lib/jhove_service.rb', line 110

# Removes the generated output files from the target directory.
#
# Deletes the JHOVE output file and the technicalMetadata file, skipping
# whichever of the two does not exist.
#
# @return [void]
def cleanup()
  [jhove_output, tech_md_output].each do |generated_file|
    generated_file.delete if generated_file.exist?
  end
end

#create_technical_metadata(jhove_pathname = jhove_output) ⇒ String

Converts JHOVE output to technicalMetadata, returning the output file path.

Parameters:

  • jhove_pathname (Pathname, String) (defaults to: jhove_output)

    The full path of the file containing JHOVE output to be transformed to technical metadata

Returns:

  • (String)

    The path of the output file produced by converting JHOVE output to technicalMetadata



82
83
84
85
86
87
88
89
90
91
92
# File 'lib/jhove_service.rb', line 82

# Transforms a file of JHOVE output into technicalMetadata by streaming it through a SAX handler.
#
# NOTE(review): the method name after `def` and the handler class name before `.new()` are
# missing from this listing, so it is not valid Ruby as shown. The heading for this listing
# names the method `create_technical_metadata`; the handler class must be restored from
# lib/jhove_service.rb.
#
# @param jhove_pathname [Pathname, String] the file containing JHOVE output; defaults to jhove_output
# @return [String] tech_md_output converted to a String
def (jhove_pathname=jhove_output)
  # Accept either a String or a Pathname from the caller.
  jhove_pathname = Pathname.new(jhove_pathname)
  jhovetm = .new()
  jhovetm.digital_object_id=self.digital_object_id
  jhovetm.output_file=tech_md_output
  # The handler object receives the SAX events emitted while the document is parsed.
  parser = Nokogiri::XML::SAX::Parser.new(jhovetm)
  # NOTE(review): the IO opened here is never explicitly closed within this method.
  parser.parse(jhove_pathname.open('rb'))
  tech_md_output.to_s
end

#exec_command(command) ⇒ Integer

Returns the exit code, or raises an exception if there is a problem.

Parameters:

  • command (String)

    the command to execute on the command line

Returns:

  • (Integer)

    the exit code (always 0, because any non-zero exit status raises an exception)



63
64
65
66
67
68
# File 'lib/jhove_service.rb', line 63

# Runs a command in a subshell and checks its exit status.
#
# The command's standard output is captured and discarded.
#
# @param command [String] the command to execute on the command line
# @return [Integer] the exit status, which is 0 whenever this method returns
# @raise [RuntimeError] if the exit status is anything other than 0
def exec_command(command)
  %x(#{command})
  status = $?.exitstatus
  return status if status == 0
  raise "Error when running JHOVE #{command}"
end

#get_jhove_command(content, output_file = jhove_output) ⇒ String

Returns The jhove-toolkit command to be executed in a system call.

Parameters:

  • content (Pathname, String)

    the directory path or filename containing the folder or file to be analyzed by JHOVE

  • output_file (Pathname, String) (defaults to: jhove_output)

    the output file to write the XML to, defaults to filename specified in jhove_output

Returns:

  • (String)

    The jhove-toolkit command to be executed in a system call



73
74
75
76
77
78
# File 'lib/jhove_service.rb', line 73

# Builds the command line that invokes the jhoveToolkit.sh script found in @bin_pathname.
#
# @param content [Pathname, String] the directory or file to be analyzed by JHOVE
# @param output_file [Pathname, String] the file to write the XML to; defaults to jhove_output
# @return [String] the script path followed by its arguments
def get_jhove_command(content,output_file = jhove_output)
  toolkit_script = @bin_pathname.join('jhoveToolkit.sh')
  # NOTE(review): the content path is preceded by an escaped quote (\") that has no closing
  # counterpart - confirm this is what jhoveToolkit.sh expects before changing it.
  options = "-h xml -o \"#{output_file}\" \\\"#{content}"
  "#{toolkit_script} #{options}"
end

#jhove_output ⇒ Pathname

Returns The output file from the JHOVE run.

Returns:

  • (Pathname)

    The output file from the JHOVE run



24
25
26
# File 'lib/jhove_service.rb', line 24

# Location of the JHOVE output file inside the target directory.
#
# @return [Pathname] @target_pathname extended with 'jhove_output.xml'
def jhove_output
  @target_pathname + 'jhove_output.xml'
end

#run_jhove(content_dir, fileset_file = nil) ⇒ String

Returns Run JHOVE to characterize all content files, returning the output file path.

Parameters:

  • content_dir (Pathname, String)

    the directory path containing the files to be analyzed by JHOVE

  • fileset_file (Pathname, String) (defaults to: nil)

    the pathname of the file listing which files should be processed. If nil, process all files.

Returns:

  • (String)

    Run JHOVE to characterize all content files, returning the output file path



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/jhove_service.rb', line 36

# Runs JHOVE over the content and returns the path of the resulting output file.
#
# Without a fileset_file, JHOVE is run once against content_dir. With a fileset_file, JHOVE is
# run once per listed file (each line is stripped and joined onto content_dir), because JHOVE
# cannot process a file list out of the box; the repInfo elements of the individual runs are
# then assembled under a single root element and written to jhove_output.
#
# @param content_dir [Pathname, String] the directory containing the files to be analyzed by JHOVE
# @param fileset_file [Pathname, String, nil] the file listing which files should be processed;
#   if nil, all files are processed
# @return [String] jhove_output converted to a String
# @raise [RuntimeError] if content_dir is not a directory, if fileset_file is missing or empty,
#   or if exec_command reports a failing JHOVE run
def run_jhove(content_dir, fileset_file=nil)
  raise "Content #{content_dir} not found" unless File.directory? content_dir
  if fileset_file.nil? # a simple directory gets called directly
    exec_command(get_jhove_command(content_dir))
  else
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    raise "File list #{fileset_file} not found" unless File.exist? fileset_file
    # File.readlines closes the file once it has been read.
    files = File.readlines(fileset_file)
    raise "File list #{fileset_file} empty" if files.empty?
    jhove_output_xml_ng = nil
    # Generate jhove output for each file in a separate xml file and keep its repInfo markup.
    rep_info_fragments = files.each_with_index.map do |filename, i|
      full_path_to_file = File.join(content_dir, filename.strip)
      output_file = @target_pathname.join("jhove_output_#{i}.xml")
      exec_command(get_jhove_command(full_path_to_file, output_file))
      jhove_output_xml_ng = File.open(output_file) { |f| Nokogiri::XML(f) }
      fragment = jhove_output_xml_ng.css("//repInfo").to_xml
      output_file.delete
      fragment
    end
    # Reuse the last parsed document: strip its children to get the bare root jhove node ...
    jhove_output_xml_ng.root.children.each(&:remove)
    # ... then add the combined repInfo markup for all files.
    jhove_output_xml_ng.root << rep_info_fragments.join
    File.write(jhove_output, jhove_output_xml_ng.to_xml)
  end
  jhove_output.to_s
end

#tech_md_output ⇒ Pathname

Returns The technicalMetadata.xml output file path.

Returns:

  • (Pathname)

    The technicalMetadata.xml output file path



29
30
31
# File 'lib/jhove_service.rb', line 29

# Location of the technicalMetadata file inside the target directory.
#
# @return [Pathname] @target_pathname extended with 'technicalMetadata.xml'
def tech_md_output
  @target_pathname + 'technicalMetadata.xml'
end

#upgrade_technical_metadata(old_tm) ⇒ String

Returns Convert old techMD data to new technicalMetadata format.

Parameters:

  • old_tm (String)

    the old techMD xml to be transformed to new technical metadata format

Returns:

  • (String)

    Convert old techMD data to new technicalMetadata format



96
97
98
99
100
101
102
103
104
105
106
# File 'lib/jhove_service.rb', line 96

# Transforms old techMD xml into the new technicalMetadata format by streaming it through a
# SAX handler, and returns the result as a String.
#
# NOTE(review): the method name after `def` and the handler class name before `.new()` are
# missing from this listing, so it is not valid Ruby as shown. The heading for this listing
# names the method `upgrade_technical_metadata`; the handler class must be restored from
# lib/jhove_service.rb.
#
# @param old_tm [String] the old techMD xml to be transformed
# @return [String] the contents accumulated in the in-memory StringIO buffer
def (old_tm)
  # In-memory buffer handed to the handler as `ios`; presumably the handler writes its
  # output there - confirm against the handler class.
  new_tm = StringIO.new()
  upgrade_sax_handler = .new()
  upgrade_sax_handler.digital_object_id=self.digital_object_id
  upgrade_sax_handler.ios = new_tm
  # The handler object receives the SAX events emitted while the document is parsed.
  parser = Nokogiri::XML::SAX::Parser.new(upgrade_sax_handler)
  # Parse the old techMD xml passed in by the caller.
  parser.parse(old_tm)
  new_tm.string
end