Module: Harvestdor

Defined in:
lib/harvestdor.rb,
lib/harvestdor/client.rb,
lib/harvestdor/errors.rb,
lib/harvestdor/version.rb,
lib/harvestdor/purl_xml.rb

Defined Under Namespace

Modules: Errors

Classes: Client

Constant Summary collapse

LOG_NAME_DEFAULT =
"harvestdor.log"
LOG_DIR_DEFAULT =
File.join(File.dirname(__FILE__), "..", "logs")
PURL_DEFAULT =
'https://purl.stanford.edu'
VERSION =
"0.1.2"
RDF_NAMESPACE =

Mixin: code to retrieve Purl public xml pieces

'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
OAI_DC_NAMESPACE =
'http://www.openarchives.org/OAI/2.0/oai_dc/'
MODS_NAMESPACE =
'http://www.loc.gov/mods/v3'

Class Method Summary collapse

Class Method Details

.content_metadata(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

the contentMetadata for this fedora object, from the purl xml

Parameters:

  • object (Object)

    a String containing a druid (e.g. ab123cd4567), or a Nokogiri::XML::Document containing the public_xml for an object

  • purl_url (String) (defaults to: Harvestdor::PURL_DEFAULT)

    url for the purl server. default is Harvestdor::PURL_DEFAULT

Returns:

  • (Nokogiri::XML::Document)

    the contentMetadata for the fedora object



42
43
44
45
46
47
48
49
50
51
52
# File 'lib/harvestdor/purl_xml.rb', line 42

# Extracts the contentMetadata element from an object's purl public xml and
# returns it as a standalone document.
#
# @param object [Object] a String containing a druid (e.g. ab123cd4567), or a
#   Nokogiri::XML::Document containing the public_xml for an object
# @param purl_url [String] url for the purl server
# @return [Nokogiri::XML::Document] the contentMetadata for the object
# @raise [Harvestdor::Errors::MissingContentMetadata] if no contentMetadata
#   element is found, or if any StandardError occurs while extracting it
def self.content_metadata object, purl_url = Harvestdor::PURL_DEFAULT
  # pub_xml is defined outside this snippet; it is called before the begin
  # block, so whatever it raises propagates to the caller unchanged.
  pub_xml_ng_doc = pub_xml(object, purl_url)
  begin
    # preserve namespaces, etc for the node
    ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/contentMetadata').to_xml)
    raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
    ng_doc
  rescue
    # any failure while locating or re-parsing the node is reported as missing contentMetadata
    raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect)
  end
end

.dc(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

the Dublin Core for this fedora object, from the purl xml

Parameters:

  • object (Object)

    a String containing a druid (e.g. ab123cd4567), or a Nokogiri::XML::Document containing the public_xml for an object

  • purl_url (String) (defaults to: Harvestdor::PURL_DEFAULT)

    url for the purl server. default is Harvestdor::PURL_DEFAULT

Returns:

  • (Nokogiri::XML::Document)

    the dc for the fedora object



110
111
112
113
114
115
116
117
118
119
120
# File 'lib/harvestdor/purl_xml.rb', line 110

# Extracts the Dublin Core (oai_dc) element from an object's purl public xml
# and returns it as a standalone document.
#
# @param object [Object] a String containing a druid (e.g. ab123cd4567), or a
#   Nokogiri::XML::Document containing the public_xml for an object
# @param purl_url [String] url for the purl server
# @return [Nokogiri::XML::Document] the dc for the object
# @raise [Harvestdor::Errors::MissingDC] if no dc element is found, or if any
#   StandardError occurs while extracting it
def self.dc(object, purl_url = Harvestdor::PURL_DEFAULT)
  # resolved before the begin block so errors from pub_xml are not re-wrapped
  public_doc = pub_xml(object, purl_url)
  begin
    dc_nodes = public_doc.root.xpath('/publicObject/dc:dc', {'dc' => Harvestdor::OAI_DC_NAMESPACE})
    # serialize and re-parse so the node keeps its namespaces in its own document
    dc_doc = Nokogiri::XML(dc_nodes.to_xml(:encoding => 'utf-8'))
    unless dc_doc && !dc_doc.children.empty?
      raise Harvestdor::Errors::MissingDC.new(object.inspect)
    end
    dc_doc
  rescue
    raise Harvestdor::Errors::MissingDC.new(object.inspect)
  end
end

.identity_metadata(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

the identityMetadata for this fedora object, from the purl xml

Parameters:

  • object (Object)

    a String containing a druid (e.g. ab123cd4567), or a Nokogiri::XML::Document containing the public_xml for an object

  • purl_url (String) (defaults to: Harvestdor::PURL_DEFAULT)

    url for the purl server. default is Harvestdor::PURL_DEFAULT

Returns:

  • (Nokogiri::XML::Document)

    the identityMetadata for the fedora object



59
60
61
62
63
64
65
66
67
68
69
# File 'lib/harvestdor/purl_xml.rb', line 59

# Extracts the identityMetadata element from an object's purl public xml and
# returns it as a standalone document.
#
# @param object [Object] a String containing a druid (e.g. ab123cd4567), or a
#   Nokogiri::XML::Document containing the public_xml for an object
# @param purl_url [String] url for the purl server
# @return [Nokogiri::XML::Document] the identityMetadata for the object
# @raise [Harvestdor::Errors::MissingIdentityMetadata] if no identityMetadata
#   element is found, or if any StandardError occurs while extracting it
def self.identity_metadata object, purl_url = Harvestdor::PURL_DEFAULT
  # pub_xml is defined outside this snippet; it is called before the begin
  # block, so whatever it raises propagates to the caller unchanged.
  pub_xml_ng_doc = pub_xml(object, purl_url)
  begin
    # preserve namespaces, etc for the node
    ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/identityMetadata').to_xml)
    raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
    ng_doc
  rescue
    # any failure while locating or re-parsing the node is reported as missing identityMetadata
    raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect)
  end
end

.mods(druid, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

the MODS metadata for this fedora object, from the purl server

Parameters:

  • druid (String)

    e.g. ab123cd4567

  • purl_url (String) (defaults to: Harvestdor::PURL_DEFAULT)

    url for the purl server. default is Harvestdor::PURL_DEFAULT

Returns:

  • (Nokogiri::XML::Document)

    the MODS for the fedora object



14
15
16
17
18
19
20
# File 'lib/harvestdor/purl_xml.rb', line 14

# Fetches the MODS metadata for an object from the purl server.
#
# @param druid [String] e.g. ab123cd4567
# @param purl_url [String] url for the purl server
# @return [Nokogiri::XML::Document] the MODS for the object, parsed as UTF-8
# @raise [Harvestdor::Errors::MissingMods] if the purl server answers the
#   request with an HTTP error
def self.mods druid, purl_url = Harvestdor::PURL_DEFAULT
  # Open through the URI object rather than Kernel#open: Kernel#open no longer
  # fetches URLs on Ruby 3.0+. The block form closes the response IO once
  # Nokogiri has finished reading it.
  # NOTE(review): this requires purl_url to be a real URL; a local file path
  # would no longer be opened -- confirm no caller relies on that.
  URI.parse("#{purl_url}/#{druid}.mods").open do |io|
    Nokogiri::XML(io, nil, 'UTF-8')
  end
rescue OpenURI::HTTPError
  raise Harvestdor::Errors::MissingMods.new(druid)
end

.public_xml(druid, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

the public xml for this fedora object, from the purl page

Parameters:

  • druid (String)

    e.g. ab123cd4567

  • purl_url (String) (defaults to: Harvestdor::PURL_DEFAULT)

    url for the purl server. default is Harvestdor::PURL_DEFAULT

Returns:

  • (Nokogiri::XML::Document)

    the public xml for the fedora object



26
27
28
29
30
31
32
33
34
35
# File 'lib/harvestdor/purl_xml.rb', line 26

# Fetches the public xml for an object from its purl page.
#
# @param druid [String] e.g. ab123cd4567; a Nokogiri::XML::Document passed
#   here is returned as-is without any request being made
# @param purl_url [String] url for the purl server
# @return [Nokogiri::XML::Document] the public xml for the object
# @raise [Harvestdor::Errors::MissingPurlPage] if the purl server answers the
#   request with an HTTP error
# @raise [Harvestdor::Errors::MissingPublicXml] if the response parses to a
#   document with no children
def self.public_xml druid, purl_url = Harvestdor::PURL_DEFAULT
  return druid if druid.instance_of?(Nokogiri::XML::Document)
  begin
    # Open through the URI object rather than Kernel#open: Kernel#open no
    # longer fetches URLs on Ruby 3.0+. The block form closes the response IO
    # once Nokogiri has finished reading it.
    # NOTE(review): this requires purl_url to be a real URL; a local file path
    # would no longer be opened -- confirm no caller relies on that.
    ng_doc = URI.parse("#{purl_url}/#{druid}.xml").open { |io| Nokogiri::XML(io) }
    raise Harvestdor::Errors::MissingPublicXml.new(druid) if !ng_doc || ng_doc.children.empty?
    ng_doc
  rescue OpenURI::HTTPError
    raise Harvestdor::Errors::MissingPurlPage.new(druid)
  end
end

.rdf(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

the RDF for this fedora object, from the purl xml

Parameters:

  • object (Object)

    a String containing a druid (e.g. ab123cd4567), or a Nokogiri::XML::Document containing the public_xml for an object

  • purl_url (String) (defaults to: Harvestdor::PURL_DEFAULT)

    url for the purl server. default is Harvestdor::PURL_DEFAULT

Returns:

  • (Nokogiri::XML::Document)

    the RDF for the fedora object



93
94
95
96
97
98
99
100
101
102
103
# File 'lib/harvestdor/purl_xml.rb', line 93

# Extracts the rdf:RDF element from an object's purl public xml and returns
# it as a standalone document.
#
# @param object [Object] a String containing a druid (e.g. ab123cd4567), or a
#   Nokogiri::XML::Document containing the public_xml for an object
# @param purl_url [String] url for the purl server
# @return [Nokogiri::XML::Document] the RDF for the object
# @raise [Harvestdor::Errors::MissingRDF] if no RDF element is found, or if
#   any StandardError occurs while extracting it
def self.rdf(object, purl_url = Harvestdor::PURL_DEFAULT)
  # resolved before the begin block so errors from pub_xml are not re-wrapped
  public_doc = pub_xml(object, purl_url)
  begin
    rdf_nodes = public_doc.root.xpath('/publicObject/rdf:RDF', {'rdf' => Harvestdor::RDF_NAMESPACE})
    # serialize and re-parse so the node keeps its namespaces in its own document
    rdf_doc = Nokogiri::XML(rdf_nodes.to_xml)
    unless rdf_doc && !rdf_doc.children.empty?
      raise Harvestdor::Errors::MissingRDF.new(object.inspect)
    end
    rdf_doc
  rescue
    raise Harvestdor::Errors::MissingRDF.new(object.inspect)
  end
end

.rights_metadata(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

the rightsMetadata for this fedora object, from the purl xml

Parameters:

  • object (Object)

    a String containing a druid (e.g. ab123cd4567), or a Nokogiri::XML::Document containing the public_xml for an object

  • purl_url (String) (defaults to: Harvestdor::PURL_DEFAULT)

    url for the purl server. default is Harvestdor::PURL_DEFAULT

Returns:

  • (Nokogiri::XML::Document)

    the rightsMetadata for the fedora object



76
77
78
79
80
81
82
83
84
85
86
# File 'lib/harvestdor/purl_xml.rb', line 76

# Extracts the rightsMetadata element from an object's purl public xml and
# returns it as a standalone document.
#
# @param object [Object] a String containing a druid (e.g. ab123cd4567), or a
#   Nokogiri::XML::Document containing the public_xml for an object
# @param purl_url [String] url for the purl server
# @return [Nokogiri::XML::Document] the rightsMetadata for the object
# @raise [Harvestdor::Errors::MissingRightsMetadata] if no rightsMetadata
#   element is found, or if any StandardError occurs while extracting it
def self.rights_metadata object, purl_url = Harvestdor::PURL_DEFAULT
  # pub_xml is defined outside this snippet; it is called before the begin
  # block, so whatever it raises propagates to the caller unchanged.
  pub_xml_ng_doc = pub_xml(object, purl_url)
  begin
    # preserve namespaces, etc for the node
    ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/rightsMetadata').to_xml)
    raise Harvestdor::Errors::MissingRightsMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
    ng_doc
  rescue
    # any failure while locating or re-parsing the node is reported as missing rightsMetadata
    raise Harvestdor::Errors::MissingRightsMetadata.new(object.inspect)
  end
end