Module: Harvestdor

Defined in:
lib/harvestdor.rb,
lib/harvestdor/client.rb,
lib/harvestdor/errors.rb,
lib/harvestdor/version.rb,
lib/harvestdor/purl_xml.rb

Defined Under Namespace

Modules: Errors
Classes: Client

Constant Summary collapse

LOG_NAME_DEFAULT =
"harvestdor.log"
LOG_DIR_DEFAULT =
File.join(File.dirname(__FILE__), "..", "logs")
PURL_DEFAULT =
'https://purl.stanford.edu'
VERSION =
"0.3.2"
RDF_NAMESPACE =

Mixin: code to retrieve Purl public xml pieces

'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
OAI_DC_NAMESPACE =
'http://www.openarchives.org/OAI/2.0/oai_dc/'
MODS_NAMESPACE =
'http://www.loc.gov/mods/v3'

Class Method Summary collapse

Class Method Details

.content_metadata(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

The contentMetadata for this Fedora object, taken from the PURL public XML.



43
44
45
46
47
48
49
50
51
52
53
# File 'lib/harvestdor/purl_xml.rb', line 43

# Extracts the contentMetadata element from an object's public XML.
#
# @param object [Object] handed unchanged to +pub_xml+ (defined outside this
#   snippet); presumably a druid or an already-parsed public XML document --
#   confirm against +pub_xml+
# @param purl_url [String] base URL handed to +pub_xml+
# @return [Nokogiri::XML::Document] a new document parsed from the serialized
#   /publicObject/contentMetadata node(s)
# @raise [Harvestdor::Errors::MissingContentMetadata] if no node matches, or if
#   any StandardError occurs while extracting it
def self.content_metadata(object, purl_url = Harvestdor::PURL_DEFAULT)
  pub_xml_ng_doc = pub_xml(object, purl_url)
  begin
    # Serialize the matched node(s) and re-parse them so the result is a
    # standalone document (preserves namespaces, etc. for the node).
    ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/contentMetadata').to_xml)
    raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
    ng_doc
  rescue
    # Any StandardError raised above (e.g. a nil root) is reported uniformly
    # as missing contentMetadata.
    raise Harvestdor::Errors::MissingContentMetadata.new(object.inspect)
  end
end

.dc(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

The Dublin Core for this Fedora object, taken from the PURL public XML.



111
112
113
114
115
116
117
118
119
120
121
# File 'lib/harvestdor/purl_xml.rb', line 111

# Extracts the OAI Dublin Core element from an object's public XML.
#
# @param object [Object] handed unchanged to +pub_xml+ (defined outside this
#   snippet); presumably a druid or an already-parsed document -- confirm
# @param purl_url [String] base URL handed to +pub_xml+
# @return [Nokogiri::XML::Document] a new document parsed from the UTF-8
#   serialization of the /publicObject/dc:dc node(s)
# @raise [Harvestdor::Errors::MissingDC] if no node matches, or if any
#   StandardError occurs while extracting it
def self.dc(object, purl_url = Harvestdor::PURL_DEFAULT)
  public_doc = pub_xml(object, purl_url)
  begin
    # Serialize then re-parse so the node keeps its namespaces in a document of its own.
    dc_nodes = public_doc.root.xpath('/publicObject/dc:dc', { 'dc' => Harvestdor::OAI_DC_NAMESPACE })
    dc_doc = Nokogiri::XML(dc_nodes.to_xml(encoding: 'utf-8'))
    nothing_found = !dc_doc || dc_doc.children.empty?
    raise Harvestdor::Errors::MissingDC, object.inspect if nothing_found

    dc_doc
  rescue
    # Every StandardError from the extraction is surfaced as MissingDC.
    raise Harvestdor::Errors::MissingDC, object.inspect
  end
end

.identity_metadata(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

The identityMetadata for this Fedora object, taken from the PURL public XML.



60
61
62
63
64
65
66
67
68
69
70
# File 'lib/harvestdor/purl_xml.rb', line 60

# Extracts the identityMetadata element from an object's public XML.
#
# @param object [Object] handed unchanged to +pub_xml+ (defined outside this
#   snippet); presumably a druid or an already-parsed public XML document --
#   confirm against +pub_xml+
# @param purl_url [String] base URL handed to +pub_xml+
# @return [Nokogiri::XML::Document] a new document parsed from the serialized
#   /publicObject/identityMetadata node(s)
# @raise [Harvestdor::Errors::MissingIdentityMetadata] if no node matches, or
#   if any StandardError occurs while extracting it
def self.identity_metadata(object, purl_url = Harvestdor::PURL_DEFAULT)
  pub_xml_ng_doc = pub_xml(object, purl_url)
  begin
    # Serialize the matched node(s) and re-parse them so the result is a
    # standalone document (preserves namespaces, etc. for the node).
    ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/identityMetadata').to_xml)
    raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
    ng_doc
  rescue
    # Any StandardError raised above (e.g. a nil root) is reported uniformly
    # as missing identityMetadata.
    raise Harvestdor::Errors::MissingIdentityMetadata.new(object.inspect)
  end
end

.mods(druid, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

The MODS metadata for this Fedora object, fetched from the PURL server.



15
16
17
18
19
20
21
# File 'lib/harvestdor/purl_xml.rb', line 15

# Fetches "<purl_url>/<druid>.mods" through +http_client+ (defined outside this
# snippet) and parses the response body as UTF-8 XML.
#
# @param druid [String] identifier interpolated into the request URL
# @param purl_url [String] base URL of the PURL server
# @return [Nokogiri::XML::Document] the parsed response body
# @raise [Harvestdor::Errors::MissingMods] when the request raises
#   Faraday::ClientError
def self.mods(druid, purl_url = Harvestdor::PURL_DEFAULT)
  mods_url = "#{purl_url}/#{druid}.mods"
  response = http_client.get(mods_url)
  Nokogiri::XML(response.body, nil, 'UTF-8')
rescue Faraday::ClientError
  raise Harvestdor::Errors::MissingMods, druid
end

.public_xml(druid, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

The public XML for this Fedora object, fetched from the PURL page.



27
28
29
30
31
32
33
34
35
36
# File 'lib/harvestdor/purl_xml.rb', line 27

# Returns the public XML document for a druid, fetching
# "<purl_url>/<druid>.xml" through +http_client+ (defined outside this snippet).
#
# @param druid [String, Nokogiri::XML::Document] identifier to fetch; an
#   argument that is exactly a Nokogiri::XML::Document is returned as-is
# @param purl_url [String] base URL of the PURL server
# @return [Nokogiri::XML::Document] the parsed public XML
# @raise [Harvestdor::Errors::MissingPublicXml] when the parsed document has no
#   children
# @raise [Harvestdor::Errors::MissingPurlPage] when the request raises
#   Faraday::ClientError
def self.public_xml(druid, purl_url = Harvestdor::PURL_DEFAULT)
  # Short-circuit: nothing to fetch when the caller already holds a document.
  return druid if druid.instance_of?(Nokogiri::XML::Document)

  begin
    response = http_client.get("#{purl_url}/#{druid}.xml")
    parsed = Nokogiri::XML(response.body)
    raise Harvestdor::Errors::MissingPublicXml, druid if !parsed || parsed.children.empty?

    parsed
  rescue Faraday::ClientError
    raise Harvestdor::Errors::MissingPurlPage, druid
  end
end

.rdf(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

The RDF for this Fedora object, taken from the PURL public XML.



94
95
96
97
98
99
100
101
102
103
104
# File 'lib/harvestdor/purl_xml.rb', line 94

# Extracts the rdf:RDF element from an object's public XML.
#
# @param object [Object] handed unchanged to +pub_xml+ (defined outside this
#   snippet); presumably a druid or an already-parsed document -- confirm
# @param purl_url [String] base URL handed to +pub_xml+
# @return [Nokogiri::XML::Document] a new document parsed from the serialized
#   /publicObject/rdf:RDF node(s)
# @raise [Harvestdor::Errors::MissingRDF] if no node matches, or if any
#   StandardError occurs while extracting it
def self.rdf(object, purl_url = Harvestdor::PURL_DEFAULT)
  public_doc = pub_xml(object, purl_url)
  begin
    # Serialize then re-parse so the node keeps its namespaces in a document of its own.
    rdf_nodes = public_doc.root.xpath('/publicObject/rdf:RDF', { 'rdf' => Harvestdor::RDF_NAMESPACE })
    rdf_doc = Nokogiri::XML(rdf_nodes.to_xml)
    nothing_found = !rdf_doc || rdf_doc.children.empty?
    raise Harvestdor::Errors::MissingRDF, object.inspect if nothing_found

    rdf_doc
  rescue
    # Every StandardError from the extraction is surfaced as MissingRDF.
    raise Harvestdor::Errors::MissingRDF, object.inspect
  end
end

.rights_metadata(object, purl_url = Harvestdor::PURL_DEFAULT) ⇒ Nokogiri::XML::Document

The rightsMetadata for this Fedora object, taken from the PURL public XML.



77
78
79
80
81
82
83
84
85
86
87
# File 'lib/harvestdor/purl_xml.rb', line 77

# Extracts the rightsMetadata element from an object's public XML.
#
# @param object [Object] handed unchanged to +pub_xml+ (defined outside this
#   snippet); presumably a druid or an already-parsed public XML document --
#   confirm against +pub_xml+
# @param purl_url [String] base URL handed to +pub_xml+
# @return [Nokogiri::XML::Document] a new document parsed from the serialized
#   /publicObject/rightsMetadata node(s)
# @raise [Harvestdor::Errors::MissingRightsMetadata] if no node matches, or if
#   any StandardError occurs while extracting it
def self.rights_metadata(object, purl_url = Harvestdor::PURL_DEFAULT)
  pub_xml_ng_doc = pub_xml(object, purl_url)
  begin
    # Serialize the matched node(s) and re-parse them so the result is a
    # standalone document (preserves namespaces, etc. for the node).
    ng_doc = Nokogiri::XML(pub_xml_ng_doc.root.xpath('/publicObject/rightsMetadata').to_xml)
    raise Harvestdor::Errors::MissingRightsMetadata.new(object.inspect) if !ng_doc || ng_doc.children.empty?
    ng_doc
  rescue
    # Any StandardError raised above (e.g. a nil root) is reported uniformly
    # as missing rightsMetadata.
    raise Harvestdor::Errors::MissingRightsMetadata.new(object.inspect)
  end
end