Module: ROF

Defined in:
lib/rof.rb,
lib/rof/cli.rb,
lib/rof/access.rb,
lib/rof/filter.rb,
lib/rof/ingest.rb,
lib/rof/filters.rb,
lib/rof/utility.rb,
lib/rof/version.rb,
lib/rof/collection.rb,
lib/rof/translator.rb,
lib/rof/compare_rof.rb,
lib/rof/osf_context.rb,
lib/rof/rdf_context.rb,
lib/rof/translators.rb,
lib/rof/filters/work.rb,
lib/rof/filters/bendo.rb,
lib/rof/filters/label.rb,
lib/rof/filters/date_stamp.rb,
lib/rof/filters/file_to_url.rb,
lib/rof/filters/access_to_relsext.rb,
lib/rof/translators/fedora_to_rof.rb,
lib/rof/translators/jsonld_to_rof.rb,
lib/rof/ingesters/rels_ext_ingester.rb,
lib/rof/ingesters/rights_metadata_ingester.rb,
lib/rof/translators/jsonld_to_rof/accumulator.rb,
lib/rof/translators/jsonld_to_rof/predicate_handler.rb,
lib/rof/translators/jsonld_to_rof/statement_handler.rb,
lib/rof/translators/jsonld_to_rof/predicate_object_handler.rb

Defined Under Namespace

Modules: CLI, Filters, Ingesters, Translators Classes: Access, Collection, CompareRof, Filter, MissingPidError, NotFobjectError, SourceError, TooManyIdentitiesError, Translator, Utility

Constant Summary collapse

# Version string of the ROF library.
VERSION = "1.2.10"
# Prefix names mapped to the namespace IRI each one abbreviates.
OsfPrefixList = {
  'dcterms'   => 'http://purl.org/dc/terms/',
  'osf-model' => 'http://www.dataconservancy.org/osf-business-object-model#'
}.freeze
# Term names (keys) mapped to the full predicate IRI used for each (values).
# Note that 'dc:subject' maps to the osf-business-object-model `hasTag` IRI
# rather than to a Dublin Core IRI.
OsfToNDMap = {
  'dc:created'      => 'http://purl.org/dc/terms/created',
  'dc:description'  => 'http://purl.org/dc/terms/description',
  'dc:title'        => 'http://purl.org/dc/terms/title',
  'dc:subject'      => 'http://www.dataconservancy.org/osf-business-object-model#hasTag',
  'isPublic'        => 'http://www.dataconservancy.org/osf-business-object-model#isPublic',
  'hasContributor'  => 'http://www.dataconservancy.org/osf-business-object-model#hasContributor',
  'isBibliographic' => 'http://www.dataconservancy.org/osf-business-object-model#isBibliographic',
  'hasFullName'     => 'http://www.dataconservancy.org/osf-business-object-model#hasFullName',
  'hasUser'         => 'http://www.dataconservancy.org/osf-business-object-model#hasUser',
  'registeredFrom'  => 'http://www.dataconservancy.org/osf-business-object-model#registeredFrom'
}.freeze
# Context hash: prefix names mapped to namespace IRIs, followed by term
# definitions that attach an '@type' to three date terms.
RdfContext = {
  # prefix => namespace IRI
  'bibo'          => 'http://purl.org/ontology/bibo/',
  'dc'            => 'http://purl.org/dc/terms/',
  'ebucore'       => 'http://www.ebu.ch/metadata/ontologies/ebucore/ebucore#',
  'foaf'          => 'http://xmlns.com/foaf/0.1/',
  'hydramata-rel' => 'http://projecthydra.org/ns/relations#',
  'hydra'         => 'http://projecthydra.org/ns/relations#',
  'mrel'          => 'http://id.loc.gov/vocabulary/relators/',
  'ms'            => 'http://www.ndltd.org/standards/metadata/etdms/1.1/',
  'nd'            => 'https://library.nd.edu/ns/terms/',
  'rdfs'          => 'http://www.w3.org/2000/01/rdf-schema#',
  'ths'           => 'http://id.loc.gov/vocabulary/relators/',
  'vracore'       => 'http://purl.org/vra/',
  'pav'           => 'http://purl.org/pav/',

  # terms typed as XML Schema dates
  'dc:dateSubmitted' => { '@type' => 'http://www.w3.org/2001/XMLSchema#date' },
  'dc:created'       => { '@type' => 'http://www.w3.org/2001/XMLSchema#date' },
  'dc:modified'      => { '@type' => 'http://www.w3.org/2001/XMLSchema#date' }
}.freeze
# Context hash whose default vocabulary ('@vocab') is the Fedora
# relations-external namespace. Contains prefix definitions followed by
# per-term definitions; most terms are declared with '@type' => '@id'.
RelsExtRefContext = {
  '@vocab'               => 'info:fedora/fedora-system:def/relations-external#',
  'fedora-model'         => 'info:fedora/fedora-system:def/model#',
  'pav'                  => 'http://purl.org/pav/',
  'hydra'                => 'http://projecthydra.org/ns/relations#',
  'hydramata-rel'        => 'http://projecthydra.org/ns/relations#',
  'hasModel'             => { '@id' => 'fedora-model:hasModel', '@type' => '@id' },
  'hasEditor'            => { '@id' => 'hydra:hasEditor', '@type' => '@id' },
  'hasEditorGroup'       => { '@id' => 'hydra:hasEditorGroup', '@type' => '@id' },
  'hasViewer'            => { '@id' => 'hydra:hasViewer', '@type' => '@id' },
  'hasViewerGroup'       => { '@id' => 'hydra:hasViewerGroup', '@type' => '@id' },
  'isPartOf'             => { '@type' => '@id' },
  'isMemberOfCollection' => { '@type' => '@id' },
  'isEditorOf'           => { '@id' => 'hydra:isEditorOf', '@type' => '@id' },
  'hasMember'            => { '@type' => '@id' },
  'previousVersion'      => 'http://purl.org/pav/previousVersion'
}.freeze

Class Method Summary collapse

Class Method Details

.af_model_name(model) ⇒ Object



200
201
202
# File 'lib/rof/ingest.rb', line 200

# Prefix +model+ with "info:fedora/afmodel:".
#
# @param model [#to_s] the model name to be prefixed
# @return [String] the prefixed model name
def self.af_model_name(model)
  format('info:fedora/afmodel:%s', model)
end

.find_file_and_open(fname, search_path, flags) ⇒ Object

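Finds fname by looking through the directories in search_path (an array of strings) in order, and opens the first match using flags. If fname is an absolute path (it begins with "/"), it is opened directly and search_path is not consulted; otherwise no file will be found when search_path is empty. Raises Errno::ENOENT if no file is found; otherwise returns the opened File object.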

Raises:

  • (Errno::ENOENT)


185
186
187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/rof/ingest.rb', line 185

# Open +fname+, looking for it in each directory of +search_path+ in order.
#
# @param fname [String] file name; a name starting with "/" is opened as-is
# @param search_path [Array<String>] directories to try, in order
# @param flags [String] mode passed through to File.open
# @return [File] the opened file; the caller is responsible for closing it
# @raise [Errno::ENOENT] when a relative +fname+ exists in none of the
#   directories (always the case when +search_path+ is empty)
def self.find_file_and_open(fname, search_path, flags)
  # An absolute path bypasses the search entirely.
  return File.open(fname, flags) if fname[0] == "/"

  search_path.each do |dir|
    begin
      return File.open(File.join(dir, fname), flags)
    rescue Errno::ENOENT
      # not in this directory; fall through to the next one
    end
  end
  raise Errno::ENOENT, fname
end

.Ingest(item, fedora = nil, search_paths = [], bendo = nil) ⇒ Array<String>

Ingest or update item in fedora. If fedora is nil, then we only verify that item is in the proper format. Otherwise fedora is a Rubydora::Repository object (for now…). Returns a list of ingested datastreams if everything is okay; otherwise raises an exception depending on the error.

Parameters:

  • item (Hash)
    • has string based keys

  • fedora (#find_or_initialize, nil) (defaults to: nil)
    • when nil then we verify that item is in the proper format; otherwise we ingest or update

  • search_paths (Array) (defaults to: [])
  • bendo (nil, String) (defaults to: nil)
    • when nil, no attempts to substitute the bendo URL

Returns:

  • (Array<String>)

    List of ingested data streams; Implies everything ingested ok.

Raises:



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/rof/ingest.rb', line 28

# Ingest or update +item+ in fedora, or only validate it when +fedora+ is nil.
#
# Side effect: when +item+ has no "pid" key, item["pid"] is set from
# item["id"], so the caller's hash is modified.
#
# @param item [Hash] has string based keys; item["type"] must be "fobject"
# @param fedora [#find_or_initialize, nil] when nil no document is looked up
#   or saved here, and nil is handed to the per-datastream helpers
# @param search_paths [Array] passed through to ingest_datastream
# @param bendo [nil, String] passed through to ingest_datastream
# @return [Array<String>] names of the datastreams handled, in the order
#   they were processed
# @raise [NotFobjectError] if item["type"] is not "fobject"
# @raise [TooManyIdentitiesError] if both "id" and "pid" keys are present
# @raise [MissingPidError] if the resulting item["pid"] is not a String
def self.Ingest(item, fedora=nil, search_paths=[], bendo=nil)
  raise NotFobjectError if item["type"] != "fobject"
  raise TooManyIdentitiesError if item.key?("id") && item.key?("pid")
  item["pid"] = item["id"] unless item.key?("pid")
  raise MissingPidError unless item["pid"].is_a? String
  models = string_nil_to_array(item["model"])
  models += string_nil_to_array(item["af-model"]).map { |m| af_model_name(m) }
  # does it already exist in fedora? Create it otherwise
  doc = nil
  if fedora
    doc = fedora.find_or_initialize(item["pid"])
    # the addRelationship API is broken in Fedora 3.6.x.
    # Since the `models` method in Rubydora uses that API, it
    # also doesn't work. ActiveFedora is not affected since it
    # serializes to RELS-EXT itself, bypassing addRelationship endpoint.
    # models.each do |m|
    #   doc.models << m unless doc.models.include?(m)
    # end

    # it seems like we need to save the document before adding datastreams?!?
    doc.save
  end

  ds_touched = []
  # update rels-ext if there is either a rels-ext present or if there
  # is a model to set. Otherwise, don't touch it!
  if (item.has_key?("rels-ext") || !models.empty?)
    update_rels_ext(models, item, doc)
    ds_touched << "rels-ext"
  end
  # now handle all the other datastreams
  item.each do |key,value|
    case key
    # fields having special treatment
    when "rights"
      self.ingest_rights_metadata(item, doc)
      ds_touched << "rightsMetadata"
    when "metadata"
      self.ingest_ld_metadata(item, doc)
      ds_touched << "descMetadata"

    # ignore these fields
    when "type", "pid", "model", "id", "af-model", "rels-ext", "collections"

    # datastream fields
    when /\A(.+)-file\Z/, /\A(.+)-meta\Z/, /\A(.+)\Z/
      # ingest a datastream; "foo", "foo-file" and "foo-meta" all name the
      # datastream "foo", which is processed only once
      dsname = $1
      next if ds_touched.include?(dsname)
      self.ingest_datastream(dsname, item, doc, search_paths, bendo)
      ds_touched << dsname
    end
  end
  return ds_touched
end

.ingest_datastream(dsname, item, fdoc, search_paths, bendo) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/rof/ingest.rb', line 84

# Ingest (or, when +fdoc+ is nil, only validate) the datastream +dsname+.
#
# Reads three keys of +item+: +dsname+ (inline content), "<dsname>-file"
# (name of a file holding the content) and "<dsname>-meta" (a Hash of
# datastream options: "mime-type", "label", "versionable", "control-group"
# and "URL"). Values in the meta hash override the defaults below.
#
# @param dsname [String] name of the datastream
# @param item [Hash] the object being ingested
# @param fdoc [#[], nil] fedora document; fdoc[dsname] must answer the
#   datastream setters used below and #save. When nil nothing is saved, but
#   a referenced file is still opened (and closed), so a missing file is
#   reported.
# @param search_paths [Array<String>] directories searched for the file
# @param bendo [String, nil] when given and the datastream has a URL, the
#   first "bendo:" in the URL is replaced by this value
# @return [Object, nil] the result of the datastream's #save, or nil when
#   +fdoc+ is nil
# @raise [SourceError] on conflicting or malformed source fields
def self.ingest_datastream(dsname, item, fdoc, search_paths, bendo)
  opened_file = nil

  # What kind of content is there?
  ds_content = item[dsname]
  ds_filename = item["#{dsname}-file"]
  ds_meta = item["#{dsname}-meta"]
  if ds_filename && ds_content
    raise SourceError.new("Both #{dsname} and #{dsname}-file are present.")
  end
  if ds_content && !ds_content.is_a?(String)
    raise SourceError.new("Content for #{dsname} is not a string.")
  end
  # The meta value is merged into a Hash below, so reject anything else here
  # with the same error class as the other source problems instead of
  # letting Hash#merge! fail with a TypeError.
  if ds_meta && !ds_meta.is_a?(Hash)
    raise SourceError.new("Metadata for #{dsname} is not a hash.")
  end
  # A URL, without content or file, is an R datastream
  # A URL, with content or file, raises an error
  ds_url = ds_meta["URL"] if ds_meta
  if ds_url && ds_content
    raise SourceError.new("Both #{ds_url} and #{dsname} are present.")
  end
  if ds_url && ds_filename
    raise SourceError.new("Both #{ds_url} and #{dsname}-file are present.")
  end

  # defaults; overridden by whatever the meta hash supplies
  md = {"mime-type" => "text/plain",
        "label" => "",
        "versionable" => true,
        "control-group" => "M",
  }
  md.merge!(ds_meta) if ds_meta

  if ds_url
    md["control-group"] = "R"

    # If the bendo server was passed in the command line, assume that the URL is in
    # the form "bendo:/item/<item#>/<item name> and substitute bendo: w/ the server name
    # if no bendo provided, use whatever's there.
    md["URL"] = md["URL"].sub("bendo:", bendo) if bendo
  end

  # NOTE(dbrower): this could be refactored a bit. I was trying to keep the
  # same path for whether fdoc is nil or not as much as possible.
  ds = nil
  if fdoc
    ds = fdoc[dsname]
    # TODO(dbrower): maybe verify these options to be within bounds?
    ds.controlGroup = md["control-group"]
    ds.dsLabel = md["label"]
    ds.versionable = md["versionable"]
    ds.mimeType = md["mime-type"]
    ds.dsLocation = md["URL"] if md["URL"]
  end
  if ds_filename
    # the open file itself becomes the content handed to the datastream
    opened_file = self.find_file_and_open(ds_filename, search_paths, "rb")
    ds_content = opened_file
  end
  if ds
    ds.content = ds_content if ds_content
    ds.save
  end
ensure
  # only a file opened by this method is closed here
  opened_file.close if opened_file
end

.ingest_ld_metadata(item, fdoc) ⇒ Object



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/rof/ingest.rb', line 155

# Convert the JSON-LD in item['metadata'] to N-Triples and, when +fdoc+ is
# given, store the result in its 'descMetadata' datastream.
#
# Side effect: unless the metadata has a "@graph" key, a missing "@id" is
# filled in on item['metadata'] itself as "info:fedora/<pid>".
#
# @param item [Hash] must have a 'metadata' Hash; 'pid' is used for the "@id"
# @param fdoc [#[], nil] fedora document; nothing is saved when nil
# @return [String] the N-Triples content, tagged as UTF-8
def self.ingest_ld_metadata(item, fdoc)
  input = item['metadata']
  # sometimes json-ld generates @graph structures when converting from fedora to ROF.
  # in that case, don't provide an id key
  if !input.has_key?("@graph")
    input["@id"] = "info:fedora/#{item['pid']}" unless input["@id"]
  end
  graph = RDF::Graph.new << JSON::LD::API.toRdf(input)
  content = graph.dump(:ntriples)
  # we read the rof file as utf-8. the RDF gem seems to convert it back to
  # the default encoding. so fix it.
  content.force_encoding('UTF-8')
  if fdoc
    ds = fdoc['descMetadata']
    ds.mimeType = "text/plain"
    ds.content = content
    ds.save
  end
  content
end

.ingest_rights_metadata(item, fdoc) ⇒ Object



151
152
153
# File 'lib/rof/ingest.rb', line 151

# Delegate handling of the item's rights to Ingesters::RightsMetadataIngester.
#
# @param item [Hash] the object being ingested
# @param fdoc [Object, nil] fedora document, passed as +fedora_document+
# @return [Object] whatever the ingester's .call returns
def self.ingest_rights_metadata(item, fdoc)
  Ingesters::RightsMetadataIngester.call(item: item, fedora_document: fdoc)
end

.string_nil_to_array(x) ⇒ Object



204
205
206
207
208
# File 'lib/rof/ingest.rb', line 204

# Normalise +x+ to an Array: nil becomes an empty array, an Array is
# returned unchanged (the same object), and any other value is wrapped in a
# one-element array.
#
# @param x [Object, Array, nil]
# @return [Array]
def self.string_nil_to_array(x)
  case x
  when nil then []
  when Array then x
  else [x]
  end
end

.update_rels_ext(models, item, fdoc) ⇒ Object



176
177
178
# File 'lib/rof/ingest.rb', line 176

# Delegate the RELS-EXT update to Ingesters::RelsExtIngester.
#
# @param models [Array] model names to record
# @param item [Hash] the object being ingested
# @param fdoc [Object, nil] fedora document, passed as +fedora_document+
# @return [Object] whatever the ingester's .call returns
def self.update_rels_ext(models, item, fdoc)
  Ingesters::RelsExtIngester.call(
    models: models,
    item: item,
    fedora_document: fdoc
  )
end