Class: Preservation::Transfer::Dataset

Inherits:
  Base < Object
Defined in:
lib/preservation/transfer/dataset.rb

Overview

Transfer preparation for dataset

Instance Attribute Summary

Attributes inherited from Base

#logger

Instance Method Summary

Constructor Details

#initialize(config) ⇒ Dataset

Returns a new instance of Dataset.

Parameters:

  • config (Hash)


# File 'lib/preservation/transfer/dataset.rb', line 12

def initialize(config)
  super()
  @config = config
end
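
The config hash is handed straight to Puree::Extractor::Dataset and is also read for the :username and :password used when fetching files in #prepare. A minimal sketch of constructing an instance; the keys shown are illustrative assumptions, and the exact keys required depend on your Puree version and Pure instance:

require 'preservation'

# Hypothetical configuration values for illustration only
config = {
  url:      'https://pure.example.com/ws/api',
  username: 'apiuser',
  password: 'secret'
}

transfer = Preservation::Transfer::Dataset.new config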

Instance Method Details

#prepare(uuid: nil, dir_scheme: :uuid, delay: 0) ⇒ Boolean

For a given uuid, fetch the metadata if necessary, prepare a directory in the ingest path and populate it with the files and a JSON description file.

Parameters:

  • uuid (String) (defaults to: nil)

    uuid to preserve

  • dir_scheme (Symbol) (defaults to: :uuid)

    how to make directory name

  • delay (Integer) (defaults to: 0)

    days to wait (after modification date) before preserving

Returns:

  • (Boolean)

    indicates presence of metadata description file



# File 'lib/preservation/transfer/dataset.rb', line 25

def prepare(uuid: nil,
            dir_scheme: :uuid,
            delay: 0)
  success = false

  if uuid.nil?
    @logger.error 'Missing uuid'
    exit
  end
  dir_base_path = Preservation.ingest_path

  dataset_extractor = Puree::Extractor::Dataset.new @config
  d = dataset_extractor.find uuid: uuid
  if !d
    @logger.error 'No metadata for ' + uuid
    exit
  end

  metadata_record = {
    doi:   d.doi,
    uuid:  d.uuid,
    title: d.title
  }

  # configurable to become more human-readable
  dir_name = Preservation::Builder.build_directory_name(metadata_record, dir_scheme)

  # continue only if dir_name is not empty (e.g. because there was no DOI)
  # continue only if there is no DB entry
  # continue only if the dataset has a DOI
  # continue only if there are files for this resource
  # continue only if it is time to preserve
  if !dir_name.nil? &&
     !dir_name.empty? &&
     !Preservation::Report::Transfer.in_db?(dir_name) &&
     d.doi &&
     !d.files.empty? &&
     Preservation::Temporal.time_to_preserve?(d.modified, delay)

    dir_file_path = dir_base_path + '/' + dir_name
    metadata_dir = dir_file_path + '/metadata/'
    metadata_filename = metadata_dir + 'metadata.json'

    # calculate total size of data files
    download_storage_required = 0
    d.files.each { |i| download_storage_required += i.size.to_i }

    # do we have enough space in filesystem to fetch data files?
    if Preservation::Storage.enough_storage_for_download? download_storage_required
      # @logger.info 'Sufficient disk space for ' + dir_file_path
    else
      @logger.error 'Insufficient disk space to store files fetched from Pure. Skipping ' + dir_file_path
    end

    # has metadata file been created? if so, files and metadata are in place
    # continue only if files not present in ingest location
    if !File.size? metadata_filename

      @logger.info 'Preparing ' + dir_name + ', Pure UUID ' + d.uuid

      data = []
      d.files.each do |f|
        o = package_dataset_metadata d, f
        data << o
        wget_str = Preservation::Builder.build_wget @config[:username],
                                                    @config[:password],
                                                    f.url

        Dir.mkdir(dir_file_path) if !Dir.exists?(dir_file_path)

        # fetch the file
        Dir.chdir(dir_file_path) do
          # puts 'Changing dir to ' + Dir.pwd
          # puts 'Size of ' + f.name + ' is ' + File.size(f.name).to_s
          if File.size?(f.name)
            # puts 'Should be deleting ' + f['name']
            File.delete(f.name)
          end
          # puts f.name + ' missing or empty'
          # puts wget_str
          `#{wget_str}`
        end
      end

      Dir.mkdir(metadata_dir) if !Dir.exists?(metadata_dir)

      pretty = JSON.pretty_generate( data, :indent => '  ')
      # puts pretty
      File.write(metadata_filename, pretty)
      @logger.info 'Created ' + metadata_filename
      success = true
    else
      @logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d.uuid +
                   ' because ' + metadata_filename + ' exists'
    end
  else
    @logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d.uuid
  end
  success
end
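
A usage sketch, assuming Preservation.ingest_path has been configured and that the uuid below is a placeholder for a real Pure dataset uuid:

transfer = Preservation::Transfer::Dataset.new config

# prepare a single dataset, naming its directory after the uuid and
# waiting at least 7 days after the last modification before preserving
done = transfer.prepare uuid:       'a-pure-dataset-uuid',
                        dir_scheme: :uuid,
                        delay:      7

puts 'Metadata description file in place' if done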

#prepare_batch(max: nil, dir_scheme: :uuid, delay: 30) ⇒ Object

For multiple datasets, fetch the metadata if necessary, prepare a directory in the ingest path for each and populate it with the files and a JSON description file.

Parameters:

  • max (Integer) (defaults to: nil)

    maximum to prepare, omit to set no maximum

  • dir_scheme (Symbol) (defaults to: :uuid)

    how to make directory name

  • delay (Integer) (defaults to: 30)

    days to wait (after modification date) before preserving



# File 'lib/preservation/transfer/dataset.rb', line 133

def prepare_batch(max: nil,
                  dir_scheme: :uuid,
                  delay: 30)
  collection_extractor = Puree::Extractor::Collection.new config:   @config,
                                                          resource: :dataset
  count = collection_extractor.count

  max = count if max.nil?

  batch_size = 10
  num_prepared = 0
  0.step(count, batch_size) do |n|

    dataset_collection = collection_extractor.find limit:  batch_size,
                                                   offset: n
    dataset_collection.each do |dataset|
      success = prepare uuid:       dataset.uuid,
                        dir_scheme: dir_scheme.to_sym,
                        delay:      delay

      num_prepared += 1 if success
      exit if num_prepared == max
    end
  end
end
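
A batch usage sketch; the values are illustrative:

transfer = Preservation::Transfer::Dataset.new config

# prepare up to 20 datasets, waiting 30 days after modification
transfer.prepare_batch max:        20,
                       dir_scheme: :uuid,
                       delay:      30

Note that prepare_batch calls exit once max datasets have been prepared, so it is best run as a standalone process (for example from a scheduled job) rather than inside a larger application.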