Class: Preservation::Transfer::Dataset

Inherits:
Base
  • Object
show all
Defined in:
lib/preservation/transfer/dataset.rb

Overview

Transfer preparation for dataset

Instance Attribute Summary

Attributes inherited from Base

#logger

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ Dataset



12
13
14
15
# File 'lib/preservation/transfer/dataset.rb', line 12

# Builds a dataset transfer helper.
#
# @param config stored as-is in @config; the other methods of this class hand
#   it to the Puree extractors and read @config[:username] and
#   @config[:password] from it when building the download command.
def initialize(config)
  # Explicit empty parentheses: call the parent constructor with no arguments
  # instead of implicitly forwarding `config`.
  super()
  @config = config
end

Instance Method Details

#prepare(uuid: nil, dir_scheme: :uuid, delay: 0) ⇒ Boolean

For the given UUID, if necessary, fetch the metadata, prepare a directory in the ingest path, and populate it with the data files and a JSON description file.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/preservation/transfer/dataset.rb', line 25

# Prepares one dataset for transfer: looks up its metadata through
# Puree::Extractor::Dataset, derives a directory name, downloads each file into
# <ingest path>/<directory name> and writes a JSON description to
# <directory>/metadata/metadata.json.
#
# NOTE(review): this listing appears to have lost several identifiers during
# extraction (empty assignment targets and empty argument positions below), so
# it is not valid Ruby as shown. Restore them from
# lib/preservation/transfer/dataset.rb before relying on this text.
#
# @param uuid the dataset identifier passed to the extractor's `find`
# @param dir_scheme passed to Preservation::Builder.build_directory_name to
#   choose how the directory is named
# @param delay passed, together with the record's `modified` value, to
#   Preservation::Temporal.time_to_preserve?
# @return [Boolean] true only when the metadata file was written in this call;
#   false when the dataset was skipped
#
# Calls `exit` (terminating the process) when uuid is nil or when no metadata
# is found for it.
def prepare(uuid: nil,
            dir_scheme: :uuid,
            delay: 0)
  success = false

  if uuid.nil?
    # NOTE(review): uuid is nil on this branch, so String#+ raises TypeError
    # before the message is logged and before `exit` is reached.
    @logger.error 'Missing ' + uuid
    exit
  end
  dir_base_path = Preservation.ingest_path

  dataset_extractor = Puree::Extractor::Dataset.new @config
  d = dataset_extractor.find uuid: uuid
  if !d
    @logger.error 'No metadata for ' + uuid
    exit
  end

  # The fields offered to the directory-name builder.
  # NOTE(review): the assignment target is missing in this listing.
   = {
    doi:   d.doi,
    uuid:  d.uuid,
    title: d.title
  }

  # configurable to become more human-readable
  # NOTE(review): the first argument is missing in this listing; presumably it
  # is the hash built just above - confirm against the source file.
  dir_name = Preservation::Builder.build_directory_name(, dir_scheme)

  # continue only if dir_name is not empty (e.g. because there was no DOI)
  # continue only if there is no DB entry
  # continue only if the dataset has a DOI
  # continue only if there are files for this resource
  # continue only if it is time to preserve
  if !dir_name.nil? &&
     !dir_name.empty? &&
     !Preservation::Report::Transfer.in_db?(dir_name) &&
     d.doi &&
     !d.files.empty? &&
     Preservation::Temporal.time_to_preserve?(d.modified, delay)

    dir_file_path = dir_base_path + '/' + dir_name
    # NOTE(review): both assignment targets below are missing in this listing.
    # The values are the metadata directory path and the full path of
    # metadata.json inside it.
     = dir_file_path + '/metadata/'
     =  + 'metadata.json'

    # calculate total size of data files
    download_storage_required = 0
    d.files.each { |i| download_storage_required += i.size.to_i }

    # do we have enough space in filesystem to fetch data files?
    # Only the failure case is logged; nothing here stops the method, so the
    # download below is still attempted when space is reported as insufficient.
    # NOTE(review): the log message says "Skipping" - confirm whether an early
    # exit from this branch was intended.
    if Preservation::Storage.enough_storage_for_download? download_storage_required
      # @logger.info 'Sufficient disk space for ' + dir_file_path
    else
      @logger.error 'Insufficient disk space to store files fetched from Pure. Skipping ' + dir_file_path
    end

    # has metadata file been created? if so, files and metadata are in place
    # continue only if files not present in ingest location
    # File.size? is nil for a missing or zero-length file.
    # NOTE(review): the argument is missing in this listing; presumably it is
    # the metadata.json path built above.
    if !File.size? 

      @logger.info 'Preparing ' + dir_name + ', Pure UUID ' + d.uuid

      # One description object is collected per file and serialised afterwards.
      data = []
      d.files.each do |f|
        # NOTE(review): the name of the method called with (d, f) is missing in
        # this listing.
        o =  d, f
        data << o
        wget_str = Preservation::Builder.build_wget @config[:username],
                                                    @config[:password],
                                                    f.url

        # NOTE(review): Dir.exists? is a deprecated alias that was removed in
        # Ruby 3.2; Dir.exist? is the supported spelling.
        Dir.mkdir(dir_file_path) if !Dir.exists?(dir_file_path)

        # fetch the file
        Dir.chdir(dir_file_path) do
          # puts 'Changing dir to ' + Dir.pwd
          # puts 'Size of ' + f.name + ' is ' + File.size(f.name).to_s
          # Remove any non-empty earlier copy before fetching again.
          if File.size?(f.name)
            # puts 'Should be deleting ' + f['name']
            File.delete(f.name)
          end
          # puts f.name + ' missing or empty'
          # puts wget_str
          # Runs the command in a subshell; its output and exit status are not
          # inspected here.
          `#{wget_str}`
        end
      end

      # NOTE(review): the arguments are missing in this listing; presumably
      # both are the metadata directory path.
      Dir.mkdir() if !Dir.exists?()

      pretty = JSON.pretty_generate( data, :indent => '  ')
      # puts pretty
      # NOTE(review): the path argument is missing here and in the log line
      # that follows.
      File.write(,pretty)
      @logger.info 'Created ' 
      success = true
    else
      @logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d.uuid +
                   ' because ' +  + ' exists'
    end
  else
    # NOTE(review): this branch is also reached when dir_name is nil (see the
    # condition above), in which case the concatenation raises TypeError.
    @logger.info 'Skipping ' + dir_name + ', Pure UUID ' + d.uuid
  end
  success
end

#prepare_batch(max: nil, dir_scheme: :uuid, delay: 30) ⇒ Object

For multiple datasets, if necessary, fetch the metadata for each one, prepare a directory in the ingest path, and populate it with the data files and a JSON description file.



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/preservation/transfer/dataset.rb', line 133

# Walks the whole dataset collection page by page and calls #prepare for every
# dataset found.
#
# @param max stop once this many datasets have been prepared successfully;
#   when nil, the collection's total count is used as the limit
# @param dir_scheme converted with #to_sym and forwarded to #prepare
# @param delay forwarded unchanged to #prepare
#
# Reaching the limit calls `exit`, which ends the whole process rather than
# just returning from this method.
def prepare_batch(max: nil,
                  dir_scheme: :uuid,
                  delay: 30)
  extractor = Puree::Extractor::Collection.new(config: @config, resource: :dataset)
  total = extractor.count
  max = total if max.nil?

  page_size = 10
  prepared = 0

  # Offsets run 0, page_size, 2 * page_size, ... up to and including total.
  0.step(total, page_size) do |offset|
    page = extractor.find(limit: page_size, offset: offset)

    page.each do |dataset|
      prepared += 1 if prepare(uuid: dataset.uuid, dir_scheme: dir_scheme.to_sym, delay: delay)
      exit if prepared == max
    end
  end
end