Class: BulkOps::Operation

Inherits:
ActiveRecord::Base
  • Object
show all
Includes:
Verification
Defined in:
lib/bulk_ops/operation.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Verification

#find_field_name, #get_file_paths, is_file_field?, #notify, #record_exists?, #verify

Instance Attribute Details

#metadataObject

Returns the value of attribute metadata.



10
11
12
# File 'lib/bulk_ops/operation.rb', line 10

# Reader for the metadata attribute.
#
# NOTE(review): the method name and body were missing from this listing; they
# are restored here to mirror the other attribute readers - confirm against
# lib/bulk_ops/operation.rb.
#
# @return [Object, nil] the current value of @metadata (nil when never set)
def metadata
  @metadata
end

#reference_identifierObject

Returns the value of attribute reference_identifier.



10
11
12
# File 'lib/bulk_ops/operation.rb', line 10

# Reader for the reference_identifier attribute.
#
# @return [Object, nil] the current value of @reference_identifier
def reference_identifier
  @reference_identifier
end

#visibilityObject

Returns the value of attribute visibility.



10
11
12
# File 'lib/bulk_ops/operation.rb', line 10

# Reader for the visibility attribute.
#
# @return [Object, nil] the current value of @visibility
def visibility
  @visibility
end

#work_typeObject

Returns the value of attribute work_type.



10
11
12
# File 'lib/bulk_ops/operation.rb', line 10

# Reader for the work_type attribute.
#
# @return [Object, nil] the current value of @work_type
def work_type
  @work_type
end

Class Method Details

.default_metadata_fields(labels = true) ⇒ Object



318
319
320
321
322
323
324
325
326
# File 'lib/bulk_ops/operation.rb', line 318

# Lists the column names to include in an ingest template spreadsheet: one
# entry per field of the class-level schema, in schema order.
#
# NOTE(review): the method name was missing from this listing and is restored
# from the documented signature - confirm against lib/bulk_ops/operation.rb.
#
# @param labels [Boolean] when true, every controlled field is followed by an
#   extra "<field name> Label" column
# @return [Array] field names, with optional label columns interleaved
def self.default_metadata_fields(labels = true)
  schema.all_fields.flat_map do |field|
    columns = [field.name]
    columns << "#{field.name} Label" if labels && field.controlled?
    columns
  end
end

.schemaObject



45
46
47
# File 'lib/bulk_ops/operation.rb', line 45

# Returns the metadata schema object shared by every operation.
#
# NOTE(review): the constant name was missing from this listing (only
# "ScoobySnacks::" survived); METADATA_SCHEMA is a reconstruction - confirm
# against lib/bulk_ops/operation.rb and the scooby_snacks gem.
#
# @return [Object] the ScoobySnacks schema constant
def self.schema
  ScoobySnacks::METADATA_SCHEMA
end

.unique_name(name, user) ⇒ Object



14
15
16
17
18
19
20
21
22
23
# File 'lib/bulk_ops/operation.rb', line 14

# Derives a name that collides with neither an existing operation record nor
# an existing branch name, by appending or incrementing a numeric suffix.
#
# A name ending in "-N" or "_N" (N a positive integer without leading zeros)
# has N incremented; any other name gets "_1" appended. Multi-digit suffixes
# are incremented as a whole, so "foo_10" becomes "foo_11" rather than
# "foo_10_1".
#
# @param name [String] the desired name
# @param user [Object] passed through to BulkOps::GithubAccess.list_branch_names
# @return [String] the first candidate that is free in both places
def self.unique_name(name, user)
  # The branch list does not change while we search, so fetch it at most once
  # (and only once the database lookup alone no longer rejects the candidate).
  branch_names = nil
  while BulkOps::Operation.find_by(name: name) ||
        (branch_names ||= BulkOps::GithubAccess.list_branch_names(user)).include?(name)
    suffix = name.match(/\A(.*[-_])([1-9]\d*)\z/m)
    name = suffix ? "#{suffix[1]}#{suffix[2].to_i + 1}" : "#{name}_1"
  end
  name
end

Instance Method Details

#accumulated_errorsObject



204
205
206
207
# File 'lib/bulk_ops/operation.rb', line 204

# Combines the errors reported by the work proxies with any errors recorded
# directly on this operation.
#
# @return [Array] proxy errors followed by operation errors
def accumulated_errors
  from_proxies = proxy_errors
  from_operation = @operation_errors || []
  # TODO - make sure this captures all operation errors
  from_proxies + from_operation
end

#apply!Object



65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/bulk_ops/operation.rb', line 65

# Starts applying this operation: marks it as running, loads the final
# spreadsheet, then dispatches to the ingest or update routine depending on
# the operation type.
#
# NOTE(review): an earlier `status = "#{type}ing"` line only assigned an unused
# local variable and has been dropped; if the intent was to persist a status
# attribute, that needs to be added explicitly.
#
# @return [Object, nil] whatever the last dispatched routine returns
def apply!
  update({stage: "running", message: "#{type.titleize} initiated by #{user.name || user.email}"})
#      @stage = "running"
  spreadsheet = final_spreadsheet

# This commented line currently fails because it doesn't pull from the master branch by default
# It's usually already verified, but maybe we should fix this for double-checking
# in the future
#      return unless verify

  apply_ingest! if ingest?
  # apply_update! declares a spreadsheet parameter; calling it bare raised ArgumentError.
  apply_update!(spreadsheet) if update?
end

#apply_ingest!Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/bulk_ops/operation.rb', line 80

# Applies an ingest: rebuilds the work proxies from the final spreadsheet and
# enqueues one BulkOps::WorkJob per proxy that parsed without errors.
#
# NOTE(review): the spreadsheet receiver/arguments were missing from this
# listing; they are restored using final_spreadsheet, as apply_update! does -
# confirm against lib/bulk_ops/operation.rb.
#
# @return [Object] the result of report_errors!
def apply_ingest!
  spreadsheet = final_spreadsheet

  #Destroy any existing work proxies (which should not exist for an ingest). Create new proxies from finalized spreadsheet only.
  work_proxies.each{|proxy| proxy.destroy!}

  #create a work proxy for each work in the spreadsheet, creating filesets where appropriate
  spreadsheet.each_with_index do |values,row_number|
    next if values.to_s.gsub(',','').blank?

    next if BulkOps::Parser.is_file_set? spreadsheet, row_number

    work_proxies.create(status: "queued",
                        last_event: DateTime.now,
                        row_number: row_number,
                        visibility: options['visibility'],
                        message: "created during ingest initiated by #{user.name || user.email}")
  end

  # make sure the work proxies we just created are loaded in memory
  reload
  #loop through the work proxies to create a job for each work
  spreadsheet.each_with_index do |values,row_number|
    proxy = work_proxies.find_by(row_number: row_number)
    # Rows skipped above (blank rows, file-set rows) have no proxy.
    next unless proxy
    proxy.update(message: "interpreted at #{DateTime.now.strftime("%d/%m/%Y %H:%M")} " + proxy.message)
    data = BulkOps::Parser.new(proxy, spreadsheet).interpret_data(raw_row: values)
    next unless proxy.proxy_errors.blank?
    BulkOps::WorkJob.perform_later(proxy.work_type || "Work",
                                         user.email,
                                         data,
                                         proxy.id,
                                         proxy.visibility)
  end
  # If any errors have occurred, make sure they are logged in github and users are notified.
  report_errors!
end

#apply_update!(spreadsheet) ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/bulk_ops/operation.rb', line 160

# Applies an update: reconciles the existing work proxies with the rows of the
# final spreadsheet, drops proxies whose rows disappeared, and enqueues one
# BulkOps::UpdateWorkJob per remaining proxy.
#
# NOTE(review): proxies created here for rows that had no proxy yet are not
# given a work_id - confirm whether that is intended.
#
# @param spreadsheet [Object, nil] accepted for backward compatibility and
#   currently unused; the rows are always read from final_spreadsheet
# @return [Object] the result of report_errors!
def apply_update!(spreadsheet = nil)
  @operation_errors ||= []

  # this array will keep track of any current proxies not included in the final spreadsheet
  # (a plain Array copy, so removing entries never touches the association itself)
  abandoned_proxies = work_proxies.to_a
  # Loop through the final spreadsheet
  final_spreadsheet.each_with_index do |values,row_number|
    # Grab the work id; header names are compared ignoring case, dashes, underscores and whitespace
    work_id = false
    values.each{|field,val| work_id = val if ["id","workid","recordid"].include?(field.downcase.gsub(/[-_\s]/,''))}
    @operation_errors << BulkOps::Error.new(:no_work_id_field) unless work_id

    #proxy = BulkOps::WorkProxy.find_by(operation_id: id, work_id: values["work_id"])
    if (proxy = work_proxies.find_by(work_id: work_id))
      abandoned_proxies.delete(proxy)
      proxy.update(status: "updating",
                   row_number: row_number,
                   message: "update initiated by #{user.name || user.email}")
    else
      # Create a proxy for a work that is in the spreadsheet, but wasn't in the initial draft
      work_proxies.create(status: "queued",
                          last_event: DateTime.now,
                          row_number: row_number,
                          message: "created during update application, which was initiated by #{user.name || user.email}")
    end
  end

  # Loop through any proxies in the draft that were dropped from the spreadsheet
  abandoned_proxies.each do |dead_proxy|
    dead_proxy.lift_hold
    dead_proxy.destroy!
  end

  #loop through the work proxies to create a job for each work
  work_proxies.each do |proxy|
    data = BulkOps::Parser.new(proxy,final_spreadsheet).interpret_data(raw_row: final_spreadsheet[proxy.row_number])
    BulkOps::UpdateWorkJob.perform_later(proxy.work_type || "",
                                         user.email,
                                         data,
                                         proxy.id,
                                         proxy.visibility)
  end
  report_errors!
end

#busy?Boolean

Returns:

  • (Boolean)


294
295
296
297
298
299
# File 'lib/bulk_ops/operation.rb', line 294

# Tells whether any work proxy is still in flight, i.e. has a status of
# "running", "queued" or "starting" (compared case-insensitively).
#
# @return [Boolean]
def busy?
  work_proxies.any? { |prx| %w[running queued starting].include?(prx.status.downcase) }
end

#check_if_finishedObject



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/bulk_ops/operation.rb', line 122

# Wraps up a running operation once no work proxy is busy any more: resolves
# pending relationships, re-saves every work, records any failures, moves the
# operation to "complete" or "errors", reports errors and lifts holds.
#
# Does nothing unless the stage is "running" and busy? is false.
#
# @return [Object, nil] nil when skipped, otherwise the result of lift_holds
def check_if_finished
  return unless stage == "running" && !busy?

  update(stage: "finishing")
  @operation_errors ||= []

  # Attempt to resolve each dangling (objectless) relationships.
  # Collect them up front, paired with their proxy so failures can cite its row.
  pending = work_proxies.reduce([]) do |pairs, proxy|
    pairs + proxy.relationships.select{|rel| rel.status == "pending"}.map{|rel| [proxy, rel]}
  end
  pending.each do |proxy, rel|
    begin
      rel.resolve!
    rescue StandardError => e
      @operation_errors << BulkOps::Error.new(:relationship_error, row_number: proxy.row_number, object_id: rel.id, message: "#{e.class} - #{e.message}" )
    end
  end

  work_proxies.each do |proxy|
    begin
      Work.find(proxy.work_id).save
    rescue StandardError => e
      @operation_errors << BulkOps::Error.new(:ingest_failure, row_number: proxy.row_number, object_id: proxy.id, message: "#{e.class} - #{e.message}")
    end
  end

  new_stage = accumulated_errors.blank? ? "complete" : "errors"
  update(stage: new_stage)
  report_errors!
  lift_holds
end

#complete?Boolean

Returns:

  • (Boolean)


290
291
292
# File 'lib/bulk_ops/operation.rb', line 290

# Tells whether the operation's stage is 'complete'.
#
# @return [Boolean]
def complete?
  stage == 'complete'
end

#create_branch(fields: nil, work_ids: nil, options: nil, operation_type: :ingest) ⇒ Object



225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/bulk_ops/operation.rb', line 225

# Creates the git branch for this operation, seeds it with the template files
# shipped in the bulk_ops gem, optionally writes a merged options file, and
# (for ingests) creates the initial spreadsheet.
#
# @param fields [Object, nil] forwarded to create_new_spreadsheet
# @param work_ids [Object, nil] forwarded to create_new_spreadsheet
# @param options [Hash, nil] overrides merged on top of the template options
#   file; when blank, no options file is written
# @param operation_type [Symbol] a spreadsheet is only created for :ingest
# @return [Object, nil] the result of create_new_spreadsheet, or nil
def create_branch(fields: nil, work_ids: nil, options: nil, operation_type: :ingest)
  git.create_branch!
  bulk_ops_dir = Gem::Specification.find_by_name("bulk_ops").gem_dir

  #copy template files
  Dir["#{bulk_ops_dir}/#{BulkOps::TEMPLATE_DIR}/*"].each do |file|
    git.add_file file
  end

  #update configuration options
  unless options.blank?
    full_options = YAML.load_file(File.join(bulk_ops_dir,BulkOps::TEMPLATE_DIR, BulkOps::OPTIONS_FILENAME))

    options.each { |option, value| full_options[option] = value }

    # Record the operation's own attributes under fixed string keys
    # (previously the attribute values themselves were used as keys).
    full_options['name'] = name
    full_options['type'] = type
    full_options['status'] = status

    git.update_options full_options
  end

  create_new_spreadsheet(fields: fields, work_ids: work_ids) if operation_type == :ingest
end

#create_pull_request(message: false) ⇒ Object



214
215
216
217
218
# File 'lib/bulk_ops/operation.rb', line 214

# Opens a pull request through git and, when one is created, stores its
# number in pull_id.
#
# @param message [String, false] forwarded to git.create_pull_request
# @return [Object, false] the pull request number, or false when git returned
#   nothing truthy
def create_pull_request(message: false)
  pull_num = git.create_pull_request(message: message)
  return false unless pull_num

  update(pull_id: pull_num)
  pull_num
end

#delete_allObject



115
116
117
118
119
120
# File 'lib/bulk_ops/operation.rb', line 115

# Destroys the repository object behind every work proxy and marks each proxy
# as destroyed.
#
# @return [Object] the work_proxies collection that was iterated
def delete_all
  work_proxies.each do |work_proxy|
    work = ActiveFedora::Base.find(work_proxy.work_id)
    work.destroy
    work_proxy.update(status: "destroyed", message: "The work created by this proxy was destroyed by the user")
  end
end

#delete_branchObject



309
310
311
# File 'lib/bulk_ops/operation.rb', line 309

# Deletes this operation's branch by delegating to git.delete_branch!.
#
# @return [Object] whatever git.delete_branch! returns
def delete_branch
  git.delete_branch!
end

#destroyObject



313
314
315
316
# File 'lib/bulk_ops/operation.rb', line 313

# Deletes the operation's git branch first, then runs the inherited destroy.
#
# @return [Object] the result of the superclass implementation
def destroy
  git.delete_branch!
  super
end

#draft?Boolean

Returns:

  • (Boolean)


282
283
284
# File 'lib/bulk_ops/operation.rb', line 282

# Tells whether the operation's stage is 'draft'.
#
# @return [Boolean]
def draft?
  stage == 'draft'
end

#error_urlObject



333
334
335
# File 'lib/bulk_ops/operation.rb', line 333

# Builds the GitHub URL of this operation's errors directory from the
# repository and branch name reported by git.
#
# @return [String]
def error_url
  repo = git.repo
  branch = git.name
  "https://github.com/#{repo}/tree/#{branch}/#{branch}/errors"
end

#filename_prefixObject



337
338
339
# File 'lib/bulk_ops/operation.rb', line 337

# Returns the 'filename_prefix' entry of the operation's options, memoized in
# @filename_prefix once a truthy value has been read.
#
# @return [Object, nil]
def filename_prefix
  @filename_prefix ||= options['filename_prefix']
end

#final_spreadsheetObject



259
260
261
# File 'lib/bulk_ops/operation.rb', line 259

def final_spreadsheet
   ||= git. branch: "master"
end

#finalize_draft(fields: nil, work_ids: nil) ⇒ Object



220
221
222
223
# File 'lib/bulk_ops/operation.rb', line 220

# Creates a new spreadsheet for the draft and then moves the operation to the
# "pending" stage.
#
# @param fields [Object, nil] forwarded to create_new_spreadsheet
# @param work_ids [Object, nil] forwarded to create_new_spreadsheet
# @return [Object] the result of update
def finalize_draft(fields: nil, work_ids: nil)
  create_new_spreadsheet(fields: fields, work_ids: work_ids)
  update(stage: "pending")
end

#get_spreadsheet(return_headers: false) ⇒ Object



250
251
252
253
# File 'lib/bulk_ops/operation.rb', line 250

# Loads the operation's spreadsheet through git. Once the operation is running
# or complete the "master" branch is read; otherwise no branch is specified.
#
# NOTE(review): the git method name was missing from this listing;
# load_metadata is a reconstruction - confirm against BulkOps::GithubAccess.
#
# @param return_headers [Boolean] forwarded to git
# @return [Object] the spreadsheet data returned by git
def get_spreadsheet(return_headers: false)
  branch = (running? || complete?) ? "master" : nil
  git.load_metadata return_headers: return_headers, branch: branch
end

#ignored_fieldsObject



328
329
330
# File 'lib/bulk_ops/operation.rb', line 328

# Lists the spreadsheet headers to ignore: those configured under the
# 'ignored headers' option (if any) followed by BulkOps::IGNORED_COLUMNS.
#
# @return [Array]
def ignored_fields
  configured = options['ignored headers'] || []
  configured + BulkOps::IGNORED_COLUMNS
end

#ingest?Boolean

Returns:

  • (Boolean)


301
302
303
# File 'lib/bulk_ops/operation.rb', line 301

# Tells whether this operation's type is "ingest".
#
# @return [Boolean]
def ingest?
  type == "ingest"
end

#lift_holdsObject



152
153
154
# File 'lib/bulk_ops/operation.rb', line 152

# Calls lift_hold on every work proxy.
#
# @return [Object] the work_proxies collection that was iterated
def lift_holds
  work_proxies.each(&:lift_hold)
end

#optionsObject



275
276
277
278
279
280
# File 'lib/bulk_ops/operation.rb', line 275

# Returns the operation's options as loaded through git, memoized in @options.
# An operation without a name yields an empty hash (not memoized). Once the
# operation is running or complete the "master" branch is read; otherwise no
# branch is specified.
#
# @return [Object] the loaded options, or {} when name is nil
def options
  return {} if name.nil?

  @options ||= git.load_options(branch: (running? || complete?) ? "master" : nil)
end

#place_holdsObject



156
157
158
# File 'lib/bulk_ops/operation.rb', line 156

# Calls place_hold on every work proxy.
#
# @return [Object] the work_proxies collection that was iterated
def place_holds
  work_proxies.each(&:place_hold)
end

#proxy_errorsObject



25
26
27
28
29
30
31
32
33
# File 'lib/bulk_ops/operation.rb', line 25

# Collects the errors attached to the work proxies. A proxy that has a
# proxy_errors collection contributes its entries; a proxy without one whose
# status is "job_error" contributes a single job_failure error; every other
# proxy contributes nothing.
#
# NOTE(review): this is the only place where BulkOps::Error.new receives the
# error type as a `type:` keyword rather than as the first positional
# argument - confirm against the Error constructor.
#
# @return [Array] all collected errors, in proxy order
def proxy_errors
  work_proxies.reduce([]) do |errors, proxy|
    # Every branch must return the accumulator, otherwise reduce carries nil forward.
    if proxy.proxy_errors
      errors + proxy.proxy_errors
    elsif proxy.status == "job_error"
      errors + [BulkOps::Error.new(type: :job_failure, object_id: proxy.work_id, message: proxy.message)]
    else
      errors
    end
  end
end

#proxy_statesObject



35
36
37
38
39
# File 'lib/bulk_ops/operation.rb', line 35

# Groups the work proxies by their status.
#
# @return [Hash] status => array of proxies with that status, in proxy order
def proxy_states
  work_proxies.group_by(&:status)
end

#report_errors!Object



209
210
211
212
# File 'lib/bulk_ops/operation.rb', line 209

# Writes the accumulated errors out via BulkOps::Error.write_errors! and, when
# an error file was produced, sends a notification that links to it on GitHub.
#
# The notification goes through `notify`, the method provided by the
# Verification mixin.
#
# @return [Object, nil] the result of notify, or nil when no error file was written
def report_errors!
  error_file_name = BulkOps::Error.write_errors!(accumulated_errors, git)
  return unless error_file_name

  notify(subject: "Errors initializing bulk #{type} in Hycruz", message: "Hycruz encountered some errors while it  was setting up your #{type} and preparing to begin. For most types of errors, the individual rows of the spreadsheet with errors will be ignored and the rest will proceed. Please consult the #{type} summary for real time information on the status of the #{type}. Details about these initialization errors can be seen on Github at the following url: https://github.com/#{git.repo}/blob/#{git.name}/#{git.name}/errors/#{error_file_name}")
end

#running?Boolean

Returns:

  • (Boolean)


286
287
288
# File 'lib/bulk_ops/operation.rb', line 286

# Tells whether the operation's stage is 'running' or 'finishing'.
#
# @return [Boolean]
def running?
  %w[running finishing].include?(stage)
end

#schemaObject



49
50
51
# File 'lib/bulk_ops/operation.rb', line 49

# Instance-level shortcut for the class-level schema.
#
# @return [Object] whatever self.class.schema returns
def schema
  self.class.schema
end

#set_stage(new_stage) ⇒ Object



61
62
63
# File 'lib/bulk_ops/operation.rb', line 61

# Persists a new stage for this operation via update.
#
# @param new_stage [Object] the value to store in the stage attribute
# @return [Object] the result of update
def set_stage(new_stage)
  update(stage: new_stage)
end

#spreadsheet_countObject



255
256
257
# File 'lib/bulk_ops/operation.rb', line 255

# Delegates to git.spreadsheet_count.
#
# @return [Object] whatever git.spreadsheet_count returns
def spreadsheet_count
  git.spreadsheet_count
end

#typeObject



41
42
43
# File 'lib/bulk_ops/operation.rb', line 41

# Shorthand for operation_type.
#
# @return [Object] the value of operation_type
def type
  operation_type
end

#update?Boolean

Returns:

  • (Boolean)


305
306
307
# File 'lib/bulk_ops/operation.rb', line 305

# Tells whether this operation's type is "update".
#
# @return [Boolean]
def update?
  type == "update"
end

#update_options(options, message = nil) ⇒ Object



267
268
269
# File 'lib/bulk_ops/operation.rb', line 267

# Writes the given options through git.
#
# @param options [Object] the options to store
# @param message [String, nil] forwarded to git as the `message:` keyword
# @return [Object] whatever git.update_options returns
def update_options(options, message = nil)
  git.update_options(options, message: message)
end

#update_spreadsheet(file, message: nil) ⇒ Object



263
264
265
# File 'lib/bulk_ops/operation.rb', line 263

# Writes the given spreadsheet file through git.
#
# @param file [Object] the spreadsheet to store
# @param message [String, nil] forwarded to git as the `message:` keyword
# @return [Object] whatever git.update_spreadsheet returns
def update_spreadsheet(file, message: nil)
  git.update_spreadsheet(file, message: message)
end