Class: BulkOps::Parser

Inherits:
Object
Includes:
InterpretControlledBehavior, InterpretFilesBehavior, InterpretOptionsBehavior, InterpretRelationshipsBehavior, InterpretScalarBehavior
Defined in:
lib/bulk_ops/parser.rb

Instance Attribute Summary

Class Method Summary

Instance Method Summary

Methods included from InterpretOptionsBehavior

#interpret_option_fields

Methods included from InterpretScalarBehavior

#interpret_scalar_fields

Methods included from InterpretFilesBehavior

#interpret_file_fields

Methods included from InterpretRelationshipsBehavior

#interpret_relationship_fields

Constructor Details

#initialize(prx, metadata_sheet = nil, options = {}) ⇒ Parser

Returns a new instance of Parser.



# File 'lib/bulk_ops/parser.rb', line 65

def initialize prx, metadata_sheet=nil, options={}
  @proxy = prx
  @raw_data = (metadata_sheet || operation.metadata)
  @raw_row = @raw_data[@proxy.row_number]
  @metadata = {}
  @parsing_errors = []
  @options = options || operation.options
end
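
A parser is normally built one per spreadsheet row, from a work proxy that belongs to a bulk operation; when no metadata sheet or options are given, the proxy's operation supplies them. A minimal usage sketch (work_proxy is assumed to be a BulkOps::WorkProxy already attached to an operation and a spreadsheet row):

# work_proxy, its operation, and its row_number are assumed to exist already
parser = BulkOps::Parser.new(work_proxy)
metadata = parser.interpret_data   # interpret the proxy's row into a metadata hash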

Instance Attribute Details

#proxy ⇒ Object

Returns the value of attribute proxy.



# File 'lib/bulk_ops/parser.rb', line 4

def proxy
  @proxy
end

#raw_data ⇒ Object

Returns the value of attribute raw_data.



# File 'lib/bulk_ops/parser.rb', line 4

def raw_data
  @raw_data
end

#raw_row ⇒ Object

Returns the value of attribute raw_row.



# File 'lib/bulk_ops/parser.rb', line 4

def raw_row
  @raw_row
end

Class Method Details

.get_negating_metadata(work_id, metadata = {}) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 46

def self.get_negating_metadata(work_id, metadata={})
  return false unless BulkOps::SolrService.record_exists?(work_id)
  work = ActiveFedora::Base.find(work_id)
  schema = ScoobySnacks::METADATA_SCHEMA
  schema.all_fields.each do |field|
    field_key = field.controlled? ? "#{field.name}_attributes" : field.name
    metadata[field_key] ||= (field.multiple? ? [] : nil)
    if field.controlled?
      values = Array(work.send(field.name)).map{|value| {id: value.id, _destroy: true} }
      if field.multiple?
        metadata[field_key] += values
      else
        metadata[field_key] = values.first
      end
    end
  end
  return metadata
end
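
The returned hash, when merged into a work's update attributes, marks every existing controlled value for removal via _destroy. An illustrative sketch, assuming a hypothetical work id and a controlled, multi-valued subject field in the schema:

negating = BulkOps::Parser.get_negating_metadata("work123")
# => { "subject_attributes" => [{id: "<existing value id>", _destroy: true}, ...],
#      "title" => [],   # non-controlled fields are only initialized ([] if multiple, nil otherwise)
#      ... }
# Returns false when no Solr record exists for the given id.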

.is_file_set?(metadata, row_number) ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/bulk_ops/parser.rb', line 29

def self.is_file_set? metadata, row_number
  return false unless metadata[row_number].present?
  # If the work type is explicitly specified, use that
  if (type_key = metadata[row_number].to_h.keys.find{|key| key.to_s.downcase.gsub(/[_\-\s]/,"").include?("worktype") })
    return true if metadata[row_number][type_key].downcase == "fileset"
    return false if metadata[row_number][type_key].present?
  end
  # Otherwise, if there are any valid fields other than relationship or file fields, call it a work
  metadata[row_number].each do |field, value|
    next if BulkOps::Verification.is_file_field?(field)
    next if ["parent", "order"].include?(normalize_relationship_field_name(field))
    next if ["title","label"].include?(field.downcase.strip)
    return false
  end
  return true
end
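
A small illustration of both branches, using a hypothetical metadata hash keyed by row number:

rows = {
  2 => { "Work Type" => "FileSet", "Title" => "Page 1" },   # explicit work type wins
  3 => { "Creator" => "Someone", "Title" => "A Work" }      # has a field beyond title/label, files, and relationships
}
BulkOps::Parser.is_file_set?(rows, 2)   # => true
BulkOps::Parser.is_file_set?(rows, 3)   # => false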

.normalize_relationship_field_name(field) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 24

def self.normalize_relationship_field_name field
  normfield = field.to_s.downcase.parameterize.gsub(/[_\s-]/,'')
  BulkOps::RELATIONSHIP_FIELDS.find{|rel_field| normfield == rel_field }
end
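
For example, assuming BulkOps::RELATIONSHIP_FIELDS includes "parent":

BulkOps::Parser.normalize_relationship_field_name("Parent")    # => "parent"
BulkOps::Parser.normalize_relationship_field_name("Creator")   # => nil (not a relationship field)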

.split_values(value_string) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 18

def self.split_values value_string
  # Split values on all un-escaped separator characters (the escape character is '\')
  # Then replace all escaped separator characters with un-escaped versions
  value_string.split(/(?<!\\)#{BulkOps::SEPARATOR}/).map{|val| val.gsub("\\#{BulkOps::SEPARATOR}",BulkOps::SEPARATOR).strip}
end
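
For example, assuming BulkOps::SEPARATOR is ";" (check the constant in your installation):

BulkOps::Parser.split_values('dogs; cats; lions\; tigers')
# => ["dogs", "cats", "lions; tigers"]   (escaped separators stay inside the value)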

.unescape_csv(value) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 14

def self.unescape_csv(value)
  value.gsub(/\\(['";,])/,'\1')
end
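
This reverses the backslash-escaping of quotes, semicolons, and commas, for example:

BulkOps::Parser.unescape_csv('title\, subtitle')     # => "title, subtitle"
BulkOps::Parser.unescape_csv('a \"quoted\" word')    # => 'a "quoted" word'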

Instance Method Details

#connect_existing_work ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 111

def connect_existing_work
  return unless (column_name = @options["update_identifier"])
  return unless (key = @raw_row.to_h.keys.find{|key| key.to_s.parameterize.downcase.gsub("_","") == column_name.to_s.parameterize.downcase.gsub("_","")})
  return unless (value = @raw_row[key]).present?
  return unless (work_id = find_work_id_from_unique_metadata(key, value))
  proxy.update(work_id: work_id)
end
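
A hedged sketch of the intended flow; the option value and row contents below are hypothetical:

# With operation options {"update_identifier" => "identifier"} and a raw row of
# {"Identifier" => "unique-0001", "Title" => "Updated title"}, the column match
# ignores case, spaces, and underscores, so the existing work is resolved via
# find_work_id_from_unique_metadata("Identifier", "unique-0001") and its id is
# saved on the proxy:
parser.connect_existing_work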

#disambiguate_columns ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 97

def disambiguate_columns
  #do nothing unless there are columns with the same header
  return unless (@raw_row.respond_to?(:headers) && (@raw_row.headers.uniq.length < @raw_row.length) )
  row = {}
  (0...@raw_row.length).each do |i|
    header = @raw_row.headers[i]
    value = @raw_row[i]
    next unless value.present?
    # separate values in identical columns using the separator
    row[header] = (Array(row[header]) << value).join(BulkOps::SEPARATOR)
  end
  @raw_row = row
end
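
For instance, given a CSV::Row with two "creator" columns (and assuming BulkOps::SEPARATOR is ";"), the duplicate values are merged into a single entry:

require 'csv'

row = CSV::Row.new(%w[creator creator title], ["Smith, A.", "Jones, B.", "My Work"])
# With @raw_row set to this row, disambiguate_columns replaces it with a plain hash:
# { "creator" => "Smith, A.;Jones, B.", "title" => "My Work" }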

#find_field_name(field) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 148

def find_field_name(field)
  operation.find_field_name(field)
end

#find_work_id_from_unique_metadata(field_name, value) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 119

def find_work_id_from_unique_metadata field_name, value
  field_solr_name = schema.get_field(field_name).solr_name
  query = "_query_:\"{!dismax qf=#{field_solr_name}}#{value}\""
  response = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path, params: { fq: query, rows: 1, start: 0})["response"]
  if response["numFound"] > 1
    report_error( :id_not_unique , "",  row_number: row_number, object_id: @proxy.id, options_name: field_name, option_values: value )
  end
  return response["docs"][0]["id"]
end
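
A usage sketch; the field name must be defined in the ScoobySnacks schema and its value is assumed to identify at most one work:

work_id = parser.find_work_id_from_unique_metadata("identifier", "unique-0001")
# Reports an :id_not_unique parsing error if Solr finds more than one match,
# and returns the id of the first matching document either way.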

#interpret_data(raw_row: nil, raw_data: nil, proxy: nil) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 74

def interpret_data raw_row: nil, raw_data: nil, proxy: nil
  @raw_row = raw_row if raw_row.present?
  @proxy = proxy if proxy.present?
  @raw_data = raw_data if raw_data.present?
  disambiguate_columns
  setAdminSet
  #The order here matters a little: interpreting the relationship fields specifies containing collections,
  # which may have opinions about whether we should inherit metadata from parent works
  interpret_relationship_fields
  setMetadataInheritance
  interpret_option_fields
  if @proxy.work_id.present? && @options['discard_existing_metadata']
    @metadata.deep_merge!(self.class.get_negating_metadata(@proxy.work_id))
  end
  interpret_file_fields
  interpret_controlled_fields
  interpret_scalar_fields
  connect_existing_work 
  @proxy.update(status: "ERROR", message: "error parsing spreadsheet line") if @parsing_errors.present?
  @proxy.proxy_errors = (@proxy.proxy_errors || []) + @parsing_errors
  return @metadata
end
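
Putting it together, a typical row is interpreted like this (work_proxy and sheet are assumed to come from the surrounding bulk operation):

parser = BulkOps::Parser.new(work_proxy, sheet, {"discard_existing_metadata" => true})
metadata = parser.interpret_data
# metadata now holds the interpreted attributes (admin_set_id, *_attributes hashes for
# controlled fields, scalar values, file and relationship data); any parsing problems
# were appended to the proxy's proxy_errors and its status set to "ERROR".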

#report_error(type, message, **args) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 141

def report_error type, message, **args
  puts "ERROR MESSAGE: #{message}"
  @proxy.update(status: "error", message: message)
  args[:type]=type
  (@parsing_errors ||= []) <<  BulkOps::Error.new(**args)
end

#schema ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 152

def schema
  ScoobySnacks::METADATA_SCHEMA
end

#setAdminSet ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 129

def setAdminSet 
  return if @metadata[:admin_set_id]
  asets = AdminSet.where({title: "Bulk Ingest Set"})
  asets = AdminSet.find('admin_set/default') if asets.blank?
  @metadata[:admin_set_id] = Array(asets).first.id unless asets.blank?
end

#setMetadataInheritance ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 136

def setMetadataInheritance
  return if @metadata[:metadataInheritance].present?
  @metadata[:metadataInheritance] = @options["metadataInheritance"] unless @options["metadataInheritance"].blank?
end