Class: BulkOps::Parser

Inherits:
Object
Includes:
InterpretControlledBehavior, InterpretFilesBehavior, InterpretOptionsBehavior, InterpretRelationshipsBehavior, InterpretScalarBehavior
Defined in:
lib/bulk_ops/parser.rb

Instance Attribute Summary

Class Method Summary

Instance Method Summary

Methods included from InterpretOptionsBehavior

#interpret_option_fields

Methods included from InterpretScalarBehavior

#interpret_scalar_fields

Methods included from InterpretFilesBehavior

#interpret_file_fields

Methods included from InterpretRelationshipsBehavior

#interpret_relationship_fields

Constructor Details

#initialize(prx, metadata_sheet = nil, options = {}) ⇒ Parser

Returns a new instance of Parser.



# File 'lib/bulk_ops/parser.rb', line 65

def initialize prx, metadata_sheet=nil, options={}
  @proxy = prx
  @raw_data = (metadata_sheet || operation.metadata)
  @raw_row = @raw_data[@proxy.row_number]
  @metadata = {}
  @parsing_errors = []
  @options = options || operation.options
end
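
A parser is normally built one per spreadsheet row, from a work proxy that belongs to a bulk operation; when no metadata sheet or options are given, the proxy's operation supplies them. A minimal usage sketch (work_proxy is assumed to be a BulkOps::WorkProxy already attached to an operation and a spreadsheet row):

# work_proxy, its operation, and its row_number are assumed to exist already
parser = BulkOps::Parser.new(work_proxy)
metadata = parser.interpret_data   # interpret the proxy's row into a metadata hash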

Instance Attribute Details

#proxy ⇒ Object

Returns the value of attribute proxy.



# File 'lib/bulk_ops/parser.rb', line 4

def proxy
  @proxy
end

#raw_data ⇒ Object

Returns the value of attribute raw_data.



# File 'lib/bulk_ops/parser.rb', line 4

def raw_data
  @raw_data
end

#raw_row ⇒ Object

Returns the value of attribute raw_row.



# File 'lib/bulk_ops/parser.rb', line 4

def raw_row
  @raw_row
end

Class Method Details

.get_negating_metadata(work_id, metadata = {}) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 46

def self.get_negating_metadata(work_id, metadata={})
  return false unless BulkOps::SolrService.record_exists?(work_id)
  work = ActiveFedora::Base.find(work_id)
  schema = ScoobySnacks::METADATA_SCHEMA
  schema.all_fields.each do |field|
    field_key = field.controlled? ? "#{field.name}_attributes" : field.name
    metadata[field_key] ||= (field.multiple? ? [] : nil)
    if field.controlled?
      values = Array(work.send(field.name)).map{|value| {id: value.id, _destroy: true} }
      if field.multiple?
        metadata[field_key] += values
      else
        metadata[field_key] = values.first
      end
    end
  end
  return metadata
end
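
The returned hash, when merged into a work's update attributes, marks every existing controlled value for removal via _destroy. An illustrative sketch, assuming a hypothetical work id and a controlled, multi-valued subject field in the schema:

negating = BulkOps::Parser.get_negating_metadata("work123")
# => { "subject_attributes" => [{id: "<existing value id>", _destroy: true}, ...],
#      "title" => [],   # non-controlled fields are only initialized ([] if multiple, nil otherwise)
#      ... }
# Returns false when no Solr record exists for the given id.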

.is_file_set?(metadata, row_number) ⇒ Boolean

Returns:

  • (Boolean)


# File 'lib/bulk_ops/parser.rb', line 29

def self.is_file_set? metadata, row_number
  return false unless metadata[row_number].present?
  # If the work type is explicitly specified, use that
  if (type_key = metadata[row_number].to_h.keys.find{|key| key.to_s.downcase.gsub(/[_\-\s]/,"").include?("worktype") })
    return true if metadata[row_number][type_key].downcase == "fileset"
    return false if metadata[row_number][type_key].present?
  end
  # Otherwise, if there are any valid fields other than relationship or file fields, call it a work
  metadata[row_number].each do |field, value|
    next if BulkOps::Verification.is_file_field?(field)
    next if ["parent", "order"].include?(normalize_relationship_field_name(field))
    next if ["title","label"].include?(field.downcase.strip)
    return false
  end
  return true
end
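
A small illustration of both branches, using a hypothetical metadata hash keyed by row number:

rows = {
  2 => { "Work Type" => "FileSet", "Title" => "Page 1" },   # explicit work type wins
  3 => { "Creator" => "Someone", "Title" => "A Work" }      # has a field beyond title/label, files, and relationships
}
BulkOps::Parser.is_file_set?(rows, 2)   # => true
BulkOps::Parser.is_file_set?(rows, 3)   # => false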

.normalize_relationship_field_name(field) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 24

def self.normalize_relationship_field_name field
  normfield = field.to_s.downcase.parameterize.gsub(/[_\s-]/,'')
  BulkOps::RELATIONSHIP_FIELDS.find{|rel_field| normfield == rel_field }
end
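
For example, assuming BulkOps::RELATIONSHIP_FIELDS includes "parent":

BulkOps::Parser.normalize_relationship_field_name("Parent")    # => "parent"
BulkOps::Parser.normalize_relationship_field_name("Creator")   # => nil (not a relationship field)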

.split_values(value_string) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 18

def self.split_values value_string
  # Split values on all un-escaped separator characters (the escape character is '\')
  # Then replace all escaped separator characters with un-escaped versions
  value_string.split(/(?<!\\)#{BulkOps::SEPARATOR}/).map{|val| val.gsub("\\#{BulkOps::SEPARATOR}",BulkOps::SEPARATOR).strip}
end
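
For example, assuming BulkOps::SEPARATOR is ";" (check the constant in your installation):

BulkOps::Parser.split_values('dogs; cats; lions\; tigers')
# => ["dogs", "cats", "lions; tigers"]   (escaped separators stay inside the value)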

.unescape_csv(value) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 14

def self.unescape_csv(value)
  value.gsub(/\\(['";,])/,'\1')
end
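
This reverses the backslash-escaping of quotes, semicolons, and commas, for example:

BulkOps::Parser.unescape_csv('title\, subtitle')     # => "title, subtitle"
BulkOps::Parser.unescape_csv('a \"quoted\" word')    # => 'a "quoted" word'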

Instance Method Details

#connect_existing_work ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 111

def connect_existing_work
  return unless (column_name = @options["update_identifier"])
  return unless (key = @raw_row.to_h.keys.find{|key| key.to_s.parameterize.downcase.gsub("_","") == column_name.to_s.parameterize.downcase.gsub("_","")})
  return unless (value = @raw_row[key]).present?
  return unless (work_id = find_work_id_from_unique_metadata(key, value))
  proxy.update(work_id: work_id)
end
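
A hedged sketch of the intended flow; the option value and row contents below are hypothetical:

# With operation options {"update_identifier" => "identifier"} and a raw row of
# {"Identifier" => "unique-0001", "Title" => "Updated title"}, the column match
# ignores case, spaces, and underscores, so the existing work is resolved via
# find_work_id_from_unique_metadata("Identifier", "unique-0001") and its id is
# saved on the proxy:
parser.connect_existing_work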

#disambiguate_columns ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 97

def disambiguate_columns
  #do nothing unless there are columns with the same header
  return unless (@raw_row.respond_to?(:headers) && (@raw_row.headers.uniq.length < @raw_row.length) )
  row = {}
  (0...@raw_row.length).each do |i|
    header = @raw_row.headers[i]
    value = @raw_row[i]
    next unless value.present?
    # separate values in identical columns using the separator
    row[header] = (Array(row[header]) << value).join(BulkOps::SEPARATOR)
  end
  @raw_row = row
end
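
For instance, given a CSV::Row with two "creator" columns (and assuming BulkOps::SEPARATOR is ";"), the duplicate values are merged into a single entry:

require 'csv'

row = CSV::Row.new(%w[creator creator title], ["Smith, A.", "Jones, B.", "My Work"])
# With @raw_row set to this row, disambiguate_columns replaces it with a plain hash:
# { "creator" => "Smith, A.;Jones, B.", "title" => "My Work" }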

#find_field_name(field) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 148

def find_field_name(field)
  operation.find_field_name(field)
end

#find_work_id_from_unique_metadata(field_name, value) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 119

def find_work_id_from_unique_metadata field_name, value
  field_solr_name = schema.get_field(field_name).solr_name
  query = "_query_:\"{!dismax qf=#{field_solr_name}}#{value}\""
  response = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path, params: { fq: query, rows: 1, start: 0})["response"]
  if response["numFound"] > 1
    report_error( :id_not_unique , "",  row_number: row_number, object_id: @proxy.id, options_name: field_name, option_values: value )
  end
  return response["docs"][0]["id"]
end
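
A usage sketch; the field name must be defined in the ScoobySnacks schema and its value is assumed to identify at most one work:

work_id = parser.find_work_id_from_unique_metadata("identifier", "unique-0001")
# Reports an :id_not_unique parsing error if Solr finds more than one match,
# and returns the id of the first matching document either way.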

#interpret_data(raw_row: nil, raw_data: nil, proxy: nil) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 74

def interpret_data raw_row: nil, raw_data: nil, proxy: nil
  @raw_row = raw_row if raw_row.present?
  @proxy = proxy if proxy.present?
  @raw_data = raw_data if raw_data.present?
  disambiguate_columns
  setAdminSet
  #The order here matters a little: interpreting the relationship fields specifies containing collections,
  # which may have opinions about whether we should inherit metadata from parent works
  interpret_relationship_fields
  setMetadataInheritance
  interpret_option_fields
  if @proxy.work_id.present? && @options['discard_existing_metadata']
    @metadata.deep_merge!(self.class.get_negating_metadata(@proxy.work_id))
  end
  interpret_file_fields
  interpret_controlled_fields
  interpret_scalar_fields
  connect_existing_work 
  @proxy.update(status: "ERROR", message: "error parsing spreadsheet line") if @parsing_errors.present?
  @proxy.proxy_errors = (@proxy.proxy_errors || []) + @parsing_errors
  return @metadata
end
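
Putting it together, a typical row is interpreted like this (work_proxy and sheet are assumed to come from the surrounding bulk operation):

parser = BulkOps::Parser.new(work_proxy, sheet, {"discard_existing_metadata" => true})
metadata = parser.interpret_data
# metadata now holds the interpreted attributes (admin_set_id, *_attributes hashes for
# controlled fields, scalar values, file and relationship data); any parsing problems
# were appended to the proxy's proxy_errors and its status set to "ERROR".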

#report_error(type, message, **args) ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 141

def report_error type, message, **args
  puts "ERROR MESSAGE: #{message}"
  @proxy.update(status: "error", message: message)
  args[:type]=type
  (@parsing_errors ||= []) <<  BulkOps::Error.new(**args)
end

#schema ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 152

def schema
  ScoobySnacks::METADATA_SCHEMA
end

#setAdminSet ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 129

def setAdminSet 
  return if @metadata[:admin_set_id]
  asets = AdminSet.where({title: "Bulk Ingest Set"})
  asets = AdminSet.find('admin_set/default') if asets.blank?
  @metadata[:admin_set_id] = Array(asets).first.id unless asets.blank?
end

#setMetadataInheritance ⇒ Object



# File 'lib/bulk_ops/parser.rb', line 136

def setMetadataInheritance
  return if @metadata[:metadataInheritance].present?
  @metadata[:metadataInheritance] = @options["metadataInheritance"] unless @options["metadataInheritance"].blank?
end