Class: Referent

Inherits:
ActiveRecord::Base
  • Object
show all
Includes:
MetadataHelper, TruncateToDbLimit
Defined in:
app/models/referent.rb

Overview

Note: There are a few actual attributes stored as Columns in referent – these were originally used for identifying a Referent identifying the 'same thing' as an incoming OpenURL, to re-use it. But we don't re-use cached referents anymore. So these attributes are NOT USED – actual values are over in ReferentValues. But the attributes are left for now (and set) merely for making it easier to eyeball the database by hand: atitle, title, issn, isbn, volume, year. (why no issue/page number? hell if i know).

Class Method Summary collapse

Instance Method Summary collapse

Methods included from TruncateToDbLimit

#do_truncate_to_db_limit!

Methods included from MetadataHelper

#get_doi, #get_epage, #get_gpo_item_nums, #get_identifier, #get_isbn, #get_issn, #get_lccn, #get_month, #get_oclcnum, #get_pmid, #get_search_creator, #get_search_terms, #get_search_title, #get_spage, #get_sudoc, #get_top_level_creator, #get_year, #normalize_lccn, #normalize_title, #raw_search_title, title_is_serial?

Methods included from MarcHelper

#add_856_links, #edition_statement, #get_title, #get_years, #gmd_values, #service_type_for_856, #should_skip_856_link?, #strip_gmd

Class Method Details

.clean_up_context_object(co) ⇒ Object

Okay, we need to do some pre-processing on weird context objects sent by, for example, FirstSearch: remove invalid identifiers. Also will adjust context objects according to configured Umlaut referent filters (see config.app_config.referent_filters in environment.rb). Mutator: modifies the ContextObject arg passed in.


78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'app/models/referent.rb', line 78

# Pre-processes an incoming context object, modifying +co+ in place:
#
# * Deletes identifiers that consist of a scheme/prefix with no value
#   (e.g. "info:doi/"); lots of sources send these.
# * For every "urn:ISSN..." identifier, copies the ISSN into the 'issn'
#   metadata when that is blank, then deletes the identifier unless the
#   cite's genre is 'journal' (FirstSearch puts ISSN identifiers on
#   article-level cites).
# * Rewrites "info:oclcnum/..." identifiers that carry an old-style prefix
#   (ocm, ocn, on, (OCoLC), ocl7) or leading zeros to the bare number.
#
# co - context object; its #referent must respond to #identifiers,
#      #delete_identifier, #add_identifier, #get_metadata and #set_metadata.
def self.clean_up_context_object(co)
  referent = co.referent

  # First, remove any empty DOIs or other empty identifiers: a prefix with
  # nothing after it is not a valid identifier.
  empty_id_pattern = %r{^[^:]+:[^/:]*(/|:)?$}
  empty_ids = referent.identifiers.find_all { |id| id =~ empty_id_pattern }
  empty_ids.each { |id| referent.delete_identifier(id) }

  # Now look for ISSN identifiers that are on article-level cites.
  issn_ids = referent.identifiers.find_all { |id| id =~ /^urn:ISSN/ }
  issn_ids.each do |issn_id|
    # "urn:ISSN:" is 9 characters; the rest is the actual ISSN.
    issn_data = issn_id.slice(9..issn_id.length)

    # Long as we're at it, add an rft.issn if one's not there.
    if referent.get_metadata('issn').blank? && !issn_data.blank?
      referent.set_metadata('issn', issn_data)
    end

    # And remove it as an identifier unless we know this is a
    # journal-level cite.
    referent.delete_identifier(issn_id) unless referent.get_metadata('genre') == 'journal'
  end

  # Clean up OCLC numbers from old bad formats that may have snuck in to an
  # info url incorrectly; also delete preceding 0's.
  oclc_pattern = %r{^info:oclcnum/(ocm0*|ocn0*|on0*|\(OCoLC\)0*|ocl70*|0+)(.*)$}
  oclcnum_ids = referent.identifiers.find_all { |id| id =~ /^info:oclcnum/ }
  oclcnum_ids.each do |oclcnum_id|
    match = oclc_pattern.match(oclcnum_id)
    next unless match

    # Delete the original and re-add just the actual oclcnum, without the
    # old prefix or preceding 0s.
    referent.delete_identifier(oclcnum_id)
    referent.add_identifier("info:oclcnum/#{match[2]}")
  end
end

.create_by_context_object(co, options = {}) ⇒ Object

Pass in :permalink => :force to force creation of a permalink, otherwise no permalink is created by this method, one can be lazily created when needed.


29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'app/models/referent.rb', line 29

# Builds and saves a new Referent from the context object +co+, after first
# running +co+ through clean_up_context_object.
#
# co      - incoming context object (mutated by the clean-up step).
# options - pass :permalink => :force to create a permalink right away;
#           otherwise none is created here.
#
# Returns the Referent.
def self.create_by_context_object(co, options = {})
  self.clean_up_context_object(co)

  referent = Referent.new
  digits_only = lambda { |text| text.gsub(/[^\d]/, '') }

  # Everything happens inside one transaction (thought to be more efficient,
  # at least with MySQL).
  Referent.transaction do
    referent.set_values_from_context_object(co)

    # Permalinks are normally created on demand later; :force restores the
    # old eager behavior.
    Permalink.new_with_values!(referent, co.referrer.identifier) if options[:permalink] == :force

    # Copy a few metadata values into the shortcut columns.
    referent.referent_values.each do |val|
      next unless val.metadata?

      case val.key_name
      when 'atitle'         then referent.atitle = val.normalized_value
      when /^[bj]?title$/   then referent.title  = val.normalized_value
      when 'issn'           then referent.issn   = digits_only.call(val.normalized_value)[0, 8]
      when 'isbn'           then referent.isbn   = digits_only.call(val.normalized_value)[0, 13]
      when 'volume'         then referent.volume = val.normalized_value
      when 'date'           then referent.year   = digits_only.call(val.normalized_value)[0, 4]
      end
    end
    referent.save!

    # Apply every configured referent filter whose :match pattern matches
    # the referrer id (empty string when there is no referrer id).
    referrer = co.referrer
    rfr_id = (referrer && !referrer.identifier.blank?) ? referrer.identifier : ""
    UmlautController.umlaut_config.lookup!("referent_filters", []).each do |filter_config|
      next unless filter_config[:match] =~ rfr_id

      filter = filter_config[:filter]
      filter.filter(referent) if filter.respond_to?(:filter)
    end
  end

  referent
end

Instance Method Details

#add_identifier(id) ⇒ Object


215
216
217
218
219
220
221
# File 'app/models/referent.rb', line 215

# Adds +id+ as an 'identifier' referent value (non-metadata, non-private),
# unless #identifiers already contains an equal value. The new value is
# created through the referent_values association inside a pooled
# connection block.
def add_identifier(id)
  return if identifiers.find { |existing| existing == id }

  Referent.connection_pool.with_connection do
    new_value = self.referent_values.create(
      :key_name         => 'identifier',
      :value            => id,
      :normalized_value => ReferentValue.normalize(id),
      :metadata         => false,
      :private_data     => false
    )
    new_value.save!
  end
end

#build_referent_value(key_name, value) ⇒ Object

private use. Adds a referent_value and returns it, does NOT persist it to db. referent_value is constructed with ActiveRecord build, and will be saved when Referent (self) is saved, works on persisted or unpersisted Referent.


122
123
124
125
126
127
128
129
130
# File 'app/models/referent.rb', line 122

# Private use. Builds (via the referent_values association's #build) a
# referent value for +key_name+/+value+ and returns it without saving it.
#
# The value is flagged private_data only for the "private_data" key, and
# metadata for every key except "identifier" and "format".
def build_referent_value(key_name, value)
  association = self.referent_values
  attributes = {
    :key_name         => key_name,
    :value            => value,
    :normalized_value => ReferentValue.normalize(value),
    :private_data     => key_name == "private_data",
    :metadata         => !["identifier", "format"].include?(key_name)
  }
  association.build(attributes)
end

#container_type_of_thing ⇒ Object

Like type_of_thing, but if it's a contained item, give container name instead.


374
375
376
377
378
379
380
381
382
383
384
385
# File 'app/models/referent.rb', line 374

# Like type_of_thing, but if it's a contained item, gives the container's
# name instead: genre 'article' maps to 'journal' and 'bookitem' to 'book';
# any other genre is used as-is, falling back to #format when there is no
# genre.
#
# Returns the I18n label found under the "umlaut.citation.genre" scope, or
# nil when there is no genre/format or no non-blank translation.
def container_type_of_thing
  # Build the metadata hash once instead of once per lookup.
  genre = self.metadata['genre']

  i18n_key = case genre
             when 'article'  then 'journal'
             when 'bookitem' then 'book'
             else genre || self.format
             end

  # Nothing to look up; avoid handing I18n a nil/empty key.
  return nil if i18n_key.blank?

  label = I18n.t(i18n_key, :scope => "umlaut.citation.genre", :default => "")
  label.blank? ? nil : label
end

#doi ⇒ Object


252
253
254
# File 'app/models/referent.rb', line 252

# DOI for this referent; delegates to get_doi (from MetadataHelper),
# passing self.
def doi
  get_doi(self)
end

#enhance_referent(key, value, metadata = true, private_data = false, options = {}) ⇒ Object

options => { :overwrite => false } to only enhance if not already there


396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
# File 'app/models/referent.rb', line 396

# Adds or updates the referent value stored under +key+.
#
# key          - referent value key name (e.g. 'jtitle', 'issn').
# value        - new value; nil is ignored (method returns immediately).
# metadata     - metadata flag given to a newly created value.
# private_data - private_data flag given to a newly created value.
# options      - :overwrite => false leaves existing values for +key+
#                untouched (only enhance if not already there).
#
# Existing values are matched on key name only; their metadata and
# private_data flags are ignored. For title/atitle/btitle/jtitle, issn,
# isbn, volume and date the matching shortcut column on self is also set to
# the normalized value and self is saved.
def enhance_referent(key, value, metadata=true, private_data=false, options = {})
  # Nothing to do for nil; bail out before checking out a connection.
  return if value.nil?

  ActiveRecord::Base.connection_pool.with_connection do
    normalized = ReferentValue.normalize(value)

    # We ignore #metadata and #private_data when matching an existing value.
    # They used to be taken into account, but that triggered a bug in JRuby
    # and isn't really necessary.
    matches = self.referent_values.to_a.find_all { |rv| rv.key_name == key }

    matches.each do |rv|
      next if options[:overwrite] == false || rv.value == value

      rv.value = value
      # Keep the normalized form in sync with the new value, the same way it
      # is derived when a value is first created.
      rv.normalized_value = normalized
      rv.save!
    end

    if matches.empty?
      val = self.referent_values.create(
        :key_name         => key,
        :value            => value,
        :normalized_value => normalized,
        :metadata         => metadata,
        :private_data     => private_data
      )
      val.save!
    end

    # Mirror selected keys into the shortcut columns on self.
    shortcut_updated = true
    case key
    when 'date'             then self.year = normalized
    when 'volume'           then self.volume = normalized
    when 'issn'             then self.issn = normalized
    when 'isbn'             then self.isbn = normalized
    when 'atitle'           then self.atitle = normalized
    when /\A[jb]?title\z/   then self.title = normalized
    else shortcut_updated = false
    end
    self.save! if shortcut_updated
  end
end

#format ⇒ Object


223
224
225
# File 'app/models/referent.rb', line 223

# Value of the first referent value whose key_name is 'format', or nil when
# there is none.
def format
  format_value = self.referent_values.to_a.find { |val| val.key_name == 'format' }
  format_value.try(:value)
end

#identifiers ⇒ Object


204
205
206
207
208
209
210
211
212
213
# File 'app/models/referent.rb', line 204

# Array of the values of every referent value whose key_name is
# 'identifier', in referent_values order.
def identifiers
  identifier_values = self.referent_values.to_a.select { |val| val.key_name == 'identifier' }
  identifier_values.map { |val| val.value }
end

#isbn ⇒ Object


244
245
246
# File 'app/models/referent.rb', line 244

# ISBN for this referent; delegates to get_isbn (from MetadataHelper),
# passing self.
def isbn
  get_isbn(self)
end

#issn ⇒ Object

Gets an ISSN, makes sure it's a valid ISSN or else returns nil. So will return a valid ISSN (NOT empty string) or nil.


240
241
242
# File 'app/models/referent.rb', line 240

# ISSN for this referent; delegates to get_issn (from MetadataHelper),
# passing self. Documented as returning a valid ISSN (never an empty
# string) or nil.
def issn
  get_issn(self)
end

#lccn ⇒ Object

finds and normalizes an LCCN. If multiple LCCNs are in the record, returns the first one. Returns a NORMALIZED lccn, but does NOT do validation. see: info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/


234
235
236
# File 'app/models/referent.rb', line 234

# LCCN for this referent; delegates to get_lccn (from MetadataHelper),
# passing self. Documented as normalized but not validated, and as the
# first one when the record has several.
def lccn
  get_lccn(self)
end

#metadata ⇒ Object

Creates a hash of values from referent_values, to assemble what was spread across different db rows into one easy-lookup hash, for easy access. See also #to_citation for a different hash, specifically for use in View to print citation. And #to_context_object.


187
188
189
190
191
192
193
# File 'app/models/referent.rb', line 187

# Assembles the referent values that are flagged metadata and not flagged
# private data into a single { key_name => value } hash. When a key name
# occurs more than once, the last value in referent_values order wins.
def metadata
  self.referent_values.to_a.each_with_object({}) do |val, collected|
    collected[val.key_name] = val.value if val.metadata? && !val.private_data?
  end
end

#metadata_intersects?(arg) ⇒ Boolean

pass in a Referent, or a ropenurl ContextObjectEntity that has a metadata method. Or really anything with a #metadata method returning openurl-style keys and values. Method returns true iff the keys in common to both metadata packages have equal (==) values.

Returns:

  • (Boolean)

167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'app/models/referent.rb', line 167

# Compares this referent's metadata with that of +arg+.
#
# arg - a Referent, or anything else with a #metadata method returning a
#       hash of openurl-style keys and values; nil/false counts as a match.
#
# Returns true iff every key present in both metadata hashes with a
# non-blank value on both sides has equal (==) values. A key that is blank
# on one side only is not considered a mismatch.
def metadata_intersects?(arg)
  # if it's empty, good enough.
  return true unless arg

  # Build each hash once rather than on every lookup.
  mine   = self.metadata
  theirs = arg.metadata

  comparable_keys = (mine.keys & theirs.keys).reject do |key|
    mine[key].blank? || theirs[key].blank?
  end

  comparable_keys.all? { |key| mine[key] == theirs[key] }
end

#oclcnum ⇒ Object


248
249
250
# File 'app/models/referent.rb', line 248

# OCLC number for this referent; delegates to get_oclcnum (from
# MetadataHelper), passing self.
def oclcnum
  get_oclcnum(self)
end

#private_data ⇒ Object


195
196
197
198
199
200
201
202
# File 'app/models/referent.rb', line 195

# Hash of { key_name => value } for every referent value flagged as
# private data. When a key name repeats, the last one wins.
def private_data
  self.referent_values.to_a.each_with_object({}) do |val, collected|
    collected[val.key_name] = val.value if val.private_data?
  end
end

#remove_value(key) ⇒ Object


388
389
390
391
392
393
# File 'app/models/referent.rb', line 388

# Removes every referent value stored under +key+: each one found through
# the association's #where is deleted and then removed from the
# referent_values association as well.
def remove_value(key)
  doomed = referent_values.where(:key_name => key).to_a
  doomed.each do |referent_value|
    referent_value.delete
    referent_values.delete(referent_value)
  end
end

#set_values_from_context_object(co) ⇒ Object

Populate the referent_values table with a ropenurl ContextObject object. Note: does NOT save self; self may still be unsaved.


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'app/models/referent.rb', line 135

# Builds referent values (via build_referent_value, so nothing is saved
# here) from the referent entity of the context object +co+:
#
# * one 'identifier' value per identifier (multiple are possible);
# * a 'format' value when the entity has a format;
# * a 'private_data' value when the entity has private data;
# * one value per non-blank metadata entry, or one per element when the
#   metadata value is an array.
def set_values_from_context_object(co)
  rft = co.referent

  # Multiple identifiers are possible!
  rft.identifiers.each { |id_string| build_referent_value('identifier', id_string) }

  build_referent_value('format', rft.format) if rft.format

  # This comes in as "pid" or "rft_dat"; we store it in our database as
  # "private_data", the easiest way to fit it in at the moment.
  build_referent_value("private_data", rft.private_data) if rft.private_data

  rft.metadata.each do |key, value|
    next unless value.present?

    # Sometimes value is an array, for DC for instance. Do the best we can.
    Array(value).each { |v| build_referent_value(key, v) }
  end
end

#to_citation ⇒ Object

Creates a hash for use in View code to display a citation

TODO: move to_citation, type_of_thing, and container_type_of_thing OUT of Referent, to a helper module or their own class.


282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
# File 'app/models/referent.rb', line 282

# Creates a hash for use in View code to display a citation.
#
# Keys that may be set: :title, :container_title, :title_label,
# :container_label, :pub (only when genre matches /book/i), :issn, :isbn,
# :volume, :issue, :date, :author, :page and :identifiers (all identifiers
# except blank ones and those starting with "tag:").
def to_citation
  citation = {}
  # Call self.metadata once and reuse the hash; profiling showed that
  # calling it repeatedly DOES make a difference.
  my_metadata = self.metadata

  # First non-blank of title/btitle/jtitle, if any.
  top_level_title = ['title', 'btitle', 'jtitle'].map { |t_type| my_metadata[t_type] }.find(&:present?)

  if my_metadata['atitle'].present?
    citation[:title] = my_metadata['atitle']
    citation[:container_title] = top_level_title if top_level_title
  elsif top_level_title
    # only top-level thing, no sub-thing
    citation[:title] = top_level_title
  end

  citation[:title_label] = I18n.t("umlaut.citation.title_of_x", :x => self.type_of_thing, :default => "umlaut.citation.title_label")
  citation[:container_label] = self.container_type_of_thing

  # add publisher for books
  if my_metadata['genre'] =~ /book/i
    citation[:pub] = my_metadata['pub'] unless my_metadata['pub'].blank?
  end

  issn_value = issn
  citation[:issn] = issn_value if issn_value
  isbn_value = isbn
  citation[:isbn] = isbn_value if isbn_value

  ['volume', 'issue', 'date'].each do |key|
    citation[key.to_sym] = my_metadata[key]
  end

  if my_metadata["au"].present?
    citation[:author] = my_metadata["au"].strip
  elsif my_metadata["aulast"]
    citation[:author] = my_metadata["aulast"].strip
    if my_metadata["aufirst"].present?
      citation[:author] += ', ' + my_metadata["aufirst"].strip
    elsif my_metadata["auinit"].present?
      citation[:author] += ', ' + my_metadata["auinit"].strip
    else
      citation[:author] += ', ' + my_metadata["auinit1"].strip if my_metadata["auinit1"].present?
      citation[:author] += my_metadata["auinitm"].strip if my_metadata["auinitm"].present?
    end
  elsif my_metadata["aucorp"]
    citation[:author] = my_metadata["aucorp"]
  end

  if my_metadata['spage']
    citation[:page] = my_metadata['spage']
    citation[:page] += ' - ' + my_metadata['epage'] unless my_metadata['epage'].blank?
  end

  citation[:identifiers] = self.identifiers.reject { |id| id.blank? || id.match(/^tag:/) }

  citation
end

#to_context_object ⇒ Object

Creates an OpenURL::ContextObject assembling all the data in this referent.


258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# File 'app/models/referent.rb', line 258

# Creates an OpenURL::ContextObject assembling all the data in this
# referent.
#
# Metadata values are set on the new referent entity with #set_metadata.
# Any other value is passed to the entity's "set_<key_name>" method when
# the entity responds to it, and skipped otherwise.
#
# NOTE: the format URI is built with String#+, so this raises TypeError
# when #format is nil.
def to_context_object
  co = OpenURL::ContextObject.new

  # Got to initialize the referent entity properly for our format.
  fmt_uri = 'info:ofi/fmt:xml:xsd:' + self.format
  co.referent = OpenURL::ContextObjectEntity.new_from_format(fmt_uri)
  rft = co.referent

  # Now set all the values.
  self.referent_values.each do |val|
    if val.metadata?
      rft.set_metadata(val.key_name, val.value)
    else
      setter = 'set_' + val.key_name
      rft.send(setter, val.value) if rft.respond_to?(setter)
    end
  end

  co
end

#type_of_thing ⇒ Object


352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
# File 'app/models/referent.rb', line 352

# I18n label (scope "umlaut.citation.genre") for the kind of thing this
# referent describes.
#
# The lookup key is the downcased genre, falling back to #format when genre
# is blank. When an atitle is present, 'journal' becomes 'article' and
# 'book' becomes 'bookitem'.
#
# Returns the label, or nil when there is neither genre nor format or no
# non-blank translation exists.
def type_of_thing
  my_metadata = self.metadata

  key = my_metadata["genre"]
  key = self.format if key.blank?
  # Neither genre nor format: nothing to look up (and nothing to downcase).
  return nil if key.blank?

  key = key.downcase

  has_atitle = my_metadata['atitle'].present?
  key = 'article'  if key == "journal" && has_atitle
  key = "bookitem" if key == "book" && has_atitle

  label = I18n.t(key, :scope => "umlaut.citation.genre", :default => "")
  label.blank? ? nil : label
end