Class: Taxonifi::Model::RefCollection

Inherits:
Collection
  • Object
show all
Defined in:
lib/taxonifi/model/ref_collection.rb

Overview

A collection of references.

Instance Attribute Summary collapse

Attributes inherited from Collection

#by_id_index, #collection, #current_free_id

Instance Method Summary collapse

Methods inherited from Collection

#add_object, #add_object_pre_indexed, #children_of_object, #object_by_id, #objects_without_parents, #parent_id_vector, subclass_prefixes

Methods included from SharedClassMethods

included

Constructor Details

#initialize(options = {}) ⇒ RefCollection

Returns a new instance of RefCollection.



17
18
19
20
21
22
23
# File 'lib/taxonifi/model/ref_collection.rb', line 17

# Creates an empty RefCollection.
#
# The bare `super` forwards `options` unchanged to the superclass
# constructor; the indices specific to this class are then reset.
#
# @param options [Hash] passed through to the superclass constructor
def initialize(options = {})
  super
  # Starts empty; read by #object_from_row.
  @row_index = []
  # Starts empty; filled by #build_author_index.
  @author_index = {}
  # Starts empty; not referenced by any other method visible in this listing.
  @fingerprint_index = {}
  # The value returned by #initialize is discarded by .new.
  true
end

Instance Attribute Details

#author_indexObject

A Hash. Keys are Ref#id, values are an Array of Person#ids.

Built on request.



15
16
17
# File 'lib/taxonifi/model/ref_collection.rb', line 15

# Reader for the author index.
#
# The index is empty after construction and is only filled when
# #build_author_index is called.
#
# @return [Hash] the current contents of @author_index
def author_index
  @author_index
end

#row_indexObject

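An optional index used when there is one reference per row. It maps a row number to the Ref on that row (row_number => Ref); the constructor initializes it as an empty Array, so row numbers act as positions.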



11
12
13
# File 'lib/taxonifi/model/ref_collection.rb', line 11

# Reader for the per-row index.
#
# The constructor initializes this to an empty Array; #object_from_row
# looks entries up in it by row number.
#
# @return [Array] the current contents of @row_index
def row_index
  @row_index
end

Instance Method Details

#all_authorsObject

Returns Array of Taxonifi::Model::Person



164
165
166
# File 'lib/taxonifi/model/ref_collection.rb', line 164

# Collects every author attached to any reference in the collection.
#
# Author lists are gathered per reference, flattened into one list,
# stripped of nil entries and de-duplicated (by #hash / #eql?).
#
# @return [Array] the distinct, non-nil authors, in first-seen order
def all_authors
  authors_per_ref = @collection.map(&:authors)
  authors_per_ref.flatten.compact.uniq
end

#build_author_indexObject

Build the author index.

{Ref#id => [a1#id, ... an#id]}


137
138
139
140
141
# File 'lib/taxonifi/model/ref_collection.rb', line 137

# Fills @author_index with one entry per reference.
#
# Each entry maps the reference's id to the ids of its authors, in author
# order. An author whose id is nil (or false) is recorded as -1.
#
#   { ref.id => [author_1.id, ..., author_n.id] }
#
# Existing entries for the same reference id are overwritten.
#
# @return [Object] the collection that was iterated
def build_author_index
  collection.each do |ref|
    @author_index[ref.id] = ref.authors.map { |author| author.id || -1 }
  end
end

#enumerate_authors(initial_id = 0) ⇒ Object

Incrementally (re-)assigns the id of every associated author (Person). This is only useful if you assume every author is unique.



39
40
41
42
43
44
45
46
47
# File 'lib/taxonifi/model/ref_collection.rb', line 39

# Sequentially (re-)assigns the id of every author of every reference.
#
# Ids are handed out in collection order, then author order, starting at
# +initial_id+ and increasing by one per author. No attempt is made to detect
# that two authors are the same person, so this is only meaningful when every
# author entry is assumed to be unique.
#
# @param initial_id [Integer] the id given to the first author encountered
# @return [Object] the collection that was iterated
def enumerate_authors(initial_id = 0)
  next_id = initial_id
  # The block parameter must be declared as |r|; an empty parameter list
  # (`do ||`) leaves `r` unbound and raises NameError on the first reference.
  collection.each do |r|
    r.authors.each do |a|
      a.id = next_id
      next_id += 1
    end
  end
end

#object_classObject

The instance collection class.



26
27
28
# File 'lib/taxonifi/model/ref_collection.rb', line 26

# The class associated with this collection type.
#
# @return [Class] always Taxonifi::Model::Ref
def object_class
  Taxonifi::Model::Ref  
end

#object_from_row(row_number) ⇒ Object

The object at a given row. TODO: inherit from Collection?



32
33
34
35
# File 'lib/taxonifi/model/ref_collection.rb', line 32

# Looks up the entry stored in the row index for a given row.
#
# @param row_number [Integer, nil] the row to look up
# @return [Object, nil] the entry of @row_index at +row_number+; nil when
#   +row_number+ is nil or nothing is stored at that position
def object_from_row(row_number)
  unless row_number.nil?
    @row_index[row_number]
  end
end

#unique_author_stringsObject

Returns an Array of the unique author strings in this collection, sorted.



144
145
146
147
148
149
150
151
152
# File 'lib/taxonifi/model/ref_collection.rb', line 144

# Lists the distinct display names of all authors in the collection.
#
# Display names are used as Hash keys so that repeated names collapse into a
# single entry; the keys are then returned in sorted order.
#
# @return [Array] the sorted, de-duplicated results of Person#display_name
def unique_author_strings
  names_seen = collection.each_with_object({}) do |ref, names|
    ref.authors.each { |author| names[author.display_name] = nil }
  end
  names_seen.keys.sort
end

#unique_authorsObject

Returns an Array of Taxonifi::Model::Person. WARNING: this runs #uniquify_authors first, which merges and renumbers the authors of every reference in place. Do not call it unless you understand the consequences.



157
158
159
160
# File 'lib/taxonifi/model/ref_collection.rb', line 157

# Returns the authors of the collection after merging duplicates.
#
# WARNING: this is not a read-only query. It first calls #uniquify_authors
# with its default initial id of 0, which replaces the author list of every
# reference and renumbers the authors, and only then returns #all_authors.
#
# @return [Array] the result of #all_authors after uniquifying
def unique_authors
  uniquify_authors
  all_authors
end

#uniquify_authors(initial_id = 0) ⇒ Object

Finds unique authors, and combines them, then rebuilds author lists using references to the new unique set.



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/taxonifi/model/ref_collection.rb', line 51

# Finds unique authors, merges them, and rebuilds every reference's author
# list so that it points at the shared, newly numbered set.
#
# Steps:
# 1. For each reference, find authors whose compact_string repeats within
#    that same reference (e.g. "Sweet and Sweet"). These must be different
#    people, so their last names are temporarily prefixed with "_<k>_" to
#    make them distinguishable.
# 2. Collect one clone per distinct author (as decided by Person#identical?).
# 3. Number the clones sequentially from +initial_id+ and index them by
#    compact_string.
# 4. Replace every reference's authors with the indexed clones.
# 5. Strip the temporary prefixes again.
#
# Sequential sets are not differentiated. Given
#   Quate [1] and Quate [2]
#   Quate [3], Smith [4] and Quate [5]
# authors [1,3] become one Person and [2,5] another. Order, or the presence
# of a third "Quate", cannot be inferred from this information alone.
#
# @param initial_id [Integer] id assigned to the first unique author
# @return [true]
def uniquify_authors(initial_id = 0)
  # First pass: per reference, the position lists of authors whose
  # compact_string occurs more than once in that reference. The result is
  # kept parallel to the collection (by position) rather than keyed by Ref#id,
  # so references with nil or repeated ids cannot clobber each other's entry.
  # All fingerprints are computed before any name is modified.
  repeated_positions = collection.map do |r|
    by_fingerprint = {}
    r.authors.each_with_index do |a, i|
      (by_fingerprint[a.compact_string] ||= []) << i
    end
    by_fingerprint.values.select { |positions| positions.size > 1 }
  end

  # Second pass: mark the necessarily distinct authors so that their
  # fingerprints differ.
  collection.each_with_index do |r, ref_position|
    repeated_positions[ref_position].each do |positions|
      positions.each_with_index do |j, k|
        r.authors[j].last_name = "_#{k}_#{r.authors[j].last_name}"
      end
    end
  end

  # Generate the new authors based on identity; stop scanning as soon as a
  # match is found.
  authors = []
  collection.each do |r|
    r.authors.each do |a|
      authors.push(a.clone) unless authors.any? { |x| a.identical?(x) }
    end
  end

  # Sequentially number the new authors, and index them.
  auth_index = {}
  authors.each_with_index do |a, i|
    a.id = i + initial_id
    auth_index[a.compact_string] = a
  end

  # Replace the old authors with the newly built, sequentially numbered ones.
  collection.each do |r|
    r.authors = r.authors.map { |a| auth_index[a.compact_string] }
  end

  # Remove the temporary markers. Only leading markers are stripped, so a name
  # that happens to contain "_<digits>_" further in is left intact, and
  # authors without a last name are skipped instead of raising.
  marker = /\A(?:_\d+_)+/
  authors.each do |a|
    name = a.last_name
    name.sub!(marker, '') if name
  end

  true
end