Module: Commonmeta::AuthorUtils

Included in:
MetadataUtils
Defined in:
lib/commonmeta/author_utils.rb

Instance Method Summary collapse

Instance Method Details

#author_name_identifiers(id) ⇒ Object



173
174
175
176
177
178
179
180
181
# File 'lib/commonmeta/author_utils.rb', line 173

# Build name-identifier records for one or more author identifiers.
#
# Every identifier is labelled with the ORCID scheme and scheme URI; no other
# scheme is produced by this method.
#
# @param id [String, Array<String>, nil] a single identifier or a list of them
# @return [Array<Hash>, nil] one record per non-blank identifier, or nil when
#   there is nothing to report
def author_name_identifiers(id)
  return nil unless id.present?

  # Skip blank entries up front: otherwise a nil inside an array would yield a
  # record that carries only the scheme fields and no identifier.
  Array.wrap(id).select(&:present?).map do |i|
    { 'nameIdentifier' => i,
      'nameIdentifierScheme' => 'ORCID',
      'schemeUri' => 'https://orcid.org' }
  end.presence
end

#authors_as_string(authors) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
# File 'lib/commonmeta/author_utils.rb', line 137

# Serialise a list of authors into one string, joining authors with " and ".
#
# Per author:
# * with a family name  -> "Family, Given" (just "Family" when no given name)
# * type "Person"       -> the plain name
# * any other with name -> the name wrapped in braces, e.g. "{ACME Inc.}"
#   (NOTE(review): the " and " separator and braces look like BibTeX
#   author-list syntax - confirm against the caller)
#
# Authors that yield no usable name are left out of the result.
#
# @param authors [Hash, Array<Hash>, nil] author records in commonmeta format
# @return [String, nil] the joined string, or nil when no author has a name
def authors_as_string(authors)
  formatted = Array.wrap(authors).map do |a|
    if a['familyName'].present?
      # keep only the non-blank parts so a missing given name does not leave
      # a dangling ", " behind the family name
      [a['familyName'], a['givenName']].select(&:present?).join(', ')
    elsif a['type'] == 'Person'
      a['name']
    elsif a['name'].present?
      "{#{a['name']}}"
    end
  end

  # drop authors without a name; joining nils would produce stray " and "
  formatted.select(&:present?).join(' and ').presence
end

#cleanup_author(author) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/commonmeta/author_utils.rb', line 93

# Tidy up a raw author name string.
#
# Steps, in order:
# 1. when the name has no comma, a trailing run of one or two initials
#    ("Smith J.", "Smith J.-P.") gets a comma inserted before the initials
# 2. " - " is collapsed to "-"
# 3. every whitespace character is replaced by a plain space
#
# @param author [String, nil] the raw name
# @return [String, nil] the cleaned name, or nil for blank input
def cleanup_author(author)
  return nil if author.blank?

  cleaned = author
  # names that already contain a comma ("Smith, John K.") are left alone here
  cleaned = cleaned.gsub(/[[:space:]]([A-Z]\.)?(-?[A-Z]\.)$/, ', \1\2') unless cleaned.include?(',')

  cleaned.gsub(' - ', '-').gsub(/[[:space:]]/, ' ')
end

#get_affiliations(affiliations) ⇒ Object



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/commonmeta/author_utils.rb', line 149

# Convert affiliation input into a list of { 'id' => ..., 'name' => ... } hashes.
#
# Each entry may be a String (used as the name) or a Hash. For a Hash the name
# is read from 'name' or '__content__', and a present 'affiliationIdentifier'
# is passed through normalize_id - prefixed with 'schemeURI' (plus a trailing
# slash if missing) when a scheme is given and the identifier does not already
# start with "https://". Entries without a name are dropped.
#
# @param affiliations [String, Hash, Array, nil] raw affiliation data
# @return [Array<Hash>, nil] the affiliations, or nil when none remain
def get_affiliations(affiliations)
  return nil if affiliations.blank?

  entries = Array.wrap(affiliations).map do |a|
    identifier = nil
    label = nil

    case a
    when String
      label = a.squish
    when Hash
      raw_id = a['affiliationIdentifier']
      if raw_id.present?
        scheme = a['schemeURI']
        prefix = nil
        # make sure the scheme ends in a slash before gluing the identifier on
        prefix = scheme.end_with?('/') ? scheme : "#{scheme}/" if scheme.present?

        needs_prefix = prefix.present? && !raw_id.to_s.start_with?('https://')
        identifier = normalize_id(needs_prefix ? prefix + raw_id : raw_id)
      end
      label = (a['name'] || a['__content__']).to_s.squish.presence
    end

    next if label.blank?

    { 'id' => identifier, 'name' => label }.compact
  end

  entries.compact.presence
end

#get_authors(authors) ⇒ Object

Parse an array of authors (strings or hashes) into commonmeta format, dropping entries that cannot be parsed.



133
134
135
# File 'lib/commonmeta/author_utils.rb', line 133

# Parse one or more authors into commonmeta format.
#
# Each entry is handed to get_one_author; entries for which that returns nil
# are removed from the result.
#
# @param authors [Object] a single author or an array of authors
# @return [Array] the parsed authors (possibly empty)
def get_authors(authors)
  parsed = Array.wrap(authors).map do |author|
    get_one_author(author)
  end

  parsed.compact
end

#get_one_author(author) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/commonmeta/author_utils.rb', line 7

# Normalise a single author record into commonmeta format.
#
# Accepts either a plain name string or a hash. The name, given/family name,
# identifier, type, contributor role and affiliations are read from several
# alternative keys (see below), and a compacted hash with the keys 'id',
# 'type', 'name', 'givenName', 'familyName', 'affiliation' and
# 'contributorType' is returned.
#
# @param author [String, Hash, nil] the raw author
# @return [Hash, nil] the author in commonmeta format; nil when the input is
#   blank or its 'name' value is an Array
def get_one_author(author)
  # basic sanity checks
  return nil if author.blank?

  # author is a string
  author = { 'name' => author } if author.is_a?(String)

  # malformed XML
  return nil if author.fetch('name', nil).is_a?(Array)

  # parse author name attributes
  # (first non-nil of 'name', 'creatorName', 'contributorName')
  name = parse_attributes(author.fetch('name', nil)) ||
         parse_attributes(author.fetch('creatorName', nil)) ||
         parse_attributes(author.fetch('contributorName', nil))

  given_name = parse_attributes(author.fetch('givenName', nil)) ||
               parse_attributes(author.fetch('given', nil))
  family_name = parse_attributes(author.fetch('familyName', nil)) ||
                parse_attributes(author.fetch('family', nil))

  # parse author identifier
  # (first non-nil of 'id', 'identifier', 'sameAs')
  id = parse_attributes(author.fetch('id', nil), first: true) ||
       parse_attributes(author.fetch('identifier', nil), first: true) ||
       parse_attributes(author.fetch('sameAs', nil), first: true)

  # DataCite metadata
  # (fall back to the first nameIdentifier whose scheme is exactly 'ORCID')
  if id.nil? && author['nameIdentifiers'].present?
    id = Array.wrap(author.dig('nameIdentifiers')).find do |ni|
      ni['nameIdentifierScheme'] == 'ORCID'
    end
    id = id['nameIdentifier'] if id.present?
  # Crossref metadata
  elsif id.nil? && author['ORCID'].present?
    id = author.fetch('ORCID')
  end
  # NOTE(review): every identifier goes through normalize_orcid, whatever key
  # it came from - confirm what that helper returns for non-ORCID values.
  id = normalize_orcid(id)

  # parse author type, i.e. "Person", "Organization" or not specified
  type = author.fetch('type', nil)
  type = type.first if type.is_a?(Array)
  # DataCite metadata
  # (drops a trailing "al", e.g. "Personal" -> "Person",
  # "Organizational" -> "Organization"; applies to any string ending in "al")
  type = type[0..-3] if type.is_a?(String) && type.end_with?('al')

  # infer the type only when none was given; the first matching rule wins
  if type.blank? && id.is_a?(String) && URI.parse(id).host == 'ror.org'
    type = 'Organization'
  # NOTE(review): this branch looks unreachable - type was derived from
  # author['type'], so it cannot be blank while author['type'] is
  # 'Organization'. Confirm and consider removing.
  elsif type.blank? && author['type'] == 'Organization'
    type = 'Organization'
  elsif type.blank? && id.is_a?(String) && URI.parse(id).host == 'orcid.org'
    type = 'Person'
  elsif type.blank? && (given_name.present? || family_name.present?)
    type = 'Person'
  elsif type.blank? && is_personal_name?(name: name) && name.to_s.exclude?(';')
    type = 'Person'
  end

  # parse author contributor role
  contributor_type = parse_attributes(author.fetch('contributorType', nil))

  # note: the type inference above ran on the name as given, before cleanup
  name = cleanup_author(name)

  # split name for type Person into given/family name if not already provided
  if type == 'Person' && given_name.blank? && family_name.blank?
    # sets an option on Namae itself and does not restore it afterwards
    Namae.options[:include_particle_in_family] = true
    names = Namae.parse(name)
    parsed_name = names.first

    if parsed_name.present?
      given_name = parsed_name.given
      family_name = parsed_name.family
    else
      given_name = nil
      family_name = nil
    end
  end

  # return author in commonmeta format, using name vs. given/family name
  # depending on type
  # NOTE(review): a Person whose name could not be split ends up with neither
  # 'name' nor 'givenName'/'familyName' in the result - confirm this is intended.
  { 'id' => id,
    'type' => type,
    'name' => type == 'Person' ? nil : name,
    'givenName' => type == 'Organization' ? nil : given_name,
    'familyName' => type == 'Organization' ? nil : family_name,
    'affiliation' => get_affiliations(author.fetch('affiliation', nil)),
    'contributorType' => contributor_type }.compact
end

#is_personal_name?(name: nil) ⇒ Boolean

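Check whether a name looks like a personal name: first by looking it up in the database of known given names (github.com/bmuller/gender_detector), then by trying to parse it into given/family name.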

Returns:

  • (Boolean)


110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/commonmeta/author_utils.rb', line 110

# Decide whether a name looks like the name of a person.
#
# True when either the first whitespace-separated word or the part after the
# last ", " is a known given name (per name_exists?). Otherwise a single-word
# name is rejected, and anything else counts as personal only if Namae can
# extract a given name from it.
#
# @param name [String, nil] the name to check
# @return [Boolean]
def is_personal_name?(name: nil)
  text = name.to_s
  return true if name_exists?(text.split.first) || name_exists?(text.split(', ').last)

  # check if a name has only one word, e.g. "FamousOrganization"
  return false if text.split(' ').size == 1

  # check if the name can be parsed into given/family name
  Namae.options[:include_particle_in_family] = true
  candidate = Namae.parse(name).first

  candidate && candidate.given ? true : false
end

#name_exists?(name) ⇒ Boolean

Recognize a given name if ::NameDetector data has been loaded (e.g. in a Rails initializer); returns false when no detector is available.

Returns:

  • (Boolean)


126
127
128
129
130
# File 'lib/commonmeta/author_utils.rb', line 126

# Look a given name up in the name detector, if one is available.
#
# @param name [String, nil] the given name to look up
# @return [Boolean] false when name_detector is not present, otherwise
#   whatever the detector's name_exists? reports
def name_exists?(name)
  detector_loaded = name_detector.present?

  detector_loaded ? name_detector.name_exists?(name) : false
end