Module: Commonmeta::AuthorUtils

Included in:
MetadataUtils
Defined in:
lib/commonmeta/author_utils.rb

Instance Method Summary collapse

Instance Method Details

#author_name_identifiers(id) ⇒ Object



233
234
235
236
237
238
239
240
241
# File 'lib/commonmeta/author_utils.rb', line 233

# Build nameIdentifier hashes for one or more identifiers.
#
# Every identifier is labelled with the "ORCID" scheme and the
# "https://orcid.org" scheme URI.
#
# @param id [Object] a single identifier or a collection of identifiers
# @return [Array<Hash>, nil] one hash per identifier, or nil when +id+ is blank
def author_name_identifiers(id)
  return nil if id.blank?

  identifiers = Array.wrap(id).map do |identifier|
    entry = {
      "nameIdentifier" => identifier,
      "nameIdentifierScheme" => "ORCID",
      "schemeUri" => "https://orcid.org",
    }
    entry.compact
  end

  identifiers.compact.presence
end

#authors_as_string(authors) ⇒ Object



199
200
201
202
203
204
205
206
207
208
209
# File 'lib/commonmeta/author_utils.rb', line 199

# Render a list of authors as a single " and "-separated string.
#
# Authors with a family name are written as "Family, Given" (just "Family"
# when no given name is present), other authors of type "Person" by their
# name, and any remaining named author as "{Name}". Authors that yield no
# text are skipped so they do not leave empty segments in the result.
#
# @param authors [Hash, Array<Hash>, nil] author hashes with string keys
# @return [String, nil] the combined string, or nil when nothing is rendered
def authors_as_string(authors)
  rendered = Array.wrap(authors).filter_map do |a|
    if a["familyName"].present?
      # drop a missing given name instead of emitting a dangling "Family, "
      [a["familyName"], a["givenName"]].select(&:present?).join(", ")
    elsif a["type"] == "Person"
      a["name"].presence
    elsif a["name"].present?
      "{#{a["name"]}}"
    end
  end

  rendered.join(" and ").presence
end

#cleanup_author(author) ⇒ Object



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/commonmeta/author_utils.rb', line 132

# Normalize a raw author name string.
#
# Steps, in order: insert a comma before trailing initials ("Smith J."),
# strip a trailing "MD"/"PhD" suffix, remove an email address reported by
# validate_email, trim surrounding whitespace, unwrap enclosing parentheses,
# close up " - " into "-", and replace every whitespace character with a
# plain space.
#
# @param author [String, nil] raw author name
# @return [String, nil] the cleaned-up name, or nil when +author+ is blank
def cleanup_author(author)
  return nil unless author.present?

  # detect pattern "Smith J.", but not "Smith, John K."
  unless author.include?(",")
    author = author.gsub(/[[:space:]]([A-Z]\.)?(-?[A-Z]\.)$/, ', \1\2')
  end

  # strip suffixes, e.g. "John Smith, MD", as the name parser doesn't handle
  # them. Only the suffix itself is dropped, so "Smith, John, MD" keeps
  # "Smith, John" instead of being truncated to "Smith".
  parts = author.split(", ")
  if %w[MD PhD].include?(parts.last)
    kept = parts.size > 1 ? parts[0..-2] : parts
    author = kept.join(", ")
  end

  # remove email addresses
  email = validate_email(author)
  author = author.gsub(email, "") if email.present?

  # strip spaces at the beginning and end of string
  author = author.strip

  # remove parentheses around names
  author = author[1..-2] if author[0] == "(" && author[-1] == ")"

  # remove spaces around hyphens
  author = author.gsub(" - ", "-")

  # remove non-standard space characters
  author.gsub(/[[:space:]]/, " ")
end

#datacite_contributor_roles ⇒ Object

mapping of DataCite contributorType to commonmeta contributorRoles



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/commonmeta/author_utils.rb', line 8

# mapping of DataCite contributorType to commonmeta contributorRoles
#
# @return [Hash{String => String}] a newly built hash on every call
def datacite_contributor_roles
  # contributor types with a dedicated role; every other listed type maps to "Other"
  dedicated = {
    "ContactPerson" => "ContactPerson",
    "DataCurator" => "DataCuration",
    "Editor" => "Editor",
    "Supervisor" => "Supervision",
  }

  contributor_types = %w[
    ContactPerson DataCurator DataManager Distributor Editor HostingInstitution
    Other Producer ProjectLeader ProjectManager ProjectMember RegistrationAgency
    RegistrationAuthority RelatedPerson ResearchGroup RightsHolder Researcher
    Sponsor Supervisor WorkPackageLeader
  ]

  contributor_types.to_h do |contributor_type|
    [contributor_type, dedicated.fetch(contributor_type, "Other")]
  end
end

#get_affiliations(affiliations) ⇒ Object



211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
# File 'lib/commonmeta/author_utils.rb', line 211

# Convert raw affiliation data into a list of affiliation hashes.
#
# A String entry becomes the affiliation name. A Hash entry contributes a
# name (from "name" or "__content__") and, when "affiliationIdentifier" is
# present, an id passed through normalize_id; an identifier that does not
# start with "https://" is first prefixed with "schemeURI" (with a trailing
# slash ensured) when one is given. Entries that yield neither are dropped.
#
# @param affiliations [String, Hash, Array, nil] raw affiliation data
# @return [Array<Hash>, nil] hashes with "id" and/or "name", or nil if none
def get_affiliations(affiliations)
  return nil if affiliations.blank?

  parsed = Array.wrap(affiliations).map do |entry|
    identifier = nil
    label = nil

    case entry
    when String
      label = entry.squish
    when Hash
      raw_identifier = entry["affiliationIdentifier"]
      if raw_identifier.present?
        prefix = nil
        if entry["schemeURI"].present?
          prefix = entry["schemeURI"].end_with?("/") ? entry["schemeURI"] : "#{entry["schemeURI"]}/"
        end
        needs_prefix = !raw_identifier.to_s.start_with?("https://") && prefix.present?
        identifier = normalize_id(needs_prefix ? prefix + raw_identifier : raw_identifier)
      end
      label = (entry["name"] || entry["__content__"]).to_s.squish.presence
    end

    { "id" => identifier, "name" => label }.compact.presence
  end

  parsed.compact.presence
end

#get_authors(authors) ⇒ Object

parse array of author strings into commonmeta format



195
196
197
# File 'lib/commonmeta/author_utils.rb', line 195

# parse array of author strings into commonmeta format
#
# @param authors [Object] one author or a collection of authors
# @return [Array] the results of get_one_author, with nil results dropped
def get_authors(authors)
  parsed = Array.wrap(authors).map do |entry|
    get_one_author(entry)
  end

  parsed.compact
end

#get_one_author(author) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/commonmeta/author_utils.rb', line 31

# Convert a single raw author record into the commonmeta author format.
#
# The record's name, given/family name, identifier, type, contributor role
# and affiliation are read from the keys used by the supported metadata
# formats (the branches below are labelled DataCite, Crossref and JSON Feed).
# A hash passed in by the caller is not modified.
#
# @param author [String, Hash, nil] an author name, or a hash with string keys
# @return [Hash, nil] the non-nil entries among "id", "type", "name",
#   "contributorRoles", "givenName", "familyName" and "affiliation"; nil when
#   +author+ is blank or its "name" is an Array
def get_one_author(author)
  # basic sanity checks
  return nil if author.blank?

  # author is a string
  author = { "name" => author } if author.is_a?(String)

  # malformed XML
  return nil if author.fetch("name", nil).is_a?(Array)

  # parse author name attributes
  name = parse_attributes(author.fetch("name", nil)) ||
         parse_attributes(author.fetch("creatorName", nil)) ||
         parse_attributes(author.fetch("contributorName", nil))

  given_name = parse_attributes(author.fetch("givenName", nil)) ||
               parse_attributes(author.fetch("given", nil))
  family_name = parse_attributes(author.fetch("familyName", nil)) ||
                parse_attributes(author.fetch("family", nil))

  name = cleanup_author(name)

  # parse author identifier
  id = parse_attributes(author.fetch("id", nil), first: true) ||
       parse_attributes(author.fetch("identifier", nil), first: true) ||
       parse_attributes(author.fetch("sameAs", nil), first: true)
  id = normalize_orcid(id) || normalize_ror(id) if id.present?

  if id.nil? && author["nameIdentifiers"].present?
    # DataCite metadata
    id = Array.wrap(author["nameIdentifiers"]).find do |ni|
      normalize_name_identifier(ni).present?
    end
    id = normalize_name_identifier(id) if id.present?
  elsif id.nil? && author["ORCID"].present?
    # Crossref metadata
    id = normalize_orcid(author.fetch("ORCID"))
  elsif id.nil? && author["url"].present?
    # JSON Feed metadata
    id = author.fetch("url")
  end

  # parse author type, i.e. "Person", "Organization" or not specified
  type = author.fetch("type", nil)
  type = type.first if type.is_a?(Array)
  # DataCite metadata
  type = type[0..-3] if type.is_a?(String) && type.end_with?("al")

  # held in a local so that the ROR case below never writes to the caller's hash
  affiliation = author.fetch("affiliation", nil)

  # infer the type when it is not specified
  if type.blank?
    # a malformed identifier is treated as having no known host rather than
    # aborting the whole conversion
    id_host = nil
    if id.is_a?(String)
      begin
        id_host = URI.parse(id).host
      rescue URI::InvalidURIError
        id_host = nil
      end
    end

    if id_host == "ror.org" && name.blank?
      # a ROR id without a name is kept as the affiliation of an unnamed person
      type = "Person"
      affiliation = { "affiliationIdentifier" => id }
      id = nil
    elsif id_host == "ror.org"
      type = "Organization"
    elsif id_host == "orcid.org"
      type = "Person"
    elsif given_name.present? || family_name.present?
      type = "Person"
    else
      personal = is_personal_name?(name: name)
      if personal && name.to_s.exclude?(";")
        type = "Person"
      elsif name.present? && !personal
        type = "Organization"
      end
    end
  end

  # parse author contributor role; an unmapped contributorType becomes "Other"
  # instead of a nil entry
  contributor_type = parse_attributes(author.fetch("contributorType", nil))
  contributor_roles = if contributor_type
      [datacite_contributor_roles.fetch(contributor_type, "Other")]
    else
      ["Author"]
    end

  # split name for type Person into given/family name if not already provided
  if type == "Person" && name.present? && given_name.blank? && family_name.blank?
    Namae.options[:include_particle_in_family] = true
    parsed_name = Namae.parse(name).first

    if parsed_name.present?
      given_name = parsed_name.given
      family_name = parsed_name.family
    else
      given_name = nil
      family_name = nil
    end
  end

  # return author in commonmeta format, using name vs. given/family name
  # depending on type
  { "id" => id,
    "type" => type,
    "name" => type == "Person" ? nil : name,
    "contributorRoles" => contributor_roles,
    "givenName" => type == "Organization" ? nil : given_name,
    "familyName" => type == "Organization" ? nil : family_name,
    "affiliation" => get_affiliations(affiliation) }.compact
end

#is_personal_name?(name: nil) ⇒ Boolean

check if given name is in the database of known given names: github.com/bmuller/gender_detector

Returns:

  • (Boolean)


162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/commonmeta/author_utils.rb', line 162

# Heuristically decide whether +name+ is the name of a person.
#
# Checks run in order and the first decisive one wins: semicolon (no),
# name_exists? match on the first word or the part after the last ", " (yes),
# single word without comma (no), organization keyword (no), "MD"/"PhD"
# suffix (yes), and finally whether Namae parses a given name out of it.
#
# @param name [String, nil] the name to check
# @return [Boolean]
def is_personal_name?(name: nil)
  text = name.to_s

  # personal names are not allowed to contain semicolons
  return false if text.include?(";")

  # a known given name at the start, or after the last ", ", settles it
  return true if name_exists?(text.split.first) || name_exists?(text.split(", ").last)

  # check if a name has only one word, e.g. "FamousOrganization", not including commas
  return false if text.split(" ").one? && !text.include?(",")

  # check if name contains words known to be used in organization names
  organization_words = %w[University College Institute School Center Department Laboratory Library Museum Foundation Society Association Company Corporation Collaboration Consortium Incorporated Inc. Institut Research Science Team]
  return false if organization_words.any? { |word| text.include?(word) }

  # check for suffixes, e.g. "John Smith, MD"
  return true if name && %w[MD PhD].include?(name.split(", ").last)

  # check if name can be parsed into given/family name
  Namae.options[:include_particle_in_family] = true
  candidate = Namae.parse(name).first

  candidate && candidate.given ? true : false
end

#name_exists?(name) ⇒ Boolean

recognize given name if we have loaded ::NameDetector data, e.g. in a Rails initializer

Returns:

  • (Boolean)


188
189
190
191
192
# File 'lib/commonmeta/author_utils.rb', line 188

# recognize given name if we have loaded ::NameDetector data, e.g. in a Rails initializer
#
# @param name [Object] the name passed on to name_detector.name_exists?
# @return [Boolean] false when name_detector is blank, otherwise the result
#   of name_detector.name_exists?
def name_exists?(name)
  if name_detector.present?
    name_detector.name_exists?(name)
  else
    false
  end
end