Class: Toccatore::Base
- Inherits:
-
Object
show all
- Defined in:
- lib/toccatore/base.rb
Constant Summary
collapse
- ICON_URL =
"https://raw.githubusercontent.com/datacite/toccatore/master/lib/toccatore/images/toccatore.png"
Instance Method Summary
collapse
Instance Method Details
#cleanup_author(author) ⇒ Object
212
213
214
215
216
217
218
219
220
|
# File 'lib/toccatore/base.rb', line 212
# Normalizes a raw author string before it is parsed.
#
# If the string has no comma and ends in one or two initials
# ("Turing A.", "Smith J.-P."), a comma is inserted in front of the
# initials. The result is passed through String#my_titleize (defined
# outside this method) and every [[:space:]] character is replaced by a
# plain space.
#
# @param author [String] raw author name
# @return [String] cleaned-up author name
def cleanup_author(author)
  unless author.include?(",")
    author = author.gsub(/[[:space:]]([A-Z]\.)?(-?[A-Z]\.)$/, ', \1\2')
  end

  titleized = author.my_titleize
  titleized.gsub(/[[:space:]]/, ' ')
end
|
#get_authors(authors, options = {}) ⇒ Object
parse array of author strings into CSL format
230
231
232
|
# File 'lib/toccatore/base.rb', line 230
# Parses author strings into CSL format by delegating each entry to
# get_one_author.
#
# @param authors [Array<String>, String, nil] coerced with Array(), so nil
#   yields an empty result and a single string is treated as one author
# @param options [Hash] accepted but not read by this method
# @return [Array] one get_one_author result per input entry
def get_authors(authors, options={})
  Array(authors).each_with_object([]) do |author_string, parsed|
    parsed << get_one_author(author_string)
  end
end
|
#get_data(options = {}) ⇒ Object
102
103
104
105
|
# File 'lib/toccatore/base.rb', line 102
# Builds the query URL from the options and fetches it with Maremma.get.
#
# @param options [Hash] passed to get_query_url and, unchanged, to Maremma.get
# @return [Object] whatever Maremma.get returns
def get_data(options={})
  Maremma.get(get_query_url(options), options)
end
|
#get_doi_ra(prefix) ⇒ Object
152
153
154
155
156
157
158
159
160
161
|
# File 'lib/toccatore/base.rb', line 152
# Fetches https://api.datacite.org/prefixes/<prefix> and reads the
# "registration-agency" attribute from the response body.
#
# @param prefix [String, nil] DOI prefix
# @return [Object, nil] nil for a blank prefix; the value stored under
#   "errors" when the body has a non-blank "errors" entry; otherwise
#   data -> attributes -> registration-agency, or nil if that is missing
def get_doi_ra(prefix)
  return nil if prefix.blank?

  body = Maremma.get("https://api.datacite.org/prefixes/#{prefix}").body
  errors = body.fetch("errors", nil)
  return errors if errors.present?

  attributes = body.fetch("data", {}).fetch('attributes', {})
  attributes.fetch('registration-agency', nil)
end
|
#get_hashed_authors(authors) ⇒ Object
parse array of author hashes into CSL format
235
236
237
|
# File 'lib/toccatore/base.rb', line 235
# Parses author hashes into CSL format by delegating each entry to
# get_one_hashed_author.
#
# @param authors [Array<Hash>, nil] coerced with Array(), so nil yields []
# @return [Array] one get_one_hashed_author result per input entry
def get_hashed_authors(authors)
  Array(authors).each_with_object([]) do |author_hash, parsed|
    parsed << get_one_hashed_author(author_hash)
  end
end
|
#get_name_identifier(author) ⇒ Object
247
248
249
250
251
252
253
254
255
|
# File 'lib/toccatore/base.rb', line 247
# Builds an ORCID URL from an author hash.
#
# The scheme is read from "nameIdentifierScheme" (defaulting to "orcid"
# when the key is absent) and compared case-insensitively. The identifier
# from "nameIdentifier" must pass validate_orcid.
#
# @param author [Hash] author hash with string keys
# @return [String, nil] "http://orcid.org/<id>", or nil when the scheme is
#   not ORCID or the identifier does not validate
def get_name_identifier(author)
  raw_identifier = author.fetch("nameIdentifier", nil)
  scheme = author.fetch("nameIdentifierScheme", "orcid").downcase
  return nil unless scheme == "orcid"

  orcid = validate_orcid(raw_identifier)
  orcid ? "http://orcid.org/#{orcid}" : nil
end
|
#get_one_author(author) ⇒ Object
Parse an author string into CSL format. Only assume a personal name when the string uses sort order, e.g. “Turing, Alan”.
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
|
# File 'lib/toccatore/base.rb', line 196
# Parses a single author string into a CSL-style hash.
#
# The string is cleaned with cleanup_author and parsed with Namae.parse.
# If Namae returns nothing, or is_personal_name? is blank for the cleaned
# string, the whole string is returned as a "literal". Otherwise the first
# parsed name supplies "family" and "given" (nil parts are dropped).
#
# @param author [String, nil] raw author name; nil is treated like an empty
#   string because get_one_hashed_author passes fetch("creatorName", nil)
# @return [Hash] { "literal" => ... } or { "family" => ..., "given" => ... }
def get_one_author(author)
  # Guard against nil before calling String#strip.
  return { "literal" => "" } if author.nil? || author.strip.blank?

  author = cleanup_author(author)
  names = Namae.parse(author)

  if names.blank? || is_personal_name?(author).blank?
    { "literal" => author }
  else
    name = names.first
    { "family" => name.family,
      "given" => name.given }.compact
  end
end
|
#get_one_hashed_author(author) ⇒ Object
239
240
241
242
243
244
245
|
# File 'lib/toccatore/base.rb', line 239
# Parses one author hash into CSL format.
#
# The name under "creatorName" goes through get_one_author; the result of
# get_name_identifier is added under "ORCID". Entries with nil values are
# removed from the returned hash.
#
# @param author [Hash] author hash with string keys
# @return [Hash] CSL-style author hash, with "ORCID" only when one was found
def get_one_hashed_author(author)
  csl_author = get_one_author(author.fetch("creatorName", nil))
  csl_author.merge("ORCID" => get_name_identifier(author)).compact
end
|
#get_query_url(options = {}) ⇒ Object
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
# File 'lib/toccatore/base.rb', line 28
# Assembles the search URL for a harvest query.
#
# The filter query restricts results to records updated between
# :from_date (00:00:00Z) and :until_date (23:59:59Z) that have metadata and
# are active. The main query is taken from the first non-blank option among
# :doi, :orcid, :related_identifier and :query, falling back to the
# `query` method (not defined in this method's view of the file).
#
# @param options [Hash] :from_date, :until_date, :doi, :orcid,
#   :related_identifier, :query, :offset, :rows
# @return [String] `url` followed by the form-encoded parameters
def get_query_url(options={})
  date_range = "updated:[#{options[:from_date]}T00:00:00Z TO #{options[:until_date]}T23:59:59Z]"
  filter_query = "#{date_range} AND has_metadata:true AND is_active:true"

  main_query =
    if options[:doi].present?
      "doi:#{options[:doi]}"
    elsif options[:orcid].present?
      "nameIdentifier:ORCID\\:#{options[:orcid]}"
    elsif options[:related_identifier].present?
      "relatedIdentifier:DOI\\:#{options[:related_identifier]}"
    elsif options[:query].present?
      options[:query]
    else
      query
    end

  params = {
    q: main_query,
    start: options[:offset],
    rows: options[:rows],
    fl: "doi,resourceTypeGeneral,relatedIdentifier,nameIdentifier,minted,updated",
    fq: filter_query,
    wt: "json"
  }

  url + URI.encode_www_form(params)
end
|
#get_total(options = {}) ⇒ Object
53
54
55
56
57
|
# File 'lib/toccatore/base.rb', line 53
# Asks the search API how many records match, requesting zero rows.
#
# @param options [Hash] query options; :rows is overridden with 0 for the
#   URL only, the original options are still passed to Maremma.get
# @return [Object] the "numFound" value under data -> response, or 0 when
#   any of those keys is missing
def get_total(options={})
  count_url = get_query_url(options.merge(rows: 0))
  response = Maremma.get(count_url, options)

  data = response.body.fetch("data", {})
  search_response = data.fetch("response", {})
  search_response.fetch("numFound", 0)
end
|
#is_personal_name?(author) ⇒ Boolean
222
223
224
225
226
227
|
# File 'lib/toccatore/base.rb', line 222
# Decides whether an author string should be treated as a personal name.
#
# A string containing a comma is always accepted. Otherwise the first
# whitespace-separated token is looked up with name_detector.name_exists?.
#
# @param author [String] author name
# @return [Object] true for comma-separated names, otherwise whatever
#   name_exists? returns
def is_personal_name?(author)
  if author.include?(",")
    true
  else
    first_token = author.split.first
    name_detector.name_exists?(first_token)
  end
end
|
#job_batch_size ⇒ Object
132
133
134
|
# File 'lib/toccatore/base.rb', line 132
# Number of records handled per batch. queue_jobs uses it as the default
# for :rows and as the page size when computing offsets.
#
# @return [Integer]
def job_batch_size
  1_000
end
|
#name_detector ⇒ Object
257
258
259
|
# File 'lib/toccatore/base.rb', line 257
# Returns the GenderDetector used by is_personal_name? to check whether a
# token is a known given name.
#
# The detector is memoized on the receiver, so it is built once instead of
# on every call (is_personal_name? calls this once per author).
#
# @return [GenderDetector]
def name_detector
  @name_detector ||= GenderDetector.new
end
|
#normalize_doi(doi) ⇒ Object
171
172
173
174
175
176
177
178
179
180
|
# File 'lib/toccatore/base.rb', line 171
# Turns a DOI (bare, "doi:"-prefixed or as a doi.org URL) into a
# lower-case https://doi.org/ URL.
#
# The input is validated with validate_doi, zero-width spaces (U+200B) are
# removed, the DOI is downcased and then encoded with Addressable::URI.encode.
#
# @param doi [String, nil]
# @return [String, nil] nil when validate_doi yields nothing
def normalize_doi(doi)
  valid_doi = validate_doi(doi)
  return nil unless valid_doi.present?

  cleaned = valid_doi.delete("\u200B").downcase
  "https://doi.org/" + Addressable::URI.encode(cleaned)
end
|
#orcid_as_url(orcid) ⇒ Object
186
187
188
|
# File 'lib/toccatore/base.rb', line 186
# Formats an ORCID iD as an http://orcid.org/ URL.
#
# @param orcid [String, nil]
# @return [String, nil] nil when orcid is blank
def orcid_as_url(orcid)
  return nil unless orcid.present?

  "http://orcid.org/#{orcid}"
end
|
#orcid_from_url(url) ⇒ Object
182
183
184
|
# File 'lib/toccatore/base.rb', line 182
# Extracts the part after "orcid.org/" from an ORCID URL.
#
# Both http:// and https:// URLs are accepted; the original pattern only
# matched http://, so https ORCID URLs were silently dropped.
#
# @param url [String, nil]
# @return [String, nil] everything after the host, or nil when url is not
#   an orcid.org URL
def orcid_from_url(url)
  Array(%r{\Ahttps?://orcid\.org/(.+)}.match(url)).last
end
|
#process_data(options = {}) ⇒ Object
93
94
95
96
97
98
99
100
|
# File 'lib/toccatore/base.rb', line 93
# Fetches one batch, parses it and pushes the parsed items.
#
# get_data is called with the options plus :timeout and :source_id;
# parse_data and push_data receive the original options.
#
# @param options [Hash]
# @return [Object] the result of push_data, or a one-element Array holding
#   an OpenStruct with body { "data" => [] } when parsing yields nothing
def process_data(options = {})
  fetch_options = options.merge(timeout: timeout, source_id: source_id)
  parsed = parse_data(get_data(fetch_options), options)

  return [OpenStruct.new(body: { "data" => [] })] if parsed.empty?

  push_data(parsed, options)
end
|
#push_data(items, options = {}) ⇒ Object
method returns number of errors
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
# File 'lib/toccatore/base.rb', line 108
# Pushes each item with push_item and returns the number of errors.
#
# @param items [Array] items to push
# @param options [Hash] :access_token is required for pushing; :from_date
#   and :until_date are used in the "no works" message; :total is returned
#   when the access token is missing
# @return [Object] 0 when there are no items, options[:total] when the
#   access token is blank, otherwise the sum of the push_item results
def push_data(items, options={})
  if items.empty?
    puts "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
    return 0
  end

  if options[:access_token].blank?
    puts "An error occured: Access token missing."
    return options[:total]
  end

  Array(items).reduce(0) do |error_total, item|
    error_total + push_item(item, options)
  end
end
|
#queue_jobs(options = {}) ⇒ Object
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
|
# File 'lib/toccatore/base.rb', line 59
# Runs a harvest for a date range, one batch at a time, and reports the
# outcome on stdout and, when :slack_webhook_url is set, to Slack.
#
# Defaults: :offset 0, :rows job_batch_size, :from_date yesterday,
# :until_date today. The options hash is modified in place (:offset,
# :total, :level and :title are written).
#
# @param options [Hash] :offset, :rows, :from_date, :until_date,
#   :slack_webhook_url plus anything the query/push methods read
# @return [Object] the total reported by get_total
def queue_jobs(options={})
  options[:offset] = options[:offset].to_i
  options[:rows] = options[:rows].presence || job_batch_size
  options[:from_date] = options[:from_date].presence || (Time.now.to_date - 1.day).iso8601
  options[:until_date] = options[:until_date].presence || Time.now.to_date.iso8601

  total = get_total(options)

  if total > 0
    # NOTE(review): paging always uses job_batch_size, even when the caller
    # supplied a different :rows value - confirm this is intended.
    total_pages = (total.to_f / job_batch_size).ceil
    error_total = 0

    (0...total_pages).each do |page|
      options[:offset] = page * job_batch_size
      options[:total] = total

      # process_data returns an Array placeholder instead of an error count
      # when a page parses to nothing; adding that to an Integer raised
      # TypeError, so only numeric results are counted.
      page_errors = process_data(options)
      error_total += page_errors if page_errors.is_a?(Integer)
    end

    text = "#{total} works processed with #{error_total} errors for date range #{options[:from_date]} - #{options[:until_date]}."
  else
    text = "No works found for date range #{options[:from_date]} - #{options[:until_date]}."
  end

  puts text

  options[:level] = total > 0 ? "good" : "warning"
  options[:title] = "Report for #{source_id}"
  send_notification_to_slack(text, options) if options[:slack_webhook_url].present?

  total
end
|
#send_notification_to_slack(text, options = {}) ⇒ Object
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
|
# File 'lib/toccatore/base.rb', line 136
# Posts a single-attachment message to a Slack webhook.
#
# @param text [String] attachment text
# @param options [Hash] :slack_webhook_url (required), :title (default
#   "Report"), :level used as attachment colour (default "good")
# @return [Object, nil] the body of the response returned by the notifier's
#   ping, or nil when no webhook URL is configured
def send_notification_to_slack(text, options={})
  webhook_url = options[:slack_webhook_url]
  return nil unless webhook_url.present?

  attachment = {
    title: options[:title] || "Report",
    text: text,
    color: options[:level] || "good"
  }

  notifier = Slack::Notifier.new(webhook_url,
                                 username: "Event Data Agent",
                                 icon_url: ICON_URL)
  notifier.ping(attachments: [attachment]).body
end
|
#timeout ⇒ Object
128
129
130
|
# File 'lib/toccatore/base.rb', line 128
# Timeout value that process_data passes to get_data as the :timeout option.
# NOTE(review): presumably seconds - confirm against the HTTP client.
#
# @return [Integer]
def timeout
  120
end
|
#unfreeze(hsh) ⇒ Object
261
262
263
264
265
|
# File 'lib/toccatore/base.rb', line 261
# Copies a hash-like object into a new Hash whose keys are downcased and
# converted to symbols. Values are carried over unchanged; when two keys
# collapse to the same symbol the later one wins.
#
# @param hsh [#each_pair] source hash; keys must respond to downcase
# @return [Hash] new hash with symbol keys
def unfreeze(hsh)
  symbolized = {}
  hsh.each_pair do |key, value|
    symbolized[key.downcase.to_sym] = value
  end
  symbolized
end
|
#url ⇒ Object
124
125
126
|
# File 'lib/toccatore/base.rb', line 124
# Base URL of the search API, including the trailing "?" so that
# get_query_url can append the encoded parameters directly.
#
# @return [String]
def url
  "https://search.datacite.org/api?"
end
|
#validate_doi(doi) ⇒ Object
163
164
165
|
# File 'lib/toccatore/base.rb', line 163
# Extracts the DOI from a bare DOI, a "doi:"-prefixed DOI or an
# http(s)://(dx.)doi.org/ URL.
#
# The dot in "doi.org" is escaped; previously it matched any character, so
# hosts such as "doiXorg" were accepted as a DOI resolver.
#
# @param doi [String, nil]
# @return [String, nil] the DOI ("10.<4-5 digits>/<suffix>"), or nil when
#   the input does not match
def validate_doi(doi)
  Array(%r{\A(?:(?:http|https)://(?:dx\.)?doi\.org/)?(?:doi:)?(10\.\d{4,5}/.+)\z}.match(doi)).last
end
|
#validate_orcid(orcid) ⇒ Object
190
191
192
|
# File 'lib/toccatore/base.rb', line 190
# Extracts an ORCID iD from a bare iD or an orcid.org URL.
#
# An iD is four hyphen-separated groups of four characters; the final
# character may be a digit or "X". The original pattern ended in
# `[0-9X]+`, which accepted over-long identifiers, and only allowed
# http:// URLs. https:// URLs are now accepted as well.
#
# @param orcid [String, nil]
# @return [String, nil] the bare iD, or nil when the input does not match
def validate_orcid(orcid)
  Array(%r{\A(?:https?://orcid\.org/)?(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])\z}.match(orcid)).last
end
|
#validate_prefix(doi) ⇒ Object
167
168
169
|
# File 'lib/toccatore/base.rb', line 167
# Extracts the DOI prefix ("10.<4-5 digits>") from a bare DOI, a
# "doi:"-prefixed DOI or an http(s)://(dx.)doi.org/ URL.
#
# The dot in "doi.org" is escaped; previously it matched any character, so
# hosts such as "doiXorg" were accepted as a DOI resolver.
#
# @param doi [String, nil]
# @return [String, nil] the prefix, or nil when the input is not a DOI
def validate_prefix(doi)
  Array(%r{\A(?:(?:http|https)://(?:dx\.)?doi\.org/)?(?:doi:)?(10\.\d{4,5})/.+\z}.match(doi)).last
end
|