Module: Cites

Defined in:
lib/cites.rb

Constant Summary collapse

@@cache_location =
ENV['HOME'] + '/.cites/cache'

Class Method Summary collapse

Class Method Details

.doi2cit(doi, format = 'text', style = 'apa', locale = 'en-US', cache = true) ⇒ Object

Get a citation in various formats from a DOI

Args:

  • doi: A DOI

  • format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex, crossref-xml, datacite-xml

  • style: Only used if format=‘text’, e.g., apa, harvard3

  • locale: A locale, e.g., en-US

  • cache: Should cache be used

* true: Try fetching from cache and store to cache (default)

* false: Do not use cache at all
* 'flush': Get a fresh response and cache it

Examples:

require 'cites'
Cites.doi2cit('10.1371/journal.pone.0000308')
Cites.doi2cit('10.1371/journal.pbio.0030427')
Cites.doi2cit('10.1371/journal.pbio.0030427', 'crossref-xml')
Cites.doi2cit('10.1371/journal.pbio.0030427', 'bibtex')
Cites.doi2cit('10.1371/journal.pbio.0030427', 'ris')

out = Cites.doi2cit(['10.1371/journal.pone.0000308','10.1371/journal.pbio.0030427','10.1371/journal.pone.0084549'], 'bibtex')

Cites.show(out)

Returns an array of citation content. The structure of the content will depend on the format requested.



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/cites.rb', line 153

# Get citations in various formats for one or more DOIs.
#
# Args:
# * doi: a single DOI (String) or an Array of DOIs
# * format: citation format name, forwarded to Cites.getcite
# * style: citation style, forwarded to Cites.getcite
# * locale: locale, forwarded to Cites.getcite
# * cache: cache mode (true, false or 'flush'), forwarded to Cites.getcite
#
# Returns an Array with one entry per DOI. Entries are whatever
# Cites.getcite returns, except for the 'citeproc-json' format, where
# each entry is first run through JSON.parse.
#
# Raises RuntimeError when doi is neither a String nor an Array.
def self.doi2cit(doi, format='text', style='apa', locale='en-US',
                 cache=true)
  # Exact class match on purpose: subclasses of String/Array are rejected.
  unless doi.instance_of?(String) || doi.instance_of?(Array)
    raise 'doi must be one of String or Array class'
  end

  # Normalise a lone DOI into a one-element list.
  dois = doi.instance_of?(String) ? [doi] : doi

  dois.map do |single_doi|
    raw = Cites.getcite(single_doi, format, style, locale, cache)
    format == 'citeproc-json' ? JSON.parse(raw) : raw
  end
end

.getcacheObject



28
29
30
# File 'lib/cites.rb', line 28

# Report the currently configured cache location.
#
# Returns the value held in the @@cache_location class variable.
def self.getcache
  @@cache_location
end

.getcite(doi, format = 'text', style = 'apa', locale = 'en-US', cache = true) ⇒ Object

Get a single citation in various formats from a DOI

Args:

  • doi: A DOI

  • format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex, crossref-xml, datacite-xml

  • style: Only used if format=‘text’, e.g., apa, harvard3

  • locale: A locale, e.g., en-US

  • cache: Should cache be used

* true: Try fetching from cache and store to cache (default)

* false: Do not use cache at all
* 'flush': Get a fresh response and cache it


53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/cites.rb', line 53

# Get a single citation in various formats from a DOI.
#
# Args:
# * doi: a DOI, with or without a leading 'http://dx.doi.org/'
# * format: one of rdf-xml, turtle, citeproc-json, text, ris, bibtex,
#   crossref-xml, datacite-xml
# * style: only used when format is 'text', e.g. apa, harvard3
# * locale: only used when format is 'text', e.g. en-US
# * cache: how the cache is used
#   * true: try fetching from cache and store to cache (default)
#   * false: do not use the cache at all
#   * 'flush': get a fresh response and cache it
#
# Returns the response body; for 'bibtex' the body is parsed with
# BibTeX.parse and converted back to a String.
#
# Raises RuntimeError for an unknown format or an invalid cache value.
# Errors raised while fetching are reported on stdout and re-raised
# rather than terminating the process.
def self.getcite(doi, format='text', style='apa', locale='en-US',
                 cache=true)
  formats = {"rdf-xml" => "application/rdf+xml",
             "turtle" => "text/turtle",
             "citeproc-json" => "application/vnd.citationstyles.csl+json",
             "text" => "text/x-bibliography",
             "ris" => "application/x-research-info-systems",
             "bibtex" => "application/x-bibtex",
             "crossref-xml" => "application/vnd.crossref.unixref+xml",
             "datacite-xml" => "application/vnd.datacite.datacite+xml"
  }

  # Validate arguments before doing any network or cache work. An unknown
  # format would otherwise be sent as an empty Accept header.
  formatuse = formats[format]
  if formatuse.nil?
    fail "Invalid format #{format}, must be one of: #{formats.keys.join(', ')}"
  end
  unless cache == true || cache == false || cache == 'flush'
    fail "Invalid cache value #{cache}"
  end

  # Style and locale are only sent along for plain-text bibliographies.
  if format == 'text'
    type = "#{formatuse}; style=#{style}; locale=#{locale}"
  else
    type = formatuse
  end

  # Accept DOIs that already carry the resolver prefix so the prefix is
  # not added twice.
  resolver = 'http://dx.doi.org/'
  url = doi.start_with?(resolver) ? doi : resolver + doi

  if cache == false
    puts "Not using cache, requesting..."
    response = HTTParty.get(url, :headers => {"Accept" => type})
    content = response.body if response_ok(response.code)
  else
    if cache == true
      cache_time = 6000
      msg = "Requested DOI not in cache or is stale, requesting..."
    else
      # 'flush': a minimal cache time so the stored entry is treated as
      # stale and refreshed.
      # NOTE(review): unit of :cache is presumably seconds - confirm
      # against the api_cache documentation.
      cache_time = 1
      msg = "Flushing cache, requesting..."
    end
    # [todo] - should using cache be reported?

    # Create a cache key based on the requested URL + the content type
    cache_key = Digest::SHA1.hexdigest("#{url}-#{type}")

    content = APICache.get(cache_key, :cache => cache_time,
                           :valid => :forever, :period => 0,
                           :timeout => 30) do
      puts msg
      response = HTTParty.get(url, :headers => {"Accept" => type})

      # Hand the body back to APICache when the response code is
      # accepted by response_ok; any error it raises is reported and then
      # re-raised so the caller (and APICache) can deal with it.
      begin
        response_ok(response.code) ? response.body : nil
      rescue StandardError => e
        puts e.message
        puts "Format requested: #{formatuse}"
        raise
      end
    end
  end

  # BibTeX responses are normalised by a parse/serialise round trip.
  if format == 'bibtex'
    BibTeX.parse(content).to_s
  else
    content
  end
end

.match(query) ⇒ Object

match: Look up DOIs in CrossRef that match free-form citations for an object (article, book, etc.).

Args:

  • query: A free form string of terms.

Examples:

require 'cites'
Cites.match('Piwowar sharing data increases citation PLOS')
Cites.match('boettiger Modeling stabilizing selection')

Cites.match(['Piwowar sharing data increases citation PLOS', 'boettiger Modeling stabilizing selection'])
out = Cites.match(['piwowar sharing data increases citation PLOS', 'boettiger Modeling stabilizing selection', 'priem Using social media to explore scholarly impact', 'fenner Peroxisome ligands for the treatment of breast cancer'])
out.map {|i| i}

# Feed into the doi2cit method
Cites.doi2cit(out.map {|i| i['doi']})


206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/cites.rb', line 206

# Look up DOIs in CrossRef that match free-form citation strings.
#
# Args:
# * query: a free-form citation String, or an Array of such strings
#
# Returns an Array of Hashes with the keys 'match', 'doi' and 'text',
# one per entry of the 'results' field of the response. A leading
# 'http://dx.doi.org/' is stripped from each DOI. When the response
# carries no 'results' list (e.g. after a failed request) an empty
# Array is returned.
#
# Raises RuntimeError when query is neither a String nor an Array.
def self.match(query)
  case query
  when String
    query = [query]
  when Array
    # already a list of citation strings
  else
    fail 'query must be one of String or Array class'
  end

  url = "http://search.labs.crossref.org/links"
  out = HTTParty.post(url,
                      :body => query.to_json,
                      :headers => { "Content-Type" => "application/json"})
  puts "ERROR #{out.code}" unless out.code == 200

  # A failed request may not contain a results list; treat that as
  # "no matches" instead of failing on nil.
  results = out['results']
  results = [] unless results.is_a?(Array)

  results.map do |item|
    doi = item['doi']
    doi = doi.sub('http://dx.doi.org/', '') unless doi.nil?
    {
      'match' => item['match'],
      'doi' => doi,
      'text' => item['text']
    }
  end
end

.search(query, options = {}) ⇒ Object

search: Search for scholarly objects in CrossRef

Args:

  • query: A single or many terms (in an array). This function performs

    a single search if multiple terms are supplied. If this is 
    supplied, the doi arg is ignored.
    
  • doi: A DOI to search for. If this is supplied, query is ignored.

  • page: Page number to return.

  • rows: Number of records to return

  • sort: Sort (logical)

  • year: Year to restrict search to.

Examples:

require 'cites'
Cites.search(query='renear')
Cites.search('palmer')

Cites.search(['ecology', 'microbiology'])
out = Cites.search(['renear', 'science', 'smith birds'])
out.map {|i| i}

Cites.search('science', :rows => 5)

# Feed into the doi2cit method
out = Cites.search('palmer')
g = Cites.doi2cit(out['items'][1]['doi'], format='bibtex')
Cites.show(g)


271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# File 'lib/cites.rb', line 271

# Search for scholarly objects in CrossRef.
#
# Args:
# * query: a single term (String) or several terms (Array); several
#   terms are joined with '+' and sent as one search
# * options: Hash with Symbol keys
#   * :doi: when set, no search is performed and nil is returned
#   * :page: page number to return
#   * :rows: number of records to return (default 10)
#   * :sort: sort
#   * :year: year to restrict the search to
#   * :header: when true (default) the result is wrapped together with
#     the response metadata
#   * :fields: keys kept from every returned item
#     (default doi, normalizedScore, title, year)
#
# Returns, when :header is true, a Hash {'meta' => ..., 'items' => [...]}
# where 'meta' is the response without its 'items' entry; otherwise just
# the Array of filtered items. Returns nil when :doi is given.
#
# Raises RuntimeError when query is neither a String nor an Array.
def self.search(query, options = {})
  defaults = {:doi => nil, :page => nil, :rows => 10,
              :sort => nil, :year => nil, :header => true,
              :fields => ["doi","normalizedScore","title","year"]}
  options = defaults.merge(options)
  # :fields only controls local filtering, it is not a request parameter.
  fields = options.delete(:fields)

  case query
  when String
    # a single term is sent as-is
  when Array
    query = query.join('+')
  else
    fail 'query must be one of String or Array class'
  end

  url = "http://search.labs.crossref.org/dois"

  # Searching by DOI is not implemented; nothing is requested.
  return nil unless options[:doi].nil?

  # FIXME: looks like "rows" option isn't working like it's supposed to
  args = {"q" => query, "page" => options[:page], "rows" => options[:rows],
          "sort" => options[:sort], "year" => options[:year],
          "header" => options[:header]}
  args = args.reject { |_, value| value.nil? }

  out = HTTParty.get(url, :query => args)
  puts "ERROR #{out.code}" unless out.code == 200

  # A failed request may not contain an items list; treat it as empty
  # instead of failing on nil.
  items = out['items']
  items = [] unless items.is_a?(Array)
  coll = items.map { |item| item.select { |key, _| fields.include?(key) } }

  return coll unless options[:header] == true

  # Plain reject instead of Hash#except, which is only available on
  # Ruby >= 3.0 or with ActiveSupport loaded.
  meta = out.to_hash.reject { |key, _| key == 'items' }
  {'meta' => meta, 'items' => coll}
end

.setcache(directory) ⇒ Object



32
33
34
35
36
37
38
# File 'lib/cites.rb', line 32

# Point the cache at a different directory.
#
# Args:
# * directory: path of an existing directory
#
# Stores the path in the @@cache_location class variable and returns it.
# When the path is not an existing directory a message is printed, the
# setting is left untouched and nil is returned.
def self.setcache(directory)
  unless File.directory?(directory)
    puts "Directory path #{directory} does not exist"
    return
  end

  @@cache_location = directory
end

.show(input) ⇒ Object



181
182
183
184
185
# File 'lib/cites.rb', line 181

# Print every element of a collection to standard output.
#
# Args:
# * input: an object responding to each, e.g. the Array from doi2cit
#
# Each element is written with Object#display and followed by two
# newlines. Returns what input.each returns.
def self.show(input)
  input.each do |citation|
    # display writes the element itself and evaluates to nil; the puts
    # below then only contributes the trailing blank lines.
    shown = citation.display
    puts shown, "\n"
  end
end