Module: PubMed

Defined in:
lib/rbbt/sources/pubmed.rb

Overview

This module provides an interface to PubMed for performing queries and retrieving basic information from articles. It relies on Rbbt's caching services.

Defined Under Namespace

Classes: Article

Constant Summary collapse

@@pubmed_lag =
1

Class Method Summary collapse

Class Method Details

.get_article(pmids) ⇒ Object

return Article.new(xml)

  end
end

end



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/rbbt/sources/pubmed.rb', line 233

# Retrieves PubMed records for one or several PMIDs and wraps each one in an
# Article.
#
# pmids:: a single PMID or an Array of PMIDs; nil entries are ignored.
#
# Returns a Hash mapping each requested id to its Article when +pmids+ is an
# Array, or a single Article (nil if nothing could be loaded) otherwise. Ids
# whose cached file is missing or empty are left out of the result.
#
# The XML of each record is obtained through FileCache.cache_online_elements;
# the block below is what this method supplies to it for downloading the ids
# it yields (presumably only those not cached yet -- confirm in FileCache).
def self.get_article(pmids)
  _array = Array === pmids

  pmids = [pmids] unless _array
  pmids = pmids.compact

  result_files = FileCache.cache_online_elements(pmids, 'pubmed-{ID}.xml') do |ids|
    result = {}
    values = []
    # NOTE(review): presumably splits ids into (ids.length / 20) + 1 groups so
    # each request carries a bounded number of ids -- confirm in Misc.divide.
    chunks = Misc.divide(ids, (ids.length / 20) + 1)
    Log::ProgressBar.with_bar(chunks.length, :desc => "Downloading articles from PubMed") do |bar|
      chunks.each do |list|
        begin
          Misc.try3times do
            # NCBI requires HTTPS for all E-utilities requests.
            url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

            postdata = "db=pubmed&retmode=xml&id=#{list * ","}"
            # The parameters are sent in the query string; the temporary file
            # is only handed over under the "--__post-file=" key, which looks
            # like a deliberately disabled "--post-file=" option.
            xml = TmpFile.with_file(postdata) do |postfile|
              Open.read(url+'?'+postdata, :quiet => true, :nocache => true, :nice => @@pubmed_lag, :nice_key => "PubMed", "--__post-file=" => postfile)
            end

            values.concat(xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten)
          end
        rescue Aborted
          raise $!
        rescue Exception
          # Best effort: a failing chunk is logged and the remaining chunks
          # are still attempted.
          Log.exception $!
        ensure
          bar.tick
        end
      end
    end

    # Index every downloaded record by the first PMID found in its XML.
    values.each do |xml|
      pmid = xml.scan(/<PMID[^>]*?>(.*?)<\/PMID>/).flatten.first

      result[pmid] = xml
    end

    # Ids requested with leading zeros are matched against the record stored
    # under the zero-stripped id.
    ids.each do |id|
      next if id.nil? || result[id]

      stripped = id.sub(/^0+/, '')
      result[id] = result[stripped] if result[stripped]
    end

    # Anything still unresolved gets an empty entry.
    ids.each do |id|
      next if id.nil? || result[id]

      result[id] = ""
    end

    result
  end

  articles = {}
  pmids.each do |id|
    path = result_files[id]
    next if path.nil?

    txt = Open.read(path)
    next if txt.empty?

    articles[id] = Article.new(txt)
  end

  _array ? articles : articles.values.first
end

.query(query, retmax = nil) ⇒ Object

Performs the specified query and returns an array with the PubMed IDs found. retmax can be used to limit the number of IDs returned; if it is not specified, 30000 is used.



16
17
18
19
20
# File 'lib/rbbt/sources/pubmed.rb', line 16

# Runs +query+ against the PubMed esearch service and returns the matching
# PubMed ids as an Array of Strings.
#
# query::  the search term; it is percent-encoded before being placed in the
#          request URL, so it should be passed raw (not pre-encoded).
# retmax:: maximum number of ids to request; defaults to 30000 when nil.
def self.query(query, retmax=nil)
  require 'uri' # idempotent; needed for URI.encode_www_form_component

  retmax ||= 30000

  # Without encoding, characters such as '&', '#' or '=' in the term would be
  # interpreted as URL syntax and corrupt the request.
  term = URI.encode_www_form_component(query)
  # NCBI requires HTTPS for all E-utilities requests.
  url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?retmax=#{retmax}&db=pubmed&term=#{term}"

  Open.read(url, :quiet => true, :nocache => true).scan(/<Id>(\d+)<\/Id>/).flatten
end