Class: PubmedAPI::Interface

Inherits:
Object
  • Object
show all
Defined in:
lib/pubmed_api.rb

Constant Summary collapse

WAIT_TIME =

Pause applied before each search request, in seconds.

0.5
# Baseline request options; search merges caller-supplied options over these.
DEFAULT_OPTIONS = {
  :tool => 'ruby-pubmed-api',
  # Which database to query, e.g. pubmed/nlmcatalog
  :database => 'pubmed',
  # Which API verb to use, e.g. search/fetch
  :verb => 'search',
  :email => '',
  # How far back to go, in days
  :reldate => 90,
  :retmax => 100000,
  :retstart => 0,
  :load_all_pmids => false
}
# URI template for E-utilities requests; the {placeholders} are filled in from
# the options hash by expand_uri. NCBI serves the E-utilities over HTTPS, so
# the template targets the https endpoint directly.
URI_TEMPLATE =
'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?db={database}&tool={tool}&email={email}'+
'&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&rettype=fasta&retmode=xml'

Class Method Summary collapse

Class Method Details

.convert_odd_journal_ids(id) ⇒ Object

Some journals have odd NLMIDs that need to be searched for rather than accessed directly. TODO: combine into a single API request.



98
99
100
101
102
103
104
105
106
107
108
# File 'lib/pubmed_api.rb', line 98

# Looks +id+ up in the nlmcatalog database and returns the single matching id
# as a String. When the search does not yield exactly one match, a failure
# line is printed to stdout and an empty String is returned.
def convert_odd_journal_ids(id)
  lookup = search(id, {:database => 'nlmcatalog', :reldate => '100000'})

  # Only an unambiguous (single) hit counts as a successful conversion.
  return lookup.pmids[0].to_s if lookup.pmids.length == 1

  puts "failed to convert " + id.to_s
  ''
end

.do_search(search_term, options) ⇒ Object

Performs a search and parses the response



45
46
47
48
49
50
# File 'lib/pubmed_api.rb', line 45

# Runs one search request for +search_term+ and returns whatever
# XMLParser#parse_search produces for the response document.
# Calls wait before issuing the request.
def do_search(search_term, options)
  wait
  request_options = options.merge({:query => 'term='+search_term})
  response = make_api_request(request_options)
  XMLParser.new.parse_search(response)
end

.fetch_journals(nlmids) ⇒ Object



58
59
60
61
62
63
64
# File 'lib/pubmed_api.rb', line 58

# Fetches the nlmcatalog records for +nlmids+ and returns the result of
# XMLParser#parse_journals on them.
def fetch_journals(nlmids)
  # Ids containing an 'R' are the odd ones: swap each for the id returned
  # by convert_odd_journal_ids before fetching.
  resolved_ids = nlmids.map do |nlmid|
    if nlmid.include?('R')
      convert_odd_journal_ids(nlmid)
    else
      nlmid
    end
  end

  records = fetch_records(resolved_ids, 'nlmcatalog')
  XMLParser.new.parse_journals(records)
end

.fetch_papers(ids) ⇒ Object



52
53
54
55
56
# File 'lib/pubmed_api.rb', line 52

# Fetches the pubmed records for +ids+ and returns the result of
# XMLParser#parse_papers on them.
def fetch_papers(ids)
  records = fetch_records(ids, 'pubmed')
  XMLParser.new.parse_papers(records)
end

.fetch_records(ids, database) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/pubmed_api.rb', line 66

# Fetches the records for +ids+ from +database+ and returns them as one flat
# Array.
#
# The ids are requested in chunks of 500, sent comma-separated in a single
# 'fetch' call per chunk. Each response contributes the nodes matched by the
# XPath './*/*'.
def fetch_records(ids, database)
  # Largest number of ids sent in one request.
  chunk_size = 500

  # TODO paralellise?
  pages = ids.each_slice(chunk_size).map do |chunk|
    # Pause before every request, as do_search does, so that a long id list
    # does not fire its chunked requests back to back.
    wait

    request_options = DEFAULT_OPTIONS.merge({:verb => 'fetch',
                                             :database => database,
                                             :query => 'id=' + chunk.join(',')})
    make_api_request(request_options).xpath('./*/*')
  end

  pages.flatten
end

.make_api_request(options) ⇒ Object

Makes the HTTP request and returns the response. TODO: handle failures. Log API calls?



90
91
92
93
# File 'lib/pubmed_api.rb', line 90

# Expands URI_TEMPLATE with +options+, requests the resulting URL and returns
# the response parsed by Nokogiri::XML.
#
# The response stream is opened in block form so it is closed as soon as it
# has been parsed. URI.open is used where it is available, because
# Kernel#open no longer opens URLs on Ruby 3.0+; older Rubies fall back to
# the open-uri patched Kernel#open.
# TODO handle failures, log API calls?
def make_api_request(options)
  url = expand_uri(URI_TEMPLATE, options)
  parse = lambda { |response| Nokogiri::XML(response) }

  if URI.respond_to?(:open)
    URI.open(url, &parse)
  else
    open(url, &parse)
  end
end

.search(term, options = {}) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/pubmed_api.rb', line 28

# Searches for +term+ and returns the parsed result object from do_search.
#
# +options+ are merged over DEFAULT_OPTIONS. When :load_all_pmids is truthy,
# follow-up requests are issued with :retstart advanced in steps of :retmax,
# and the PMIDs of every extra page are appended to the first page's list.
def search(term, options={})
  options = DEFAULT_OPTIONS.merge(options)
  results = do_search(term, options)

  if options[:load_all_pmids]
    page_size = options[:retmax]

    # Exclusive upper bound: assuming results.count is the total number of
    # hits, a page starting at offset == count would always be empty.
    (page_size...results.count).step(page_size) do |offset|
      page = do_search(term, options.merge({:retstart => offset}))
      # concat keeps pmids one flat list; << would push the whole page in as
      # a single nested element.
      results.pmids.concat(page.pmids)
    end
  end

  results
end

.wait ⇒ Object

Sleeps for WAIT_TIME seconds (0.5 s) to throttle API requests.



111
112
113
# File 'lib/pubmed_api.rb', line 111

# Suspends the caller for WAIT_TIME seconds and returns sleep's result.
def wait
  Kernel.sleep(WAIT_TIME)
end