Class: PubmedAPI::Interface

Inherits:
Object
  • Object
show all
Defined in:
lib/pubmed_api.rb

Constant Summary collapse

WAIT_TIME =

seconds

0.5
# Default request options. Options supplied by callers are merged over these
# (see the DEFAULT_OPTIONS.merge calls in search and fetch_records).
DEFAULT_OPTIONS = {
  :tool           => 'ruby-pubmed-api',
  # Which database to query, e.g. pubmed / nlmcatalog. The value carries the
  # parameter name as well as the database name.
  :database       => 'db=pubmed',
  # Which API verb to use, e.g. search / fetch.
  :verb           => 'search',
  :email          => '',
  # :reldate      => 90, # how far back to go, in days (currently disabled)
  :add            => '',
  :retmax         => 100000,
  :retstart       => 0,
  :load_all_pmids => true
}
# Template for E-utilities request URLs. It is handed to expand_uri together
# with the options hash (see make_api_request), which presumably substitutes
# the {name} placeholders.
# Uses the https scheme: NCBI serves the E-utilities over HTTPS.
# NOTE(review): {reldate} has no entry in DEFAULT_OPTIONS (it is commented out
# there) - confirm how expand_uri treats a missing key.
URI_TEMPLATE =
'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/e{verb}.fcgi?{database}&tool={tool}&email={email}'+
'&reldate={reldate}&retmax={retmax}&retstart={retstart}&{query}&retmode=xml&{add}'

Class Method Summary collapse

Class Method Details

.convert_odd_journal_ids(id) ⇒ Object

Some journals have odd NLMIDs that need to be searched for rather than accessed directly.



121
122
123
124
125
126
127
128
129
130
131
# File 'lib/pubmed_api.rb', line 121

# Resolves an odd journal NLMID by searching the NLM catalog for it.
#
# @param id [String] the NLMID to look up
# @return [String] the single id found by the search, as a String; an empty
#   String (after printing "failed to convert <id>") when the search does not
#   return exactly one id
def convert_odd_journal_ids(id)
  matches = search(id, {:database => 'db=nlmcatalog'}).pmids
  return matches[0].to_s if matches.length == 1

  puts "failed to convert " + id.to_s
  ''
end

.do_search(search_term, options) ⇒ Object

Performs a search and parses the response



46
47
48
49
50
51
# File 'lib/pubmed_api.rb', line 46

# Issues one search request and parses the response.
#
# Calls wait first, then requests the URL built from +options+ plus a
# "term=<search_term>" query, and hands the response to XMLParser#parse_search.
#
# @param search_term [String] the search term, appended as-is after "term="
# @param options [Hash] request options; not modified
# @return [Object] whatever XMLParser#parse_search returns
def do_search(search_term, options)
  wait
  request_options = options.merge(:query => 'term=' + search_term)
  response = make_api_request(request_options)
  XMLParser.new.parse_search(response)
end

.fetch_journals(nlmids) ⇒ Object



68
69
70
71
72
73
74
# File 'lib/pubmed_api.rb', line 68

# Fetches journal records from the NLM catalog for the given NLM ids.
#
# Ids containing an 'R' are first resolved through convert_odd_journal_ids.
# That helper returns an empty String when it cannot resolve an id, so blank
# ids are dropped instead of being sent as an empty entry in the
# comma-separated id list.
#
# @param nlmids [Array<String>] NLM ids to fetch
# @return [Object] whatever XMLParser#parse_journals returns for the fetched records
def fetch_journals(nlmids)
  # Change the ids of those weird journals
  resolved_ids = nlmids.map { |nlmid| nlmid.include?('R') ? convert_odd_journal_ids(nlmid) : nlmid }
  # A failed conversion yields ''; there is nothing to fetch for it.
  resolved_ids = resolved_ids.reject { |nlmid| nlmid.to_s.empty? }

  xml = fetch_records(resolved_ids, {:verb => 'fetch', :database => 'db=nlmcatalog'})
  XMLParser.new.parse_journals(xml)
end

.fetch_papers(ids) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/pubmed_api.rb', line 53

# Fetches the PubMed records for +ids+ and attaches a full-text URL to each
# parsed paper where one is available.
#
# The URL is taken from the first link that get_fulltext_links reports for the
# paper's pmid. Papers with no link entry (or an empty one) are left without a
# url instead of raising NoMethodError.
#
# @param ids [Array] PubMed ids to fetch
# @return [Object] the collection returned by XMLParser#parse_papers (nil
#   entries are skipped, not removed)
def fetch_papers(ids)
  xml = fetch_records(ids, {:verb => 'fetch', :database => 'db=pubmed'})
  papers = XMLParser.new.parse_papers(xml)
  lookup_hash = get_fulltext_links(ids)

  papers.each do |paper|
    next if paper.nil?

    # Not every pmid has full-text links, so guard both lookups.
    links = lookup_hash[paper.pmid]
    first_link = links && links.first
    paper.url = first_link.url if first_link
  end
end

.fetch_records(ids, opts = {}) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/pubmed_api.rb', line 87

# Downloads the records for +ids+ in batches and returns them as one Array.
#
# The ids are requested 500 at a time; each batch is sent as a comma-separated
# "id=..." query, and the nodes matched by './*/*' in each response are
# appended to the result.
#
# @param ids [Array] ids to fetch
# @param opts [Hash] request options, merged over DEFAULT_OPTIONS
# @return [Array] the collected record nodes, in request order
def fetch_records(ids, opts={})
  request_options = DEFAULT_OPTIONS.merge(opts)
  batch_size = 500

  # TODO parallelise?
  ids.each_slice(batch_size).inject([]) do |collected, batch|
    response = make_api_request(request_options.merge({ :query => 'id=' + batch.join(",") }))
    collected + response.xpath('./*/*')
  end
end


76
77
78
79
80
81
82
83
84
# File 'lib/pubmed_api.rb', line 76

# Looks up the links for +ids+ through the "link" verb with cmd=llinks.
#
# @param ids [#each_slice] PubMed ids to look up
# @return [Object] whatever XMLParser#parse_links returns; fetch_papers indexes
#   it by pmid. Ids without links may be absent from it.
def get_fulltext_links(ids)
  opts = {:verb => 'link', :add => 'cmd=llinks', :database => 'dbfrom=pubmed'}
  xml = fetch_records(ids, opts)

  XMLParser.new.parse_links(xml)
end

.get_journal_id_from_issn(issn) ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/pubmed_api.rb', line 134

# Finds the NLM catalog id of the journal with the given ISSN.
#
# Searches the nlmcatalog database for "<issn>[ISSN]+AND+ncbijournals[filter]".
#
# @param issn [String] the ISSN to look up
# @return [String] the single id found, as a String; an empty String (after
#   printing "failed to find <issn>") when the search does not return exactly
#   one id
def get_journal_id_from_issn(issn)
  query = issn + "[ISSN]+AND+ncbijournals[filter]"
  hits = search(query, {:database => 'db=nlmcatalog'}).pmids
  return hits[0].to_s if hits.length == 1

  puts "failed to find " + issn.to_s
  ''
end

.make_api_request(options) ⇒ Object

Makes the HTTP request and returns the response. TODO: handle failures.



114
115
116
117
# File 'lib/pubmed_api.rb', line 114

# Makes the HTTP request described by +options+ and returns the parsed response.
#
# The URL is built by expand_uri from URI_TEMPLATE and +options+. The response
# is opened in block form, so the underlying handle is closed once it has been
# parsed. URI.open is used when open-uri provides it, because plain Kernel#open
# no longer opens URLs on Ruby 3.0+; older Rubies fall back to open.
#
# No error handling yet: anything raised while opening or parsing propagates
# to the caller (TODO handle failures).
#
# @param options [Hash] values passed to expand_uri for the URL template
# @return [Object] the result of Nokogiri::XML for the response body
def make_api_request(options)
  url = expand_uri(URI_TEMPLATE, options)
  parse = lambda { |io| Nokogiri::XML(io) }

  if defined?(URI) && URI.respond_to?(:open)
    URI.open(url, &parse)
  else
    open(url, &parse)
  end
end

.search(term, options = {}) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/pubmed_api.rb', line 29

# Searches for +term+ and, unless :load_all_pmids is disabled, keeps requesting
# further pages until all ids have been loaded.
#
# @param term [String] the search term, passed through to do_search
# @param options [Hash] request options, merged over DEFAULT_OPTIONS
# @return [Object] the result of the first do_search call, with the ids of any
#   further pages appended to its pmids list
def search(term, options={})
  options = DEFAULT_OPTIONS.merge(options)

  results = do_search(term, options)
  page_size = options[:retmax]

  # A page size of 0 cannot be stepped through, so skip paging in that case.
  if options[:load_all_pmids] && page_size > 0
    # results.count is presumably the total number of hits reported by the API.
    # The range is exclusive so that no extra, empty request is sent when the
    # count is an exact multiple of the page size.
    (page_size...results.count).step(page_size) do |offset|
      page = do_search(term, options.merge({:retstart => offset}))
      # concat keeps pmids flat; << would nest each page as a single element.
      results.pmids.concat(page.pmids)
    end
  end

  results
end

.wait ⇒ Object

Sleeps for WAIT_TIME seconds (0.5 s) before a request, which covers the 300ms minimum wait.



154
155
156
# File 'lib/pubmed_api.rb', line 154

# Pauses the caller for WAIT_TIME seconds.
#
# @return [Integer] the value returned by sleep
def wait
  sleep(WAIT_TIME)
end