Method: PubmedAPI::XMLParser#parse_papers

Defined in:
lib/pubmed_api/parsers.rb

#parse_papers(papers_xml) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/pubmed_api/parsers.rb', line 35

# Builds one PaperStruct per PubMed article found in +papers_xml+.
#
# Each element of +papers_xml+ is inspected; elements whose first node matched
# by the XPath '/*/*' is not named 'PubmedArticle' are skipped silently.
#
# @param papers_xml [#each] collection of parsed XML objects that respond to
#   +xpath+, +at+ and +css+ (presumably Nokogiri documents -- TODO confirm
#   against the caller).
# @return [Array<PaperStruct>] one struct per accepted paper, in input order.
#
# NOTE(review): the title lookup is not wrapped in a rescue, unlike the
# abstract and article-date lookups, so an error there propagates to the caller.
def parse_papers(papers_xml)

  results = []

  papers_xml.each do |paper|
    
    # Check it's actually a paper: '/*/*' is an absolute XPath selecting the
    # children of the root element, and only the first match is examined.
    if paper.xpath('/*/*').first.name().eql?('PubmedArticle')

      
      paper_output = PaperStruct.new
   
      paper_output.title = paper.at('ArticleTitle').text

      # The abstract is treated as optional: a NoMethodError from the lookup
      # (presumably +at+ returning nil when the element is absent) is swallowed
      # and the abstract field is left unset.
      begin
        paper_output.abstract = paper.at('Abstract').text
      rescue NoMethodError
      
      end
      
      # NOTE(review): the assignment targets on the two statements below, and
      # on the assignment in the rescue branch, are missing from this listing
      # (an identifier appears to have been lost), so the code is not valid
      # Ruby as shown. Restore the names from lib/pubmed_api/parsers.rb.
      begin
        # Date built from ArticleDate's Year / Month / Day children.
        # +to_i+ is used for the conversion, so non-numeric text becomes 0.
         =  Date.new( paper.at('ArticleDate/Year').text.to_i,  paper.at('ArticleDate/Month').text.to_i, paper.at('ArticleDate/Day').text.to_i)
        paper_output. =  
      rescue NoMethodError
         #puts "no date " +  " " + paper.css('PMID').text + " " + paper.css('ArticleTitle').text
         # Fallback when the lookup above raises NoMethodError: store the
         # value of Date.new called with no arguments.
         paper_output. =  Date.new()
      end

      # Parse multiple PubMedPubDate dates
      dates = paper.css('PubMedPubDate')

      # parse_pmid is defined outside this method; it receives the text of
      # every PMID node matched by +css+.
      paper_output.pmid =  parse_pmid(paper.css('PMID').text)

      # NOTE(review): this initial array is never read in this method;
      # pub_date is only used after being reassigned to a Date in the loop.
      pub_date = [0,0,0]

      # Only entries whose PubStatus attribute is "entrez" are used; the same
      # date is stored in both pubmed_date and date_appeared. If several
      # entries match, the last one wins.
      dates.each do |node|
        if node.attributes["PubStatus"].to_s == "entrez"
          pub_date = Date.new( node.at('Year').text.to_i,  node.at('Month').text.to_i, node.at('Day').text.to_i)
          paper_output.pubmed_date = pub_date
          paper_output.date_appeared = pub_date
        end
      end

      ids = paper.css('ArticleId')
    
      # Keep the text of the ArticleId whose IdType is 'doi'; other id types
      # are ignored. If several match, the last one wins.
      ids.each do |node|
        v = node.attributes["IdType"].to_s
        if v == 'doi'
          paper_output.doi = node.text
        end
      end


      # Extract the authors as a friendly string for now...
      # TODO handle authors properly
      authors = paper.css('Author')
      # parse_authors is defined outside this method; each entry it returns is
      # indexed at [1] and [2] below (presumably the name parts -- TODO confirm).
      auth_arr = parse_authors(authors)
      
      author_string = ''

      # Produces "<a[1]> <a[2]>, " per author, concatenated in order.
      auth_arr.each do |a|
        author_string += a[1] + ' ' + a[2] +', '
      end
      
      # Cut the additional ', ' off the end
      author_string = author_string[0..-3]
      paper_output.authors = author_string
      paper_output.nlmid = paper.css('NlmUniqueID').text
      
      
      results << paper_output
    end
  end
  
  return results
end