Class: PubMed::Article

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/sources/pubmed.rb

Overview

Processes the XML of an article as served by MedLine and extracts the abstract, title and journal information.

Constant Summary collapse

# Pairs of [attribute name, XPath relative to the record's Article element].
# parse_xml copies the content of the first node matching each path into its
# result under the attribute name, and bibtex emits the same names as fields.
XML_KEYS =
[
  [:title    , "ArticleTitle"],
  [:journal  , "Journal/Title"],
  [:issue    , "Journal/JournalIssue/Issue"],
  [:volume   , "Journal/JournalIssue/Volume"],
  [:issn     , "Journal/ISSN"],
  [:year     , "Journal/JournalIssue/PubDate/Year"],
  [:month    , "Journal/JournalIssue/PubDate/Month"],
  [:pages    , "Pagination/MedlinePgn"],
  [:abstract , "Abstract/AbstractText"],
]
# URL template for an article's PDF; parse_xml replaces the literal "PMCID"
# with the content of the record's ArticleId whose IdType is "pmc".
PMC_PDF_URL =
"http://www.ncbi.nlm.nih.gov/pmc/articles/PMCID/pdf/"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(xml) ⇒ Article

Returns a new instance of Article.



120
121
122
123
124
125
126
127
# File 'lib/rbbt/sources/pubmed.rb', line 120

# Populates the article from the XML of a single record.
#
# xml - String handed to PubMed::Article.parse_xml; when it is nil, false or
#       empty nothing is parsed and no attribute is assigned.
#
# Every key/value pair returned by parse_xml is assigned through the writer
# method named after the key (for example :title goes through title=).
def initialize(xml)
  return unless xml && !xml.empty?

  PubMed::Article.parse_xml(xml).each do |attribute, value|
    send("#{attribute}=", value)
  end
end

Instance Attribute Details

#abstract ⇒ Object

Returns the value of attribute abstract.



117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 117

# Returns @abstract, or nil when it has not been assigned.
# parse_xml produces an :abstract value from the Abstract/AbstractText element.
def abstract
  @abstract
end

#author ⇒ Object

Returns the value of attribute author.



117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 117

# Returns @author, or nil when it has not been assigned.
# parse_xml produces the :author value as a single String with the authors
# joined by " and ".
def author
  @author
end

#bibentry ⇒ Object

Returns the value of attribute bibentry.



117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 117

# Returns @bibentry, or nil when it has not been assigned.
# bibtex uses this value as the citation key of the @article entry.
def bibentry
  @bibentry
end

#gscholar_pdf ⇒ Object

Returns the value of attribute gscholar_pdf.



117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 117

# Returns @gscholar_pdf, or nil when it has not been assigned.
def gscholar_pdf
  @gscholar_pdf
end

#journal ⇒ Object

Returns the value of attribute journal.



117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 117

# Returns @journal, or nil when it has not been assigned.
# parse_xml produces a :journal value from the Journal/Title element.
def journal
  @journal
end

#pdf_url ⇒ Object

Returns the value of attribute pdf_url.



117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 117

# Returns @pdf_url, or nil when it has not been assigned.
# full_text returns nil and bibtex omits the fulltext field when this is nil.
def pdf_url
  @pdf_url
end

#pmc_pdf ⇒ Object

Returns the value of attribute pmc_pdf.



117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 117

# Returns @pmc_pdf, or nil when it has not been assigned.
# parse_xml produces the :pmc_pdf value by filling PMC_PDF_URL with the
# record's "pmc" ArticleId, or nil when the record has no such id.
def pmc_pdf
  @pmc_pdf
end

#pmid ⇒ Object

Returns the value of attribute pmid.



117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 117

# Returns @pmid, or nil when it has not been assigned.
# parse_xml produces the :pmid value from the MedlineCitation PMID element.
def pmid
  @pmid
end

#title ⇒ Object

Returns the value of attribute title.



117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 117

# Returns @title, or nil when it has not been assigned.
# parse_xml produces a :title value from the ArticleTitle element.
def title
  @title
end

Class Method Details

.escape_title(title) ⇒ Object



59
60
61
# File 'lib/rbbt/sources/pubmed.rb', line 59

# Wraps every word that contains two or more consecutive capital letters in
# braces, e.g. "DNA" becomes "{DNA}" and "mRNA" becomes "{mRNA}".
# bibtex applies this to the title field.
#
# title - String to escape.
#
# Returns a new String; the argument is left untouched.
def self.escape_title(title)
  title.gsub(/\w*[A-Z]{2,}\w*/) { |acronym| "{#{acronym}}" }
end

.make_bibentry(lastname, year, title) ⇒ Object



63
64
65
66
67
68
69
70
71
# File 'lib/rbbt/sources/pubmed.rb', line 63

# Builds a citation key by concatenating the last name, the year and a short
# abbreviation of the title.
#
# lastname - String; whitespace characters are replaced by underscores.
# year     - publication year; "NOYEAR" is used when it is nil.
# title    - String; may be nil or contain no word characters, in which case
#            the key simply has no title part.
#
# The title abbreviation is the first word (lower-cased) when it is longer
# than three characters, otherwise the initials of the first three words.
#
# Returns the key as a String.
def self.make_bibentry(lastname, year, title)
  words = title.to_s.downcase.scan(/\w+/)
  first_word = words.first

  abrev =
    if first_word.nil?
      # No usable word in the title: leave the abbreviation out rather than fail.
      ""
    elsif first_word.length > 3
      first_word
    else
      words[0..2].collect { |w| w[0, 1] } * ""
    end

  [lastname.gsub(/\s/, '_'), year || "NOYEAR", abrev] * ""
end

.parse_xml(xml) ⇒ Object



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/rbbt/sources/pubmed.rb', line 72

# Parses the XML of one record into a Hash of article fields.
#
# xml - String with a document whose root element is PubmedArticle.
#
# The returned Hash holds:
#   :pmid     - content of the MedlineCitation PMID element.
#   XML_KEYS  - one entry per XML_KEYS name whose element is present.
#   :author   - authors as "LastName, ForeName" joined by " and "; one-letter
#               forename words get a trailing period.
#   :bibentry - lower-cased citation key built from the first author with a
#               LastName; absent when no key could be built.
#   :pmc_pdf  - PMC_PDF_URL filled with the record's "pmc" ArticleId, or nil.
#
# Raises NoMethodError when the PubmedArticle, MedlineCitation, Article or
# PMID element is missing, because those lookups are chained on nil.
def self.parse_xml(xml)
  parser  = LibXML::XML::Parser.string(xml)
  pubmed  = parser.parse.find("/PubmedArticle").first
  medline = pubmed.find("MedlineCitation").first
  article = medline.find("Article").first

  info = {}

  info[:pmid] = medline.find("PMID").first.content

  XML_KEYS.each do |name, key|
    node = article.find(key).first
    info[name] = node.content unless node.nil?
  end

  bibentry = nil
  authors  = []
  article.find("AuthorList/Author").each do |author|
    lastname_node = author.find("LastName").first
    # Skip authors that have no LastName element; they would otherwise show
    # up as an empty ", " entry in the joined author list.
    next if lastname_node.nil?

    lastname = lastname_node.content

    forename_node = author.find("ForeName").first
    forename = forename_node && forename_node.content.split(/\s/).collect { |word|
      word.length == 1 ? word + '.' : word
    } * " "

    if bibentry.nil?
      begin
        bibentry = make_bibentry lastname, info[:year], info[:title]
      rescue StandardError
        # Building the citation key is best-effort: a record without a usable
        # title still yields its authors, just without a :bibentry.
      end
    end

    # compact drops a missing forename so the entry does not end in ", ".
    authors << [lastname, forename].compact * ", "
  end
  info[:author] = authors * " and "

  info[:bibentry] = bibentry.downcase if bibentry

  info[:pmc_pdf] = pubmed.find("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first

  if info[:pmc_pdf]
    info[:pmc_pdf] = PMC_PDF_URL.sub(/PMCID/, info[:pmc_pdf].content)
  end

  info
end

Instance Method Details

#bibtex ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/rbbt/sources/pubmed.rb', line 151

# Renders the article as a BibTeX @article entry keyed by bibentry.
#
# Fields are emitted in this order: author, then every XML_KEYS name. A field
# whose value is nil is left out. The title goes through
# PubMed::Article.escape_title and the issue is written under the name
# "number". A fulltext field is added when pdf_url is set, and pmid always
# closes the entry.
#
# Returns the entry as a String.
def bibtex
  fields = ([:author] + XML_KEYS.collect { |pair| pair.first }) - [:bibentry]

  lines = ["@article{#{bibentry},\n"]

  fields.each do |field|
    value = send(field)
    next if value.nil?

    lines <<
      case field
      when :title then "  title = { #{ PubMed::Article.escape_title value } },\n"
      when :issue then "  number = { #{ value } },\n"
      else             "  #{ field } = { #{ value } },\n"
      end
  end

  url = pdf_url
  lines << "  fulltext = { #{ url } },\n" if url
  lines << "  pmid = { #{ pmid } }\n}"

  lines.join("")
end

#full_text ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/rbbt/sources/pubmed.rb', line 134

# Downloads the PDF at pdf_url with wget and returns its text as extracted by
# the pdftotext command line tool.
#
# Returns the extracted text, or nil when pdf_url is nil or when no text file
# exists after running pdftotext (for example because the download or the
# conversion failed).
def full_text
  return nil if pdf_url.nil?

  text = nil
  TmpFile.with_file do |pdf|

    # wget is asked to present a browser-like user agent. The arguments are
    # passed as a list so the URL never goes through a shell: characters such
    # as "&", ";" or spaces in it can neither break nor extend the command.
    system("wget", "--user-agent=firefox", pdf_url.to_s, "-O", pdf.to_s)
    TmpFile.with_file do |txt|
      system("pdftotext", pdf.to_s, txt.to_s)
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      text = Open.read(txt) if File.exist? txt
    end
  end

  text
end

#text ⇒ Object

Joins the title and the abstract, separated by a newline.



180
181
182
# File 'lib/rbbt/sources/pubmed.rb', line 180

# Title and abstract joined by a single newline; a nil part contributes an
# empty string.
def text
  parts = [title, abstract]
  parts * "\n"
end