Class: PubMed::Article

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/sources/pubmed.rb

Overview

Processes the XML of a single article as served by MedLine and extracts the abstract, title and journal information.

Constant Summary collapse

# Pairs of [attribute name, XPath]. parse_xml evaluates each XPath relative
# to the record's MedlineCitation/Article node and stores the content of the
# first matching node under the attribute name.
# Frozen so the shared table cannot be modified by accident at runtime.
XML_KEYS = [
  [:title,    "ArticleTitle"],
  [:journal,  "Journal/Title"],
  [:issue,    "Journal/JournalIssue/Issue"],
  [:volume,   "Journal/JournalIssue/Volume"],
  [:issn,     "Journal/ISSN"],
  [:year,     "Journal/JournalIssue/PubDate/Year"],
  [:month,    "Journal/JournalIssue/PubDate/Month"],
  [:pages,    "Pagination/MedlinePgn"],
  [:author,   "AuthorList/Author"],
  [:abstract, "Abstract/AbstractText"],
].freeze

# URL template for the PDF of a PubMed Central article. parse_xml replaces
# the literal "PMCID" with the record's "pmc" ArticleId.
PMC_PDF_URL = "http://www.ncbi.nlm.nih.gov/pmc/articles/PMCID/pdf/".freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(xml) ⇒ Article

Returns a new instance of Article.



124
125
126
127
128
129
130
131
# File 'lib/rbbt/sources/pubmed.rb', line 124

# Builds an article from the XML of one record.
#
# When +xml+ is nil, false or empty nothing is parsed and no attribute is
# assigned. Otherwise every key/value pair returned by
# PubMed::Article.parse_xml is assigned through the writer named "<key>=".
def initialize(xml)
  return if !xml || xml.empty?

  PubMed::Article.parse_xml(xml).each_pair do |attribute, value|
    send("#{ attribute }=", value)
  end
end

Instance Attribute Details

#abstract ⇒ Object

Returns the value of attribute abstract.



121
122
123
# File 'lib/rbbt/sources/pubmed.rb', line 121

# Reader for @abstract. parse_xml fills the :abstract key with the content of
# the first "Abstract/AbstractText" node of the record, when one exists.
def abstract
  @abstract
end

#author ⇒ Object

Returns the value of attribute author.



121
122
123
# File 'lib/rbbt/sources/pubmed.rb', line 121

# Reader for @author. parse_xml stores a single String here: one
# "LastName, ForeName" entry per AuthorList/Author node, joined with " and ".
def author
  @author
end

#bibentry ⇒ Object

Returns the value of attribute bibentry.



121
122
123
# File 'lib/rbbt/sources/pubmed.rb', line 121

# Reader for @bibentry, the citation key used by #bibtex. parse_xml sets it to
# the downcased result of make_bibentry, and only when a key could be built.
def bibentry
  @bibentry
end

#gscholar_pdf ⇒ Object

Returns the value of attribute gscholar_pdf.



121
122
123
# File 'lib/rbbt/sources/pubmed.rb', line 121

# Reader for @gscholar_pdf.
# NOTE(review): none of the code in this listing assigns this attribute;
# presumably it is set by a caller -- confirm where.
def gscholar_pdf
  @gscholar_pdf
end

#journal ⇒ Object

Returns the value of attribute journal.



121
122
123
# File 'lib/rbbt/sources/pubmed.rb', line 121

# Reader for @journal. parse_xml fills the :journal key with the content of
# the record's "Journal/Title" node, when one exists.
def journal
  @journal
end

#pdf_url ⇒ Object

Returns the value of attribute pdf_url.



121
122
123
# File 'lib/rbbt/sources/pubmed.rb', line 121

# Reader for @pdf_url. #full_text downloads from this URL and #bibtex emits it
# as the "fulltext" field; both skip it when it is nil.
# NOTE(review): none of the code in this listing assigns this attribute --
# confirm where it is set.
def pdf_url
  @pdf_url
end

#pmc_pdf ⇒ Object

Returns the value of attribute pmc_pdf.



121
122
123
# File 'lib/rbbt/sources/pubmed.rb', line 121

# Reader for @pmc_pdf. parse_xml sets it to PMC_PDF_URL with "PMCID" replaced
# by the record's ArticleId of IdType "pmc", or to nil when there is none.
def pmc_pdf
  @pmc_pdf
end

#pmid ⇒ Object

Returns the value of attribute pmid.



121
122
123
# File 'lib/rbbt/sources/pubmed.rb', line 121

# Reader for @pmid. parse_xml fills the :pmid key with the content of the
# first PMID node under MedlineCitation.
def pmid
  @pmid
end

#title ⇒ Object

Returns the value of attribute title.



121
122
123
# File 'lib/rbbt/sources/pubmed.rb', line 121

# Reader for @title. parse_xml fills the :title key with the content of the
# record's "ArticleTitle" node, when one exists.
def title
  @title
end

Class Method Details

.escape_title(title) ⇒ Object



63
64
65
# File 'lib/rbbt/sources/pubmed.rb', line 63

# Wraps every word of +title+ that contains two or more consecutive capital
# letters (e.g. "DNA", "mRNA", "HIV1") in curly braces and returns the new
# string. #bibtex applies this to the title field.
def self.escape_title(title)
  capitalized_word = /(\w*[A-Z][A-Z]+\w*)/
  braced           = '{\1}'

  title.gsub(capitalized_word, braced)
end

.make_bibentry(lastname, year, title) ⇒ Object



67
68
69
70
71
72
73
74
75
# File 'lib/rbbt/sources/pubmed.rb', line 67

# Builds a citation key of the form <lastname><year><title abbreviation>.
#
# lastname - family name of the first author; whitespace becomes "_".
# year     - publication year; "NOYEAR" is used when it is nil.
# title    - article title. The abbreviation is the first word when it is
#            longer than three characters, otherwise the initials of the
#            first three words. All title words are downcased.
#
# A nil title, or one without any word characters, used to raise
# NoMethodError (on words.first.length); it now contributes an empty
# abbreviation. A nil lastname is treated as an empty string.
#
# Returns the key as a String. Only the title part is downcased here.
def self.make_bibentry(lastname, year, title)
  words = title.to_s.downcase.scan(/\w+/)

  abrev =
    if words.empty?
      ""
    elsif words.first.length > 3
      words.first
    else
      words.first(3).map { |w| w[0] }.join
    end

  [lastname.to_s.gsub(/\s/, '_'), year || "NOYEAR", abrev].join
end

.parse_xml(xml) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/rbbt/sources/pubmed.rb', line 76

# Parses the XML of a single /PubmedArticle record into a Hash.
#
# xml - String containing the XML document.
#
# Returns a Hash with Symbol keys:
#   :pmid      content of MedlineCitation/PMID
#   XML_KEYS   one entry per XML_KEYS name whose XPath matched under
#              MedlineCitation/Article (content of the first matching node)
#   :author    "LastName, ForeName" entries joined with " and "; single-letter
#              forename words get a trailing "." (overwrites the raw :author
#              value taken from XML_KEYS)
#   :bibentry  downcased make_bibentry key, only when a key could be built
#   :pmc_pdf   PMC_PDF_URL with "PMCID" replaced by the "pmc" ArticleId, or nil
#
# Missing name parts are handled explicitly instead of with a blanket
# +rescue+: an author without ForeName yields just the last name (no dangling
# ", "), and Author nodes without LastName and ForeName are left out of the
# :author string.
#
# NOTE(review): a document lacking the PubmedArticle, MedlineCitation, Article
# or PMID elements still fails with NoMethodError on nil, as before.
def self.parse_xml(xml)
  parser  = LibXML::XML::Parser.string(xml)
  pubmed  = parser.parse.find("/PubmedArticle").first
  medline = pubmed.find("MedlineCitation").first
  article = medline.find("Article").first

  info = {}

  info[:pmid] = medline.find("PMID").first.content

  XML_KEYS.each do |name, key|
    node = article.find(key).first

    next if node.nil?

    info[name] = node.content
  end

  bibentry = nil
  authors = article.find("AuthorList/Author").collect do |author|
    lastname_node = author.find("LastName").first
    forename_node = author.find("ForeName").first

    lastname = lastname_node && lastname_node.content
    forename = forename_node && forename_node.content.split(/\s/).collect { |word| word.length == 1 ? word + '.' : word } * " "
    forename = nil if forename && forename.empty?

    # The key comes from the first author that has a last name. It is only
    # attempted when the title has at least one word character, which is the
    # case in which make_bibentry is known not to raise.
    if bibentry.nil? && lastname && info[:title].to_s =~ /\w/
      bibentry = make_bibentry(lastname, info[:year], info[:title])
    end

    [lastname, forename].compact * ", "
  end

  info[:author] = authors.reject { |name| name.empty? } * " and "

  info[:bibentry] = bibentry.downcase if bibentry

  pmc_id = pubmed.find("PubmedData/ArticleIdList/ArticleId").select { |id| id[:IdType] == "pmc" }.first

  # The key is always present; it stays nil when the record has no PMC id.
  info[:pmc_pdf] = pmc_id && PMC_PDF_URL.sub(/PMCID/, pmc_id.content)

  info
end

Instance Method Details

#bibtex ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/rbbt/sources/pubmed.rb', line 161

# Renders the article as a BibTeX "@article" entry keyed by #bibentry.
#
# Fields are emitted in this order: author, then the remaining XML_KEYS
# attributes in table order, then "fulltext" (only when pdf_url is set) and
# finally "pmid". Attributes whose value is nil are skipped. The title goes
# through PubMed::Article.escape_title and :issue is written as "number".
#
# XML_KEYS itself contains :author, so the key list is de-duplicated;
# without that the author field was written twice.
#
# Returns the entry as a String.
def bibtex
  keys = ([:author] + XML_KEYS.collect { |p| p.first } - [:bibentry]).uniq
  lines = ["@article{#{bibentry},\n"]

  keys.each do |key|
    value = send(key)
    next if value.nil?

    lines <<
      case key
      when :title
        "  title = { #{ PubMed::Article.escape_title title } },\n"
      when :issue
        "  number = { #{ issue } },\n"
      else
        "  #{ key } = { #{ value } },\n"
      end
  end

  lines << "  fulltext = { #{ pdf_url } },\n" if pdf_url
  lines << "  pmid = { #{ pmid } }\n}"

  lines.join
end

#full_text ⇒ Object



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/rbbt/sources/pubmed.rb', line 144

# Downloads the PDF at pdf_url with wget, converts it with pdftotext and
# returns the extracted text passed through Misc.fixutf8.
#
# Returns nil immediately when pdf_url is nil.
#
# Both commands are run through the argument-list form of Kernel#system, so
# no shell is involved: characters such as "&", "?" or ";" in the URL can no
# longer break or inject into the command line. Unlike backticks, a missing
# executable makes +system+ return nil instead of raising.
#
# NOTE(review): when the conversion produces no output file, +text+ is still
# nil when handed to Misc.fixutf8 -- confirm that helper accepts nil.
def full_text
  return nil if pdf_url.nil?

  text = nil
  TmpFile.with_file do |pdf|

    # Change user-agent, oh well...
    system("wget", "--user-agent=firefox", pdf_url.to_s, "-O", pdf.to_s, "-t", "3")
    TmpFile.with_file do |txt|
      system("pdftotext", pdf.to_s, txt.to_s)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      text = Open.read(txt) if File.exist? txt
    end
  end

  Misc.fixutf8(text)
end

#text ⇒ Object

Joins the title and the abstract into a single text.



190
191
192
193
194
# File 'lib/rbbt/sources/pubmed.rb', line 190

# Returns the title and the abstract joined by a newline, passed through
# Misc.fixutf8. A nil title or abstract contributes an empty string.
def text
  Misc.fixutf8([title, abstract].join("\n"))
end