Class: PubMed::Article

Inherits:
Object
  • Object
show all
Defined in:
lib/rbbt/sources/pubmed.rb

Overview

Processes the XML of an article as served by MedLine and extracts the abstract, title and journal information.

Constant Summary collapse

# Pairs of [attribute name, XPath]. parse_xml evaluates each XPath against the
# record's Article node and stores the content of the first match under the
# attribute name; entries whose node is missing are skipped.
XML_KEYS =
[
  [:title    , "ArticleTitle"],
  [:journal  , "Journal/Title"],
  [:issue    , "Journal/JournalIssue/Issue"],
  [:volume   , "Journal/JournalIssue/Volume"],
  [:issn     , "Journal/ISSN"],
  [:year     , "Journal/JournalIssue/PubDate/Year"],
  [:month    , "Journal/JournalIssue/PubDate/Month"],
  [:pages    , "Pagination/MedlinePgn"],
  [:author    , "AuthorList/Author"],
  [:abstract , "Abstract/AbstractText"],
]
# URL template for an article PDF. parse_xml substitutes the "PMCID" segment
# with the content of the record's ArticleId node whose IdType is "pmc".
PMC_PDF_URL =
"http://www.ncbi.nlm.nih.gov/pmc/articles/PMCID/pdf/"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(xml) ⇒ Article

Returns a new instance of Article.



103
104
105
106
107
108
109
110
# File 'lib/rbbt/sources/pubmed.rb', line 103

# Builds an article from the XML of a single record.
#
# Nothing is assigned when +xml+ is nil, false or empty. Otherwise each
# key/value pair returned by PubMed::Article.parse_xml is stored by calling
# the writer method named after the key.
def initialize(xml)
  return unless xml && !xml.empty?

  PubMed::Article.parse_xml(xml).each do |attribute, value|
    send("#{attribute}=", value)
  end
end

Instance Attribute Details

#abstractObject

Returns the value of attribute abstract.



100
101
102
# File 'lib/rbbt/sources/pubmed.rb', line 100

# Reader for the @abstract instance variable; nil until it has been assigned.
def abstract
  @abstract
end

#authorObject

Returns the value of attribute author.



100
101
102
# File 'lib/rbbt/sources/pubmed.rb', line 100

# Reader for the @author instance variable; nil until it has been assigned.
def author
  @author
end

#bibentryObject

Returns the value of attribute bibentry.



100
101
102
# File 'lib/rbbt/sources/pubmed.rb', line 100

# Reader for the @bibentry instance variable; nil until it has been assigned.
def bibentry
  @bibentry
end

#gscholar_pdfObject

Returns the value of attribute gscholar_pdf.



100
101
102
# File 'lib/rbbt/sources/pubmed.rb', line 100

# Reader for the @gscholar_pdf instance variable; nil until it has been assigned.
def gscholar_pdf
  @gscholar_pdf
end

#journalObject

Returns the value of attribute journal.



100
101
102
# File 'lib/rbbt/sources/pubmed.rb', line 100

# Reader for the @journal instance variable; nil until it has been assigned.
def journal
  @journal
end

#pdf_urlObject

Returns the value of attribute pdf_url.



100
101
102
# File 'lib/rbbt/sources/pubmed.rb', line 100

# Reader for the @pdf_url instance variable; nil until it has been assigned.
def pdf_url
  @pdf_url
end

#pmc_pdfObject

Returns the value of attribute pmc_pdf.



100
101
102
# File 'lib/rbbt/sources/pubmed.rb', line 100

# Reader for the @pmc_pdf instance variable; nil until it has been assigned.
def pmc_pdf
  @pmc_pdf
end

#pmidObject

Returns the value of attribute pmid.



100
101
102
# File 'lib/rbbt/sources/pubmed.rb', line 100

# Reader for the @pmid instance variable; nil until it has been assigned.
def pmid
  @pmid
end

#titleObject

Returns the value of attribute title.



100
101
102
# File 'lib/rbbt/sources/pubmed.rb', line 100

# Reader for the @title instance variable; nil until it has been assigned.
def title
  @title
end

Class Method Details

.escape_title(title) ⇒ Object



42
43
44
# File 'lib/rbbt/sources/pubmed.rb', line 42

# Returns a copy of +title+ in which every word containing at least two
# consecutive capital letters is wrapped in curly braces.
def self.escape_title(title)
  capitalised_word = /(\w*[A-Z][A-Z]+\w*)/
  braced_word      = '{\1}'

  title.gsub(capitalised_word, braced_word)
end

.make_bibentry(lastname, year, title) ⇒ Object



46
47
48
49
50
51
52
53
54
# File 'lib/rbbt/sources/pubmed.rb', line 46

# Builds a citation key by concatenating the last name, the year and a short
# abbreviation of the title.
#
# lastname - String; whitespace characters are replaced with underscores.
# year     - year of publication; "NOYEAR" is used when it is nil or false.
# title    - String title. The abbreviation is the first word (lowercased)
#            when it is longer than three characters, otherwise the initials
#            of the first three words. A nil title, or one without any word
#            characters, contributes an empty abbreviation instead of raising.
#
# Returns the key as a String (case of the last name is left untouched).
def self.make_bibentry(lastname, year, title)
  words      = title.to_s.downcase.scan(/\w+/)
  first_word = words.first

  abrev =
    if first_word && first_word.length > 3
      first_word
    else
      # With no words at all this collapses to the empty string.
      words[0..2].collect { |w| w.chars.first }.join
    end

  [lastname.gsub(/\s/, '_'), year || "NOYEAR", abrev].join
end

.parse_xml(xml) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/rbbt/sources/pubmed.rb', line 55

# Parses the XML of a single PubmedArticle record into a Hash.
#
# xml - String with a document whose root element is PubmedArticle.
#
# The returned Hash contains:
#   :pmid     - content of the MedlineCitation/PMID node.
#   XML_KEYS  - one entry per XML_KEYS pair whose node exists under Article.
#   :author   - authors formatted as "LastName, ForeName" and joined with
#               " and ". Single-letter forename words get a trailing period.
#               Authors without a LastName node are left out, and the comma
#               is omitted when there is no ForeName.
#   :bibentry - lowercased key from make_bibentry for the first author with a
#               LastName; only present when the title has at least one word.
#   :pmc_pdf  - PMC_PDF_URL filled in with the "pmc" ArticleId, or nil.
#
# Raises NoMethodError when the PubmedArticle, MedlineCitation, Article or
# PMID nodes are missing (the lookups below are not nil-guarded).
def self.parse_xml(xml)
  parser  = LibXML::XML::Parser.string(xml)
  pubmed  = parser.parse.find("/PubmedArticle").first
  medline = pubmed.find("MedlineCitation").first
  article = medline.find("Article").first

  info = {}

  info[:pmid] = medline.find("PMID").first.content

  XML_KEYS.each do |name, key|
    node = article.find(key).first
    info[name] = node.content unless node.nil?
  end

  bibentry = nil
  authors = article.find("AuthorList/Author").collect do |author|
    lastname_node = author.find("LastName").first
    # No last name means nothing sensible to print; skip the entry instead of
    # emitting a bare ", " as a blanket rescue used to do.
    next if lastname_node.nil?

    lastname      = lastname_node.content
    forename_node = author.find("ForeName").first
    forename      = forename_node && forename_node.content.split(/\s/).collect { |word|
      word.length == 1 ? word + '.' : word
    } * " "

    # make_bibentry derives part of the key from the words of the title, so
    # it is only asked for a key when the title has at least one word.
    if bibentry.nil? && info[:title].to_s =~ /\w/
      bibentry = make_bibentry(lastname, info[:year], info[:title])
    end

    [lastname, forename].compact * ", "
  end
  info[:author] = authors.compact * " and "

  info[:bibentry] = bibentry.downcase if bibentry

  pmc_id = pubmed.find("PubmedData/ArticleIdList/ArticleId").detect { |id| id[:IdType] == "pmc" }
  info[:pmc_pdf] = pmc_id && PMC_PDF_URL.sub(/PMCID/, pmc_id.content)

  info
end

Instance Method Details

#bibtexObject



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/rbbt/sources/pubmed.rb', line 140

# Renders the article as a BibTeX "@article" entry keyed by +bibentry+.
#
# One field is written per attribute named in XML_KEYS (author first) whose
# reader returns a non-nil value. The title goes through
# PubMed::Article.escape_title and :issue is written as "number". A
# "fulltext" field is added when pdf_url is set, and "pmid" always closes the
# entry.
#
# XML_KEYS may itself list :author, so the key list is de-duplicated to keep
# the author field from being written twice.
#
# Returns the entry as a String.
def bibtex
  keys = ([:author] + XML_KEYS.collect { |pair| pair.first } - [:bibentry]).uniq

  fields = []
  keys.each do |key|
    value = send(key)
    next if value.nil?

    fields <<
      case key
      when :title then "  title = { #{ PubMed::Article.escape_title(value) } },\n"
      when :issue then "  number = { #{ value } },\n"
      else "  #{ key } = { #{ value } },\n"
      end
  end

  fields << "  fulltext = { #{ pdf_url } },\n" if pdf_url

  "@article{#{bibentry},\n" + fields.join + "  pmid = { #{ pmid } }\n}"
end

#full_textObject



123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/rbbt/sources/pubmed.rb', line 123

# Downloads the PDF at +pdf_url+ with wget, converts it with pdftotext and
# returns the extracted text after passing it through Misc.fixutf8.
#
# Returns nil straight away when pdf_url is nil. When the conversion leaves no
# output file, Misc.fixutf8 is called with nil.
#
# Both commands are run with argument lists rather than through a shell, so
# URLs containing characters such as "&" or "?" reach wget intact and cannot
# inject extra commands.
def full_text
  return nil if pdf_url.nil?

  text = nil
  TmpFile.with_file do |pdf|

    # Change user-agent, oh well...
    system("wget", "--user-agent=firefox", pdf_url.to_s, "-O", pdf.to_s, "-t", "3", :out => File::NULL)
    TmpFile.with_file do |txt|
      system("pdftotext", pdf.to_s, txt.to_s, :out => File::NULL)
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      text = Open.read(txt) if File.exist?(txt)
    end
  end

  Misc.fixutf8(text)
end

#textObject

Joins the title and the abstract into a single text, separated by a newline.



169
170
171
172
173
# File 'lib/rbbt/sources/pubmed.rb', line 169

# Title and abstract joined by a newline, passed through Misc.fixutf8.
def text
  combined = [title, abstract] * "\n"
  Misc.fixutf8(combined)
end