Module: Arxiv

Defined in:
lib/arxiv.rb,
lib/arxiv/version.rb,
lib/arxiv/models/link.rb,
lib/arxiv/models/author.rb,
lib/arxiv/models/category.rb,
lib/arxiv/string_scrubber.rb,
lib/arxiv/models/manuscript.rb

Defined Under Namespace

Modules: Error

Classes: Author, Category, Link, Manuscript, StringScrubber

Constant Summary collapse

LEGACY_URL_FORMAT =

In 2007, the ArXiv API changed document ID formats:

https://arxiv.org/abs/math/0510097v1  (legacy)
https://arxiv.org/abs/1202.0819v1     (current)

These constants help us deal with both use cases.

/[^\/]+\/\d+(?:v\d+)?$/
CURRENT_URL_FORMAT =
/\d{4,}\.\d{4,}(?:v\d+)?$/
LEGACY_ID_FORMAT =
/^#{LEGACY_URL_FORMAT}/
ID_FORMAT =
/^#{CURRENT_URL_FORMAT}/
VERSION =
"0.1.9"

Class Method Summary collapse

Class Method Details

.get(identifier) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/arxiv.rb', line 34

# Fetches a single manuscript from the arXiv export API.
#
# @param identifier [Object] value handed to +parse_arxiv_identifier+
#   (presumably an arXiv ID or URL — confirm against that helper)
# @return [Object] whatever +Arxiv::Manuscript.parse+ returns for the ID
# @raise [Arxiv::Error::MalformedId] if the parsed ID matches neither
#   +ID_FORMAT+ nor +LEGACY_ID_FORMAT+
# @raise [Arxiv::Error::ManuscriptNotFound] if the parsed manuscript has a
#   nil title
def self.get(identifier)
  id = parse_arxiv_identifier(identifier)

  unless id =~ ID_FORMAT || id =~ LEGACY_ID_FORMAT
    raise Arxiv::Error::MalformedId, "Manuscript ID format is invalid"
  end

  url = ::URI.parse("http://export.arxiv.org/api/query?id_list=#{id}")

  # Use URI#open (provided by open-uri) rather than Kernel#open: passing a
  # URI to Kernel#open was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  # The block form also closes the response handle once parsing is done.
  response = url.open { |io| ::Nokogiri::XML(io) }.remove_namespaces!
  manuscript = Arxiv::Manuscript.parse(response.to_s, single: id)

  # A nil title is treated as "no such manuscript".
  raise Arxiv::Error::ManuscriptNotFound, "Manuscript #{id} doesn't exist on arXiv" if manuscript.title.nil?
  manuscript
end