Module: Arxiv
- Defined in:
- lib/arxiv.rb,
lib/arxiv/version.rb,
lib/arxiv/models/link.rb,
lib/arxiv/models/author.rb,
lib/arxiv/models/category.rb,
lib/arxiv/string_scrubber.rb,
lib/arxiv/models/manuscript.rb
Defined Under Namespace
Modules: Error
Classes: Author, Category, Link, Manuscript, StringScrubber
Constant Summary
collapse
- LEGACY_URL_FORMAT =
In 2007, the arXiv API changed its document ID format:
http://arxiv.org/abs/math/0510097v1 (legacy)
http://arxiv.org/abs/1202.0819v1 (current)
These constants let us handle both formats.
/[^\/]+\/\d+(?:v\d+)?$/
- CURRENT_URL_FORMAT =
/\d{4}\.\d{4}(?:v\d+)?$/
- LEGACY_ID_FORMAT =
/^#{LEGACY_URL_FORMAT}/
- ID_FORMAT =
/^#{CURRENT_URL_FORMAT}/
- VERSION =
"0.1.0"
Class Method Summary
collapse
Class Method Details
.get(identifier) ⇒ Object
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
# File 'lib/arxiv.rb', line 33
# Fetches a single manuscript from the arXiv export API.
#
# The identifier is first normalised with +parse_arxiv_identifier+, so
# either a bare ID or a URL ending in one is accepted.
#
# @param identifier [String] an arXiv ID or a URL ending in one
# @return [Arxiv::Manuscript] the parsed manuscript
# @raise [Arxiv::Error::MalformedId] if the normalised ID matches neither
#   ID_FORMAT nor LEGACY_ID_FORMAT
# @raise [Arxiv::Error::ManuscriptNotFound] if the parsed result has no title
def self.get(identifier)
  id = parse_arxiv_identifier(identifier)
  raise Arxiv::Error::MalformedId, "Manuscript ID format is invalid" unless valid_id?(id)

  url = ::URI.parse("http://export.arxiv.org/api/query?id_list=#{id}")
  # Call open-uri through the URI object: Kernel#open stopped accepting URIs
  # in Ruby 3.0. The block form also closes the response handle once the
  # document has been parsed.
  response = url.open { |io| ::Nokogiri::XML(io) }.remove_namespaces!
  manuscript = Arxiv::Manuscript.parse(response.to_s, single: id)
  # A result without a title is treated as "no such manuscript".
  raise Arxiv::Error::ManuscriptNotFound, "Manuscript #{id} doesn't exist on arXiv" if manuscript.title.nil?

  manuscript
end
|
.legacy_url?(identifier) ⇒ Boolean
69
70
71
|
# File 'lib/arxiv.rb', line 69
# Tests +identifier+ against LEGACY_URL_FORMAT.
#
# @param identifier [String] a URL or ID to inspect
# @return [Integer, nil] offset at which the legacy pattern matches, or nil
#   when it does not match (so the result is usable as a truthy/falsy flag)
def self.legacy_url?(identifier)
  match_offset = identifier =~ LEGACY_URL_FORMAT
  match_offset
end
|
.parse_arxiv_identifier(identifier) ⇒ Object
50
51
52
53
54
55
56
57
58
59
|
# File 'lib/arxiv.rb', line 50
# Reduces +identifier+ to a bare arXiv ID where possible.
#
# A value that already passes +valid_id?+, or that does not pass
# +valid_url?+, is handed back untouched. Otherwise the portion of the
# URL matched by the legacy or current URL pattern is returned.
#
# @param identifier [String] an arXiv ID or a URL ending in one
# @return [String] the extracted ID, or +identifier+ itself
def self.parse_arxiv_identifier(identifier)
  return identifier if valid_id?(identifier) || !valid_url?(identifier)

  url_pattern = legacy_url?(identifier) ? LEGACY_URL_FORMAT : CURRENT_URL_FORMAT
  identifier[url_pattern]
end
|
.valid_id?(identifier) ⇒ Boolean
61
62
63
|
# File 'lib/arxiv.rb', line 61
# Tests +identifier+ against ID_FORMAT, then against LEGACY_ID_FORMAT.
#
# @param identifier [String] the candidate arXiv ID
# @return [Integer, nil] match offset from the first pattern that matches,
#   or nil when neither does
def self.valid_id?(identifier)
  current_offset = identifier =~ ID_FORMAT
  return current_offset if current_offset

  identifier =~ LEGACY_ID_FORMAT
end
|
.valid_url?(identifier) ⇒ Boolean
65
66
67
|
# File 'lib/arxiv.rb', line 65
# Tests +identifier+ against LEGACY_URL_FORMAT, then against
# CURRENT_URL_FORMAT.
#
# @param identifier [String] the candidate arXiv URL
# @return [Integer, nil] match offset from the first pattern that matches,
#   or nil when neither does
def self.valid_url?(identifier)
  legacy_offset = identifier =~ LEGACY_URL_FORMAT
  return legacy_offset if legacy_offset

  identifier =~ CURRENT_URL_FORMAT
end
|