Module: AccessibleBooks

Defined in:
lib/accessible-books.rb

Constant Summary collapse

TARHEEL_REGEX =
/https?:\/\/tarheelreader.org\/.+\/.+\/.+\/(.+)\//

Class Method Summary collapse

Class Method Details

.find_json(url) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/accessible-books.rb', line 17

def self.find_json(url)
  return nil unless url
  if url.match(/dropbox\.com/) && url.match(/\?dl=0$/)
    url = url.sub(/\?dl=0$/, '?dl=1')
  end
  json = nil
  if url.match(AccessibleBooks::TARHEEL_REGEX)
    json = AccessibleBooks.tarheel_json(url)
  else
    req = Typhoeus.get(url, followlocation: true)
    json = JSON.parse(req.body) rescue nil
    if !json
      elem = Nokogiri(req.body).css("head meta[property='book:url']")[0]
      if elem && elem['content']
        req = Typhoeus.get(elem['content'])
        json = JSON.parse(req.body) rescue nil
      end
    end
  end
  if json && json['pages']
    json['image_url'] ||= json['pages'][0]['image_url']
  end
  json
end

.tarheel_id(url) ⇒ Object



8
9
10
11
12
13
14
15
# File 'lib/accessible-books.rb', line 8

def self.tarheel_id(url)
  return nil unless url
  if url.match(TARHEEL_REGEX)
    url.match(TARHEEL_REGEX)[1]
  else
    nil
  end
end

.tarheel_json(url) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/accessible-books.rb', line 47

def self.tarheel_json(url)
  return nil unless url
  id = tarheel_id(url)
  return nil unless id
  url = self.tarheel_json_url(id)
  return nil unless url
  res = Typhoeus.get(url)
  json = JSON.parse(res.body) rescue nil
  if json && json['title'] && json['pages']
    json['book_url'] = "https://tarheelreader.org#{json['link']}"
    json['attribution_url'] = "https://tarheelreader.org/photo-credits/?id=#{id}"
    json['pages'].each_with_index do |page, idx|
      page['id'] ||= idx == 0 ? 'title_page' : "page_#{idx}"
      page['image_url'] = page['url']
      page['image_url'] = "https://tarheelreader.org#{page['image_url']}" unless page['image_url'].match(/^http/)
    end
    json['pages'][0]['small_image'] = true
    json['image_url'] = json['pages'][1]['image_url']
  else
    json = nil
  end
  json
end

.tarheel_json_url(id) ⇒ Object



42
43
44
45
# File 'lib/accessible-books.rb', line 42

def self.tarheel_json_url(id)
  return nil unless id
  "https://tarheelreader.org/book-as-json/?slug=#{CGI.escape(id)}"
end