Class: WordTree::Archdown

Inherits:
Object
  • Object
show all
Defined in:
lib/wordtree/archdown.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Archdown

Returns a new instance of Archdown.



8
9
10
# File 'lib/wordtree/archdown.rb', line 8

# Builds a new Archdown and stores a freshly constructed
# Archivist::Client::Base in @client.
def initialize
  @client = Archivist::Client::Base.new
end

Instance Attribute Details

#client ⇒ Object (readonly)

Returns the value of attribute client.



6
7
8
# File 'lib/wordtree/archdown.rb', line 6

# Reader for the @client instance variable.
#
# @return [Object] whatever is currently stored in @client
def client
  @client
end

Instance Method Details

#content_for(archivist_book) ⇒ Object



24
25
26
27
28
# File 'lib/wordtree/archdown.rb', line 24

# Tries to download the content of a book without letting a failure escape.
#
# @param archivist_book [#download] object whose +download+ result is the content
# @return [Array(Object, nil)] +[content, nil]+ when +download+ returns normally
# @return [Array(nil, Exception)] +[nil, error]+ when +download+ raises a
#   StandardError or an Archivist::Model::Document::UnsupportedFormat
def content_for(archivist_book)
  downloaded = archivist_book.download
  [downloaded, nil]
rescue StandardError, Archivist::Model::Document::UnsupportedFormat => error
  # The error is handed back as data so the caller decides what to do with it.
  [nil, error]
end

#download(archivist_book) {|metadata, content, failure| ... } ⇒ Object

Yields:

  • (metadata, content, failure)


48
49
50
51
52
53
# File 'lib/wordtree/archdown.rb', line 48

# Gathers the metadata and the content for a single book and hands both,
# along with any download failure, to the caller's block.
#
# The metadata and content are fetched even when no block is given; only
# the yield is skipped in that case.
#
# @param archivist_book [Object] book passed on to metadata_for and content_for
# @yield [metadata, content, failure] result of metadata_for, followed by the
#   two-element result of content_for
# @return [Object, nil] the block's return value, or nil when no block is given
def download(archivist_book, &block)
  metadata = metadata_for(archivist_book)
  content, failure = content_for(archivist_book)

  yield metadata, content, failure if block_given?
end

#download_all(search_terms, &each_book) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/wordtree/archdown.rb', line 30

# Pages through the search results for +search_terms+, starting at page 1,
# and passes every book found to #download together with the caller's block.
# Iteration ends at the first page whose result set is empty.
#
# @param search_terms [Hash] search parameters; a +:page+ entry is merged in
#   for each request
# @param each_book [Proc] block forwarded unchanged to #download
# @return [nil]
def download_all(search_terms, &each_book)
  page_number = 1
  loop do
    # Each search runs inside Retriable.retriable with Faraday timeouts as the
    # :on class (presumably so timed-out requests are retried).
    results = ::Retriable.retriable(on: Faraday::Error::TimeoutError) do
      @client.search(search_terms.merge(page: page_number))
    end

    break if results.empty?

    results.each { |book| download(book, &each_book) }

    page_number += 1
  end
end

#metadata_for(archivist_book) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
# File 'lib/wordtree/archdown.rb', line 12

# Builds the metadata hash describing one book.
#
# @param archivist_book [#title, #creators, #date, #identifier] book record
# @return [Hash{String => Object}] hash with the keys 'title', 'author',
#   'year', 'source', 'status' and 'archive_org_id'
def metadata_for(archivist_book)
  creators = archivist_book.creators
  date = archivist_book.date
  {
    'title'          => archivist_book.title,
    # Multiple creators are collapsed into one '; '-separated string.
    'author'         => creators ? creators.join('; ') : nil,
    # A missing date yields a nil year instead of a NoMethodError, mirroring
    # the nil guard on creators (assumes date can be nil — TODO confirm).
    'year'           => date ? date.year : nil,
    'source'         => "http://archive.org/details/#{archivist_book.identifier}",
    'status'         => "OCR ONLY",
    'archive_org_id' => archivist_book.identifier,
  }
end