Class: GoodreadsBooks::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/goodreads_books/scraper.rb

Constant Summary collapse

BASE_URL =
"https://www.goodreads.com"
PAGE_URL =
"/choiceawards"
@@all =
[]

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(awards_year = nil) ⇒ Scraper

Returns a new instance of Scraper.



9
10
11
# File 'lib/goodreads_books/scraper.rb', line 9

# Builds a scraper and remembers which awards year it is for.
#
# @param awards_year the year to store in @awards_year, kept as given;
#   defaults to nil when the caller does not supply one
def initialize(awards_year = nil)
  @awards_year = awards_year
end

Instance Attribute Details

#awards_year ⇒ Object

Returns the value of attribute awards_year.



2
3
4
# File 'lib/goodreads_books/scraper.rb', line 2

# Reader for the awards year held by this scraper.
#
# @return the current value of @awards_year (nil if it was never assigned)
def awards_year
  @awards_year
end

#main_url ⇒ Object

Returns the value of attribute main_url.



2
3
4
# File 'lib/goodreads_books/scraper.rb', line 2

# Reader for the page URL this scraper fetches.
#
# @return the current value of @main_url (nil if it was never assigned)
def main_url
  @main_url
end

Class Method Details

.all ⇒ Object

Returns the class-wide collection (@@all) of Scraper instances that have been registered with #save.



13
14
15
# File 'lib/goodreads_books/scraper.rb', line 13

# Reader for the class-wide registry of scrapers.
#
# @return the object stored in the @@all class variable; the same object is
#   returned on every call, so mutating it changes the registry itself
def self.all
  @@all
end

.create(awards_year = nil) ⇒ Object

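Builds a new Scraper for the given awards year, sets its main_url and awards_year (reading the year from the default awards page's title when no year is given), registers it via #save, and returns it.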



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/goodreads_books/scraper.rb', line 33

# Builds a scraper for the given Choice Awards year, fills in its URL and
# year, registers it with #save, and returns it.
#
# When awards_year is nil the year is read from the default awards page:
# if the year is missing from the URL, goodreads.com defaults to the latest
# Choice Awards year (/best-books-#{latest awards year}).
#
# The instance is registered only after it is fully set up. Saving first
# (as an earlier version did) left a scraper with a nil year in the
# registry whenever the page fetch raised, and a later lookup for nil
# would then return that unusable instance.
#
# @param awards_year the year to scrape, or nil for the site's default page
# @return the newly created and registered scraper
# @raise whatever URI.open / Nokogiri raise when the default page cannot
#   be fetched or parsed (only on the nil-year path)
def self.create(awards_year = nil)
  choice_awards = new(awards_year)

  if awards_year.nil?
    choice_awards.main_url = "#{BASE_URL}#{PAGE_URL}"
    # URI.open (open-uri) rather than Kernel#open: Kernel#open no longer
    # accepts URLs on Ruby 3.0+, and it treats a leading "|" as a command.
    doc = Nokogiri::HTML(URI.open(choice_awards.main_url))
    # The year is taken as the third whitespace-separated word of <title>.
    # NOTE(review): String#to_i yields 0 if that word is not numeric, so a
    # title layout change would go unnoticed here.
    choice_awards.awards_year = doc.css("head title").text.split(" ")[2].to_i
  else
    choice_awards.main_url = "#{BASE_URL}#{PAGE_URL}/best-books-#{awards_year}"
    choice_awards.awards_year = awards_year
  end

  # Register last, then return the scraper instance itself.
  choice_awards.tap(&:save)
end

.find_by_year(awards_year = nil) ⇒ Object

Returns the first registered Scraper whose awards_year equals the given year, or nil when none matches.



29
30
31
# File 'lib/goodreads_books/scraper.rb', line 29

# Looks up a registered scraper by its awards year.
#
# @param awards_year the year to match against each scraper's awards_year
#   (compared with ==); defaults to nil
# @return the first matching scraper in registry order, or nil if none matches
def self.find_by_year(awards_year = nil)
  all.find do |scraper|
    scraper.awards_year == awards_year
  end
end

.find_or_create_by_year(awards_year = nil) ⇒ Object

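Returns the registered Scraper for the given year; when none exists yet, creates one, scrapes its books, and then returns the registered Scraper for the resulting year.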



21
22
23
24
25
26
27
# File 'lib/goodreads_books/scraper.rb', line 21

# Returns the registered scraper for a year, creating and scraping one first
# when the registry has no match.
#
# The final lookup uses the scraper's own awards_year rather than the
# argument, because create may resolve a nil argument to a concrete year.
#
# @param awards_year the year to look up or create; defaults to nil
# @return the first registered scraper whose awards_year matches
def self.find_or_create_by_year(awards_year = nil)
  scraper = find_by_year(awards_year)

  unless scraper
    scraper = create(awards_year)
    scraper.scrape_books
  end

  find_by_year(scraper.awards_year)
end

Instance Method Details

#save ⇒ Object

Appends this Scraper to the class-wide collection returned by .all.



17
18
19
# File 'lib/goodreads_books/scraper.rb', line 17

# Registers this instance in the class-wide collection exposed by .all.
#
# @return the collection after this instance has been appended
def save
  registry = self.class.all
  registry.push(self)
end

#scrape_books ⇒ Object

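Fetches the page at main_url and, for each ".category.clearFix" element on it, passes the category name, winner title, category URL, and awards year to GoodreadsBooks::Book.new_from_web_page.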



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/goodreads_books/scraper.rb', line 54

# Fetches the category-winners page at @main_url and hands one details hash
# per category element to GoodreadsBooks::Book.new_from_web_page.
#
# Each hash carries :awards_year (from @awards_year), :category (text of
# the element's h4), :title (alt text of its img) and :cate_url (BASE_URL
# plus the href of its link).
#
# @return the collection of category elements that was iterated
# @raise whatever URI.open / Nokogiri raise when the page cannot be fetched
# @raise [NoMethodError] if a category element has no link or image, since
#   the attribute lookup then yields nil
def scrape_books
  # URI.open (open-uri) rather than Kernel#open: Kernel#open no longer
  # accepts URLs on Ruby 3.0+, and it treats a leading "|" as a command.
  doc = Nokogiri::HTML(URI.open(@main_url))

  # Category winners page: one ".category.clearFix" element per category.
  doc.css(".category.clearFix").each do |category|
    link_path = category.css("a").attr("href").text

    book_details = {
      awards_year: @awards_year,
      category: category.css("h4").text,
      title: category.css("img").attr("alt").text,
      cate_url: "#{BASE_URL}#{link_path}"
    }

    GoodreadsBooks::Book.new_from_web_page(book_details)
  end
end