Class: GoodreadsBooks::Scraper
- Inherits:
-
Object
- Object
- GoodreadsBooks::Scraper
- Defined in:
- lib/goodreads_books/scraper.rb
Constant Summary collapse
- BASE_URL =
"https://www.goodreads.com"
- PAGE_URL =
"/choiceawards"
- @@all =
[]
Instance Attribute Summary collapse
-
#awards_year ⇒ Object
Returns the value of attribute awards_year.
-
#main_url ⇒ Object
Returns the value of attribute main_url.
Class Method Summary collapse
-
.all ⇒ Object
Returns the class-level list (@@all) that #save appends instances to.
-
.create(awards_year = nil) ⇒ Object
Builds a new Scraper for the given awards year, saves it, and sets its main_url and awards_year.
-
.find_by_year(awards_year = nil) ⇒ Object
Returns the first saved Scraper whose awards_year equals the argument, or nil when there is none.
-
.find_or_create_by_year(awards_year = nil) ⇒ Object
Returns the saved Scraper for the given year, creating it and scraping its books first when none exists.
Instance Method Summary collapse
-
#initialize(awards_year = nil) ⇒ Scraper
constructor
A new instance of Scraper.
-
#save ⇒ Object
Appends this instance to the list returned by .all.
-
#scrape_books ⇒ Object
Fetches main_url and passes each category's details to GoodreadsBooks::Book.new_from_web_page.
Constructor Details
#initialize(awards_year = nil) ⇒ Scraper
Returns a new instance of Scraper.
9 10 11 |
# File 'lib/goodreads_books/scraper.rb', line 9

# Creates a scraper and remembers the requested awards year.
#
# @param awards_year [Object, nil] year to scrape; stored exactly as given,
#   nil when omitted
def initialize(awards_year = nil)
  @awards_year = awards_year
end
Instance Attribute Details
#awards_year ⇒ Object
Returns the value of attribute awards_year.
2 3 4 |
# File 'lib/goodreads_books/scraper.rb', line 2

# Reader for the awards year held by this scraper.
#
# @return [Object, nil] the current value of @awards_year
def awards_year
  @awards_year
end
#main_url ⇒ Object
Returns the value of attribute main_url.
2 3 4 |
# File 'lib/goodreads_books/scraper.rb', line 2

# Reader for the URL this scraper fetches.
#
# @return [Object, nil] the current value of @main_url
def main_url
  @main_url
end
Class Method Details
.all ⇒ Object
Returns the class-level list (@@all) that #save appends instances to.
13 14 15 |
# File 'lib/goodreads_books/scraper.rb', line 13

# Exposes the class-level registry of scrapers.
#
# @return [Object] the value of the @@all class variable
def self.all
  @@all
end
.create(awards_year = nil) ⇒ Object
Builds a new Scraper for the given awards year, saves it, and sets its main_url and awards_year.
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/goodreads_books/scraper.rb', line 33

# Builds a Scraper for the given Choice Awards year, fills in its main_url
# and awards_year, and registers it through #save.
#
# When no year is given, the landing page is fetched and the year is taken
# from the third whitespace-separated word of its <title>: if the year is
# missing from the URL, goodreads.com defaults to the latest awards year.
#
# The instance is saved only after it is fully populated, so a failed fetch
# does not leave a scraper with a nil awards_year in the registry.
#
# @param awards_year [Object, nil] year used to build the URL; nil means
#   "latest awards year"
# @return [Scraper] the newly created and saved instance
def self.create(awards_year = nil)
  new(awards_year).tap do |choice_awards|
    if awards_year.nil?
      choice_awards.main_url = "#{BASE_URL}#{PAGE_URL}"
      # URI.open rather than Kernel#open: since Ruby 3.0 open-uri no longer
      # makes Kernel#open accept URLs. Assumes the file requires 'open-uri'.
      doc = Nokogiri::HTML(URI.open(choice_awards.main_url))
      choice_awards.awards_year = doc.css("head title").text.split(" ")[2].to_i
    else
      choice_awards.main_url = "#{BASE_URL}#{PAGE_URL}/best-books-#{awards_year}"
      choice_awards.awards_year = awards_year
    end
    choice_awards.save
  end
end
.find_by_year(awards_year = nil) ⇒ Object
Returns the first saved Scraper whose awards_year equals the argument, or nil when there is none.
29 30 31 |
# File 'lib/goodreads_books/scraper.rb', line 29

# Looks up a registered scraper by awards year.
#
# @param awards_year [Object, nil] value compared with == against each
#   scraper's awards_year
# @return [Scraper, nil] the first match in .all, or nil when none matches
def self.find_by_year(awards_year = nil)
  all.find { |scraper| scraper.awards_year == awards_year }
end
.find_or_create_by_year(awards_year = nil) ⇒ Object
Returns the saved Scraper for the given year, creating it and scraping its books first when none exists.
21 22 23 24 25 26 27 |
# File 'lib/goodreads_books/scraper.rb', line 21

# Returns the registered scraper for a Choice Awards year, creating it and
# scraping its books only when that year has not been registered yet.
#
# .create registers the new instance and may resolve a nil year to a concrete
# one, so a nil request can yield a second instance for a year that is
# already registered. That duplicate is removed again instead of being
# scraped a second time.
#
# @param awards_year [Object, nil] year to look up; nil is resolved by .create
# @return [Scraper, nil] the first registered scraper for the resolved year
def self.find_or_create_by_year(awards_year = nil)
  existing = find_by_year(awards_year)
  return existing if existing

  created = create(awards_year)
  # The concrete year is only known after .create, so look it up again.
  registered = find_by_year(created.awards_year)
  if registered.equal?(created)
    created.scrape_books
  else
    # Identity comparison: only the instance just created is dropped.
    all.delete_if { |scraper| scraper.equal?(created) }
  end
  registered
end
Instance Method Details
#save ⇒ Object
Appends this instance to the list returned by .all.
17 18 19 |
# File 'lib/goodreads_books/scraper.rb', line 17

# Registers this scraper in the class-level list.
#
# @return [Object] the list returned by .all, after appending self
def save
  registry = self.class.all
  registry << self
end
#scrape_books ⇒ Object
Fetches main_url and passes each category's details to GoodreadsBooks::Book.new_from_web_page.
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/goodreads_books/scraper.rb', line 54

# Fetches the page at @main_url and hands one details hash per
# ".category.clearFix" element to GoodreadsBooks::Book.new_from_web_page.
#
# Each hash carries :awards_year (from @awards_year), :category (text of the
# element's h4), :title (alt attribute of its img) and :cate_url (BASE_URL
# plus the href of its a).
#
# @return [Object] whatever #each returns for the matched category nodes
def scrape_books
  # URI.open rather than Kernel#open: since Ruby 3.0 open-uri no longer
  # makes Kernel#open accept URLs. Assumes the file requires 'open-uri'.
  doc = Nokogiri::HTML(URI.open(@main_url))

  doc.css(".category.clearFix").each do |category|
    book_details = {
      awards_year: @awards_year,
      category: category.css("h4").text,
      title: category.css("img").attr("alt").text,
      cate_url: "#{BASE_URL}#{category.css('a').attr('href').text}"
    }
    GoodreadsBooks::Book.new_from_web_page(book_details)
  end
end