Class: Scraper
- Inherits: Object
  - Object
    - Scraper
- Defined in:
- lib/libri/scraper.rb
Instance Attribute Summary
- #award ⇒ Object
  Returns the value of attribute award.
- #book ⇒ Object
  Returns the value of attribute book.
- #url ⇒ Object
  Returns the value of attribute url.
Class Method Summary
- .scrape_award(award) ⇒ Object
- .scrape_barnes_noble ⇒ Object
- .scrape_book(book) ⇒ Object
- .scrape_quote ⇒ Object
Instance Attribute Details
#award ⇒ Object
Returns the value of attribute award.
3 4 5 |
# File 'lib/libri/scraper.rb', line 3

# Returns the value of attribute award.
#
# @return [Object] whatever is currently stored in @award (nil when unset)
def award
  @award
end
#book ⇒ Object
Returns the value of attribute book.
3 4 5 |
# File 'lib/libri/scraper.rb', line 3

# Returns the value of attribute book.
#
# @return [Object] whatever is currently stored in @book (nil when unset)
def book
  @book
end
#url ⇒ Object
Returns the value of attribute url.
3 4 5 |
# File 'lib/libri/scraper.rb', line 3

# Returns the value of attribute url.
#
# @return [Object] whatever is currently stored in @url (nil when unset)
def url
  @url
end
Class Method Details
.scrape_award(award) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/libri/scraper.rb', line 22

# Fetches the listing page of one award and extracts its books.
#
# @param award [Hash] award entry; only +award[:url]+ (the page to fetch) is read
# @return [Array<Hash>] de-duplicated hashes with +:title+, +:author+ and +:url+
#   keys, built from at most the first 20 "div.product-shelf-info" nodes
def self.scrape_award(award)
  # URI.open rather than Kernel#open: Kernel#open no longer fetches URLs on
  # Ruby 3.0+ and would run a shell command for a string starting with "|".
  # NOTE(review): assumes open-uri is already required by the file — confirm.
  books_page = Nokogiri::HTML(URI.open(award[:url]))

  books = books_page.css("div.product-shelf-info").take(20).map do |book|
    {
      :title => book.css("div.product-shelf-title").text.strip,
      :author => book.css("div.product-shelf-author").text.strip,
      # The href is prefixed with the site origin to form the stored URL.
      :url => "https://www.barnesandnoble.com" + book.css("a").attribute("href").value
    }
  end

  # The same book can be listed more than once on a page; keep one entry each.
  books.uniq
end
.scrape_barnes_noble ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/libri/scraper.rb', line 5

# Fetches the Barnes & Noble awards index and lists the awards in its sidebar.
#
# @return [Array<Hash>] one hash per award with +:name+ and +:url+ keys, built
#   from at most the first 15 links matching "ul#sidebar-section-0 li a"
def self.scrape_barnes_noble
  html = "https://www.barnesandnoble.com/b/books/awards/_/N-29Z8q8Z1d6q"
  # URI.open rather than Kernel#open: Kernel#open no longer fetches URLs on
  # Ruby 3.0+.
  # NOTE(review): assumes open-uri is already required by the file — confirm.
  awards_page = Nokogiri::HTML(URI.open(html))

  awards_page.css("ul#sidebar-section-0 li a").take(15).map do |award|
    {
      :name => award.text.chomp,
      # The href is prefixed with the site origin to form the stored URL.
      :url => "https://www.barnesandnoble.com" + award.attribute("href").value
    }
  end
end
.scrape_book(book) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/libri/scraper.rb', line 42

# Fetches one book's product page and collects its descriptive sections.
#
# @param book [Hash] book entry; only +book[:url]+ (the page to fetch) is read
# @return [Hash] any of +:title_by_author+, +:blurbs_and_plot+, +:about_author+,
#   +:excerpt+, +:availability+ and +:url+; keys whose value is blank are omitted
def self.scrape_book(book)
  html = book[:url]
  # URI.open rather than Kernel#open: Kernel#open no longer fetches URLs on
  # Ruby 3.0+ and would run a shell command for a string starting with "|".
  # NOTE(review): assumes open-uri is already required by the file — confirm.
  book_page = Nokogiri::HTML(URI.open(html))
  info_section = book_page.css("div.tabpanel")
  # related_books_hash = {}

  book_info_hash = {
    :title_by_author => info_section.css("div#productInfoOverview div.mb-m").text,
    :blurbs_and_plot => info_section.css("div#productInfoOverview p").map(&:text).join("\n").strip,
    :about_author => info_section.css("div#MeetTheAuthor div.text--medium").text.strip,
    # Class-less <p> elements among the first four <p> positions of the excerpt div.
    :excerpt => info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<5]").map(&:text).join("\n"),
    # :related_books => book_page.css("div.product-shelf-info").each { |book|
    #   related_books_hash = {
    #     :title => book.css("div.product-shelf-title").text.strip,
    #     :author => book.css("div.product-shelf-author").text.strip,
    #     :url => "https://www.barnesandnoble.com" + book.css("a").attribute("href").value
    #   }
    # },
    :availability => book_page.css("button#pdp-marketplace-btn").text.chomp,
    :url => book[:url]
  }

  # Sections missing from the page come back as empty strings; drop them.
  book_info_hash.delete_if { |_key, val| val.to_s.strip.empty? }
end
.scrape_quote ⇒ Object
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/libri/scraper.rb', line 69

# Fetches the Goodreads "books" quote page and extracts every quote on it.
#
# @return [Array<Hash>] one hash per "div.quote" node with +:quote+ (the text
#   between curly quotation marks, marks included) and +:author+ keys
def self.scrape_quote
  html = "https://www.goodreads.com/quotes/tag/books"
  # URI.open rather than Kernel#open: Kernel#open no longer fetches URLs on
  # Ruby 3.0+.
  # NOTE(review): assumes open-uri is already required by the file — confirm.
  quotes_page = Nokogiri::HTML(URI.open(html))

  quotes_page.css("div.quote").map do |quote|
    {
      # Keep only the “…” spans of the quoteText node.
      :quote => quote.css("div.quoteText").first.text.scan(/(“.+”)/).join(""),
      # Text of the first link inside quoteText is stored as the author.
      :author => quote.css("div.quoteText a").first.text
    }
  end
end