Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/libri/scraper.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Attribute Details

#award ⇒ Object

Returns the value of attribute award.



3
4
5
# File 'lib/libri/scraper.rb', line 3

# Attribute reader: returns the current value of the @award instance variable.
def award
  @award
end

#book ⇒ Object

Returns the value of attribute book.



3
4
5
# File 'lib/libri/scraper.rb', line 3

# Attribute reader: returns the current value of the @book instance variable.
def book
  @book
end

#url ⇒ Object

Returns the value of attribute url.



3
4
5
# File 'lib/libri/scraper.rb', line 3

# Attribute reader: returns the current value of the @url instance variable.
def url
  @url
end

Class Method Details

.scrape_award(award) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/libri/scraper.rb', line 22

# Scrapes the books listed on a single award page.
#
# Fetches the page at +award[:url]+, reads at most the first 20
# "div.product-shelf-info" entries and returns them with duplicates removed.
# Entries that carry no link are skipped, because there is no URL to record
# for them.
#
# @param award [Hash] must provide the award page address under +:url+
# @return [Array<Hash>] unique hashes with +:title+, +:author+ and +:url+ keys
def self.scrape_award(award)
    # URI.open only ever fetches a URL. Kernel#open would run a command for a
    # string starting with "|" and no longer opens URLs on Ruby 3+.
    # NOTE(review): relies on open-uri being required by the file, as the
    # previous open(url) call already did - confirm.
    books_page = Nokogiri::HTML(URI.open(award[:url]))

    books = books_page.css("div.product-shelf-info").take(20).map do |book|
        href = book.css("a").attribute("href")
        # No anchor (or no href) means no book page to point at; skip the entry.
        next if href.nil?

        {
            :title => book.css("div.product-shelf-title").text.strip,
            :author => book.css("div.product-shelf-author").text.strip,
            :url => "https://www.barnesandnoble.com" + href.value
        }
    end

    # compact drops the skipped entries, uniq drops repeated books.
    books.compact.uniq
end

.scrape_barnes_noble ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/libri/scraper.rb', line 5

# Scrapes the list of book awards from the Barnes & Noble awards page.
#
# Reads at most the first 15 links found under "ul#sidebar-section-0 li a".
# Links without an href are skipped, because no award URL can be built for
# them.
#
# @return [Array<Hash>] hashes with +:name+ (link text, trailing newline
#   removed) and +:url+ (absolute award page address) keys
def self.scrape_barnes_noble
    # URI.open only ever fetches a URL. Kernel#open would run a command for a
    # string starting with "|" and no longer opens URLs on Ruby 3+.
    # NOTE(review): relies on open-uri being required by the file, as the
    # previous open(url) call already did - confirm.
    awards_page = Nokogiri::HTML(
        URI.open("https://www.barnesandnoble.com/b/books/awards/_/N-29Z8q8Z1d6q")
    )

    awards = awards_page.css("ul#sidebar-section-0 li a").take(15).map do |award|
        href = award.attribute("href")
        # An anchor without href cannot be followed later; skip it.
        next if href.nil?

        {
            :name => award.text.chomp,
            :url => "https://www.barnesandnoble.com" + href.value
        }
    end

    awards.compact
end

.scrape_book(book) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/libri/scraper.rb', line 42

# Scrapes the detail page of a single book.
#
# Fetches the page at +book[:url]+ and collects several text sections from
# it. Any entry whose value is blank after stripping is removed from the
# result, so only sections actually present on the page are returned.
#
# @param book [Hash] must provide the book page address under +:url+
# @return [Hash] subset of +:title_by_author+, +:blurbs_and_plot+,
#   +:about_author+, +:excerpt+, +:availability+ and +:url+
def self.scrape_book(book)
    # URI.open only ever fetches a URL. Kernel#open would run a command for a
    # string starting with "|" and no longer opens URLs on Ruby 3+.
    # NOTE(review): relies on open-uri being required by the file, as the
    # previous open(url) call already did - confirm.
    book_page = Nokogiri::HTML(URI.open(book[:url]))
    info_section = book_page.css("div.tabpanel")

    # TODO: a :related_books entry (title/author/url taken from each
    # "div.product-shelf-info" on the page) was sketched here but is disabled.
    book_info_hash = {
        :title_by_author => info_section.css("div#productInfoOverview div.mb-m").text,
        :blurbs_and_plot => info_section.css("div#productInfoOverview p").map(&:text).join("\n").strip,
        :about_author => info_section.css("div#MeetTheAuthor div.text--medium").text.strip,
        # Only the first four class-less paragraphs of the excerpt are kept.
        :excerpt => info_section.xpath("//div[@class='read-an-excerpt']/p[not(@class) and position()<5]").map(&:text).join("\n"),
        :availability => book_page.css("button#pdp-marketplace-btn").text.chomp,
        :url => book[:url]
    }

    # Drop sections that turned out empty (nil or whitespace-only).
    book_info_hash.delete_if { |_key, val| val.to_s.strip.empty? }
end

.scrape_quote ⇒ Object



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/libri/scraper.rb', line 69

# Scrapes quotes tagged "books" from the Goodreads quotes page.
#
# For every "div.quote" element, the quote is the part of the first
# "div.quoteText" wrapped in curly double quotes, and the author is the text
# of the first link inside that element.
#
# @return [Array<Hash>] hashes with +:quote+ and +:author+ keys
def self.scrape_quote
    # URI.open only ever fetches a URL. Kernel#open would run a command for a
    # string starting with "|" and no longer opens URLs on Ruby 3+.
    # NOTE(review): relies on open-uri being required by the file, as the
    # previous open(url) call already did - confirm.
    quotes_page = Nokogiri::HTML(URI.open("https://www.goodreads.com/quotes/tag/books"))

    quotes_page.css("div.quote").map do |quote|
        {
            # scan keeps only the “…” span; the result is "" when none is found.
            :quote => quote.css("div.quoteText").first.text.scan(/(“.+”)/).join(""),
            :author => quote.css("div.quoteText a").first.text
        }
    end
end