Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/scraper.rb

Instance Method Summary collapse

Instance Method Details

#get_quote_pagesObject



28
29
30
31
32
33
34
# File 'lib/scraper.rb', line 28

def get_quote_pages
  216.times do |i|
  # 10.times do |i|
    page = "https://www.tvfanatic.com/quotes/shows/the-office/page-" + "#{i + 1}" + ".html"
    scrape_office_quotes(page)
  end
end

#scrape_office_quotes(page) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/scraper.rb', line 7

def scrape_office_quotes(page)
  index_page = Nokogiri::HTML(open(page))
  line_string = ""
  index_page.css("div.quotes blockquote").each do |quote|
    character = quote.css("small").text.strip
    if character.length > 40
      character = ""
    end
    quote.css("p").each do |line|
      # Using .text will not push items with breaks right next to each other
      # Using .to_s on the nokogiri element allow me to identify <br>'s create space between each line within the <p> element
      line_string = line.to_s
      line_string.gsub!("<br>", " ") 
      line_string.gsub!("<p>", "") 
      line_string.gsub!("</p>", "") 
      line_string = line_string.strip
    end 
    Quote.new(line_string, character)
  end
end