Class: Scraper
- Inherits: Object
- Inheritance hierarchy: Object → Scraper
- Defined in:
- lib/scraper.rb
Instance Method Summary
Instance Method Details
#get_quote_pages ⇒ Object
28 29 30 31 32 33 34 |
# File 'lib/scraper.rb', line 28

# Walks the paginated quote listing and scrapes each page in order.
#
# Page URLs are 1-based ("page-1.html" .. "page-N.html"), so the zero-based
# block index is shifted by one when building the URL.
#
# @param page_count [Integer] how many listing pages to scrape, starting at
#   page 1. Defaults to 216, the value that was previously hard-coded; pass a
#   smaller number for a quick partial run.
# @return [Integer] +page_count+ (the return value of Integer#times)
def get_quote_pages(page_count = 216)
  page_count.times do |i|
    page = "https://www.tvfanatic.com/quotes/shows/the-office/page-#{i + 1}.html"
    scrape_office_quotes(page)
  end
end
#scrape_office_quotes(page) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/scraper.rb', line 7

# Fetches one quote-listing page and builds a Quote for every
# "div.quotes blockquote" element found on it.
#
# For each blockquote:
# * the character name is the stripped text of its <small> element; a name
#   longer than 40 characters is replaced with an empty string
#   (NOTE(review): presumably such text is not a real character name - confirm);
# * the quote text comes from its <p> element, with <br> tags turned into
#   spaces. If there are several <p> elements the last one wins; if there are
#   none the text is an empty string.
#
# The created Quote objects are not collected here.
# NOTE(review): presumably Quote.new registers the instance itself - confirm.
#
# @param page [String] URL of the listing page to scrape
# @return [Object] whatever the node set's #each returns
def scrape_office_quotes(page)
  # URI.open (open-uri) instead of Kernel#open: Kernel#open no longer opens
  # URLs on Ruby 3.0+, and it would run a command for a "|"-prefixed string.
  # The block form closes the downloaded stream once the page is parsed.
  index_page = URI.open(page) { |io| Nokogiri::HTML(io) }

  index_page.css("div.quotes blockquote").each do |quote|
    character = quote.css("small").text.strip
    character = "" if character.length > 40

    # Reset for every quote so a blockquote without a <p> cannot inherit the
    # previous quote's text.
    line_string = ""
    quote.css("p").each do |line|
      # Using .text would run lines separated only by <br> together, so work
      # on the element's markup and turn each <br> into a space instead.
      line_string = line.to_s
                        .gsub("<br>", " ")
                        .gsub("<p>", "")
                        .gsub("</p>", "")
                        .strip
    end

    Quote.new(line_string, character)
  end
end