Class: Scraper
- Inherits: Object
- Inheritance hierarchy: Object → Scraper
- Defined in:
- lib/ESPNScraper/scraper.rb
Class Method Summary
- .get_content(article) ⇒ Object
- .scrape_new_articles(url) ⇒ Object
- .scrape_new_nhl_articles(url) ⇒ Object
Class Method Details
.get_content(article) ⇒ Object
35 36 37 38 39 40 41 |
# File 'lib/ESPNScraper/scraper.rb', line 35
# Downloads the page at +article.url+ and stores its body paragraphs on
# the article.
#
# Two selectors are tried in order: "div.article-body p", then
# "div.Story__Body p" when the first one matches nothing.
#
# @param article [#url, #content=] object exposing the page address and a
#   writer for the scraped paragraphs
# @return [Nokogiri::XML::NodeSet] the nodes assigned to +article.content+
#   (empty when neither selector matches)
def self.get_content(article)
  # URI.open (provided by open-uri, which the original `open(url)` call
  # already relied on) is used instead of Kernel#open: Kernel#open stopped
  # opening URLs in Ruby 3.0 and, on older Rubies, runs a command when the
  # string starts with "|". The block form closes the handle after parsing.
  doc = URI.open(article.url) { |page| Nokogiri::HTML(page) }

  # For ESPN Radio links, this will return an empty node set.
  paragraphs = doc.css("div.article-body p")
  paragraphs = doc.css("div.Story__Body p") if paragraphs.empty?

  # Assign once and return the content on every path; previously the
  # method returned nil whenever the first selector matched.
  article.content = paragraphs
end
.scrape_new_articles(url) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
# File 'lib/ESPNScraper/scraper.rb', line 3
# Downloads the listing page at +url+ and calls +Article.new+ once for
# every "div.item-info-wrap" story that carries a link.
#
# The title comes from the story's <h1> text and the description from its
# <p> text. Links that do not start with "http" are prefixed with
# "https://www.espn.com".
#
# @param url [String] address of the listing page to scrape
# @return [Nokogiri::XML::NodeSet] every "div.item-info-wrap" node found
def self.scrape_new_articles(url)
  # URI.open (provided by open-uri, which the original `open(url)` call
  # already relied on) is used instead of Kernel#open: Kernel#open stopped
  # opening URLs in Ruby 3.0 and, on older Rubies, runs a command when the
  # string starts with "|". The block form closes the handle after parsing.
  doc = URI.open(url) { |page| Nokogiri::HTML(page) }

  stories = doc.css("div.item-info-wrap")
  stories.each do |story|
    # Read the first anchor's href once. A story without one is skipped
    # rather than raising NoMethodError on nil, matching how
    # scrape_new_nhl_articles ignores link-less stories.
    href = story.at_css("a")&.attr("href")
    next unless href

    title = story.css("h1").text
    description = story.css("p").text
    # Separate local so the +url+ parameter is not overwritten mid-loop.
    link = href.start_with?("http") ? href : "https://www.espn.com#{href}"

    Article.new(title, description, link)
  end
end
.scrape_new_nhl_articles(url) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/ESPNScraper/scraper.rb', line 19
# Downloads the NHL listing page at +url+ and calls +Article.new+ once for
# every <article> element whose "a.contentItem__content" anchor has an href.
#
# The title comes from the "h2.contentItem__title" text; the description
# is always nil for these stories. Links that do not start with "http" are
# prefixed with "https://www.espn.com".
#
# @param url [String] address of the listing page to scrape
# @return [Array<Nokogiri::XML::Node>] the <article> nodes that had a link
def self.scrape_new_nhl_articles(url)
  # URI.open (provided by open-uri, which the original `open(url)` call
  # already relied on) is used instead of Kernel#open: Kernel#open stopped
  # opening URLs in Ruby 3.0 and, on older Rubies, runs a command when the
  # string starts with "|". The block form closes the handle after parsing.
  doc = URI.open(url) { |page| Nokogiri::HTML(page) }

  # Keep only stories whose first matching anchor actually has an href.
  stories = doc.css("article").select do |story|
    story.css('a.contentItem__content').attribute('href')
  end

  stories.each do |story|
    # Look the href up once instead of re-running the query per branch.
    href = story.css('a.contentItem__content').attribute('href').value
    title = story.css("h2.contentItem__title").text
    description = nil
    # Separate local so the +url+ parameter is not overwritten mid-loop.
    link = href.start_with?("http") ? href : "https://www.espn.com#{href}"

    Article.new(title, description, link)
  end
end