Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/ESPNScraper/scraper.rb

Class Method Summary collapse

Class Method Details

.get_content(article) ⇒ Object



35
36
37
38
39
40
41
# File 'lib/ESPNScraper/scraper.rb', line 35

# Downloads the page at +article.url+ and stores its body paragraphs on the
# article.
#
# The page is fetched with URI.open (not Kernel#open): Kernel#open no longer
# opens URLs on Ruby 3.0+ and would treat a string starting with "|" as a
# command to run. The block form closes the downloaded IO once parsing is done.
#
# @param article [#url, #content=] object exposing the page URL and a
#   +content=+ writer
# @return [Object] the node set assigned to +article.content+ (may be empty)
def self.get_content(article)
  doc = URI.open(article.url) { |page| Nokogiri::HTML(page) }

  # For ESPN Radio links this selector returns an empty set.
  paragraphs = doc.css("div.article-body p")
  # Fall back to the alternative story layout when the first selector finds nothing.
  paragraphs = doc.css("div.Story__Body p") if paragraphs.empty?

  article.content = paragraphs
end

.scrape_new_articles(url) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/ESPNScraper/scraper.rb', line 3

# Fetches the listing page at +url+ and builds an Article for every
# "div.item-info-wrap" story that carries a link.
#
# The page is fetched with URI.open (not Kernel#open): Kernel#open no longer
# opens URLs on Ruby 3.0+ and would treat a string starting with "|" as a
# command to run. The block form closes the downloaded IO once parsing is done.
#
# @param url [String] address of the listing page
# @return [Object] the collection of matched story nodes (result of +each+)
def self.scrape_new_articles(url)
  doc = URI.open(url) { |page| Nokogiri::HTML(page) }
  stories = doc.css("div.item-info-wrap")

  stories.each do |story|
    # A story without an anchor/href cannot be linked to; skip it instead of
    # raising NoMethodError on nil and aborting the whole scrape.
    href = story.css("a").attribute("href")&.value
    next if href.nil?

    title = story.css("h1").text
    description = story.css("p").text
    # Site-relative links are prefixed with the ESPN host; absolute ones are kept.
    link = href.start_with?("http") ? href : "https://www.espn.com#{href}"

    Article.new(title, description, link)
  end
end

.scrape_new_nhl_articles(url) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/ESPNScraper/scraper.rb', line 19

# Fetches the NHL listing page at +url+ and builds an Article for every
# <article> element that has an "a.contentItem__content" link with an href.
# Articles are created with a nil description.
#
# The page is fetched with URI.open (not Kernel#open): Kernel#open no longer
# opens URLs on Ruby 3.0+ and would treat a string starting with "|" as a
# command to run. The block form closes the downloaded IO once parsing is done.
#
# @param url [String] address of the listing page
# @return [Array] the story nodes that had a link (result of +each+)
def self.scrape_new_nhl_articles(url)
  doc = URI.open(url) { |page| Nokogiri::HTML(page) }

  # Keep only stories that actually carry a link.
  stories = doc.css("article").select do |story|
    story.css("a.contentItem__content").attribute("href")
  end

  stories.each do |story|
    href = story.css("a.contentItem__content").attribute("href").value
    title = story.css("h2.contentItem__title").text
    # Site-relative links are prefixed with the ESPN host; absolute ones are kept.
    link = href.start_with?("http") ? href : "https://www.espn.com#{href}"

    Article.new(title, nil, link)
  end
end