Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/crowder_news/scraper.rb

Overview

> Scraper scrapes the articles from the front page of www.louderwithcrowder.com

Scrapes the data from each article to be displayed to the user via CLI

Constant Summary collapse

@@url =
"https://www.louderwithcrowder.com"

Class Method Summary collapse

Class Method Details

.initiate_scrape ⇒ Object

> This method initiates the scrape of our website



12
13
14
15
16
17
18
19
# File 'lib/crowder_news/scraper.rb', line 12

# Runs the full scrape: registers the featured articles, then the recent
# ones, and finally fills in every known article with the details scraped
# from its own page.
#
# @return [Object] whatever `Article.all.each` returns
def self.initiate_scrape
  featured = scrape_featured
  Article.create_from_collection(featured)

  recent = scrape_recent
  Article.create_from_collection(recent)

  # Each article only carries its link at this point; visit the link to
  # collect the remaining fields.
  Article.all.each do |entry|
    entry.add_details(scrape_details(entry.link))
  end
end

.scrape_details(article_url) ⇒ Object

> Pulls the article's details from each article URL so we can complete our Article objects



60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/crowder_news/scraper.rb', line 60

# Fetches a single article page and extracts the fields used to complete an
# article record.
#
# The page is opened with `URI.parse(...).open` (provided by open-uri, which
# must already be loaded) instead of `Kernel#open`: the URL comes from scraped
# markup, and `Kernel#open` would run a subprocess for a string starting with
# "|". It also no longer opens URLs on Ruby 3.0+. The block form closes the
# underlying handle once the document is parsed.
#
# @param article_url [String] absolute URL of the article page
# @return [Hash] with keys :author, :date, :body and :youtube_links
#   (an Array of the iframe `src` values found in the video wrappers)
# @raise [URI::InvalidURIError] if article_url is not a parseable URI
def self.scrape_details(article_url)
  doc = URI.parse(article_url).open { |page| Nokogiri::HTML(page) }

  # A wrapper without an iframe (or an iframe without src) is skipped rather
  # than aborting the whole scrape with a NoMethodError on nil.
  youtube_links = doc.css("div.fluid-width-video-wrapper").map do |wrapper|
    iframe = wrapper.at_css("iframe")
    iframe && iframe["src"]
  end.compact

  {
    author: doc.css("h2 span.lwc-author").text,
    date: doc.css("h2 span.lwc-date").text,
    body: doc.css("p").text,
    youtube_links: youtube_links
  }
end

> Scrapes the featured articles from LwC



24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/crowder_news/scraper.rb', line 24

# Scrapes the featured articles from the front page at `@@url`.
#
# The page is opened with `URI.parse(...).open` (provided by open-uri, which
# must already be loaded) because `Kernel#open` no longer opens URLs on
# Ruby 3.0+. The block form closes the handle once the document is parsed.
#
# @return [Array<Hash>] one hash per featured box with keys :title, :link,
#   :excerpt and :type (always "Featured"); boxes without a title link are
#   skipped
def self.scrape_featured
  doc = URI.parse(@@url).open { |page| Nokogiri::HTML(page) }

  boxes = doc.css("div.lwc-featured").flat_map do |section|
    section.css(".featured-box").to_a
  end

  boxes.map do |box|
    anchors = box.css("h3.featured-title a")
    href = anchors.attribute("href")
    # No usable link means the article page cannot be visited later; skip
    # the box instead of raising NoMethodError on nil.
    next unless href

    {
      title: anchors.text,
      link: href.value,
      excerpt: box.css("p.lwc-excerpt").text,
      type: "Featured"
    }
  end.compact
end

.scrape_recent ⇒ Object

> Scrapes the recent articles from LwC



42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/crowder_news/scraper.rb', line 42

# Scrapes the recent articles from the front page at `@@url`.
#
# The page is opened with `URI.parse(...).open` (provided by open-uri, which
# must already be loaded) because `Kernel#open` no longer opens URLs on
# Ruby 3.0+. The block form closes the handle once the document is parsed.
#
# @return [Array<Hash>] one hash per recent box with keys :title, :link,
#   :excerpt (always an empty string here) and :type (always "Recent");
#   boxes without a title link are skipped
def self.scrape_recent
  doc = URI.parse(@@url).open { |page| Nokogiri::HTML(page) }

  boxes = doc.css("div.lwc-recent").flat_map do |section|
    section.css(".recent-box").to_a
  end

  boxes.map do |box|
    anchors = box.css("h3.recent-title a")
    href = anchors.attribute("href")
    # No usable link means the article page cannot be visited later; skip
    # the box instead of raising NoMethodError on nil.
    next unless href

    {
      title: anchors.text,
      link: href.value,
      excerpt: "",
      type: "Recent"
    }
  end.compact
end