Class: TopHeadlines::Source
- Inherits:
-
Object
- Object
- TopHeadlines::Source
- Defined in:
- lib/top-headlines/source.rb
Constant Summary collapse
# Supported news sources, keyed by display name.
#
# Each entry holds:
#   :url                - the page that gets fetched
#   :headlines_selector - CSS selector matching the headline text nodes
#   :urls_selector      - CSS selector matching the containers whose
#                         descendant <a> elements carry the article links
#
# The table and every per-source hash are frozen so that shared constant data
# cannot be modified by accident at runtime.
SOURCES = {
  "CNN" => {
    url: "http://www.cnn.com/",
    headlines_selector: "div.column.zn__column--idx-1 span.cd__headline-text",
    urls_selector: "div.column.zn__column--idx-1"
  }.freeze,
  "MSNBC" => {
    url: "http://www.msnbc.com/",
    headlines_selector: "span.featured-slider-menu__item__link__title",
    urls_selector: "ul.featured-slider-menu"
  }.freeze,
  "FOX" => {
    url: "http://www.foxnews.com/",
    headlines_selector: "section#trending li a",
    urls_selector: "section#trending li"
  }.freeze,
  "NYTIMES" => {
    url: "http://www.nytimes.com/",
    headlines_selector: "section#top-news h2.story-heading a",
    urls_selector: "section#top-news h2.story-heading"
  }.freeze,
  "BLOOMBERG" => {
    url: "http://www.bloomberg.com/",
    headlines_selector: "section.top-news-v3 h1 a",
    urls_selector: "section.top-news-v3 h1"
  }.freeze
}.freeze
Class Method Summary collapse
- .all ⇒ Object
- .list_all_headlines ⇒ Object
- .scrape_headlines(source) ⇒ Object
- .scrape_urls(source) ⇒ Object
Class Method Details
.all ⇒ Object
31 32 33 |
# File 'lib/top-headlines/source.rb', line 31
#
# Returns the table of supported news sources.
#
# @return [Hash{String => Hash}] the SOURCES constant itself (not a copy)
def self.all
  SOURCES
end
.list_all_headlines ⇒ Object
35 36 37 38 39 40 41 |
# File 'lib/top-headlines/source.rb', line 35
#
# Prints a report to standard output: for each source name in SOURCES, taken
# in sorted order, a "*** NAME ***" banner, then at most the first five
# scraped headlines numbered from 1, then a blank line.
#
# @return [Array<String>] the sorted source names that were iterated
def self.list_all_headlines
  SOURCES.keys.sort.each do |name|
    puts "*** #{name} ***"

    top_five = scrape_headlines(name)[0, 5]
    top_five.each.with_index(1) do |headline, position|
      puts "#{position}. #{headline}"
    end

    puts "\n"
  end
end
.scrape_headlines(source) ⇒ Object
43 44 45 46 47 48 49 50 |
# File 'lib/top-headlines/source.rb', line 43
#
# Fetches a source's page and extracts the text of every headline node.
#
# @param source [String] a key of SOURCES, e.g. "CNN"
# @return [Array<String>] text of each node matched by the source's
#   :headlines_selector, in the order the parser returns them
# @raise [KeyError] if +source+ is not a key of SOURCES
def self.scrape_headlines(source)
  # fetch fails loudly on an unknown name instead of a NoMethodError on nil.
  config = SOURCES.fetch(source)

  # URI.open (from open-uri) rather than Kernel#open: since Ruby 3.0
  # Kernel#open no longer fetches URLs, and it would also treat a string
  # beginning with "|" as a command to run.
  doc = Nokogiri::HTML(URI.open(config[:url]))

  doc.css(config[:headlines_selector]).map(&:text)
end
.scrape_urls(source) ⇒ Object
52 53 54 55 56 57 58 59 |
# File 'lib/top-headlines/source.rb', line 52
#
# Fetches a source's page and collects the link targets found beneath the
# nodes matched by the source's :urls_selector, as absolute URLs.
#
# Each href is resolved against the source's page URL, so absolute links are
# returned as-is while root-relative ("/a"), path-relative ("a/b") and
# protocol-relative ("//host/a") links are expanded correctly. Anchors that
# have no href attribute are skipped.
#
# @param source [String] a key of SOURCES, e.g. "CNN"
# @return [Array<String>] one URL per anchor that has an href
# @raise [KeyError] if +source+ is not a key of SOURCES
def self.scrape_urls(source)
  # fetch fails loudly on an unknown name instead of a NoMethodError on nil.
  config = SOURCES.fetch(source)
  page_url = config[:url]

  # URI.open (from open-uri) rather than Kernel#open: since Ruby 3.0
  # Kernel#open no longer fetches URLs.
  doc = Nokogiri::HTML(URI.open(page_url))

  # "a[href]" only matches anchors that actually carry a link target.
  anchors = doc.css(config[:urls_selector]).children.css('a[href]')

  anchors.map do |anchor|
    href = anchor['href'].strip
    begin
      URI.join(page_url, href).to_s
    rescue URI::InvalidURIError
      # Best effort for hrefs the URI parser rejects (e.g. unescaped
      # characters): keep links that already look absolute, otherwise
      # prefix the page URL.
      href.start_with?('http') ? href : page_url + href
    end
  end
end