Class: NprCliNewsReader::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/npr_cli_news_reader/scraper.rb

Constant Summary collapse

@@base_url = "https://www.npr.org"

Class Method Summary collapse

Class Method Details

.scrape_articles_for_category(selected_category) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/npr_cli_news_reader/scraper.rb', line 5

# Scrapes the NPR section page for +selected_category+ and creates one
# NprCliNewsReader::Article for every <article class="item"> element found.
#
# The fetch is skipped when an article with this category already exists.
#
# @param selected_category [String] section name; "race & culture" (in any
#   letter case) is fetched from the "codeswitch" section, any other value is
#   used as the section slug exactly as given
# @return [nil, Object] nil when the category was already scraped, otherwise
#   whatever +each+ returns for the matched article nodes
def self.scrape_articles_for_category(selected_category)
  # Articles are stored with a downcased category (see below), so the cache
  # check must compare against the downcased name; comparing the raw input
  # meant mixed-case categories were re-scraped and duplicated on every call.
  category = selected_category.downcase
  return if NprCliNewsReader::Article.all.any? { |article| article.category == category }

  section = category == "race & culture" ? "codeswitch" : selected_category

  # URI.open instead of Kernel#open: open-uri no longer patches Kernel#open
  # as of Ruby 3.0. The block form closes the handle once parsing is done.
  doc = URI.open("#{@@base_url}/sections/#{section}") { |page| Nokogiri::HTML(page) }

  doc.css('article.item').each do |article|
    title_link = article.css('h2.title a')
    article_attributes = {
      category: category,
      title: title_link.text.strip,
      teaser: article.css('p.teaser a').text.strip,
      # to_s guards against a missing link, where attr returns nil.
      article_url: title_link.attr('href').to_s.strip
    }
    NprCliNewsReader::Article.new(article_attributes)
  end
end

.scrape_full_article(article) ⇒ Object



30
31
32
33
34
35
36
# File 'lib/npr_cli_news_reader/scraper.rb', line 30

# Downloads the page at +article.article_url+ and stores its story
# paragraphs on the article.
#
# @param article [#article_url, #full_article=] article to populate
# @return [Object] the paragraph nodes assigned to +article.full_article+
def self.scrape_full_article(article)
  # URI.open instead of Kernel#open: open-uri no longer patches Kernel#open
  # as of Ruby 3.0. The block form closes the handle once parsing is done.
  doc = URI.open(article.article_url) { |page| Nokogiri::HTML(page) }

  # Selects every <p> that is a direct child of #storytext; nothing is
  # dropped here.
  # NOTE(review): the previous comment said "discard the first p tag", but no
  # such filtering exists in this method - confirm whether it is still wanted.
  article.full_article = doc.css('#storytext > p')
end