Class: BillboardScraper

Inherits:
Object
  • Object
show all
Defined in:
lib/top_100/billboard_scraper.rb

Class Method Summary collapse

Class Method Details

.scrape_from_artist_bio_page(url) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/top_100/billboard_scraper.rb', line 20

# Scrapes an artist biography page and returns the artist's details.
#
# The page is fetched with URI.open instead of Kernel#open: Kernel#open treats
# a string beginning with "|" as a command to run, and it no longer opens URLs
# on Ruby 3.0+.
#
# Some artists have incomplete pages, so every scraped field except :name
# falls back to "Not Specified" when nothing usable is found.
#
# @param url [String] address of the artist's biography page
# @return [Hash] with the keys :name, :location, :date and :bio
def self.scrape_from_artist_bio_page(url)
  bio = Nokogiri::HTML(URI.open(url))
  # Strip up front: a missing section can come back as a run of whitespace
  # rather than an empty string, and stray leading/trailing whitespace would
  # otherwise become the first/last segment of the splits below.
  location_date = bio.css('dl.facts > dd').text.strip
  info = bio.css('article.bio_content').text.strip
  {
    name: bio.css('h1.title').text,
    # The location is whatever precedes the first double space in the facts text.
    location: location_date.empty? ? "Not Specified" : location_date.split("  ").first.strip,
    # The date is the first run of digits; a page may list a location without one.
    date: location_date[/\d+/] || "Not Specified",
    # The bio aside and the main text share 'article.bio_content' with no finer
    # grouping, so the main text is taken as the last six-space-separated segment.
    bio: info.empty? ? "Not Specified" : info.split("      ").last.strip
  }
end

.scrape_from_chart_page ⇒ Object

Only class methods are used: there is no need to create instances, since nothing distinguishes one scraper from another and a single scraper is all we need.



6
7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/top_100/billboard_scraper.rb', line 6

# Scrapes the Billboard Hot 100 chart page and builds one Song per chart row.
#
# The page is fetched with URI.open instead of Kernel#open, which no longer
# opens URLs on Ruby 3.0+.
#
# Each Song receives a hash with the keys :rank, :name, :artist_bio_url and
# :artist_name.
#
# @return [Object] whatever `each` returns for the matched chart rows; callers
#   presumably rely on the Song.new side effect instead — TODO confirm
def self.scrape_from_chart_page
  chart = Nokogiri::HTML(URI.open('http://www.billboard.com/charts/hot-100'))
  chart.css('div.chart-row__primary').each do |song|
    # NOTE(review): a row with no 'a.chart-row__link' match presumably makes
    # `attribute` return nil and `.value` raise — confirm against the live markup.
    artist_url = song.css('a.chart-row__link').attribute('href').value
    attributes = {
      rank: song.css('span.chart-row__current-week').text,
      name: song.css('h2.chart-row__song').text,
      # The artist's biography lives under the artist page's URL.
      artist_bio_url: "#{artist_url}/biography",
      artist_name: song.css('h3.chart-row__artist').text.strip
    }
    Song.new(attributes)
  end
end