Module: Bnm::Scrape

Defined in:
lib/bnm/scrape.rb

Class Method Summary collapse

Class Method Details

.deep_scrape(artists) ⇒ Object

Scrapes each artist's review page for its story (editorial), score, author, listen link and genres.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/bnm/scrape.rb', line 32

# Enriches each artist hash with details scraped from its Pitchfork review
# page, then sorts the list by review score, highest first.
#
# For every artist the page "http://pitchfork.com" + artist[:review_url] is
# fetched and each `div.review-detail` element on it is read. A key is only
# written when the page yields a value for it:
#   :editorial - scrubbed, entity-decoded text of `div.article-content`
#   :score     - text of `span.score` (stored as a String)
#   :listen    - href of the first `.find-it-at a` link
#   :author    - scrubbed text of `a.display-name`
#   :genres    - scrubbed text of the links inside `ul.genre-list`
#
# NOTE(review): `open` on a URL relies on open-uri patching Kernel#open, which
# Ruby 3.0 removed — confirm the supported Ruby versions (URI.open is the
# replacement).
#
# @param artists [Array<Hash>] hashes carrying a :review_url; mutated in place
# @return [Array<Hash>] the same array, sorted in place by numeric score in
#   descending order, with entries that have no score placed last
def self.deep_scrape(artists)
  artists.each do |artist|
    doc = Nokogiri::HTML(open("http://pitchfork.com#{artist[:review_url]}"))

    doc.css('div.review-detail').each do |review|
      editorial = scrub(HTMLEntities.new.decode(review.css('div.article-content').text))
      author = scrub(review.css('a.display-name').text)
      score = review.css('span.score').text

      # Not every review has a "find it at" link (or one with an href), so
      # both lookups must tolerate nil.
      store_link = review.css('.find-it-at a').first
      listen = store_link && store_link['href']

      # Reviews do not all expose the same information; blank values are
      # skipped so they never overwrite data already on the artist.
      artist[:editorial] = editorial unless editorial.empty?
      artist[:score] = score unless score.empty?
      artist[:listen] = listen if listen
      artist[:author] = author unless author.empty?

      # A genre list may hold several links; their text is stored as one string.
      review.css('ul.genre-list').each do |genre_list|
        artist[:genres] = scrub(genre_list.css('a').text)
      end
    end
  end

  # Scores are Strings, so compare them numerically ("10.0" must outrank
  # "9.5") and keep artists without a score at the end instead of raising.
  artists.sort_by! do |artist|
    score = artist[:score].to_s
    score.empty? ? [1, 0.0] : [0, -score.to_f]
  end
end

.init_scrape(url) ⇒ Object

Scrapes the given URL for the list of reviews (review link, artist name, album and date).



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/bnm/scrape.rb', line 8

# Scrapes a review listing page and returns one hash per complete review.
#
# Each `div.review` element on the page becomes a hash with:
#   :review_url - href of the first link inside the review
#   :name       - scrubbed text of `.artist-list li`
#   :album      - scrubbed text of `.album-artist .title`
#   :date       - text of `.meta .pub-date`
# Reviews for which any of these values came back empty are left out.
#
# NOTE(review): `open` on a URL relies on open-uri patching Kernel#open, which
# Ruby 3.0 removed — confirm the supported Ruby versions (URI.open is the
# replacement).
#
# @param url [String] address of the listing page to fetch
# @return [Array<Hash>] the complete review entries, in page order
def self.init_scrape(url)
  doc = Nokogiri::HTML(open(url))

  artists = doc.css('div.review').map do |review|
    # A review without a link (or without an href) yields an empty URL and
    # is filtered out below rather than raising on nil.
    link = review.css('a').first

    {
      review_url: link ? link['href'].to_s : '',
      name: scrub(review.css('.artist-list li').text),
      album: scrub(review.css('.album-artist .title').text),
      date: review.css('.meta .pub-date').text
    }
  end

  # Drop entries where the scrape came up empty. Filtering into a new array
  # avoids deleting from the list while iterating it, which skipped the
  # entry following every deletion.
  artists.reject { |artist| artist.value?('') }
end

.scrub(data) ⇒ Object

Scrubs scraped text: removes the Amoeba Music link text and replaces encoding artifacts.



63
64
65
66
67
68
69
70
71
72
73
# File 'lib/bnm/scrape.rb', line 63

# Cleans a scraped string in place and returns it.
#
# Removes the first 'Find it at:Amoeba Music' occurrence, then applies a
# fixed list of character substitutions meant to undo encoding artifacts.
#
# @param data [String] text to clean; the receiver itself is modified
# @return [String] the same String object that was passed in
def self.scrub(data)
  amoeba_blurb = 'Find it at:Amoeba Music'
  # Guarded so a string without the blurb is not touched by slice! at all.
  data.slice!(amoeba_blurb) if data.include?(amoeba_blurb)

  # Pattern => replacement pairs, applied in order.
  # NOTE(review): the last pair replaces a character with itself as written;
  # presumably the patterns were multi-byte mojibake sequences — verify
  # against the source file's encoding.
  [
    [/â/, "'"],
    [/Â/, ""],
    [/é/, "é"]
  ].each do |artifact, replacement|
    data.gsub!(artifact, replacement)
  end

  data
end