Class: ComingSoon::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/coming_soon/scraper.rb

Instance Method Summary collapse

Instance Method Details

#scrape_details(soon) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/coming_soon/scraper.rb', line 30

# Populates +soon.synopsis+ for a single movie.
#
# The page at +soon.url+ (the HTTP 'movieoverview' url) is fetched first.
# If it carries synopsis text and no "READ FULL SYNOPSIS" link, that text is
# used directly. In every other case the work is handed to
# #scrape_plotsummary; this includes any StandardError raised while fetching
# or parsing the overview page, e.g. a failed HTTP to HTTPS redirect.
#
# The URL is opened with URI.parse(url).open instead of Kernel#open. The url
# originates from scraped markup, and Kernel#open treats a string starting
# with "|" as a command to execute; it also no longer opens URLs on
# Ruby >= 3.0. The block form closes the downloaded handle once parsed.
# Relies on open-uri being loaded, as the rest of this file already does.
#
# @param soon [#url, #synopsis=] movie record to fill in
# @return [Object] the assigned synopsis, or whatever #scrape_plotsummary returns
def scrape_details(soon)
  overview =
    begin
      URI.parse(soon.url).open { |page| Nokogiri::HTML(page) }
    rescue StandardError
      # Deliberate best effort: any failure here just means "use the
      # plot summary page instead".
      nil
    end

  label = overview.css("span#SynopsisTextLabel") if overview

  if label && label.any? && overview.css("a.movie-synopsis-link").empty?
    # No link to a longer synopsis and some text is present: use that text.
    soon.synopsis = label.text
  else
    scrape_plotsummary(soon)
  end
end

#scrape_movies ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/coming_soon/scraper.rb', line 3

# Scrapes the Fandango "coming soon" listing and builds a ComingSoon::Movie
# for each of the first +limit+ entries.
#
# For every <tt>li.visual-item</tt> element on the listing page:
#   name:       text of <tt>a.visual-title</tt>, stripped
#   start_date: text of <tt>span</tt>
#   url:        +href+ of the first <tt>a</tt>
# and then #scrape_details is called to fill in the synopsis.
#
# The page is opened with URI.parse(url).open rather than Kernel#open, which
# no longer opens URLs on Ruby >= 3.0; the block form closes the downloaded
# handle once it has been parsed. Relies on open-uri being loaded.
#
# NOTE(review): the Movie objects are not collected here; presumably
# ComingSoon::Movie keeps track of its own instances -- confirm.
#
# @param limit [Integer] maximum number of movies to scrape (default 20)
# @return [Object] incidental (the slice of list nodes that was iterated);
#   NOTE(review): no caller is visible here, confirm nothing depends on it
def scrape_movies(limit = 20)
  listing_url = "http://www.fandango.com/moviescomingsoon"
  listing = URI.parse(listing_url).open { |page| Nokogiri::HTML(page) }

  listing.css("li.visual-item").first(limit).each do |movie|
    soon = ComingSoon::Movie.new
    soon.name = movie.css("a.visual-title").text.strip
    soon.start_date = movie.css("span").text
    soon.url = movie.css("a").attribute("href").value

    scrape_details(soon)
  end
end

#scrape_plotsummary(soon) ⇒ Object



55
56
57
58
59
60
61
62
# File 'lib/coming_soon/scraper.rb', line 55

# Populates +soon.synopsis+ from the movie's HTTP 'plotsummary' page.
#
# The page url is derived from +soon.url+ by replacing the first occurrence
# of "movieoverview" with "plotsummary"; +soon.url+ itself is left untouched.
# #scrape_details also calls this after an HTTP to HTTPS redirect failed.
#
# The URL is opened with URI.parse(url).open instead of Kernel#open. The url
# originates from scraped markup, and Kernel#open treats a string starting
# with "|" as a command to execute; it also no longer opens URLs on
# Ruby >= 3.0. The block form closes the downloaded handle once parsed.
# Relies on open-uri being loaded. Fetch and parse errors are not rescued
# here and propagate to the caller.
#
# @param soon [#url, #synopsis=] movie record to fill in
# @return [String] the synopsis text that was assigned
def scrape_plotsummary(soon)
  synop_url = soon.url.sub("movieoverview", "plotsummary")
  summary_page = URI.parse(synop_url).open { |page| Nokogiri::HTML(page) }
  soon.synopsis = summary_page.css("p.subpage-descriptive-content").text
end