Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/imdb_term/scraper.rb

Constant Summary collapse

BASE_URL =
'http://www.imdb.com'

Class Method Summary collapse

Class Method Details

.scrape_movie_by_id(id) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/imdb_term/scraper.rb', line 55

# Scrapes the IMDb title page of a single movie.
#
# Expects 'open-uri' and Nokogiri to be loaded by the enclosing file.
#
# @param id [String, Integer] the IMDb title id without its "tt" prefix;
#   it is interpolated into "#{BASE_URL}/title/tt<id>"
# @return [Hash] with the keys :id, :title, :release_year, :content_rating,
#   :runtime, :genres (Array<String>), :summary, :director (String, several
#   names joined with ", ") and :stars (Array<String>)
def self.scrape_movie_by_id(id)
  # URI.open (not Kernel#open): open-uri's Kernel#open patch for URLs was
  # deprecated in Ruby 2.7 and removed in 3.0. The block form closes the
  # underlying IO once the document has been parsed.
  doc = URI.open("#{BASE_URL}/title/tt#{id}") { |io| Nokogiri::HTML(io) }
  overview = doc.css('div#title-overview-widget')
  directors = overview.css('div.plot_summary_wrapper span[itemprop="director"] span')

  {
    id: id,
    # Drops the last seven characters of the heading
    # (presumably the trailing " (YYYY)" year suffix -- TODO confirm).
    title: overview.css('div.title_wrapper > h1').text.strip[(0..-8)],
    release_year: overview.css('span#titleYear a').text.strip,
    # NOTE(review): this is the complete, unstripped text of div.subtext,
    # not only the rating; kept as-is because callers may rely on it.
    content_rating: overview.css('div.subtext').text,
    runtime: overview.css('div.subtext time').text.strip,
    genres: overview.css('div.subtext span.itemprop').map(&:text),
    summary: overview.css('div.plot_summary_wrapper div.summary_text').text.strip,
    # join yields "" for no director and the bare name for exactly one,
    # so no special-casing on the number of directors is needed.
    director: directors.map(&:text).join(', '),
    stars: overview.css('div.plot_summary_wrapper span[itemprop="actors"] span').map(&:text)
  }
end

.scrape_movie_by_title(title) ⇒ Object

/find?q=Iron+Man



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/imdb_term/scraper.rb', line 38

# Searches IMDb for a title and returns the entries of the first result
# section of the search page.
#
# Expects 'open-uri' and Nokogiri to be loaded by the enclosing file.
#
# @param title [String] search query, interpolated verbatim into
#   "#{BASE_URL}/find?q=<title>" (e.g. "Iron+Man"). It is NOT escaped here,
#   so it has to be URL-safe already.
# @return [Array<Hash>] one hash per result with the keys :id (String),
#   :title (String), :release_year (String or nil) and :type (MatchData or
#   nil); an empty array when the page contains no result section
def self.scrape_movie_by_title(title) # /find?q=Iron+Man
  # URI.open (not Kernel#open): open-uri's Kernel#open patch for URLs was
  # deprecated in Ruby 2.7 and removed in 3.0. The block form closes the
  # underlying IO once the document has been parsed.
  doc = URI.open("#{BASE_URL}/find?q=#{title}") { |io| Nokogiri::HTML(io) }

  # A page without any div.findSection would otherwise fail with
  # NoMethodError on nil; treat it as "no results".
  section = doc.css('div.findSection').first
  return [] unless section

  section.css('td.result_text').map do |movie|
    link = movie.css('a')
    text = movie.text

    # Only the LAST parenthesised group is considered, and only when its
    # first inner character is a letter, e.g. "(TV Series)" but not "(2008)".
    last_group = text.scan(/\([\w ]+\)/).last
    type = last_group if last_group && last_group[1].match(/[A-Za-z]/)

    {
      # Third "/"-separated segment of the href with its first two characters
      # removed (assumes hrefs shaped like "/title/tt<id>/..." -- TODO confirm).
      id: link.attribute('href').value.split('/')[2][(2..-1)],
      title: link.text,
      # Last four-digit number starting with 1 or 2 found in the entry text.
      release_year: text.scan(/[12]\d{3}/)[-1],
      # Kept as a MatchData (not a String) so the returned shape is unchanged.
      type: type && type.match(/[\w ]+/)
    }
  end
end

.scrape_now_playing ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/imdb_term/scraper.rb', line 8

# Scrapes "#{BASE_URL}/movies-in-theaters" and returns the movies linked from
# the LAST "div.list.detail.sub-list" block inside div#main.
#
# Expects 'open-uri' and Nokogiri to be loaded by the enclosing file.
#
# @return [Array<Hash>] one hash per movie with the keys :id and :title
#   (both Strings); an empty array when the page contains no such list
def self.scrape_now_playing
  # URI.open (not Kernel#open): open-uri's Kernel#open patch for URLs was
  # deprecated in Ruby 2.7 and removed in 3.0. The block form closes the
  # underlying IO once the document has been parsed.
  doc = URI.open("#{BASE_URL}/movies-in-theaters") { |io| Nokogiri::HTML(io) }

  # Without any matching list, `.last` is nil; return no movies instead of
  # failing with NoMethodError.
  section = doc.css('div#main div.list.detail.sub-list').last
  return [] unless section

  section.css('td#img_primary a').map do |link|
    {
      # Third "/"-separated segment of the href with its first two characters
      # removed (assumes hrefs shaped like "/title/tt<id>/..." -- TODO confirm).
      id: link.attribute('href').value.split('/')[2][(2..-1)],
      # Poster title minus its last six characters
      # (presumably a trailing "(YYYY)" -- TODO confirm).
      title: link.css('img').attribute('title').value[(0..-7)].strip
    }
  end
end

.scrape_opening_this_week ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/imdb_term/scraper.rb', line 23

# Scrapes "#{BASE_URL}/movies-in-theaters" and returns the movies linked from
# the FIRST "div.list.detail.sub-list" block inside div#main.
#
# Expects 'open-uri' and Nokogiri to be loaded by the enclosing file.
#
# @return [Array<Hash>] one hash per movie with the keys :id and :title
#   (both Strings); an empty array when the page contains no such list
def self.scrape_opening_this_week
  # URI.open (not Kernel#open): open-uri's Kernel#open patch for URLs was
  # deprecated in Ruby 2.7 and removed in 3.0. The block form closes the
  # underlying IO once the document has been parsed.
  doc = URI.open("#{BASE_URL}/movies-in-theaters") { |io| Nokogiri::HTML(io) }

  # Without any matching list, `.first` is nil; return no movies instead of
  # failing with NoMethodError.
  section = doc.css('div#main div.list.detail.sub-list').first
  return [] unless section

  section.css('td#img_primary a').map do |link|
    {
      # Third "/"-separated segment of the href with its first two characters
      # removed (assumes hrefs shaped like "/title/tt<id>/..." -- TODO confirm).
      id: link.attribute('href').value.split('/')[2][(2..-1)],
      # Poster title minus its last six characters
      # (presumably a trailing "(YYYY)" -- TODO confirm).
      title: link.css('img').attribute('title').value[(0..-7)].strip
    }
  end
end