Class: Scraper
- Inherits:
-
Object
- Object
- Scraper
- Defined in:
- lib/imdb_term/scraper.rb
Constant Summary collapse
- BASE_URL =
'http://www.imdb.com'
Class Method Summary collapse
- .scrape_movie_by_id(id) ⇒ Object
-
.scrape_movie_by_title(title) ⇒ Object
Searches IMDb by title via the /find?q=<title> endpoint (e.g. /find?q=Iron+Man).
- .scrape_now_playing ⇒ Object
- .scrape_opening_this_week ⇒ Object
Class Method Details
.scrape_movie_by_id(id) ⇒ Object
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/imdb_term/scraper.rb', line 55

# Fetches the IMDb title page for the given id and extracts movie details.
#
# @param id [String, Integer] the part of the IMDb title id that follows the
#   "tt" prefix; it is interpolated verbatim into "#{BASE_URL}/title/tt<id>".
# @return [Hash] with the keys :id, :title, :release_year, :content_rating,
#   :runtime, :summary, :director (String; several names are joined with
#   ", "), :genres (Array<String>) and :stars (Array<String>).
def self.scrape_movie_by_id(id)
  # URI.open (open-uri) rather than Kernel#open: opening URLs through
  # Kernel#open was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  doc = Nokogiri::HTML(URI.open("#{BASE_URL}/title/tt#{id}"))
  title_overview = doc.css('div#title-overview-widget')
  directors = title_overview.css('div.plot_summary_wrapper span[itemprop="director"] span')

  {
    :id => id,
    # Drops the last 7 characters of the heading -- presumably the trailing
    # " (YYYY)" release-year suffix; confirm against the live markup.
    :title => title_overview.css('div.title_wrapper > h1').text.strip[0..-8],
    :release_year => title_overview.css('span#titleYear a').text.strip,
    # NOTE(review): this is the whole, unstripped text of div.subtext -- the
    # same container :runtime and :genres are read from -- not an isolated
    # rating value. Confirm whether that is intended.
    :content_rating => title_overview.css('div.subtext').text,
    :runtime => title_overview.css('div.subtext time').text.strip,
    :genres => title_overview.css('div.subtext span.itemprop').map(&:text),
    :summary => title_overview.css('div.plot_summary_wrapper div.summary_text').text.strip,
    # Joining handles zero, one or many directors uniformly.
    :director => directors.map(&:text).join(', '),
    :stars => title_overview.css('div.plot_summary_wrapper span[itemprop="actors"] span').map(&:text)
  }
end
.scrape_movie_by_title(title) ⇒ Object
Searches IMDb by title via the /find?q=<title> endpoint (e.g. /find?q=Iron+Man).
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/imdb_term/scraper.rb', line 38

# Searches IMDb for a title and returns the entries of the first result
# section of the search page.
#
# @param title [String] search query, interpolated verbatim into
#   "#{BASE_URL}/find?q=<title>" (e.g. /find?q=Iron+Man).
#   NOTE(review): no URL-encoding is applied here, so callers presumably pass
#   an already encoded query such as "Iron+Man" -- confirm against callers.
# @return [Array<Hash>] one hash per result with the keys :id (String),
#   :title (String), :release_year (String or nil) and :type (MatchData or
#   nil). Returns an empty array when the page has no result section.
def self.scrape_movie_by_title(title)
  # URI.open (open-uri) rather than Kernel#open: opening URLs through
  # Kernel#open was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  doc = Nokogiri::HTML(URI.open("#{BASE_URL}/find?q=#{title}"))

  first_section = doc.css('div.findSection').first
  # A search without hits has no result section; report "nothing found"
  # instead of failing with NoMethodError on nil.
  return [] unless first_section

  first_section.css('td.result_text').map do |movie|
    link = movie.css('a')

    # Only the LAST parenthesised group is considered, and only when its
    # first inner character is a letter (so a trailing "(2008)" is not a type).
    last_group = movie.text.scan(/\([\w ]+\)/).last
    type = last_group if last_group && last_group[1].match(/[A-Za-z]/)

    {
      # href is split on "/" and the third segment loses its first two
      # characters (presumably the "tt" prefix of "/title/tt<id>/").
      :id => link.attribute('href').value.split('/')[2][2..-1],
      :title => link.text,
      # Last four-digit run starting with 1 or 2 found in the row text.
      :release_year => movie.text.scan(/[12]\d{3}/).last,
      # MatchData over the text inside the parentheses, or nil without a type.
      :type => type && type.match(/[\w ]+/)
    }
  end
end
.scrape_now_playing ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/imdb_term/scraper.rb', line 8

# Scrapes the movies-in-theaters page and returns the movies linked from the
# LAST "div.list.detail.sub-list" block inside div#main.
#
# @return [Array<Hash>] one hash per poster link with the keys :id (String)
#   and :title (String).
def self.scrape_now_playing
  # URI.open (open-uri) rather than Kernel#open: opening URLs through
  # Kernel#open was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  doc = Nokogiri::HTML(URI.open("#{BASE_URL}/movies-in-theaters"))
  poster_links = doc.css('div#main div.list.detail.sub-list').last.css('td#img_primary a')

  poster_links.map do |movie|
    {
      # href is split on "/" and the third segment loses its first two
      # characters (presumably the "tt" prefix of "/title/tt<id>/").
      :id => movie.attribute('href').value.split('/')[2][2..-1],
      # Drops the last 6 characters of the image title -- presumably a
      # trailing "(YYYY)" -- then strips the remaining whitespace.
      :title => movie.css('img').attribute('title').value[0..-7].strip
    }
  end
end
.scrape_opening_this_week ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/imdb_term/scraper.rb', line 23

# Scrapes the movies-in-theaters page and returns the movies linked from the
# FIRST "div.list.detail.sub-list" block inside div#main.
#
# @return [Array<Hash>] one hash per poster link with the keys :id (String)
#   and :title (String).
def self.scrape_opening_this_week
  # URI.open (open-uri) rather than Kernel#open: opening URLs through
  # Kernel#open was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  doc = Nokogiri::HTML(URI.open("#{BASE_URL}/movies-in-theaters"))
  poster_links = doc.css('div#main div.list.detail.sub-list').first.css('td#img_primary a')

  poster_links.map do |movie|
    {
      # href is split on "/" and the third segment loses its first two
      # characters (presumably the "tt" prefix of "/title/tt<id>/").
      :id => movie.attribute('href').value.split('/')[2][2..-1],
      # Drops the last 6 characters of the image title -- presumably a
      # trailing "(YYYY)" -- then strips the remaining whitespace.
      :title => movie.css('img').attribute('title').value[0..-7].strip
    }
  end
end