Class: ImdbMovie

Inherits:

Object

Object
ImdbMovie

show all

Includes:: Comparable

Defined in:: lib/imdb/imdb_movie.rb

Instance Attribute Summary collapse

#id ⇒ Object readonly

The IMDB title id that was passed to the constructor.
#url ⇒ Object readonly

The IMDB page URL built from the id by the constructor.

Class Method Summary collapse

.use_html_cache ⇒ Object

This is intended to be stubbed by rspec, where it should return true.

Instance Method Summary collapse

#<=>(other) ⇒ Object

add comparator so Arrays containing ImdbMovie objects can use uniq().
#also_known_as ⇒ Object

return an Array of Strings containing AKA titles.
#aspect_ratio ⇒ Object
#cast_members ⇒ Object
#certifications ⇒ Object

older films may not have MPAA ratings but usually have a certification.
#color ⇒ Object
#company ⇒ Object
#countries ⇒ Object
#directors ⇒ Object
#genres ⇒ Object
#initialize(id, title = nil) ⇒ ImdbMovie constructor

A new instance of ImdbMovie.
#languages ⇒ Object
#length ⇒ Object
#mpaa ⇒ Object

The MPAA rating, e.g. “PG-13”.
#photos ⇒ Object
#plot ⇒ Object
#poster ⇒ Object
#poster_url ⇒ Object
#rating ⇒ Object
#raw_title ⇒ Object

return the raw title.
#release_date ⇒ Object
#release_year ⇒ Object

Find the release year. Note: this is needed because not all entries on IMDB have a full release date as parsed by release_date.
#remove_parens(str) ⇒ Object
#tagline ⇒ Object
#tiny_poster_url ⇒ Object
#title ⇒ Object
#to_hash ⇒ Object
#to_xml ⇒ Object
#to_yaml ⇒ Object
#video_game? ⇒ Boolean

is this a video game as indicated by a ‘(VG)’ in the raw title?.
#writers ⇒ Object
#year ⇒ Object

Constructor Details

#initialize(id, title = nil) ⇒ `ImdbMovie`

Returns a new instance of ImdbMovie.

# File 'lib/imdb/imdb_movie.rb', line 9

# Sets up a movie for the given IMDB title id.
#
# @param id [Object] the IMDB title id; it is interpolated after "tt" to form the page URL
# @param title [Object, nil] an already-known title, stored unchanged (nil when omitted)
def initialize(id, title = nil)
  @id    = id
  @url   = "http://www.imdb.com/title/tt#{id}/"
  @title = title
end

Instance Attribute Details

#id ⇒ `Object` (readonly)

The IMDB title id that was passed to the constructor.



7
8
9

# File 'lib/imdb/imdb_movie.rb', line 7

# Read-only accessor for @id, the value the constructor received as +id+.
def id
  @id
end

#url ⇒ `Object` (readonly)

The IMDB page URL built from the id by the constructor.



7
8
9

# File 'lib/imdb/imdb_movie.rb', line 7

# Read-only accessor for @url, the page address the constructor builds from the id.
def url
  @url
end

Class Method Details

.use_html_cache ⇒ `Object`

This is intended to be stubbed by rspec, where it should return true.



17
18
19

# File 'lib/imdb/imdb_movie.rb', line 17

# Always answers false in this implementation.
# Per the original note, specs are expected to stub this method so that it returns true.
# NOTE(review): the code that consults this flag is outside this view.
def self.use_html_cache
  false
end

Instance Method Details

#<=>(other) ⇒ `Object`

add comparator so Arrays containing ImdbMovie objects can use uniq()



23
24
25

# File 'lib/imdb/imdb_movie.rb', line 23

# Orders movies by comparing their ids.
#
# @param other [#id] the object to compare with
# @return [Integer, nil] the result of `@id <=> other.id`, or nil when +other+
#   does not respond to +id+ and therefore cannot be compared
def <=>(other)
  # Answer nil for incomparable operands (the Ruby <=> convention) instead of
  # letting `other.id` raise NoMethodError, e.g. when comparing against nil.
  return nil unless other.respond_to?(:id)

  @id <=> other.id
end

#also_known_as ⇒ `Object`

return an Array of Strings containing AKA titles

# File 'lib/imdb/imdb_movie.rb', line 139

# Collects the alternative ("Also Known As") titles.
#
# Starts at the h5 inside the section matched by the selector and walks the
# sibling chain via +next+, keeping only non-element nodes. Each kept node is
# stringified, run through remove_parens and stripped; duplicates, nils and
# empty strings are dropped.
#
# @return [Array] the cleaned titles, empty when no h5 is found
def also_known_as
  node = document.search("//h5[text()^='Also Known As:']/..").at('h5')
  raw_titles = []
  until node.nil?
    raw_titles << node.to_s unless node.elem?
    node = node.next
  end
  cleaned = raw_titles.map { |raw| remove_parens(raw).strip }
  cleaned.uniq.compact.reject { |title| title.empty? }
end

#aspect_ratio ⇒ `Object`



93
94
95

# File 'lib/imdb/imdb_movie.rb', line 93

# Extracts the aspect ratio text from the "Aspect Ratio" section.
#
# Takes the third line of the section's inner HTML, removes any
# tag-wrapped fragment on that line, then strips and unescapes what is left.
#
# @return the cleaned text, or nil when any step raises a StandardError
def aspect_ratio
  section_html = document.search("//h5[text()^='Aspect Ratio']/..").innerHTML
  value_line = section_html.split("\n")[2]
  value_line.gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
rescue StandardError
  nil
end

#cast_members ⇒ `Object`

# File 'lib/imdb/imdb_movie.rb', line 54

# Builds the cast list from the rows of the cast table.
#
# For every row the actor comes from "td.nm a" and the character from
# "td.char a"; when that character link is empty the plain "td.char" cell
# is used instead.
#
# @return [Array<Array>] one [actor, character] pair per table row
def cast_members
  document.search("table.cast tr").map do |row|
    actor = row.search("td.nm a").innerHTML.strip.unescape_html
    role = row.search("td.char a").innerHTML.strip.unescape_html
    role = row.search("td.char").innerHTML.strip.unescape_html if role.empty?
    [actor, role]
  end
end

#certifications ⇒ `Object`

Older films may not have MPAA ratings but usually have a certification. Returns an Array of Hashes, one per certification link, each with a 'country' key and a 'rating' key, for example: [{'country' => 'USA', 'rating' => 'Approved'}].

# File 'lib/imdb/imdb_movie.rb', line 167

# Lists the certifications found after the "Certification:" heading.
#
# Each link's stripped text is split on a colon (greedy match, so the split
# happens at the last colon); texts without a colon are skipped. If reading
# the links raises, the list is treated as empty.
#
# @return [Array<Hash>] hashes with 'country' and 'rating' keys
def certifications
  link_texts = document.search("h5[text()='Certification:'] ~ a[@href*=/List?certificates']").map { |link| link.innerHTML.strip } rescue []
  entries = link_texts.map do |text|
    parts = /(.*):(.*)/.match(text)
    next if parts.nil?

    { 'country' => parts[1], 'rating' => parts[2] }
  end
  entries.compact
end

#color ⇒ `Object`



109
110
111

# File 'lib/imdb/imdb_movie.rb', line 109

# Reads the color information from the link that follows the "Color:" heading.
#
# @return the link's stripped, unescaped inner HTML, or nil when any step
#   raises a StandardError (for instance when no such link is found)
def color
  link = document.at("h5[text()='Color:'] ~ a[@href*=color-info']")
  link.innerHTML.strip.unescape_html
rescue StandardError
  nil
end

#company ⇒ `Object`



113
114
115

# File 'lib/imdb/imdb_movie.rb', line 113

# Reads the company name from the link that follows the "Company:" heading.
#
# @return the link's stripped, unescaped inner HTML, or nil when any step
#   raises a StandardError (for instance when no such link is found)
def company
  link = document.at("h5[text()='Company:'] ~ a[@href*=/company/']")
  link.innerHTML.strip.unescape_html
rescue StandardError
  nil
end

#countries ⇒ `Object`



101
102
103

# File 'lib/imdb/imdb_movie.rb', line 101

# Lists the countries linked after the "Country:" heading.
#
# @return [Array] the stripped, unescaped text of each matching link, or an
#   empty Array when any step raises a StandardError
def countries
  links = document.search("h5[text()='Country:'] ~ a[@href*=/Sections/Countries/']")
  links.map { |link| link.innerHTML.strip.unescape_html }
rescue StandardError
  []
end

#directors ⇒ `Object`



34
35
36

# File 'lib/imdb/imdb_movie.rb', line 34

# Lists the director names linked after the heading that starts with "Director".
#
# The literal link text 'more' is discarded and duplicates are removed.
#
# @return [Array] the unique names, or an empty Array when any step raises a
#   StandardError
def directors
  links = document.search("h5[text()^='Director'] ~ a")
  names = links.map { |link| link.innerHTML.strip.unescape_html }
  (names - ['more']).uniq
rescue StandardError
  []
end

#genres ⇒ `Object`



81
82
83

# File 'lib/imdb/imdb_movie.rb', line 81

# Lists the genres linked after the "Genre:" heading.
#
# @return [Array] the stripped, unescaped text of each matching link, or an
#   empty Array when any step raises a StandardError
def genres
  links = document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']")
  links.map { |link| link.innerHTML.strip.unescape_html }
rescue StandardError
  []
end

#languages ⇒ `Object`



105
106
107

# File 'lib/imdb/imdb_movie.rb', line 105

# Lists the languages linked after the "Language:" heading.
#
# @return [Array] the stripped, unescaped text of each matching link, or an
#   empty Array when any step raises a StandardError
def languages
  links = document.search("h5[text()='Language:'] ~ a[@href*=/Sections/Languages/']")
  links.map { |link| link.innerHTML.strip.unescape_html }
rescue StandardError
  []
end

#length ⇒ `Object`



97
98
99

# File 'lib/imdb/imdb_movie.rb', line 97

# Finds the running time inside the "Runtime" section.
#
# @return the first "<digits> min" fragment of the section's inner HTML, or
#   nil when nothing matches or any step raises a StandardError
def length
  runtime_html = document.search("//h5[text()^='Runtime']/..").innerHTML
  runtime_html[/\d+ min/]
rescue StandardError
  nil
end

#mpaa ⇒ `Object`

The MPAA rating, e.g. “PG-13”



160
161
162

# File 'lib/imdb/imdb_movie.rb', line 160

# Reads the MPAA rating text from the "MPAA" section.
#
# @return the section text with every 'MPAA:' label removed and surrounding
#   whitespace stripped, or nil when any step raises a StandardError
def mpaa
  section_text = document.search("//h5[text()^='MPAA']/..").text
  section_text.gsub('MPAA:', '').strip
rescue StandardError
  nil
end

#photos ⇒ `Object`



117
118
119

# File 'lib/imdb/imdb_movie.rb', line 117

# Collects the image sources from the media strip thumbnails.
#
# @return [Array] the 'src' attribute of every ".media_strip_thumb img"
#   match, or an empty Array when any step raises a StandardError
def photos
  thumbnails = document.search(".media_strip_thumb img")
  thumbnails.map { |thumbnail| thumbnail['src'] }
rescue StandardError
  []
end

#plot ⇒ `Object`



85
86
87

# File 'lib/imdb/imdb_movie.rb', line 85

# Extracts the plot text from the "Plot" section.
#
# Takes the third line of the section's inner HTML, removes any
# tag-wrapped fragment on that line, then strips and unescapes what is left.
#
# @return the cleaned text, or nil when any step raises a StandardError
def plot
  section_html = document.search("//h5[text()^='Plot']/..").innerHTML
  summary_line = section_html.split("\n")[2]
  summary_line.gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
rescue StandardError
  nil
end

#poster ⇒ `Object`



46
47
48

# File 'lib/imdb/imdb_movie.rb', line 46

# Wraps the poster URL in an ImdbImage.
#
# @return [ImdbImage, nil] a new ImdbImage built from poster_url, or nil
#   when reading the URL or constructing the image raises a StandardError
def poster
  address = poster_url
  ImdbImage.new(address)
rescue StandardError
  nil
end

#poster_url ⇒ `Object`



38
39
40

# File 'lib/imdb/imdb_movie.rb', line 38

# Reads the link target of the anchor named 'poster'.
#
# @return the anchor's 'href' attribute, or nil when any step raises a
#   StandardError (for instance when the anchor is not found)
def poster_url
  anchor = document.at("a[@name='poster']")
  anchor['href']
rescue StandardError
  nil
end

#rating ⇒ `Object`



50
51
52

# File 'lib/imdb/imdb_movie.rb', line 50

def rating
  document.at("h5[text()='User Rating:'] ~ b").innerHTML.strip.unescape_html.split('/').first.to_f rescue nil
end

#raw_title ⇒ `Object`

return the raw title



122
123
124

# File 'lib/imdb/imdb_movie.rb', line 122

# Returns the unprocessed title: the inner text of the first h1.
# Nothing is rescued here, so a missing h1 surfaces as an error.
def raw_title
  heading = document.at("h1")
  heading.innerText
end

#release_date ⇒ `Object`

# File 'lib/imdb/imdb_movie.rb', line 74

# Parses the release date from the "Release Date" section.
#
# Looks for a "<day> <month word> <year>" fragment at the start of a line in
# the section's inner HTML, hands it to Chronic.parse, and rebuilds a Date
# from the result's year/month/day.
#
# @return [Date, nil] the date, or nil when any step raises a StandardError
def release_date
  section_html = document.search("//h5[text()^='Release Date']/..").innerHTML
  parsed_time = Chronic.parse(section_html[/^\d{1,2} \w+ \d{4}/])
  Date.parse(parsed_time.strftime('%Y/%m/%d'))
rescue StandardError
  nil
end

#release_year ⇒ `Object`

Find the release year. Note: this is needed because not all entries on IMDB have a full release date as parsed by release_date.



134
135
136

# File 'lib/imdb/imdb_movie.rb', line 134

# Finds the release year: the first run of four digits in the inner HTML of
# the "Release Date" section. Returns nil when no such run exists; errors
# are not rescued here.
def release_year
  section_html = document.search("//h5[text()^='Release Date']/..").innerHTML
  section_html[/\d{4}/]
end

#remove_parens(str) ⇒ `Object`

# File 'lib/imdb/imdb_movie.rb', line 152

# Removes every parenthesised group from a string, including nested groups.
#
# Works on a copy, so the caller's string is left untouched and frozen
# strings are accepted.
#
# @param str [String] the text to clean
# @return [String] a new string with all "(...)" groups removed
def remove_parens(str)
  cleaned = str.dup
  # Each pass deletes the innermost groups (those containing no parentheses);
  # repeating until no "(...)" remains on a line unwinds nested groups.
  cleaned.gsub!(/\([^\)\(]*\)/, '') while cleaned =~ /\(.*\)/
  cleaned
end

#tagline ⇒ `Object`



89
90
91

# File 'lib/imdb/imdb_movie.rb', line 89

# Extracts the tagline text from the "Tagline" section.
#
# Takes the third line of the section's inner HTML, removes any
# tag-wrapped fragment on that line, then strips and unescapes what is left.
#
# @return the cleaned text, or nil when any step raises a StandardError
def tagline
  section_html = document.search("//h5[text()^='Tagline']/..").innerHTML
  tagline_line = section_html.split("\n")[2]
  tagline_line.gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
rescue StandardError
  nil
end

#tiny_poster_url ⇒ `Object`



42
43
44

# File 'lib/imdb/imdb_movie.rb', line 42

# Reads the source of the image inside the anchor named 'poster'.
#
# @return the image's 'src' attribute, or nil when any step raises a
#   StandardError (for instance when the image is not found)
def tiny_poster_url
  thumbnail = document.at("a[@name='poster'] img")
  thumbnail['src']
rescue StandardError
  nil
end

#title ⇒ `Object`

# File 'lib/imdb/imdb_movie.rb', line 27

# Returns the movie title, reading it from the page on first use.
#
# A title that is already set (for example via the constructor) is returned
# as is. Otherwise the text of "div#tn15title h1" before the first '<span>'
# is stripped, unescaped and remembered in @title; when that lookup raises a
# StandardError the title stays nil.
#
# @return the title, or nil when it could not be determined
def title
  return @title unless @title.nil?

  @title = begin
    heading_html = document.at("div#tn15title h1").innerHTML
    heading_html.split('<span>').first.strip.unescape_html
  rescue StandardError
    nil
  end
end

#to_hash ⇒ `Object`

# File 'lib/imdb/imdb_movie.rb', line 181

# Gathers the movie's attributes into a Hash keyed by attribute name.
#
# Each listed reader is called in turn; nil results are left out. A reader
# that raises a StandardError is reported on stdout and skipped, so one bad
# field does not lose the rest. Non-standard exceptions (SystemExit,
# Interrupt, ...) are deliberately not caught, so the process can still be
# stopped while this runs.
#
# @return [Hash] String keys mapped to the non-nil attribute values
def to_hash
  hash = {}
  [:title, :directors, :poster_url, :tiny_poster_url, :poster, :rating, :cast_members,
   :writers, :year, :genres, :plot, :tagline, :aspect_ratio, :length, :release_date,
   :countries, :languages, :color, :company, :photos, :raw_title, :release_year,
   :also_known_as, :mpaa, :certifications
  ].each do |sym|
    begin
      value = send(sym)
      hash[sym.to_s] = value unless value.nil?
    rescue StandardError => e
      puts "Error getting data for hash for #{sym} - #{e}"
    end
  end
  hash
end

#to_xml ⇒ `Object`



198
199
200

# File 'lib/imdb/imdb_movie.rb', line 198

# Serializes the movie to XML.
#
# Passes the result of to_hash to XmlSimple.xml_out with the 'NoAttr' option
# enabled and 'movie' as the root element name.
#
# @return whatever XmlSimple.xml_out returns for that input
def to_xml
  movie_data = to_hash
  XmlSimple.xml_out(movie_data, 'NoAttr' => true, 'RootName' => 'movie')
end

#to_yaml ⇒ `Object`



202
203
204

# File 'lib/imdb/imdb_movie.rb', line 202

def to_yaml
  YAML.dump(to_hash)
end

#video_game? ⇒ `Boolean`

is this a video game as indicated by a ‘(VG)’ in the raw title?

Returns:

(Boolean)



127
128
129

# File 'lib/imdb/imdb_movie.rb', line 127

# Tells whether this entry is a video game, as indicated by a '(VG)' marker
# in the raw title.
#
# @return [Boolean] true when raw_title contains "(VG)", false otherwise
def video_game?
  # =~ yields a match index or nil; convert it so the predicate answers a real boolean.
  !(raw_title =~ /\(VG\)/).nil?
end

#writers ⇒ `Object`



66
67
68

# File 'lib/imdb/imdb_movie.rb', line 66

# Lists the writer names linked after the heading that starts with "Writer".
#
# The literal link text 'more' is discarded and duplicates are removed.
#
# @return [Array] the unique names, or an empty Array when any step raises a
#   StandardError
def writers
  links = document.search("h5[text()^='Writer'] ~ a")
  names = links.map { |link| link.innerHTML.strip.unescape_html }
  (names - ['more']).uniq
rescue StandardError
  []
end

#year ⇒ `Object`



70
71
72

# File 'lib/imdb/imdb_movie.rb', line 70

# Returns the inner HTML of the link(s) whose href starts with
# "/Sections/Years/". Errors are not rescued here.
def year
  year_links = document.search('a[@href^="/Sections/Years/"]')
  year_links.innerHTML
end

Class: ImdbMovie

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(id, title = nil) ⇒ ImdbMovie

Instance Attribute Details

#id ⇒ Object (readonly)

#url ⇒ Object (readonly)

Class Method Details

.use_html_cache ⇒ Object

Instance Method Details

#<=>(other) ⇒ Object

#also_known_as ⇒ Object

#aspect_ratio ⇒ Object

#cast_members ⇒ Object

#certifications ⇒ Object

#color ⇒ Object

#company ⇒ Object

#countries ⇒ Object

#directors ⇒ Object

#genres ⇒ Object

#languages ⇒ Object

#length ⇒ Object

#mpaa ⇒ Object

#photos ⇒ Object

#plot ⇒ Object

#poster ⇒ Object

#poster_url ⇒ Object

#rating ⇒ Object

#raw_title ⇒ Object

#release_date ⇒ Object

#release_year ⇒ Object

#remove_parens(str) ⇒ Object

#tagline ⇒ Object

#tiny_poster_url ⇒ Object

#title ⇒ Object

#to_hash ⇒ Object

#to_xml ⇒ Object

#to_yaml ⇒ Object

#video_game? ⇒ Boolean

#writers ⇒ Object

#year ⇒ Object