Class: ImdbMovie

Inherits:
Object
  • Object
show all
Includes:
Comparable
Defined in:
lib/imdb/imdb_movie.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(id, title = nil) ⇒ ImdbMovie

Returns a new instance of ImdbMovie.



9
10
11
12
13
# File 'lib/imdb/imdb_movie.rb', line 9

# Creates a movie handle from an id and an optional, already-known title.
#
# The id is interpolated into the title-page URL, which is stored in @url.
#
# @param id [Object] value placed after "tt" in the URL
# @param title [String, nil] title to remember up front; nil when unknown
def initialize(id, title = nil)
  @id, @title = id, title
  @url = "http://www.imdb.com/title/tt#{@id}/"
end

Instance Attribute Details

#idObject (readonly)

Read-only accessor for the id that was passed to the constructor.



7
8
9
# File 'lib/imdb/imdb_movie.rb', line 7

# Reader for the id handed to the constructor.
#
# @return [Object] the current value of @id
def id
  @id
end

#urlObject (readonly)

Read-only accessor for the title-page URL that the constructor builds from the id.



7
8
9
# File 'lib/imdb/imdb_movie.rb', line 7

# Reader for the title-page URL built in the constructor.
#
# @return [String] the current value of @url
def url
  @url
end

Class Method Details

.use_html_cacheObject

This is intended to be stubbed by RSpec, where it should return true.



17
18
19
# File 'lib/imdb/imdb_movie.rb', line 17

# Class-level switch that always answers false in this implementation.
# Per the accompanying note, specs are expected to stub it to return true.
#
# @return [Boolean] always false here
def self.use_html_cache
  false
end

Instance Method Details

#<=>(other) ⇒ Object

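Adds a comparator (by id) so that ImdbMovie objects can be compared and sorted. The original note says this lets Arrays containing ImdbMovie objects use uniq(); note that Array#uniq relies on #hash and #eql? rather than on this comparator.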



23
24
25
# File 'lib/imdb/imdb_movie.rb', line 23

# Orders this movie relative to +other+ by comparing ids.
#
# Follows the usual <=> convention of answering nil for an object that
# cannot be compared, instead of raising NoMethodError from +other.id+.
# That matters because Comparable#== is built on this method.
#
# NOTE(review): the original description says this enables Array#uniq;
# uniq is driven by #hash / #eql?, so confirm whether those are needed too.
#
# @param other [#id] object to compare against
# @return [Integer, nil] result of comparing the two ids, or nil when
#   +other+ does not expose an id
def <=>(other)
  return nil unless other.respond_to?(:id)

  @id <=> other.id
end

#also_known_asObject

return an Array of Strings containing AKA titles



139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/imdb/imdb_movie.rb', line 139

# Collects the "Also Known As" titles from the page.
#
# Starts at the h5 inside the "Also Known As:" section and walks the sibling
# chain via #next, keeping the string form of every non-element node. Each
# kept string has its parenthesised parts removed and is stripped; duplicates
# and empty strings are dropped.
#
# @return [Array<String>] AKA titles; empty when the section is not found
def also_known_as
  node = document.search("//h5[text()^='Also Known As:']/..").at('h5')
  fragments = []
  until node.nil?
    fragments << node.to_s unless node.elem?
    node = node.next
  end
  titles = fragments.map { |raw| remove_parens(raw).strip }.uniq.compact
  titles.reject { |entry| entry.empty? }
end

#aspect_ratioObject



93
94
95
# File 'lib/imdb/imdb_movie.rb', line 93

# Extracts the aspect ratio text from the "Aspect Ratio" section.
#
# Takes the third line of the section's inner HTML, removes any
# "<tag>...</tag>" run from it, strips whitespace and unescapes HTML.
#
# @return [String, nil] the aspect ratio, or nil when any step raises
def aspect_ratio
  section = document.search("//h5[text()^='Aspect Ratio']/..")
  third_line = section.innerHTML.split("\n")[2]
  third_line.gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
rescue StandardError
  nil
end

#cast_membersObject



54
55
56
57
58
59
60
61
62
63
64
# File 'lib/imdb/imdb_movie.rb', line 54

# Builds the cast list from the rows of the cast table.
#
# For every "table.cast tr" row the actor comes from the "td.nm a" link and
# the character from the "td.char a" link; when that link text is empty the
# whole "td.char" cell is used instead.
#
# @return [Array<Array(String, String)>] [actor, character] pairs in row order
def cast_members
  document.search("table.cast tr").each_with_object([]) do |row, pairs|
    actor = row.search("td.nm a").innerHTML.strip.unescape_html
    role = row.search("td.char a").innerHTML.strip.unescape_html
    role = row.search("td.char").innerHTML.strip.unescape_html if role.empty?
    pairs << [actor, role]
  end
end

#certificationsObject

Older films may not have MPAA ratings but usually have a certification. Returns an Array of Hashes, one per certification, each with a 'country' key (the country abbreviation) and a 'rating' key (the certification string). Example: [{'country' => 'USA', 'rating' => 'Approved'}]



167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/imdb/imdb_movie.rb', line 167

# Lists the certifications shown in the "Certification:" section.
#
# Each matching link's stripped text is split at its last colon: the part
# before becomes 'country', the part after becomes 'rating'. Link texts
# without a colon are skipped. A failure while reading the links yields an
# empty list.
#
# @return [Array<Hash{String => String}>] hashes with 'country' and 'rating'
def certifications
  labels = begin
    document.search("h5[text()='Certification:'] ~ a[@href*=/List?certificates']").map { |anchor| anchor.innerHTML.strip }
  rescue StandardError
    []
  end

  labels.each_with_object([]) do |label, found|
    parts = /(.*):(.*)/.match(label)
    next unless parts

    found << { 'country' => parts[1], 'rating' => parts[2] }
  end
end

#colorObject



109
110
111
# File 'lib/imdb/imdb_movie.rb', line 109

# Reads the text of the first color-info link after the "Color:" heading.
#
# @return [String, nil] stripped, HTML-unescaped link text; nil when the
#   lookup raises (for example because no such link is found)
def color
  anchor = document.at("h5[text()='Color:'] ~ a[@href*=color-info']")
  anchor.innerHTML.strip.unescape_html
rescue StandardError
  nil
end

#companyObject



113
114
115
# File 'lib/imdb/imdb_movie.rb', line 113

# Reads the text of the first company link after the "Company:" heading.
#
# @return [String, nil] stripped, HTML-unescaped link text; nil when the
#   lookup raises (for example because no such link is found)
def company
  anchor = document.at("h5[text()='Company:'] ~ a[@href*=/company/']")
  anchor.innerHTML.strip.unescape_html
rescue StandardError
  nil
end

#countriesObject



101
102
103
# File 'lib/imdb/imdb_movie.rb', line 101

# Lists the country links that follow the "Country:" heading.
#
# @return [Array<String>] stripped, HTML-unescaped link texts; empty when
#   the lookup raises
def countries
  anchors = document.search("h5[text()='Country:'] ~ a[@href*=/Sections/Countries/']")
  anchors.map { |anchor| anchor.innerHTML.strip.unescape_html }
rescue StandardError
  []
end

#directorsObject



34
35
36
# File 'lib/imdb/imdb_movie.rb', line 34

# Lists the director names linked after the heading starting with "Director".
#
# Link texts are stripped and HTML-unescaped; the literal entry 'more' is
# discarded and duplicates are removed.
#
# @return [Array<String>] director names; empty when the lookup raises
def directors
  names = document.search("h5[text()^='Director'] ~ a").map { |anchor| anchor.innerHTML.strip.unescape_html }
  names.reject { |name| name == 'more' }.uniq
rescue StandardError
  []
end

#genresObject



81
82
83
# File 'lib/imdb/imdb_movie.rb', line 81

# Lists the genre links that follow the "Genre:" heading.
#
# @return [Array<String>] stripped, HTML-unescaped link texts; empty when
#   the lookup raises
def genres
  anchors = document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']")
  anchors.map { |anchor| anchor.innerHTML.strip.unescape_html }
rescue StandardError
  []
end

#languagesObject



105
106
107
# File 'lib/imdb/imdb_movie.rb', line 105

# Lists the language links that follow the "Language:" heading.
#
# @return [Array<String>] stripped, HTML-unescaped link texts; empty when
#   the lookup raises
def languages
  anchors = document.search("h5[text()='Language:'] ~ a[@href*=/Sections/Languages/']")
  anchors.map { |anchor| anchor.innerHTML.strip.unescape_html }
rescue StandardError
  []
end

#lengthObject



97
98
99
# File 'lib/imdb/imdb_movie.rb', line 97

# Finds the first "<digits> min" run inside the "Runtime" section.
#
# @return [String, nil] e.g. "131 min"; nil when nothing matches or the
#   lookup raises
def length
  markup = document.search("//h5[text()^='Runtime']/..").innerHTML
  markup[/\d+ min/]
rescue StandardError
  nil
end

#mpaaObject

The MPAA rating, i.e. “PG-13”



160
161
162
# File 'lib/imdb/imdb_movie.rb', line 160

# Returns the text of the "MPAA" section with every 'MPAA:' label removed
# and surrounding whitespace stripped.
#
# @return [String, nil] the rating text; nil when the lookup raises
def mpaa
  section_text = document.search("//h5[text()^='MPAA']/..").text
  section_text.gsub('MPAA:', '').strip
rescue StandardError
  nil
end

#photosObject



117
118
119
# File 'lib/imdb/imdb_movie.rb', line 117

# Collects the 'src' attribute of every ".media_strip_thumb img" element.
#
# @return [Array] the src values in document order; empty when the lookup
#   raises
def photos
  thumbnails = document.search(".media_strip_thumb img")
  thumbnails.map { |thumb| thumb['src'] }
rescue StandardError
  []
end

#plotObject



85
86
87
# File 'lib/imdb/imdb_movie.rb', line 85

# Extracts the plot text from the "Plot" section.
#
# Takes the third line of the section's inner HTML, removes any
# "<tag>...</tag>" run from it, strips whitespace and unescapes HTML.
#
# @return [String, nil] the plot text, or nil when any step raises
def plot
  section = document.search("//h5[text()^='Plot']/..")
  third_line = section.innerHTML.split("\n")[2]
  third_line.gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
rescue StandardError
  nil
end

#posterObject



46
47
48
# File 'lib/imdb/imdb_movie.rb', line 46

# Wraps the poster URL in an ImdbImage.
#
# @return [ImdbImage, nil] the image object; nil when building it raises
def poster
  ImdbImage.new(poster_url)
rescue StandardError
  nil
end

#poster_urlObject



38
39
40
# File 'lib/imdb/imdb_movie.rb', line 38

# Reads the 'href' attribute of the anchor named "poster".
#
# @return [Object, nil] the href value; nil when the lookup raises (for
#   example because no such anchor is found)
def poster_url
  anchor = document.at("a[@name='poster']")
  anchor['href']
rescue StandardError
  nil
end

#ratingObject



50
51
52
# File 'lib/imdb/imdb_movie.rb', line 50

def rating
  document.at("h5[text()='User Rating:'] ~ b").innerHTML.strip.unescape_html.split('/').first.to_f rescue nil
end

#raw_titleObject

return the raw title



122
123
124
# File 'lib/imdb/imdb_movie.rb', line 122

# Returns the unprocessed inner text of the first h1 on the page.
# Nothing is rescued here, so an error from the lookup propagates.
#
# @return [String] the inner text of the h1
def raw_title
  heading = document.at("h1")
  heading.innerText
end

#release_dateObject



74
75
76
77
78
79
# File 'lib/imdb/imdb_movie.rb', line 74

# Parses the release date from the "Release Date" section.
#
# Looks for a "D Month YYYY" run at the start of a line in the section's
# inner HTML, hands it to Chronic.parse, and rebuilds a Date from the
# result's year/month/day.
#
# @return [Date, nil] the release date; nil when any step raises
def release_date
  markup = document.search("//h5[text()^='Release Date']/..").innerHTML
  day_text = markup[/^\d{1,2} \w+ \d{4}/]
  moment = Chronic.parse(day_text)
  Date.parse(moment.strftime('%Y/%m/%d'))
rescue StandardError
  nil
end

#release_yearObject

Find the release year. Note: this is needed because not all entries on IMDb have a full release date as parsed by release_date.



134
135
136
# File 'lib/imdb/imdb_movie.rb', line 134

# Returns the first run of four digits found in the "Release Date" section.
# Nothing is rescued here, so an error from the lookup propagates.
#
# @return [String, nil] the four-digit match, or nil when there is none
def release_year
  markup = document.search("//h5[text()^='Release Date']/..").innerHTML
  markup[/\d{4}/]
end

#remove_parens(str) ⇒ Object



152
153
154
155
156
157
# File 'lib/imdb/imdb_movie.rb', line 152

# Deletes every parenthesised group from +str+, including nested ones.
#
# Innermost "(...)" groups are removed repeatedly until no "(" followed by
# ")" remains on a line. The string is modified in place and also returned.
#
# @param str [String] text to clean; mutated by this call
# @return [String] the same string object, without parenthesised groups
def remove_parens(str)
  str.gsub!(/\([^\)\(]*\)/, '') while str =~ /\(.*\)/
  str
end

#taglineObject



89
90
91
# File 'lib/imdb/imdb_movie.rb', line 89

# Extracts the tagline text from the "Tagline" section.
#
# Takes the third line of the section's inner HTML, removes any
# "<tag>...</tag>" run from it, strips whitespace and unescapes HTML.
#
# @return [String, nil] the tagline, or nil when any step raises
def tagline
  section = document.search("//h5[text()^='Tagline']/..")
  third_line = section.innerHTML.split("\n")[2]
  third_line.gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
rescue StandardError
  nil
end

#tiny_poster_urlObject



42
43
44
# File 'lib/imdb/imdb_movie.rb', line 42

# Reads the 'src' attribute of the image inside the anchor named "poster".
#
# @return [Object, nil] the src value; nil when the lookup raises (for
#   example because no such image is found)
def tiny_poster_url
  thumbnail = document.at("a[@name='poster'] img")
  thumbnail['src']
rescue StandardError
  nil
end

#titleObject



27
28
29
30
31
32
# File 'lib/imdb/imdb_movie.rb', line 27

# Returns the movie title, looking it up on the page the first time.
#
# A title already held in @title (for example one given to the constructor)
# is returned as is. Otherwise the "div#tn15title h1" heading is read, cut
# at the first '<span>', stripped and HTML-unescaped, and the result is
# stored in @title. A failed lookup stores nil, so the next call retries.
#
# @return [String, nil] the title, or nil when it cannot be read
def title
  return @title unless @title.nil?

  @title = begin
    heading = document.at("div#tn15title h1")
    heading.innerHTML.split('<span>').first.strip.unescape_html
  rescue StandardError
    nil
  end
end

#to_hashObject



181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/imdb/imdb_movie.rb', line 181

# Gathers the movie's attributes into a Hash keyed by attribute name.
#
# Each listed reader is invoked in turn; non-nil results are stored under
# the reader's name as a String. A reader that raises a StandardError is
# reported on standard output and skipped, so one broken field does not
# lose the rest. Only StandardError is rescued: Interrupt, SystemExit and
# other non-standard exceptions propagate to the caller instead of being
# swallowed.
#
# @return [Hash{String => Object}] attribute name => value, nil values omitted
def to_hash
  fields = [
    :title, :directors, :poster_url, :tiny_poster_url, :poster, :rating, :cast_members,
    :writers, :year, :genres, :plot, :tagline, :aspect_ratio, :length, :release_date,
    :countries, :languages, :color, :company, :photos, :raw_title, :release_year,
    :also_known_as, :mpaa, :certifications
  ]

  fields.each_with_object({}) do |field, hash|
    begin
      value = send(field)
      hash[field.to_s] = value unless value.nil?
    rescue StandardError => e
      puts "Error getting data for hash for #{field} - #{e}"
    end
  end
end

#to_xmlObject



198
199
200
# File 'lib/imdb/imdb_movie.rb', line 198

# Serialises the attribute hash through XmlSimple.xml_out, with attributes
# disabled ('NoAttr') and 'movie' as the root element name.
#
# @return [Object] whatever XmlSimple.xml_out returns for these options
def to_xml
  payload = to_hash
  options = { 'NoAttr' => true, 'RootName' => 'movie' }
  XmlSimple.xml_out(payload, options)
end

#to_yamlObject



202
203
204
# File 'lib/imdb/imdb_movie.rb', line 202

def to_yaml
  YAML.dump(to_hash)
end

#video_game?Boolean

is this a video game as indicated by a ‘(VG)’ in the raw title?

Returns:

  • (Boolean)


127
128
129
# File 'lib/imdb/imdb_movie.rb', line 127

# Tells whether the raw title carries the "(VG)" video-game marker.
#
# The match result is coerced to a strict boolean so the predicate returns
# true/false, as its documented Boolean return type states, rather than the
# match offset or nil.
#
# @return [Boolean] true when the raw title contains "(VG)"
def video_game?
  !!(raw_title =~ /\(VG\)/)
end

#writersObject



66
67
68
# File 'lib/imdb/imdb_movie.rb', line 66

# Lists the writer names linked after the heading starting with "Writer".
#
# Link texts are stripped and HTML-unescaped; the literal entry 'more' is
# discarded and duplicates are removed.
#
# @return [Array<String>] writer names; empty when the lookup raises
def writers
  names = document.search("h5[text()^='Writer'] ~ a").map { |anchor| anchor.innerHTML.strip.unescape_html }
  names.reject { |name| name == 'more' }.uniq
rescue StandardError
  []
end

#yearObject



70
71
72
# File 'lib/imdb/imdb_movie.rb', line 70

# Returns the inner HTML of the links whose href starts with
# "/Sections/Years/". Nothing is rescued here, so lookup errors propagate.
#
# @return [String] the inner HTML of the matching links
def year
  year_links = document.search('a[@href^="/Sections/Years/"]')
  year_links.innerHTML
end