Class: ImdbMovie
Instance Attribute Summary collapse
-
#id ⇒ Object
readonly
The IMDb title id given to the constructor.
-
#url ⇒ Object
readonly
The IMDb title page URL built from the id by the constructor.
Class Method Summary collapse
-
.use_html_cache ⇒ Object
This is intended to be stubbed by RSpec, where it should return true.
Instance Method Summary collapse
-
#<=>(other) ⇒ Object
add comparator so Arrays containing ImdbMovie objects can use uniq().
-
#also_known_as ⇒ Object
return an Array of Strings containing AKA titles.
- #aspect_ratio ⇒ Object
- #cast_members ⇒ Object
-
#certifications ⇒ Object
older films may not have MPAA ratings but usually have a certification.
- #color ⇒ Object
- #company ⇒ Object
- #countries ⇒ Object
- #directors ⇒ Object
- #genres ⇒ Object
-
#initialize(id, title = nil) ⇒ ImdbMovie
constructor
A new instance of ImdbMovie.
- #languages ⇒ Object
- #length ⇒ Object
-
#mpaa ⇒ Object
The MPAA rating, e.g. “PG-13”.
- #photos ⇒ Object
- #plot ⇒ Object
- #poster ⇒ Object
- #poster_url ⇒ Object
- #rating ⇒ Object
-
#raw_title ⇒ Object
return the raw title.
- #release_date ⇒ Object
-
#release_year ⇒ Object
Find the release year. Note: this is needed because not all entries on IMDB have a full release date as parsed by release_date.
- #remove_parens(str) ⇒ Object
- #tagline ⇒ Object
- #tiny_poster_url ⇒ Object
- #title ⇒ Object
- #to_hash ⇒ Object
- #to_xml ⇒ Object
- #to_yaml ⇒ Object
-
#video_game? ⇒ Boolean
Is this a video game, as indicated by a ‘(VG)’ in the raw title?
- #writers ⇒ Object
- #year ⇒ Object
Constructor Details
#initialize(id, title = nil) ⇒ ImdbMovie
Returns a new instance of ImdbMovie.
9 10 11 12 13 |
# File 'lib/imdb/imdb_movie.rb', line 9 def initialize(id, title = nil) @id = id @url = "http://www.imdb.com/title/tt#{@id}/" @title = title end |
Instance Attribute Details
#id ⇒ Object (readonly)
The IMDb title id given to the constructor.
7 8 9 |
# File 'lib/imdb/imdb_movie.rb', line 7 def id @id end |
#url ⇒ Object (readonly)
The IMDb title page URL built from the id by the constructor.
7 8 9 |
# File 'lib/imdb/imdb_movie.rb', line 7 def url @url end |
Class Method Details
.use_html_cache ⇒ Object
This is intended to be stubbed by RSpec, where it should return true.
17 18 19 |
# File 'lib/imdb/imdb_movie.rb', line 17 def self.use_html_cache false end |
Instance Method Details
#<=>(other) ⇒ Object
add comparator so Arrays containing ImdbMovie objects can use uniq()
23 24 25 |
# File 'lib/imdb/imdb_movie.rb', line 23 def <=>(other) @id <=> other.id end |
#also_known_as ⇒ Object
return an Array of Strings containing AKA titles
139 140 141 142 143 144 145 146 147 148 149 150 |
# File 'lib/imdb/imdb_movie.rb', line 139 def also_known_as el = document.search("//h5[text()^='Also Known As:']/..").at('h5') aka = [] while(!el.nil?) aka << el.to_s unless el.elem? el = el.next end aka.collect!{|a| remove_parens(a).strip} aka.uniq! aka.compact! aka.select{|a| !a.empty?} end |
#aspect_ratio ⇒ Object
93 94 95 |
# File 'lib/imdb/imdb_movie.rb', line 93 def aspect_ratio document.search("//h5[text()^='Aspect Ratio']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html rescue nil end |
#cast_members ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/imdb/imdb_movie.rb', line 54 def cast_members # document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.unescape_html } rescue [] document.search("table.cast tr").inject([]) do |result, row| a = row.search("td.nm a").innerHTML.strip.unescape_html c = row.search("td.char a").innerHTML.strip.unescape_html if c.empty? c = row.search("td.char").innerHTML.strip.unescape_html end result << [a,c] end end |
#certifications ⇒ Object
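Older films may not have MPAA ratings but usually have a certification. Returns an Array of Hashes, one per certification, each holding the country abbreviation under the 'country' key and the certification string under the 'rating' key. Example: [{'country' => 'USA', 'rating' => 'Approved'}]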
167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/imdb/imdb_movie.rb', line 167 def certifications certs = [] cert_set = document.search("h5[text()='Certification:'] ~ a[@href*=/List?certificates']").map { |link| link.innerHTML.strip } rescue [] cert_set.each do |line| if line =~ /(.*):(.*)/ cert_hash = {} cert_hash['country'] = $1 cert_hash['rating'] = $2 certs << cert_hash end end certs end |
#color ⇒ Object
109 110 111 |
# File 'lib/imdb/imdb_movie.rb', line 109 def color document.at("h5[text()='Color:'] ~ a[@href*=color-info']").innerHTML.strip.unescape_html rescue nil end |
#company ⇒ Object
113 114 115 |
# File 'lib/imdb/imdb_movie.rb', line 113 def company document.at("h5[text()='Company:'] ~ a[@href*=/company/']").innerHTML.strip.unescape_html rescue nil end |
#countries ⇒ Object
101 102 103 |
# File 'lib/imdb/imdb_movie.rb', line 101 def countries document.search("h5[text()='Country:'] ~ a[@href*=/Sections/Countries/']").map { |link| link.innerHTML.strip.unescape_html } rescue [] end |
#directors ⇒ Object
34 35 36 |
# File 'lib/imdb/imdb_movie.rb', line 34 def directors document.search("h5[text()^='Director'] ~ a").map { |link| link.innerHTML.strip.unescape_html }.reject { |w| w == 'more' }.uniq rescue [] end |
#genres ⇒ Object
81 82 83 |
# File 'lib/imdb/imdb_movie.rb', line 81 def genres document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.unescape_html } rescue [] end |
#languages ⇒ Object
105 106 107 |
# File 'lib/imdb/imdb_movie.rb', line 105 def languages document.search("h5[text()='Language:'] ~ a[@href*=/Sections/Languages/']").map { |link| link.innerHTML.strip.unescape_html } rescue [] end |
#length ⇒ Object
97 98 99 |
# File 'lib/imdb/imdb_movie.rb', line 97 def length document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/] rescue nil end |
#mpaa ⇒ Object
The MPAA rating, e.g. “PG-13”
160 161 162 |
# File 'lib/imdb/imdb_movie.rb', line 160 def mpaa document.search("//h5[text()^='MPAA']/..").text.gsub('MPAA:', '').strip rescue nil end |
#photos ⇒ Object
117 118 119 |
# File 'lib/imdb/imdb_movie.rb', line 117 def photos document.search(".media_strip_thumb img").map { |img| img['src'] } rescue [] end |
#plot ⇒ Object
85 86 87 |
# File 'lib/imdb/imdb_movie.rb', line 85 def plot document.search("//h5[text()^='Plot']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html rescue nil end |
#poster ⇒ Object
46 47 48 |
# File 'lib/imdb/imdb_movie.rb', line 46 def poster ImdbImage.new(poster_url) rescue nil end |
#poster_url ⇒ Object
38 39 40 |
# File 'lib/imdb/imdb_movie.rb', line 38 def poster_url document.at("a[@name='poster']")['href'] rescue nil end |
#rating ⇒ Object
50 51 52 |
# File 'lib/imdb/imdb_movie.rb', line 50 def rating document.at("h5[text()='User Rating:'] ~ b").innerHTML.strip.unescape_html.split('/').first.to_f rescue nil end |
#raw_title ⇒ Object
return the raw title
122 123 124 |
# File 'lib/imdb/imdb_movie.rb', line 122 def raw_title document.at("h1").innerText end |
#release_date ⇒ Object
74 75 76 77 78 79 |
# File 'lib/imdb/imdb_movie.rb', line 74 def release_date date = document.search("//h5[text()^='Release Date']/..").innerHTML[/^\d{1,2} \w+ \d{4}/] Date.parse(Chronic.parse(date).strftime('%Y/%m/%d')) rescue nil end |
#release_year ⇒ Object
Find the release year. Note: this is needed because not all entries on IMDB have a full release date as parsed by release_date.
134 135 136 |
# File 'lib/imdb/imdb_movie.rb', line 134 def release_year document.search("//h5[text()^='Release Date']/..").innerHTML[/\d{4}/] end |
#remove_parens(str) ⇒ Object
152 153 154 155 156 157 |
# File 'lib/imdb/imdb_movie.rb', line 152 def remove_parens(str) while str =~ /\(.*\)/ str.gsub!(/\([^\)\(]*\)/, '') end str end |
#tagline ⇒ Object
89 90 91 |
# File 'lib/imdb/imdb_movie.rb', line 89 def tagline document.search("//h5[text()^='Tagline']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html rescue nil end |
#tiny_poster_url ⇒ Object
42 43 44 |
# File 'lib/imdb/imdb_movie.rb', line 42 def tiny_poster_url document.at("a[@name='poster'] img")['src'] rescue nil end |
#title ⇒ Object
27 28 29 30 31 32 |
# File 'lib/imdb/imdb_movie.rb', line 27 def title if @title.nil? @title = document.at("div#tn15title h1").innerHTML.split('<span>').first.strip.unescape_html rescue nil end @title end |
#to_hash ⇒ Object
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/imdb/imdb_movie.rb', line 181 def to_hash hash = {} [:title, :directors, :poster_url, :tiny_poster_url, :poster, :rating, :cast_members, :writers, :year, :genres, :plot, :tagline, :aspect_ratio, :length, :release_date, :countries, :languages, :color, :company, :photos, :raw_title, :release_year, :also_known_as, :mpaa, :certifications ].each do |sym| begin value = send(sym.to_s) hash[sym.to_s] = value unless value.nil? rescue Exception => e puts "Error getting data for hash for #{sym} - #{e.to_s}" end end hash end |
#to_xml ⇒ Object
198 199 200 |
# File 'lib/imdb/imdb_movie.rb', line 198 def to_xml XmlSimple.xml_out(to_hash, 'NoAttr' => true, 'RootName' => 'movie') end |
#to_yaml ⇒ Object
202 203 204 |
# File 'lib/imdb/imdb_movie.rb', line 202 def to_yaml YAML.dump(to_hash) end |
#video_game? ⇒ Boolean
is this a video game as indicated by a ‘(VG)’ in the raw title?
127 128 129 |
# File 'lib/imdb/imdb_movie.rb', line 127 def video_game? raw_title =~ /\(VG\)/ end |
#writers ⇒ Object
66 67 68 |
# File 'lib/imdb/imdb_movie.rb', line 66 def writers document.search("h5[text()^='Writer'] ~ a").map { |link| link.innerHTML.strip.unescape_html }.reject { |w| w == 'more' }.uniq rescue [] end |
#year ⇒ Object
70 71 72 |
# File 'lib/imdb/imdb_movie.rb', line 70 def year document.search('a[@href^="/Sections/Years/"]').innerHTML end |