25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
|
# File 'lib/imdb/imdb.rb', line 25
# Fetches the IMDb page for +id+ and scrapes it into a new ImdbMovie.
#
# The document at IMDB_MOVIE_BASE_URL + id is parsed with Hpricot. The title,
# rating, poster URL, each labelled "div.info" section and every row of
# "table.cast" are copied onto the movie.
#
# @param id [String] IMDb identifier, appended verbatim to IMDB_MOVIE_BASE_URL
# @return [ImdbMovie] the populated movie. Attributes whose markup is missing
#   keep whatever ImdbMovie.new gave them, except poster_url and certification
#   (set to nil when absent) and release_date (set to nil when parsing fails).
# @raise [NoMethodError] when the page has no <meta name="title"> element
#   (the lookup yields nil and is indexed unconditionally)
# @note Failures raised by +open+ are not rescued here and propagate to the caller.
def self.find_movie_by_id(id)
  coder = HTMLEntities.new
  data = Hpricot(open(IMDB_MOVIE_BASE_URL + id))

  movie = ImdbMovie.new
  movie.imdb_id = id

  # Remove parenthesised markers such as "(2008)", "(2008/I)" or "(TV)" from the meta title.
  raw_title = data.at("meta[@name='title']")['content']
  movie.title = coder.decode(raw_title.gsub(/\((\d{4}(\/[^)]*)?|[A-Z]+)\)/, '').strip)

  # The rating is kept as the matched String (e.g. "8.1"); it is left untouched when no "x/10" text exists.
  rating_text = (data/"div.starbar-meta/b").inner_text
  movie.rating = $1 if rating_text =~ /([\d\.]+)\/10/

  # A page without a poster simply yields nil; no exception is needed to detect that.
  poster_img = data.at("div.photo/a[@name='poster']/img")
  movie.poster_url = poster_img && poster_img['src']

  # Each "div.info" section is identified by the text of its <h5> label.
  (data/"div.info").each do |info|
    case (info/"h5").inner_text
    when /Directors?:/
      movie.directors = parse_names(info)
    when /Writers?[^:]+:/
      movie.writers = parse_names(info)
    when /Company:/
      movie.company = parse_company(info)
    when "Tagline:"
      movie.tagline = coder.decode(parse_info(info).strip)
    when "Runtime:"
      # Drop a leading "<label>:" prefix and everything following the first "min ".
      runtime = parse_info(info).strip
      movie.runtime = runtime.gsub(/^[^:]+:\s*/, '').gsub(/min .*/, 'min')
    when "Plot:"
      # Strip the trailing navigation link texts, in the same order as before.
      plot = parse_info(info).strip
      [
        /\s*\|\s*add synopsis$/,
        /\s*\|\s*full synopsis$/,
        /\s*\|\s*add summary$/,
        /full summary$/,
        /more$/
      ].each do |link_text|
        plot = plot.gsub(link_text, '')
      end
      movie.plot = coder.decode(plot.strip)
    when "Genre:"
      movie.genres = parse_genres(info)
    when "Release Date:"
      # Best effort: text shaped like "18 July 2008" is reordered for Date.parse;
      # any failure leaves the release date as nil.
      begin
        if parse_info(info).strip =~ /(\d{1,2}) ([a-zA-Z]+) (\d{4})/
          movie.release_date = Date.parse("#{$2} #{$1}, #{$3}")
        end
      rescue
        movie.release_date = nil
      end
    when "Certification:"
      # First link whose text starts with "USA:" and does not mention "Unrated", minus the prefix.
      usa_text = (info/"a").map { |link| link.inner_html }.find do |text|
        text =~ /^USA:/ && text !~ /Unrated/
      end
      movie.certification = usa_text && usa_text.sub(/^USA:/, '').strip
    end
  end

  (data/"table.cast"/"tr").each do |row|
    name_cell = row/"td.nm"
    actor_id = name_cell.inner_html[/name\/([^"]+)\//, 1]
    actor_name = coder.decode((name_cell/"a").inner_text)

    # Rows without an actor link (table headers/separators) carry neither an id
    # nor a name; skip them instead of recording an empty cast member.
    next if actor_id.nil? && actor_name.to_s.empty?

    actor_role = coder.decode((row/"td.char").inner_text)
    movie.actors = movie.actors << ImdbName.new(actor_id, actor_name, actor_role)
  end

  movie
end
|