14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
# File 'lib/rfilma/crawler.rb', line 14
# Downloads the FilmAffinity page of a film and scrapes its data sheet.
#
# The page is fetched through @a (any object whose #get(url) returns a
# response exposing #body; presumably an HTTP agent created elsewhere in
# this class -- confirm) and parsed with Nokogiri::HTML.
#
# @param id [Object] film identifier; interpolated into the page URL and
#   stored unchanged under "id"
# @return [Hash] string-keyed hash. "id", "titulo", "puntuacion" and
#   "portada" are always present ("portada" is nil when the page has no
#   poster link or image). The remaining keys -- "titulo_original", "año",
#   "duracion", "pais", "director", "guion", "musica", "fotografia",
#   "reparto", "productora", "genero", "web", "sinopsis" -- are set only
#   when the matching <dt> label appears in the "movie-info" list.
def obtener_pelicula(id)
  page = @a.get("http://www.filmaffinity.com/es/film#{id}.html").body
  doc = Nokogiri::HTML(page)

  data = {}
  data["id"] = id
  data["titulo"] = doc.xpath("//h1[@id='main-title']/a/span").text
  # Swap the decimal comma for a dot so to_f keeps the fractional part.
  data["puntuacion"] = doc.xpath('//div[@id="movie-rat-avg"]').text.strip.tr(",", ".").to_f

  # Prefer the link wrapping the poster; fall back to the bare image.
  # Explicit nil checks replace the former rescue-as-control-flow, which
  # still crashed when neither element was present.
  poster_link = doc.xpath('//div[@id="movie-main-image-container"]/a').first
  poster_image = doc.xpath('//div[@id="movie-main-image-container"]/img').first
  data["portada"] =
    if poster_link
      poster_link["href"]
    elsif poster_image
      poster_image["src"]
    end

  # Shared by every field that is a comma-separated list of names.
  comma_list = lambda { |node| node.text.split(",").map(&:strip) }

  doc.xpath('//dl[@class="movie-info"]/dt').each do |m|
    dd = m.next_element
    # A label with no following element has no value to read.
    next if dd.nil?

    dt = m.inner_html
    case
    when dt.include?("Título original")
      data["titulo_original"] = dd.text
    when dt.include?("Año")
      data["año"] = dd.text.to_i
    when dt.include?("Duración")
      # First run of digits anywhere in the text. The previous '(\d*)'
      # pattern matched the empty string at position 0 whenever the text
      # did not start with a digit, producing 0.
      data["duracion"] = dd.text[/\d+/].to_i
    when dt.include?("País")
      # The country name is read from the flag image's title attribute.
      flag = dd.at('img')
      data["pais"] = flag && flag['title']
    when dt.include?("Director")
      data["director"] = dd.search('a').map { |e| e.inner_html.strip }
    when dt.include?("Guión")
      data["guion"] = comma_list.call(dd)
    when dt.include?("Música")
      data["musica"] = comma_list.call(dd)
    when dt.include?("Fotografía")
      data["fotografia"] = comma_list.call(dd)
    when dt.include?("Reparto")
      data["reparto"] = comma_list.call(dd)
    when dt.include?("Productora")
      data["productora"] = dd.text
    when dt.include?("Género")
      data["genero"] = dd.search('a').map(&:inner_html)
    when dt.include?("Web")
      data["web"] = dd.text
    when dt.include?("Sinopsis")
      data["sinopsis"] = dd.text
    end
  end

  data
end
|