16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
|
# File 'lib/congress-scrapper.rb', line 16
# Scrapes every initiative returned by the congreso.es advanced search for
# the "Competencia Legislativa Plena" type.
#
# Submits the search form, then walks the paginated result list by following
# the "Siguiente" link until there is none. Every link matching
# ".titulo_iniciativa a" on a result page is fetched and turned into a hash
# by #scrape_proposal. A progress bar is advanced once per proposal.
#
# @return [Array<Hash>] one hash per proposal, in listing order, with the
#   keys :title, :official_url, :proposal_type, :closed_at,
#   :official_resolution, :category_name, :proposer_name and :proposed_at
def scrape
  search_page = agent.get("http://www.congreso.es/portal/page/portal/Congreso/Congreso/Iniciativas/Busqueda%20Avanzada")
  search_form = search_page.form_with(:action => /enviarCgiBuscadorAvIniciativas/)
  search_form["TPTR"] = "Competencia Legislativa Plena"
  results_page = search_form.submit

  # The counter is only used to size the progress bar, so a results page
  # without it must not abort the whole scrape.
  counter = results_page.search("//*[contains(text(), 'Iniciativas encontradas')]/span").first
  total_results = counter ? counter.text.to_i : 0
  progress = ProgressBar.new("Scrapping", total_results)

  proposals = []
  while results_page
    results_page.search(".titulo_iniciativa a").each do |title|
      proposals << scrape_proposal(title)
      progress.inc
    end
    next_page = results_page.link_with(:text => /Siguiente/)
    results_page = next_page && next_page.click
  end
  progress.finish
  proposals
end

# Fetches the page a result link points to and extracts the proposal data.
#
# The fetched page is stored in @proposal_page before any text_for call
# (presumably text_for reads it from there -- confirm against its definition).
#
# @param title [#[], #content] result link node; title[:href] is the
#   proposal URL and title.content its title
# @return [Hash] the proposal attributes (see #scrape for the keys)
def scrape_proposal(title)
  href = title[:href]
  @proposal_page = agent.get(href)

  # Most fields are rendered as a label node followed by a sibling holding the value.
  field_xpath = "//*[@class='apartado_iniciativa' and contains(normalize-space(text()),'%s')]/following-sibling::*[@class='texto']"

  proposal_type   = clean_text(text_for(".subtitulo_competencias"))
  resolution      = clean_text(text_for(field_xpath % "Resultado de la tramitación"))
  commission_name = clean_text(text_for(field_xpath % "Comisión competente:"))
  proposer_name   = clean_text(text_for(field_xpath % "Autor:"))
  proposed_at     = parse_dmy_date(text_for("//*[@class='texto' and contains(normalize-space(text()),'Presentado el')]"),
                                   /Presentado\s+el\s+(\d\d)\/(\d\d)\/(\d\d\d\d)/)
  closed_at       = parse_dmy_date(text_for(field_xpath % "Tramitación seguida por la iniciativa:"),
                                   /Concluido\s+.+\s+desde (\d\d)\/(\d\d)\/(\d\d\d\d)/)

  # Only prefix the host when the href is not already an absolute URL.
  official_url = href =~ %r{\Ahttps?://}i ? href : "http://www.congreso.es" + href

  {:title => clean_text(title.content),
   :official_url => official_url,
   :proposal_type => proposal_type,
   :closed_at => closed_at,
   :official_resolution => resolution,
   :category_name => category(commission_name),
   :proposer_name => proposer(proposer_name),
   :proposed_at => proposed_at}
end
private :scrape_proposal

# Builds a Date from a dd/mm/yyyy occurrence inside +text+.
#
# @param text [String, nil] text to inspect
# @param pattern [Regexp] pattern whose captures 1, 2 and 3 are day, month and year
# @return [Date, nil] the date, or nil when +text+ is nil, the pattern does
#   not match, or the captured numbers are not a real calendar date
def parse_dmy_date(text, pattern)
  match = text && text.match(pattern)
  return nil unless match

  day, month, year = match[1].to_i, match[2].to_i, match[3].to_i
  # Date.new raises on impossible dates; one malformed page should not
  # discard every proposal scraped so far.
  Date.valid_civil?(year, month, day) ? Date.new(year, month, day) : nil
end
private :parse_dmy_date
|