7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
|
# File 'lib/pageinfo.rb', line 7
# Crawls the site starting at +url+, following links discovered on each
# fetched page, and writes one CSV row per page (url, status, time, title,
# description, keyword) to pageinfo.csv in the current directory.
#
# Side effects: resets @@no, sets @@main_host from the start URL's host,
# and stores the pending link queue in @links (read by the helper methods).
#
# @param url [String] the starting URL to crawl
# @return [Integer] number of bytes written to pageinfo.csv (File#write)
def self.detect(url)
  # CSV header row; get_info is expected to emit columns in this order.
  content = ["url", "status", "time", "title", "description", "keyword"].join(",")
  content << new_line
  @@no = 0
  @@main_host = get_host(URI.parse(url))
  scrapped_links, scrapped_urls = [url], [url]
  conn = Typhoeus.get(url)
  page = Nokogiri::HTML(conn.body)
  content << get_info(conn, page)
  content << new_line
  @links = get_page_links(page)
  # Breadth-first crawl: @links is the work queue; drain it until empty.
  while (link = @links.shift)
    full_url = get_full_url(link)
    next if full_url.nil?
    # Skip URLs already visited, including trailing-slash and "#" variants.
    next unless (scrapped_urls & [full_url, "#{full_url}/", "#{full_url}/#"]).empty?
    # NOTE(review): no error handling around the HTTP request — a failed
    # fetch still produces a row via get_info; confirm that is intended.
    conn = Typhoeus.get(full_url)
    page = Nokogiri::HTML(conn.body)
    content << get_info(conn, page)
    content << new_line
    scrapped_links << link
    scrapped_urls << full_url
    # Enqueue only links that are neither already queued nor already scraped.
    new_links = get_page_links(page) - @links - scrapped_links
    @links += new_links unless new_links.empty?
  end
  File.open("pageinfo.csv", "w") { |file| file.write content }
end
|