Module: Pageinfo

Defined in:
lib/pageinfo.rb,
lib/pageinfo/version.rb

Constant Summary

VERSION =
"0.2.4"

Class Method Summary

Class Method Details

.detect(url) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/pageinfo.rb', line 7

# Crawls pages starting at +url+ and writes the collected rows to
# "pageinfo.csv" (a relative path, so it lands in the current working
# directory), overwriting any existing file.
#
# The output starts with the header row
# "url,status,time,title,description,keyword". One entry produced by
# +get_info+ is appended for the start page and for every further page that
# is fetched, each followed by +new_line+.
#
# Links to follow come from +get_page_links+ and are resolved through
# +get_full_url+; a link is skipped when that helper returns nil or when the
# resolved URL (also tried with a trailing "/" or "/#") was already fetched.
#
# Side effects: resets @@no to 0, sets @@main_host, and uses @links as the
# queue of pending links. NOTE(review): @@no and @@main_host are presumably
# read by the helper methods (not visible here) -- confirm before changing.
#
# @param url [String] address of the first page to fetch
# @return [Integer] whatever File#write returns for the written content
def self.detect(url)
  csv = %w[url status time title description keyword].join(",")
  csv << new_line

  @@no = 0
  @@main_host = get_host(URI.parse(url))
  seen_links = [url]
  seen_urls = [url]

  response = Typhoeus.get(url)
  document = Nokogiri::HTML(response.body)

  csv << get_info(response, document)
  csv << new_line

  @links = get_page_links(document)
  # Drain the queue; pages fetched along the way may append more links to it.
  while (link = @links.shift)
    full_url = get_full_url(link)
    next if full_url.nil?

    # Treat "x", "x/" and "x/#" as the same page when checking for repeats.
    candidates = [full_url, "#{full_url}/", "#{full_url}/#"]
    next if candidates.any? { |candidate| seen_urls.include?(candidate) }

    response = Typhoeus.get(full_url)
    document = Nokogiri::HTML(response.body)
    csv << get_info(response, document)
    csv << new_line

    seen_links << link
    seen_urls << full_url

    # Only enqueue links that are neither pending nor already processed.
    discovered = get_page_links(document) - @links - seen_links
    @links += discovered unless discovered.empty?
  end

  File.open("pageinfo.csv", "w") { |file| file.write csv }
end