Class: Grabber::Site

Inherits:
Object
  • Object
show all
Includes:
Util
Defined in:
lib/grabber/site.rb

Instance Method Summary collapse

Methods included from Util

#format_url, #strip_non_url_parts, #uri, #with_url_protocol

Constructor Details

#initialize(url, path) ⇒ Site

Returns a new instance of Site.



5
6
7
8
# File 'lib/grabber/site.rb', line 5

# Builds a Site for the given URL and download destination.
#
# @param url [String] site address; it is passed through
#   Util#with_url_protocol before being stored in @url (presumably to make
#   sure a scheme is present — confirm against Util)
# @param path [Object] download destination, stored unchanged in
#   @download_path
def initialize(url, path)
  normalized_url = with_url_protocol(url)
  @url = normalized_url
  @download_path = path
end

Instance Method Details

#crawl ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/grabber/site.rb', line 10

# Crawls the site starting from @url, handing each discovered page to
# #process_page once.
#
# The list of page URLs starts with the formatted site URL. After each
# page is processed, its links are formatted and those whose host equals
# this site's host are merged into the list with Array#|, so a URL that is
# already listed is not added a second time.
#
# Links that format to nil, or that URI.parse rejects as malformed, are
# skipped instead of aborting the crawl.
#
# @return [nil]
def crawl
  # NOTE(review): hoisted out of the loop on the assumption that Util#uri
  # depends only on @url, which this method never reassigns — confirm.
  site_host = uri.host
  index = 0
  page_urls = [format_url(@url)]

  # page_urls grows while it is being walked, so it is indexed manually.
  while (url = page_urls[index])
    page = process_page(url)
    # Drop nils before parsing: URI.parse(nil) raises.
    other_urls = page.links.map { |link| format_url(link) }.compact.select do |link|
      begin
        URI.parse(link).host == site_host
      rescue URI::InvalidURIError
        # A malformed link cannot be matched to this site's host.
        false
      end
    end
    page_urls |= other_urls

    index += 1
  end
end

#process_page(url) ⇒ Object



25
26
27
28
29
30
31
# File 'lib/grabber/site.rb', line 25

# Creates a Page for +url+, calls Page#crawl on it, then downloads the page
# and its assets into @download_path.
#
# @param url [Object] value handed to Page.new
# @return [Page] the page object after crawl, download and download_assets
#   have been called on it, in that order
def process_page(url)
  Page.new(url).tap do |fetched|
    fetched.crawl
    fetched.download(@download_path)
    fetched.download_assets(@download_path)
  end
end