Class: Anaximander::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/anaximander/crawler.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ Crawler

Returns a new instance of Crawler.



5
6
7
8
9
# File 'lib/anaximander/crawler.rb', line 5

# Builds a crawler rooted at +url+.
#
# Sets three instance variables:
# * @url     - +url+ with one trailing "/" removed
# * @root    - Page.new(url), built from the argument exactly as given
# * @visited - list of links that #visit must not fetch again
#
# @param url [String] address of the page the crawl starts from
def initialize(url)
  @url = url.chomp("/")
  @root = Page.new(url)
  # #crawl strips a trailing "/" from every link before handing it to #visit,
  # so the root has to be recorded in that normalized form or a link back to
  # the root would be fetched a second time. The raw form is kept as well so
  # anything that was treated as visited before still is.
  @visited = [@url, url].uniq
end

Instance Attribute Details

#root ⇒ Object (readonly)

Returns the value of attribute root.



3
4
5
# File 'lib/anaximander/crawler.rb', line 3

# Reader for @root, which #initialize sets to Page.new(url). #crawl uses this
# as its default starting page.
def root
  @root
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



3
4
5
# File 'lib/anaximander/crawler.rb', line 3

# Reader for @url, which #initialize sets to the constructor argument with
# one trailing "/" removed (String#chomp).
def url
  @url
end

Instance Method Details

#crawl(page = self.root) ⇒ Object



11
12
13
14
# File 'lib/anaximander/crawler.rb', line 11

# Walks the link graph reachable from +page+, filling in the children of
# every page it reaches.
#
# For each page, every link has one trailing "/" removed and is passed to
# #visit. The nil results (#visit returns nil for links it has already seen
# and for inaccessible pages) are dropped and the remainder is assigned to
# page.children. Those children are then processed the same way, depth-first
# and in link order.
#
# The traversal uses an explicit stack instead of recursion so that a long
# chain of pages (e.g. thousands of "next" links) cannot exhaust the Ruby
# call stack and raise SystemStackError.
#
# @param page [#links, #children, #children=] page to start from; defaults
#   to #root
# @return [Object] page.children of the starting page
def crawl(page = root)
  pending = [page]

  until pending.empty?
    current = pending.pop
    current.children = current.links.map { |link| visit(link.chomp("/")) }.compact
    # Push in reverse so the first child is popped next; this keeps the same
    # pre-order visiting sequence the recursive version produced.
    pending.concat(current.children.reverse)
  end

  page.children
end

#logger ⇒ Object



27
28
29
# File 'lib/anaximander/crawler.rb', line 27

# Returns Anaximander.logger, the logger #visit writes its debug line to.
def logger
  Anaximander.logger
end

#visit(link) ⇒ Object



16
17
18
19
20
21
22
23
24
25
# File 'lib/anaximander/crawler.rb', line 16

# Fetches +link+ once.
#
# A link already present in @visited is skipped. Otherwise the link is
# logged at debug level, appended to @visited, and wrapped in a new Page.
# The link is recorded before the Page is built, so a link whose page
# turns out to be inaccessible stays in @visited.
#
# @param link [String] address to fetch
# @return [Page, nil] the new Page, or nil when the link was already visited
#   or Page.new raised Anaximander::PageNotAccessibleError
def visit(link)
  if @visited.include?(link)
    nil
  else
    logger.debug(link)
    @visited.push(link)
    Page.new(link)
  end
rescue Anaximander::PageNotAccessibleError
  nil
end