Class: Sphinxcrawl::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/sphinxcrawl/crawler.rb

Direct Known Subclasses

FileCrawler, WebCrawler

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(depth = 0) ⇒ Crawler

Returns a new instance of Crawler.



7
8
9
# File 'lib/sphinxcrawl/crawler.rb', line 7

# Builds a crawler and records the requested depth.
#
# @param depth [Object] value stored in @depth; defaults to 0
def initialize(depth = 0)
  @depth = depth
end

Instance Attribute Details

#depth ⇒ Object (readonly)

Returns the value of attribute depth.



5
6
7
# File 'lib/sphinxcrawl/crawler.rb', line 5

# Reader for the @depth instance variable.
#
# @return [Object] whatever is currently stored in @depth
def depth
  @depth
end

Instance Method Details

#pages ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/sphinxcrawl/crawler.rb', line 11

# Collects the index page plus every page reachable from it by following
# links up to +depth+ levels, and memoizes the result in @pages.
#
# The result is only memoized once the whole crawl has finished, so an
# exception raised by #get_page (or by a page's #links) does not leave a
# partial set cached for later calls.
#
# @return [Set] the index page and all pages discovered within +depth+ hops
# @return [Array] an empty array when #index returns nil/false (this result
#   is not memoized, so #index is consulted again on the next call)
def pages
  return @pages if @pages

  # Ask for the index once; it is reused as the crawl root below.
  root = index
  return [] unless root

  found = Set.new([root])
  frontier = [root]
  depth.times do
    # Nothing new was discovered on the previous level, so deeper levels
    # cannot add anything either.
    break if frontier.empty?

    # A page's links may be nil and may repeat across pages.
    urls = frontier.flat_map(&:links).compact.uniq
    # get_page may return nil for a URL; such results are dropped. Pages
    # already collected are excluded so they are not expanded again.
    frontier = Set.new(urls.map { |url| get_page(url) }.compact) - found
    found.merge(frontier)
  end

  @pages = found
end