Class: Monet::CaptureMap::PathSpider

Inherits:
PathCollection show all
Includes:
PageLogger::Helpers
Defined in:
lib/monet/capture_map.rb

Constant Summary collapse

# File extensions to skip; #ignores turns each entry into a Regexp.
SKIP_EXT =
%w(js css png jpg mp4 txt zip ico ogv ogg pdf gz)
# Extra skip patterns that #ignores appends unchanged after the extension
# patterns. The single entry matches any string containing a "?" (that is,
# anything carrying a query string).
SKIP_PATHS =
[/\?.*/]

Instance Attribute Summary

Attributes inherited from PathCollection

#root_url

Instance Method Summary collapse

Methods included from PageLogger::Helpers

#failed?, #log_page

Methods inherited from PathCollection

#add, #initialize, #normalized_path

Constructor Details

This class inherits a constructor from Monet::CaptureMap::PathCollection

Instance Method Details

#ignores ⇒ Object



55
56
57
# File 'lib/monet/capture_map.rb', line 55

# Builds the list of link patterns handed to the spider as links to ignore.
#
# Each SKIP_EXT entry becomes a pattern that matches only an actual file
# extension: a literal dot, the extension, and then either the end of the
# string or the start of a query string / fragment. Compiling the bare
# extension on its own would match those letters anywhere in a URL (for
# example "js" inside "/people/jsmith"), causing ordinary pages to be skipped.
#
# @return [Array<Regexp>] the extension patterns followed by SKIP_PATHS;
#   a new array on every call, SKIP_PATHS itself is not modified
def ignores
  extension_patterns = SKIP_EXT.map do |ext|
    /\.#{Regexp.escape(ext)}(?=[?#]|\z)/
  end

  extension_patterns + SKIP_PATHS
end

#paths ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
# File 'lib/monet/capture_map.rb', line 43

# Returns the collected paths, crawling the site on first use.
#
# If @paths already holds entries they are returned as-is. Otherwise the site
# at @root_url is spidered with Spidr (skipping links that match #ignores),
# every visited page's URL and response code is passed to log_page, and the
# spider result is handed to normalize (defined elsewhere; presumably it
# fills @paths -- confirm against PathCollection).
#
# @return [Array] @paths with duplicates removed
def paths
  return @paths unless @paths.empty?

  results = Spidr.site(@root_url, ignore_links: ignores) do |spider|
    spider.every_page { |page| log_page page.url, page.code }
  end

  normalize results

  # Array#uniq! returns nil when there was nothing to remove, so it must not
  # be the method's last expression; return the array itself instead.
  @paths.uniq!
  @paths
end