Class: HttpSpell::Spider
- Inherits:
-
Object
- Object
- HttpSpell::Spider
- Defined in:
- lib/http_spell/spider.rb
Instance Attribute Summary collapse
-
#done ⇒ Object
readonly
Returns the value of attribute done.
-
#todo ⇒ Object
readonly
Returns the value of attribute todo.
Instance Method Summary collapse
-
#initialize(starting_point, included: nil, excluded: [], verbose: false, tracing: false) ⇒ Spider
constructor
A new instance of Spider.
- #start ⇒ Object
Constructor Details
#initialize(starting_point, included: nil, excluded: [], verbose: false, tracing: false) ⇒ Spider
Returns a new instance of Spider.
13 14 15 16 17 18 19 20 21 |
# File 'lib/http_spell/spider.rb', line 13
# Prepares a crawl that begins at +starting_point+.
#
# @param starting_point [String, URI] first URL to visit; converted with
#   Kernel#URI and queued as the only entry of the to-do list
# @param included [Array<Regexp>, nil] stored in @included. When nil, a single
#   pattern is built that matches any string beginning with the literal
#   starting point. (How @included is consulted is not visible in this
#   listing - presumably by the link filter; confirm against #links.)
# @param excluded [Array<Regexp>] stored in @excluded (usage not shown here)
# @param verbose [Boolean] when true, #start reports newly queued links
# @param tracing [Boolean] when true, #start prints backtraces of rescued errors
def initialize(starting_point, included: nil, excluded: [], verbose: false, tracing: false)
  @todo = [URI(starting_point)]
  @done = []
  # Escape the URL so '.', '?', '+' etc. are matched literally, and anchor with
  # \A (start of string) rather than ^ (start of any line).
  @included = included || [/\A#{Regexp.escape(starting_point.to_s)}/]
  @excluded = excluded
  @verbose = verbose
  @tracing = tracing
end
Instance Attribute Details
#done ⇒ Object (readonly)
Returns the value of attribute done.
11 12 13 |
# File 'lib/http_spell/spider.rb', line 11
# Returns the value of attribute done.
#
# The returned array is the live @done instance variable, not a copy;
# #start appends each successfully processed URL to it.
def done
  @done
end
#todo ⇒ Object (readonly)
Returns the value of attribute todo.
11 12 13 |
# File 'lib/http_spell/spider.rb', line 11
# Returns the value of attribute todo.
#
# The returned array is the live @todo instance variable, not a copy;
# #start pops URLs from it and appends newly discovered links to it.
def todo
  @todo
end
Instance Method Details
#start ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/http_spell/spider.rb', line 23
# Processes URLs from the to-do list until it is empty.
#
# Each URL is popped from the end of #todo and handed to #links. Whatever
# #links yields is forwarded to the caller's block (if one was given), and the
# value #links returns is treated as the list of links found at that URL.
# Links that are neither done nor already queued are appended to #todo.
#
# @yield [u, d] the pair yielded by #links (presumably the URL and its parsed
#   document - confirm against #links). An error raised by the block is
#   reported on stderr and swallowed; it does not affect the return value.
# @return [Boolean] true if every URL was processed without #links (or the
#   link bookkeeping) raising a StandardError, false if any URL was skipped
def start
  success = true
  while todo.any?
    url = todo.pop
    begin
      extracted = links(url) do |u, d|
        yield u, d if block_given?
      rescue StandardError => e
        # A failing callback must not abort the crawl of this URL.
        warn "Callback error for #{url}: #{e}"
        warn e.backtrace if @tracing
      end
      # Only reached when #links succeeded, so a skipped URL is never marked done.
      done.append(url)
      new_links = (extracted - done - todo).uniq
      if new_links.any?
        warn "Adding #{new_links.size} new links found at #{url}" if @verbose
        # new_links is already unique and disjoint from todo, so a plain
        # concat keeps the queue free of duplicates.
        todo.concat(new_links)
      end
    rescue StandardError => e
      warn "Skipping #{url} because of #{e.message}"
      warn e.backtrace if @tracing
      success = false
    end
  end
  success
end