Class: HttpSpell::Spider
- Inherits: Object
- Inheritance chain: Object → HttpSpell::Spider
- Defined in: lib/httpspell/spider.rb
Instance Attribute Summary
- #done ⇒ Object (readonly): Returns the value of attribute done.
- #todo ⇒ Object (readonly): Returns the value of attribute todo.
Instance Method Summary
- #initialize(starting_point, whitelist: nil, blacklist: [], verbose: false, tracing: false) ⇒ Spider (constructor): A new instance of Spider.
- #start ⇒ Object
Constructor Details
#initialize(starting_point, whitelist: nil, blacklist: [], verbose: false, tracing: false) ⇒ Spider
Returns a new instance of Spider.
11 12 13 14 15 16 17 18 19 |
# File 'lib/httpspell/spider.rb', line 11
#
# Prepares a crawl that begins at +starting_point+.
#
# @param starting_point [String] URL to begin with; it is parsed with
#   Addressable::URI.parse and becomes the first entry of #todo.
# @param whitelist [Array<Regexp>, nil] stored in @whitelist. When nil, a
#   single pattern is generated that matches any string beginning with
#   +starting_point+. (Element type inferred from that default - confirm
#   against the code that consumes @whitelist.)
# @param blacklist [Array] stored unchanged in @blacklist.
# @param verbose [Boolean] stored in @verbose.
# @param tracing [Boolean] stored in @tracing; #start prints exception
#   backtraces when it is truthy.
def initialize(starting_point, whitelist: nil, blacklist: [], verbose: false, tracing: false)
  @todo = []
  @done = []
  todo << Addressable::URI.parse(starting_point)
  # The starting point is a literal URL, not a pattern: escape it so that
  # characters such as '.', '?' and '+' are matched verbatim, and anchor with
  # \A (start of string) instead of ^ (start of any line).
  @whitelist = whitelist || [/\A#{Regexp.escape(starting_point.to_s)}/]
  @blacklist = blacklist
  @verbose = verbose
  @tracing = tracing
end
Instance Attribute Details
#done ⇒ Object (readonly)
Returns the value of attribute done.
9 10 11 |
# File 'lib/httpspell/spider.rb', line 9
#
# Reader for the @done collection. #initialize sets it to an empty array and
# #start appends each URL whose links were extracted without an error.
#
# @return [Object] the current value of @done (the same object, not a copy)
def done
  @done
end
#todo ⇒ Object (readonly)
Returns the value of attribute todo.
9 10 11 |
# File 'lib/httpspell/spider.rb', line 9
#
# Reader for the @todo collection. #initialize seeds it with the parsed
# starting point; #start pops URLs from it and pushes newly found ones.
#
# @return [Object] the current value of @todo (the same object, not a copy)
def todo
  @todo
end
Instance Method Details
#start ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/httpspell/spider.rb', line 21
#
# Works through #todo until it is empty: each URL is popped, handed to
# +links+, recorded in #done, and any links not already known are queued.
#
# An error raised by the caller's block is reported with +warn+ and does not
# affect the result. Any other StandardError raised while handling a URL is
# reported, the URL is skipped (it is not added to #done), and the overall
# result becomes false.
#
# @yield [u, d] the two values that +links+ passes to its block, forwarded
#   unchanged (presumably the URL and its parsed document - confirm against
#   #links, which is not shown here).
# @return [Boolean] true when no URL had to be skipped, false otherwise
def start
  success = true

  while todo.any?
    url = todo.pop

    begin
      extracted = links(url) do |u, d|
        yield u, d if block_given?
      rescue StandardError => e
        # A failing callback must not abort the crawl of this page.
        warn "Callback error for #{url}: #{e}"
        warn e.backtrace if @tracing
      end

      done.append(url)
      # Array#- keeps duplicates, so de-duplicate first; otherwise a page that
      # links to the same new URL twice would queue (and crawl) it twice.
      todo.concat(extracted.uniq - done - todo)
    rescue StandardError => e
      warn "Skipping #{url} because of #{e.message}"
      warn e.backtrace if @tracing
      success = false
    end
  end

  success
end