Class: Crawl::Engine

Inherits:
Object
  • Object
show all
Defined in:
lib/crawl/engine.rb

Constant Summary collapse

# Baseline settings; Engine#initialize merges the caller's options over these.
DEFAULT_OPTIONS = {
  :domain     => '',
  :start      => ['/'],
  :username   => '',
  :password   => '',
  :verbose    => false,
  :session_id => false
}

# Link patterns — presumably the ones the crawler skips; the code that
# consults this list is not shown here (TODO confirm).
IGNORE = [
  /#/,
  /mailto:/,
  /skype:/,
  /logout/,
  /javascript:/,
  %r(/xhr/),
  /https:/,
  /\.pdf$/,
  /^$/,
  /tel:/
]

# NOTE(review): looks like the HTTP statuses treated as success — confirm at the call site.
VALID_RESPONSE_CODES = [200, 302]

# NOTE(review): presumably the redirect-follow limit — confirm at the call site.
MAX_REDIRECTS = 3

# NOTE(review): presumably the column width for printed output — confirm at the call site.
LINE_WIDTH = 78

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(caller_options = {}) ⇒ Engine



19
20
21
22
23
24
25
26
27
# File 'lib/crawl/engine.rb', line 19

# Builds an engine: merges the caller's options over DEFAULT_OPTIONS, encodes
# the credentials, and registers one Page per start path.
#
# @param caller_options [Hash] overrides for DEFAULT_OPTIONS; :start may be a
#   single path or a list of paths
def initialize(caller_options = {})
  @options = DEFAULT_OPTIONS.merge(caller_options)

  # strict_encode64 emits no line feeds, whereas encode64 appends "\n" and
  # wraps long output. The value is presumably sent as HTTP Basic credentials
  # (TODO confirm at the point of use), where embedded newlines are invalid.
  @authorization = Base64.strict_encode64("#{options[:username]}:#{options[:password]}")
  @register = Crawl::Register.new

  # Array() accepts nil, a lone path or a list; String does not respond to #to_a.
  start_pages = Array(options[:start]).map { |page| Page.new(@register, page, 'the command line') }

  @register.add(start_pages)
end

Instance Attribute Details

#options ⇒ Object (readonly)

Returns the value of attribute options.



17
18
19
# File 'lib/crawl/engine.rb', line 17

# Reader for @options, the hash the constructor builds by merging the
# caller's options over DEFAULT_OPTIONS. Returns the stored object itself,
# not a copy.
def options
  @options
end

Instance Method Details

#errors? ⇒ Boolean



49
50
51
# File 'lib/crawl/engine.rb', line 49

# Delegates to the register: returns whatever @register.errors? answers.
def errors?
  @register.errors?
end


53
54
55
# File 'lib/crawl/engine.rb', line 53

# Delegates to the register: returns whatever @register.no_links_found? answers.
def no_links_found?
  @register.no_links_found?
end

#process_next ⇒ Object



35
36
37
38
39
40
41
42
43
# File 'lib/crawl/engine.rb', line 35

# Hands pages from the register to `retrieve` for as long as
# @register.processing_size stays below EM.threadpool_size. Stops
# EventMachine as soon as the register reports it is finished, and returns
# quietly when the register has no next page to offer.
def process_next
  while @register.processing_size < EM.threadpool_size
    return EventMachine.stop if @register.finished?

    page = @register.next_page
    return unless page

    retrieve(page)
  end
end

#run ⇒ Object



29
30
31
32
33
# File 'lib/crawl/engine.rb', line 29

# Starts EventMachine and makes the first process_next call from inside
# its run block.
def run
  EventMachine.run { process_next }
end

#summarize ⇒ Object



45
46
47
# File 'lib/crawl/engine.rb', line 45

# Delegates to the register: returns whatever @register.summarize returns.
def summarize
  @register.summarize
end