Class: Kudzu::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/kudzu/crawler.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}, &block) ⇒ Crawler

Returns a new instance of Crawler.


14
15
16
17
18
19
20
21
# File 'lib/kudzu/crawler.rb', line 14

# Builds a crawler and the collaborators it works with.
#
# @param options [Hash] passed unchanged to Kudzu::Config.new; its :uuid
#   entry, when truthy, is also used as this crawler's identifier
# @param block forwarded to Kudzu::Config.new together with +options+
def initialize(options = {}, &block)
  # Prefer a caller-supplied id; otherwise generate a random UUID.
  @uuid = options[:uuid] || SecureRandom.uuid
  @config = Kudzu::Config.new(options, &block)

  # Frontier and Repository are looked up on whatever Kudzu.adapter returns;
  # only the frontier is given the uuid.
  @frontier = Kudzu.adapter::Frontier.new(@uuid)
  @repository = Kudzu.adapter::Repository.new
  # The agent class comes from Kudzu.agent and is built from the config above.
  @agent = Kudzu.agent.new(@config)
end

Instance Attribute Details

#agent ⇒ Object (readonly)

Returns the value of attribute agent


12
13
14
# File 'lib/kudzu/crawler.rb', line 12

# Read-only accessor for the agent.
#
# @return [Object] the instance created in #initialize by
#   Kudzu.agent.new(@config)
def agent
  @agent
end

#config ⇒ Object (readonly)

Returns the value of attribute config


11
12
13
# File 'lib/kudzu/crawler.rb', line 11

# Read-only accessor for the crawler configuration.
#
# @return [Object] the Kudzu::Config built in #initialize from the
#   constructor's options and block
def config
  @config
end

#frontier ⇒ Object (readonly)

Returns the value of attribute frontier


12
13
14
# File 'lib/kudzu/crawler.rb', line 12

# Read-only accessor for the frontier.
#
# @return [Object] the adapter's Frontier instance created in #initialize
#   with this crawler's uuid
def frontier
  @frontier
end

#repository ⇒ Object (readonly)

Returns the value of attribute repository


12
13
14
# File 'lib/kudzu/crawler.rb', line 12

# Read-only accessor for the repository.
#
# @return [Object] the adapter's Repository instance created in #initialize
def repository
  @repository
end

#uuid ⇒ Object (readonly)

Returns the value of attribute uuid


11
12
13
# File 'lib/kudzu/crawler.rb', line 11

# Read-only accessor for the crawler's identifier.
#
# @return [Object] the :uuid option given to #initialize, or the value of
#   SecureRandom.uuid when that option was missing or falsy
def uuid
  @uuid
end

Instance Method Details

#run(seed_url, &block) ⇒ Object


23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/kudzu/crawler.rb', line 23

# Seeds the frontier and runs the crawl inside the agent's session.
#
# The given block is wrapped in a Kudzu::Callback and kept in @callback.
# Crawling happens inside @agent.start, single-threaded unless
# @config.thread_num converts to an integer greater than 1.
#
# The frontier is cleared only after @agent.start returns normally; there is
# no ensure, so an exception raised while crawling leaves it untouched.
#
# @param seed_url [Object] one seed URL or a collection of them; coerced
#   with Kernel#Array, so nil results in no seeds
# @param block handed to Kudzu::Callback.new
# @return [Object] whatever @frontier.clear returns
def run(seed_url, &block)
  @callback = Kudzu::Callback.new(&block)

  seeds = Array(seed_url).map do |url|
    Kudzu::Agent::Reference.new(url: url)
  end
  # The literal 1 is passed straight to refs_to_links
  # (presumably the starting depth -- confirm against refs_to_links).
  seed_links = refs_to_links(seeds, 1)
  enqueue_links(seed_links)

  @agent.start do
    @config.thread_num.to_i > 1 ? multi_thread(@config.thread_num) : single_thread
  end

  @frontier.clear
end