Class: Kudzu::Crawler
- Inherits:
-
Object
- Object
- Kudzu::Crawler
- Defined in:
- lib/kudzu/crawler.rb
Instance Attribute Summary collapse
-
#agent ⇒ Object
readonly
Returns the value of attribute agent.
-
#config ⇒ Object
readonly
Returns the value of attribute config.
-
#frontier ⇒ Object
readonly
Returns the value of attribute frontier.
-
#repository ⇒ Object
readonly
Returns the value of attribute repository.
-
#uuid ⇒ Object
readonly
Returns the value of attribute uuid.
Instance Method Summary collapse
-
#initialize(options = {}, &block) ⇒ Crawler
constructor
A new instance of Crawler.
- #run(seed_url, &block) ⇒ Object
Constructor Details
#initialize(options = {}, &block) ⇒ Crawler
Returns a new instance of Crawler.
16 17 18 19 20 21 22 23 |
# File 'lib/kudzu/crawler.rb', line 16
def initialize(options = {}, &block)
  @uuid = options[:uuid] || SecureRandom.uuid
  @config = Kudzu::Config.new(options, &block)
  @frontier = Kudzu.adapter::Frontier.new(@uuid)
  @repository = Kudzu.adapter::Repository.new
  @agent = Kudzu.agent.new(@config)
end
Instance Attribute Details
#agent ⇒ Object (readonly)
Returns the value of attribute agent.
14 15 16 |
# File 'lib/kudzu/crawler.rb', line 14 def agent @agent end |
#config ⇒ Object (readonly)
Returns the value of attribute config.
13 14 15 |
# File 'lib/kudzu/crawler.rb', line 13 def config @config end |
#frontier ⇒ Object (readonly)
Returns the value of attribute frontier.
14 15 16 |
# File 'lib/kudzu/crawler.rb', line 14 def frontier @frontier end |
#repository ⇒ Object (readonly)
Returns the value of attribute repository.
14 15 16 |
# File 'lib/kudzu/crawler.rb', line 14 def repository @repository end |
#uuid ⇒ Object (readonly)
Returns the value of attribute uuid.
13 14 15 |
# File 'lib/kudzu/crawler.rb', line 13 def uuid @uuid end |
Instance Method Details
#run(seed_url, &block) ⇒ Object
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/kudzu/crawler.rb', line 25
def run(seed_url, &block)
  @callback = Kudzu::Callback.new(&block)
  seed_refs = Array(seed_url).map { |url| Kudzu::Agent::Reference.new(url: url) }
  enqueue_links(refs_to_links(seed_refs, 1))
  @agent.start do
    if @config.thread_num.to_i <= 1
      single_thread
    else
      multi_thread(@config.thread_num)
    end
  end
  @frontier.clear
end