Class: Kudzu::Crawler
- Inherits:
- Object
  - Object
    - Kudzu::Crawler
- Defined in:
- lib/kudzu/crawler.rb
Instance Attribute Summary collapse
-
#agent ⇒ Object
readonly
Returns the value of attribute agent.
-
#config ⇒ Object
readonly
Returns the value of attribute config.
-
#frontier ⇒ Object
readonly
Returns the value of attribute frontier.
-
#repository ⇒ Object
readonly
Returns the value of attribute repository.
-
#uuid ⇒ Object
readonly
Returns the value of attribute uuid.
Instance Method Summary collapse
-
#initialize(options = {}, &block) ⇒ Crawler
constructor
A new instance of Crawler.
- #run(seed_url, &block) ⇒ Object
Constructor Details
#initialize(options = {}, &block) ⇒ Crawler
Returns a new instance of Crawler.
14 15 16 17 18 19 20 21 |
# File 'lib/kudzu/crawler.rb', line 14 def initialize(options = {}, &block) @uuid = options[:uuid] || SecureRandom.uuid @config = Kudzu::Config.new(options, &block) @frontier = Kudzu.adapter::Frontier.new(@uuid) @repository = Kudzu.adapter::Repository.new @agent = Kudzu.agent.new(@config) end |
Instance Attribute Details
#agent ⇒ Object (readonly)
Returns the value of attribute agent.
12 13 14 |
# File 'lib/kudzu/crawler.rb', line 12 def agent @agent end |
#config ⇒ Object (readonly)
Returns the value of attribute config.
11 12 13 |
# File 'lib/kudzu/crawler.rb', line 11 def config @config end |
#frontier ⇒ Object (readonly)
Returns the value of attribute frontier.
12 13 14 |
# File 'lib/kudzu/crawler.rb', line 12 def frontier @frontier end |
#repository ⇒ Object (readonly)
Returns the value of attribute repository.
12 13 14 |
# File 'lib/kudzu/crawler.rb', line 12 def repository @repository end |
#uuid ⇒ Object (readonly)
Returns the value of attribute uuid.
11 12 13 |
# File 'lib/kudzu/crawler.rb', line 11 def uuid @uuid end |
Instance Method Details
#run(seed_url, &block) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/kudzu/crawler.rb', line 23 def run(seed_url, &block) @callback = Kudzu::Callback.new(&block) seed_refs = Array(seed_url).map { |url| Kudzu::Agent::Reference.new(url: url) } enqueue_links(refs_to_links(seed_refs, 1)) @agent.start do if @config.thread_num.to_i <= 1 single_thread else multi_thread(@config.thread_num) end end @frontier.clear end |