Class: RubyGems::GemsCrawler
- Inherits:
-
Object
- Object
- RubyGems::GemsCrawler
- Defined in:
- lib/rubygems-crawler/gems_crawler.rb
Constant Summary collapse
- GRACE_PERIOD =
Seconds to sleep after each crawled gem (be gentle).
5
Instance Method Summary collapse
- #crawl(gem_name) ⇒ Object
- #crawl_from(initial_name = 'a') ⇒ Object
-
#initialize(mongo) ⇒ GemsCrawler
constructor
A new instance of GemsCrawler.
-
#save(gem_object) ⇒ Object
Save all the gem data into Mongo.
Constructor Details
#initialize(mongo) ⇒ GemsCrawler
Returns a new instance of GemsCrawler.
10 11 12 |
# File 'lib/rubygems-crawler/gems_crawler.rb', line 10

# Builds a crawler around the given Mongo handle.
#
# @param mongo [Object] handle that the other methods index as +mongo[:gems]+
#   (presumably a Mongo database object; confirm against the caller)
# @return [GemsCrawler] a new instance of GemsCrawler
def initialize(mongo)
  @mongo = mongo
end
Instance Method Details
#crawl(gem_name) ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/rubygems-crawler/gems_crawler.rb', line 24

# Fetches the info, versions and owners of a single gem and persists them.
#
# This is best-effort: any StandardError raised while fetching or saving is
# reported on standard error, together with its class and message, and then
# swallowed so that a caller iterating over many gems keeps going.
#
# @param gem_name [String] name of the gem to look up
# @return [Object, nil] the result of #save, or nil when an error was rescued
def crawl(gem_name)
  $stdout.puts "[RubyGems Web Crawler] Acquiring data for gem #{gem_name}"

  gem_object = Gems.info(gem_name)
  gem_object['versions'] = Gems.versions(gem_name)
  gem_object['owners'] = Gems.owners(gem_name)
  save(gem_object)
rescue StandardError => e
  # Do not abort the crawl, but record the cause so the failure is diagnosable.
  $stderr.puts "[RubyGems Web Crawler] Error while acquiring data for gem #{gem_name}: #{e.class}: #{e.message}"
end
#crawl_from(initial_name = 'a') ⇒ Object
14 15 16 17 18 19 20 21 22 |
# File 'lib/rubygems-crawler/gems_crawler.rb', line 14

# Crawls every document of the +:gems+ collection selected by the query
# +{owners: nil}+, sleeping GRACE_PERIOD after each gem.
#
# @param initial_name [String] currently unused; the name filter it was meant
#   for is commented out below
# @return [Object] whatever the +each_slice+ call returns
def crawl_from(initial_name = 'a')
  # name: {'$gte' => initial_name} - to filter by name
  pending = @mongo[:gems].find({ owners: nil }, { fields: ['name'] })

  # Documents are consumed in groups of ten, then handled one at a time.
  pending.each_slice(10) do |batch|
    batch.each do |doc|
      crawl(doc['name'])
      sleep GRACE_PERIOD # be nice
    end
  end
end
#save(gem_object) ⇒ Object
Save all the gem data into Mongo
37 38 39 |
# File 'lib/rubygems-crawler/gems_crawler.rb', line 37

# Save all the gem data into Mongo.
#
# Selects the document in the +:gems+ collection by the gem's name and hands
# +gem_object+ to +find_and_modify+ as the update.
#
# @param gem_object [Hash] gem data; its 'name' entry is used as the selector
# @return [Object] whatever +find_and_modify+ returns
def save(gem_object)
  gems = @mongo[:gems]
  selector = { name: gem_object['name'] }
  gems.find_and_modify(query: selector, update: gem_object)
end