Class: RubyGems::GemsCrawler

Inherits:
Object
  • Object
show all
Defined in:
lib/rubygems-crawler/gems_crawler.rb

Constant Summary collapse

GRACE_PERIOD =

Pause, in seconds, between consecutive gem crawls (be gentle)

5

Instance Method Summary collapse

Constructor Details

#initialize(mongo) ⇒ GemsCrawler

Returns a new instance of GemsCrawler.



10
11
12
# File 'lib/rubygems-crawler/gems_crawler.rb', line 10

# Builds a crawler around the given Mongo handle and stores it in @mongo.
#
# @param mongo [Object] handle that the other methods of this class index
#   with +:gems+ to reach the gems collection
#   (presumably a Mongo database object — TODO confirm against the caller)
def initialize(mongo)
  @mongo = mongo
end

Instance Method Details

#crawl(gem_name) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
# File 'lib/rubygems-crawler/gems_crawler.rb', line 24

# Fetches a gem's info, versions and owners through the Gems client, merges
# them into one record and hands that record to #save.
#
# This is best-effort: any StandardError raised while fetching or saving is
# reported on $stderr and swallowed, so the caller never sees an exception.
#
# @param gem_name [String] name of the gem to look up
# @return [Object, nil] whatever #save returns, or nil when an error was rescued
def crawl(gem_name)
  $stdout.puts "[RubyGems Web Crawler] Acquiring data for gem #{gem_name}"

  gem_object = Gems.info(gem_name)
  gem_object['versions'] = Gems.versions(gem_name)
  gem_object['owners'] = Gems.owners(gem_name)

  save(gem_object)
rescue StandardError => e
  # Keep the established message prefix, but append the cause so a failure
  # can be diagnosed from the log alone.
  $stderr.puts "[RubyGems Web Crawler] Error while acquiring data for gem #{gem_name}: #{e.class}: #{e.message}"
  nil
end

#crawl_from(initial_name = 'a') ⇒ Object



14
15
16
17
18
19
20
21
22
# File 'lib/rubygems-crawler/gems_crawler.rb', line 14

# Crawls every document returned from the +gems+ collection for the query
# {owners: nil}, pausing GRACE_PERIOD seconds after each gem.
#
# NOTE(review): +initial_name+ is accepted but currently unused; the name
# filter it was meant for is left commented out below.
#
# @param initial_name [String] intended lower bound for gem names (not applied)
def crawl_from(initial_name = 'a')
  # name: {'$gte' => initial_name} - to filter by name
  selector = { owners: nil }
  projection = { fields: ['name'] }

  # Documents are consumed in groups of ten, then crawled one at a time.
  @mongo[:gems].find(selector, projection).each_slice(10) do |batch|
    batch.each do |doc|
      crawl(doc['name'])
      sleep GRACE_PERIOD # be nice
    end
  end
end

#save(gem_object) ⇒ Object

Save all the gem data into Mongo



37
38
39
# File 'lib/rubygems-crawler/gems_crawler.rb', line 37

# Save all the gem data into Mongo.
#
# Issues a find_and_modify on the +gems+ collection, selecting by the
# record's 'name' and passing the whole record as the update.
#
# @param gem_object [Hash] gem record; read via gem_object['name']
# @return [Object] whatever find_and_modify returns
def save(gem_object)
  gems = @mongo[:gems]
  selector = { name: gem_object['name'] }
  gems.find_and_modify(query: selector, update: gem_object)
end