Class: GHADownloader

Inherits:
Object
  • Object
show all
Includes:
GHAUtils
Defined in:
lib/gh-archive.rb

Instance Method Summary collapse

Methods included from GHAUtils

#each_time, #get_gha_filename, #read_gha_file, #read_gha_file_content

Constructor Details

#initialize(folder, decompress = false) ⇒ GHADownloader



442
443
444
445
446
447
448
449
450
# File 'lib/gh-archive.rb', line 442

# Builds a downloader that stores its files in +folder+.
#
# The logger defaults to one writing on STDERR and no limit on the number
# of kept files is set (@max is nil).
#
# @param folder [String] destination path; created with Dir.mkdir when
#   nothing exists at that path yet
# @param decompress [Boolean] stored as-is in @decompress
# @raise [RuntimeError] when +folder+ exists but is not a directory
def initialize(folder, decompress = false)
    @folder = folder
    @decompress = decompress
    @max = nil
    @logger = Logger.new(STDERR)

    # Only attempt creation when the path is completely absent; an existing
    # regular file falls through to the directory check below.
    unless FileTest.exist?(@folder)
        Dir.mkdir(@folder)
    end
    return if FileTest.directory?(@folder)

    raise "A file exist with the desired folder name #{folder}"
end

Instance Method Details

#download(from = Time.gm(2015, 1, 1), to = Time.now) ⇒ Object



461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
# File 'lib/gh-archive.rb', line 461

# Downloads one archive file per time step between +from+ and +to+ into
# @folder, skipping every file that is already present locally.
#
# When @max is set, only the @max most recently downloaded files of this
# call are kept on disk; older ones are deleted as new ones arrive.
#
# @param from [Time] start of the range handed to each_time
# @param to [Time] end of the range handed to each_time
# @yield [filename] the remote archive file name (as returned by
#   get_gha_filename) after each completed download
# @return [Object] whatever each_time returns
def download(from = Time.gm(2015, 1, 1), to = Time.now)
    archive = []
    each_time(from, to) do |current_time|
        filename = get_gha_filename(current_time)
        # Non-mutating gsub: works even if the helper returns a frozen string.
        out_filename = @decompress ? filename.gsub(".json.gz", ".json") : filename.clone

        target_file = File.join(@folder, out_filename)
        if FileTest.exist?(target_file)
            @logger.info("Skipping existing file for #{current_time}")
            next
        end
        @logger.info("Downloading file for #{current_time}")

        completed = false
        begin
            # Binary mode keeps the gzip payload byte-exact on platforms
            # that translate newlines in text mode.
            File.open(target_file, 'wb') do |f|
                URI.open("http://data.gharchive.org/#{filename}") do |gz|
                    f << (@decompress ? read_gha_file_content(gz) : gz.read)
                end
            end
            completed = true
        ensure
            # A failed or interrupted transfer must not leave an empty or
            # truncated file behind: the existence check above would treat
            # it as already downloaded on the next run. The original error
            # (if any) keeps propagating after this cleanup.
            File.unlink(target_file) if !completed && FileTest.exist?(target_file)
        end
        archive << target_file

        # Evict the oldest file downloaded by this call once the cap is exceeded.
        if @max && archive.size > @max
            last = archive.shift
            @logger.info("Removing local file #{last}")
            File.unlink(last)
        end

        yield filename if block_given?
    end
end

#logger=(logger) ⇒ Object



457
458
459
# File 'lib/gh-archive.rb', line 457

# Replaces the logger used by this downloader.
#
# @param logger [Object] stored in @logger; #download calls +info+ on it
# @return [Object] the logger that was passed in
def logger=(logger)
    instance_variable_set(:@logger, logger)
end

#max(max) ⇒ Object



452
453
454
455
# File 'lib/gh-archive.rb', line 452

# Sets the cap stored in @max, which #download compares against the number
# of files it has fetched before removing the oldest one.
#
# @param max [Integer, nil] new value for @max
# @return [self] the receiver, so calls can be chained
def max(max)
    @max = max
    self
end