Class: GHADownloader

Inherits:
Object
  • Object
show all
Includes:
GHAUtils
Defined in:
lib/gh-archive.rb

Instance Method Summary collapse

Methods included from GHAUtils

#each_time, #get_gha_filename, #read_gha_file, #read_gha_file_content

Constructor Details

#initialize(folder, decompress = false) ⇒ GHADownloader

Returns a new instance of GHADownloader.



432
433
434
435
436
437
438
439
440
# File 'lib/gh-archive.rb', line 432

# Builds a downloader that stores archive files in +folder+.
#
# The folder is created when nothing exists at that path. The logger defaults
# to one writing to STDERR and no limit on kept files is set.
#
# @param folder [String] path of the directory that receives the files
# @param decompress [Boolean] stored as-is and consulted later by #download
# @raise [RuntimeError] when +folder+ exists but is not a directory
def initialize(folder, decompress = false)
    @folder = folder
    @decompress = decompress
    @max = nil
    @logger = Logger.new(STDERR)

    # Only the last path component is created (Dir.mkdir is not recursive).
    Dir.mkdir(@folder) unless File.exist?(@folder)
    return if File.directory?(@folder)

    raise "A file exist with the desired folder name #{folder}"
end

Instance Method Details

#download(from = Time.gm(2015, 1, 1), to = Time.now) ⇒ Object



451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
# File 'lib/gh-archive.rb', line 451

# Downloads one archive file for every time yielded by +each_time+ (included
# from GHAUtils) for the range +from+..+to+, storing it in the target folder.
#
# Files already present in the folder are skipped. When a limit was set through
# #max, the oldest file fetched by this call is deleted as soon as more than
# that many files have been downloaded.
#
# Every file is first written to "<target>.part" and renamed to its final name
# only after the transfer finished. A failed download therefore never leaves an
# empty or truncated file behind that a later run would skip as "existing".
#
# @param from [Time] start of the range, handed to each_time
# @param to [Time] end of the range, handed to each_time
# @yield [filename] remote archive file name, after each completed download
# @return [Object] whatever each_time returns
# @raise any error raised by URI.open or the file operations; the partial file
#   is removed before the error propagates
def download(from = Time.gm(2015, 1, 1), to = Time.now)
    archive = []
    each_time(from, to) do |current_time|
        filename = get_gha_filename(current_time)
        out_filename = @decompress ? filename.gsub(".json.gz", ".json") : filename

        target_file = File.join(@folder, out_filename)
        if FileTest.exist?(target_file)
            @logger.info("Skipping existing file for #{current_time}")
            next
        end
        @logger.info("Downloading file for #{current_time}")

        partial_file = "#{target_file}.part"
        begin
            # Raw gzip data must be written in binary mode so no newline
            # translation can alter it; decompressed text keeps text mode.
            File.open(partial_file, @decompress ? 'w' : 'wb') do |f|
                URI.open("http://data.gharchive.org/#{filename}") do |gz|
                    if @decompress
                        f << read_gha_file_content(gz)
                    else
                        # Streams the payload instead of loading it into a String.
                        IO.copy_stream(gz, f)
                    end
                end
            end
            File.rename(partial_file, target_file)
        ensure
            # Still present only when the download or the rename failed.
            File.unlink(partial_file) if FileTest.exist?(partial_file)
        end
        archive << target_file

        if @max && archive.size > @max
            last = archive.shift
            @logger.info("Removing local file #{last}")
            File.unlink(last)
        end

        yield filename if block_given?
    end
end

#logger=(logger) ⇒ Object



447
448
449
# File 'lib/gh-archive.rb', line 447

# Replaces the logger used for progress messages.
#
# @param logger [Object] object stored as the logger; #download calls +info+ on it
# @return [Object] the logger that was passed in
def logger=(logger)
    @logger = logger
end

#max(max) ⇒ Object



442
443
444
445
# File 'lib/gh-archive.rb', line 442

# Sets the limit on files kept on disk by a single #download call.
#
# @param max [Integer, nil] limit stored in @max; nil disables the cleanup in #download
# @return [self] the receiver, so the call can be chained
def max(max)
    @max = max
    self
end