Class: GHADownloader
Instance Method Summary
collapse
Methods included from GHAUtils
#each_time, #get_gha_filename, #read_gha_file, #read_gha_file_content
Constructor Details
#initialize(folder, decompress = false) ⇒ GHADownloader
442
443
444
445
446
447
448
449
450
|
# File 'lib/gh-archive.rb', line 442
# Builds a downloader that stores archive files inside +folder+.
#
# When nothing exists at +folder+ the directory is created with Dir.mkdir,
# which only creates the last path component (missing parents are not
# created). Logging goes to STDERR until another logger is assigned.
#
# @param folder [String] path of the directory that receives the files
# @param decompress [Boolean] when true, #download stores the decompressed
#   +.json+ content instead of the raw +.json.gz+ file
# @raise [RuntimeError] if +folder+ exists but is not a directory
def initialize(folder, decompress = false)
  @folder = folder
  @decompress = decompress
  @max = nil # no limit on the number of kept files until #max is called
  @logger = Logger.new(STDERR)

  Dir.mkdir(@folder) unless FileTest.exist?(@folder)
  return if FileTest.directory?(@folder)

  raise "A file exist with the desired folder name #{folder}"
end
|
Instance Method Details
#download(from = Time.gm(2015, 1, 1), to = Time.now) ⇒ Object
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
|
# File 'lib/gh-archive.rb', line 461
def download(from = Time.gm(2015, 1, 1), to = Time.now)
archive = []
self.each_time(from, to) do |current_time|
filename = self.get_gha_filename(current_time)
out_filename = filename.clone
out_filename.gsub!(".json.gz", ".json") if @decompress
target_file = File.join(@folder, out_filename)
if FileTest.exist?(target_file)
@logger.info("Skipping existing file for #{current_time}")
next
else
@logger.info("Downloading file for #{current_time}")
end
File.open(target_file, 'w') do |f|
URI.open("http://data.gharchive.org/#{filename}") do |gz|
if @decompress
f << self.read_gha_file_content(gz)
else
f << gz.read
end
end
end
archive << target_file
if @max && archive.size > @max
last = archive.shift
@logger.info("Removing local file #{last}")
File.unlink(last)
end
yield filename if block_given?
end
end
|
#logger=(logger) ⇒ Object
457
458
459
|
# File 'lib/gh-archive.rb', line 457
# Replaces the logger used by this downloader.
#
# @param logger [Object] the new logger; the visible code only calls +info+
#   on it (presumably a Logger-compatible object)
def logger=(logger)
@logger = logger
end
|
#max(max) ⇒ Object
452
453
454
455
|
# File 'lib/gh-archive.rb', line 452
# Sets the maximum number of downloaded files that #download keeps on disk;
# once exceeded, the oldest file downloaded in that run is deleted.
#
# @param max [Integer, nil] the limit, or nil for no limit
# @return [self] the receiver, so the call can be chained
def max(max)
  @max = max
  self
end
|