Class: GHADownloader

Inherits:
Object
  • Object
show all
Includes:
GHAUtils
Defined in:
lib/gh-archive.rb

Instance Method Summary collapse

Methods included from GHAUtils

#each_date, #get_gha_filename, #read_gha_file, #read_gha_file_content

Constructor Details

#initialize(folder, decompress = false) ⇒ GHADownloader

Returns a new instance of GHADownloader.



242
243
244
245
246
247
248
249
250
# File 'lib/gh-archive.rb', line 242

# Sets up a downloader that stores archive files in +folder+.
#
# The folder is created when nothing exists at that path. If the path
# exists but is not a directory, construction fails.
#
# @param folder [String] path of the directory that receives the files
# @param decompress [Boolean] stored as-is and consulted by #download
# @raise [RuntimeError] when +folder+ exists and is not a directory
def initialize(folder, decompress = false)
    @folder = folder
    @decompress = decompress
    @max = nil
    # Log to standard error until a caller installs another logger.
    @logger = Logger.new(STDERR)

    Dir.mkdir(@folder) if !FileTest.exist?(@folder)
    return if FileTest.directory?(@folder)

    raise "A file exist with the desired folder name #{folder}"
end

Instance Method Details

#download(from = Time.gm(2015, 1, 1), to = Time.now) ⇒ Object



261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
# File 'lib/gh-archive.rb', line 261

# Downloads one archive file into the target folder for every date that
# +each_date(from, to)+ yields.
#
# Dates whose target file already exists locally are skipped: nothing is
# downloaded and the block is not called for them. When a maximum was set
# through #max, the oldest files downloaded by this call are deleted once
# more than that many have been fetched.
#
# Each download is written to "<target>.part" and renamed to its final name
# only after it completed, so a failed or interrupted transfer never leaves
# a truncated file that a later run would skip as "existing".
#
# @param from [Time] start of the range, handed to +each_date+
# @param to [Time] end of the range, handed to +each_date+
# @yield [filename] invoked after each successful download
# @yieldparam filename [String] the remote archive name returned by
#   +get_gha_filename+ (not the local path)
# @return [Object] whatever +each_date+ returns
def download(from = Time.gm(2015, 1, 1), to = Time.now)
    archive = []
    each_date(from, to) do |current_date|
        filename = get_gha_filename(current_date)
        # Non-mutating gsub: also works when the helper returns a frozen string.
        out_filename = @decompress ? filename.gsub(".json.gz", ".json") : filename
        target_file = File.join(@folder, out_filename)

        if FileTest.exist?(target_file)
            @logger.info("Skipping existing file for #{current_date}")
            next
        end
        @logger.info("Downloading file for #{current_date}")

        partial_file = "#{target_file}.part"
        begin
            # Binary mode: the payload is gzip data unless it is decompressed,
            # and no newline translation should be applied in either case.
            File.open(partial_file, 'wb') do |f|
                URI.open("http://data.gharchive.org/#{filename}") do |gz|
                    if @decompress
                        f << read_gha_file_content(gz)
                    else
                        # Stream the body instead of loading it all in memory.
                        IO.copy_stream(gz, f)
                    end
                end
            end
            File.rename(partial_file, target_file)
        ensure
            # Only still present when the download or the rename failed.
            File.unlink(partial_file) if FileTest.exist?(partial_file)
        end
        archive << target_file

        if @max && archive.size > @max
            last = archive.shift
            @logger.info("Removing local file #{last}")
            File.unlink(last)
        end

        yield filename if block_given?
    end
end

#logger=(logger) ⇒ Object



257
258
259
# File 'lib/gh-archive.rb', line 257

# Replaces the logger used for progress messages.
#
# @param logger [Object] stored as-is; #download calls +info+ on it
def logger=(logger)
    @logger = logger
end

#max(max) ⇒ Object



252
253
254
255
# File 'lib/gh-archive.rb', line 252

# Sets the maximum number of files that a single #download call keeps on
# disk; #download deletes the oldest file it fetched once this is exceeded.
#
# @param max [Integer, nil] the limit, or nil for no limit
# @return [GHADownloader] the receiver, so calls can be chained
def max(max)
    @max = max
    self
end