Class: GHADownloader
Instance Method Summary
collapse
Methods included from GHAUtils
#each_date, #get_gha_filename, #read_gha_file, #read_gha_file_content
Constructor Details
#initialize(folder, decompress = false) ⇒ GHADownloader
Returns a new instance of GHADownloader.
242
243
244
245
246
247
248
249
250
|
# File 'lib/gh-archive.rb', line 242
# Sets up a downloader that stores its files under +folder+.
#
# The folder is created when nothing exists at that path yet. Logging goes
# to STDERR until a different logger is assigned, and no limit on the number
# of kept files is set initially.
#
# @param folder [String] path of the directory that receives the files
# @param decompress [Boolean] stored as-is; #download consults it to decide
#   whether files are written as ".json" instead of ".json.gz"
# @raise [RuntimeError] if +folder+ exists but is not a directory
def initialize(folder, decompress = false)
  @folder = folder
  @decompress = decompress
  @max = nil
  @logger = Logger.new(STDERR)

  Dir.mkdir(@folder) if !FileTest.exist?(@folder)
  return if FileTest.directory?(@folder)

  raise "A file exist with the desired folder name #{folder}"
end
|
Instance Method Details
#download(from = Time.gm(2015, 1, 1), to = Time.now) ⇒ Object
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
|
# File 'lib/gh-archive.rb', line 261
# Downloads one archive file for every date produced by each_date(from, to)
# and stores it in the target folder.
#
# Dates whose target file already exists are skipped. When decompression is
# enabled the file is stored with a ".json" name and its content comes from
# read_gha_file_content; otherwise the remote bytes are stored verbatim.
# When a maximum has been set via #max, the oldest file fetched during this
# call is deleted as soon as more than that many files have been fetched.
#
# A download that fails midway does not leave an empty or truncated file
# behind; otherwise the "already exists" check would skip it forever.
#
# @param from [Time] first date handed to each_date
# @param to [Time] last date handed to each_date
# @yield [filename] called after each successful download
# @yieldparam filename [String] the remote file name (not the local path)
# @return [Object] whatever each_date returns
# @raise [StandardError] any error raised while fetching or writing a file
def download(from = Time.gm(2015, 1, 1), to = Time.now)
  archive = []
  each_date(from, to) do |current_date|
    filename = get_gha_filename(current_date)
    out_filename = @decompress ? filename.gsub(".json.gz", ".json") : filename
    target_file = File.join(@folder, out_filename)

    if FileTest.exist?(target_file)
      @logger.info("Skipping existing file for #{current_date}")
      next
    end
    @logger.info("Downloading file for #{current_date}")

    completed = false
    begin
      # Raw gzip data must be written in binary mode so no newline
      # translation can corrupt it; decompressed text keeps text mode.
      File.open(target_file, @decompress ? 'w' : 'wb') do |f|
        URI.open("http://data.gharchive.org/#{filename}") do |gz|
          f << (@decompress ? read_gha_file_content(gz) : gz.read)
        end
      end
      completed = true
    ensure
      # Remove an incomplete file so a later run retries this date.
      File.unlink(target_file) if !completed && FileTest.exist?(target_file)
    end

    archive << target_file
    if @max && archive.size > @max
      last = archive.shift
      @logger.info("Removing local file #{last}")
      File.unlink(last)
    end

    yield filename if block_given?
  end
end
|
#logger=(logger) ⇒ Object
257
258
259
|
# File 'lib/gh-archive.rb', line 257
# Replaces the logger this downloader writes its progress messages to.
#
# @param logger [Object] the new logger; #download calls +info+ on it
def logger=(logger)
  @logger = logger
end
|
#max(max) ⇒ Object
252
253
254
255
|
# File 'lib/gh-archive.rb', line 252
# Sets the maximum number of files #download keeps from a single call;
# once exceeded, the oldest file fetched in that call is removed.
#
# @param max [Integer, nil] the limit to store; nil disables the removal
# @return [self] the receiver, so calls can be chained
def max(max)
  @max = max
  self
end
|