Class: OnlineGHAProvider

Inherits:
GHAProvider show all
Defined in:
lib/gh-archive.rb

Defined Under Namespace

Classes: Cache, DownloadArchiveException

Instance Method Summary collapse

Methods inherited from GHAProvider

#exclude, #include, #logger=

Methods included from GHAUtils

#each_date, #get_gha_filename, #read_gha_file, #read_gha_file_content

Constructor Details

#initialize(max_retries = 3, proactive = false) ⇒ OnlineGHAProvider

Returns a new instance of OnlineGHAProvider.



104
105
106
107
108
109
110
# File 'lib/gh-archive.rb', line 104

# Sets up the provider's retry budget, proactive flag and an empty cache.
#
# @param max_retries number of attempts +get+ and +cache+ make before giving up
# @param proactive when truthy, +each+ starts a background thread that
#   downloads archives into the cache ahead of time
def initialize(max_retries = 3, proactive = false)
    super()

    @cache = Cache.new
    @proactive = proactive
    @max_retries = max_retries
end

Instance Method Details

#cache(current_time) ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/gh-archive.rb', line 133

# Downloads the archive for +current_time+ and stores its raw body in the cache.
#
# This is a best-effort operation: up to @max_retries attempts are made and the
# method never raises. Each failed attempt is logged as a warning instead of
# being silently discarded, so a cache that never fills can be diagnosed.
#
# @param current_time the time identifying the archive; passed to get_gha_filename
def cache(current_time)
    @max_retries.times do
        begin
            filename = self.get_gha_filename(current_time)

            URI.open("http://data.gharchive.org/#{filename}") do |gz|
                # Store the response body exactly as received.
                @cache.put(filename, gz.read)
                # First successful download ends the retry loop.
                return
            end
        rescue => e
            @logger.warn("Could not cache archive for #{current_time}: #{e.message}")
        end
    end
end

#each(from = Time.gm(2015, 1, 1), to = Time.now) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/gh-archive.rb', line 147

# Runs the inherited +each+ over the range +from+..+to+.
#
# When the provider is proactive, a background thread is started first. It
# walks the same date range, caches every date, and pauses (polling once per
# second) for as long as the cache reports itself full.
#
# @param from start of the range (defaults to 2015-01-01 00:00 UTC)
# @param to end of the range (defaults to the current time)
# @return whatever the inherited +each+ returns
def each(from = Time.gm(2015, 1, 1), to = Time.now)
    if @proactive
        @logger.info("Proactive download thread started")

        Thread.start do
            each_date(from, to) do |day|
                cache(day)
                @logger.info("Proactively cached #{day}. Cache size: #{@cache.size}")

                @logger.info("Full cache. Waiting...") if @cache.full?
                sleep 1 while @cache.full?
            end
        end
    end

    # Bare super forwards from, to and any block unchanged.
    super
end

#get(current_time) ⇒ Object



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/gh-archive.rb', line 112

# Fetches and parses the archive for +current_time+.
#
# Up to @max_retries attempts are made. On each attempt the archive is read
# from the cache when it is present there, otherwise it is downloaded from
# data.gharchive.org. A failed attempt is logged as a warning and retried.
#
# @param current_time the time identifying the archive; passed to get_gha_filename
# @return whatever read_gha_file returns for the archive
# @raise [DownloadArchiveException] when every attempt fails
def get(current_time)
    @max_retries.times do
        begin
            filename = self.get_gha_filename(current_time)

            # A cache hit must be returned immediately; previously the parsed
            # result was assigned to an unused local and then thrown away.
            # NOTE(review): the cache method stores the raw gz.read body, so
            # confirm read_gha_file accepts what @cache.get hands back.
            return self.read_gha_file(@cache.get(filename)) if @cache.has?(filename)

            URI.open("http://data.gharchive.org/#{filename}") do |gz|
                return self.read_gha_file(gz)
            end
        rescue => e
            # Ruby's Logger exposes #warn (there is no #warning), so calling
            # #warning here would raise from inside the handler and abort
            # the remaining retries.
            @logger.warn(e)
        end
    end

    raise DownloadArchiveException, "Exceeded maximum number of tentative downloads."
end