Class: WebArchive::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/webarchive.rb

Overview

Client with multiple queues

Instance Method Summary collapse

Constructor Details

#initialize(wait_secs: 1, max_retry: 3, redirect: false, canonical_uri: true) ⇒ Client

Returns a new instance of Client.



132
133
134
135
136
137
138
139
140
141
142
# File 'lib/webarchive.rb', line 132

# Builds a client that starts out with no queues attached.
#
# @param wait_secs [Numeric] value later handed to every Req as its wait
#   time (presumably seconds, going by the name); negatives become 0
# @param max_retry [Integer] value later handed to every Req as its retry
#   limit; negatives become 0
# @param redirect [Boolean] when truthy, #send_uri also submits the
#   redirect target of each URI
# @param canonical_uri [Boolean] when truthy, #send_uri also submits the
#   canonical URI of each page
def initialize(wait_secs: 1, max_retry: 3,
               redirect: false, canonical_uri: true)
  # Clamp both numeric settings so they are never below zero.
  @wait_secs = wait_secs.negative? ? 0 : wait_secs
  @max_retry = max_retry.negative? ? 0 : max_retry
  @redirect = redirect
  @canonical_uri = canonical_uri
  @queues = []
end

Instance Method Details

#add_queue(queue) ⇒ Object

Parameters:



145
146
147
# File 'lib/webarchive.rb', line 145

# Registers one more queue with this client.
#
# @param queue [Object] queue to attach; the client later calls +enq+,
#   +remaining+ and +done_sending+ on every attached queue
# @return [Array] the client's list of queues, new entry included
def add_queue(queue)
  @queues.push(queue)
end

#add_scheme(uri, scheme) ⇒ Object



165
166
167
168
169
170
171
# File 'lib/webarchive.rb', line 165

# Gives a scheme-less URI the supplied scheme.
#
# @param uri [URI::Generic] URI object to inspect; it is never modified
# @param scheme [String] scheme to apply when +uri+ is relative
# @return [URI::Generic] +uri+ itself when it is not relative, otherwise a
#   copy of it carrying +scheme+
def add_scheme(uri, scheme)
  return uri unless uri.relative?

  # Work on a duplicate so the caller's object stays untouched.
  uri.dup.tap { |copy| copy.scheme = scheme }
end

#equivalent_uri?(uri, str) ⇒ Boolean

Returns:

  • (Boolean)


173
174
175
176
# File 'lib/webarchive.rb', line 173

# Tells whether +uri+ and +str+ are textually the same address once +uri+
# has borrowed the scheme of +str+ (only when +uri+ has no scheme itself).
#
# @param uri [URI::Generic] URI object to test
# @param str [String, #to_s] address to compare against; URI objects are
#   accepted and compared by their string form
# @return [Boolean] true when the string forms match
def equivalent_uri?(uri, str)
  # Callers also hand in URI objects (e.g. a fetched page's URI). A String
  # does not compare equal to a URI object, so normalise to a String first.
  expected = str.to_s
  uri = add_scheme(uri, Addressable::URI.parse(expected).scheme)
  uri.to_s == expected
end

#queued_uris ⇒ Object



149
150
151
# File 'lib/webarchive.rb', line 149

# Combines the +remaining+ value of every attached queue with +.
#
# @return [Object, nil] the combined value, or nil when no queue is
#   attached (presumably a count of URIs still queued; the type of
#   +remaining+ is defined by the queue class)
def queued_uris
  pending = @queues.map { |queue| queue.remaining }
  pending.reduce(:+)
end

#send_single_uri(uri) ⇒ void

This method returns an undefined value.

Parameters:

  • uri (String)


209
210
211
212
213
# File 'lib/webarchive.rb', line 209

# Enqueues +uri+ on every attached queue.
#
# A separate Req is built for each queue, carrying the client's wait and
# retry settings.
#
# @param uri [String] address to submit
# @return [void]
def send_single_uri(uri)
  @queues.each { |queue| queue.enq(Req.new(uri, @wait_secs, @max_retry)) }
end

#send_uri(uri) ⇒ Concurrent::Promises::Future

Parameters:

  • uri (String)

Returns:

  • (Concurrent::Promises::Future)


217
218
219
220
221
222
223
224
# File 'lib/webarchive.rb', line 217

# Submits +uri+ and, depending on the client's settings, its canonical URI
# and its redirect target as well.
#
# @param uri [String] address to submit
# @return [Concurrent::Promises::Future] the three submission futures
#   combined with two nested +zip+ calls (original, canonical, redirect)
def send_uri(uri)
  original = Concurrent::Promises.future { send_single_uri(uri) }

  # A disabled lookup is replaced by a no-op future so that the combined
  # future always has the same three parts.
  canonical =
    if @canonical_uri
      with_canonical_uri(uri).then { |found| send_single_uri(found) }
    else
      Concurrent::Promises.future {}
    end
  redirected =
    if @redirect
      with_redirect(uri).then { |found| send_single_uri(found) }
    else
      Concurrent::Promises.future {}
    end

  original.zip(canonical).zip(redirected)
end

#wait_for_queues ⇒ Object



226
227
228
# File 'lib/webarchive.rb', line 226

# Calls +done_sending+ on each attached queue, in the order they were added.
# (Going by the method names this signals that no more URIs will follow and
# lets the queues finish; the exact semantics live in the queue class.)
#
# @return [Array] the client's list of queues
def wait_for_queues
  @queues.each { |queue| queue.done_sending }
end

#with_canonical_uri(uri) ⇒ Concurrent::Promises::Future

Returns a future that gives the canonical URI if there is one.

Parameters:

  • uri (String)

Returns:

  • (Concurrent::Promises::Future)

    Gives the canonical URI if there is one



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/webarchive.rb', line 180

# Fetches +uri+ and looks for a canonical URI that differs from both the
# requested address and the address the page was finally served from.
#
# @param uri [String] address of the page to inspect
# @return [Concurrent::Promises::Future] fulfilled with the canonical URI as
#   a String; rejected with NoAlternativeURIError when the response is not a
#   Mechanize::Page, declares no canonical URI, or declares one equivalent
#   to +uri+ or to the page's own URI
def with_canonical_uri(uri)
  Concurrent::Promises.future do
    page = Mechanize.new.get(uri)

    # Check the type before touching #canonical_uri: non-HTML responses are
    # not Mechanize::Page objects and would raise NoMethodError instead of
    # the NoAlternativeURIError callers expect.
    canonical = page.canonical_uri if page.is_a?(Mechanize::Page)
    raise NoAlternativeURIError, 'no canonical URI found' if
      canonical.nil? || canonical == page.uri

    # A relative canonical link is resolved against the page's own URI; a
    # non-relative one already carries a scheme and is used as it is.
    candidate =
      canonical.relative? ? URI.join(page.uri, canonical) : canonical

    # page.uri is a URI object; pass its string form so the comparison is
    # String against String.
    raise NoAlternativeURIError, 'no canonical URI found' if
      equivalent_uri?(candidate, uri) ||
      equivalent_uri?(candidate, page.uri.to_s)

    candidate.to_s
  end
end

#with_redirect(uri) ⇒ Concurrent::Promises::Future

Returns a future that gives the target URI if redirected.

Parameters:

  • uri (String)

Returns:

  • (Concurrent::Promises::Future)

    Gives the target URI if redirected



155
156
157
158
159
160
161
162
163
# File 'lib/webarchive.rb', line 155

# Requests +uri+ once and reports where the response's Location header
# points.
#
# @param uri [String] address to probe
# @return [Concurrent::Promises::Future] fulfilled with the redirect target
#   as a String; rejected with NoAlternativeURIError when the response has
#   no Location header or the header leads back to +uri+
def with_redirect(uri)
  Concurrent::Promises.future do
    origin = Addressable::URI.parse(uri)
    location = Net::HTTP.get_response(origin)['location']
    raise NoAlternativeURIError, 'no redirect found' if
      !location || location == uri

    if Addressable::URI.parse(location).relative?
      # A Location header may be a relative reference; on its own it is not
      # a usable address, so resolve it against the requested URI.
      resolved = origin.join(location).to_s
      raise NoAlternativeURIError, 'no redirect found' if resolved == uri

      resolved
    else
      # Absolute targets are passed through exactly as the server sent them.
      location
    end
  end
end