Class: WebArchive::Client
- Inherits:
-
Object
- Object
- WebArchive::Client
- Defined in:
- lib/webarchive.rb
Overview
Client with multiple queues
Instance Method Summary
- #add_queue(queue) ⇒ Object
- #add_scheme(uri, scheme) ⇒ Object
- #equivalent_uri?(uri, str) ⇒ Boolean
-
#initialize(wait_secs: 1, max_retry: 3, redirect: false, canonical_uri: true) ⇒ Client
constructor
A new instance of Client.
- #queued_uris ⇒ Object
- #send_single_uri(uri) ⇒ void
- #send_uri(uri) ⇒ Concurrent::Promises::Future
- #wait_for_queues ⇒ Object
-
#with_canonical_uri(uri) ⇒ Concurrent::Promises::Future
Gives the canonical URI if there is one.
-
#with_redirect(uri) ⇒ Concurrent::Promises::Future
Gives the target URI if redirected.
Constructor Details
#initialize(wait_secs: 1, max_retry: 3, redirect: false, canonical_uri: true) ⇒ Client
Returns a new instance of Client.
132 133 134 135 136 137 138 139 140 141 142 |
# File 'lib/webarchive.rb', line 132
# Builds a client that starts with no queues attached.
#
# @param wait_secs [Numeric] kept in @wait_secs and handed to every Req;
#   a negative value is replaced by 0
# @param max_retry [Numeric] kept in @max_retry and handed to every Req;
#   a negative value is replaced by 0
# @param redirect [Boolean] when truthy, #send_uri also looks up the
#   redirect target of each URI
# @param canonical_uri [Boolean] when truthy, #send_uri also looks up the
#   canonical URI of each page
def initialize(wait_secs: 1, max_retry: 3, redirect: false, canonical_uri: true)
  @wait_secs = wait_secs.negative? ? 0 : wait_secs
  @max_retry = max_retry.negative? ? 0 : max_retry
  @redirect = redirect
  @canonical_uri = canonical_uri
  @queues = []
end
Instance Method Details
#add_queue(queue) ⇒ Object
145 146 147 |
# File 'lib/webarchive.rb', line 145
# Registers another queue with this client.
#
# @param queue [Object] the queue to append to the client's queue list
# @return [Array] the client's queue list, now including +queue+
def add_queue(queue)
  @queues.push(queue)
end
#add_scheme(uri, scheme) ⇒ Object
165 166 167 168 169 170 171 |
# File 'lib/webarchive.rb', line 165
# Gives a relative URI a scheme without touching the caller's object.
#
# @param uri [#relative?, #scheme=] the URI object to complete
# @param scheme [String, nil] the scheme to assign when +uri+ is relative
# @return [Object] +uri+ itself when it is not relative, otherwise a
#   duplicate of it with +scheme+ set
def add_scheme(uri, scheme)
  return uri unless uri.relative?

  # Work on a copy so the argument is never mutated.
  uri.dup.tap { |copy| copy.scheme = scheme }
end
#equivalent_uri?(uri, str) ⇒ Boolean
173 174 175 176 |
# File 'lib/webarchive.rb', line 173
# Tells whether +uri+ renders to the same text as +str+, after a relative
# +uri+ has borrowed the scheme of +str+.
#
# +str+ is converted with #to_s first. #with_canonical_uri passes the
# fetched page's URI object here, and comparing a String against a URI
# object with == is always false, which made that check ineffective.
#
# @param uri [#relative?, #scheme=, #to_s] the URI object to test
# @param str [String, #to_s] the reference URI, as text or as a URI object
# @return [Boolean] true when both render to the same string
def equivalent_uri?(uri, str)
  reference = str.to_s
  completed = add_scheme(uri, Addressable::URI.parse(reference).scheme)
  completed.to_s == reference
end
#queued_uris ⇒ Object
149 150 151 |
# File 'lib/webarchive.rb', line 149
# Combines what every registered queue reports as remaining.
#
# @return [Object, nil] the per-queue #remaining values folded together
#   with +, or nil when no queue has been added
def queued_uris
  per_queue = @queues.map { |queue| queue.remaining }
  per_queue.reduce(:+)
end
#send_single_uri(uri) ⇒ void
This method returns an undefined value.
209 210 211 212 213 |
# File 'lib/webarchive.rb', line 209
# Enqueues one request for +uri+ on every registered queue. Each queue
# receives its own Req built from the client's wait and retry settings.
#
# @param uri [Object] the URI to submit; passed to Req.new unchanged
# @return [void]
def send_single_uri(uri)
  @queues.each { |queue| queue.enq(Req.new(uri, @wait_secs, @max_retry)) }
end
#send_uri(uri) ⇒ Concurrent::Promises::Future
217 218 219 220 221 222 223 224 |
# File 'lib/webarchive.rb', line 217
# Submits +uri+ and, depending on the client's settings, the alternative
# URIs found for it (canonical URI and/or redirect target).
#
# @param uri [Object] the URI to submit
# @return [Concurrent::Promises::Future] the zip of three futures: the
#   direct submission, the canonical-URI submission and the redirect
#   submission; a disabled lookup is replaced by an empty future
def send_uri(uri)
  direct = Concurrent::Promises.future { send_single_uri(uri) }

  canonical =
    if @canonical_uri
      with_canonical_uri(uri).then { |alternative| send_single_uri(alternative) }
    else
      Concurrent::Promises.future {}
    end

  redirected =
    if @redirect
      with_redirect(uri).then { |alternative| send_single_uri(alternative) }
    else
      Concurrent::Promises.future {}
    end

  direct_and_canonical = direct.zip(canonical)
  direct_and_canonical.zip(redirected)
end
#wait_for_queues ⇒ Object
226 227 228 |
# File 'lib/webarchive.rb', line 226
# Calls #done_sending on every registered queue, in registration order.
#
# @return [Array] the client's queue list
def wait_for_queues
  @queues.each { |queue| queue.done_sending }
end
#with_canonical_uri(uri) ⇒ Concurrent::Promises::Future
Returns a future that gives the canonical URI if there is one.
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 |
# File 'lib/webarchive.rb', line 180
# Fetches +uri+ and looks for a canonical URI that differs from both the
# requested URI and the URI of the page actually fetched.
#
# The page type is checked before #canonical_uri is called, so a response
# that is not a Mechanize::Page is reported as "no canonical URI" rather
# than failing on the missing method.
#
# @param uri [String] the URI to fetch
# @return [Concurrent::Promises::Future] fulfilled with the canonical URI
#   as a String; rejected with NoAlternativeURIError when the page has no
#   canonical URI or it is equivalent to +uri+ or to the fetched page's URI
def with_canonical_uri(uri)
  Concurrent::Promises.future do
    page = Mechanize.new.get(uri)

    # Only ask for the canonical link once we know the page can answer.
    canonical = page.canonical_uri if page.is_a?(Mechanize::Page)
    raise NoAlternativeURIError, 'no canonical URI found' unless canonical && canonical != page.uri

    candidate =
      if canonical.relative?
        # Resolve against the URI of the page that was actually fetched.
        URI.join(page.uri, canonical)
      elsif canonical.scheme
        canonical
      else
        add_scheme(canonical, 'http')
      end

    if equivalent_uri?(candidate, uri) || equivalent_uri?(candidate, page.uri)
      raise NoAlternativeURIError, 'no canonical URI found'
    end

    candidate.to_s
  end
end
#with_redirect(uri) ⇒ Concurrent::Promises::Future
Returns a future that gives the target URI if redirected.
155 156 157 158 159 160 161 162 163 |
# File 'lib/webarchive.rb', line 155
# Requests +uri+ once and reports where the response points to.
#
# @param uri [String] the URI to request
# @return [Concurrent::Promises::Future] fulfilled with the value of the
#   response's Location header; rejected with NoAlternativeURIError when
#   the header is absent or equal to +uri+
def with_redirect(uri)
  Concurrent::Promises.future do
    response = Net::HTTP.get_response(Addressable::URI.parse(uri))
    location = response['location']
    raise NoAlternativeURIError, 'no redirect found' unless location && location != uri

    location
  end
end