Class: LinkPreview::HTTPCrawler
- Inherits:
-
Object
- Object
- LinkPreview::HTTPCrawler
- Defined in:
- lib/link_preview/http_crawler.rb
Instance Method Summary collapse
-
#dequeue!(priority_order = []) ⇒ Hash
Latest normalized content discovered by crawling.
- #enqueue!(uri, priority = :default) ⇒ Object
-
#finished? ⇒ Boolean
True if all known discovered content has been crawled.
-
#full? ⇒ Boolean
True if the crawler is at capacity.
-
#initialize(config, options = {}) ⇒ HTTPCrawler
constructor
A new instance of HTTPCrawler.
-
#success? ⇒ Boolean
True if any content discovered thus far has been successfully fetched.
Constructor Details
#initialize(config, options = {}) ⇒ HTTPCrawler
Returns a new instance of HTTPCrawler.
30 31 32 33 34 35 |
# File 'lib/link_preview/http_crawler.rb', line 30 def initialize(config, options = {}) @config = config @options = options @status = {} @queue = Hash.new { |h, k| h[k] = [] } end |
Instance Method Details
#dequeue!(priority_order = []) ⇒ Hash
Returns latest normalized content discovered by crawling.
47 48 49 50 51 52 53 |
# File 'lib/link_preview/http_crawler.rb', line 47 def dequeue!(priority_order = []) return if finished? uri = dequeue_by_priority(priority_order) @config.http_client.get(uri, @options).tap do |response| @status[uri] = response.status.to_i end end |
#enqueue!(uri, priority = :default) ⇒ Object
38 39 40 41 42 43 44 |
# File 'lib/link_preview/http_crawler.rb', line 38 def enqueue!(uri, priority = :default) return if full? return unless uri parsed_uri = LinkPreview::URI.parse(uri, @options) enqueue_uri(parsed_uri.as_oembed_uri, :oembed) enqueue_uri(parsed_uri.as_content_uri, priority) end |
#finished? ⇒ Boolean
Returns true if all known discovered content has been crawled.
61 62 63 |
# File 'lib/link_preview/http_crawler.rb', line 61 def finished? @queue.values.flatten.empty? end |
#full? ⇒ Boolean
Returns true if the crawler is at capacity.
66 67 68 |
# File 'lib/link_preview/http_crawler.rb', line 66 def full? @queue.values.flatten.size > @config.max_requests end |
#success? ⇒ Boolean
Returns true if any content discovered thus far has been successfully fetched.
56 57 58 |
# File 'lib/link_preview/http_crawler.rb', line 56 def success? @status.any? { |_, status| status == 200 } end |