Class: LinkPreview::HTTPCrawler

Inherits:
Object
  • Object
show all
Defined in:
lib/link_preview/http_crawler.rb

Instance Method Summary collapse

Constructor Details

#initialize(config, options = {}) ⇒ HTTPCrawler

Returns a new instance of HTTPCrawler.



30
31
32
33
34
35
# File 'lib/link_preview/http_crawler.rb', line 30

# Builds a crawler with no recorded statuses and no queued URIs.
#
# @param config [Object] stored as-is; other methods read +http_client+
#   and +max_requests+ from it
# @param options [Hash] stored as-is and forwarded to URI parsing and to
#   the HTTP client
def initialize(config, options = {})
  @options = options
  @config = config
  # URI => integer HTTP status of each fetch performed so far.
  @status = {}
  # Looking up an unseen key stores (and returns) a fresh empty Array, so
  # every key gets its own independent list.
  @queue = Hash.new { |queues, key| queues[key] = [] }
end

Instance Method Details

#dequeue!(priority_order = []) ⇒ Hash

Returns latest normalized content discovered by crawling.

Returns:

  • (Hash)

    latest normalized content discovered by crawling



47
48
49
50
51
52
53
# File 'lib/link_preview/http_crawler.rb', line 47

# Fetches the next queued URI and records the HTTP status of the fetch.
#
# @param priority_order [Array] forwarded unchanged to
#   +dequeue_by_priority+ to choose which URI comes next
# @return [Object, nil] whatever +@config.http_client.get+ returned for the
#   URI, or +nil+ when +finished?+ is already true
def dequeue!(priority_order = [])
  return if finished?

  next_uri = dequeue_by_priority(priority_order)
  response = @config.http_client.get(next_uri, @options)
  # Keep the numeric status per URI so success? can inspect it later.
  @status[next_uri] = response.status.to_i
  response
end

#enqueue!(uri, priority = :default) ⇒ Object

Parameters:

  • uri (String)

    URI of content to crawl



38
39
40
41
42
43
44
# File 'lib/link_preview/http_crawler.rb', line 38

# Queues a URI for crawling in two forms: its oEmbed variant and its
# content variant.
#
# Nothing is queued when the crawler is full or when +uri+ is nil/false.
#
# @param uri [String] URI of content to crawl
# @param priority [Symbol] priority passed along with the content variant;
#   the oEmbed variant is always queued with +:oembed+
# @return [Object, nil] result of the final +enqueue_uri+ call, or +nil+
#   when nothing was queued
def enqueue!(uri, priority = :default)
  return if full? || !uri

  target = LinkPreview::URI.parse(uri, @options)
  oembed_variant = target.as_oembed_uri
  enqueue_uri(oembed_variant, :oembed)
  content_variant = target.as_content_uri
  enqueue_uri(content_variant, priority)
end

#finished? ⇒ Boolean

Returns true if all known discovered content has been crawled.

Returns:

  • (Boolean)

    true if all known discovered content has been crawled



61
62
63
# File 'lib/link_preview/http_crawler.rb', line 61

# Reports whether the queue holds no more URIs.
#
# @return [Boolean] true when the lists stored in +@queue+, taken together,
#   contain no entries
def finished?
  pending = @queue.values.flatten
  pending.empty?
end

#full? ⇒ Boolean

Returns true if the crawler is at capacity.

Returns:

  • (Boolean)

    true if the crawler is at capacity



66
67
68
# File 'lib/link_preview/http_crawler.rb', line 66

# Reports whether the queue has outgrown the configured request limit.
#
# The comparison is strict: a queue holding exactly
# +@config.max_requests+ entries is not yet reported as full.
#
# @return [Boolean] true when the total number of queued entries exceeds
#   +@config.max_requests+
def full?
  pending_count = @queue.values.flatten.size
  pending_count > @config.max_requests
end

#success? ⇒ Boolean

Returns true if any content discovered thus far has been successfully fetched.

Returns:

  • (Boolean)

    true if any content discovered thus far has been successfully fetched



56
57
58
# File 'lib/link_preview/http_crawler.rb', line 56

# Reports whether any fetch recorded so far came back with HTTP 200.
#
# @return [Boolean] true when at least one recorded status equals 200
def success?
  @status.value?(200)
end