Class: Spidr::SessionCache

Inherits:
Object
  • Object
show all
Includes:
Spidr::Settings::Proxy, Spidr::Settings::Timeouts
Defined in:
lib/spidr/session_cache.rb

Overview

Stores active HTTP Sessions organized by scheme, host-name and port.

Instance Attribute Summary

Attributes included from Spidr::Settings::Timeouts

#continue_timeout, #keep_alive_timeout, #open_timeout, #read_timeout, #ssl_timeout

Instance Method Summary collapse

Methods included from Spidr::Settings::Proxy

#disable_proxy!, #proxy, #proxy=

Constructor Details

#initialize(options = {}) ⇒ SessionCache

Creates a new session cache.

Parameters:

  • options (Hash) (defaults to: {})

    Configuration options.

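Options Hash (options):

  • :proxy (Hash) — default: Spidr.proxy

    The proxy settings to use for new sessions.

  • :open_timeout (Integer) — default: Spidr.open_timeout

    Optional open timeout.

  • :ssl_timeout (Integer) — default: Spidr.ssl_timeout

    Optional SSL timeout.

  • :read_timeout (Integer) — default: Spidr.read_timeout

    Optional read timeout.

  • :continue_timeout (Integer) — default: Spidr.continue_timeout

    Optional continue timeout.

  • :keep_alive_timeout (Integer) — default: Spidr.keep_alive_timeout

    Optional keep-alive timeout.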

Since:

  • 0.6.0


43
44
45
46
47
48
49
50
51
52
53
# File 'lib/spidr/session_cache.rb', line 43

#
# Creates a new session cache.
#
# Every setting is taken from `options` when the key is present (even if
# its value is `nil`) and otherwise falls back to the module-level value
# exposed by `Spidr` under the same name.
#
# @param [Hash] options
#   Configuration options.
#
# @option options [#host, #port, #user, #password] :proxy (Spidr.proxy)
#   Proxy settings; these four readers are consulted when a session is built.
#
# @option options [Integer, nil] :open_timeout (Spidr.open_timeout)
#   Open timeout applied to new sessions.
#
# @option options [Integer, nil] :ssl_timeout (Spidr.ssl_timeout)
#   SSL timeout applied to new HTTPS sessions.
#
# @option options [Integer, nil] :read_timeout (Spidr.read_timeout)
#   Read timeout applied to new sessions.
#
# @option options [Integer, nil] :continue_timeout (Spidr.continue_timeout)
#   Continue timeout applied to new sessions.
#
# @option options [Integer, nil] :keep_alive_timeout (Spidr.keep_alive_timeout)
#   Keep-alive timeout applied to new sessions.
#
# @since 0.6.0
#
def initialize(options={})
  @proxy = options.fetch(:proxy,Spidr.proxy)

  # Each timeout lives in an instance variable named after its option key
  # and defaults to the Spidr module method of the same name.
  timeout_settings = [
    :open_timeout,
    :ssl_timeout,
    :read_timeout,
    :continue_timeout,
    :keep_alive_timeout
  ]

  timeout_settings.each do |setting|
    fallback = Spidr.public_send(setting)

    instance_variable_set("@#{setting}",options.fetch(setting,fallback))
  end

  # session key => session object; starts out empty
  @sessions = {}
end

Instance Method Details

#[](url) ⇒ Net::HTTP

Provides an active HTTP session for a given URL.

Parameters:

  • url (URI::HTTP, String)

    The URL which will be requested later.

Returns:

  • (Net::HTTP)

    The active HTTP session object.


85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/spidr/session_cache.rb', line 85

#
# Provides an active HTTP session for a given URL.
#
# A session already stored under the URL's key is returned as-is. Otherwise
# a new one is built through the configured proxy settings, given the
# configured timeouts, stored under that key and returned.
#
# @param [URI::HTTP, String] url
#   The URL which will be requested later.
#
# @return [Net::HTTP]
#   The active HTTP session object.
#
def [](url)
  uri       = URI(url)
  cache_key = key_for(uri)

  cached = @sessions[cache_key]
  return cached if cached

  http_class = Net::HTTP::Proxy(
    @proxy.host,
    @proxy.port,
    @proxy.user,
    @proxy.password
  )
  http = http_class.new(uri.host,uri.port)

  # only override a Net::HTTP timeout when one was actually configured
  http.open_timeout       = @open_timeout       if @open_timeout
  http.read_timeout       = @read_timeout       if @read_timeout
  http.continue_timeout   = @continue_timeout   if @continue_timeout
  http.keep_alive_timeout = @keep_alive_timeout if @keep_alive_timeout

  if uri.scheme == 'https'
    http.use_ssl = true
    # NOTE(review): peer certificate verification is disabled for every
    # HTTPS session created here.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    http.ssl_timeout = @ssl_timeout

    # HTTPS sessions are opened right away; plain HTTP sessions are handed
    # back without being started.
    http.start
  end

  @sessions[cache_key] = http
end

#active?(url) ⇒ Boolean

Determines if there is an active HTTP session for a given URL.

Parameters:

  • url (URI::HTTP, String)

    The URL that represents a session.

Returns:

  • (Boolean)

    Specifies whether there is an active HTTP session.

Since:

  • 0.2.3


66
67
68
69
70
71
72
73
74
# File 'lib/spidr/session_cache.rb', line 66

#
# Determines if there is an active HTTP session for a given URL.
#
# Only checks whether a session is stored under the URL's key; the
# session object itself is not inspected.
#
# @param [URI::HTTP, String] url
#   The URL that represents a session.
#
# @return [Boolean]
#   Specifies whether there is an active HTTP session.
#
# @since 0.2.3
#
def active?(url)
  @sessions.key?(key_for(URI(url)))
end

#clear ⇒ SessionCache

Clears the session cache.

Returns:

  • (SessionCache)

    The cleared session cache.

Since:

  • 0.2.2


153
154
155
156
157
158
159
160
161
162
163
# File 'lib/spidr/session_cache.rb', line 153

#
# Clears the session cache.
#
# Every stored session is finished before the cache is emptied.
#
# @return [SessionCache]
#   The cleared session cache.
#
# @since 0.2.2
#
def clear
  @sessions.each_value do |http|
    begin
      http.finish
    rescue IOError
      # Best effort: Net::HTTP#finish raises IOError for a session that
      # was never started; there is nothing to close in that case.
    end
  end

  @sessions.clear
  self
end

#kill!(url) ⇒ nil

Destroys an HTTP session for the given scheme, host and port.

Parameters:

  • url (URI::HTTP, String)

    The URL of the requested session.

Returns:

  • (nil)

Since:

  • 0.2.2


128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/spidr/session_cache.rb', line 128

#
# Destroys an HTTP session for the given scheme, host and port.
#
# Does nothing when no session is stored for the URL.
#
# @param [URI::HTTP, String] url
#   The URL of the requested session.
#
# @return [nil]
#
# @since 0.2.2
#
def kill!(url)
  # normalize the url
  url = URI(url)

  # session key
  key = key_for(url)

  if (sess = @sessions[key])
    begin
      sess.finish
    rescue IOError
      # Best effort: Net::HTTP#finish raises IOError for a session that
      # was never started; the entry is still removed below.
    end

    @sessions.delete(key)
  end

  # Always return nil, as documented, rather than leaking the deleted
  # session object to the caller.
  nil
end