Module: FeedTools::RetrievalHelper

Defined in:
lib/feed_tools/helpers/retrieval_helper.rb

Overview

Methods for pulling remote data

Constant Summary collapse

ACCEPT_HEADER =

Stolen from the Universal Feed Parser

"application/atom+xml,application/rdf+xml," +
"application/rss+xml,application/x-netcdf,application/xml;" +
"q=0.9,text/xml;q=0.2,*/*;q=0.1"

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.http_get(url, options = {}, &block) ⇒ Object

Makes an HTTP GET request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.



250
251
252
253
# File 'lib/feed_tools/helpers/retrieval_helper.rb', line 250

# Performs an HTTP GET by delegating to
# FeedTools::RetrievalHelper.http_request with the :get operation.
#
# The url, the options hash and the optional block are forwarded untouched,
# and whatever http_request returns is returned to the caller.
def self.http_get(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:get, url, options, &block)
end

.http_post(url, options = {}, &block) ⇒ Object

Makes an HTTP POST request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.



258
259
260
261
# File 'lib/feed_tools/helpers/retrieval_helper.rb', line 258

# Performs an HTTP POST by delegating to
# FeedTools::RetrievalHelper.http_request with the :post operation.
#
# The url, the options hash and the optional block are forwarded untouched,
# and whatever http_request returns is returned to the caller.
def self.http_post(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:post, url, options, &block)
end

.http_request(http_operation, url, options = {}, &block) ⇒ Object

Makes an HTTP request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/feed_tools/helpers/retrieval_helper.rb', line 43

# Makes an HTTP request and returns the HTTP response.
#
# http_operation:: Name of the Net::HTTP instance method to invoke
#                  (the sibling helpers pass :get, :post and :head).
# url::            The URL to request. If URI.parse rejects it, it is
#                  normalized with FeedTools::UriHelper.normalize_url and
#                  parsed again.
# options::        Hash of optional settings:
#                  :feed_object::      object supplying #http_headers
#                                      (cached 'etag' / 'last-modified')
#                                      and #configurations (:user_agent,
#                                      :proxy_*, :auth_*, :http_timeout).
#                  :form_data::        Hash sent with :post requests.
#                  :request_headers::  Hash of request headers. Missing
#                                      "Accept" and "User-Agent" entries
#                                      are filled in.
#                  :follow_redirects:: pass false to return redirect
#                                      responses instead of following them
#                                      (default true).
#                  :redirect_limit::   remaining redirect hops (default 10).
#                  :response_chain::   Array of [url, response] pairs for
#                                      the redirects seen so far.
# block::          Optional. Called with the resolved redirect location and
#                  the redirect response; the redirect is followed only if
#                  the block returns a true value.
#
# Returns the final response object, extended with a #response_chain reader.
#
# Raises FeedAccessError when the redirect limit is exhausted, a redirect
# has no Location header, a redirect loop is detected, or a socket, timeout,
# unreachable-network or connection-reset error occurs.
def self.http_request(http_operation, url, options={}, &block)
  response = nil

  options = {
    :feed_object => nil,
    :form_data => nil,
    :request_headers => {},
    :follow_redirects => true,
    :redirect_limit => 10,
    :response_chain => []
  }.merge(options)

  if options[:redirect_limit] == 0
    raise FeedAccessError, 'Redirect too deep'
  end

  # Replace malformed option values with safe defaults.
  if options[:response_chain].blank? ||
      !options[:response_chain].kind_of?(Array)
    options[:response_chain] = []
  end
  unless options[:request_headers].kind_of?(Hash)
    options[:request_headers] = {}
  end
  unless options[:form_data].kind_of?(Hash)
    options[:form_data] = nil
  end

  feed = options[:feed_object]

  # With no explicit headers, derive conditional-GET headers and the
  # user agent from the feed object.
  if options[:request_headers].blank? && !feed.nil?
    options[:request_headers] = {}
    cached_headers = feed.http_headers
    unless cached_headers.nil?
      unless cached_headers['etag'].nil?
        options[:request_headers]["If-None-Match"] = cached_headers['etag']
      end
      unless cached_headers['last-modified'].nil?
        options[:request_headers]["If-Modified-Since"] =
          cached_headers['last-modified']
      end
    end
    unless feed.configurations[:user_agent].nil?
      options[:request_headers]["User-Agent"] =
        feed.configurations[:user_agent]
    end
  end

  headers = options[:request_headers]
  if headers["Accept"].nil?
    headers["Accept"] = FeedTools::RetrievalHelper::ACCEPT_HEADER
  end
  if headers["User-Agent"].nil?
    headers["User-Agent"] = FeedTools.configurations[:user_agent]
  end

  uri = nil
  begin
    uri = URI.parse(url)
  rescue URI::InvalidURIError
    # Uh, maybe try to fix it?
    uri = URI.parse(FeedTools::UriHelper.normalize_url(url))
  end

  begin
    proxy_address = nil
    proxy_port = nil
    proxy_user = nil
    proxy_password = nil

    auth_user = nil
    auth_password = nil
    auth_scheme = nil

    unless feed.nil?
      config = feed.configurations

      proxy_address = config[:proxy_address] || nil
      # nil.to_i is 0, so an unset port must be mapped back to nil
      # explicitly; otherwise port 0 would be handed to the proxy class
      # instead of letting Net::HTTP pick its default.
      configured_port = config[:proxy_port].to_i
      proxy_port = configured_port unless configured_port == 0
      proxy_user = config[:proxy_user] || nil
      proxy_password = config[:proxy_password] || nil

      auth_user = config[:auth_user] || nil
      auth_password = config[:auth_password] || nil
      auth_scheme = config[:auth_scheme] || nil
    end

    # Only HTTP Basic authentication is handled here; it is also the
    # default when no scheme is configured.
    if auth_user &&
        (auth_scheme.nil? || auth_scheme.to_s.to_sym == :basic)
      headers["Authorization"] =
        "Basic " + [
          "#{auth_user}:#{auth_password}"
        ].pack('m').delete("\r\n")
    end

    # No need to check the proxy address for nil
    http = Net::HTTP::Proxy(
      proxy_address, proxy_port, proxy_user, proxy_password).new(
        uri.host, (uri.port || 80))

    # A feed-specific timeout takes precedence over the global one.
    if !feed.nil? && !feed.configurations[:http_timeout].nil?
      http.open_timeout = feed.configurations[:http_timeout].to_f
    elsif !FeedTools.configurations[:http_timeout].nil?
      http.open_timeout = FeedTools.configurations[:http_timeout].to_f
    end
    # A timeout of zero means "no timeout".
    if !http.open_timeout.nil? && http.open_timeout == 0
      http.open_timeout = nil
    end

    # The request target must never be empty: a URL such as
    # "http://example.com" has an empty path and is requested as "/".
    path = uri.path
    path = '/' if path.nil? || path.empty?
    path += ('?' + uri.query) if uri.query

    request_params = [path, headers]
    if http_operation == :post
      options[:form_data] = {} if options[:form_data].blank?
      request_params << options[:form_data]
    end
    Thread.pass
    response = http.send(http_operation, *request_params)
    Thread.pass

    case response
    when Net::HTTPSuccess
      unless feed.nil?
        # We've reached the final destination, process all previous
        # redirections, and see if we need to update the url.
        options[:response_chain].each do |redirected_response|
          # Jump out as soon as we hit anything that isn't a
          # permanently moved redirection.
          break unless redirected_response.last.code.to_i == 301

          # Reset the cache object or we may get duplicate entries

          # TODO: verify this line is necessary!
#=============================================================================
          feed.cache_object = nil

          feed.href = redirected_response.last['location']
        end
      end
    when Net::HTTPNotModified
      # Do nothing, we just don't want it processed as a redirection
    when Net::HTTPRedirection
      if response['location'].nil?
        raise FeedAccessError,
          "No location to redirect to supplied for " + response.code
      end
      options[:response_chain] << [url, response]

      redirected_location = FeedTools::UriHelper.resolve_relative_uri(
        response['location'], [uri.to_s])

      if options[:response_chain].assoc(redirected_location) != nil
        raise FeedAccessError,
          "Redirection loop detected: #{redirected_location}"
      end

      # Honor the :follow_redirects option (only an explicit false
      # disables following), then let the block have the final say.
      follow_redirect = options[:follow_redirects] != false
      if follow_redirect && !block.nil?
        follow_redirect = block.call(redirected_location, response)
      end

      if follow_redirect
        response = FeedTools::RetrievalHelper.http_request(
          http_operation,
          redirected_location,
          options.merge(
            {:redirect_limit => (options[:redirect_limit] - 1)}),
          &block)
      end
    end
  rescue SocketError
    raise FeedAccessError, 'Socket error prevented feed retrieval'
  rescue Timeout::Error, Errno::ETIMEDOUT
    raise FeedAccessError, 'Timeout while attempting to retrieve feed'
  rescue Errno::ENETUNREACH
    raise FeedAccessError, 'Network was unreachable'
  rescue Errno::ECONNRESET
    raise FeedAccessError, 'Connection was reset by peer'
  end

  # Expose the redirect history on the response itself.
  if response != nil
    class << response
      def response_chain
        return @response_chain
      end
    end
    response.instance_variable_set("@response_chain",
      options[:response_chain])
  end

  return response
end

Instance Method Details

#http_head(url, options = {}, &block) ⇒ Object

Makes an HTTP HEAD request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the resolved redirect location and the HTTP redirect response as arguments; the redirect is followed only if the block returns a true value.



266
267
268
269
# File 'lib/feed_tools/helpers/retrieval_helper.rb', line 266

# Performs an HTTP HEAD by delegating to
# FeedTools::RetrievalHelper.http_request with the :head operation.
#
# The url, the options hash and the optional block are forwarded untouched,
# and whatever http_request returns is returned to the caller.
#
# NOTE(review): unlike http_get and http_post this is defined as an instance
# method rather than a class method -- confirm whether that is intentional.
def http_head(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:head, url, options, &block)
end