Class: WebCrawler::Request

Inherits:
Object
  • Object
show all
Defined in:
lib/web_crawler/request.rb

Direct Known Subclasses

CachedRequest

Constant Summary collapse

# Default HTTP request headers. #initialize merges caller-supplied headers
# over these defaults. Frozen so the shared defaults cannot be mutated in place.
HEADERS =
{
    'User-Agent'      => 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
    'Accept'          => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language' => 'en-us,en;q=0.5',
    # Charset entries are comma-separated, each with an optional ;q= weight.
    'Accept-Charset'  => 'utf-8,windows-1251;q=0.7,*;q=0.7',
    'Cache-Control'   => 'max-age=0'
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, custom_headers = { }) ⇒ Request

Returns a new instance of Request.



15
16
17
18
19
# File 'lib/web_crawler/request.rb', line 15

# Builds a request for +url+ that has not been processed yet.
#
# @param url the target address; it is passed through normalize_url
#   (defined elsewhere) before being stored
# @param custom_headers [Hash] extra headers; an entry replaces the default
#   from HEADERS only when its key matches exactly
def initialize(url, custom_headers = { })
  @url     = normalize_url(url)
  @request = { }
  # Hash#merge returns a new hash, so HEADERS itself is left untouched.
  @headers = HEADERS.merge(custom_headers)
  @ready   = false
end

Instance Attribute Details

#response ⇒ Object (readonly)

Returns the value of attribute response.



13
14
15
# File 'lib/web_crawler/request.rb', line 13

# Reader for the response attribute.
#
# @return the object stored in @response; in the code shown here that is the
#   Response built by #process, or nil if #process has not stored one yet
def response
  @response
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



13
14
15
# File 'lib/web_crawler/request.rb', line 13

# Reader for the url attribute.
#
# @return the value #initialize stored in @url, i.e. the result of
#   normalize_url applied to the constructor argument
def url
  @url
end

Instance Method Details

#inspect ⇒ Object



34
35
36
# File 'lib/web_crawler/request.rb', line 34

# Short debugging representation: class name, object id and the request URL.
#
# @return [String] e.g. <tt>#<WebCrawler::Request:1234 @url="http://host/"></tt>
def inspect
  # %s converts each argument with #to_s, so a nil @url renders as "".
  format('#<%s:%s @url="%s">', self.class, object_id, @url)
end

#process ⇒ Object



25
26
27
28
29
30
31
32
# File 'lib/web_crawler/request.rb', line 25

# Fetches the URL, wraps the result in a Response and marks the request ready.
#
# Only a refused connection is handled here: it is logged through
# WebCrawler.logger and turned into a nil result. Any other exception raised
# by fetch or Response.new propagates to the caller.
#
# @return the new response on success, nil when the connection was refused
def process
  fetched   = fetch(url)
  # fetch's result is splatted into Response.new's positional arguments.
  @response = Response.new(*fetched)
  @ready    = true
  response
rescue Errno::ECONNREFUSED => refused
  WebCrawler.logger.error("request to #{url} failed: #{refused.message}")
  nil
end

#ready? ⇒ Boolean

Returns:

  • (Boolean)


21
22
23
# File 'lib/web_crawler/request.rb', line 21

# Tells whether this request has been processed successfully.
#
# @return [Boolean] the @ready flag: false after #initialize, set to true by
#   #process once it has built a response
def ready?
  @ready
end