Class: Scruber::QueueAdapters::AbstractAdapter::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/scruber/queue_adapters/abstract_adapter.rb

Direct Known Subclasses

Memory::Page

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(queue, url, options = {}) ⇒ Page

Returns a new instance of Page.



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 28

# Builds a page for +url+ that belongs to +queue+.
#
# Each recognised entry of +options+ is copied into an instance variable of
# the same name; when a key is absent the default written next to it below is
# used. The hash is passed through +with_indifferent_access+ first
# (presumably ActiveSupport's, so string and symbol keys are interchangeable
# -- confirm against the gem's dependencies).
#
# @param queue   owner queue, stored as-is in @queue
# @param url     page URL, stored as-is in @url
# @param options [Hash] optional request/response/scheduling attributes
def initialize(queue, url, options={})
  @queue = queue
  @url = url

  opts = options.with_indifferent_access

  # Request description.
  @method = opts.fetch(:method, :get)
  @user_agent = opts.fetch(:user_agent, nil)
  @post_body = opts.fetch(:post_body, nil)
  # Block form so that every page gets its own fresh Hash as the default.
  @headers = opts.fetch(:headers) { {} }
  @fetcher_agent_id = opts.fetch(:fetcher_agent_id, nil)
  @proxy_id = opts.fetch(:proxy_id, nil)

  # Response data.
  @response_body = opts.fetch(:response_body, nil)
  @response_code = opts.fetch(:response_code, nil)
  @response_headers = opts.fetch(:response_headers) { {} }
  @response_total_time = opts.fetch(:response_total_time, nil)

  # Scheduling and bookkeeping; 0 is the default for the *_at values.
  @retry_at = opts.fetch(:retry_at, 0)
  @fetched_at = opts.fetch(:fetched_at, 0)
  @retry_count = opts.fetch(:retry_count, 0)
  @max_retry_times = opts.fetch(:max_retry_times, nil)
  @enqueued_at = opts.fetch(:enqueued_at, 0)
  @page_type = opts.fetch(:page_type, :seed)
  @priority = opts.fetch(:priority, 0)
  @processed_at = opts.fetch(:processed_at, 0)

  # @options stores the converted hash.
  @options = opts

  # +false+ marks "not looked up yet" for the lazy #fetcher_agent / #proxy
  # readers, and "not sent to redownload" for #sent_to_redownload?.
  @_fetcher_agent = false
  @_proxy = false
  @_redownload = false
end

Instance Attribute Details

#enqueued_at ⇒ Object

Returns the value of attribute enqueued_at.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @enqueued_at. The constructor sets it from options[:enqueued_at],
# falling back to 0 when that key is absent.
def enqueued_at
  @enqueued_at
end

#fetched_at ⇒ Object

Returns the value of attribute fetched_at.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @fetched_at. The constructor sets it from options[:fetched_at]
# (0 when absent); #redownload! resets it to 0.
def fetched_at
  @fetched_at
end

#fetcher_agent_id ⇒ Object

Returns the value of attribute fetcher_agent_id.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @fetcher_agent_id (options[:fetcher_agent_id], nil when absent).
# #fetcher_agent passes this id to Scruber::Helpers::FetcherAgent.find.
def fetcher_agent_id
  @fetcher_agent_id
end

#headers ⇒ Object

Returns the value of attribute headers.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @headers. The constructor sets it from options[:headers], using
# a new empty Hash when that key is absent.
def headers
  @headers
end

#max_retry_times ⇒ Object

Returns the value of attribute max_retry_times.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @max_retry_times (options[:max_retry_times], nil when absent).
def max_retry_times
  @max_retry_times
end

#method ⇒ Object

Returns the value of attribute method.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @method (options[:method], :get when absent).
#
# NOTE(review): this zero-argument definition replaces Ruby's built-in
# Object#method on Page instances, so page.method(:some_name) raises
# ArgumentError instead of returning a Method object.
def method
  @method
end

#options ⇒ Object

Returns the value of attribute options.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @options: the options hash given to the constructor after it was
# passed through with_indifferent_access.
def options
  @options
end

#page_type ⇒ Object

Returns the value of attribute page_type.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @page_type (options[:page_type], :seed when absent).
def page_type
  @page_type
end

#post_body ⇒ Object

Returns the value of attribute post_body.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @post_body (options[:post_body], nil when absent).
def post_body
  @post_body
end

#priority ⇒ Object

Returns the value of attribute priority.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @priority (options[:priority], 0 when absent).
def priority
  @priority
end

#processed_at ⇒ Object

Returns the value of attribute processed_at.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @processed_at. The constructor sets it from
# options[:processed_at] (0 when absent); #processed! overwrites it with
# Time.now.to_i.
def processed_at
  @processed_at
end

#proxy_id ⇒ Object

Returns the value of attribute proxy_id.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @proxy_id (options[:proxy_id], nil when absent).
# #proxy passes this id to Scruber::Helpers::ProxyRotator.find.
def proxy_id
  @proxy_id
end

#queue ⇒ Object

Returns the value of attribute queue.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @queue, the first argument given to the constructor.
def queue
  @queue
end

#response_body ⇒ Object

Returns the value of attribute response_body.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @response_body (options[:response_body], nil when absent).
# #redownload! clears it back to nil.
def response_body
  @response_body
end

#response_code ⇒ Object

Returns the value of attribute response_code.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @response_code (options[:response_code], nil when absent).
def response_code
  @response_code
end

#response_headers ⇒ Object

Returns the value of attribute response_headers.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @response_headers. The constructor sets it from
# options[:response_headers], using a new empty Hash when that key is absent.
# #response_cookies reads its 'Set-Cookie' entry.
def response_headers
  @response_headers
end

#response_total_time ⇒ Object

Returns the value of attribute response_total_time.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @response_total_time (options[:response_total_time], nil when
# absent).
def response_total_time
  @response_total_time
end

#retry_at ⇒ Object

Returns the value of attribute retry_at.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @retry_at (options[:retry_at], 0 when absent).
def retry_at
  @retry_at
end

#retry_count ⇒ Object

Returns the value of attribute retry_count.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @retry_count (options[:retry_count], 0 when absent).
# #redownload! increments it by one on every call.
def retry_count
  @retry_count
end

#url ⇒ Object

Returns the value of attribute url.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @url, the second argument given to the constructor.
def url
  @url
end

#user_agent ⇒ Object

Returns the value of attribute user_agent.



6
7
8
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 6

# Reader for @user_agent (options[:user_agent], nil when absent).
def user_agent
  @user_agent
end

Instance Method Details

#[](k) ⇒ Object



92
93
94
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 92

# Hash-style access to the page's state.
#
# Looks up the instance variable named "@<k>", so any instance variable can be
# read this way, including the internal ones whose names start with "_".
#
# @param k [#to_s] attribute name without the leading "@"
# @return [Object, nil] the variable's value; nil when it has never been set
def [](k)
  ivar_name = "@#{k}"
  instance_variable_get(ivar_name)
end

#delete ⇒ Object

Raises:

  • (NotImplementedError)


96
97
98
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 96

# Abstract hook: this base implementation only raises. Concrete page classes
# (Memory::Page is listed as a known subclass) are expected to override it --
# presumably to remove the page from its queue; confirm against the adapters.
#
# The message names the receiver's class so the failing adapter is visible in
# the backtrace summary.
#
# @raise [NotImplementedError] always
def delete
  raise NotImplementedError, "#{self.class}#delete must be implemented by the queue adapter"
end

#fetcher_agent ⇒ Object



59
60
61
62
63
64
65
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 59

# Lazily resolves the fetcher agent referenced by @fetcher_agent_id.
#
# @_fetcher_agent stays +false+ until the first call. After that it holds the
# lookup result (nil when no id is set) and is returned without calling
# Scruber::Helpers::FetcherAgent.find again.
#
# @return [Object, nil] whatever FetcherAgent.find returned, or nil
def fetcher_agent
  return @_fetcher_agent unless @_fetcher_agent == false

  @_fetcher_agent = (Scruber::Helpers::FetcherAgent.find(@fetcher_agent_id) if @fetcher_agent_id)
end

#processed! ⇒ Object



100
101
102
103
104
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 100

# Marks the page as processed and persists it.
#
# Clears the redownload flag, stamps @processed_at with the current time in
# whole seconds (Time.now.to_i) and then calls #save.
#
# @return whatever #save returns
def processed!
  @_redownload = false
  @processed_at = Time.now.to_i
  save
end

#proxy ⇒ Object



67
68
69
70
71
72
73
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 67

# Lazily resolves the proxy referenced by @proxy_id.
#
# @_proxy stays +false+ until the first call. After that it holds the lookup
# result (nil when no id is set) and is returned without calling
# Scruber::Helpers::ProxyRotator.find again.
#
# @return [Object, nil] whatever ProxyRotator.find returned, or nil
def proxy
  return @_proxy unless @_proxy == false

  @_proxy = (Scruber::Helpers::ProxyRotator.find(@proxy_id) if @proxy_id)
end

#redownload! ⇒ Object



106
107
108
109
110
111
112
113
114
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 106

# Sends the page back for another download attempt and persists it.
#
# Sets the redownload flag, bumps @retry_count by one, discards the stored
# response body and resets the fetch/process timestamps, then calls #save.
#
# @return whatever #save returns
def redownload!
  @_redownload = true

  # 0 (not nil) is the "not yet" value the constructor uses for every
  # timestamp, and the value @fetched_at is reset to below; keeping
  # @processed_at an Integer keeps numeric comparisons on it valid.
  @processed_at = 0
  @retry_count += 1
  @fetched_at = 0
  @response_body = nil
  save
end

#response_cookies ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 75

# Returns the response's 'Set-Cookie' header value(s) as an Array.
#
# The header is looked up under the exact key 'Set-Cookie'. A blank value
# (per #blank?) yields an empty Array, an Array is returned unchanged and any
# other value is wrapped in a one-element Array.
#
# @return [Array] cookie header values, possibly empty
def response_cookies
  raw = response_headers['Set-Cookie']
  return [] if raw.blank?

  raw.is_a?(Array) ? raw : [raw]
end

#save ⇒ Object

Raises:

  • (NotImplementedError)


88
89
90
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 88

# Abstract hook: this base implementation only raises. Concrete page classes
# (Memory::Page is listed as a known subclass) are expected to override it;
# #processed! and #redownload! call it after updating the page's state.
#
# The message names the receiver's class so the failing adapter is visible in
# the backtrace summary.
#
# @raise [NotImplementedError] always
def save
  raise NotImplementedError, "#{self.class}#save must be implemented by the queue adapter"
end

#sent_to_redownload? ⇒ Boolean

Returns:

  • (Boolean)


116
117
118
# File 'lib/scruber/queue_adapters/abstract_adapter.rb', line 116

# Reports the redownload flag: the raw value of @_redownload, which the
# constructor and #processed! set to false and #redownload! sets to true.
def sent_to_redownload?
  @_redownload
end