Class: HttpProxyPool::Basetask

Inherits:
Object
  • Object
show all
Defined in:
lib/http_proxy_pool/basetask.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Basetask

Returns a new instance of Basetask.



11
12
13
14
15
# File 'lib/http_proxy_pool/basetask.rb', line 11

# Builds a task from an options hash.
#
# @param opts [Hash] optional settings; the :agent, :logger and :url entries
#   are copied into the matching instance variables (missing keys yield nil)
def initialize(opts = {})
  @agent, @logger, @url = opts.values_at(:agent, :logger, :url)
end

Instance Attribute Details

#agent ⇒ Object

Returns the value of attribute agent.



5
6
7
# File 'lib/http_proxy_pool/basetask.rb', line 5

# Reader for @agent, the object used to fetch pages (set by #initialize and
# #sitetask).
def agent
  @agent
end

#logger ⇒ Object

Returns the value of attribute logger.



5
6
7
# File 'lib/http_proxy_pool/basetask.rb', line 5

# Reader for @logger, the object that receives info/error messages while
# crawling (set by #initialize and #sitetask).
def logger
  @logger
end

#next_page ⇒ Object

Returns the value of attribute next_page.



5
6
7
# File 'lib/http_proxy_pool/basetask.rb', line 5

# Reader for @next_page, the block registered through #nextpage that #ips
# evaluates to obtain the following page's URI (nil when none was registered).
def next_page
  @next_page
end

#page_parser ⇒ Object

Returns the value of attribute page_parser.



5
6
7
# File 'lib/http_proxy_pool/basetask.rb', line 5

# Reader for @page_parser, the block registered through #parser that #ips
# evaluates against each fetched page.
def page_parser
  @page_parser
end

#url ⇒ Object

Returns the value of attribute url.



5
6
7
# File 'lib/http_proxy_pool/basetask.rb', line 5

# Reader for @url, the start URL of the task (set by #initialize and
# #sitetask).
def url
  @url
end

Instance Method Details

#curr_page ⇒ Object



69
70
71
# File 'lib/http_proxy_pool/basetask.rb', line 69

# Returns the URI of the agent's current page.
#
# The agent may not have a current page yet (for example before anything was
# fetched); in that case nil is returned instead of raising NoMethodError.
#
# @return [Object, nil] the value of +@agent.page.uri+, or nil when the agent
#   reports no page
def curr_page
  page = @agent.page
  page && page.uri
end

#ips(lastest = true) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/http_proxy_pool/basetask.rb', line 30

# Crawls the task's pages and yields every record produced by the page parser.
#
# Starting at @url, each iteration fetches the page with the agent, evaluates
# the registered parser block in the context of this object and yields each
# element of its result. Afterwards the registered next-page block (if any) is
# evaluated to obtain the following URI.
#
# Errors raised while parsing (including errors raised by the caller's block)
# or while resolving the next page are logged and end the crawl. Only
# StandardError is rescued, so Interrupt, SystemExit and other non-standard
# exceptions propagate to the caller instead of being logged as parse errors.
#
# @param lastest [Boolean] when true (the default) only the first page is
#   crawled; when false every page is crawled, pausing via #rand_sleep after
#   each fetch
# @yield [field] one element of the parser block's result
# @return [nil]
def ips(lastest = true)
  uri = @url

  loop do
    @logger.info("start crawling page [#{uri}] ...")
    @agent.get(uri)
    # A full crawl pauses for a random interval after every fetch.
    rand_sleep unless lastest

    begin
      instance_eval(&page_parser).each { |field| yield field }
    rescue StandardError => e
      @logger.error("parsing page error[#{uri}]. #{e}")
      break
    end

    begin
      # No next-page block registered: there is only a single page to crawl.
      break unless @next_page
      uri = instance_eval(&next_page)
      break unless uri
    rescue StandardError => e
      @logger.error("error occoured when get next page[#{uri}]. #{e}")
      break
    end

    break if lastest
  end
end

#nextpage(&block) ⇒ Object



65
66
67
# File 'lib/http_proxy_pool/basetask.rb', line 65

# Registers the block that #ips evaluates to find the following page's URI.
# Calling it without a block leaves any previously registered block untouched.
#
# @return [Proc, nil] the stored block, or nil when no block was given
def nextpage(&block)
  return unless block_given?

  @next_page = block
end

#parser(&block) ⇒ Object



61
62
63
# File 'lib/http_proxy_pool/basetask.rb', line 61

# Registers the block that #ips evaluates against each fetched page.
# Calling it without a block leaves any previously registered block untouched.
#
# @return [Proc, nil] the stored block, or nil when no block was given
def parser(&block)
  return unless block_given?

  @page_parser = block
end

#rand_sleep(max_tick = 2) ⇒ Object



77
78
79
# File 'lib/http_proxy_pool/basetask.rb', line 77

# Sleeps for a randomly chosen duration.
#
# The duration is +rand(max_tick)+, so with the default of 2 the pause is
# either 0 or 1 second.
#
# @param max_tick [Numeric] upper bound handed to Kernel#rand
# @return [Object] whatever +sleep+ returns
def rand_sleep(max_tick = 2)
  pause = rand(max_tick)
  sleep(pause)
end

#sitename ⇒ Object



73
74
75
# File 'lib/http_proxy_pool/basetask.rb', line 73

# Returns the host part of the task URL.
#
# The URL is percent-escaped before parsing so that characters such as spaces
# do not make URI.parse fail. URI.encode was removed in Ruby 3.0;
# URI::DEFAULT_PARSER.escape is what it delegated to and is available on both
# older and current Rubies.
#
# @return [String, nil] the host of @url, or nil when the URL has none
def sitename
  URI.parse(URI::DEFAULT_PARSER.escape(@url)).host
end

#sitetask(url, opts = {}) ⇒ Object

Raises:

  • (ScriptError) — when no url is given



17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/http_proxy_pool/basetask.rb', line 17

# Declares the site a task crawls and runs the given configuration block.
#
# The agent is taken from +opts[:agent]+ when given; otherwise an agent that
# is already set on this object (e.g. one passed to the constructor) is kept,
# and only if there is none a fresh Mechanize instance is created. The logger
# is only taken from +opts[:logger]+ when no logger is set yet.
#
# @param url [String] start URL of the site; must not be nil/false
# @param opts [Hash] optional :agent and :logger entries
# @yield runs the block after url, agent and logger have been set
# @return [Object] the value of the block
# @raise [ScriptError] when no url is given
def sitetask(url, opts = {})
  raise ScriptError, "script do not specify a url!" unless url

  @url      = url
  # Do not throw away an agent that was injected earlier.
  @agent    = opts[:agent] || @agent || Mechanize.new
  @logger ||= opts[:logger]

  #for debug
  #@agent.set_proxy '127.0.0.1', 8888

  yield
end