Class: Pioneer::Base

Inherits:
Object
  • Object
show all
Defined in:
lib/pioneer/base.rb

Direct Known Subclasses

Crawler

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Base

Returns a new instance of Base.

Raises:



16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/pioneer/base.rb', line 16

# Validates that the concrete crawler defines the required hooks and stores
# the configuration taken from +opts+.
#
# Recognised keys in +opts+ (all optional):
#   :name          - crawler name, default "crawler"
#   :concurrency   - default 10
#   :sleep         - default 0
#   :log_enabled   - default true; an explicit false is honoured
#   :log_level     - default Logger::DEBUG
#   :random_header - default false
#   :header        - default nil
#   :redirects     - default nil
#
# Raises UndefinedLocations / UndefinedProcessing when the instance does not
# respond to `locations` / `processing`, and LocationsNotEnumerator when the
# value returned by `locations` does not respond to `each`.
def initialize(opts = {})
  raise UndefinedLocations, "you should specify `locations` method in your `self.class`" unless respond_to?(:locations)
  raise UndefinedProcessing, "you should specify `processing` method in your `self.class`" unless respond_to?(:processing)
  raise LocationsNotEnumerator, "you should specify `locations` to return Enumerator" unless locations.respond_to?(:each)

  @name          = opts[:name]          || "crawler"
  @concurrency   = opts[:concurrency]   || 10
  @sleep         = opts[:sleep]         || 0 # sleep is the inverse of RPS (1/RPS) - the interval between requests.
  # `opts[:log_enabled] || true` could never be false; only fall back to the
  # default when the option is missing or nil so `log_enabled: false` works.
  @log_enabled   = opts[:log_enabled].nil? ? true : opts[:log_enabled]
  @log_level     = opts[:log_level]     || Logger::DEBUG
  @random_header = opts[:random_header] || false
  @header        = opts[:header]        || nil
  @redirects     = opts[:redirects]     || nil
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(method_name, *args, &block) ⇒ Object

We should override only our own methods: locations, processing, and if_XXX.



82
83
84
85
86
87
88
89
90
# File 'lib/pioneer/base.rb', line 82

# Handles dynamic setter-style calls such as `locations=`, `processing=` and
# `if_XXX=`: the "=" is stripped from the name and the first argument is
# handed to `override_method`, which (re)defines that method on this
# instance. Any other missing method is forwarded to `super`.
def method_missing(method_name, *args, &block)
  case method_name
  when /locations.*=|processing.*=|if_.+=/
    method_name = method_name.to_s.delete("=").to_sym
    override_method(method_name, args.first)
  else
    super(method_name, *args, &block)
  end
end

# Keeps `respond_to?` truthful for the dynamic setters accepted by
# `method_missing`. The pattern must stay in sync with the one used there.
def respond_to_missing?(method_name, include_private = false)
  method_name.to_s =~ /locations.*=|processing.*=|if_.+=/ ? true : super
end

Instance Attribute Details

#concurrency ⇒ Object (readonly)

Returns the value of attribute concurrency.



14
15
16
# File 'lib/pioneer/base.rb', line 14

# Reader for @concurrency, which the constructor sets from opts[:concurrency]
# (10 when the option is not given).
def concurrency
  @concurrency
end

#log_level ⇒ Object (readonly)

Returns the value of attribute log_level.



14
15
16
# File 'lib/pioneer/base.rb', line 14

# Reader for @log_level, which the constructor sets from opts[:log_level]
# (Logger::DEBUG when the option is not given).
def log_level
  @log_level
end

#name ⇒ Object (readonly)

Returns the value of attribute name.



14
15
16
# File 'lib/pioneer/base.rb', line 14

# Reader for @name, which the constructor sets from opts[:name]
# ("crawler" when the option is not given).
def name
  @name
end

#redirect ⇒ Object (readonly)

Returns the value of attribute redirect.



14
15
16
# File 'lib/pioneer/base.rb', line 14

# Reader for @redirect.
# NOTE(review): the constructor shown on this page assigns @redirects
# (plural) and never @redirect, so this looks like it returns nil unless
# @redirect is set elsewhere - confirm whether `redirects` was intended.
def redirect
  @redirect
end

#sleep ⇒ Object (readonly)

Sleep if the last request was made recently (less than the timeout period ago).



50
51
52
# File 'lib/pioneer/base.rb', line 50

# As listed here, simply returns @sleep (set by the constructor from
# opts[:sleep], 0 when not given).
# NOTE(review): because this instance method is named `sleep`, a bare
# `sleep` call inside the class resolves to it rather than to Kernel#sleep;
# confirm that no actual pause is expected from this listing.
def sleep
  @sleep
end

Instance Method Details

#http_opts ⇒ Object



69
70
71
72
73
74
75
# File 'lib/pioneer/base.rb', line 69

# Assembles the HTTP options hash from the instance configuration.
#
# :head is first set to `random_header` when @random_header is truthy and is
# then overwritten by @header when @header is truthy; :redirects is included
# only when @redirects is truthy. Returns the resulting hash (possibly empty).
def http_opts
  {}.tap do |options|
    options.store(:head, random_header) if @random_header
    options.store(:head, @header) if @header
    options.store(:redirects, @redirects) if @redirects
  end
end

#logger ⇒ Object



61
62
63
64
65
66
67
# File 'lib/pioneer/base.rb', line 61

# Returns the memoized Logger for this object, creating it on first use.
# The logger writes to STDOUT and its level is taken from `log_level`.
def logger
  return @logger if @logger

  @logger = Logger.new(STDOUT).tap { |log| log.level = log_level }
end

#override_method(method_name, arg) ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
# File 'lib/pioneer/base.rb', line 92

# Defines (or replaces) the singleton method +method_name+ on this object.
#
# When +arg+ is a Proc the new method takes one argument and forwards it to
# the proc; otherwise the new method takes no arguments and returns +arg+.
def override_method(method_name, arg)
  return define_singleton_method(method_name) { arg } unless Proc === arg

  define_singleton_method(method_name) { |req| arg.call(req) }
end

#random_header ⇒ Object



77
78
79
# File 'lib/pioneer/base.rb', line 77

# Returns whatever HttpHeader.random produces; `http_opts` uses it as the
# :head option when @random_header is set.
def random_header
  HttpHeader.random
end

#start ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/pioneer/base.rb', line 30

# Runs the crawl and returns the collected results.
#
# Inside an EM.synchrony block, `locations` is iterated through
# EM::Synchrony::FiberIterator with `concurrency` passed as its second
# argument. For every url the method calls `sleep`, then appends the value of
# Request.new(url, self).perform to the result array. A
# Pioneer::HttpSkipRequest raised for a url is rescued, so that url adds
# nothing to the result. EM.stop is called once the iteration returns.
#
# Raises LocationsNotEnumerable when `locations` does not respond to `each`.
def start
  raise LocationsNotEnumerable, "location should respond to `each`" unless locations.respond_to? :each
  result = []
  EM.synchrony do
    # Original note: uses FiberPeriodicTimerIterator, which implements the RPS
    # (requests per second) feature; when @sleep is 0 it behaves like the
    # standard FiberIterator.
    # NOTE(review): the code below instantiates FiberIterator, not
    # FiberPeriodicTimerIterator - confirm which one is intended.
    EM::Synchrony::FiberIterator.new(locations, concurrency).map do |url|
      sleep
      begin
        result << Request.new(url, self).perform
      rescue Pioneer::HttpSkipRequest => e
        nil # skipped request: nothing is appended to result
      end
    end
    EM.stop
  end
  result
end