Class: CapybaraCrawler::Crawler

Inherits:
Object
  • Object
show all
Includes:
Capybara::DSL, Capybara::UserAgent::DSL
Defined in:
lib/capybara_crawler/crawler.rb

Overview

Crawler is a class that can use the Capybara DSL.

Defined Under Namespace

Modules: MoreDSL

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Crawler

Returns a new instance of Crawler.



106
107
108
# File 'lib/capybara_crawler/crawler.rb', line 106

# Creates a crawler instance and immediately runs +config_user_agent+.
# NOTE(review): +config_user_agent+ is not defined in this excerpt; it
# presumably applies the crawler's user agent to the session — confirm at
# its definition.
def initialize
  config_user_agent
end

Class Method Details

.config_users_agents ⇒ Object



43
44
45
46
47
48
49
50
51
# File 'lib/capybara_crawler/crawler.rb', line 43

# Registers the crawler's named user-agent strings with Capybara::UserAgent.
#
# Keys registered: :phantomjs, :capybara_crawler, :windows_firefox,
# :mac_safari and :default. The :capybara_crawler entry embeds the current
# CapybaraCrawler::VERSION.
#
# Returns whatever Capybara::UserAgent.add_user_agents returns.
def config_users_agents
  Capybara::UserAgent.add_user_agents(
    phantomjs: "Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1",
    capybara_crawler: "Mozilla/5.0 (Macintosh; Intel Mac OS X) AppleWebKit/538.1 (KHTML, like Gecko) CapybaraCrawler/#{CapybaraCrawler::VERSION} Safari/538.1",
    windows_firefox: "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1",
    mac_safari: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/601.5.17 (KHTML, like Gecko) Version/9.1 Safari/537.86.5",
    default: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/538.1 (KHTML, like Gecko) Version/9.1 Safari/538.1"
  )
end

.inherited(subclass) ⇒ Object

Called when a new class inherits from this class.



64
65
66
67
# File 'lib/capybara_crawler/crawler.rb', line 64

# Inheritance hook: runs the shared +init+ setup and then mixes MoreDSL
# into the freshly created subclass.
#
# @param subclass [Class] the class that has just been defined as a descendant
def self.inherited(subclass)
  init
  # Dispatch via +send+ so this also works where Module#include is private.
  subclass.send(:include, MoreDSL)
end

.init ⇒ Object



53
54
55
56
57
58
59
60
# File 'lib/capybara_crawler/crawler.rb', line 53

# Performs setup for the crawler.
#
# Driver registration and user-agent configuration run only on the first
# call (tracked by @initialized on the receiver); the Capybara default
# driver is reassigned to :poltergeist_crawler on every call.
def init
  unless @initialized # make sure init once
    init_driver
    config_users_agents
    @initialized = true
  end
  Capybara.default_driver = :poltergeist_crawler
end

.init_driver ⇒ Object



14
15
16
17
18
19
# File 'lib/capybara_crawler/crawler.rb', line 14

# Registers both crawler drivers (:poltergeist_crawler and :mechanize_crawler)
# and applies the Capybara settings used by the crawler.
def init_driver
  register_driver_poltergeist_crawler
  register_driver_mechanize_crawler
  # Maximum wait Capybara applies when looking for content
  # (presumably seconds — confirm against the Capybara docs).
  Capybara.default_max_wait_time = 3
  # NOTE(review): presumably disabled because the crawler targets remote
  # sites and needs no local app server — confirm.
  Capybara.run_server = false
end

.register_driver_mechanize_crawler ⇒ Object



32
33
34
35
36
37
38
39
40
41
# File 'lib/capybara_crawler/crawler.rb', line 32

# Registers the :mechanize_crawler Capybara driver.
#
# The driver block builds a Capybara::Mechanize::Driver (falling back to an
# empty Hash when no app is given) and sends the Mechanize agent's log to
# tmp/mechanize_crawler.log, relative to the current working directory.
# The log directory is created on demand so that instantiating the driver
# does not fail with Errno::ENOENT when tmp/ is absent.
def register_driver_mechanize_crawler
  Capybara.register_driver :mechanize_crawler do |app|
    driver = Capybara::Mechanize::Driver.new(app || {})
    driver.configure do |agent|
      # Configure other Mechanize options here.
      log_path = "tmp/mechanize_crawler.log"
      log_dir = File.dirname(log_path)
      # Logger.new cannot create missing parent directories itself.
      Dir.mkdir(log_dir) unless Dir.exist?(log_dir)
      agent.log = Logger.new(log_path)
    end
    driver
  end
end

.register_driver_poltergeist_crawler ⇒ Object



21
22
23
24
25
26
27
28
29
30
# File 'lib/capybara_crawler/crawler.rb', line 21

# Registers the :poltergeist_crawler Capybara driver, a Poltergeist driver
# configured with error re-raising, the inspector and debug output all
# turned off, and PhantomJS output routed to a Suppressor.
def register_driver_poltergeist_crawler
  Capybara.register_driver :poltergeist_crawler do |app|
    driver_options = {
      # when false, JavaScript errors do not get re-raised in Ruby.
      js_errors: false,
      # when false, remote debugging will be disabled
      inspector: false,
      # when true, outputs log messages like
      # {"id":"68d74fdd-adab-4331-ab8e-48153b9a3176","name":"set_js_errors","args":[false]}
      debug: false,
      # silences noisy phantomjs warnings such as 'CoreText performance note...'.
      phantomjs_logger: Capybara::Poltergeist::Suppressor.new
    }
    Capybara::Poltergeist::Driver.new(app, driver_options)
  end
end

Instance Method Details

#default_user_agent ⇒ Object

By default, uses a user agent similar to :mac_safari, so that the target page is easy to check on a Mac using Safari, but with “Safari/538.1”, which is taken from the default user agent of PhantomJS/2.1.1.



112
113
114
# File 'lib/capybara_crawler/crawler.rb', line 112

# Returns the entry stored under :default in
# Capybara::UserAgent.user_agents.
def default_user_agent
  registered_agents = Capybara::UserAgent.user_agents
  registered_agents[:default]
end