Class: Blinkr::Engine

Inherits:
Object
  • Object
show all
Includes:
HttpUtils, Sitemap
Defined in:
lib/blinkr/engine.rb

Defined Under Namespace

Classes: ErrorArray

Instance Method Summary collapse

Methods included from Sitemap

#sitemap_locations

Methods included from HttpUtils

#retry?, #sanitize

Constructor Details

#initialize(config) ⇒ Engine

Returns a new instance of Engine.



33
34
35
36
37
# File 'lib/blinkr/engine.rb', line 33

# Builds an engine from the given configuration.
#
# The engine keeps the value returned by +config.validate+ (not the raw
# argument) and starts with an empty extension list before loading the
# pipeline.
#
# @param config [#validate] configuration object; must respond to +validate+
def initialize(config)
  @extensions = []
  @config = config.validate
  # load_pipeline is defined elsewhere in this class; presumably it fills
  # @extensions from the configured pipeline -- confirm against its definition.
  load_pipeline
end

Instance Method Details

#analyze(context, typhoeus) ⇒ Object



95
96
97
# File 'lib/blinkr/engine.rb', line 95

# Runs the +:analyze+ step by delegating to +execute+.
#
# @param context [Object] crawl context (#run passes the OpenStruct that
#   holds the loaded pages)
# @param typhoeus [Object] HTTP client wrapper forwarded untouched; #run
#   passes its bulk browser here
# @return [Object] whatever +execute+ returns
def analyze(context, typhoeus)
  execute(:analyze, context, typhoeus)
end

#append(context) ⇒ Object



81
82
83
# File 'lib/blinkr/engine.rb', line 81

# Runs the +:append+ step by delegating to +execute+.
#
# @param context [Object] value forwarded untouched as the only argument
# @return [Object] whatever +execute+ returns
def append(context)
  execute(:append, context)
end

#collect(page) ⇒ Object



99
100
101
# File 'lib/blinkr/engine.rb', line 99

# Runs the +:collect+ step for a single page by delegating to +execute+.
#
# @param page [Object] page record forwarded untouched (#run passes the
#   OpenStruct it builds for every successfully loaded URL)
# @return [Object] whatever +execute+ returns
def collect(page)
  execute(:collect, page)
end

#run ⇒ Object



39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/blinkr/engine.rb', line 39

# Crawls every sitemap URL, collects per-page data, runs the analysis step
# and renders the report.
#
# Steps, in order:
# 1. Pick the browser wrappers based on +@config.browser+.
# 2. Fetch each unique sitemap location; every successfully loaded page is
#    parsed with Nokogiri, stored in +context.pages+ keyed by its URL and
#    handed to #collect. Failed loads are only reported on stdout.
# 3. Call #analyze with the context and the bulk browser.
# 4. Drop pages whose +errors+ list is empty, then render the report.
#
# @return [Object] whatever +Blinkr::Report#render+ returns
def run
  context = OpenStruct.new(pages: {})

  # The bulk browser is always the one handed to #analyze, even when a
  # PhantomJS/SlimerJS wrapper is chosen below to load the pages themselves.
  bulk_browser =
    if defined?(JRUBY_VERSION) && @config.browser == 'manticore'
      # Loaded lazily: only this branch needs the Manticore wrapper.
      require 'blinkr/manticore_wrapper'
      ManticoreWrapper.new(@config, context)
    else
      TyphoeusWrapper.new(@config, context)
    end
  browser = bulk_browser
  browser = PhantomJSWrapper.new(@config, context) if @config.browser == 'phantomjs'
  browser = SlimerJSWrapper.new(@config, context) if @config.browser == 'slimerjs'

  locations = sitemap_locations.uniq
  puts "Fetching #{locations.size} pages from sitemap"

  loaded = 0
  browser.process_all(locations, @config.max_page_retrys) do |response, resource_errors, javascript_errors|
    url = response.request.base_url

    # Failed loads are logged and skipped; they never reach context.pages.
    unless response.success?
      puts "#{response.code} #{response.status_message} Unable to load page #{url} #{'(' + response.return_message + ')' unless response.return_message.nil?}"
      next
    end

    puts "Loaded page #{url}" if @config.verbose
    document = Nokogiri::HTML(response.body)
    page = OpenStruct.new(
      response: response,
      body: document.freeze,
      errors: ErrorArray.new(@config),
      resource_errors: resource_errors || [],
      javascript_errors: javascript_errors || []
    )
    context.pages[url] = page
    collect(page)
    loaded += 1
  end

  puts 'Executing Typhoeus::Hydra.run, this could take awhile' if @config.browser == 'typhoeus'
  # NOTE(review): the message above is still printed although the call it
  # announces is disabled:
  # browser.hydra.run if @config.browser == 'typhoeus'
  puts "Loaded #{loaded} pages using #{browser.name}."
  puts 'Analyzing pages'
  analyze(context, bulk_browser)

  # Only pages with at least one recorded error make it into the report.
  context.pages.reject! { |_url, entry| entry.errors.empty? }

  # NOTE(review): the guard checks @config.export but the directory is
  # derived from @config.report -- confirm this pairing is intended.
  FileUtils.mkdir_p(Pathname.new(@config.report).parent) unless @config.export.nil?
  Blinkr::Report.new(context, self, @config).render
end

#transform(page, error, &block) ⇒ Object



85
86
87
88
89
90
91
92
93
# File 'lib/blinkr/engine.rb', line 85

# Runs the +:transform+ step for one error on a page, falling back to the
# caller-supplied default when the step produces nothing.
#
# The block is invoked exactly once, up front, and its value is passed to
# +execute+ as the default. (+block+ is declared in the signature, but the
# block is invoked through +yield+.)
#
# @param page [Object] page the error belongs to; forwarded to +execute+
# @param error [Object] error being transformed; forwarded to +execute+
# @yieldreturn [Object] the default value
# @return [Object] the block's value when +execute+ returns an empty
#   result, otherwise the result of calling +join+ on what it returned
def transform(page, error, &block)
  fallback = yield
  pieces = execute(:transform, page, error, fallback)
  pieces.empty? ? fallback : pieces.join
end