Class: Shrimple

Inherits:
Object
  • Object
show all
Defined in:
lib/shrimple.rb,
lib/shrimple/phantom.rb,
lib/shrimple/process.rb,
lib/shrimple/default_config.rb,
lib/shrimple/process_monitor.rb

Defined Under Namespace

Classes: Phantom, PhantomError, Process, ProcessMonitor, TimedOut, TooManyProcessesError

Constant Summary collapse

# The full tree of options Shrimple recognizes, grouped by where each one is applied.
# Every leaf is nil (unset) except page.customHeaders, which carries one default header.
DefaultConfig =
{

  #
  # options for launching the PhantomJS executable
  #

  background: nil,       # false blocks until page is rendered, true returns immediately
  executable: nil,       # specifies the PhantomJS executable to use. If unspecified then Shrimple will search for one.
  renderer: nil,         # the render script to use. Useful for testing, or if you want to do something other than rendering the page.
  timeout: nil,          # time in seconds after which the PhantomJS process should simply be killed
  input: nil,            # specifies the URL to request (use file:// for local assets).  Can also be specified by the optional first argument to render calls.
  output: nil,           # path to the rendered output file, nil to buffer output in memory.  "to" is a more readable synonym: 'render url, to: file'.
  stderr: nil,           # path to the file to receive PhantomJS's stderr, leave nil to store it in a string
  onSuccess: nil,        # a function to call when the pdf has been successfully rendered.  Called before the process is removed from Shrimple.processes so, if it blocks, the process table fills up.  Useful for rate limiting.
  onError: nil,          # a function to call when the pdf has failed for whatever reason (timeout, killed, network error, etc).  Called before process is removed from Shrimple.processes.


  #
  # arguments passed to the PhantomJS render method  http://phantomjs.org/api/webpage/method/render.html
  #

  render: {
    format: nil,         # format for the output file.  usually supplied by a helper (render_pdf, render_png, etc)
    quality: nil         # only relevant to format=jpeg I think, range is 1-100.  not sure what Phantom's default is
  },


  #
  # command-line options passed to PhantomJS in --config: http://phantomjs.org/api/command-line.html
  #

  config: {
    cookiesFile: nil,        # path to the persistent cookies file
    diskCache: nil,          # if true, caches requested assets.  Defaults to false.  See config.maxDiskCacheSize.  The cache location is not currently configurable.
    ignoreSslErrors: nil,    # if true, SSL errors won't prevent page from being rendered.  defaults to false
    loadImages: nil,         # load inlined images?  defaults to true.  see also page.settings.loadImages
    localStoragePath: nil,   # directory to save LocalStorage and WebSQL content
    localStorageQuota: nil,  # maximum size for local data
    localToRemoteUrlAccess: nil,   # local content can initiate requests for remote assets?  Defaults to false. also see page.settings.localToRemoteUrlAccessEnabled
    maxDiskCacheSize: nil,   # maximum size for disk cache in KB.  Also see config.diskCache.
    outputEncoding: nil,     # sets the encoding used in the logfile.  nil means "utf8"
    remoteDebuggerPort: nil, # starts the render script in a debug harness and listens on this port
    remoteDebuggerAutorun: nil, # run the render script in a debugger?  defaults to false, probably never needed
    proxy: nil,              # proxy to use in "address:port" format
    proxyType: nil,          # type of proxy to use
    proxyAuth: nil,          # authentication information for proxy
    scriptEncoding: nil,     # encoding of the render script, defaults to "utf8"
    sslProtocol: nil,        # the protocol to use for SSL connections, defaults to "SSLv3"
    webSecurity: nil         # enable web security and forbid cross-domain XHR?  Defaults to true
  },


  #
  # settings for rendering the page: http://phantomjs.org/api/webpage/
  #

  page: {
    canGoBack: nil,          # allow javascript navigation, defaults to false
    canGoForward: nil,       # allow javascript navigation, defaults to false
    clipRect: {              # area to rasterize when page.render is called
      left: nil,             # Defaults to (0,0,0,0) meaning render the entire page
      top: nil,
      width: nil,
      height: nil
    },
    customHeaders: {       # headers added to every HTTP request.  if nil, Shrimple.DefaultHeaders is used.
      "Accept-Encoding" => "identity" # Don't accept gzipped responses, work around https://github.com/ariya/phantomjs/issues/10930
    },
    # event?  http://phantomjs.org/api/webpage/property/event.html
    # libraryPath?           # might be useful if we add support for calling injectJS
    navigationLocked: nil,   # if true, phantomjs prevents navigating away from the page. Defaults to false.
    offlineStoragePath: nil, # file to contain offline storage data
    offlineStorageQuota: nil, # maximum amount of data allowed in offline storage
    ownsPages: nil,          # should child pages (opened with window.open()) be closed when parent closes?  Defaults to true.
    paperSize: {             # the size of the rendered output http://phantomjs.org/api/webpage/property/paper-size.html
      format: nil,           # size for pdf pages, defaults to 'A4'?
      orientation: nil,      # orientation for pdf pages, defaults to 'portrait'?
      width: nil,            # width of png/jpeg/gif
      height: nil,           # height of png/jpeg/gif
      border: nil            # blank border around the page, defaults to '1cm'?
      # margin: nil          # use border instead
    },
    scrollPosition: {        # scroll page to here before rendering
      left: nil,             # defaults to (0,0) which renders the entire page
      top: nil
    },
    settings: {             # request settings: http://phantomjs.org/api/webpage/property/settings.html
      javascriptCanCloseWindows: nil,        # whether window.close() is allowed, defaults to true
      javascriptCanOpenWindows: nil,         # whether window.open() is allowed, defaults to true
      javascriptEnabled: nil,                # if false, Javascript in the requested page is not executed.  Defaults to true.
      loadImages: nil,                       # if false, inlined images in the requested page are not loaded (see also config.loadImages).  Defaults to true.
      localToRemoteUrlAccessEnabled: nil,    # if true, local resources (like a page loaded using file:// url) are able to load remote assets.  Defaults to false.
      password: nil,                         # password for basic HTTP authentication, see also userName
      resourceTimeout: nil,                  # time in ms after which request will stop and onResourceTimeout() is called
      userAgent: nil,                        # user agent string for requests (nil means use PhantomJS's default WebKitty one)
      userName: nil,                         # name for basic HTTP authentication, see also password
      webSecurityEnabled: nil,               # see config.webSecurity.  Defaults to true.
      XSSAuditingEnabled: nil                # monitor requests for XSS attempts.  Defaults to false.
    },
    viewportSize: {            # sets the size of the virtual browser window
      width: nil,
      height: nil
    },
    zoomFactor: nil            # 4.0 increases page by 4X before rendering (right?), 0.25 shrinks page by 4X.  Defaults to 1.0.
  }
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Shrimple

Returns a new instance of Shrimple.



40
41
42
43
44
45
# File 'lib/shrimple.rb', line 40

# Builds a Shrimple whose options start as a Hashie::Mash copy of
# Shrimple::DefaultConfig with the caller's overrides deep-merged on top.
#
# @param opts [Hash] option overrides, laid out like Shrimple::DefaultConfig
def initialize(opts = {})
  @options = Hashie::Mash.new(Shrimple::DefaultConfig)
  @options.deep_merge!(opts)

  # Only fill in the class-level defaults when the caller left these unset.
  self.executable = self.class.default_executable unless self.executable
  self.renderer = self.class.default_renderer unless self.renderer
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(name, *args, &block) ⇒ Object

allows setting config options directly on this object: s.timeout = 10



35
36
37
# File 'lib/shrimple.rb', line 35

# Forwards any message this object does not define to the options object, which
# allows setting config options directly on this object: s.timeout = 10
#
# @param name [Symbol] name of the method that was not found
# @param args [Array] arguments, forwarded unchanged
# @param block [Proc] optional block, forwarded unchanged
# @return [Object] whatever the options object returns for the forwarded call
def method_missing(name, *args, &block)
  options.send(name, *args, &block)
end

# Companion to method_missing: reports the forwarded messages through
# respond_to? and makes them retrievable with method, so introspection agrees
# with what this object actually answers.
#
# @param name [Symbol] name of the method being queried
# @param include_private [Boolean] whether private methods should be considered
# @return [Boolean] true when the options object responds to +name+
def respond_to_missing?(name, include_private = false)
  options.respond_to?(name, include_private) || super
end

Instance Attribute Details

#options ⇒ Object

Returns the value of attribute options.



32
33
34
# File 'lib/shrimple.rb', line 32

# Reader for the option set stored in @options.
#
# @return [Object] the current value of @options (the constructor assigns a Hashie::Mash)
def options
  @options
end

Class Method Details

.compact!(hash) ⇒ Object

how are these not a part of Hash?



101
102
103
# File 'lib/shrimple.rb', line 101

# Recursively strips "blank" entries from +hash+ in place: nil values, values
# that report themselves empty (strings, arrays, ...), and nested hashes that
# end up empty once they have been compacted themselves.
#
# @param hash [Hash] the hash to prune; it is modified in place
# @return [Hash] the same hash object, after pruning
def self.compact!(hash)
  hash.delete_if do |_key, value|
    next true if value.nil?

    # Prune nested hashes first so a hash holding only blanks counts as empty below.
    compact!(value) if value.is_a?(Hash)
    value.respond_to?('empty?') && value.empty?
  end
end

.deep_dup(hash) ⇒ Object



105
106
107
# File 'lib/shrimple.rb', line 105

# Produces a fully independent copy of +hash+ by round-tripping it through Marshal.
# Anything Marshal cannot serialize (a Proc, for example) makes this raise.
#
# @param hash [Object] the structure to copy
# @return [Object] a deep copy sharing no nested objects with the original
def self.deep_dup(hash)
  serialized = Marshal.dump(hash)
  Marshal.load(serialized)
end

.default_executable ⇒ Object



118
119
120
# File 'lib/shrimple.rb', line 118

# Asks the shell where phantomjs lives, going through `bundle exec` when
# Bundler is loaded.
#
# @return [String] the output of `which` with the trailing newline removed
def self.default_executable
  located =
    if defined?(Bundler::GemfileError)
      `bundle exec which phantomjs`
    else
      `which phantomjs`
    end
  located.chomp
end

.default_renderer ⇒ Object



114
115
116
# File 'lib/shrimple.rb', line 114

# Absolute path of the render.js script that sits in the same directory as this file.
#
# @return [String] absolute path to render.js
def self.default_renderer
  here = File.dirname(__FILE__)
  File.expand_path('render.js', here)
end

.processes ⇒ Object



110
111
112
# File 'lib/shrimple.rb', line 110

# Lazily creates, then reuses, a single Shrimple::ProcessMonitor.
#
# @return [Shrimple::ProcessMonitor] the memoized monitor
def self.processes
  return @processes if @processes

  @processes = Shrimple::ProcessMonitor.new
end

Instance Method Details

#get_full_options(src, *inopts) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/shrimple.rb', line 82

# Builds the complete option set for a single render call: a deep copy of this
# object's options with the per-call arguments layered on top.
#
# @param src [Hash, Object, nil] a hash of options to merge in, or any
#   other truthy value, which is stored as the :input option
# @param inopts [Array<Hash>] further option hashes, deep-merged in order
# @return [Object] the merged options, after blank entries were pruned by compact!
def get_full_options(src, *inopts)
  base = options.dup
  # Procs can't be deep_dup'ed, so pull the callbacks out and put them back afterwards.
  callbacks = {
    onSuccess: base.delete(:onSuccess),
    onError: base.delete(:onError)
  }

  full_opts = Shrimple.deep_dup(base)
  full_opts.merge!(callbacks)

  src_is_hash = src.kind_of?(Hash)
  full_opts.deep_merge!(src) if src_is_hash
  inopts.each do |extra|
    full_opts.deep_merge!(extra)
  end
  full_opts.merge!(input: src) if src && !src_is_hash

  # "to" is accepted as a synonym for "output".
  if full_opts[:to]
    destination = full_opts.delete(:to)
    full_opts.merge!(output: destination)
  end

  self.class.compact!(full_opts)
  full_opts
end

#render(src = {}, *opts) ⇒ Object



75
76
77
78
79
80
# File 'lib/shrimple.rb', line 75

# Starts a Shrimple::Phantom for the merged options and, unless the :background
# option is truthy, waits on it before returning.
#
# @param src [Hash, Object] passed to get_full_options as the source
# @param opts [Array<Hash>] additional option hashes, passed to get_full_options
# @return [Shrimple::Phantom] the object created for this render
def render(src = {}, *opts)
  full_opts = get_full_options(src, *opts)
  Shrimple::Phantom.new(full_opts).tap do |phantom|
    phantom.wait unless full_opts[:background]
  end
end

#render_gif(src, *opts) ⇒ Object



61
62
63
# File 'lib/shrimple.rb', line 61

# Calls render with the render format preset to 'gif'.
#
# @param src [Object] source passed through to render
# @param opts [Array<Hash>] extra option hashes, passed after the format preset
# @return [Object] whatever render returns
def render_gif(src, *opts)
  format_opts = { render: { format: 'gif' } }
  render(src, format_opts, *opts)
end

#render_html(src, *opts) ⇒ Object



65
66
67
# File 'lib/shrimple.rb', line 65

# Calls render with the render format preset to 'html'.
#
# @param src [Object] source passed through to render
# @param opts [Array<Hash>] extra option hashes, passed after the format preset
# @return [Object] whatever render returns
def render_html(src, *opts)
  format_opts = { render: { format: 'html' } }
  render(src, format_opts, *opts)
end

#render_jpeg(src, *opts) ⇒ Object



57
58
59
# File 'lib/shrimple.rb', line 57

# Calls render with the render format preset to 'jpeg'.
#
# @param src [Object] source passed through to render
# @param opts [Array<Hash>] extra option hashes, passed after the format preset
# @return [Object] whatever render returns
def render_jpeg(src, *opts)
  format_opts = { render: { format: 'jpeg' } }
  render(src, format_opts, *opts)
end

#render_pdf(src, *opts) ⇒ Object

might be time to allow method_missing to handle these helpers…



49
50
51
# File 'lib/shrimple.rb', line 49

# Calls render with the render format preset to 'pdf'.
#
# @param src [Object] source passed through to render
# @param opts [Array<Hash>] extra option hashes, passed after the format preset
# @return [Object] whatever render returns
def render_pdf(src, *opts)
  format_opts = { render: { format: 'pdf' } }
  render(src, format_opts, *opts)
end

#render_png(src, *opts) ⇒ Object



53
54
55
# File 'lib/shrimple.rb', line 53

# Calls render with the render format preset to 'png'.
#
# @param src [Object] source passed through to render
# @param opts [Array<Hash>] extra option hashes, passed after the format preset
# @return [Object] whatever render returns
def render_png(src, *opts)
  format_opts = { render: { format: 'png' } }
  render(src, format_opts, *opts)
end

#render_text(src, *opts) ⇒ Object



69
70
71
# File 'lib/shrimple.rb', line 69

# Calls render with the render format preset to 'text'.
#
# @param src [Object] source passed through to render
# @param opts [Array<Hash>] extra option hashes, passed after the format preset
# @return [Object] whatever render returns
def render_text(src, *opts)
  format_opts = { render: { format: 'text' } }
  render(src, format_opts, *opts)
end