Class: Wraith::SaveImages

Inherits:

Object

Object
Wraith::SaveImages

show all

Includes: Logging

Defined in: lib/wraith/save_images.rb

Instance Attribute Summary collapse

#history ⇒ Object readonly

Returns the value of attribute history.
#meta ⇒ Object readonly

Returns the value of attribute meta.
#wraith ⇒ Object readonly

Returns the value of attribute wraith.

Instance Method Summary collapse

#attempt_image_capture(capture_page_image, filename) ⇒ Object
#capture_image_selenium(screen_sizes, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object
#check_paths ⇒ Object
#construct_command(width, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object
#create_invalid_image(filename, width, invalid_image_name) ⇒ Object
#crop_selector(driver, selector, image_location) ⇒ Object

crop an image around the coordinates of an element.
#define_individual_job(label, settings, width) ⇒ Object
#define_jobs ⇒ Object
#get_driver ⇒ Object

currently only chrome headless at 1x scaling.
#image_was_created(filename) ⇒ Object
#initialize(config, history = false, yaml_passed = false) ⇒ SaveImages constructor

A new instance of SaveImages.
#parallel_task(jobs) ⇒ Object
#prepare_widths_for_cli(width) ⇒ Object
#resize_to_fit_page(driver) ⇒ Object

resize to fit entire page.
#run_command(command) ⇒ Object
#save_images ⇒ Object
#set_image_width(image, width) ⇒ Object

Methods included from Logging

#logger, logger

Constructor Details

#initialize(config, history = false, yaml_passed = false) ⇒ `SaveImages`

Returns a new instance of SaveImages.

# File 'lib/wraith/save_images.rb', line 15

# Sets up the three read-only attributes of the instance.
#
# @param config       passed straight through to Wraith::Wraith.new
# @param history      kept on the instance and also handed to SaveMetadata.new
# @param yaml_passed  forwarded to Wraith::Wraith.new under the :yaml_passed key
def initialize(config, history = false, yaml_passed = false)
  wraith_options = { yaml_passed: yaml_passed }

  @history = history
  @wraith  = Wraith::Wraith.new(config, wraith_options)
  # SaveMetadata needs the Wraith instance, so it is built last.
  @meta    = SaveMetadata.new(@wraith, @history)
end

Instance Attribute Details

#history ⇒ `Object` (readonly)

Returns the value of attribute history.



13
14
15

# File 'lib/wraith/save_images.rb', line 13

# Reader for @history, the `history` argument given to #initialize.
def history
  @history
end

#meta ⇒ `Object` (readonly)

Returns the value of attribute meta.



13
14
15

# File 'lib/wraith/save_images.rb', line 13

# Reader for @meta, the SaveMetadata instance built in #initialize.
def meta
  @meta
end

#wraith ⇒ `Object` (readonly)

Returns the value of attribute wraith.



13
14
15

# File 'lib/wraith/save_images.rb', line 13

# Reader for @wraith, the Wraith::Wraith instance built in #initialize.
def wraith
  @wraith
end

Instance Method Details

#attempt_image_capture(capture_page_image, filename) ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 164

# Runs the capture command up to five times until the image is reported
# as created.
#
# @param capture_page_image command handed to #run_command on every attempt
# @param filename           image path checked with #image_was_created
# @return [true, nil] true as soon as an attempt produces the image; nil when
#   only the final re-check (after the last attempt) finds it
# @raise [RuntimeError] when the image is still missing after all attempts
def attempt_image_capture(capture_page_image, filename)
  max_attempts = 5

  1.upto(max_attempts) do |attempt|
    run_command(capture_page_image)
    return true if image_was_created(filename)

    logger.warn "Failed to capture image #{filename} on attempt number #{attempt} of #{max_attempts}"
  end

  # One last look before giving up.
  return if image_was_created(filename)

  raise "Unable to capture image #{filename} after #{max_attempts} attempt(s)"
end

#capture_image_selenium(screen_sizes, url, file_name, selector, global_before_capture, path_before_capture) ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 128

# Captures one screenshot per requested screen size using the driver
# returned by #get_driver.
#
# Each size is tried up to three times; a Net::ReadTimeout is logged and the
# attempt repeated. If all three attempts time out, nothing is written for
# that size and no error is raised. The driver is always quit, even when a
# capture raises.
#
# @param screen_sizes comma-separated list of "WIDTH" or "WIDTHxHEIGHT" entries
#   (anything responding to #to_s)
# @param url          page to load
# @param file_name    output path; the first 'MULTI' is replaced by the screen size
# @param selector     CSS selector to crop to; skipped when nil or empty
# @param global_before_capture path of a script file executed in the page, or nil/false
# @param path_before_capture   path of a second script file executed in the page, or nil/false
# @return [nil]
def capture_image_selenium(screen_sizes, url, file_name, selector, global_before_capture, path_before_capture)
  driver = get_driver
  driver.manage.timeouts.implicit_wait = 10

  screen_sizes.to_s.split(",").each do |screen_size|
    width, height = screen_size.split("x")
    new_file_name = file_name.sub('MULTI', screen_size)

    1.upto(3) do |attempt|
      begin
        # Without an explicit height, start at 1500 and grow to the page below.
        driver.manage.window.resize_to(width, height || 1500)
        driver.navigate.to url
        driver.manage.timeouts.implicit_wait = wraith.settle
        driver.execute_script(File.read(global_before_capture)) if global_before_capture
        driver.execute_script(File.read(path_before_capture)) if path_before_capture
        resize_to_fit_page(driver) unless height
        driver.save_screenshot(new_file_name)
        crop_selector(driver, selector, new_file_name) if selector && selector.length > 0
        break
      rescue Net::ReadTimeout => e
        logger.error "Got #{e} on attempt #{attempt} at screen size #{screen_size}. URL = #{url}"
      end
    end
  end

  nil
ensure
  # Always release the browser, including when a capture raised.
  driver.quit if driver
end

#check_paths ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 21

# Returns the paths to capture.
#
# When wraith.paths is set it is returned as is; otherwise the content of
# wraith.spider_file is read and evaluated as Ruby, and the result returned.
#
# NOTE(review): the spider file is run through `eval`, so whatever it
# contains is executed as code - it must come from a trusted source.
def check_paths
  return wraith.paths if wraith.paths

  path = File.read(wraith.spider_file)
  eval(path)
end

#construct_command(width, url, file_name, selector, global_before_capture, path_before_capture) ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 153

# Assembles the command line used to capture one image.
#
# The result is the engine, the phantomjs options, and then seven
# single-quoted arguments: snap file, url, width, output file, selector and
# the two before-capture script paths. The command is logged at debug level.
#
# @param width     single width or Array of widths (see #prepare_widths_for_cli)
# @param url       page to capture
# @param file_name output image path
# @param selector  CSS selector; every '#' is prefixed with a backslash
# @param global_before_capture passed through convert_to_absolute
# @param path_before_capture   passed through convert_to_absolute
# @return [String] the command line
def construct_command(width, url, file_name, selector, global_before_capture, path_before_capture)
  cli_width        = prepare_widths_for_cli(width)
  escaped_selector = selector.gsub('#', '\#') # backslash-escape '#' in id selectors for the CLI
  global_script    = convert_to_absolute(global_before_capture)
  path_script      = convert_to_absolute(path_before_capture)

  command_to_run = format(
    "%s %s '%s' '%s' '%s' '%s' '%s' '%s' '%s'",
    meta.engine, wraith.phantomjs_options, wraith.snap_file,
    url, cli_width, file_name, escaped_selector, global_script, path_script
  )
  logger.debug(command_to_run)
  command_to_run
end

#create_invalid_image(filename, width, invalid_image_name) ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 180

# Writes a fallback image to `filename`.
#
# The fallback is copied from ../../assets/<invalid_image_name>, resolved
# against this file's directory, and then passed to #set_image_width.
#
# @param filename           destination path of the fallback image
# @param width              forwarded to #set_image_width
# @param invalid_image_name file name of the asset to copy
# @return whatever #set_image_width returns
def create_invalid_image(filename, width, invalid_image_name)
  logger.warn "Using fallback image instead"

  asset_path      = "../../assets/#{invalid_image_name}"
  fallback_source = File.expand_path(asset_path, File.dirname(__FILE__))
  FileUtils.cp(fallback_source, filename)

  set_image_width(filename, width)
end

#crop_selector(driver, selector, image_location) ⇒ `Object`

crop an image around the coordinates of an element

# File 'lib/wraith/save_images.rb', line 121

# Crops the image at `image_location` to the rectangle of the first element
# matching `selector`, overwriting the file in place.
#
# The element's rect is read once so that width, height, x and y all come
# from the same measurement instead of four separate driver calls.
#
# @param driver         object responding to find_element(:css, selector)
# @param selector       CSS selector of the element to crop to
# @param image_location path of the screenshot to crop and overwrite
def crop_selector(driver, selector, image_location)
  rect  = driver.find_element(:css, selector).rect
  image = MiniMagick::Image.open(image_location)
  image.crop "#{rect.width}x#{rect.height}+#{rect.x}+#{rect.y}"
  image.write(image_location)
end

#define_individual_job(label, settings, width) ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 51

# Builds the capture job(s) for one label at one width (or set of widths).
#
# Each job is a nine-element Array:
#   [label, path, width, url, file_name, selector,
#    global before_capture, path before_capture, fallback image name]
# A job for the base URL is always produced; a second one for the compare
# URL is added only when settings.compare_url is not nil.
#
# @param label    name of the path being captured
# @param settings object exposing path, base_url, compare_url, selector and before_capture
# @param width    single width or Array of widths (see #prepare_widths_for_cli)
# @return [Array<Array>] one or two jobs
def define_individual_job(label, settings, width)
  base_file_name    = meta.file_names(width, label, meta.base_label)
  compare_file_name = meta.file_names(width, label, meta.compare_label)

  # Everything except url, file name and fallback image is shared by both jobs.
  build_job = lambda do |url, file_name, fallback_image|
    [
      label, settings.path, prepare_widths_for_cli(width), url, file_name,
      settings.selector, wraith.before_capture, settings.before_capture, fallback_image
    ]
  end

  jobs = [build_job.call(settings.base_url, base_file_name, 'invalid1.jpg')]
  unless settings.compare_url.nil?
    jobs << build_job.call(settings.compare_url, compare_file_name, 'invalid2.jpg')
  end
  jobs
end

#define_jobs ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 35

# Expands every configured path into its list of capture jobs.
#
# For a path whose CaptureOptions report `resize`, a single call to
# #define_individual_job receives all widths at once; otherwise it is called
# once per width.
#
# @return [Array] all jobs, in path order and then width order
def define_jobs
  check_paths.flat_map do |label, options|
    settings = CaptureOptions.new(options, wraith)

    if settings.resize
      define_individual_job(label, settings, wraith.widths)
    else
      wraith.widths.flat_map { |width| define_individual_job(label, settings, width) }
    end
  end
end

#get_driver ⇒ `Object`

currently only chrome headless at 1x scaling

# File 'lib/wraith/save_images.rb', line 95

# Creates the Selenium driver for the configured engine.
#
# Only the "chrome" engine is handled: Chrome is started headless with a
# fixed set of command-line switches. Any other engine yields nil.
#
# @return the driver from Selenium::WebDriver.for, or nil when the engine is not "chrome"
def get_driver
  return unless meta.engine == "chrome"

  chrome_options = Selenium::WebDriver::Chrome::Options.new
  %w[
    disable-gpu
    headless
    no-sandbox
    device-scale-factor=1
    force-device-scale-factor
    window-size=1200,1500
    hide-scrollbars
    ignore-certificate-errors
  ].each { |switch| chrome_options.add_argument("--#{switch}") }

  Selenium::WebDriver.for(:chrome, options: chrome_options)
end

#image_was_created(filename) ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 175

# Reports whether the capture for `filename` can be considered done.
#
# In resize mode (wraith.resize truthy) that value is returned without
# looking at the disk; otherwise the result is whether the file exists.
#
# @param filename path of the expected image
def image_was_created(filename)
  # @TODO - need to check if the image was generated even if in resize mode
  resize_mode = wraith.resize
  return resize_mode if resize_mode

  File.exist?(filename)
end

#parallel_task(jobs) ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 78

# Runs every capture job through Parallel.each on wraith.threads threads.
#
# Jobs are the nine-element Arrays built by #define_individual_job. With the
# "chrome" engine a job is captured through Selenium; otherwise a command
# line is constructed and run. Any StandardError raised by a job is logged
# and a fallback image is written in its place, using the job's ninth
# element as the fallback asset name.
#
# @param jobs [Array<Array>] capture jobs
def parallel_task(jobs)
  Parallel.each(jobs, :in_threads => wraith.threads) do |_label, _path, width, url, filename, selector, global_before_capture, path_before_capture, invalid_image_name|
    begin
      if meta.engine == "chrome"
        capture_image_selenium(width, url, filename, selector, global_before_capture, path_before_capture)
      else
        command = construct_command(width, url, filename, selector, global_before_capture, path_before_capture)
        attempt_image_capture(command, filename)
      end
    rescue => e
      logger.error "#{e}\n  URL = #{url}"
      # invalid_image_name comes from the job itself (its ninth element).
      create_invalid_image(filename, width, invalid_image_name)
    end
  end
end

#prepare_widths_for_cli(width) ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 62

# Prepares a width value for the command line.
#
# An Array is joined with commas ([30, 40, 50] => "30,40,50"); any other
# value is returned unchanged.
#
# @param width single width or Array of widths
def prepare_widths_for_cli(width)
  width.is_a?(Array) ? width.join(",") : width
end

#resize_to_fit_page(driver) ⇒ `Object`

resize to fit entire page

# File 'lib/wraith/save_images.rb', line 114

# Resizes the browser window to the full size of the current page.
#
# Width and height are each taken as the maximum of the body's and the
# document element's scroll/offset/client measurements, as evaluated by a
# script in the page.
#
# @param driver object responding to execute_script and manage.window.resize_to
def resize_to_fit_page(driver)
  page_width, page_height = %w[Width Height].map do |dimension|
    driver.execute_script(
      "return Math.max(document.body.scroll#{dimension}, document.body.offset#{dimension}, " \
      "document.documentElement.client#{dimension}, document.documentElement.scroll#{dimension}, " \
      "document.documentElement.offset#{dimension});"
    )
  end

  driver.manage.window.resize_to(page_width, page_height)
end

#run_command(command) ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 68

# Runs a command and collects its standard output.
#
# All single quotes are removed from the command before it is executed
# (the caller's string is left untouched). Every output line is logged at
# info level and returned without its trailing line break. The pipe is
# closed even if logging raises.
#
# @param command [String] command line to execute
# @return [Array<String>] output lines, in order
def run_command(command)
  unquoted = command.gsub(/'/, '')

  IO.popen(unquoted) do |pipe|
    pipe.each_line.map do |line|
      logger.info line
      # Non-bang chomp: chomp! returns nil when there is no line break to remove.
      line.chomp
    end
  end
end

#save_images ⇒ `Object`

# File 'lib/wraith/save_images.rb', line 30

# Entry point: builds the capture jobs with #define_jobs and hands them to
# #parallel_task.
#
# @return whatever #parallel_task returns
def save_images
  parallel_task(define_jobs)
end

#set_image_width(image, width) ⇒ `Object`



188
189
190

# File 'lib/wraith/save_images.rb', line 188

# Rewrites `image` in place through the `convert` command line tool, using
# a `-extent` of "<width>x0" and `-background none`.
#
# @param image [String] path of the image; shell-escaped before use
# @param width value interpolated unescaped into the -extent argument
# @return [String] standard output of the `convert` call
def set_image_width(image, width)
  escaped_path = image.shellescape
  %x(convert #{escaped_path} -background none -extent #{width}x0 #{escaped_path})
end

Class: Wraith::SaveImages

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Logging

Constructor Details

#initialize(config, history = false, yaml_passed = false) ⇒ SaveImages

Instance Attribute Details

#history ⇒ Object (readonly)

#meta ⇒ Object (readonly)

#wraith ⇒ Object (readonly)

Instance Method Details

#attempt_image_capture(capture_page_image, filename) ⇒ Object

#capture_image_selenium(screen_sizes, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object

#check_paths ⇒ Object

#construct_command(width, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object

#create_invalid_image(filename, width, invalid_image_name) ⇒ Object

#crop_selector(driver, selector, image_location) ⇒ Object

#define_individual_job(label, settings, width) ⇒ Object

#define_jobs ⇒ Object

#get_driver ⇒ Object

#image_was_created(filename) ⇒ Object

#parallel_task(jobs) ⇒ Object

#prepare_widths_for_cli(width) ⇒ Object

#resize_to_fit_page(driver) ⇒ Object

#run_command(command) ⇒ Object

#save_images ⇒ Object

#set_image_width(image, width) ⇒ Object