Class: Wraith::SaveImages

Inherits:
Object
  • Object
show all
Includes:
Logging
Defined in:
lib/wraith/save_images.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Logging

#logger, logger

Constructor Details

#initialize(config, history = false, yaml_passed = false) ⇒ SaveImages

Returns a new instance of SaveImages.



15
16
17
18
19
# File 'lib/wraith/save_images.rb', line 15

# Builds the Wraith configuration wrapper and the metadata helper that the
# capture methods read through the wraith, history and meta readers.
#
# @param config handed unchanged to Wraith::Wraith.new
# @param history stored in @history and forwarded to SaveMetadata.new
# @param yaml_passed forwarded to Wraith::Wraith.new under the :yaml_passed key
def initialize(config, history = false, yaml_passed = false)
  wraith_options = { yaml_passed: yaml_passed }
  @wraith  = Wraith::Wraith.new(config, wraith_options)
  @history = history
  @meta    = SaveMetadata.new(@wraith, history)
end

Instance Attribute Details

#history ⇒ Object (readonly)

Returns the value of attribute history.



13
14
15
# File 'lib/wraith/save_images.rb', line 13

# Reader for the @history instance variable (the `history` argument given to
# the constructor).
def history
  @history
end

#meta ⇒ Object (readonly)

Returns the value of attribute meta.



13
14
15
# File 'lib/wraith/save_images.rb', line 13

# Reader for the @meta instance variable (the SaveMetadata instance created in
# the constructor).
def meta
  @meta
end

#wraith ⇒ Object (readonly)

Returns the value of attribute wraith.



13
14
15
# File 'lib/wraith/save_images.rb', line 13

# Reader for the @wraith instance variable (the Wraith::Wraith instance created
# in the constructor).
def wraith
  @wraith
end

Instance Method Details

#attempt_image_capture(capture_page_image, filename) ⇒ Object



164
165
166
167
168
169
170
171
172
173
# File 'lib/wraith/save_images.rb', line 164

# Runs the capture command until the image exists, giving up after five tries.
#
# @param capture_page_image command passed to run_command on every attempt
# @param filename path checked with image_was_created after each attempt
# @return [true, nil] true when an attempt inside the loop produced the image;
#   nil when the image is only detected by the final check after the loop
# @raise [RuntimeError] when the image still does not exist after all attempts
def attempt_image_capture(capture_page_image, filename)
  max_attempts = 5
  1.upto(max_attempts) do |attempt|
    run_command(capture_page_image)
    return true if image_was_created(filename)

    logger.warn "Failed to capture image #{filename} on attempt number #{attempt} of #{max_attempts}"
  end

  # One last look before giving up, in case the file showed up after the
  # final in-loop check.
  return if image_was_created(filename)

  raise "Unable to capture image #{filename} after #{max_attempts} attempt(s)"
end

#capture_image_selenium(screen_sizes, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/wraith/save_images.rb', line 128

# Captures one screenshot per requested screen size with a Selenium driver.
#
# Each size is attempted up to three times. Only Net::ReadTimeout triggers a
# retry (and is logged); any other error propagates to the caller. The driver
# is always quit, including when an error escapes.
#
# @param screen_sizes comma-separated sizes, each "WIDTH" or "WIDTHxHEIGHT"
# @param url page to load
# @param file_name output path; the first 'MULTI' is replaced by the screen size
# @param selector CSS selector to crop to; nil or empty means no cropping
# @param global_before_capture path of a JS file to execute after load, or nil
# @param path_before_capture path of a second JS file to execute, or nil
def capture_image_selenium(screen_sizes, url, file_name, selector, global_before_capture, path_before_capture)
  driver = get_driver
  driver.manage.timeouts.implicit_wait = 10
  screen_sizes.to_s.split(",").each do |screen_size|
    # These do not change between retries, so compute them once per size.
    width, height = screen_size.split("x")
    new_file_name = file_name.sub('MULTI', screen_size)

    1.upto(3) do |attempt|
      begin
        # Without an explicit height, start at 1500 and grow to the page below.
        driver.manage.window.resize_to(width, height || 1500)
        driver.navigate.to url
        driver.manage.timeouts.implicit_wait = wraith.settle
        driver.execute_script(File.read(global_before_capture)) if global_before_capture
        driver.execute_script(File.read(path_before_capture)) if path_before_capture
        resize_to_fit_page(driver) unless height
        driver.save_screenshot(new_file_name)
        crop_selector(driver, selector, new_file_name) if selector && selector.length > 0
        break
      rescue Net::ReadTimeout => e
        logger.error "Got #{e} on attempt #{attempt} at screen size #{screen_size}. URL = #{url}"
      end
    end
  end
ensure
  # Release the browser even when an error escapes the loop above.
  driver.quit if driver
end

#check_paths ⇒ Object



21
22
23
24
25
26
27
28
# File 'lib/wraith/save_images.rb', line 21

# Returns the paths to capture: wraith.paths when it is set, otherwise the
# result of evaluating the contents of wraith.spider_file as Ruby.
#
# NOTE(review): eval runs the spider file as arbitrary Ruby code, so this is
# only safe when that file is trusted — confirm where it comes from.
def check_paths
  return wraith.paths if wraith.paths

  # The local keeps the name `path` because eval can see this method's locals.
  path = File.read(wraith.spider_file)
  eval(path)
end

#construct_command(width, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object



153
154
155
156
157
158
159
160
161
162
# File 'lib/wraith/save_images.rb', line 153

# Builds the shell command line that captures one page with the configured
# engine, logs it at debug level and returns it.
#
# @param width a single width or an Array of widths (joined with commas)
# @param url page to capture
# @param file_name output image path
# @param selector CSS selector; nil is treated as an empty selector
# @param global_before_capture script path, passed through convert_to_absolute
# @param path_before_capture script path, passed through convert_to_absolute
# @return [String] the command line
def construct_command(width, url, file_name, selector, global_before_capture, path_before_capture)
  width    = prepare_widths_for_cli(width)
  # Prefix every '#' with a backslash so id selectors are escaped on the CLI.
  # to_s keeps a nil selector from raising; it becomes an empty argument.
  selector = selector.to_s.gsub('#', '\#')
  global_before_capture = convert_to_absolute global_before_capture
  path_before_capture   = convert_to_absolute path_before_capture

  command_to_run = "#{meta.engine} #{wraith.phantomjs_options} '#{wraith.snap_file}' '#{url}' '#{width}' '#{file_name}' '#{selector}' '#{global_before_capture}' '#{path_before_capture}'"
  logger.debug command_to_run
  command_to_run
end

#create_invalid_image(filename, width, invalid_image_name) ⇒ Object



180
181
182
183
184
185
186
# File 'lib/wraith/save_images.rb', line 180

# Replaces a failed capture with a bundled placeholder image.
#
# Logs a warning, copies the named asset over filename, then hands the copy to
# set_image_width and returns that call's result.
#
# @param filename destination path for the placeholder
# @param width width passed on to set_image_width
# @param invalid_image_name asset file name, resolved under ../../assets
#   relative to this file's directory
def create_invalid_image(filename, width, invalid_image_name)
  logger.warn "Using fallback image instead"

  asset_relative_path = "../../assets/#{invalid_image_name}"
  fallback_image = File.expand_path(asset_relative_path, File.dirname(__FILE__))
  FileUtils.cp(fallback_image, filename)

  set_image_width(filename, width)
end

#crop_selector(driver, selector, image_location) ⇒ Object

crop an image around the coordinates of an element



121
122
123
124
125
126
# File 'lib/wraith/save_images.rb', line 121

# Crops the saved screenshot to the bounding rectangle of the first element
# matching the CSS selector, overwriting the file in place.
#
# @param driver Selenium driver used to look up the element
# @param selector CSS selector of the element to crop to
# @param image_location path of the screenshot to read and overwrite
def crop_selector(driver, selector, image_location)
  el = driver.find_element(:css, selector)
  # Read the rectangle once so all four values come from the same snapshot,
  # rather than asking the driver for it four times.
  rect = el.rect
  image = MiniMagick::Image.open(image_location)
  image.crop "#{rect.width}x#{rect.height}+#{rect.x}+#{rect.y}"
  image.write(image_location)
end

#define_individual_job(label, settings, width) ⇒ Object



51
52
53
54
55
56
57
58
59
60
# File 'lib/wraith/save_images.rb', line 51

# Builds the capture job tuples for one path at one width (or width list).
#
# There is always a job for the base URL, and a second one for the compare URL
# unless settings.compare_url is nil. Each tuple is:
#   [label, path, width, url, file name, selector,
#    global before_capture, path before_capture, fallback image name]
#
# @param label name of the path being captured
# @param settings per-path options (path, URLs, selector, before_capture)
# @param width a single width or an Array of widths
# @return [Array<Array>] one or two job tuples
def define_individual_job(label, settings, width)
  base_file_name    = meta.file_names(width, label, meta.base_label)
  compare_file_name = meta.file_names(width, label, meta.compare_label)

  targets = [[settings.base_url, base_file_name, 'invalid1.jpg']]
  targets << [settings.compare_url, compare_file_name, 'invalid2.jpg'] unless settings.compare_url.nil?

  targets.map do |url, file_name, fallback_image|
    [
      label,
      settings.path,
      prepare_widths_for_cli(width),
      url,
      file_name,
      settings.selector,
      wraith.before_capture,
      settings.before_capture,
      fallback_image
    ]
  end
end

#define_jobs ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/wraith/save_images.rb', line 35

# Expands every path from check_paths into a flat list of capture jobs.
#
# When a path's settings have resize enabled, the whole width list goes into a
# single define_individual_job call; otherwise there is one call per width.
#
# @return [Array] the job tuples, in path order and then width order
def define_jobs
  check_paths.flat_map do |label, options|
    settings = CaptureOptions.new(options, wraith)
    width_groups = settings.resize ? [wraith.widths] : wraith.widths

    width_groups.flat_map { |width| define_individual_job(label, settings, width) }
  end
end

#get_driver ⇒ Object

Currently supports only headless Chrome at 1x scaling.



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/wraith/save_images.rb', line 95

# Creates the Selenium driver for the configured engine.
#
# Only the "chrome" engine is handled: it is started with a fixed list of
# command-line switches. Any other engine yields nil.
#
# @return the Chrome driver, or nil when meta.engine is not "chrome"
def get_driver
  return unless meta.engine == "chrome"

  chrome_options = Selenium::WebDriver::Chrome::Options.new
  %w[
    disable-gpu
    headless
    no-sandbox
    device-scale-factor=1
    force-device-scale-factor
    window-size=1200,1500
    hide-scrollbars
    ignore-certificate-errors
  ].each { |switch| chrome_options.add_argument("--#{switch}") }

  Selenium::WebDriver.for(:chrome, options: chrome_options)
end

#image_was_created(filename) ⇒ Object



175
176
177
178
# File 'lib/wraith/save_images.rb', line 175

# Reports whether a capture can be treated as successful.
#
# When wraith.resize is truthy that value is returned without looking at the
# disk; otherwise the result is whether filename exists.
#
# @param filename path of the expected image
def image_was_created(filename)
  # @TODO - need to check if the image was generated even if in resize mode
  wraith.resize || File.exist?(filename)
end

#parallel_task(jobs) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/wraith/save_images.rb', line 78

# Runs every capture job on a thread pool of wraith.threads threads.
#
# The "chrome" engine is captured through Selenium; any other engine goes
# through a constructed CLI command. If a job raises, the error is logged and
# the job's fallback image is written in place of the screenshot.
#
# @param jobs [Array<Array>] 9-element tuples: label, path, width, url,
#   filename, selector, global before_capture, path before_capture and
#   fallback image name
def parallel_task(jobs)
  # The ninth element must be destructured here: the rescue branch needs it to
  # pick the fallback image.
  Parallel.each(jobs, :in_threads => wraith.threads) do |_label, _path, width, url, filename, selector, global_before_capture, path_before_capture, invalid_image_name|
    begin
      if meta.engine == "chrome"
        capture_image_selenium(width, url, filename, selector, global_before_capture, path_before_capture)
      else
        command = construct_command(width, url, filename, selector, global_before_capture, path_before_capture)
        attempt_image_capture(command, filename)
      end
    rescue => e
      logger.error "#{e}\n  URL = #{url}"
      create_invalid_image(filename, width, invalid_image_name)
    end
  end
end

#prepare_widths_for_cli(width) ⇒ Object



62
63
64
65
66
# File 'lib/wraith/save_images.rb', line 62

# Formats widths for the command line: an Array is joined with commas
# ([30, 40, 50] => "30,40,50"); any other value is returned unchanged.
#
# @param width a single width or an Array of widths
# @return the comma-joined String for an Array, otherwise width itself
def prepare_widths_for_cli(width)
  width.is_a?(Array) ? width.join(",") : width
end

#resize_to_fit_page(driver) ⇒ Object

resize to fit entire page



114
115
116
117
118
# File 'lib/wraith/save_images.rb', line 114

# Resizes the browser window to the full extent of the loaded page.
#
# Two scripts run in the page, each returning the largest of the body and
# document-element scroll/offset/client measurements for one axis; the window
# is then resized to those two values.
#
# @param driver Selenium driver with a page already loaded
def resize_to_fit_page(driver)
  width_script  = "return Math.max(document.body.scrollWidth, document.body.offsetWidth, document.documentElement.clientWidth, document.documentElement.scrollWidth, document.documentElement.offsetWidth);"
  height_script = "return Math.max(document.body.scrollHeight, document.body.offsetHeight, document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight);"

  page_width, page_height = [width_script, height_script].map { |script| driver.execute_script(script) }
  driver.manage.window.resize_to(page_width, page_height)
end

#run_command(command) ⇒ Object



68
69
70
71
72
73
74
75
76
# File 'lib/wraith/save_images.rb', line 68

# Runs a command in a subprocess, logging each line of its standard output at
# info level, and returns those lines with trailing newlines removed.
#
# Single quotes are removed from the command before it runs. The caller's
# string is left untouched, so frozen strings are accepted.
#
# @param command [String] command line to execute
# @return [Array<String>] output lines without their line terminators
def run_command(command)
  unquoted_command = command.delete("'")
  # The block form closes the pipe even if logging raises part-way through.
  IO.popen(unquoted_command) do |io|
    io.each_line.map do |line|
      logger.info line
      # chomp, not chomp!: the bang form returns nil when the final line has
      # no trailing newline, which would put nil into the result.
      line.chomp
    end
  end
end

#save_images ⇒ Object



30
31
32
33
# File 'lib/wraith/save_images.rb', line 30

# Entry point: builds the full job list with define_jobs and hands it to
# parallel_task, returning whatever parallel_task returns.
def save_images
  parallel_task(define_jobs)
end

#set_image_width(image, width) ⇒ Object



188
189
190
# File 'lib/wraith/save_images.rb', line 188

# Shells out to the `convert` command to rewrite the image in place with
# "-background none -extent <width>x0".
#
# The image path is shell-escaped; width is interpolated as given.
#
# @param image [String] path of the image to rewrite
# @param width value placed before "x0" in the -extent argument
# @return [String] standard output of the convert command
def set_image_width(image, width)
  escaped_image = image.shellescape
  extent = "#{width}x0"
  `convert #{escaped_image} -background none -extent #{extent} #{escaped_image}`
end