Class: Wraith::SaveImages
- Inherits:
-
Object
- Object
- Wraith::SaveImages
- Includes:
- Logging
- Defined in:
- lib/wraith/save_images.rb
Instance Attribute Summary collapse
-
#history ⇒ Object
readonly
Returns the value of attribute history.
-
#meta ⇒ Object
readonly
Returns the value of attribute meta.
-
#wraith ⇒ Object
readonly
Returns the value of attribute wraith.
Instance Method Summary collapse
- #attempt_image_capture(capture_page_image, filename) ⇒ Object
- #capture_image_selenium(screen_sizes, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object
- #check_paths ⇒ Object
- #construct_command(width, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object
- #create_invalid_image(filename, width, invalid_image_name) ⇒ Object
-
#crop_selector(driver, selector, image_location) ⇒ Object
crop an image around the coordinates of an element.
- #define_individual_job(label, settings, width) ⇒ Object
- #define_jobs ⇒ Object
-
#get_driver ⇒ Object
currently only chrome headless at 1x scaling.
- #image_was_created(filename) ⇒ Object
-
#initialize(config, history = false, yaml_passed = false) ⇒ SaveImages
constructor
A new instance of SaveImages.
- #parallel_task(jobs) ⇒ Object
- #prepare_widths_for_cli(width) ⇒ Object
-
#resize_to_fit_page(driver) ⇒ Object
resize to fit entire page.
- #run_command(command) ⇒ Object
- #save_images ⇒ Object
- #set_image_width(image, width) ⇒ Object
Methods included from Logging
Constructor Details
#initialize(config, history = false, yaml_passed = false) ⇒ SaveImages
Returns a new instance of SaveImages.
15 16 17 18 19 |
# File 'lib/wraith/save_images.rb', line 15
# Sets up the three read-only attributes used by the rest of the class.
#
# @param config configuration source, forwarded untouched to Wraith::Wraith.new
# @param history [Boolean] stored as-is and also handed to SaveMetadata.new
# @param yaml_passed [Boolean] forwarded to Wraith::Wraith.new under :yaml_passed
def initialize(config, history = false, yaml_passed = false)
  wraith_options = { yaml_passed: yaml_passed }
  @wraith = Wraith::Wraith.new(config, wraith_options)
  @history = history
  @meta = SaveMetadata.new(@wraith, @history)
end
Instance Attribute Details
#history ⇒ Object (readonly)
Returns the value of attribute history.
13 14 15 |
# File 'lib/wraith/save_images.rb', line 13
# Read-only accessor.
#
# @return the +history+ value stored by the constructor
def history
  @history
end
#meta ⇒ Object (readonly)
Returns the value of attribute meta.
13 14 15 |
# File 'lib/wraith/save_images.rb', line 13
# Read-only accessor.
#
# @return the SaveMetadata instance built by the constructor
def meta
  @meta
end
#wraith ⇒ Object (readonly)
Returns the value of attribute wraith.
13 14 15 |
# File 'lib/wraith/save_images.rb', line 13
# Read-only accessor.
#
# @return the Wraith::Wraith instance built by the constructor
def wraith
  @wraith
end
Instance Method Details
#attempt_image_capture(capture_page_image, filename) ⇒ Object
164 165 166 167 168 169 170 171 172 173 |
# File 'lib/wraith/save_images.rb', line 164
# Runs the capture command up to five times, stopping as soon as
# image_was_created reports success for +filename+.
#
# @param capture_page_image the command handed to run_command on every attempt
# @param filename the path checked with image_was_created after each attempt
# @return [true, nil] true when an attempt inside the loop succeeded; nil when
#   only the final re-check after the loop succeeded
# @raise [RuntimeError] when the image still does not exist after all attempts
def attempt_image_capture(capture_page_image, filename)
  max_attempts = 5
  1.upto(max_attempts) do |attempt|
    run_command(capture_page_image)
    return true if image_was_created(filename)

    logger.warn "Failed to capture image #{filename} on attempt number #{attempt} of #{max_attempts}"
  end

  # One last look before giving up, exactly as the loop does after each run.
  return if image_was_created(filename)

  fail "Unable to capture image #{filename} after #{max_attempts} attempt(s)"
end
#capture_image_selenium(screen_sizes, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/wraith/save_images.rb', line 128
# Captures one screenshot per requested screen size using the driver returned
# by get_driver. Each size gets up to three attempts; an attempt is retried
# only when it fails with Net::ReadTimeout.
#
# @param screen_sizes comma-separated list of sizes; each entry is either
#   "WIDTH" or "WIDTHxHEIGHT" (it is split on "," and then on "x")
# @param url the page to load
# @param file_name output path template; the first 'MULTI' is replaced by the
#   screen size
# @param selector optional CSS selector; when non-empty the screenshot is
#   cropped to that element
# @param global_before_capture optional path of a script file whose contents
#   are executed in the page
# @param path_before_capture optional path of a second script file, executed
#   after the global one
#
# NOTE(review): if all three attempts time out for a size, that size is
# skipped without raising; only the errors are logged.
def capture_image_selenium(screen_sizes, url, file_name, selector, global_before_capture, path_before_capture)
  driver = get_driver
  driver.manage.timeouts.implicit_wait = 10

  screen_sizes.to_s.split(",").each do |screen_size|
    width, height = screen_size.split("x")
    new_file_name = file_name.sub('MULTI', screen_size)

    1.upto(3) do |attempt|
      begin
        # Without an explicit height, start at 1500 and grow to the page below.
        driver.manage.window.resize_to(width, height || 1500)
        driver.navigate.to url
        driver.manage.timeouts.implicit_wait = wraith.settle
        driver.execute_script(File.read(global_before_capture)) if global_before_capture
        driver.execute_script(File.read(path_before_capture)) if path_before_capture
        resize_to_fit_page(driver) unless height
        driver.save_screenshot(new_file_name)
        crop_selector(driver, selector, new_file_name) if selector && selector.length > 0
        break
      rescue Net::ReadTimeout => e
        logger.error "Got #{e} on attempt #{attempt} at screen size #{screen_size}. URL = #{url}"
      end
    end
  end
ensure
  # Always release the browser, including when a capture step raises.
  driver.quit if driver
end
#check_paths ⇒ Object
21 22 23 24 25 26 27 28 |
# File 'lib/wraith/save_images.rb', line 21
# Returns the configured paths, falling back to the spider file when the
# configuration has none.
#
# NOTE(review): the spider file's contents are executed with eval, so that
# file must be trusted.
#
# @return wraith.paths when set, otherwise the result of evaluating the
#   contents of wraith.spider_file
def check_paths
  return wraith.paths if wraith.paths

  eval(File.read(wraith.spider_file))
end
#construct_command(width, url, file_name, selector, global_before_capture, path_before_capture) ⇒ Object
153 154 155 156 157 158 159 160 161 162 |
# File 'lib/wraith/save_images.rb', line 153
# Builds the command line used to capture one image with a CLI engine.
#
# NOTE(review): the receiver of +engine+ and the accessor after +wraith.+ were
# missing in the rendered source; +meta.engine+ and +wraith.phantomjs_options+
# are restored here and should be confirmed against the library.
#
# @param width a single width or an Array of widths (joined by prepare_widths_for_cli)
# @param url the page to capture
# @param file_name the output image path
# @param selector [String] CSS selector; every '#' is rewritten to '\#'
# @param global_before_capture passed through convert_to_absolute
# @param path_before_capture passed through convert_to_absolute
# @return [String] the assembled command (also written to the debug log)
def construct_command(width, url, file_name, selector, global_before_capture, path_before_capture)
  width = prepare_widths_for_cli(width)
  # Prefix '#' with a backslash before the selector is placed on the command line.
  selector = selector.gsub('#', '\#')
  global_before_capture = convert_to_absolute(global_before_capture)
  path_before_capture = convert_to_absolute(path_before_capture)

  command_to_run = "#{meta.engine} #{wraith.phantomjs_options} '#{wraith.snap_file}' '#{url}' '#{width}' '#{file_name}' '#{selector}' '#{global_before_capture}' '#{path_before_capture}'"
  logger.debug command_to_run
  command_to_run
end
#create_invalid_image(filename, width, invalid_image_name) ⇒ Object
180 181 182 183 184 185 186 |
# File 'lib/wraith/save_images.rb', line 180
# Replaces a failed capture with a placeholder image: copies the named asset
# over +filename+ and then adjusts its width via set_image_width.
#
# @param filename destination path of the image that could not be captured
# @param width passed to set_image_width
# @param invalid_image_name file name of the placeholder inside the assets
#   directory (resolved as ../../assets relative to this file's directory)
# @return whatever set_image_width returns
def create_invalid_image(filename, width, invalid_image_name)
  logger.warn "Using fallback image instead"
  invalid = File.expand_path("../../assets/#{invalid_image_name}", File.dirname(__FILE__))
  FileUtils.cp(invalid, filename)
  set_image_width(filename, width)
end
#crop_selector(driver, selector, image_location) ⇒ Object
crop an image around the coordinates of an element
121 122 123 124 125 126 |
# File 'lib/wraith/save_images.rb', line 121
# Crops the saved screenshot to the rectangle of the first element matching
# +selector+, overwriting the file in place.
#
# @param driver the browser driver used to look the element up
# @param selector [String] CSS selector of the element to keep
# @param image_location path of the screenshot to crop and rewrite
def crop_selector(driver, selector, image_location)
  element = driver.find_element(:css, selector)
  screenshot = MiniMagick::Image.open(image_location)
  # Geometry string in the form WIDTHxHEIGHT+X+Y.
  geometry = format("%sx%s+%s+%s", element.rect.width, element.rect.height, element.rect.x, element.rect.y)
  screenshot.crop(geometry)
  screenshot.write(image_location)
end
#define_individual_job(label, settings, width) ⇒ Object
51 52 53 54 55 56 57 58 59 60 |
# File 'lib/wraith/save_images.rb', line 51
# Builds the capture job(s) for one label at one width (or width list).
# A job is a 9-element array:
#   [label, path, width, url, file_name, selector,
#    global before_capture, path before_capture, fallback image name]
#
# @param label the name of the page being captured
# @param settings object answering path, base_url, compare_url, selector and
#   before_capture
# @param width a single width or an Array of widths
# @return [Array<Array>] the base job, plus a compare job when
#   settings.compare_url is not nil
def define_individual_job(label, settings, width)
  base_file_name    = meta.file_names(width, label, meta.base_label)
  compare_file_name = meta.file_names(width, label, meta.compare_label)

  # Both jobs share every field except the url, file name and fallback image.
  build_job = lambda do |url, file_name, fallback_image|
    [label, settings.path, prepare_widths_for_cli(width), url, file_name,
     settings.selector, wraith.before_capture, settings.before_capture, fallback_image]
  end

  jobs = [build_job.call(settings.base_url, base_file_name, 'invalid1.jpg')]
  jobs << build_job.call(settings.compare_url, compare_file_name, 'invalid2.jpg') unless settings.compare_url.nil?
  jobs
end
#define_jobs ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/wraith/save_images.rb', line 35
# Expands every configured path into capture jobs.
#
# When a path's settings report +resize+, a single job set is built with the
# whole width list; otherwise one job set is built per width.
#
# @return [Array<Array>] all jobs, in path order and then width order
def define_jobs
  jobs = []
  check_paths.each do |label, options|
    settings = CaptureOptions.new(options, wraith)
    # In resize mode the whole list is one unit of work; otherwise each width is.
    width_groups = settings.resize ? [wraith.widths] : wraith.widths
    width_groups.each do |width|
      jobs.concat(define_individual_job(label, settings, width))
    end
  end
  jobs
end
#get_driver ⇒ Object
currently only chrome headless at 1x scaling
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
# File 'lib/wraith/save_images.rb', line 95
# Creates the Selenium driver for the configured engine. Only "chrome" is
# handled: it is started headless with a fixed set of command-line switches.
#
# @return a Chrome driver when meta.engine is "chrome", otherwise nil
def get_driver
  return unless meta.engine == "chrome"

  chrome_options = Selenium::WebDriver::Chrome::Options.new
  [
    'disable-gpu',
    'headless',
    'no-sandbox',
    'device-scale-factor=1',
    'force-device-scale-factor',
    'window-size=1200,1500',
    'hide-scrollbars',
    'ignore-certificate-errors'
  ].each { |arg| chrome_options.add_argument("--#{arg}") }

  Selenium::WebDriver.for :chrome, options: chrome_options
end
#image_was_created(filename) ⇒ Object
175 176 177 178 |
# File 'lib/wraith/save_images.rb', line 175
# Tells whether a capture should be treated as successful.
#
# @param filename path of the expected image
# @return wraith.resize when it is truthy, otherwise whether +filename+ exists
def image_was_created(filename)
  # @TODO - need to check if the image was generated even if in resize mode
  wraith.resize || File.exist?(filename)
end
#parallel_task(jobs) ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/wraith/save_images.rb', line 78
# Runs every job via Parallel.each with wraith.threads threads. A job is
# captured through Selenium when the engine is "chrome", otherwise through the
# CLI command. Any StandardError raised by a job is logged and the job's
# fallback image is written instead.
#
# @param jobs [Array<Array>] 9-element job arrays:
#   [label, path, width, url, filename, selector,
#    global_before_capture, path_before_capture, invalid_image_name]
def parallel_task(jobs)
  Parallel.each(jobs, :in_threads => wraith.threads) do |_label, _path, width, url, filename, selector, global_before_capture, path_before_capture, invalid_image_name|
    begin
      if meta.engine == "chrome"
        capture_image_selenium(width, url, filename, selector, global_before_capture, path_before_capture)
      else
        command = construct_command(width, url, filename, selector, global_before_capture, path_before_capture)
        attempt_image_capture(command, filename)
      end
    rescue => e
      logger.error "#{e}\n URL = #{url}"
      # The ninth job field names the placeholder to use for this capture.
      create_invalid_image(filename, width, invalid_image_name)
    end
  end
end
#prepare_widths_for_cli(width) ⇒ Object
62 63 64 65 66 |
# File 'lib/wraith/save_images.rb', line 62
# Makes a width value safe to place on a command line.
#
# @param width a single width, or an Array of widths
# @return the Array joined with commas ([30,40,50] => "30,40,50"); any other
#   value is returned unchanged
def prepare_widths_for_cli(width)
  return width unless width.is_a?(Array)

  width.join(",")
end
#resize_to_fit_page(driver) ⇒ Object
resize to fit entire page
114 115 116 117 118 |
# File 'lib/wraith/save_images.rb', line 114
# Resizes the browser window to the full size of the loaded document.
#
# For each dimension the page is asked for the largest of the body's
# scroll/offset size and the document element's client/scroll/offset size.
#
# @param driver the browser driver; must answer execute_script and
#   manage.window.resize_to
def resize_to_fit_page(driver)
  page_width, page_height = %w[Width Height].map do |dimension|
    driver.execute_script("return Math.max(document.body.scroll#{dimension}, document.body.offset#{dimension}, document.documentElement.client#{dimension}, document.documentElement.scroll#{dimension}, document.documentElement.offset#{dimension});")
  end
  driver.manage.window.resize_to(page_width, page_height)
end
#run_command(command) ⇒ Object
68 69 70 71 72 73 74 75 76 |
# File 'lib/wraith/save_images.rb', line 68
# Runs +command+ as a subprocess, logging each output line at info level.
#
# Single quotes are removed from the command before it is run; the caller's
# string is left untouched.
#
# @param command [String] the command line to execute
# @return [Array<String>] the output lines with trailing newlines removed
def run_command(command)
  # Work on a copy so the caller's string is not modified (and may be frozen).
  unquoted = command.delete("'")

  # The block form closes the pipe even when reading or logging raises.
  IO.popen(unquoted) do |pipe|
    pipe.each_line.map do |line|
      logger.info line
      # chomp (not chomp!) so a final line without a newline is kept, not nil.
      line.chomp
    end
  end
end
#save_images ⇒ Object
30 31 32 33 |
# File 'lib/wraith/save_images.rb', line 30
# Entry point: builds the full job list and hands it to parallel_task.
#
# @return whatever parallel_task returns
def save_images
  parallel_task(define_jobs)
end
#set_image_width(image, width) ⇒ Object
188 189 190 |
# File 'lib/wraith/save_images.rb', line 188
# Rewrites +image+ in place through the `convert` command line tool, using
# "-background none -extent <width>x0".
#
# Both the path and the width are shell-escaped before being interpolated.
#
# @param image [String] path of the image to rewrite
# @param width the width placed before "x0" in the -extent argument
# @return [String] the standard output of the convert command
def set_image_width(image, width)
  escaped_image = image.shellescape
  escaped_width = width.to_s.shellescape
  `convert #{escaped_image} -background none -extent #{escaped_width}x0 #{escaped_image}`
end