Class: Informers::ImageFeatureExtractor
- Inherits: FeatureExtractor
- Inheritance chain: Object → FeatureExtractor → Informers::ImageFeatureExtractor
- Defined in:
- lib/informers/processors.rb
Direct Known Subclasses
CLIPFeatureExtractor, DPTFeatureExtractor, DetrFeatureExtractor, DonutFeatureExtractor, OwlViTFeatureExtractor, Swin2SRImageProcessor, ViTFeatureExtractor
Instance Method Summary
- #call(images, *args) ⇒ Object
- #get_resize_output_image_size(image, size) ⇒ Object
- #initialize(config) ⇒ ImageFeatureExtractor (constructor): returns a new instance of ImageFeatureExtractor.
- #pad_image(pixel_data, img_dims, pad_size, mode: "constant", center: false, constant_values: 0) ⇒ Object
- #preprocess(image, do_normalize: nil, do_pad: nil, do_convert_rgb: nil, do_convert_grayscale: nil, do_flip_channel_order: nil) ⇒ Object
- #rescale(pixel_data) ⇒ Object
- #resize(image) ⇒ Object
- #thumbnail(image, size, resample = 2) ⇒ Object
Constructor Details
#initialize(config) ⇒ ImageFeatureExtractor
Returns a new instance of ImageFeatureExtractor.
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/informers/processors.rb', line 10

# Builds the extractor from a preprocessing configuration.
#
# After delegating to the superclass constructor, the following string keys
# are read from `@config` (presumably assigned by the superclass from
# `config` -- confirm against FeatureExtractor):
# `image_mean`/`mean`, `image_std`/`std`, `resample`, `do_rescale`,
# `rescale_factor`, `do_normalize`, `do_resize`, `do_thumbnail`, `size`,
# `size_divisibility`/`size_divisor`, `do_center_crop`, `crop_size`,
# `do_convert_rgb`, `do_crop_margin`, `pad_size`, `do_pad` and
# `do_flip_channel_order`.
#
# @param config [Hash] preprocessing options, passed to the superclass
def initialize(config)
  super(config)

  @image_mean = @config["image_mean"] || @config["mean"]
  @image_std = @config["image_std"] || @config["std"]

  @resample = @config["resample"] || 2 # 2 => bilinear
  # `fetch` (not `||`) so that an explicit `false` in the config is honoured
  @do_rescale = @config.fetch("do_rescale", true)
  @rescale_factor = @config["rescale_factor"] || (1 / 255.0)
  @do_normalize = @config["do_normalize"]

  @do_resize = @config["do_resize"]
  @do_thumbnail = @config["do_thumbnail"]
  @size = @config["size"]
  @size_divisibility = @config["size_divisibility"] || @config["size_divisor"]

  @do_center_crop = @config["do_center_crop"]
  @crop_size = @config["crop_size"]
  @do_convert_rgb = @config.fetch("do_convert_rgb", true)
  @do_crop_margin = @config["do_crop_margin"]

  @pad_size = @config["pad_size"]
  @do_pad = @config["do_pad"]

  # Should pad, but no pad size specified: infer the pad size from the resize
  # size. Only a Hash `size` carrying both "width" and "height" qualifies; a
  # numeric `size` (which the resize logic accepts) must not be indexed with a
  # String key, because Integer#[] raises TypeError for it.
  if @do_pad && !@pad_size && @size.is_a?(Hash) && !@size["width"].nil? && !@size["height"].nil?
    @pad_size = @size
  end

  @do_flip_channel_order = @config["do_flip_channel_order"] || false
end
Instance Method Details
#call(images, *args) ⇒ Object
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 |
# File 'lib/informers/processors.rb', line 330

# Preprocesses one image or a list of images and collects the results.
#
# @param images [Object, Array] a single image or an Array of images; a
#   non-Array argument is wrapped in a one-element Array
# @param args [Array] accepted but not used by this method
# @return [Hash] with keys
#   - `:pixel_values` -- the per-image `:pixel_values` passed through
#     `Utils.stack(..., 0)`
#   - `:original_sizes` -- each image's `:original_size`
#   - `:reshaped_input_sizes` -- each image's `:reshaped_input_size`
#     (size before padding or cropping)
def call(images, *args)
  batch = images.is_a?(Array) ? images : [images]
  processed = batch.map { |img| preprocess(img) }

  # Stack pixel values
  stacked = Utils.stack(processed.map { |item| item[:pixel_values] }, 0)

  {
    pixel_values: stacked,
    original_sizes: processed.map { |item| item[:original_size] },
    reshaped_input_sizes: processed.map { |item| item[:reshaped_input_size] }
  }
end
#get_resize_output_image_size(image, size) ⇒ Object
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
# File 'lib/informers/processors.rb', line 151

# Computes the `[width, height]` an image should be resized to.
#
# Edge-based sizing (aspect ratio preserved) is used when a shortest and/or
# longest edge can be derived from `size`; otherwise an explicit
# `"width"`/`"height"` pair is returned as-is.
#
# @param image [#size] object whose `size` returns `[width, height]`
# @param size [Numeric, Hash, nil] a number (used as the shortest edge, with
#   `@config["max_size"]` or the same number as the longest edge), or a Hash
#   with `"shortest_edge"`/`"longest_edge"` or `"width"`/`"height"` keys
# @return [Array] `[width, height]`
# @raise [Todo] for unsupported cases: `@size_divisibility` set with
#   edge-based sizing, `keep_aspect_ratio` together with `ensure_multiple_of`,
#   or a `size` that matches none of the supported shapes
def get_resize_output_image_size(image, size)
  src_width, src_height = image.size

  shortest_edge = nil
  longest_edge = nil
  if @do_thumbnail
    # NOTE: custom logic for `Donut` models
    shortest_edge = [size["height"], size["width"]].min
  elsif size.is_a?(Numeric)
    shortest_edge = size
    longest_edge = @config["max_size"] || shortest_edge
  elsif !size.nil?
    # Extract known properties from `size`
    shortest_edge = size["shortest_edge"]
    longest_edge = size["longest_edge"]
  end

  unless shortest_edge.nil? && longest_edge.nil?
    # http://opensourcehacker.com/2011/12/01/calculate-aspect-ratio-conserving-resize-for-images-in-javascript/
    # First scale so that the shortest side becomes `shortest_edge`;
    # without a target, leave the size alone (factor 1).
    upscale =
      if shortest_edge.nil?
        1
      else
        [shortest_edge / src_width.to_f, shortest_edge / src_height.to_f].max
      end
    scaled_width = src_width * upscale
    scaled_height = src_height * upscale

    # The scaled size may exceed `longest_edge`, so scale down again to keep
    # the largest side within it; without a limit, factor 1.
    downscale =
      if longest_edge.nil?
        1
      else
        [longest_edge / scaled_width.to_f, longest_edge / scaled_height.to_f].min
      end

    # Rounding to 2 decimal places first avoids floating point precision
    # issues before flooring.
    final_width = (scaled_width * downscale).round(2).floor
    final_height = (scaled_height * downscale).round(2).floor

    raise Todo unless @size_divisibility.nil?

    return [final_width, final_height]
  end

  has_explicit_dims = !size.nil? && !size["width"].nil? && !size["height"].nil?
  raise Todo unless has_explicit_dims

  target_width = size["width"]
  target_height = size["height"]

  raise Todo if @config["keep_aspect_ratio"] && @config["ensure_multiple_of"]

  [target_width, target_height]
end
#pad_image(pixel_data, img_dims, pad_size, mode: "constant", center: false, constant_values: 0) ⇒ Object
65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# File 'lib/informers/processors.rb', line 65

# Pads flat, interleaved pixel data out to a target size.
#
# The index arithmetic treats `pixel_data` as row-major
# height x width x channels data: the value for row `r`, column `c`,
# channel `k` lives at `(r * width + c) * channels + k`.
#
# @param pixel_data [Array] flat pixel values of the source image
# @param img_dims [Array] `[height, width, channels]` of the source image
# @param pad_size [Numeric, Hash] target size; a number pads to a square,
#   otherwise `:width`/`:height` (or `"width"`/`"height"`) are read
# @param mode [String] `"constant"` (default) or `"symmetric"`; only
#   `"symmetric"` is checked for, any other value pads with the constant
# @param center [Boolean] place the source image in the middle of the padded
#   canvas instead of at the top-left
# @param constant_values [Numeric, Array] fill value for padded pixels; an
#   Array supplies one value per channel
# @return [Array] `[pixel_data, img_dims]`; both inputs are returned
#   unchanged when the target size equals the source size
# @raise [Error] when padding is needed and `center` is combined with
#   `mode: "symmetric"`
def pad_image(
  pixel_data,
  img_dims,
  pad_size,
  mode: "constant",
  center: false,
  constant_values: 0
)
  image_height, image_width, image_channels = img_dims

  if pad_size.is_a?(Numeric)
    padded_image_width = pad_size
    padded_image_height = pad_size
  else
    padded_image_width = pad_size[:width] || pad_size["width"]
    padded_image_height = pad_size[:height] || pad_size["height"]
  end

  # Only add padding if there is a difference in size
  if padded_image_width == image_width && padded_image_height == image_height
    return [pixel_data, img_dims]
  end

  # Pre-fill the canvas with the padding value. The buffer must never be left
  # at Array.new's default of nil: with the default `constant_values` of 0
  # the padded pixels have to be 0, not nil.
  padded_length = padded_image_width * padded_image_height * image_channels
  padded_pixel_data =
    if constant_values.is_a?(Array)
      # Per-channel fill, cycling through the array
      Array.new(padded_length) { |i| constant_values[i % image_channels] }
    else
      Array.new(padded_length, constant_values)
    end

  left, top =
    if center
      [((padded_image_width - image_width) / 2.0).floor, ((padded_image_height - image_height) / 2.0).floor]
    else
      [0, 0]
    end

  # Copy the original image into the padded image
  image_height.times do |i|
    a = (i + top) * padded_image_width
    b = i * image_width
    image_width.times do |j|
      c = (a + j + left) * image_channels
      d = (b + j) * image_channels
      image_channels.times do |k|
        padded_pixel_data[c + k] = pixel_data[d + k]
      end
    end
  end

  if mode == "symmetric"
    if center
      raise Error, "`center` padding is not supported when `mode` is set to `symmetric`."
    end

    h1 = image_height - 1
    w1 = image_width - 1
    padded_image_height.times do |i|
      a = i * padded_image_width
      # Source row/column for a padded position comes from
      # Utils.calculate_reflect_offset (defined elsewhere).
      b = Utils.calculate_reflect_offset(i, h1) * image_width

      padded_image_width.times do |j|
        next if i < image_height && j < image_width # Do not overwrite original image

        c = (a + j) * image_channels
        d = (b + Utils.calculate_reflect_offset(j, w1)) * image_channels

        # Copy channel-wise
        image_channels.times do |k|
          padded_pixel_data[c + k] = pixel_data[d + k]
        end
      end
    end
  end

  # Return the padded data together with its updated dimensions
  [padded_pixel_data, [padded_image_height, padded_image_width, image_channels]]
end
#preprocess(image, do_normalize: nil, do_pad: nil, do_convert_rgb: nil, do_convert_grayscale: nil, do_flip_channel_order: nil) ⇒ Object
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 |
# File 'lib/informers/processors.rb', line 217

# Runs the configured preprocessing steps on a single image and returns its
# pixel values in channels-first nested arrays.
#
# Steps, in order: optional margin crop, RGB/grayscale conversion, resize,
# thumbnail, center crop, rescale, normalize, pad, then conversion from
# height/width/channels order to channels/height/width order.
#
# Each keyword argument overrides the matching instance setting when it is
# not nil. `do_convert_grayscale` has no instance counterpart and is only
# consulted when RGB conversion is off.
#
# @param image [Object] image responding to `size` (`[width, height]`),
#   `width`, `height`, `channels`, `data`, and, depending on the enabled
#   steps, `rgb`, `grayscale` and `center_crop`
# @param do_normalize [Boolean, nil] override for `@do_normalize`
# @param do_pad [Boolean, nil] override for `@do_pad`
# @param do_convert_rgb [Boolean, nil] override for `@do_convert_rgb`
# @param do_convert_grayscale [Boolean, nil] convert to grayscale
# @param do_flip_channel_order [Boolean, nil] override for
#   `@do_flip_channel_order` (not implemented: raises Todo when enabled)
# @return [Hash] `:original_size` (`[height, width]` before resizing),
#   `:reshaped_input_size` (`[height, width]` after resize/crop, before
#   padding) and `:pixel_values` (nested `[channel][row][column]` arrays)
# @raise [Error] when `image_mean`/`image_std` lengths do not match the
#   number of image channels
# @raise [Todo] for padding by `size_divisibility` and for channel flipping
def preprocess(
  image,
  do_normalize: nil,
  do_pad: nil,
  do_convert_rgb: nil,
  do_convert_grayscale: nil,
  do_flip_channel_order: nil
)
  if @do_crop_margin
    # NOTE: Specific to nougat processors. This is done before resizing,
    # and can be interpreted as a pre-preprocessing step.
    image = crop_margin(image)
  end

  src_width, src_height = image.size # original image size

  # Convert image to RGB if specified in config.
  if do_convert_rgb.nil? ? @do_convert_rgb : do_convert_rgb
    image = image.rgb
  elsif do_convert_grayscale
    image = image.grayscale
  end

  # Resize all images
  image = resize(image) if @do_resize

  # Resize the image using thumbnail method.
  image = thumbnail(image, @size, @resample) if @do_thumbnail

  if @do_center_crop
    if @crop_size.is_a?(Integer)
      crop_width = @crop_size
      crop_height = @crop_size
    else
      crop_width = @crop_size["width"]
      crop_height = @crop_size["height"]
    end
    image = image.center_crop(crop_width, crop_height)
  end

  reshaped_input_size = [image.height, image.width]

  # NOTE: All pixel-level manipulation (i.e., modifying `pixel_data`)
  # occurs with data in the hwc format (height, width, channels),
  # to emulate the behavior of the original Python code (w/ numpy).
  pixel_data = image.data
  img_dims = [image.height, image.width, image.channels]

  # `rescale` modifies `pixel_data` in place
  rescale(pixel_data) if @do_rescale

  if do_normalize.nil? ? @do_normalize : do_normalize
    channels = image.channels

    # A scalar mean/std applies to every channel: expand it to one entry per
    # channel. (`Array.new(n, value)` is the Ruby form; `new Array(n)` is
    # JavaScript syntax and raises NoMethodError here.)
    image_mean = @image_mean.is_a?(Array) ? @image_mean : Array.new(channels, @image_mean)
    image_std = @image_std.is_a?(Array) ? @image_std : Array.new(channels, @image_std)

    if image_mean.length != channels || image_std.length != channels
      raise Error, "When set to arrays, the length of `image_mean` (#{image_mean.length}) and `image_std` (#{image_std.length}) must match the number of channels in the image (#{image.channels})."
    end

    # Data is interleaved per pixel, so the channel of a flat index is
    # `index % channels`. Values are updated in place.
    pixel_data.length.times do |index|
      channel = index % channels
      pixel_data[index] = (pixel_data[index] - image_mean[channel]) / image_std[channel]
    end
  end

  # do padding after rescaling/normalizing
  if do_pad.nil? ? @do_pad : do_pad
    if @pad_size
      # Update pixel data and image dimensions
      pixel_data, img_dims = pad_image(pixel_data, [image.height, image.width, image.channels], @pad_size)
    elsif @size_divisibility
      raise Todo
    end
  end

  if do_flip_channel_order.nil? ? @do_flip_channel_order : do_flip_channel_order
    raise Todo
  end

  # convert to channel dimension format (hwc -> chw)
  h, w, c = img_dims
  pixel_values =
    c.times.map do |ci|
      h.times.map do |hi|
        w.times.map do |wi|
          pixel_data[(hi * w * c) + (wi * c) + ci]
        end
      end
    end

  {
    original_size: [src_height, src_width],
    reshaped_input_size: reshaped_input_size,
    pixel_values: pixel_values
  }
end
#rescale(pixel_data) ⇒ Object
145 146 147 148 149 |
# File 'lib/informers/processors.rb', line 145

# Multiplies every element of `pixel_data` by `@rescale_factor`, in place.
#
# @param pixel_data [Array] flat pixel values; modified by this call
# @return [Integer] the number of elements in `pixel_data`
def rescale(pixel_data)
  element_count = pixel_data.length
  (0...element_count).each do |index|
    pixel_data[index] = pixel_data[index] * @rescale_factor
  end
  element_count
end
#resize(image) ⇒ Object
212 213 214 215 |
# File 'lib/informers/processors.rb', line 212

# Resizes `image` to the dimensions that `get_resize_output_image_size`
# derives from `@size`, using the `@resample` setting.
#
# @param image [Object] image responding to `resize(width, height, resample:)`
# @return [Object] whatever `image.resize` returns
def resize(image)
  target_width, target_height = get_resize_output_image_size(image, @size)
  image.resize(target_width, target_height, resample: @resample)
end
#thumbnail(image, size, resample = 2) ⇒ Object
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/informers/processors.rb', line 43

# Shrinks `image` to fit within `size`, never enlarging it.
#
# Each target dimension is the smaller of the input and requested value. If
# that leaves the image size unchanged, the image itself is returned.
# Otherwise the shorter target side is recomputed from the source aspect
# ratio before resizing.
#
# @param image [Object] image responding to `height`, `width` and
#   `resize(width, height, resample:)`
# @param size [Hash] requested size with `"height"` and `"width"` keys
# @param resample [Integer] resampling setting forwarded to `image.resize`
# @return [Object] `image` itself when no resize is needed, otherwise the
#   result of `image.resize`
def thumbnail(image, size, resample = 2)
  source_height = image.height
  source_width = image.width

  # We always resize to the smallest of either the input or output size.
  target_height = [source_height, size["height"]].min
  target_width = [source_width, size["width"]].min

  return image if target_height == source_height && target_width == source_width

  if source_height > source_width
    target_width = (source_width * target_height / source_height).floor
  elsif source_width > source_height
    target_height = (source_height * target_width / source_width).floor
  end

  image.resize(target_width, target_height, resample: resample)
end