Module: Transformers::ImageUtils

Defined in:
lib/transformers/image_utils.rb

Class Method Summary collapse

Class Method Details

.get_channel_dimension_axis(image, input_data_format: nil) ⇒ Object

Raises:

  • (ArgumentError)


110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/transformers/image_utils.rb', line 110

# Returns the index of the channel axis of +image+.
#
# @param image [#ndim] array-like object; only +ndim+ is read here (and the
#   object is handed to +infer_channel_dimension_format+ when no format is given)
# @param input_data_format [Object, nil] +ChannelDimension::FIRST+ or
#   +ChannelDimension::LAST+; inferred from the image when +nil+
# @return [Integer] +image.ndim - 3+ for FIRST, +image.ndim - 1+ for LAST
# @raise [ArgumentError] if the format is neither FIRST nor LAST
def self.get_channel_dimension_axis(image, input_data_format: nil)
  data_format = input_data_format.nil? ? infer_channel_dimension_format(image) : input_data_format

  return image.ndim - 3 if data_format == ChannelDimension::FIRST
  return image.ndim - 1 if data_format == ChannelDimension::LAST

  raise ArgumentError, "Unsupported data format: #{data_format}"
end

.infer_channel_dimension_format(image, num_channels: nil) ⇒ Object

Raises:

  • (ArgumentError)


83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/transformers/image_utils.rb', line 83

# Guesses whether the channel axis of +image+ comes first or last by comparing
# the candidate axis sizes against the allowed channel counts.
#
# @param image [#ndim, #shape] array-like object with 3 or 4 dimensions
# @param num_channels [Integer, Array<Integer>, nil] allowed channel counts;
#   defaults to +[1, 3]+ when +nil+
# @return [Object] +ChannelDimension::FIRST+ or +ChannelDimension::LAST+
# @raise [ArgumentError] if +image.ndim+ is not 3 or 4, or if neither candidate
#   axis has an allowed size
def self.infer_channel_dimension_format(image, num_channels: nil)
  allowed = num_channels.nil? ? [1, 3] : num_channels
  allowed = [allowed] if allowed.is_a?(Integer)

  # Candidate channel axes: (0, 2) for a 3-D input, (1, 3) for a 4-D input.
  first_dim, last_dim =
    case image.ndim
    when 3 then [0, 2]
    when 4 then [1, 3]
    else raise ArgumentError, "Unsupported number of image dimensions: #{image.ndim}"
    end

  first_matches = allowed.include?(image.shape[first_dim])
  last_matches = allowed.include?(image.shape[last_dim])

  if first_matches
    if last_matches
      # Both axes look like channels: warn and fall back to channels-first.
      Transformers.logger.warn(
        "The channel dimension is ambiguous. Got image shape #{image.shape}. Assuming channels are the first dimension."
      )
    end
    return ChannelDimension::FIRST
  end
  return ChannelDimension::LAST if last_matches

  raise ArgumentError, "Unable to infer channel dimension format"
end

.is_scaled_image(image) ⇒ Object



147
148
149
150
151
152
153
154
# File 'lib/transformers/image_utils.rb', line 147

# Reports whether the pixel values of +image+ all lie within [0, 1].
#
# A +Numo::UInt8+ input is always reported as not scaled. Any other input is
# judged by its value range, because a floating-point image may still hold
# values in [0, 255].
#
# @param image [#min, #max] array-like object
# @return [Boolean]
def self.is_scaled_image(image)
  !image.is_a?(Numo::UInt8) && image.min >= 0 && image.max <= 1
end

.is_valid_image(img) ⇒ Object



128
129
130
# File 'lib/transformers/image_utils.rb', line 128

# Checks whether +img+ is accepted by any of the three image predicates
# (+is_vips_image+, +is_numo_array+, +is_torch_tensor+), tried in that order.
#
# @param img [Object] candidate image
# @return [Object] the first truthy predicate result, otherwise the result of
#   +is_torch_tensor+
def self.is_valid_image(img)
  vips_result = is_vips_image(img)
  return vips_result if vips_result

  numo_result = is_numo_array(img)
  return numo_result if numo_result

  is_torch_tensor(img)
end

.is_vips_image(img) ⇒ Object



124
125
126
# File 'lib/transformers/image_utils.rb', line 124

# Checks whether +img+ is a +Vips::Image+, provided the vision backend is
# reported as available.
#
# @param img [Object] candidate image
# @return [Object] the falsy result of +Utils.is_vision_available+ when the
#   backend is unavailable; otherwise whether +img+ is a +Vips::Image+
def self.is_vips_image(img)
  vision_available = Utils.is_vision_available
  # Do not touch the Vips constant unless the backend is reported available.
  return vision_available unless vision_available

  img.is_a?(Vips::Image)
end

.load_image(image, timeout: nil) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/transformers/image_utils.rb', line 22

# Loads +image+ as a +Vips::Image+.
#
# Accepted inputs, checked in this order:
# * a +URI+ - its content is read and decoded from the resulting buffer
# * a +String+ naming an existing file - decoded from that file
# * a +Vips::Image+ - returned unchanged
#
# @param image [URI, String, Vips::Image] source to load
# @param timeout [Object, nil] passed as both +open_timeout+ and +read_timeout+
#   when reading a URI
# @return [Vips::Image]
# @raise [ArgumentError] for any other input, including a String that does not
#   name an existing file
def self.load_image(image, timeout: nil)
  Utils.requires_backends(__method__, ["vision"])

  if image.is_a?(URI)
    # Loaded lazily so open-uri is only required when a URI is actually read.
    require "open-uri"

    payload = image.read(open_timeout: timeout, read_timeout: timeout)
    return Vips::Image.new_from_buffer(payload, "")
  end

  return Vips::Image.new_from_file(image) if image.is_a?(String) && File.exist?(image)
  return image if image.is_a?(Vips::Image)

  raise ArgumentError, "Incorrect format used for image"
end

.make_list_of_images(images, expected_ndims: 3) ⇒ Object



74
75
76
77
# File 'lib/transformers/image_utils.rb', line 74

# Wraps a single image in an Array; an Array argument is returned unchanged.
#
# @param images [Array, Object] one image or an Array of images
# @param expected_ndims [Integer] currently unused by this implementation
# @return [Array]
def self.make_list_of_images(images, expected_ndims: 3)
  # TODO improve
  return images if images.is_a?(Array)

  [images]
end

.to_numo_array(img) ⇒ Object



79
80
81
# File 'lib/transformers/image_utils.rb', line 79

# Builds a +Numo::UInt8+ from the raw bytes returned by +img.write_to_memory+,
# using the shape +[height, width, bands]+.
#
# @param img [#write_to_memory, #height, #width, #bands] source image
#   (presumably a Vips::Image - confirm against callers)
# @return [Numo::UInt8]
def self.to_numo_array(img)
  raw_pixels = img.write_to_memory
  dimensions = [img.height, img.width, img.bands]
  Numo::UInt8.from_binary(raw_pixels, dimensions)
end

.valid_images(imgs) ⇒ Object



132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/transformers/image_utils.rb', line 132

# Checks that +imgs+ consists only of valid images.
#
# Arrays are checked element by element, recursing into nested Arrays. Any
# other value is treated as a single image (or batched tensor of images) and
# checked with +is_valid_image+.
#
# @param imgs [Array, Object] image, or arbitrarily nested Array of images
# @return [Boolean] +true+ when every leaf is valid (an empty Array is valid)
def self.valid_images(imgs)
  # A list is valid only when each entry is; stops at the first invalid entry.
  return imgs.all? { |entry| valid_images(entry) } if imgs.is_a?(Array)

  # Not a list: a single image or batched tensor of images.
  is_valid_image(imgs) ? true : false
end

.validate_kwargs(valid_processor_keys:, captured_kwargs:) ⇒ Object



156
157
158
159
160
161
162
163
# File 'lib/transformers/image_utils.rb', line 156

# Logs a warning that lists every captured kwarg missing from the set of
# valid processor keys. Nothing is logged when there are none.
#
# @param valid_processor_keys [Enumerable] keys the processor accepts
# @param captured_kwargs [Enumerable] keys that were actually supplied
# @return [Object, nil] the result of the logger call when a warning is
#   emitted, otherwise +nil+
def self.validate_kwargs(valid_processor_keys:, captured_kwargs:)
  unrecognized = Set.new(captured_kwargs) - Set.new(valid_processor_keys)
  return unless unrecognized.any?

  # TODO raise a warning here instead of simply logging?
  Transformers.logger.warn("Unused or unrecognized kwargs: #{unrecognized.join(", ")}.")
end

.validate_preprocess_arguments(do_rescale: nil, rescale_factor: nil, do_normalize: nil, image_mean: nil, image_std: nil, do_pad: nil, size_divisibility: nil, do_center_crop: nil, crop_size: nil, do_resize: nil, size: nil, resample: nil) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/transformers/image_utils.rb', line 38

# Verifies that every enabled preprocessing step has the arguments it needs.
#
# Checks run in this order, and the first failure raises: rescale, pad,
# normalize, center crop, resize. A step whose +do_*+ flag is falsy is skipped.
#
# @param do_rescale [Object] requires +rescale_factor+ when truthy
# @param do_normalize [Object] requires +image_mean+ and +image_std+ when truthy
# @param do_pad [Object] requires +size_divisibility+ when truthy
# @param do_center_crop [Object] requires +crop_size+ when truthy
# @param do_resize [Object] requires +size+ and +resample+ when truthy
# @return [nil]
# @raise [ArgumentError] if a required argument of an enabled step is +nil+
def self.validate_preprocess_arguments(
  do_rescale: nil, rescale_factor: nil,
  do_normalize: nil, image_mean: nil, image_std: nil,
  do_pad: nil, size_divisibility: nil,
  do_center_crop: nil, crop_size: nil,
  do_resize: nil, size: nil, resample: nil
)
  raise ArgumentError, "`rescale_factor` must be specified if `do_rescale` is `true`." if do_rescale && rescale_factor.nil?

  # The caller may have passed size_divisor as the value of size.
  raise ArgumentError, "Depending on the model, `size_divisibility`, `size_divisor`, `pad_size` or `size` must be specified if `do_pad` is `true`." if do_pad && size_divisibility.nil?

  raise ArgumentError, "`image_mean` and `image_std` must both be specified if `do_normalize` is `true`." if do_normalize && [image_mean, image_std].any?(&:nil?)

  raise ArgumentError, "`crop_size` must be specified if `do_center_crop` is `true`." if do_center_crop && crop_size.nil?

  raise ArgumentError, "`size` and `resample` must be specified if `do_resize` is `true`." if do_resize && [size, resample].any?(&:nil?)
end