Class: Informers::ZeroShotObjectDetectionPipeline
- Defined in:
- lib/informers/pipelines.rb
Instance Method Summary
Methods inherited from Pipeline
Constructor Details
This class inherits a constructor from Informers::Pipeline
Instance Method Details
#call(images, candidate_labels, threshold: 0.1, top_k: nil, percentage: false) ⇒ Object
# File 'lib/informers/pipelines.rb', line 693

def call(
  images,
  candidate_labels,
  threshold: 0.1,
  top_k: nil,
  percentage: false
)
  is_batched = images.is_a?(Array)
  prepared_images = prepare_images(images)

  # Run tokenization
  text_inputs = @tokenizer.(candidate_labels,
    padding: true,
    truncation: true
  )

  # Run processor
  model_inputs = @processor.(prepared_images)

  # Since non-maximum suppression is performed for exporting, we need to
  # process each image separately. For more information, see:
  # https://github.com/huggingface/optimum/blob/e3b7efb1257c011db907ef40ab340e795cc5684c/optimum/exporters/onnx/model_configs.py#L1028-L1032
  to_return = []
  prepared_images.length.times do |i|
    image = prepared_images[i]
    image_size = percentage ? nil : [[image.height, image.width]]
    pixel_values = [model_inputs[:pixel_values][i]]

    # Run model with both text and pixel inputs
    output = @model.(text_inputs.merge(pixel_values: pixel_values))
    # TODO remove
    output = @model.instance_variable_get(:@session).outputs.map { |v| v[:name].to_sym }.zip(output).to_h

    processed = @processor.feature_extractor.post_process_object_detection(output, threshold, image_size, true)[0]
    result =
      processed[:boxes].map.with_index do |box, i|
        {
          label: candidate_labels[processed[:classes][i]],
          score: processed[:scores][i],
          box: get_bounding_box(box, !percentage),
        }
      end

    result.sort_by! { |v| -v[:score] }
    if !top_k.nil?
      result = result[0...top_k]
    end
    to_return << result
  end

  is_batched ? to_return : to_return[0]
end
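Example — a minimal usage sketch. It assumes the pipeline is constructed through Informers.pipeline with the "zero-shot-object-detection" task; the image path and candidate labels are illustrative, and the exact keys inside :box depend on get_bounding_box.

# Illustrative only: assumes Informers.pipeline exposes this task and
# downloads a default zero-shot object detection checkpoint.
detector = Informers.pipeline("zero-shot-object-detection")

# candidate_labels are free-form text queries; threshold drops low-confidence
# boxes and top_k limits how many detections are returned per image.
results = detector.("path/to/image.jpg", ["cat", "remote control"], threshold: 0.1, top_k: 5)

# Expected shape (values are placeholders):
# [{label: "cat", score: 0.9, box: {xmin: ..., ymin: ..., xmax: ..., ymax: ...}}, ...]

Passing an array of images returns an array of such result lists (one per image), since is_batched is true in that case; percentage: true returns box coordinates relative to the image size instead of pixels.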