Class: Informers::ZeroShotImageClassificationPipeline

Inherits:
Pipeline
  • Object
show all
Defined in:
lib/informers/pipelines.rb

Instance Method Summary collapse

Methods inherited from Pipeline

#initialize

Constructor Details

This class inherits a constructor from Informers::Pipeline

Instance Method Details

#call(images, candidate_labels, hypothesis_template: "This is a photo of {}") ⇒ Object



614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
# File 'lib/informers/pipelines.rb', line 614

# Scores one or more images against a list of candidate labels.
#
# Each label is inserted into +hypothesis_template+ to build one text prompt
# per label; the prompts and the processed images are fed to the model
# together, and the first model output is turned into per-label scores.
#
# @param images a single image or an Array of images; handed unchanged to
#   +prepare_images+ (accepted image types are defined there)
# @param candidate_labels [Array<String>] labels to score every image against
# @param hypothesis_template [String] prompt template; only the first "{}" is
#   replaced by the label
# @return [Array<Hash>, Array<Array<Hash>>] for each image an Array of
#   +{label:, score:}+ hashes sorted by descending score. When +images+ is not
#   an Array, the single image's Array is returned directly.
def call(images, candidate_labels, hypothesis_template: "This is a photo of {}")
  is_batched = images.is_a?(Array)
  prepared_images = prepare_images(images)
  is_siglip = @model.config[:model_type] == "siglip"

  # Insert each label into the hypothesis template. The block form of #sub is
  # used so that backslash sequences inside a label (e.g. "\\0") are inserted
  # literally instead of being interpreted as back-references.
  texts = candidate_labels.map { |label| hypothesis_template.sub("{}") { label } }

  # Run tokenization (SigLIP is tokenized with "max_length" padding)
  text_inputs = @tokenizer.(texts, padding: is_siglip ? "max_length" : true, truncation: true)

  # Run processor
  pixel_values = @processor.(prepared_images)[:pixel_values]

  # Run model with both text and pixel inputs
  output = @model.(text_inputs.merge(pixel_values: pixel_values))

  # Compare each image with each candidate label; output[0] is iterated as
  # one entry per image, each holding one value per label.
  results = output[0].map do |logits|
    # Sigmoid for SigLIP, softmax for every other model type
    probs = is_siglip ? Utils.sigmoid(logits) : Utils.softmax(logits)

    probs
      .map.with_index { |score, i| {label: candidate_labels[i], score: score} }
      .sort_by { |entry| -entry[:score] }
  end

  is_batched ? results : results[0]
end