Class: Informers::DetrFeatureExtractor

Inherits: ImageFeatureExtractor

Inheritance chain: Object → FeatureExtractor → ImageFeatureExtractor → Informers::DetrFeatureExtractor

Defined in: lib/informers/processors.rb

Instance Method Summary collapse

Methods inherited from ImageFeatureExtractor

#get_resize_output_image_size, #initialize, #pad_image, #preprocess, #rescale, #resize, #thumbnail

Methods inherited from FeatureExtractor

#initialize

Constructor Details

This class inherits a constructor from Informers::ImageFeatureExtractor

Instance Method Details

#call(images) ⇒ `Object`

# File 'lib/informers/processors.rb', line 421

# Runs the inherited preprocessing and attaches a placeholder pixel mask.
#
# The mask holds one 64x64 grid of 1s for each entry in the superclass
# result's `:pixel_values`.
#
# @param images forwarded unchanged to the superclass `call`
# @return [Hash] the superclass result merged with a `:pixel_mask` entry
def call(images)
  features = super(images)

  # TODO support differently-sized images, for now assume all images are the same size.
  # TODO support different mask sizes (not just 64x64)
  # Until then every mask position is simply set to 1.
  batch = features[:pixel_values].size
  all_ones = Array.new(batch) { Array.new(64) { Array.new(64, 1) } }

  features.merge(pixel_mask: all_ones)
end

#check_segment_validity(mask_labels, mask_probs, k, mask_threshold = 0.5, overlap_mask_area_threshold = 0.8) ⇒ `Object`

# File 'lib/informers/processors.rb', line 471

# Decides whether query `k` owns enough pixels to count as a segment.
#
# Two areas are measured over the first `mask_labels.length` pixels:
# * the number of pixels whose label in `mask_labels` equals `k`, and
# * the number of pixels whose value in the flattened `mask_probs[k]` is at
#   least `mask_threshold`.
# The segment is valid when both areas are non-zero and the first divided by
# the second is strictly greater than `overlap_mask_area_threshold`.
#
# @param mask_labels [Array] per-pixel label (flat)
# @param mask_probs [Array] per-query masks; `mask_probs[k]` is flattened here
# @param k [Integer] query index to check
# @param mask_threshold [Numeric] minimum value for a pixel to count towards the original area
# @param overlap_mask_area_threshold [Numeric] minimum area ratio (exclusive)
# @return [Array] `[mask_exists, mask_k]` where `mask_k` lists the pixel indices labelled `k`
def check_segment_validity(
  mask_labels,
  mask_probs,
  k,
  mask_threshold = 0.5,
  overlap_mask_area_threshold = 0.8
)
  mask_probs_k_data = mask_probs[k].flatten

  # mask_k is a 1D array of indices, indicating where the mask is equal to k
  mask_k = mask_labels.each_index.select { |i| mask_labels[i] == k }
  mask_k_area = mask_k.length

  # Compute the area of all the stuff in query k
  original_area = mask_labels.each_index.count { |i| mask_probs_k_data[i] >= mask_threshold }

  return [false, mask_k] unless mask_k_area > 0 && original_area > 0

  # Eliminate disconnected tiny segments.
  # Both areas are Integers, so plain `/` would truncate the ratio to 0 for any
  # segment smaller than its original area; divide as floats instead.
  area_ratio = mask_k_area.to_f / original_area

  [area_ratio > overlap_mask_area_threshold, mask_k]
end

#compute_segments(mask_probs, pred_scores, pred_labels, mask_threshold, overlap_mask_area_threshold, label_ids_to_fuse = nil, target_size = nil) ⇒ `Object`

# File 'lib/informers/processors.rb', line 508

# Builds a segmentation map and a list of segments from per-query masks.
#
# Every mask is multiplied by its prediction score, each pixel is assigned to
# the query with the highest weighted value (a pixel stays unlabelled when no
# weighted value is above 0), and each query accepted by
# `check_segment_validity` becomes a segment with a sequential id starting at 1.
#
# NOTE: `mask_probs` is updated in-place (resized when `target_size` is given,
# then weighted by the scores).
#
# @param mask_probs [Array] one mask per query
# @param pred_scores [Array] prediction score per query
# @param pred_labels [Array] predicted class id per query
# @param mask_threshold forwarded to `check_segment_validity`
# @param overlap_mask_area_threshold forwarded to `check_segment_validity`
# @param label_ids_to_fuse currently unused (fusing is not implemented yet)
# @param target_size [Array, nil] `[height, width]`; when nil the size comes
#   from `Utils.dims(mask_probs[0])`
# @return [Array] `[segmentation, segments]`; `segmentation` is reshaped to
#   `[height, width]` with `nil` for pixels outside every segment, and each
#   segment is a Hash with `:id`, `:label_id` and `:score`
def compute_segments(
  mask_probs,
  pred_scores,
  pred_labels,
  mask_threshold,
  overlap_mask_area_threshold,
  label_ids_to_fuse = nil,
  target_size = nil
)
  height, width = target_size || Utils.dims(mask_probs[0])

  segmentation = Array.new(height * width)
  segments = []

  # 1. Bring every mask to the requested size, if one was given
  unless target_size.nil?
    mask_probs.each_index do |query|
      mask_probs[query] = Utils.interpolate(mask_probs[query], target_size, "bilinear", false)
    end
  end

  # 2. Weigh each mask by its prediction score while tracking, for every
  #    pixel, which query currently has the highest weighted value
  mask_labels = Array.new(mask_probs[0].flatten.length)
  best_scores = Array.new(mask_probs[0].flatten.length, 0)

  mask_probs.each_index do |query|
    weight = pred_scores[query]

    flat = mask_probs[query].flatten
    shape = Utils.dims(mask_probs[query])

    flat.each_index do |pixel|
      weighted = flat[pixel] * weight
      flat[pixel] = weighted
      next unless weighted > best_scores[pixel]

      mask_labels[pixel] = query
      best_scores[pixel] = weighted
    end

    mask_probs[query] = Utils.reshape(flat, shape)
  end

  # 3. Turn every query that passes the validity check into a segment
  # TODO add `should_fuse` (based on label_ids_to_fuse.include?(pred_class))
  pred_labels.each_with_index do |pred_class, k|
    mask_exists, mask_k = check_segment_validity(
      mask_labels,
      mask_probs,
      k,
      mask_threshold,
      overlap_mask_area_threshold
    )
    next unless mask_exists

    # Ids are handed out in order, so the next id is one past the count so far
    segment_id = segments.length + 1

    # Add current object segment to final segmentation map
    mask_k.each { |pixel| segmentation[pixel] = segment_id }

    segments << {
      id: segment_id,
      label_id: pred_class,
      score: pred_scores[k]
    }
  end

  [Utils.reshape(segmentation, [height, width]), segments]
end

#post_process_object_detection(*args) ⇒ `Object`



440
441
442

# File 'lib/informers/processors.rb', line 440

# Delegates object-detection post-processing to
# `Utils.post_process_object_detection`.
#
# @param args positional arguments, forwarded unchanged
# @return whatever `Utils.post_process_object_detection` returns
def post_process_object_detection(*args)
  Utils.post_process_object_detection(*args)
end

#post_process_panoptic_segmentation(outputs, threshold: 0.5, mask_threshold: 0.5, overlap_mask_area_threshold: 0.8, label_ids_to_fuse: nil, target_sizes: nil) ⇒ `Object`

# File 'lib/informers/processors.rb', line 596

# Converts raw segmentation outputs into one panoptic result per batch item.
#
# For each item the queries are filtered with `remove_low_and_no_objects` and
# the survivors are turned into a segmentation map by `compute_segments`.
# When no query survives, the item gets a map filled with -1 and an empty
# `segments_info` instead of raising.
#
# @param outputs [Hash] model outputs; reads `:logits`
#   ([batch_size, num_queries, num_classes+1]) and `:pred_masks`
#   ([batch_size, num_queries, height, width])
# @param threshold [Numeric] score threshold passed to `remove_low_and_no_objects`
# @param mask_threshold [Numeric] passed to `compute_segments`
# @param overlap_mask_area_threshold [Numeric] passed to `compute_segments`
# @param label_ids_to_fuse [Set, nil] passed to `compute_segments`; an empty
#   Set (with a warning) when nil
# @param target_sizes [Array, nil] one `[height, width]` per batch item, or nil
#   to keep the mask size
# @return [Array<Hash>] one `{segmentation:, segments_info:}` Hash per batch item
# @raise [Error] if `target_sizes` is given but its length differs from the batch size
def post_process_panoptic_segmentation(
  outputs,
  threshold: 0.5,
  mask_threshold: 0.5,
  overlap_mask_area_threshold: 0.8,
  label_ids_to_fuse: nil,
  target_sizes: nil
)
  if label_ids_to_fuse.nil?
    warn "`label_ids_to_fuse` unset. No instance will be fused."
    label_ids_to_fuse = Set.new
  end

  class_queries_logits = outputs[:logits] # [batch_size, num_queries, num_classes+1]
  masks_queries_logits = outputs[:pred_masks] # [batch_size, num_queries, height, width]

  mask_probs = Utils.sigmoid(masks_queries_logits) # [batch_size, num_queries, height, width]

  batch_size = class_queries_logits.size
  num_labels = class_queries_logits[0][0].size - 1 # Remove last class (background)

  if !target_sizes.nil? && target_sizes.length != batch_size
    raise Error, "Make sure that you pass in as many target sizes as the batch dimension of the logits"
  end

  to_return = []
  batch_size.times do |i|
    target_size = target_sizes.nil? ? nil : target_sizes[i]

    class_logits = class_queries_logits[i]
    mask_logits = mask_probs[i]

    mask_probs_item, pred_scores_item, pred_labels_item = remove_low_and_no_objects(class_logits, mask_logits, threshold, num_labels)

    if pred_labels_item.empty?
      # No mask found: report an all -1 map of the expected size and no segments.
      # Without a target size, use the trailing two dims of this item's masks.
      height, width = target_size || Utils.dims(mask_logits).last(2)
      to_return << {
        segmentation: Array.new(height) { Array.new(width, -1) },
        segments_info: []
      }
      next
    end

    # Get segmentation map and segment information of batch item
    segmentation, segments = compute_segments(
      mask_probs_item,
      pred_scores_item,
      pred_labels_item,
      mask_threshold,
      overlap_mask_area_threshold,
      label_ids_to_fuse,
      target_size
    )

    to_return << {
      segmentation: segmentation,
      segments_info: segments
    }
  end

  to_return
end

#remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels) ⇒ `Object`

# File 'lib/informers/processors.rb', line 444

# Keeps only the queries that predict a real class with enough confidence.
#
# A query is dropped when its top class index (from `Utils.max`) equals
# `num_labels` (the background class), or when the softmax score of that top
# class is not strictly greater than `object_mask_threshold`.
#
# @param class_logits [Array] class logits per query
# @param mask_logits [Array] mask per query, indexed like `class_logits`
# @param object_mask_threshold [Numeric] minimum score (exclusive)
# @param num_labels [Integer] index of the background class
# @return [Array] `[masks, scores, labels]` for the kept queries, in query order
def remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels)
  kept_masks = []
  kept_scores = []
  kept_labels = []

  class_logits.each_with_index do |logits, query|
    label = Utils.max(logits)[1]

    # Is the background, so we ignore it
    next if label == num_labels

    score = Utils.softmax(logits)[label]
    next unless score > object_mask_threshold

    kept_masks << mask_logits[query]
    kept_scores << score
    kept_labels << label
  end

  [kept_masks, kept_scores, kept_labels]
end

Class: Informers::DetrFeatureExtractor

Instance Method Summary collapse

Methods inherited from ImageFeatureExtractor

Methods inherited from FeatureExtractor

Constructor Details

Instance Method Details

#call(images) ⇒ Object

#check_segment_validity(mask_labels, mask_probs, k, mask_threshold = 0.5, overlap_mask_area_threshold = 0.8) ⇒ Object

#compute_segments(mask_probs, pred_scores, pred_labels, mask_threshold, overlap_mask_area_threshold, label_ids_to_fuse = nil, target_size = nil) ⇒ Object

#post_process_object_detection(*args) ⇒ Object

#post_process_panoptic_segmentation(outputs, threshold: 0.5, mask_threshold: 0.5, overlap_mask_area_threshold: 0.8, label_ids_to_fuse: nil, target_sizes: nil) ⇒ Object

#remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels) ⇒ Object

#call(images) ⇒ `Object`

#check_segment_validity(mask_labels, mask_probs, k, mask_threshold = 0.5, overlap_mask_area_threshold = 0.8) ⇒ `Object`

#compute_segments(mask_probs, pred_scores, pred_labels, mask_threshold, overlap_mask_area_threshold, label_ids_to_fuse = nil, target_size = nil) ⇒ `Object`

#post_process_object_detection(*args) ⇒ `Object`

#post_process_panoptic_segmentation(outputs, threshold: 0.5, mask_threshold: 0.5, overlap_mask_area_threshold: 0.8, label_ids_to_fuse: nil, target_sizes: nil) ⇒ `Object`

#remove_low_and_no_objects(class_logits, mask_logits, object_mask_threshold, num_labels) ⇒ `Object`