Module: Transformers::ModuleUtilsMixin
- Included in: PreTrainedModel
- Defined in: lib/transformers/modeling_utils.rb
Instance Method Summary
- #get_extended_attention_mask(attention_mask, input_shape, device: nil, dtype: nil) ⇒ Object
- #get_head_mask(head_mask, num_hidden_layers, is_attention_chunked: false) ⇒ Object
Instance Method Details
#get_extended_attention_mask(attention_mask, input_shape, device: nil, dtype: nil) ⇒ Object
```ruby
# File 'lib/transformers/modeling_utils.rb', line 18

def get_extended_attention_mask(
  attention_mask,
  input_shape,
  device: nil,
  dtype: nil
)
  if dtype.nil?
    dtype = @dtype
  end

  if !(attention_mask.dim == 2 && @config.is_decoder)
    # show warning only if it won't be shown in `create_extended_attention_mask_for_decoder`
    if !device.nil?
      raise Todo
    end
  end

  # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
  # ourselves in which case we just need to make it broadcastable to all heads.
  if attention_mask.dim == 3
    raise Todo
  elsif attention_mask.dim == 2
    # Provided a padding mask of dimensions [batch_size, seq_length]
    # - if the model is a decoder, apply a causal mask in addition to the padding mask
    # - if the model is an encoder, make the mask broadcastable to [batch_size, num_heads, seq_length, seq_length]
    if @config.is_decoder
      raise Todo
    else
      extended_attention_mask = attention_mask[0.., nil, nil, 0..]
    end
  else
    raise Todo
  end

  # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
  # masked positions, this operation will create a tensor which is 0.0 for
  # positions we want to attend and the dtype's smallest value for masked positions.
  # Since we are adding it to the raw scores before the softmax, this is
  # effectively the same as removing these entirely.
  extended_attention_mask = extended_attention_mask.to(dtype: dtype) # fp16 compatibility
  # TODO: use Torch.finfo
  extended_attention_mask = (1.0 - extended_attention_mask) * -3.40282e+38
  extended_attention_mask
end
```
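The key step is the last two assignments: a 2D padding mask of shape [batch_size, seq_length] is broadcast to [batch_size, 1, 1, seq_length] and inverted, so attended positions add 0.0 to the attention scores while masked positions add roughly the most negative finite float32 value (hence the TODO about Torch.finfo). A minimal sketch of that arithmetic using the torch-rb gem directly; the tensor values here are made up for illustration:

```ruby
require "torch"

# Hypothetical batch: 2 sequences of length 4, the second with one padded position.
attention_mask = Torch.tensor([[1, 1, 1, 1], [1, 1, 1, 0]])

# Mirror the encoder branch above: insert two singleton dims so the mask
# broadcasts across attention heads and query positions.
extended = attention_mask[0.., nil, nil, 0..].to(dtype: :float32)

# Flip 1/0 so attended positions add 0.0 and masked positions add a large
# negative bias before the softmax, which effectively removes them.
extended = (1.0 - extended) * -3.40282e+38

p extended.shape # => [2, 1, 1, 4]
```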
#get_head_mask(head_mask, num_hidden_layers, is_attention_chunked: false) ⇒ Object
```ruby
# File 'lib/transformers/modeling_utils.rb', line 62

def get_head_mask(head_mask, num_hidden_layers, is_attention_chunked: false)
  if !head_mask.nil?
    head_mask = _convert_head_mask_to_5d(head_mask, num_hidden_layers)
    if is_attention_chunked == true
      head_mask = head_mask.unsqueeze(-1)
    end
  else
    head_mask = [nil] * num_hidden_layers
  end

  head_mask
end
```
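When no head mask is supplied, the method returns an array with one nil per hidden layer, so attention layers can always index it as `head_mask[layer_index]` without special-casing. A minimal sketch of that nil path, mixing the module into a throwaway class (a hypothetical harness for illustration only; this branch needs no config or weights):

```ruby
require "transformers-rb"

# Hypothetical harness just to exercise the nil branch of get_head_mask.
class HeadMaskDemo
  include Transformers::ModuleUtilsMixin
end

# No head mask supplied: one nil per hidden layer.
p HeadMaskDemo.new.get_head_mask(nil, 4)
# => [nil, nil, nil, nil]
```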