Module: Transformers::ModuleUtilsMixin
- Included in: PreTrainedModel
- Defined in: lib/transformers/modeling_utils.rb
Instance Method Summary
- #get_extended_attention_mask(attention_mask, input_shape, device: nil, dtype: nil) ⇒ Object
- #get_head_mask(head_mask, num_hidden_layers, is_attention_chunked: false) ⇒ Object
Instance Method Details
#get_extended_attention_mask(attention_mask, input_shape, device: nil, dtype: nil) ⇒ Object
```ruby
# File 'lib/transformers/modeling_utils.rb', line 18

def get_extended_attention_mask(
  attention_mask,
  input_shape,
  device: nil,
  dtype: nil
)
  if dtype.nil?
    dtype = @dtype
  end

  if !(attention_mask.dim == 2 && @config.is_decoder)
    # show warning only if it won't be shown in `create_extended_attention_mask_for_decoder`
    if !device.nil?
      raise Todo
    end
  end

  # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
  # ourselves in which case we just need to make it broadcastable to all heads.
  if attention_mask.dim == 3
    raise Todo
  elsif attention_mask.dim == 2
    # Provided a padding mask of dimensions [batch_size, seq_length]
    # - if the model is a decoder, apply a causal mask in addition to the padding mask
    # - if the model is an encoder, make the mask broadcastable to [batch_size, num_heads, seq_length, seq_length]
    if @config.is_decoder
      raise Todo
    else
      extended_attention_mask = attention_mask[0.., nil, nil, 0..]
    end
  else
    raise Todo
  end

  # Since attention_mask is 1.0 for positions we want to attend and 0.0 for
  # masked positions, this operation will create a tensor which is 0.0 for
  # positions we want to attend and the dtype's smallest value for masked positions.
  # Since we are adding it to the raw scores before the softmax, this is
  # effectively the same as removing these entirely.
  extended_attention_mask = extended_attention_mask.to(dtype: dtype) # fp16 compatibility
  # TODO: use Torch.finfo
  extended_attention_mask = (1.0 - extended_attention_mask) * -3.40282e+38
  extended_attention_mask
end
```
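The key step is the last two assignments: a 2D padding mask of shape [batch_size, seq_length] is broadcast to [batch_size, 1, 1, seq_length] and inverted, so attended positions add 0.0 to the attention scores while masked positions add roughly the most negative finite float32 value (hence the TODO about Torch.finfo). A minimal sketch of that arithmetic using the torch-rb gem directly; the tensor values here are made up for illustration:

```ruby
require "torch"

# Hypothetical batch: 2 sequences of length 4, the second with one padded position.
attention_mask = Torch.tensor([[1, 1, 1, 1], [1, 1, 1, 0]])

# Mirror the encoder branch above: insert two singleton dims so the mask
# broadcasts across attention heads and query positions.
extended = attention_mask[0.., nil, nil, 0..].to(dtype: :float32)

# Flip 1/0 so attended positions add 0.0 and masked positions add a large
# negative bias before the softmax, which effectively removes them.
extended = (1.0 - extended) * -3.40282e+38

p extended.shape # => [2, 1, 1, 4]
```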
#get_head_mask(head_mask, num_hidden_layers, is_attention_chunked: false) ⇒ Object
```ruby
# File 'lib/transformers/modeling_utils.rb', line 62

def get_head_mask(head_mask, num_hidden_layers, is_attention_chunked: false)
  if !head_mask.nil?
    head_mask = _convert_head_mask_to_5d(head_mask, num_hidden_layers)
    if is_attention_chunked == true
      head_mask = head_mask.unsqueeze(-1)
    end
  else
    head_mask = [nil] * num_hidden_layers
  end

  head_mask
end
```
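When no head mask is supplied, the method returns an array with one nil per hidden layer, so attention layers can always index it as `head_mask[layer_index]` without special-casing. A minimal sketch of that nil path, mixing the module into a throwaway class (a hypothetical harness for illustration only; this branch needs no config or weights):

```ruby
require "transformers-rb"

# Hypothetical harness just to exercise the nil branch of get_head_mask.
class HeadMaskDemo
  include Transformers::ModuleUtilsMixin
end

# No head mask supplied: one nil per hidden layer.
p HeadMaskDemo.new.get_head_mask(nil, 4)
# => [nil, nil, nil, nil]
```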