Class: Informers::PreTrainedModel

Inherits:

Object

Object
Informers::PreTrainedModel

show all

Defined in: lib/informers/models.rb

Constant Summary collapse

MAIN_INPUT_NAME =

:input_ids

Instance Attribute Summary collapse

#config ⇒ Object readonly

Returns the value of attribute config.

Class Method Summary collapse

.construct_session(pretrained_model_name_or_path, file_name, **options) ⇒ Object
.from_pretrained(pretrained_model_name_or_path, quantized: true, progress_callback: nil, config: nil, cache_dir: nil, local_files_only: false, revision: "main", device: nil, dtype: nil, model_file_name: nil, session_options: {}) ⇒ Object

Instance Method Summary collapse

#call(model_inputs, **kwargs) ⇒ Object
#generate(inputs, generation_config = nil, logits_processor = nil, inputs_attention_mask: nil) ⇒ Object
#initialize(config, session) ⇒ PreTrainedModel constructor

A new instance of PreTrainedModel.

Constructor Details

#initialize(config, session) ⇒ `PreTrainedModel`

Returns a new instance of PreTrainedModel.

# File 'lib/informers/models.rb', line 74

# Builds a model around an already-loaded config and inference session.
#
# The model type registered for this class (resolved through
# MODEL_CLASS_TO_NAME_MAPPING and MODEL_TYPE_MAPPING) decides which
# forward / beam-search strategy methods get bound to the instance.
#
# @param config [Object] model configuration, exposed through #config
# @param session [Object] inference session, stored in @session
def initialize(config, session)
  super()

  @config = config
  @session = session
  @output_names = nil

  model_type = MODEL_TYPE_MAPPING[MODEL_CLASS_TO_NAME_MAPPING[self.class]]

  # Generation-capable types share one wiring pattern and only differ in
  # which strategy methods they bind (run, start, update, forward).
  strategy_names =
    case model_type
    when MODEL_TYPES[:DecoderOnly]
      %i[decoder_run_beam decoder_start_beams decoder_update_beam decoder_forward]
    when MODEL_TYPES[:Seq2Seq], MODEL_TYPES[:Vision2Seq]
      %i[seq2seq_run_beam seq2seq_start_beams seq2seq_update_beam seq2seq_forward]
    end

  if strategy_names
    @can_generate = true
    @run_beam, @get_start_beams, @update_beam, @forward = strategy_names.map { |name| method(name) }
  else
    # Encoder-decoder and every other (including unregistered) model type
    # only get the encoder forward pass; @can_generate stays unset.
    @forward = method(:encoder_forward)
  end
end

Instance Attribute Details

#config ⇒ `Object` (readonly)

Returns the value of attribute config.



72
73
74

# File 'lib/informers/models.rb', line 72

# Reader for the configuration this model was constructed with.
#
# @return [Object] the value currently held in @config
def config = @config

Class Method Details

.construct_session(pretrained_model_name_or_path, file_name, **options) ⇒ `Object`

# File 'lib/informers/models.rb', line 178

# Resolves the ONNX file for a model and opens an inference session on it.
#
# The file is looked up as "onnx/<file_name><suffix>.onnx"; a leading "../"
# or "/" on +file_name+ is stripped and drops the "onnx/" directory. The
# suffix comes from Utils::DEFAULT_DTYPE_SUFFIX_MAPPING, keyed by
# options[:dtype], or by "q8"/"fp32" depending on options[:quantized].
#
# @param pretrained_model_name_or_path [String] model id or path passed on to the hub helper
# @param file_name [String] base name of the model file, without suffix or extension
# @param options [Hash] loader options; :dtype, :quantized, :device and
#   :session_options are read here, and the whole hash is forwarded to
#   Utils::Hub.get_model_file
# @return [OnnxRuntime::InferenceSession]
# @raise [ArgumentError] when the dtype has no entry in the suffix mapping
def self.construct_session(pretrained_model_name_or_path, file_name, **options)
  # "../" is tested before "/" so each name matches at most one marker.
  escape_marker = ["../", "/"].find { |marker| file_name.start_with?(marker) }
  directory = escape_marker ? "" : "onnx/"
  file_name = file_name[escape_marker.length..] if escape_marker

  dtype = options[:dtype]
  dtype ||= options[:quantized] ? "q8" : "fp32"

  suffix = Utils::DEFAULT_DTYPE_SUFFIX_MAPPING[dtype.to_sym]
  unless suffix
    raise ArgumentError, "Invalid dtype: #{dtype}. Should be one of: #{Utils::DEFAULT_DTYPE_SUFFIX_MAPPING.keys.join(", ")}"
  end

  model_file_name = "#{directory}#{file_name}#{suffix}.onnx"
  path = Utils::Hub.get_model_file(pretrained_model_name_or_path, model_file_name, true, **options)

  # Caller-supplied session options override the defaults below.
  default_session_options = {
    providers: Backends::Onnx.device_to_execution_providers(options[:device]),
    log_severity_level: 4
  }
  session_options = default_session_options.merge(options[:session_options] || {})

  begin
    OnnxRuntime::InferenceSession.new(path, **session_options)
  rescue OnnxRuntime::Error => e
    # Only a missing "<model>.onnx_data" companion file is recoverable:
    # fetch it, then open the session a second time.
    missing_external_data =
      e.message.include?("No such file or directory") && e.message.include?(".onnx_data")
    raise e unless missing_external_data

    Utils::Hub.get_model_file(pretrained_model_name_or_path, "#{model_file_name}_data", true, **options)
    OnnxRuntime::InferenceSession.new(path, **session_options)
  end
end

.from_pretrained(pretrained_model_name_or_path, quantized: true, progress_callback: nil, config: nil, cache_dir: nil, local_files_only: false, revision: "main", device: nil, dtype: nil, model_file_name: nil, session_options: {}) ⇒ `Object`

# File 'lib/informers/models.rb', line 110

# Loads the config and the inference session(s) for this model class and
# instantiates it.
#
# Which files are opened depends on the model type registered for the
# receiver in MODEL_CLASS_TO_NAME_MAPPING / MODEL_TYPE_MAPPING. Decoder-only
# and seq2seq / vision2seq models additionally load "generation_config.json".
# All keyword arguments are collected into one options hash and forwarded to
# the config, session and hub helpers.
#
# @param pretrained_model_name_or_path [String] model id or path handed to the loaders
# @param config [Object, nil] pre-built config; loaded through AutoConfig when nil
# @param model_file_name [String, nil] overrides the default file name for
#   decoder-only ("decoder_model_merged") and encoder-only ("model") models
# @return [Object] result of `new(config, *sessions_and_extras)`
def self.from_pretrained(
  pretrained_model_name_or_path,
  quantized: true,
  progress_callback: nil,
  config: nil,
  cache_dir: nil,
  local_files_only: false,
  revision: "main",
  device: nil,
  dtype: nil,
  model_file_name: nil,
  session_options: {}
)
  # Snapshot of the keyword arguments as passed; :config stays as given even
  # when the config is loaded below.
  options = {
    quantized: quantized,
    progress_callback: progress_callback,
    config: config,
    cache_dir: cache_dir,
    local_files_only: local_files_only,
    revision: revision,
    device: device,
    dtype: dtype,
    model_file_name: model_file_name,
    session_options: session_options
  }

  model_name = MODEL_CLASS_TO_NAME_MAPPING[self]
  model_type = MODEL_TYPE_MAPPING[model_name]

  config ||= AutoConfig.from_pretrained(pretrained_model_name_or_path, **options)

  open_session = lambda do |file_name|
    construct_session(pretrained_model_name_or_path, file_name, **options)
  end
  fetch_generation_config = lambda do
    Utils::Hub.get_model_json(pretrained_model_name_or_path, "generation_config.json", false, **options)
  end

  info =
    if model_type == MODEL_TYPES[:DecoderOnly]
      [open_session.(model_file_name || "decoder_model_merged"), fetch_generation_config.()]
    elsif model_type == MODEL_TYPES[:Seq2Seq] || model_type == MODEL_TYPES[:Vision2Seq]
      [open_session.("encoder_model"), open_session.("decoder_model_merged"), fetch_generation_config.()]
    elsif model_type == MODEL_TYPES[:MaskGeneration]
      [open_session.("vision_encoder"), open_session.("prompt_encoder_mask_decoder")]
    elsif model_type == MODEL_TYPES[:EncoderDecoder]
      [open_session.("encoder_model"), open_session.("decoder_model_merged")]
    else
      # Anything that is not explicitly encoder-only is still loaded as such,
      # but the caller is told about the guess.
      unless model_type == MODEL_TYPES[:EncoderOnly]
        warn "Model type for '#{model_name || config[:model_type]}' not found, assuming encoder-only architecture. Please report this."
      end
      [open_session.(model_file_name || "model")]
    end

  new(config, *info)
end

Instance Method Details

#call(model_inputs, **kwargs) ⇒ `Object`



210
211
212

# File 'lib/informers/models.rb', line 210

# Runs the model by delegating to the callable stored in @forward.
#
# @param model_inputs [Object] inputs handed to the forward callable unchanged
# @param kwargs [Hash] extra keyword arguments forwarded as-is
# @return [Object] whatever the forward callable returns
def call(model_inputs, **kwargs)
  @forward.call(model_inputs, **kwargs)
end

#generate(inputs, generation_config = nil, logits_processor = nil, inputs_attention_mask: nil) ⇒ `Object`

# File 'lib/informers/models.rb', line 214

# Generates output token ids for +inputs+ with a beam-search loop.
#
# Each round runs every unfinished beam, applies the logits processor to the
# last position's logits, samples continuation tokens, and keeps the
# best-scoring beams of every group. The loop ends when all beams are done or
# the max_new_tokens budget is used up; max_length is only enforced when
# max_new_tokens is not set.
#
# @param inputs [Array] input token ids
# @param generation_config [Object, nil] overrides passed through get_generation_config
# @param logits_processor [Object, nil] callable taking (token ids, logits);
#   a fresh Utils::LogitsProcessorList is used when nil
# @param inputs_attention_mask [Object, nil] forwarded to get_start_beams
# @return [Array] the output token ids of the first beam of each group
# @raise [Error] when the model cannot generate, or a decoder-only model gets empty inputs
# @raise [ArgumentError] when +inputs+ is not an Array
# @raise [Todo] for num_return_sequences > 1 or return_dict_in_generate
def generate(inputs, generation_config = nil, logits_processor = nil, inputs_attention_mask: nil)
  unless @can_generate
    model_name = MODEL_CLASS_TO_NAME_MAPPING[self.class]
    raise Error, "The current model class (#{model_name}) is not compatible with `.generate()`, as it doesn't have a language model head."
  end

  unless inputs.is_a?(Array)
    raise ArgumentError, "`inputs` must be an Array, but is #{inputs.class.name}"
  end

  prompt_length =
    if @config[:is_encoder_decoder]
      # Generating from the encoder outputs
      0
    else
      # decoder-only
      raise Error, "Must supply a non-empty array of input token ids." if inputs.empty?

      inputs.length
    end

  # Update generation config with defaults
  generation_config = get_generation_config(generation_config)

  # Update logits processor
  logits_processor = get_logits_processor(
    generation_config,
    prompt_length,
    logits_processor || Utils::LogitsProcessorList.new
  )

  # Normalise a single eos id into a list; nil means "no eos token".
  eos_token_ids = generation_config[:eos_token_id]
  eos_token_ids = [eos_token_ids] unless eos_token_ids.nil? || eos_token_ids.is_a?(Array)

  num_output_tokens = 1
  max_output_tokens = num_output_tokens + (generation_config[:max_new_tokens] || Float::INFINITY)

  # Only use max length if max_new_tokens is not provided
  use_max_length = generation_config[:max_length].is_a?(Integer) && generation_config[:max_new_tokens].nil?
  sampler = Utils::Sampler.get_sampler(generation_config)

  beams = get_start_beams(inputs, generation_config, num_output_tokens, inputs_attention_mask)

  until beams.all? { |beam| beam[:done] } || num_output_tokens >= max_output_tokens
    candidates = []

    beams.each do |beam|
      # A live beam that has already reached max_length is retired before
      # the model is run for it.
      if !beam[:done] && use_max_length && beam[:output_token_ids].length >= generation_config["max_length"]
        beam[:done] = true
      end

      # Finished beams go back into the pool untouched.
      if beam[:done]
        candidates << beam
        next
      end

      output = run_beam(beam)

      # add attentions/scores to beam only if user requested
      add_attentions_to_beam(beam, output) if generation_config["output_attentions"]

      # Logits are of the form [batch_size, out_seq_length, vocab_size]
      # In most cases, this will be [batch_size, 1, vocab_size]
      # So, we select the last token's logits:
      # (equivalent to `logits = outputs.logits[:, -1, :]`)
      logits = output["logits"].map { |v| v[-1] }

      # Apply logits processor
      logits_processor.(beam[:output_token_ids], logits)

      sampler.(logits).each do |new_token_id, log_prob|
        # Every sampled token forks the current beam.
        new_beam = beam.dup
        update_beam(new_beam, new_token_id)
        new_beam[:score] += log_prob
        new_beam[:done] = true if eos_token_ids&.include?(new_token_id)

        candidates << new_beam
      end
    end
    num_output_tokens += 1

    # Keep the best beams of every group, flattened back into one list.
    beams = group_beams(candidates).flat_map do |group|
      group.sort_by { |b| -b[:score] }[0...generation_config["num_beams"]]
    end

    # Run callback
    callback = generation_config["callback_function"]
    callback.(beams) if callback
  end

  # TODO: Ensure that we can return non-batched outputs

  # One sequence per group: the group's first beam. [1, seqLength]
  sequences = group_beams(beams).map do |batch|
    raise Todo if generation_config["num_return_sequences"] > 1

    batch[0][:output_token_ids]
  end

  raise Todo if generation_config["return_dict_in_generate"]

  sequences
end

Class: Informers::PreTrainedModel

Direct Known Subclasses

Constant Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config, session) ⇒ PreTrainedModel

Instance Attribute Details

#config ⇒ Object (readonly)

Class Method Details

.construct_session(pretrained_model_name_or_path, file_name, **options) ⇒ Object

.from_pretrained(pretrained_model_name_or_path, quantized: true, progress_callback: nil, config: nil, cache_dir: nil, local_files_only: false, revision: "main", device: nil, dtype: nil, model_file_name: nil, session_options: {}) ⇒ Object

Instance Method Details

#call(model_inputs, **kwargs) ⇒ Object

#generate(inputs, generation_config = nil, logits_processor = nil, inputs_attention_mask: nil) ⇒ Object

#initialize(config, session) ⇒ `PreTrainedModel`

#config ⇒ `Object` (readonly)

.construct_session(pretrained_model_name_or_path, file_name, **options) ⇒ `Object`

.from_pretrained(pretrained_model_name_or_path, quantized: true, progress_callback: nil, config: nil, cache_dir: nil, local_files_only: false, revision: "main", device: nil, dtype: nil, model_file_name: nil, session_options: {}) ⇒ `Object`

#call(model_inputs, **kwargs) ⇒ `Object`

#generate(inputs, generation_config = nil, logits_processor = nil, inputs_attention_mask: nil) ⇒ `Object`