Class: Transformers::Pipeline

Inherits:

Object

Object → Transformers::Pipeline

show all

Defined in: lib/transformers/pipelines/base.rb

Direct Known Subclasses

ChunkPipeline, EmbeddingPipeline, FeatureExtractionPipeline, ImageClassificationPipeline, ImageFeatureExtractionPipeline, RerankingPipeline, TextClassificationPipeline

Instance Method Summary collapse

#call(inputs, *args, num_workers: nil, batch_size: nil, **kwargs) ⇒ Object
#check_model_type(supported_models) ⇒ Object
#get_iterator(inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params) ⇒ Object
#initialize(model, tokenizer: nil, feature_extractor: nil, image_processor: nil, modelcard: nil, framework: nil, task: "", device: nil, **kwargs) ⇒ Pipeline constructor

A new instance of Pipeline.
#torch_dtype ⇒ Object

Constructor Details

#initialize(model, tokenizer: nil, feature_extractor: nil, image_processor: nil, modelcard: nil, framework: nil, task: "", device: nil, **kwargs) ⇒ `Pipeline`

Returns a new instance of Pipeline.

# File 'lib/transformers/pipelines/base.rb', line 94

# Builds a pipeline around an already-loaded model and its optional processors.
#
# @param model the model the pipeline wraps
# @param tokenizer optional tokenizer, stored as given
# @param feature_extractor optional feature extractor, stored as given
# @param image_processor optional image processor, stored as given
# @param modelcard optional model card, stored as given
# @param framework framework identifier; required, a nil value raises Todo
# @param task task name stored on the instance
# @param device only checked for nil here (to decide whether to warn); this method does not store it
# @param kwargs :batch_size and :num_workers are removed and kept as per-instance defaults;
#   everything left over is handed to _sanitize_parameters
# @raise [Todo] when framework is nil
def initialize(
  model,
  tokenizer: nil,
  feature_extractor: nil,
  image_processor: nil,
  modelcard: nil,
  framework: nil,
  task: "",
  device: nil,
  **kwargs
)
  raise Todo if framework.nil?

  @model = model
  @task = task
  @framework = framework
  @modelcard = modelcard
  @tokenizer = tokenizer
  @feature_extractor = feature_extractor
  @image_processor = image_processor

  # Availability is only probed when the caller did not pick a device.
  accelerator_unused = device.nil? && (Torch::CUDA.available? || Torch::Backends::MPS.available?)
  if accelerator_unused
    Transformers.logger.warn(
      "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument" \
      " is passed to the `Pipeline` object. Model will be on CPU."
    )
  end

  # Pull the batching options out first so they are not forwarded as pipeline parameters.
  @batch_size, @num_workers = kwargs.delete(:batch_size), kwargs.delete(:num_workers)
  @call_count = 0
  @preprocess_params, @forward_params, @postprocess_params = _sanitize_parameters(**kwargs)
end

Instance Method Details

#call(inputs, *args, num_workers: nil, batch_size: nil, **kwargs) ⇒ `Object`

# File 'lib/transformers/pipelines/base.rb', line 183

# Runs the pipeline on a single input or on an Array of inputs.
#
# @param inputs a single input, or an Array of inputs
# @param args extra positional arguments; they are ignored and a warning is logged
# @param num_workers per-call override; falls back to the constructor value, then to 0
# @param batch_size per-call override; falls back to the constructor value, then to 1
# @param kwargs split by _sanitize_parameters and merged over the constructor-time parameters
# @return for an Array input, the Array of outputs collected from get_iterator when the
#   framework is "pt", otherwise the result of run_multi; for any other input, the result
#   of run_single
def call(inputs, *args, num_workers: nil, batch_size: nil, **kwargs)
  Transformers.logger.warn("Ignoring args : #{args}") if args.any?

  # Resolution order: explicit argument, then constructor default, then hard default.
  num_workers = @num_workers if num_workers.nil?
  num_workers = 0 if num_workers.nil?
  batch_size = @batch_size if batch_size.nil?
  batch_size = 1 if batch_size.nil?

  preprocess_params, forward_params, postprocess_params = _sanitize_parameters(**kwargs)

  # Per-call parameters win over the ones captured at construction time.
  preprocess_params = @preprocess_params.merge(preprocess_params)
  forward_params = @forward_params.merge(forward_params)
  postprocess_params = @postprocess_params.merge(postprocess_params)

  @call_count += 1
  # @device may be unset (nil); safe navigation keeps the 11th call from raising
  # NoMethodError in that case.
  if @call_count > 10 && @framework == "pt" && @device&.type == "cuda"
    Transformers.logger.warn(
      "You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a" +
      " dataset"
    )
  end

  # Only Array inputs are treated as a batch; everything else is a single input.
  return run_single(inputs, preprocess_params, forward_params, postprocess_params) unless inputs.is_a?(Array)

  # TODO make the get_iterator work also for `tf` (and `flax`).
  return run_multi(inputs, preprocess_params, forward_params, postprocess_params) unless @framework == "pt"

  get_iterator(
    inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
  ).to_a
end

#check_model_type(supported_models) ⇒ `Object`

# File 'lib/transformers/pipelines/base.rb', line 136

# Logs an error when the wrapped model's class is not among the supported ones.
#
# @param supported_models either an Array of model names, or a mapping that is iterated
#   with `each` and whose values are a model name or an Array of model names
# @return nil when the model is supported, otherwise whatever the logger's `error` returns
def check_model_type(supported_models)
  unless supported_models.is_a?(Array)
    flattened = []
    supported_models.each do |_, entry|
      # Mapping can now contain tuples of models for the same configuration.
      flattened.concat(entry.is_a?(Array) ? entry : [entry])
    end
    supported_models = flattened
  end

  # Compare on the unqualified class name (the last `::` segment).
  short_name = @model.class.name.split("::").last
  return if supported_models.include?(short_name)

  Transformers.logger.error(
    "The model '#{@model.class.name}' is not supported for #{@task}. Supported models are" +
    " #{supported_models}."
  )
end

#get_iterator(inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params) ⇒ `Object`

# File 'lib/transformers/pipelines/base.rb', line 157

# Chains preprocess -> data loader -> forward -> postprocess into a single iterator.
#
# @param inputs the inputs to iterate; wrapped in a PipelineDataset when they respond
#   to `size`, otherwise in a PipelineIterator
# @param num_workers only used to decide whether to warn for inputs without `size`;
#   it is not passed on to the data loader
# @param batch_size batch size for the data loader; 1 selects no_collate_fn
# @param preprocess_params parameters handed along with the preprocess step
# @param forward_params parameters handed along with the forward step
# @param postprocess_params parameters handed along with the postprocess step
# @return the outermost PipelineIterator (the postprocess stage)
def get_iterator(
  inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
)
  sized = inputs.respond_to?(:size)

  if !sized && num_workers > 1
    Transformers.logger.warn(
      "For iterable dataset using num_workers>1 is likely to result" +
      " in errors since everything is iterable, setting `num_workers: 1`" +
      " to guarantee correctness."
    )
    num_workers = 1
  end

  source_class = sized ? PipelineDataset : PipelineIterator
  source = source_class.new(inputs, method(:preprocess), preprocess_params)

  # TODO hack by collating feature_extractor and image_processor
  processor = @feature_extractor.nil? ? @image_processor : @feature_extractor
  collate_fn =
    if batch_size == 1
      method(:no_collate_fn)
    else
      pad_collate_fn(@tokenizer, processor)
    end

  # num_workers is intentionally not forwarded to the loader here.
  loader = Torch::Utils::Data::DataLoader.new(source, batch_size: batch_size, collate_fn: collate_fn)
  forward_stage = PipelineIterator.new(loader, method(:forward), forward_params, loader_batch_size: batch_size)
  PipelineIterator.new(forward_stage, method(:postprocess), postprocess_params)
end

#torch_dtype ⇒ `Object`



132
133
134

# File 'lib/transformers/pipelines/base.rb', line 132

# Returns whatever the wrapped model reports from its `dtype` method.
def torch_dtype
  @model.dtype
end

Class: Transformers::Pipeline

Direct Known Subclasses

Instance Method Summary collapse

Constructor Details

#initialize(model, tokenizer: nil, feature_extractor: nil, image_processor: nil, modelcard: nil, framework: nil, task: "", device: nil, **kwargs) ⇒ Pipeline

Instance Method Details

#call(inputs, *args, num_workers: nil, batch_size: nil, **kwargs) ⇒ Object

#check_model_type(supported_models) ⇒ Object

#get_iterator(inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params) ⇒ Object

#torch_dtype ⇒ Object

#initialize(model, tokenizer: nil, feature_extractor: nil, image_processor: nil, modelcard: nil, framework: nil, task: "", device: nil, **kwargs) ⇒ `Pipeline`

#call(inputs, *args, num_workers: nil, batch_size: nil, **kwargs) ⇒ `Object`

#check_model_type(supported_models) ⇒ `Object`

#get_iterator(inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params) ⇒ `Object`

#torch_dtype ⇒ `Object`