Module: Informers

Defined in:
lib/informers.rb,
lib/informers/env.rb,
lib/informers/model.rb,
lib/informers/models.rb,
lib/informers/configs.rb,
lib/informers/version.rb,
lib/informers/pipelines.rb,
lib/informers/utils/hub.rb,
lib/informers/tokenizers.rb,
lib/informers/utils/core.rb,
lib/informers/utils/math.rb,
lib/informers/utils/tensor.rb

Defined Under Namespace

Modules: Utils Classes: AutoConfig, AutoModel, AutoModelForQuestionAnswering, AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoTokenizer, BertForSequenceClassification, BertForTokenClassification, BertModel, BertPreTrainedModel, BertTokenizer, DistilBertForQuestionAnswering, DistilBertForSequenceClassification, DistilBertModel, DistilBertPreTrainedModel, DistilBertTokenizer, Error, FeatureExtractionPipeline, Model, ModelOutput, Pipeline, PreTrainedModel, PreTrainedTokenizer, PretrainedConfig, PretrainedMixin, QuestionAnsweringModelOutput, QuestionAnsweringPipeline, SequenceClassifierOutput, TextClassificationPipeline, Todo, TokenClassificationPipeline, TokenClassifierOutput

Constant Summary collapse

# User cache root: $XDG_CACHE_HOME when set, otherwise ~/.cache.
CACHE_HOME =
ENV.fetch("XDG_CACHE_HOME", File.join(ENV.fetch("HOME"), ".cache"))
# Default directory for downloaded model files (<cache root>/informers).
DEFAULT_CACHE_DIR =
File.expand_path(File.join(CACHE_HOME, "informers"))
# Enumeration of model architecture categories referenced by
# MODEL_CLASS_TYPE_MAPPING below.
MODEL_TYPES =
{
  EncoderOnly: 0,
  EncoderDecoder: 1,
  Seq2Seq: 2,
  Vision2Seq: 3,
  DecoderOnly: 4,
  MaskGeneration: 5
}
MODEL_TYPE_MAPPING =

NOTE: This mapping starts empty and is populated later, at load time, as model classes are registered.

{}
# NOTE(review): starts empty — presumably populated during model class
# registration elsewhere (lib/informers/models.rb); verify there.
MODEL_NAME_TO_CLASS_MAPPING =
{}
# NOTE(review): reverse-lookup counterpart of the mapping above — also
# populated elsewhere.
MODEL_CLASS_TO_NAME_MAPPING =
{}
# config model_type (String) => [class name, class] for sequence classification.
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES =
{
  "bert" => ["BertForSequenceClassification", BertForSequenceClassification],
  "distilbert" => ["DistilBertForSequenceClassification", DistilBertForSequenceClassification]
}
# config model_type (String) => [class name, class] for token classification.
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES =
{
  "bert" => ["BertForTokenClassification", BertForTokenClassification]
}
# config model_type (String) => [class name, class] for question answering.
MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES =
{
  "distilbert" => ["DistilBertForQuestionAnswering", DistilBertForQuestionAnswering]
}
# Pairs each task mapping with its MODEL_TYPES category (all encoder-only here).
MODEL_CLASS_TYPE_MAPPING =
[
  [MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]]
]
# Gem version string.
VERSION =
"1.0.1"
# Registry of supported pipeline tasks. Each entry names the tokenizer,
# pipeline, and model classes to instantiate, plus the default model id
# used when the caller does not specify one.
SUPPORTED_TASKS =
{
  "text-classification" => {
    tokenizer: AutoTokenizer,
    pipeline: TextClassificationPipeline,
    model: AutoModelForSequenceClassification,
    default: {
      model: "Xenova/distilbert-base-uncased-finetuned-sst-2-english"
    },
    type: "text"
  },
  "token-classification" => {
    tokenizer: AutoTokenizer,
    pipeline: TokenClassificationPipeline,
    model: AutoModelForTokenClassification,
    default: {
      model: "Xenova/bert-base-multilingual-cased-ner-hrl"
    },
    type: "text"
  },
  "question-answering" => {
    tokenizer: AutoTokenizer,
    pipeline: QuestionAnsweringPipeline,
    model: AutoModelForQuestionAnswering,
    default: {
      model: "Xenova/distilbert-base-cased-distilled-squad"
    },
    type: "text"
  },
  "feature-extraction" => {
    tokenizer: AutoTokenizer,
    pipeline: FeatureExtractionPipeline,
    model: AutoModel,
    default: {
      model: "Xenova/all-MiniLM-L6-v2"
    },
    type: "text"
  }
}
# Alternate task names accepted by `pipeline`, resolved before lookup.
TASK_ALIASES =
{
  "sentiment-analysis" => "text-classification",
  "ner" => "token-classification"
}
# Default download-progress reporter: redraws an in-place progress bar on a
# TTY, and prints a single completion line per file otherwise. Cache hits
# produce no output.
DEFAULT_PROGRESS_CALLBACK =
lambda do |msg|
  out = $stderr
  interactive = out.tty?
  columns = interactive ? out.winsize[1] : 80

  case msg[:status]
  when "progress"
    # Overwrite the current line while bytes arrive (TTY only)
    out.print "\r#{Utils::Hub.display_progress(msg[:file], columns, msg[:size], msg[:total_size])}" if interactive
  when "done"
    unless msg[:cache_hit]
      if interactive
        out.puts
      else
        out.puts Utils::Hub.display_progress(msg[:file], columns, 1, 1)
      end
    end
  end
end

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.allow_remote_models ⇒ Object

Returns the value of attribute allow_remote_models.



6
7
8
# File 'lib/informers/env.rb', line 6

# Reader for the flag controlling whether models may be fetched remotely.
def allow_remote_models = @allow_remote_models

.cache_dir ⇒ Object

Returns the value of attribute cache_dir.



6
7
8
# File 'lib/informers/env.rb', line 6

# Reader for the configured model cache directory.
def cache_dir = @cache_dir

.remote_host ⇒ Object

Returns the value of attribute remote_host.



6
7
8
# File 'lib/informers/env.rb', line 6

# Reader for the configured remote hub host.
def remote_host = @remote_host

.remote_path_template ⇒ Object

Returns the value of attribute remote_path_template.



6
7
8
# File 'lib/informers/env.rb', line 6

# Reader for the URL path template used when fetching remote files.
def remote_path_template = @remote_path_template

Class Method Details

.pipeline(task, model = nil, quantized: true, progress_callback: DEFAULT_PROGRESS_CALLBACK, config: nil, cache_dir: nil, local_files_only: false, revision: "main", model_file_name: nil) ⇒ Object



365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
# File 'lib/informers/pipelines.rb', line 365

# Builds a ready-to-use pipeline (tokenizer + model + pipeline class) for the
# given task, downloading model files when needed.
#
# @param task [String] task name; aliases such as "sentiment-analysis" are
#   resolved via TASK_ALIASES, and any underscore suffix is ignored for lookup
# @param model [String, nil] model id; defaults to the task's default model
# @param quantized [Boolean] whether to load quantized weights
# @param progress_callback [Proc] receives download/ready status hashes
# @param config [Object, nil] optional pretrained config override
# @param cache_dir [String, nil] optional cache directory override
# @param local_files_only [Boolean] skip network and use the cache only
# @param revision [String] model revision to fetch
# @param model_file_name [String, nil] optional model file name override
# @return [Pipeline] an instance of the task's pipeline class
# @raise [Error] if the task is not supported
def pipeline(
  task,
  model = nil,
  quantized: true,
  progress_callback: DEFAULT_PROGRESS_CALLBACK,
  config: nil,
  cache_dir: nil,
  local_files_only: false,
  revision: "main",
  model_file_name: nil
)
  # Apply aliases (e.g. "ner" => "token-classification")
  task = TASK_ALIASES[task] || task

  # Get pipeline info. BUG FIX: Ruby's split limit differs from JavaScript's —
  # split("_", 1) returns the whole string unsplit, so an underscore-suffixed
  # task name could never match. split("_") keeps only the prefix before the
  # first underscore, matching the upstream transformers.js behavior.
  pipeline_info = SUPPORTED_TASKS[task.split("_")[0]]
  if !pipeline_info
    raise Error, "Unsupported pipeline: #{task}. Must be one of #{SUPPORTED_TASKS.keys}"
  end

  # Use model if specified, otherwise, use default
  if !model
    model = pipeline_info[:default][:model]
    warn "No model specified. Using default model: #{model.inspect}."
  end

  pretrained_options = {
    quantized:,
    progress_callback:,
    config:,
    cache_dir:,
    local_files_only:,
    revision:,
    model_file_name:
  }

  classes = {
    tokenizer: pipeline_info[:tokenizer],
    model: pipeline_info[:model],
    processor: pipeline_info[:processor]
  }

  # Load model, tokenizer, and processor (if they exist)
  results = load_items(classes, model, pretrained_options)
  results[:task] = task

  # Signal completion to the caller's progress callback
  Utils.dispatch_callback(progress_callback, {
    status: "ready",
    task: task,
    model: model
  })

  pipeline_class = pipeline_info.fetch(:pipeline)
  pipeline_class.new(**results)
end