Module: Informers
- Defined in:
- lib/informers.rb,
lib/informers/env.rb,
lib/informers/model.rb,
lib/informers/models.rb,
lib/informers/configs.rb,
lib/informers/version.rb,
lib/informers/pipelines.rb,
lib/informers/utils/hub.rb,
lib/informers/processors.rb,
lib/informers/tokenizers.rb,
lib/informers/utils/core.rb,
lib/informers/utils/math.rb,
lib/informers/utils/image.rb,
lib/informers/utils/tensor.rb,
lib/informers/utils/generation.rb
Defined Under Namespace
Modules: Utils
Classes: AutoConfig, AutoModel, AutoModelForCausalLM, AutoModelForDepthEstimation, AutoModelForDocumentQuestionAnswering, AutoModelForImageClassification, AutoModelForImageFeatureExtraction, AutoModelForImageSegmentation, AutoModelForImageToImage, AutoModelForMaskedLM, AutoModelForObjectDetection, AutoModelForQuestionAnswering, AutoModelForSemanticSegmentation, AutoModelForSeq2SeqLM, AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoModelForVision2Seq, AutoModelForZeroShotObjectDetection, AutoProcessor, AutoTokenizer, BartForConditionalGeneration, BartForSequenceClassification, BartModel, BartPretrainedModel, BartTokenizer, BertForMaskedLM, BertForSequenceClassification, BertForTokenClassification, BertModel, BertPreTrainedModel, BertTokenizer, CLIPFeatureExtractor, CLIPModel, CLIPPreTrainedModel, CLIPTokenizer, DPTFeatureExtractor, DPTForDepthEstimation, DPTModel, DPTPreTrainedModel, DebertaV2Model, DebertaV2PreTrainedModel, DebertaV2Tokenizer, DepthEstimationPipeline, DetrFeatureExtractor, DetrForObjectDetection, DetrForSegmentation, DetrModel, DetrObjectDetectionOutput, DetrPreTrainedModel, DetrSegmentationOutput, DistilBertForQuestionAnswering, DistilBertForSequenceClassification, DistilBertModel, DistilBertPreTrainedModel, DistilBertTokenizer, DocumentQuestionAnsweringPipeline, DonutFeatureExtractor, DonutSwinModel, DonutSwinPreTrainedModel, EmbeddingPipeline, Error, FeatureExtractionPipeline, FeatureExtractor, FillMaskPipeline, GPT2LMHeadModel, GPT2Model, GPT2PreTrainedModel, GPT2Tokenizer, ImageClassificationPipeline, ImageFeatureExtractionPipeline, ImageFeatureExtractor, ImageSegmentationPipeline, ImageToImagePipeline, ImageToTextPipeline, M2M100ForConditionalGeneration, M2M100Model, M2M100PreTrainedModel, M2M100Tokenizer, MBartForCausalLM, MBartModel, MBartPreTrainedModel, MPNetModel, MPNetPreTrainedModel, MPNetTokenizer, MaskedLMOutput, Model, ModelOutput, NllbTokenizer, NomicBertModel, NomicBertPreTrainedModel, ObjectDetectionPipeline, OwlViTFeatureExtractor, OwlViTForObjectDetection, OwlViTModel, OwlViTPreTrainedModel, Pipeline, PreTrainedModel, PreTrainedTokenizer, PretrainedConfig, PretrainedMixin, Processor, QuestionAnsweringModelOutput, QuestionAnsweringPipeline, RerankingPipeline, RobertaForMaskedLM, RobertaModel, RobertaPreTrainedModel, RobertaTokenizer, Seq2SeqLMOutput, SequenceClassifierOutput, SummarizationPipeline, Swin2SRForImageSuperResolution, Swin2SRImageProcessor, Swin2SRModel, Swin2SRPreTrainedModel, T5ForConditionalGeneration, T5Model, T5PreTrainedModel, T5Tokenizer, Text2TextGenerationPipeline, TextClassificationPipeline, TextGenerationPipeline, Todo, TokenClassificationPipeline, TokenClassifierOutput, TranslationPipeline, ViTFeatureExtractor, ViTForImageClassification, ViTModel, ViTPreTrainedModel, VisionEncoderDecoderModel, XLMRobertaForSequenceClassification, XLMRobertaModel, XLMRobertaPreTrainedModel, XLMRobertaTokenizer, ZeroShotClassificationPipeline, ZeroShotImageClassificationPipeline, ZeroShotObjectDetectionPipeline
Constant Summary
- CACHE_HOME =
ENV.fetch("XDG_CACHE_HOME", File.join(ENV.fetch("HOME"), ".cache"))
- DEFAULT_CACHE_DIR =
File.expand_path(File.join(CACHE_HOME, "informers"))
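In other words, with XDG_CACHE_HOME unset, downloaded models are cached under ~/.cache/informers (home path illustrative):

# With XDG_CACHE_HOME unset and HOME=/home/user:
Informers::CACHE_HOME        # => "/home/user/.cache"
Informers::DEFAULT_CACHE_DIR # => "/home/user/.cache/informers"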
- MODEL_TYPES =
{ EncoderOnly: 0, EncoderDecoder: 1, Seq2Seq: 2, Vision2Seq: 3, DecoderOnly: 4, MaskGeneration: 5 }
- MODEL_TYPE_MAPPING =
{} # NOTE: These will be populated fully later
- MODEL_NAME_TO_CLASS_MAPPING =
{}
- MODEL_CLASS_TO_NAME_MAPPING =
{}
- MODEL_MAPPING_NAMES_ENCODER_ONLY =
{ "bert" => ["BertModel", BertModel], "nomic_bert" => ["NomicBertModel", NomicBertModel], "deberta-v2" => ["DebertaV2Model", DebertaV2Model], "mpnet" => ["MPNetModel", MPNetModel], "distilbert" => ["DistilBertModel", DistilBertModel], "roberta" => ["RobertaModel", RobertaModel], "xlm-roberta" => ["XLMRobertaModel", XLMRobertaModel], "clip" => ["CLIPModel", CLIPModel], "detr" => ["DetrModel", DetrModel], "vit" => ["ViTModel", ViTModel], "owlvit" => ["OwlViTModel", OwlViTModel], "donut-swin" => ["DonutSwinModel", DonutSwinModel] }
- MODEL_MAPPING_NAMES_ENCODER_DECODER =
{ "bart" => ["BartModel", BartModel] }
- MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES =
{ "bert" => ["BertForSequenceClassification", BertForSequenceClassification], "distilbert" => ["DistilBertForSequenceClassification", DistilBertForSequenceClassification], "xlm-roberta" => ["XLMRobertaForSequenceClassification", XLMRobertaForSequenceClassification], "bart" => ["BartForSequenceClassification", BartForSequenceClassification] }
- MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES =
{ "bert" => ["BertForTokenClassification", BertForTokenClassification] }
- MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES =
{ "t5" => ["T5ForConditionalGeneration", T5ForConditionalGeneration], "bart" => ["BartForConditionalGeneration", BartForConditionalGeneration], "m2m_100" => ["M2M100ForConditionalGeneration", M2M100ForConditionalGeneration] }
- MODEL_WITH_LM_HEAD_MAPPING_NAMES =
{ "gpt2" => ["GPT2LMHeadModel", GPT2LMHeadModel], "mbart" => ["MBartForCausalLM", MBartForCausalLM] }
- MODEL_FOR_MASKED_LM_MAPPING_NAMES =
{ "bert" => ["BertForMaskedLM", BertForMaskedLM], "roberta" => ["RobertaForMaskedLM", RobertaForMaskedLM] }
- MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES =
{ "distilbert" => ["DistilBertForQuestionAnswering", DistilBertForQuestionAnswering] }
- MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES =
{ "vision-encoder-decoder" => ["VisionEncoderDecoderModel", VisionEncoderDecoderModel] }
- MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING_NAMES =
{ "vision-encoder-decoder" => ["VisionEncoderDecoderModel", VisionEncoderDecoderModel] }
- MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES =
{ "vit" => ["ViTForImageClassification", ViTForImageClassification] }
- MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES =
{ "detr" => ["DetrForObjectDetection", DetrForObjectDetection] }
- MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES =
{ "owlvit" => ["OwlViTForObjectDetection", OwlViTForObjectDetection] }
- MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES =
{ "detr" => ["DetrForSegmentation", DetrForSegmentation] }
- MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES =
{ }
- MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES =
{ "swin2sr" => ["Swin2SRForImageSuperResolution", Swin2SRForImageSuperResolution] }
- MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES =
{ "dpt" => ["DPTForDepthEstimation", DPTForDepthEstimation] }
- MODEL_FOR_IMAGE_FEATURE_EXTRACTION_MAPPING_NAMES =
{ }
- MODEL_CLASS_TYPE_MAPPING =
[
  [MODEL_MAPPING_NAMES_ENCODER_ONLY, MODEL_TYPES[:EncoderOnly]],
  [MODEL_MAPPING_NAMES_ENCODER_DECODER, MODEL_TYPES[:EncoderDecoder]],
  [MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES, MODEL_TYPES[:Seq2Seq]],
  [MODEL_WITH_LM_HEAD_MAPPING_NAMES, MODEL_TYPES[:DecoderOnly]],
  [MODEL_FOR_MASKED_LM_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES, MODEL_TYPES[:Vision2Seq]],
  [MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]],
  [MODEL_FOR_IMAGE_FEATURE_EXTRACTION_MAPPING_NAMES, MODEL_TYPES[:EncoderOnly]]
]
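These mappings are what let the Auto* classes resolve a checkpoint's model_type to a concrete model class. A minimal sketch, assuming the Xenova/all-MiniLM-L6-v2 checkpoint (its config reports model_type "bert", which MODEL_MAPPING_NAMES_ENCODER_ONLY maps to BertModel):

require "informers"

# AutoModel reads the checkpoint's config, finds model_type "bert",
# and instantiates Informers::BertModel via the mapping above.
model = Informers::AutoModel.from_pretrained("Xenova/all-MiniLM-L6-v2")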
- VERSION =
"1.1.0"- SUPPORTED_TASKS =
{ "text-classification" => { tokenizer: AutoTokenizer, pipeline: TextClassificationPipeline, model: AutoModelForSequenceClassification, default: { model: "Xenova/distilbert-base-uncased-finetuned-sst-2-english" }, type: "text" }, "token-classification" => { tokenizer: AutoTokenizer, pipeline: TokenClassificationPipeline, model: AutoModelForTokenClassification, default: { model: "Xenova/bert-base-multilingual-cased-ner-hrl" }, type: "text" }, "question-answering" => { tokenizer: AutoTokenizer, pipeline: QuestionAnsweringPipeline, model: AutoModelForQuestionAnswering, default: { model: "Xenova/distilbert-base-cased-distilled-squad" }, type: "text" }, "fill-mask" => { tokenizer: AutoTokenizer, pipeline: FillMaskPipeline, model: AutoModelForMaskedLM, default: { model: "Xenova/bert-base-uncased" }, type: "text" }, "summarization" => { tokenizer: AutoTokenizer, pipeline: SummarizationPipeline, model: AutoModelForSeq2SeqLM, default: { model: "Xenova/distilbart-cnn-6-6" }, type: "text" }, "translation" => { tokenizer: AutoTokenizer, pipeline: TranslationPipeline, model: AutoModelForSeq2SeqLM, default: { model: "Xenova/t5-small" }, type: "text" }, "text2text-generation" => { tokenizer: AutoTokenizer, pipeline: Text2TextGenerationPipeline, model: AutoModelForSeq2SeqLM, default: { model: "Xenova/flan-t5-small" }, type: "text" }, "text-generation" => { tokenizer: AutoTokenizer, pipeline: TextGenerationPipeline, model: AutoModelForCausalLM, default: { model: "Xenova/gpt2" }, type: "text" }, "zero-shot-classification" => { tokenizer: AutoTokenizer, pipeline: ZeroShotClassificationPipeline, model: AutoModelForSequenceClassification, default: { model: "Xenova/distilbert-base-uncased-mnli" }, type: "text" }, "image-to-text" => { tokenizer: AutoTokenizer, pipeline: ImageToTextPipeline, model: AutoModelForVision2Seq, processor: AutoProcessor, default: { model: "Xenova/vit-gpt2-image-captioning" }, type: "multimodal" }, "image-classification" => { pipeline: ImageClassificationPipeline, model: AutoModelForImageClassification, processor: AutoProcessor, default: { model: "Xenova/vit-base-patch16-224", }, type: "multimodal" }, "image-segmentation" => { pipeline: ImageSegmentationPipeline, model: [AutoModelForImageSegmentation, AutoModelForSemanticSegmentation], processor: AutoProcessor, default: { model: "Xenova/detr-resnet-50-panoptic", }, type: "multimodal" }, "zero-shot-image-classification" => { tokenizer: AutoTokenizer, pipeline: ZeroShotImageClassificationPipeline, model: AutoModel, processor: AutoProcessor, default: { model: "Xenova/clip-vit-base-patch32" }, type: "multimodal" }, "object-detection" => { pipeline: ObjectDetectionPipeline, model: AutoModelForObjectDetection, processor: AutoProcessor, default: { model: "Xenova/detr-resnet-50", }, type: "multimodal" }, "zero-shot-object-detection" => { tokenizer: AutoTokenizer, pipeline: ZeroShotObjectDetectionPipeline, model: AutoModelForZeroShotObjectDetection, processor: AutoProcessor, default: { model: "Xenova/owlvit-base-patch32" }, type: "multimodal" }, "document-question-answering" => { tokenizer: AutoTokenizer, pipeline: DocumentQuestionAnsweringPipeline, model: AutoModelForDocumentQuestionAnswering, processor: AutoProcessor, default: { model: "Xenova/donut-base-finetuned-docvqa" }, type: "multimodal" }, "image-to-image" => { pipeline: ImageToImagePipeline, model: AutoModelForImageToImage, processor: AutoProcessor, default: { model: "Xenova/swin2SR-classical-sr-x2-64" }, type: "image" }, "depth-estimation" => { pipeline: DepthEstimationPipeline, model: 
AutoModelForDepthEstimation, processor: AutoProcessor, default: { model: "Xenova/dpt-large" }, type: "image" }, "feature-extraction" => { tokenizer: AutoTokenizer, pipeline: FeatureExtractionPipeline, model: AutoModel, default: { model: "Xenova/all-MiniLM-L6-v2" }, type: "text" }, "image-feature-extraction" => { processor: AutoProcessor, pipeline: ImageFeatureExtractionPipeline, model: [AutoModelForImageFeatureExtraction, AutoModel], default: { model: "Xenova/vit-base-patch16-224" }, type: "image" }, "embedding" => { tokenizer: AutoTokenizer, pipeline: EmbeddingPipeline, model: AutoModel, default: { model: "sentence-transformers/all-MiniLM-L6-v2" }, type: "text" }, "reranking" => { tokenizer: AutoTokenizer, pipeline: RerankingPipeline, model: AutoModel, default: { model: "mixedbread-ai/mxbai-rerank-base-v1" }, type: "text" } }
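Each key here is a task name accepted by Informers.pipeline; when no model is passed, the task's :default entry is used. A minimal usage sketch:

require "informers"

# No model given, so the default for "text-classification" is used
# (Xenova/distilbert-base-uncased-finetuned-sst-2-english).
classifier = Informers.pipeline("text-classification")
classifier.("This is super cool")
# => {"label" => "POSITIVE", "score" => ...}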
- TASK_ALIASES =
{ "sentiment-analysis" => "text-classification", "ner" => "token-classification" }
- DEFAULT_PROGRESS_CALLBACK =
lambda do |msg|
  stream = $stderr
  tty = stream.tty?
  width = tty ? stream.winsize[1] : 80
  if msg[:status] == "progress" && tty
    stream.print "\r#{Utils::Hub.display_progress(msg[:file], width, msg[:size], msg[:total_size])}"
  elsif msg[:status] == "done" && !msg[:cache_hit]
    if tty
      stream.puts
    else
      stream.puts Utils::Hub.display_progress(msg[:file], width, 1, 1)
    end
  end
end
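Any lambda with the same signature can replace this default via the progress_callback: option. A minimal sketch of a quieter callback (the msg keys shown are the ones the default above reads):

quiet_callback = lambda do |msg|
  # Only report completed downloads; msg also carries :file, :size, :total_size
  puts "downloaded #{msg[:file]}" if msg[:status] == "done" && !msg[:cache_hit]
end

Informers.pipeline("embedding", progress_callback: quiet_callback)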
- NO_DEFAULT =
Object.new
Class Attribute Summary
- .allow_remote_models ⇒ Object
  Returns the value of attribute allow_remote_models.
- .cache_dir ⇒ Object
  Returns the value of attribute cache_dir.
- .remote_host ⇒ Object
  Returns the value of attribute remote_host.
- .remote_path_template ⇒ Object
  Returns the value of attribute remote_path_template.
Class Method Summary
- .pipeline(task, model = nil, quantized: NO_DEFAULT, progress_callback: DEFAULT_PROGRESS_CALLBACK, config: nil, cache_dir: nil, local_files_only: false, revision: "main", model_file_name: nil) ⇒ Object
Class Attribute Details
.allow_remote_models ⇒ Object
Returns the value of attribute allow_remote_models.
# File 'lib/informers/env.rb', line 6

def allow_remote_models
  @allow_remote_models
end
.cache_dir ⇒ Object
Returns the value of attribute cache_dir.
# File 'lib/informers/env.rb', line 6

def cache_dir
  @cache_dir
end
.remote_host ⇒ Object
Returns the value of attribute remote_host.
# File 'lib/informers/env.rb', line 6

def remote_host
  @remote_host
end
.remote_path_template ⇒ Object
Returns the value of attribute remote_path_template.
# File 'lib/informers/env.rb', line 6

def remote_path_template
  @remote_path_template
end
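These attributes are module-level settings for model loading. A minimal configuration sketch, assuming they are read/write accessors as their defaults in lib/informers/env.rb suggest (paths illustrative):

require "informers"

Informers.cache_dir = "/tmp/informers"  # override DEFAULT_CACHE_DIR
Informers.allow_remote_models = false   # only use models already on disk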
Class Method Details
.pipeline(task, model = nil, quantized: NO_DEFAULT, progress_callback: DEFAULT_PROGRESS_CALLBACK, config: nil, cache_dir: nil, local_files_only: false, revision: "main", model_file_name: nil) ⇒ Object
# File 'lib/informers/pipelines.rb', line 1183

def pipeline(
  task,
  model = nil,
  quantized: NO_DEFAULT,
  progress_callback: DEFAULT_PROGRESS_CALLBACK,
  config: nil,
  cache_dir: nil,
  local_files_only: false,
  revision: "main",
  model_file_name: nil
)
  # Apply aliases
  task = TASK_ALIASES[task] || task

  if quantized == NO_DEFAULT
    # TODO move default to task class
    quantized = ["text-classification", "token-classification", "question-answering", "feature-extraction"].include?(task)
  end

  # Get pipeline info
  pipeline_info = SUPPORTED_TASKS[task.split("_", 1)[0]]
  if !pipeline_info
    raise Error, "Unsupported pipeline: #{task}. Must be one of #{SUPPORTED_TASKS.keys}"
  end

  # Use model if specified, otherwise, use default
  if !model
    model = pipeline_info[:default][:model]
    warn "No model specified. Using default model: #{model.inspect}."
  end

  pretrained_options = {
    quantized:,
    progress_callback:,
    config:,
    cache_dir:,
    local_files_only:,
    revision:,
    model_file_name:
  }

  classes = {
    tokenizer: pipeline_info[:tokenizer],
    model: pipeline_info[:model],
    processor: pipeline_info[:processor]
  }

  # Load model, tokenizer, and processor (if they exist)
  results = load_items(classes, model, pretrained_options)
  results[:task] = task

  if model == "sentence-transformers/all-MiniLM-L6-v2"
    results[:model].instance_variable_set(:@output_names, ["token_embeddings"])
  end

  Utils.dispatch_callback(progress_callback, { status: "ready", task: task, model: model })

  pipeline_class = pipeline_info.fetch(:pipeline)
  pipeline_class.new(**results)
end
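A short usage sketch of this method (the default model for "question-answering" comes from SUPPORTED_TASKS above; text is illustrative):

require "informers"

qa = Informers.pipeline("question-answering")
qa.("Who created Ruby?", "Ruby is a programming language created by Matz.")
# => answer span with score and start/end offsets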