Class: Clip::MultilingualModel
- Inherits: Object
- Defined in: lib/clip/multilingual_model.rb
Instance Method Summary
- #encode_image(image) ⇒ Object
- #encode_text(text) ⇒ Object
- #image_model ⇒ Object
- #initialize(textual_model_path: ".clip_models/multilingual/textual.onnx", visual_model_path: ".clip_models/multilingual/visual.onnx", tokenizer: Tokenizers.from_pretrained("M-CLIP/XLM-Roberta-Large-Vit-B-32"), image_preprocessor: Clip::ImagePreprocessor.new, download_models: true, download_dir: ".clip_models/multilingual") ⇒ MultilingualModel (constructor)
  A new instance of MultilingualModel.
- #text_model ⇒ Object
Constructor Details
#initialize(textual_model_path: ".clip_models/multilingual/textual.onnx", visual_model_path: ".clip_models/multilingual/visual.onnx", tokenizer: Tokenizers.from_pretrained("M-CLIP/XLM-Roberta-Large-Vit-B-32"), image_preprocessor: Clip::ImagePreprocessor.new, download_models: true, download_dir: ".clip_models/multilingual") ⇒ MultilingualModel
Returns a new instance of MultilingualModel.
# File 'lib/clip/multilingual_model.rb', line 6

def initialize(
  textual_model_path: ".clip_models/multilingual/textual.onnx",
  visual_model_path: ".clip_models/multilingual/visual.onnx",
  tokenizer: Tokenizers.from_pretrained("M-CLIP/XLM-Roberta-Large-Vit-B-32"),
  image_preprocessor: Clip::ImagePreprocessor.new,
  download_models: true,
  download_dir: ".clip_models/multilingual"
)
  @textual_model_path = textual_model_path
  @visual_model_path = visual_model_path
  Clip.download_models(download_dir, Clip::MULTILINGUAL_MODELS) if download_models && !Clip.models_exist?(textual_model_path: textual_model_path, visual_model_path: visual_model_path)
  @tokenizer = tokenizer
  @image_preprocessor = image_preprocessor
end
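A minimal usage sketch, not taken from the source: with the defaults, the constructor downloads the ONNX files into .clip_models/multilingual on first use; passing download_models: false skips that check when the files are already cached. The require name and local paths below are assumptions for illustration.

require "clip"  # assumes the gem is required as "clip"

# Default construction: downloads textual.onnx and visual.onnx on first run.
model = Clip::MultilingualModel.new

# Alternative: point at an existing local cache and skip the download check.
cached = Clip::MultilingualModel.new(
  textual_model_path: "models/textual.onnx",  # hypothetical local paths
  visual_model_path:  "models/visual.onnx",
  download_models:    false
)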
Instance Method Details
#encode_image(image) ⇒ Object
# File 'lib/clip/multilingual_model.rb', line 29

def encode_image(image)
  image = image_preprocessor.preprocess(image).to_a
  image_model.predict({ pixel_values: [ image ] })["output"].first
end
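A hedged example of calling #encode_image. It assumes the default Clip::ImagePreprocessor accepts a local image file path and that the visual model returns one embedding vector per input; "cat.jpg" is a placeholder file name.

model = Clip::MultilingualModel.new

embedding = model.encode_image("cat.jpg")  # assumed: preprocessor accepts a file path
embedding.length                           # dimensionality of the visual embedding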
#encode_text(text) ⇒ Object
# File 'lib/clip/multilingual_model.rb', line 21

def encode_text(text)
  encoding = tokenizer.encode(text)
  input_ids = [encoding.ids]
  attention_mask = [Array.new(encoding.ids.size, 1)]
  text_model.predict({ "input_ids" => input_ids, "attention_mask" => attention_mask })['output'].first
end
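Because the default tokenizer is the multilingual M-CLIP XLM-Roberta tokenizer, #encode_text can be called with non-English input as well. A small illustrative sketch; the return value is assumed to be a flat array of floats.

model = Clip::MultilingualModel.new

en = model.encode_text("a photo of a cat")
es = model.encode_text("una foto de un gato")  # Spanish: "a photo of a cat"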
#image_model ⇒ Object
# File 'lib/clip/multilingual_model.rb', line 38

def image_model
  @image_model ||= OnnxRuntime::Model.new(visual_model_path)
end
#text_model ⇒ Object
# File 'lib/clip/multilingual_model.rb', line 34

def text_model
  @text_model ||= OnnxRuntime::Model.new(textual_model_path)
end
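#text_model and #image_model lazily wrap the ONNX files in OnnxRuntime::Model, so each session is only created on first use. A common way to combine the two encoders is cosine similarity between a text embedding and an image embedding; the helper below is not part of the library, just a sketch assuming both methods return plain Float arrays of equal length and that encode_image accepts a file path.

# Hypothetical helper, not provided by the gem.
def cosine_similarity(a, b)
  dot  = a.zip(b).sum { |x, y| x * y }
  norm = Math.sqrt(a.sum { |x| x * x }) * Math.sqrt(b.sum { |x| x * x })
  dot / norm
end

model = Clip::MultilingualModel.new
text_vec  = model.encode_text("una foto de un gato")
image_vec = model.encode_image("cat.jpg")  # placeholder image path
puts cosine_similarity(text_vec, image_vec)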