Class: Clip::MultilingualModel

Inherits:
Object
  • Object
  • Clip::MultilingualModel
Defined in:
lib/clip/multilingual_model.rb

Instance Method Summary

Constructor Details

#initialize(textual_model_path: ".clip_models/multilingual/textual.onnx", visual_model_path: ".clip_models/multilingual/visual.onnx", tokenizer: Tokenizers.from_pretrained("M-CLIP/XLM-Roberta-Large-Vit-B-32"), image_preprocessor: Clip::ImagePreprocessor.new, download_models: true, download_dir: ".clip_models/multilingual") ⇒ MultilingualModel

Returns a new instance of MultilingualModel.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/clip/multilingual_model.rb', line 6

# Records the model paths and collaborators for this instance and, when
# requested, asks Clip to download the multilingual models first.
#
# @param textual_model_path [String] path kept for the text ONNX model
# @param visual_model_path [String] path kept for the image ONNX model
# @param tokenizer [#encode] tokenizer stored for later text encoding
# @param image_preprocessor [#preprocess] preprocessor stored for later image encoding
# @param download_models [Boolean] when truthy, the models are downloaded
#   unless Clip.models_exist? reports that both paths are already present
# @param download_dir [String] directory handed to Clip.download_models
# @return [MultilingualModel] a new instance of MultilingualModel
def initialize(
  textual_model_path: ".clip_models/multilingual/textual.onnx",
  visual_model_path: ".clip_models/multilingual/visual.onnx",
  tokenizer: Tokenizers.from_pretrained("M-CLIP/XLM-Roberta-Large-Vit-B-32"),
  image_preprocessor: Clip::ImagePreprocessor.new,
  download_models: true,
  download_dir: ".clip_models/multilingual"
)
  @textual_model_path = textual_model_path
  @visual_model_path = visual_model_path

  # The existence check only runs when downloading is enabled at all.
  if download_models
    models_present = Clip.models_exist?(
      textual_model_path: textual_model_path,
      visual_model_path: visual_model_path
    )
    Clip.download_models(download_dir, Clip::MULTILINGUAL_MODELS) unless models_present
  end

  @tokenizer = tokenizer
  @image_preprocessor = image_preprocessor
end

Instance Method Details

#encode_image(image) ⇒ Object



29
30
31
32
# File 'lib/clip/multilingual_model.rb', line 29

# Preprocesses an image and runs it through the visual model.
#
# @param image [Object] whatever image_preprocessor.preprocess accepts
# @return [Object] the first element of the model's "output" entry
def encode_image(image)
  pixel_values = image_preprocessor.preprocess(image).to_a
  # The converted image is wrapped in a one-element array before prediction
  # (presumably a batch of one — confirm against the model's input spec).
  prediction = image_model.predict({ pixel_values: [pixel_values] })
  prediction["output"].first
end

#encode_text(text) ⇒ Object



21
22
23
24
25
26
27
# File 'lib/clip/multilingual_model.rb', line 21

# Tokenizes text and runs it through the textual model.
#
# @param text [Object] whatever tokenizer.encode accepts
# @return [Object] the first element of the model's 'output' entry
def encode_text(text)
  token_ids = tokenizer.encode(text).ids

  # The mask is all ones, one per token id.
  # NOTE(review): assumes the tokenizer adds no padding — confirm.
  model_inputs = {
    "input_ids" => [token_ids],
    "attention_mask" => [[1] * token_ids.size]
  }

  text_model.predict(model_inputs)['output'].first
end

#image_model ⇒ Object



38
39
40
# File 'lib/clip/multilingual_model.rb', line 38

# Returns the visual ONNX model, building it from visual_model_path on the
# first call and reusing the cached instance afterwards.
#
# @return [OnnxRuntime::Model]
def image_model
  return @image_model if @image_model

  @image_model = OnnxRuntime::Model.new(visual_model_path)
end

#text_model ⇒ Object



34
35
36
# File 'lib/clip/multilingual_model.rb', line 34

# Returns the textual ONNX model, building it from textual_model_path on the
# first call and reusing the cached instance afterwards.
#
# @return [OnnxRuntime::Model]
def text_model
  return @text_model if @text_model

  @text_model = OnnxRuntime::Model.new(textual_model_path)
end