Class: HuggingfaceModel
- Inherits: TorchModel
  - Object
    - ScoutModel
      - PythonModel
        - TorchModel
          - HuggingfaceModel
- Defined in: lib/scout/model/python/huggingface.rb
Direct Known Subclasses
Instance Attribute Summary
Attributes inherited from TorchModel
#criterion, #device, #dtype, #optimizer
Attributes inherited from ScoutModel
Instance Method Summary collapse
- #fix_options ⇒ Object
- #initialize(task = nil, checkpoint = nil, dir = nil, options = {}) ⇒ HuggingfaceModel (constructor): A new instance of HuggingfaceModel.
Methods inherited from TorchModel
criterion, device, dtype, feature_dataset, feature_tsv, freeze, #freeze_layer, freeze_layer, get_layer, #get_layer, get_weights, #get_weights, init_python, load, load_architecture, load_state, model_architecture, optimizer, #reset_state, save, save_architecture, save_state, tensor, text_dataset
Methods inherited from ScoutModel
#add, #add_list, #eval, #eval_list, #execute, #extract_features, #extract_features_list, #init, #load_method, #load_options, #load_ruby_code, #load_state, #post_process, #post_process_list, #restore, #save, #save_method, #save_options, #save_state, #state_file, #train
Constructor Details
#initialize(task = nil, checkpoint = nil, dir = nil, options = {}) ⇒ HuggingfaceModel
Returns a new instance of HuggingfaceModel.
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
# File 'lib/scout/model/python/huggingface.rb', line 18

# Builds a model wrapper around a Hugging Face model/tokenizer pair.
#
# Registers three blocks on the model: +init+ (loads model and tokenizer
# through the Python side), +load_state+ and +save_state+ (delegate to the
# Python objects' +from_pretrained+ / +save_pretrained+).
#
# @param task [String, nil] task name forwarded to the Python +load_model+ helper
# @param checkpoint [String, nil] checkpoint used when no saved state directory exists
# @param dir [String, nil] forwarded as the first argument to the parent constructor
# @param options [Hash] extra options; +:checkpoint+ and +:task+ are overwritten
#   with the positional arguments
# @return [HuggingfaceModel] a new instance of HuggingfaceModel
def initialize(task=nil, checkpoint=nil, dir = nil, options = {})
  super(dir, nil, nil, options)

  options[:checkpoint] = checkpoint
  options[:task] = task

  init do
    TorchModel.init_python

    # Prefer a previously saved state directory over the configured checkpoint.
    checkpoint = state_file && File.directory?(state_file) ? state_file : self.options[:checkpoint]

    # Everything not consumed on the Ruby side is forwarded as keyword
    # arguments to the Python loader.
    model_kwargs = self.options.except(
      :training_args, :tokenizer_args,
      :task, :checkpoint, :class_labels,
      :model_options, :return_logits
    )
    model = ScoutPython.call_method("scout_ai.huggingface.model", :load_model,
                                    self.options[:task], checkpoint,
                                    **(IndiferentHash.setup(model_kwargs)))

    # Guard against a missing :tokenizer_args entry so the lookup below does
    # not raise NoMethodError on nil.
    tokenizer_args = self.options[:tokenizer_args] || {}
    tokenizer_checkpoint = tokenizer_args[:checkpoint] || checkpoint

    tokenizer = ScoutPython.call_method("scout_ai.huggingface.model", :load_tokenizer,
                                        tokenizer_checkpoint,
                                        **(IndiferentHash.setup(tokenizer_args)))

    [model, tokenizer]
  end

  load_state do |state_file|
    model, tokenizer = @state
    TorchModel.init_python
    if state_file && Open.directory?(state_file)
      # NOTE(review): the return values of from_pretrained are discarded here;
      # confirm whether these calls are expected to update the objects in place.
      model.from_pretrained(state_file)
      tokenizer.from_pretrained(state_file)
    end
  end

  # NOTE(review): the +state+ block parameter is unused; @state is read instead.
  save_state do |state_file,state|
    model, tokenizer = @state
    TorchModel.init_python
    if state_file
      model.save_pretrained(state_file)
      tokenizer.save_pretrained(state_file)
    end
  end

  #self.eval do |features,list|
  #  model, tokenizer = @state
  #  res = case options[:task]
  #        when "CausalLM"
  #          if not list
  #            list = [features]
  #          end
  #          # Allow for options :chat_template, :chat_template_kwargs, :generation_kwargs
  #          #options[:generation_kwargs] = {max_new_tokens: 1000}
  #          ScoutPython.call_method(
  #            "scout_ai.huggingface.eval", :eval_causal_lm_chat,
  #            model, tokenizer, list,
  #            options[:chat_template],
  #            options[:chat_template_kwargs],
  #            options[:generation_kwargs]
  #          )
  #        else
  #          texts = list ? list : [features]
  #          ScoutPython.call_method("scout_ai.huggingface.eval", :eval_model, model, tokenizer, texts, options[:locate_tokens])
  #        end
  #  list ? res : res[0]
  #end

  #train do |texts,labels|
  #  model, tokenizer = @state
  #
  #  if directory
  #    tsv_file = File.join(directory, 'dataset.tsv')
  #    checkpoint_dir = File.join(directory, 'checkpoints')
  #  else
  #    tmpdir = TmpFile.tmp_file
  #    Open.mkdir tmpdir
  #    tsv_file = File.join(tmpdir, 'dataset.tsv')
  #    checkpoint_dir = File.join(tmpdir, 'checkpoints')
  #  end
  #  training_args_obj = ScoutPython.call_method("scout_ai.huggingface.train", :training_args, checkpoint_dir, options[:training_args])
  #  dataset_file = HuggingfaceModel.text_dataset(tsv_file, texts, labels, options[:class_labels])
  #  ScoutPython.call_method("scout_ai.huggingface.train", :train_model, model, tokenizer, training_args_obj, dataset_file, options[:class_weights])
  #  Open.rm_rf tmpdir if tmpdir
  #end
end
Instance Method Details
#fix_options ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 |
# File 'lib/scout/model/python/huggingface.rb', line 5

# Normalizes the training and tokenizer entries of @options.
#
# The alias keys +:training_args+ / +:training_kwargs+ are moved under
# +:training_options+, and +:tokenizer_args+ / +:tokenizer_kwargs+ under
# +:tokenizer_options+. Each group is then collected with
# IndiferentHash.pull_keys and stored back as +:training_args+ and
# +:tokenizer_args+.
#
# NOTE(review): when both the +_args+ and +_kwargs+ alias of a group are
# present, the +_kwargs+ value overwrites the +_args+ one (and any existing
# +_options+ value) — confirm this precedence is intended.
#
# @return [Object] the value assigned to @options[:tokenizer_args]
def fix_options
  @options[:training_options] = @options.delete(:training_args) if @options.include?(:training_args)
  @options[:training_options] = @options.delete(:training_kwargs) if @options.include?(:training_kwargs)
  # presumably gathers :training_options plus any training_* keys — verify against IndiferentHash.pull_keys
  training_args = IndiferentHash.pull_keys(@options, :training) || {}

  @options[:tokenizer_options] = @options.delete(:tokenizer_args) if @options.include?(:tokenizer_args)
  @options[:tokenizer_options] = @options.delete(:tokenizer_kwargs) if @options.include?(:tokenizer_kwargs)
  tokenizer_args = IndiferentHash.pull_keys(@options, :tokenizer) || {}

  @options[:training_args] = training_args
  @options[:tokenizer_args] = tokenizer_args
end