Module: Informers::Utils::Hub
- Defined in:
- lib/informers/utils/hub.rb
Defined Under Namespace
Classes: FileCache, FileResponse
Class Method Summary
- .display_progress(filename, width, size, expected_size) ⇒ Object
- .get_file(url_or_path, progress_callback = nil, progress_info = {}) ⇒ Object
- .get_model_file(path_or_repo_id, filename, fatal = true, **options) ⇒ Object
- .get_model_json(model_path, file_name, fatal = true, **options) ⇒ Object
- .is_valid_url(string, protocols = nil, valid_hosts = nil) ⇒ Object
- .path_join(*parts) ⇒ Object
- .try_cache(cache, *names) ⇒ Object
Class Method Details
.display_progress(filename, width, size, expected_size) ⇒ Object
235 236 237 238 239 240 241 |
# File 'lib/informers/utils/hub.rb', line 235
#
# Renders a single-line text progress bar for a file.
#
# @param filename [String] label printed in front of the bar
# @param width [Integer] total number of columns available for label and bar
# @param size [Numeric] amount transferred so far
# @param expected_size [Numeric, nil] total amount expected; nil or 0 renders an empty bar
# @return [String] the label followed by the bar, e.g. "model.onnx |███   |"
def self.display_progress(filename, width, size, expected_size)
  # Leave room for the label plus the " |" and "|" delimiters (3 columns),
  # but always keep at least one cell so the bar never disappears.
  bar_width = [width - (filename.length + 3), 1].max
  progress = expected_size && expected_size > 0 ? size / expected_size.to_f : 0
  # Keep the ratio within 0..1: a size larger than expected (or a negative
  # size) would otherwise yield a negative repeat count below and raise.
  progress = progress.clamp(0, 1)
  done = (progress * bar_width).round
  not_done = bar_width - done
  "#{filename} |#{"█" * done}#{" " * not_done}|"
end
.get_file(url_or_path, progress_callback = nil, progress_info = {}) ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/informers/utils/hub.rb', line 38
#
# Opens a remote file over HTTP(S) and returns whatever `URI#open` returns.
#
# @param url_or_path [String] must parse as an http or https URL
# @param progress_callback [Object, nil] when given, it is handed to
#   `Utils.dispatch_callback` with a "download" event (total size known) and
#   "progress" events (bytes received so far)
# @param progress_info [Hash] extra fields merged into every progress event
# @return [Object] the value returned by `URI#open`
# @raise [Error] if `url_or_path` is not an http/https URL
def self.get_file(url_or_path, progress_callback = nil, progress_info = {})
  raise Error, "Invalid url" unless is_valid_url(url_or_path, ["http", "https"])

  headers = {}
  headers["User-Agent"] = "informers/#{VERSION};"

  # Check whether we are making a request to the Hugging Face Hub.
  is_hfurl = is_valid_url(url_or_path, ["http", "https"], ["huggingface.co", "hf.co"])
  if is_hfurl
    # If an access token is present in the environment variables,
    # we add it to the request headers.
    token = ENV["HF_TOKEN"]
    headers["Authorization"] = "Bearer #{token}" if token
  end

  open_options = {}
  if progress_callback
    # Captured by both lambdas so progress events can report the total
    # announced by the content-length callback.
    total_size = nil
    open_options[:content_length_proc] = lambda do |size|
      total_size = size
      Utils.dispatch_callback(progress_callback, {status: "download"}.merge(progress_info).merge(total_size: size))
    end
    open_options[:progress_proc] = lambda do |size|
      Utils.dispatch_callback(progress_callback, {status: "progress"}.merge(progress_info).merge(size: size, total_size: total_size))
    end
  end

  # String keys are sent as request headers; symbol keys are open options.
  URI.parse(url_or_path).open(**headers, **open_options)
end
.get_model_file(path_or_repo_id, filename, fatal = true, **options) ⇒ Object
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
# File 'lib/informers/utils/hub.rb', line 118
#
# Resolves a model file to a path in the file cache, downloading it from the
# remote host first when it is not cached yet.
#
# @param path_or_repo_id [String] model path or repository id
# @param filename [String] file to fetch within the model
# @param fatal [Boolean] when the file is missing and remote access is not
#   allowed: raise if true, return nil if false
# @param options [Hash] recognised keys: :progress_callback, :cache_dir,
#   :revision (defaults to "main") and :local_files_only
# @return [Object, nil] the value of `cache.resolve_path` for the file's cache
#   key, or nil when the file is optional and unavailable locally
# @raise [Error] when remote access is not allowed, the file is not cached and
#   `fatal` is true
def self.get_model_file(path_or_repo_id, filename, fatal = true, **options)
  # Initiate file retrieval
  Utils.dispatch_callback(options[:progress_callback], {
    status: "initiate",
    name: path_or_repo_id,
    file: filename
  })

  # If `cache_dir` is not specified, use the default cache directory
  cache = FileCache.new(options[:cache_dir] || Informers.cache_dir)

  revision = options[:revision] || "main"

  request_url = path_join(path_or_repo_id, filename)
  remote_url = path_join(
    Informers.remote_host,
    Informers.remote_path_template
      .gsub("{model}", path_or_repo_id)
      .gsub("{revision}", URI.encode_www_form_component(revision)),
    filename
  )

  # Choose cache key for filesystem cache
  # When using the main revision (default), we use the request URL as the cache key.
  # If a specific revision is requested, we account for this in the cache key.
  fs_cache_key = revision == "main" ? request_url : path_join(path_or_repo_id, revision, filename)

  proposed_cache_key = fs_cache_key
  resolved_path = cache.resolve_path(proposed_cache_key)

  # Whether to cache the final response in the end.
  to_cache_response = false

  # A caching system is available, so we try to get the file from it.
  response = try_cache(cache, proposed_cache_key)
  cache_hit = !response.nil?

  if response.nil?
    # File is not cached, so we perform the request.
    if options[:local_files_only] || !Informers.allow_remote_models
      # User requested local files only, but the file is not found locally.
      if fatal
        raise Error, "`local_files_only: true` or `Informers.allow_remote_models = false` and file was not found locally at #{resolved_path.inspect}."
      else
        # File not found, but this file is optional.
        # TODO in future, cache the response?
        return nil
      end
    end

    progress_info = {
      name: path_or_repo_id,
      file: filename
    }

    # File not found locally, so we try to download it from the remote server
    response = get_file(remote_url, options[:progress_callback], progress_info)

    if response.status[0] != "200"
      # should not happen
      raise Todo
    end

    # Success! We use the proposed cache key from earlier
    cache_key = proposed_cache_key

    to_cache_response = cache && !response.is_a?(FileResponse) && response.status[0] == "200"
  end

  # `cache_key` is only set on the download path above; it is nil on a cache hit.
  if to_cache_response && cache_key && cache.match(cache_key).nil?
    cache.put(cache_key, response)
  end

  Utils.dispatch_callback(options[:progress_callback], {
    status: "done",
    name: path_or_repo_id,
    file: filename,
    cache_hit: cache_hit
  })

  resolved_path
end
.get_model_json(model_path, file_name, fatal = true, **options) ⇒ Object
212 213 214 215 216 217 218 219 220 |
# File 'lib/informers/utils/hub.rb', line 212
#
# Fetches a model file via `get_model_file` and parses it as JSON.
#
# @param model_path [String] model path or repository id
# @param file_name [String] JSON file to load
# @param fatal [Boolean] forwarded to `get_model_file`
# @param options [Hash] forwarded to `get_model_file`
# @return [Object] the parsed JSON document, or an empty Hash when
#   `get_model_file` returns nil
def self.get_model_json(model_path, file_name, fatal = true, **options)
  # `get_model_file` hands back a path to the file, not its contents.
  path = get_model_file(model_path, file_name, fatal, **options)

  # Return empty object
  return {} if path.nil?

  JSON.load_file(path)
end
.is_valid_url(string, protocols = nil, valid_hosts = nil) ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/informers/utils/hub.rb', line 23
#
# Tells whether a string parses as a URI and, optionally, whether its scheme
# and host are on the given allow-lists.
#
# @param string [String] candidate URL
# @param protocols [Array<String>, nil] allowed schemes; nil skips the check
# @param valid_hosts [Array<String>, nil] allowed hosts; nil skips the check
# @return [Boolean] true when the string parses and passes every requested check
def self.is_valid_url(string, protocols = nil, valid_hosts = nil)
  begin
    url = URI.parse(string)
  rescue StandardError
    # Anything that cannot be parsed is simply "not a valid URL".
    return false
  end

  return false if protocols && !protocols.include?(url.scheme)

  if valid_hosts
    host = url.host
    # Host names are case-insensitive, and URI.parse keeps the original
    # casing, so fall back to a case-insensitive match after the exact one.
    allowed = valid_hosts.include?(host) ||
      (!host.nil? && valid_hosts.any? { |candidate| host.casecmp?(candidate.to_s) })
    return false unless allowed
  end

  true
end
.path_join(*parts) ⇒ Object
222 223 224 225 226 227 228 229 230 231 232 233 |
# File 'lib/informers/utils/hub.rb', line 222
#
# Joins path or URL segments with "/", removing one duplicate slash at each
# seam.
#
# A leading "/" on the first segment and a trailing "/" on the last segment
# are preserved.
#
# @param parts [Array<String>] segments to join
# @return [String] the joined path
def self.path_join(*parts)
  last_index = parts.length - 1
  trimmed = parts.each_with_index.map do |part, index|
    # Only interior seams are cleaned up; the outer edges are left untouched.
    part = part.delete_prefix("/") unless index.zero?
    part = part.delete_suffix("/") unless index == last_index
    part
  end
  trimmed.join("/")
end
.try_cache(cache, *names) ⇒ Object
106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/informers/utils/hub.rb', line 106
#
# Looks up each name in the cache in order and returns the first hit.
#
# @param cache [#match] object whose `match(name)` returns a truthy value on a hit
# @param names [Array<Object>] cache keys to try, in priority order
# @return [Object, nil] the first truthy result of `cache.match`, or nil when
#   every lookup misses or fails
def self.try_cache(cache, *names)
  names.each do |name|
    result = cache.match(name)
    return result if result
  rescue StandardError
    # Best-effort lookup: a failing key is skipped so later keys still get a chance.
    next
  end
  nil
end