Class: Opener::Core::ResourceSwitcher
- Inherits:
-
Object
- Object
- Opener::Core::ResourceSwitcher
- Defined in:
- lib/opener/core/resource_switcher.rb
Overview
Class for downloading and extracting external resources such as models/lexicons.
Instance Attribute Summary collapse
- #http ⇒ HTTPClient readonly
Instance Method Summary collapse
-
#bind(slop) ⇒ Object
Adds extra CLI options to the given Slop instance.
-
#create_directory(path) ⇒ Object
Creates the path.
-
#download(url, path) ⇒ Object
Downloads the given file.
- #download_and_extract(url, path) ⇒ Object
-
#filename_from_url(url) ⇒ String
Returns the filename of the file located at `url`.
- #get_headers(url) ⇒ Hash
-
#initialize ⇒ ResourceSwitcher
constructor
A new instance of ResourceSwitcher.
-
#remove_file(path) ⇒ Object
Removes the given file, mainly exists to make testing easier.
Constructor Details
#initialize ⇒ ResourceSwitcher
Returns a new instance of ResourceSwitcher.
13 14 15 |
# File 'lib/opener/core/resource_switcher.rb', line 13

##
# Builds a new switcher and stores a fresh HTTPClient instance in `@http`.
#
def initialize
  @http = HTTPClient.new
end
Instance Attribute Details
#http ⇒ HTTPClient (readonly)
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/opener/core/resource_switcher.rb', line 10

##
# Class for downloading and extracting external resources such as
# models/lexicons.
#
# @!attribute [r] http
#  @return [HTTPClient]
#
class ResourceSwitcher
  attr_reader :http

  def initialize
    @http = HTTPClient.new
  end

  ##
  # Adds extra CLI options to the given Slop instance.
  #
  # @param [Slop] slop
  #
  def bind(slop)
    slop.separator "\nResource Options:\n"

    slop.on :'resource-url=',
      'URL pointing to a .zip/.tar.gz file to download',
      :as => String

    slop.on :'resource-path=',
      'Path where the resources should be saved',
      :as => String

    # Hijack Slop's run block so we can inject our own code before it. This
    # is quite grotesque, but sadly the only way.
    old_runner = slop.instance_variable_get(:@runner)

    slop.run do |opts, args|
      if opts[:'resource-path'] && opts[:'resource-url']
        download_and_extract(opts[:'resource-url'], opts[:'resource-path'])
      end

      # No run block may have been registered before #bind was called, in
      # which case there is nothing to chain to.
      old_runner.call(opts, args) if old_runner
    end
  end

  ##
  # Downloads the archive at `url` into `path`, extracts it there and removes
  # the downloaded archive afterwards (also when the extraction fails).
  #
  # @param [String] url
  # @param [String] path
  #
  def download_and_extract(url, path)
    filename  = filename_from_url(url)
    temp_path = File.join(path, filename)

    create_directory(path)

    download(url, temp_path)

    begin
      Archive.extract(temp_path, path)
    ensure
      # Never leave the downloaded archive behind, even if it is corrupt.
      remove_file(temp_path)
    end
  end

  ##
  # Downloads the given file.
  #
  # @param [String] url
  # @param [String] path
  #
  def download(url, path)
    # 'wb' keeps the payload byte-for-byte identical on every platform; plain
    # 'w' performs newline translation on Windows and corrupts archives.
    File.open(path, 'wb') do |handle|
      http.get(url) do |chunk|
        handle.write(chunk)
      end
    end
  end

  ##
  # Returns the filename of the file located at `url`, taken from the
  # Content-Disposition header. Surrounding quotes, trailing header
  # parameters and directory components are stripped from the result.
  #
  # @param [String] url
  # @return [String]
  # @raise [RuntimeError] When the header or a usable filename is missing.
  #
  def filename_from_url(url)
    headers = get_headers(url)

    # Header names are case-insensitive, so don't rely on exact casing.
    pair = headers.find do |name, _|
      name.to_s.casecmp('Content-Disposition') == 0
    end

    disposition = pair && pair[1]

    unless disposition
      raise "The URL #{url.inspect} did not return a Content-Disposition " \
        "header. This header is required to figure out the filename"
    end

    # Accept both the quoted and the bare form and stop at the next parameter.
    matches = disposition.match(/filename=\s*(?:"([^"]*)"|([^;]+))/i)
    raw     = matches && (matches[1] || matches[2])

    # The name comes from a remote server: keep only the last path component
    # so it can't point outside of the target directory.
    filename = File.basename(raw.to_s.strip)

    if filename.empty? || ['.', '..', '/'].include?(filename)
      raise 'No filename could be found in the Content-Disposition header'
    end

    filename
  end

  ##
  # Creates the path. This method mainly exists to make testing a bit
  # easier.
  #
  # @param [String] path
  #
  def create_directory(path)
    FileUtils.mkdir_p(path)
  end

  ##
  # Removes the given file, mainly exists to make testing easier.
  #
  # @param [String] path
  #
  def remove_file(path)
    File.unlink(path)
  end

  ##
  # @param [String] url
  # @return [Hash]
  #
  def get_headers(url)
    http.head(url).headers
  end
end
Instance Method Details
#bind(slop) ⇒ Object
Adds extra CLI options to the given Slop instance.
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/opener/core/resource_switcher.rb', line 22

##
# Adds extra CLI options to the given Slop instance and wraps its run block
# so that the resources are downloaded before the original block runs.
#
# @param [Slop] slop
#
def bind(slop)
  slop.separator "\nResource Options:\n"

  slop.on :'resource-url=',
    'URL pointing to a .zip/.tar.gz file to download',
    :as => String

  slop.on :'resource-path=',
    'Path where the resources should be saved',
    :as => String

  # Hijack Slop's run block so we can inject our own code before it. This
  # is quite grotesque, but sadly the only way.
  old_runner = slop.instance_variable_get(:@runner)

  slop.run do |opts, args|
    if opts[:'resource-path'] && opts[:'resource-url']
      download_and_extract(opts[:'resource-url'], opts[:'resource-path'])
    end

    # No run block may have been registered before #bind was called, in
    # which case there is nothing to chain to.
    old_runner.call(opts, args) if old_runner
  end
end
#create_directory(path) ⇒ Object
Creates the path. This method mainly exists to make testing a bit easier.
106 107 108 |
# File 'lib/opener/core/resource_switcher.rb', line 106

##
# Makes sure the directory `path` exists by delegating to
# `FileUtils.mkdir_p`. Kept as its own method mainly so it is easy to stub
# in tests.
#
# @param [String] path
#
def create_directory(path)
  FileUtils.mkdir_p(path)
end
#download(url, path) ⇒ Object
Downloads the given file.
69 70 71 72 73 74 75 |
# File 'lib/opener/core/resource_switcher.rb', line 69

##
# Downloads the given file, writing every chunk yielded by the HTTP client
# straight to `path`.
#
# @param [String] url
# @param [String] path
#
def download(url, path)
  # 'wb' keeps the payload byte-for-byte identical on every platform; plain
  # 'w' performs newline translation on Windows and corrupts archives.
  File.open(path, 'wb') do |handle|
    http.get(url) do |chunk|
      handle.write(chunk)
    end
  end
end
#download_and_extract(url, path) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/opener/core/resource_switcher.rb', line 50

##
# Downloads the archive at `url` into `path`, extracts it there and removes
# the downloaded archive afterwards (also when the extraction fails).
#
# @param [String] url
# @param [String] path
#
def download_and_extract(url, path)
  filename  = filename_from_url(url)
  temp_path = File.join(path, filename)

  create_directory(path)

  download(url, temp_path)

  begin
    Archive.extract(temp_path, path)
  ensure
    # Never leave the downloaded archive behind, even if it is corrupt.
    remove_file(temp_path)
  end
end
#filename_from_url(url) ⇒ String
Returns the filename of the file located at `url`.
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/opener/core/resource_switcher.rb', line 83

##
# Returns the filename of the file located at `url`, taken from the
# Content-Disposition header. Surrounding quotes, trailing header
# parameters and directory components are stripped from the result.
#
# @param [String] url
# @return [String]
# @raise [RuntimeError] When the header or a usable filename is missing.
#
def filename_from_url(url)
  headers = get_headers(url)

  # Header names are case-insensitive, so don't rely on exact casing.
  pair = headers.find do |name, _|
    name.to_s.casecmp('Content-Disposition') == 0
  end

  disposition = pair && pair[1]

  unless disposition
    raise "The URL #{url.inspect} did not return a Content-Disposition " \
      "header. This header is required to figure out the filename"
  end

  # Accept both the quoted and the bare form and stop at the next parameter.
  matches = disposition.match(/filename=\s*(?:"([^"]*)"|([^;]+))/i)
  raw     = matches && (matches[1] || matches[2])

  # The name comes from a remote server: keep only the last path component
  # so it can't point outside of the target directory.
  filename = File.basename(raw.to_s.strip)

  if filename.empty? || ['.', '..', '/'].include?(filename)
    raise 'No filename could be found in the Content-Disposition header'
  end

  filename
end
#get_headers(url) ⇒ Hash
123 124 125 |
# File 'lib/opener/core/resource_switcher.rb', line 123

##
# Sends a HEAD request for `url` through `http` and returns the `headers`
# of the response.
#
# @param [String] url
# @return [Hash]
#
def get_headers(url)
  response = http.head(url)

  response.headers
end
#remove_file(path) ⇒ Object
Removes the given file, mainly exists to make testing easier.
115 116 117 |
# File 'lib/opener/core/resource_switcher.rb', line 115

##
# Deletes the file at `path` via `File.unlink`. Kept as its own method
# mainly so it is easy to stub in tests.
#
# @param [String] path
#
def remove_file(path)
  File.unlink(path)
end