Class: Opener::Core::ResourceSwitcher

Inherits:
Object
  • Object
show all
Defined in:
lib/opener/core/resource_switcher.rb

Overview

Class for downloading and extracting external resources such as models/lexicons.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ ResourceSwitcher

Returns a new instance of ResourceSwitcher.



13
14
15
# File 'lib/opener/core/resource_switcher.rb', line 13

# Creates the switcher and stores a fresh HTTPClient instance in @http.
def initialize
  @http = HTTPClient.new
end

Instance Attribute Details

#http ⇒ HTTPClient (readonly)

Returns:

  • (HTTPClient)


10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/opener/core/resource_switcher.rb', line 10

##
# Class for downloading and extracting external resources such as
# models/lexicons.
#
class ResourceSwitcher
  # @return [HTTPClient]
  attr_reader :http

  def initialize
    @http = HTTPClient.new
  end

  ##
  # Adds extra CLI options to the given Slop instance.
  #
  # The run block that is already registered on `slop` (if any) is wrapped:
  # when both `--resource-url` and `--resource-path` are given the resource
  # is downloaded and extracted first, after which the previous run block is
  # called with the same arguments.
  #
  # @param [Slop] slop
  #
  def bind(slop)
    slop.separator "\nResource Options:\n"

    slop.on :'resource-url=',
      'URL pointing to a .zip/.tar.gz file to download',
      :as => String

    slop.on :'resource-path=',
      'Path where the resources should be saved',
      :as => String

    # Hijack Slop's run block so we can inject our own code before it.  This
    # is quite grotesque, but sadly the only way.
    old_runner = slop.instance_variable_get(:@runner)

    slop.run do |opts, args|
      if opts[:'resource-path'] && opts[:'resource-url']
        download_and_extract(opts[:'resource-url'], opts[:'resource-path'])
      end

      # No run block may have been registered before #bind was called.
      old_runner.call(opts, args) if old_runner
    end
  end

  ##
  # Downloads the archive located at `url` into `path`, extracts it there
  # and removes the downloaded archive afterwards.
  #
  # @param [String] url
  # @param [String] path
  # @return [Object] The return value of {#remove_file}.
  #
  def download_and_extract(url, path)
    # The filename is supplied by the remote server, so only its last path
    # segment is used to make sure the archive ends up inside `path`.
    filename  = File.basename(filename_from_url(url))
    temp_path = File.join(path, filename)
    removed   = nil

    create_directory(path)

    download(url, temp_path)

    begin
      Archive.extract(temp_path, path)
    ensure
      # Also clean up the archive when the extraction fails.
      removed = remove_file(temp_path)
    end

    removed
  end

  ##
  # Downloads the given file.
  #
  # The file is written in binary mode so archive bytes are stored
  # untouched on every platform. When the transfer fails the partially
  # written file is removed and the error is re-raised.
  #
  # @param [String] url
  # @param [String] path
  #
  def download(url, path)
    opened = false

    File.open(path, 'wb') do |handle|
      opened = true

      http.get(url) do |chunk|
        handle.write(chunk)
      end
    end
  rescue StandardError
    # Only remove files this call actually created/truncated.
    File.unlink(path) if opened && File.exist?(path)
    raise
  end

  ##
  # Returns the filename of the file located at `url`.
  #
  # The name is taken from the `filename` parameter of the
  # Content-Disposition header. Surrounding quotes, trailing parameters and
  # directory components are stripped.
  #
  # @param [String] url
  # @return [String]
  # @raise [RuntimeError] When the header or the filename is missing.
  #
  def filename_from_url(url)
    headers = get_headers(url)

    # HTTP header names are case-insensitive, don't rely on exact casing.
    _, disposition = headers.find { |name, _value|
      name.to_s.casecmp('Content-Disposition') == 0
    }

    unless disposition
      raise "The URL #{url.inspect} did not return a Content-Disposition " \
        "header. This header is required to figure out the filename"
    end

    # Matches both `filename="foo.zip"` and `filename=foo.zip; other=...`.
    matches = disposition.match(/\bfilename\s*=\s*(?:"([^"]*)"|([^;]+))/i)
    raw     = matches && (matches[1] || matches[2])
    name    = raw ? File.basename(raw.strip) : ''

    if name.empty? || name == '.' || name == '..'
      raise 'No filename could be found in the Content-Disposition header'
    end

    name
  end

  ##
  # Creates the path. This method mainly exists to make testing a bit
  # easier.
  #
  # @param [String] path
  #
  def create_directory(path)
    FileUtils.mkdir_p(path)
  end

  ##
  # Removes the given file, mainly exists to make testing easier.
  #
  # @param [String] path
  #
  def remove_file(path)
    File.unlink(path)
  end

  ##
  # Returns the headers of a HEAD request to `url`.
  #
  # @param [String] url
  # @return [Hash]
  #
  def get_headers(url)
    http.head(url).headers
  end
end

Instance Method Details

#bind(slop) ⇒ Object

Adds extra CLI options to the given Slop instance.

Parameters:

  • slop (Slop)


22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/opener/core/resource_switcher.rb', line 22

# Adds the resource CLI options to the given Slop instance and wraps the
# run block that is already registered on it (if any): when both
# `--resource-url` and `--resource-path` are given the resource is
# downloaded and extracted before the previous run block is called.
#
# @param [Slop] slop
#
def bind(slop)
  slop.separator "\nResource Options:\n"

  slop.on :'resource-url=',
    'URL pointing to a .zip/.tar.gz file to download',
    :as => String

  slop.on :'resource-path=',
    'Path where the resources should be saved',
    :as => String

  # Hijack Slop's run block so we can inject our own code before it.  This
  # is quite grotesque, but sadly the only way.
  old_runner = slop.instance_variable_get(:@runner)

  slop.run do |opts, args|
    if opts[:'resource-path'] && opts[:'resource-url']
      download_and_extract(opts[:'resource-url'], opts[:'resource-path'])
    end

    # No run block may have been registered before #bind was called.
    old_runner.call(opts, args) if old_runner
  end
end

#create_directory(path) ⇒ Object

Creates the path. This method mainly exists to make testing a bit easier.

Parameters:

  • path (String)


106
107
108
# File 'lib/opener/core/resource_switcher.rb', line 106

# Creates the directory `path` using FileUtils.mkdir_p.
#
# @param [String] path
#
def create_directory(path)
  FileUtils.mkdir_p(path)
end

#download(url, path) ⇒ Object

Downloads the given file.

Parameters:

  • url (String)
  • path (String)


69
70
71
72
73
74
75
# File 'lib/opener/core/resource_switcher.rb', line 69

# Downloads `url` and writes the response chunks to the file at `path`.
#
# The file is written in binary mode so archive bytes are stored untouched
# on every platform. When the transfer fails the partially written file is
# removed and the error is re-raised.
#
# @param [String] url
# @param [String] path
#
def download(url, path)
  opened = false

  File.open(path, 'wb') do |handle|
    opened = true

    http.get(url) do |chunk|
      handle.write(chunk)
    end
  end
rescue StandardError
  # Only remove files this call actually created/truncated.
  File.unlink(path) if opened && File.exist?(path)
  raise
end

#download_and_extract(url, path) ⇒ Object

Parameters:

  • url (String)
  • path (String)


50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/opener/core/resource_switcher.rb', line 50

# Downloads the archive located at `url` into `path`, extracts it there and
# removes the downloaded archive afterwards, also when extraction fails.
#
# @param [String] url
# @param [String] path
# @return [Object] The return value of remove_file.
#
def download_and_extract(url, path)
  # The filename is supplied by the remote server, so only its last path
  # segment is used to make sure the archive ends up inside `path`.
  filename  = File.basename(filename_from_url(url))
  temp_path = File.join(path, filename)
  removed   = nil

  create_directory(path)

  download(url, temp_path)

  begin
    Archive.extract(temp_path, path)
  ensure
    # Also clean up the archive when the extraction fails.
    removed = remove_file(temp_path)
  end

  removed
end

#filename_from_url(url) ⇒ String

Returns the filename of the file located at `url`.

Parameters:

  • url (String)

Returns:

  • (String)


83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/opener/core/resource_switcher.rb', line 83

# Returns the filename of the file located at `url`.
#
# The name is taken from the `filename` parameter of the Content-Disposition
# header. Surrounding quotes, trailing parameters and directory components
# are stripped.
#
# @param [String] url
# @return [String]
# @raise [RuntimeError] When the header or the filename is missing.
#
def filename_from_url(url)
  headers = get_headers(url)

  # HTTP header names are case-insensitive, don't rely on exact casing.
  _, disposition = headers.find { |name, _value|
    name.to_s.casecmp('Content-Disposition') == 0
  }

  unless disposition
    raise "The URL #{url.inspect} did not return a Content-Disposition " \
      "header. This header is required to figure out the filename"
  end

  # Matches both `filename="foo.zip"` and `filename=foo.zip; other=...`.
  matches = disposition.match(/\bfilename\s*=\s*(?:"([^"]*)"|([^;]+))/i)
  raw     = matches && (matches[1] || matches[2])
  name    = raw ? File.basename(raw.strip) : ''

  if name.empty? || name == '.' || name == '..'
    raise 'No filename could be found in the Content-Disposition header'
  end

  name
end

#get_headers(url) ⇒ Hash

Parameters:

  • url (String)

Returns:

  • (Hash)


123
124
125
# File 'lib/opener/core/resource_switcher.rb', line 123

# Performs a HEAD request for `url` and hands back the response headers.
#
# @param [String] url
# @return [Hash]
#
def get_headers(url)
  response = http.head(url)

  response.headers
end

#remove_file(path) ⇒ Object

Removes the given file, mainly exists to make testing easier.

Parameters:

  • path (String)


115
116
117
# File 'lib/opener/core/resource_switcher.rb', line 115

# Deletes the file at `path` using File.unlink.
#
# @param [String] path
#
def remove_file(path)
  File.unlink(path)
end