Class: IDl

Inherits:
Object
  • Object
show all
Defined in:
lib/idl.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ IDl

Returns a new instance of IDl.



13
14
15
# File 'lib/idl.rb', line 13

# Prepares the list of dot-prefixed image file extensions (".jpg", ".png",
# ...) that #harvest matches link paths against.
def initialize
  bare_names = %w(jpg jpeg png gif tif tiff)
  @image_extensions = bare_names.map { |name| '.' + name }
end

Instance Method Details

#fetch(urls, target_dir = './') ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/idl.rb', line 38

# Downloads every distinct URL in +urls+ concurrently and streams each
# response body into its own file inside +target_dir+.
#
# The method runs its own EventMachine reactor and blocks until the request
# pool reports that all requests have finished.
#
# @param urls [Array<String>] URLs to download; duplicates are fetched once
# @param target_dir [String] directory to write into; created on demand when
#   the first chunk of a response arrives
# @return [nil] immediately when +urls+ is empty; otherwise the result of
#   EM.run
def fetch(urls, target_dir='./')
  pending = urls.uniq
  # The pool only signals completion after a request finishes, so with
  # nothing queued the reactor would never be stopped. Bail out instead of
  # blocking forever.
  return if pending.empty?

  EM.run do
    request_pool = EM::MultiRequest.new

    request_pool.callback do
      puts 'All requests finished.'
      EM.stop
    end

    pending.each do |url|
      puts "Enqueuing [#{url}]"
      request = EM::HttpRequest.new(url).get

      request.stream do |chunk|
        # Open the destination lazily, on the first chunk only.
        # NOTE(review): +file+ is presumably an accessor added to the request
        # object elsewhere in this file - confirm.
        unless request.file
          FileUtils.mkdir_p target_dir
          request.file = File.open unique_filepath(url, target_dir), 'wb'
        end
        request.file.write chunk
      end

      request.callback do
        # Close the handle so buffered data is flushed and the descriptor
        # is released as soon as the download completes.
        request.file.close if request.file
        puts "Image [#{url}] was downloaded successfully."
      end

      request.errback do
        # A failed request may have written a partial file; still release it.
        request.file.close if request.file
        warn "Image [#{url}] could not be downloaded."
      end

      request_pool.add request.object_id, request
    end
  end
end

#harvest(url, linked = false, target_dir = './') ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/idl.rb', line 17

# Collects image URLs from the page at +url+ and hands them to #fetch.
#
# With +linked+ false, every <img src> on the page is collected. With
# +linked+ true, <a href> targets are collected instead, but only those whose
# path ends (case-insensitively) in one of @image_extensions. Relative
# references are resolved against +url+.
#
# @param url [String] address of the page to scan
# @param linked [Boolean] scan anchors for image links instead of <img> tags
# @param target_dir [String] directory passed through to #fetch
# @return [Object, nil] the result of #fetch, or nil when no URL was found
def harvest(url, linked=false, target_dir='./')
  # URI.open (rather than Kernel#open) is the supported way to read a URL
  # and cannot be tricked into spawning a "|command" pipe. The block form
  # closes the handle once parsing is done.
  doc = URI.open(url) { |io| Nokogiri.parse io }
  base = URI(url)
  selector, attribute = linked ? ['a[href]', 'href'] : ['img[src]', 'src']

  urls = []
  doc.css(selector).each do |node|
    reference = node[attribute]
    begin
      if linked
        # Opaque URIs (e.g. mailto:) have no path and are never images.
        path = URI(reference).path
        next unless path && path.downcase.end_with?(*@image_extensions)
      end
      urls << base.merge(reference).to_s
    rescue URI::Error
      # One malformed reference must not abort the whole harvest.
      next
    end
  end

  # An empty array is truthy, so test emptiness explicitly.
  fetch urls, target_dir unless urls.empty?
end

#unique_filepath(url, target_dir) ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/idl.rb', line 66

# Picks a path inside +target_dir+ for the file named by the last segment of
# +url+'s path, avoiding files that already exist.
#
# If the plain name is taken, "-2", "-3", ... is inserted before the
# extension until an unused path is found.
#
# @param url [String] URL whose path supplies the file name
# @param target_dir [String] directory prefix, with or without a trailing
#   separator; an empty string yields a bare file name
# @return [String] a path for which no file currently exists
def unique_filepath(url, target_dir)
  filename = File.basename URI(url).path
  ext = File.extname filename
  stem = File.basename filename, ext

  # File.join supplies the separator when target_dir lacks one and does not
  # double it when present. The empty-string case is kept relative.
  in_target = lambda do |name|
    target_dir.empty? ? name : File.join(target_dir, name)
  end

  filepath = in_target.call(filename)
  suffix = 1

  # File.exist? replaces File.exists?, which no longer exists in Ruby 3.2+.
  while File.exist? filepath
    suffix += 1
    filepath = in_target.call("#{stem}-#{suffix}#{ext}")
  end

  filepath
end