Class: ImageScraper::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/image_scraper/client.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url, options = {}) ⇒ Client

Returns a new instance of Client.



5
6
7
8
9
10
11
12
13
# File 'lib/image_scraper/client.rb', line 5

# Builds a client for +url+ and immediately tries to download and parse the
# page. Fetching is best-effort: any StandardError raised while reading the
# page is swallowed and leaves +@doc+ set to +nil+.
#
# @param url [String] address of the page to scrape; stored percent-escaped
# @param options [Hash] optional settings; the hash passed in is not modified
# @option options [Object] :convert_to_absolute_url (true) truthy to run found
#   URLs through ImageScraper::Util.absolute_url
# @option options [Object] :include_css_images (true) truthy to make
#   #image_urls also return #stylesheet_images
# @option options [Object] :include_css_data_images (false) flag consulted by
#   #stylesheet_images for +data:image+ URLs
def initialize(url, options = {})
  # Non-bang reverse_merge fills in defaults without mutating the caller's hash.
  options = options.reverse_merge(:convert_to_absolute_url => true,
                                  :include_css_images => true,
                                  :include_css_data_images => false)

  # URI.escape was removed in Ruby 3.0; it delegated to the default parser's
  # #escape, which is available on old and new Rubies alike.
  @url = URI::DEFAULT_PARSER.escape(url)
  @convert_to_absolute_url = options[:convert_to_absolute_url]
  @include_css_images = options[:include_css_images]
  @include_css_data_images = options[:include_css_data_images]

  # Kernel#open stopped handling URLs in Ruby 3.0; open-uri exposes URI.open
  # on Ruby >= 2.5. Older Rubies keep using the patched Kernel.open.
  opener = URI.respond_to?(:open) ? URI : Kernel
  html = begin
    # Block form guarantees the handle is closed once the body is read.
    opener.open(@url, &:read)
  rescue StandardError
    nil
  end
  @doc = html ? Nokogiri::HTML(html) : nil
end

Instance Attribute Details

#convert_to_absolute_url ⇒ Object

Returns the value of attribute convert_to_absolute_url.



3
4
5
# File 'lib/image_scraper/client.rb', line 3

# Flag the scraping methods check before running a found URL through
# ImageScraper::Util.absolute_url.
#
# @return [Object] the +:convert_to_absolute_url+ option captured by
#   #initialize (+true+ when the option was omitted)
def convert_to_absolute_url
  @convert_to_absolute_url
end

#doc ⇒ Object

Returns the value of attribute doc.



3
4
5
# File 'lib/image_scraper/client.rb', line 3

# Parsed page the other methods query with XPath.
#
# @return [Object, nil] the Nokogiri::HTML result stored by #initialize, or
#   +nil+ when the page could not be read
def doc
  @doc
end

#include_css_data_images ⇒ Object

Returns the value of attribute include_css_data_images.



3
4
5
# File 'lib/image_scraper/client.rb', line 3

# Flag consulted by #stylesheet_images when a CSS +url(...)+ value contains
# "data:image".
#
# @return [Object] the +:include_css_data_images+ option captured by
#   #initialize (+false+ when the option was omitted)
def include_css_data_images
  @include_css_data_images
end

#include_css_images ⇒ Object

Returns the value of attribute include_css_images.



3
4
5
# File 'lib/image_scraper/client.rb', line 3

# Flag that decides whether #image_urls appends #stylesheet_images to the
# images found in the page itself.
#
# @return [Object] the +:include_css_images+ option captured by #initialize
#   (+true+ when the option was omitted)
def include_css_images
  @include_css_images
end

#url ⇒ Object

Returns the value of attribute url.



3
4
5
# File 'lib/image_scraper/client.rb', line 3

# Address of the page being scraped.
#
# @return [String] the percent-escaped URL stored by #initialize
def url
  @url
end

Instance Method Details

#image_urls ⇒ Object



15
16
17
18
19
# File 'lib/image_scraper/client.rb', line 15

# Collects every image URL the client knows how to find.
#
# @return [Array] the result of #page_images, followed by the result of
#   #stylesheet_images when #include_css_images is truthy
def image_urls
  return page_images unless include_css_images

  page_images + stylesheet_images
end

#page_images ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
# File 'lib/image_scraper/client.rb', line 21

# Lists the +src+ of every +<img>+ element in the parsed page.
#
# Elements whose +src+ is blank are skipped. Each remaining value is
# stripped, percent-escaped and, when #convert_to_absolute_url is truthy,
# passed through ImageScraper::Util.absolute_url together with #url.
#
# @return [Array] the image URLs in document order; empty when #doc is blank
def page_images
  return [] if doc.blank?

  doc.xpath("//img").each_with_object([]) do |img, urls|
    src = img["src"]
    next if src.blank?

    # URI.escape was removed in Ruby 3.0; the default parser's #escape is the
    # method it delegated to.
    image = URI::DEFAULT_PARSER.escape(src.strip)
    image = ImageScraper::Util.absolute_url(url, image) if convert_to_absolute_url
    urls << image
  end
end

#stylesheet_images ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/image_scraper/client.rb', line 33

# Lists the targets of every +url(...)+ reference found in the page's
# stylesheets.
#
# Each stylesheet returned by #stylesheets is opened and read in full; errors
# raised while opening one propagate to the caller. Every captured value is
# percent-escaped first. Values containing "data:image" are returned as-is
# when +@include_css_data_images+ is truthy and left out otherwise. All other
# values go through ImageScraper::Util.strip_quotes and, when
# +@convert_to_absolute_url+ is truthy, ImageScraper::Util.absolute_url
# relative to the stylesheet.
#
# @return [Array] image URLs in the order they appear, stylesheet by stylesheet
def stylesheet_images
  # Kernel#open stopped handling URLs in Ruby 3.0; open-uri exposes URI.open
  # on Ruby >= 2.5. Older Rubies keep using the patched Kernel.open.
  opener = URI.respond_to?(:open) ? URI : Kernel

  stylesheets.flat_map do |stylesheet|
    # Block form closes the handle; #read works for both the StringIO and the
    # Tempfile that open-uri may hand back.
    css = opener.open(stylesheet, &:read)

    css.scan(/url\((.*?)\)/).each_with_object([]) do |(raw_url), found|
      # URI.escape was removed in Ruby 3.0; this is the method it delegated to.
      image_url = URI::DEFAULT_PARSER.escape(raw_url)

      if image_url.include?("data:image")
        # Inline data images are only reported when explicitly requested;
        # they are never resolved against the stylesheet location.
        found << image_url if @include_css_data_images
      else
        image_url = ImageScraper::Util.strip_quotes(image_url)
        found << (@convert_to_absolute_url ? ImageScraper::Util.absolute_url(stylesheet, image_url) : image_url)
      end
    end
  end
end

#stylesheets ⇒ Object



52
53
54
55
56
57
# File 'lib/image_scraper/client.rb', line 52

# Lists the stylesheets linked from the parsed page.
#
# Only +<link>+ elements whose +rel+ attribute is exactly "stylesheet" are
# considered. Links with a missing or blank +href+ are skipped. Each remaining
# +href+ is stripped, percent-escaped and passed through
# ImageScraper::Util.absolute_url together with #url.
#
# @return [Array] stylesheet URLs in document order; empty when #doc is blank
def stylesheets
  return [] if doc.blank?

  hrefs = doc.xpath('//link[@rel="stylesheet"]').map { |link| link['href'] }
  # A <link> without href would otherwise raise inside escape.
  hrefs.reject(&:blank?).map do |href|
    # URI.escape was removed in Ruby 3.0; this is the method it delegated to.
    ImageScraper::Util.absolute_url(url, URI::DEFAULT_PARSER.escape(href.strip))
  end
end