Class: ImageScraper::Client
- Inherits:
-
Object
- Object
- ImageScraper::Client
- Defined in:
- lib/image_scraper/client.rb
Instance Attribute Summary collapse
-
#convert_to_absolute_url ⇒ Object
Returns the value of attribute convert_to_absolute_url.
-
#doc ⇒ Object
Returns the value of attribute doc.
-
#include_css_data_images ⇒ Object
Returns the value of attribute include_css_data_images.
-
#include_css_images ⇒ Object
Returns the value of attribute include_css_images.
-
#url ⇒ Object
Returns the value of attribute url.
Instance Method Summary collapse
- #image_urls ⇒ Object
-
#initialize(url, options = {}) ⇒ Client
constructor
A new instance of Client.
- #page_images ⇒ Object
- #stylesheet_images ⇒ Object
- #stylesheets ⇒ Object
Constructor Details
#initialize(url, options = {}) ⇒ Client
Returns a new instance of Client.
5 6 7 8 9 10 11 12 13 |
# File 'lib/image_scraper/client.rb', line 5
#
# Builds a client for +url+, then immediately fetches the page and parses it.
#
# Fetching is best-effort: if the page cannot be read for any reason, +doc+
# is left as nil rather than raising.
#
# @param url [String] address of the page to scrape; it is escaped before use
# @param options [Hash] optional settings, keyed by symbol
# @option options [Boolean] :convert_to_absolute_url (true)
# @option options [Boolean] :include_css_images (true)
# @option options [Boolean] :include_css_data_images (false)
# @return [Client] a new instance of Client
def initialize(url, options = {})
  # Non-destructive reverse_merge: defaults are applied to a copy so the
  # caller's hash is left untouched.
  options = options.reverse_merge(
    :convert_to_absolute_url => true,
    :include_css_images      => true,
    :include_css_data_images => false
  )

  # URI.escape was removed in Ruby 3.0; the default parser exposes the same
  # escaping routine.
  @url = URI::DEFAULT_PARSER.escape(url)
  @convert_to_absolute_url = options[:convert_to_absolute_url]
  @include_css_images      = options[:include_css_images]
  @include_css_data_images = options[:include_css_data_images]

  # Kernel#open no longer opens URLs on Ruby 3.0+, so go through open-uri's
  # URI.open. The block form closes the handle once the body has been read.
  html = begin
    URI.open(@url, &:read)
  rescue StandardError
    nil # deliberate best-effort: an unreachable page simply yields no doc
  end
  @doc = html ? Nokogiri::HTML(html) : nil
end
Instance Attribute Details
#convert_to_absolute_url ⇒ Object
Returns the value of attribute convert_to_absolute_url.
3 4 5 |
# File 'lib/image_scraper/client.rb', line 3
#
# Reader for the +convert_to_absolute_url+ attribute.
#
# @return [Object] the current value of @convert_to_absolute_url
def convert_to_absolute_url
  @convert_to_absolute_url
end
#doc ⇒ Object
Returns the value of attribute doc.
3 4 5 |
# File 'lib/image_scraper/client.rb', line 3
#
# Reader for the +doc+ attribute.
#
# @return [Object] the current value of @doc
def doc
  @doc
end
#include_css_data_images ⇒ Object
Returns the value of attribute include_css_data_images.
3 4 5 |
# File 'lib/image_scraper/client.rb', line 3
#
# Reader for the +include_css_data_images+ attribute.
#
# @return [Object] the current value of @include_css_data_images
def include_css_data_images
  @include_css_data_images
end
#include_css_images ⇒ Object
Returns the value of attribute include_css_images.
3 4 5 |
# File 'lib/image_scraper/client.rb', line 3
#
# Reader for the +include_css_images+ attribute.
#
# @return [Object] the current value of @include_css_images
def include_css_images
  @include_css_images
end
#url ⇒ Object
Returns the value of attribute url.
3 4 5 |
# File 'lib/image_scraper/client.rb', line 3
#
# Reader for the +url+ attribute.
#
# @return [Object] the current value of @url
def url
  @url
end
Instance Method Details
#image_urls ⇒ Object
15 16 17 18 19 |
# File 'lib/image_scraper/client.rb', line 15
#
# Collects every image URL found for the page: the images returned by
# #page_images, followed by those from #stylesheet_images when
# +include_css_images+ is truthy.
#
# @return [Array] page images, optionally followed by stylesheet images
def image_urls
  found = page_images
  return found unless include_css_images

  found + stylesheet_images
end
#page_images ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/image_scraper/client.rb', line 21
#
# Lists the +src+ of every <img> element in the parsed page.
#
# Each +src+ is stripped of surrounding whitespace and escaped. When
# +convert_to_absolute_url+ is truthy it is additionally passed through
# ImageScraper::Util.absolute_url together with the page +url+.
#
# @return [Array] image URLs in document order; empty when +doc+ is blank
def page_images
  return [] if doc.blank?

  doc.xpath("//img").each_with_object([]) do |img, urls|
    src = img["src"]
    next if src.blank? # <img> without a usable src contributes nothing

    # URI.escape was removed in Ruby 3.0; the default parser exposes the same
    # escaping routine.
    image = URI::DEFAULT_PARSER.escape(src.strip)
    image = ImageScraper::Util.absolute_url(url, image) if convert_to_absolute_url
    urls << image
  end
end
#stylesheet_images ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/image_scraper/client.rb', line 33
#
# Fetches every stylesheet returned by #stylesheets and collects the targets
# of the +url(...)+ references found in its CSS.
#
# Each reference is escaped. A reference containing "data:image" is returned
# as-is when @include_css_data_images is truthy; every other reference has its
# quotes removed via ImageScraper::Util.strip_quotes and, when
# @convert_to_absolute_url is truthy, is resolved against the stylesheet's
# own location with ImageScraper::Util.absolute_url.
#
# Errors raised while fetching a stylesheet are not rescued here.
#
# @return [Array] image references from all stylesheets, in encounter order
def stylesheet_images
  stylesheets.flat_map do |stylesheet|
    # Kernel#open no longer opens URLs on Ruby 3.0+, so go through open-uri's
    # URI.open. Reading inside the block works for whatever IO object
    # open-uri hands back and closes it afterwards.
    css = URI.open(stylesheet, &:read)

    css.scan(/url\((.*?)\)/).map do |match|
      # URI.escape was removed in Ruby 3.0; the default parser exposes the
      # same escaping routine.
      image_url = URI::DEFAULT_PARSER.escape(match[0])

      if image_url.include?("data:image") && @include_css_data_images
        image_url
      else
        # NOTE(review): data images also reach this branch when
        # @include_css_data_images is false, so they are still returned
        # (after quote stripping / absolutising). Confirm whether they were
        # meant to be dropped instead.
        image_url = ImageScraper::Util.strip_quotes(image_url)
        @convert_to_absolute_url ? ImageScraper::Util.absolute_url(stylesheet, image_url) : image_url
      end
    end
  end
end
#stylesheets ⇒ Object
52 53 54 55 56 57 |
# File 'lib/image_scraper/client.rb', line 52
#
# Lists the location of every <link rel="stylesheet"> in the parsed page.
# Each +href+ is stripped, escaped and passed through
# ImageScraper::Util.absolute_url together with the page +url+.
#
# @return [Array] stylesheet locations in document order; empty when +doc+
#   is blank
def stylesheets
  return [] if doc.blank?

  doc.xpath('//link[@rel="stylesheet"]').each_with_object([]) do |link, sheets|
    href = link['href']
    # A stylesheet link without an href has nothing to fetch; previously the
    # nil href was handed straight to the escaper and raised.
    next if href.blank?

    # URI.escape was removed in Ruby 3.0; the default parser exposes the same
    # escaping routine. Surrounding whitespace is dropped first so it is not
    # escaped into the URL.
    sheets << ImageScraper::Util.absolute_url(url, URI::DEFAULT_PARSER.escape(href.strip))
  end
end