Class: ThumbnailScraper::Webpage
- Inherits:
-
Object
- Object
- ThumbnailScraper::Webpage
- Includes:
- WithSmartUrl
- Defined in:
- lib/thumbnail_scraper/webpage.rb
Instance Attribute Summary collapse
-
#body ⇒ Object
Returns the value of attribute body.
Instance Method Summary collapse
- #attached_images_urls ⇒ Object
- #document ⇒ Object
- #has_linked_image? ⇒ Boolean
- #has_open_graph_image? ⇒ Boolean
- #image_url(image_path) ⇒ Object
-
#initialize(url, body) ⇒ Webpage
constructor
A new instance of Webpage.
- #linked_image_url ⇒ Object
- #open_graph_image_url ⇒ Object
Methods included from WithSmartUrl
Constructor Details
#initialize(url, body) ⇒ Webpage
Returns a new instance of Webpage.
10 11 12 13 |
# File 'lib/thumbnail_scraper/webpage.rb', line 10
#
# Builds a Webpage from its address and its already-fetched content.
#
# @param url  the page address; stored through the +url=+ writer
#             (presumably provided by WithSmartUrl — confirm)
# @param body the page content; stored through the +body=+ writer and
#             later parsed as HTML by #document
def initialize(url, body)
  self.url = url
  self.body = body
end
Instance Attribute Details
#body ⇒ Object
Returns the value of attribute body.
6 7 8 |
# File 'lib/thumbnail_scraper/webpage.rb', line 6
#
# Reader for the page content held in +@body+.
#
# @return [Object] whatever was last stored in +@body+ (nil when unset)
def body
  @body
end
Instance Method Details
#attached_images_urls ⇒ Object
60 61 62 63 64 65 66 67 68 69 |
# File 'lib/thumbnail_scraper/webpage.rb', line 60
#
# Collects the URL of every <img src="..."> attribute in the page.
#
# Sources are whitespace-stripped and blank ones are skipped. A source that
# already names a host ("http://", "https://" or protocol-relative "//") is
# passed to #image_url unchanged; previously a protocol-relative source was
# glued onto the page host ("http://page-host//cdn/x.png"). Any other source
# is anchored at the root of the page's host before being passed to
# #image_url, as before.
#
# NOTE(review): the root-anchored form is built from scheme and host only,
# so a non-default port on the page URL is not carried over — confirm
# whether that is intended.
#
# @return [Array] one #image_url result per non-blank src, in document order
def attached_images_urls
  sources = document.xpath("//img/@src").map { |attribute| attribute.value.strip }

  sources.reject(&:empty?).map do |source|
    if source.start_with?("http://", "https://", "//")
      image_url(source)
    else
      # Ensure exactly one leading slash before appending to the host.
      image_url("#{url.scheme}://#{url.host}#{source.sub(%r{\A/?}, '/')}")
    end
  end
end
#document ⇒ Object
15 16 17 |
# File 'lib/thumbnail_scraper/webpage.rb', line 15
#
# Parses the page body as HTML.
#
# The result is not cached: every call parses #body again.
#
# @return [Object] whatever Nokogiri::HTML::Document.parse returns for #body
def document
  markup = body
  Nokogiri::HTML::Document.parse(markup)
end
#has_linked_image? ⇒ Boolean
56 57 58 |
# File 'lib/thumbnail_scraper/webpage.rb', line 56
#
# Reports whether #linked_image_url found an image.
#
# @return [Boolean] false when #linked_image_url is nil, true otherwise
def has_linked_image?
  return false if linked_image_url.nil?

  true
end
#has_open_graph_image? ⇒ Boolean
44 45 46 |
# File 'lib/thumbnail_scraper/webpage.rb', line 44
#
# Reports whether #open_graph_image_url found an image.
#
# @return [Boolean] false when #open_graph_image_url is nil, true otherwise
def has_open_graph_image?
  return false if open_graph_image_url.nil?

  true
end
#image_url(image_path) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/thumbnail_scraper/webpage.rb', line 19
#
# Turns an image reference found in the page into an absolute URI.
#
# Four reference shapes are recognised:
# * "http://..." / "https://..." — parsed as-is;
# * "//host/path" — parsed, then given the page's scheme;
# * "/path"       — the page URL with its path replaced;
# * anything else — resolved against the directory of the page's path
#   (everything up to and including the last "/", or "/" when the page URL
#   has no path), with "." and ".." segments collapsed.
#
# For the last two shapes the page's own query string and fragment are
# dropped, so they do not leak onto the image URL.
#
# @param image_path [String] the reference as written in the page
# @return [URI::Generic] the resolved image address
def image_url(image_path)
  if image_path.start_with?("http://", "https://")
    URI(image_path)
  elsif image_path.start_with?("//")
    URI(image_path).tap { |resolved| resolved.scheme = url.scheme }
  else
    resolved = URI(url.to_s)
    # The page's ?query and #fragment belong to the page, not to the image.
    resolved.query = nil
    resolved.fragment = nil
    resolved.path =
      if image_path.start_with?("/")
        image_path
      else
        # Keep the trailing-slash directory ("/blog/" stays "/blog/") and
        # fall back to the root so an empty page path never resolves
        # against the process working directory.
        base_dir = url.path.to_s[%r{\A.*/}] || "/"
        File.expand_path(image_path, base_dir)
      end
    resolved
  end
end
#linked_image_url ⇒ Object
48 49 50 51 52 53 54 |
# File 'lib/thumbnail_scraper/webpage.rb', line 48
#
# Resolves the image advertised by the page's first
# <link rel="img_src" href="..."> element.
#
# The outcome is memoised in +@linked_image_url+, including the nil produced
# when the page has no such element, so the document is parsed at most once
# per instance for this lookup.
#
# @return [Object, nil] the #image_url result for the first matching href,
#   or nil when no element matches
def linked_image_url
  return @linked_image_url if defined?(@linked_image_url)

  href = document.xpath("//link[@rel='img_src']/@href").first
  @linked_image_url = href && image_url(href.value)
end
#open_graph_image_url ⇒ Object
36 37 38 39 40 41 42 |
# File 'lib/thumbnail_scraper/webpage.rb', line 36
#
# Resolves the image advertised by the page's first
# <meta property="og:image" content="..."> element.
#
# The outcome is memoised in +@open_graph_image_url+, including the nil
# produced when the page has no such element, so the document is parsed at
# most once per instance for this lookup.
#
# @return [Object, nil] the #image_url result for the first matching
#   content attribute, or nil when no element matches
def open_graph_image_url
  return @open_graph_image_url if defined?(@open_graph_image_url)

  content = document.xpath("//meta[@property='og:image']/@content").first
  @open_graph_image_url = content && image_url(content.value)
end