Class: ThumbnailScraper::Webpage

Inherits:
Object
  • Object
show all
Includes:
WithSmartUrl
Defined in:
lib/thumbnail_scraper/webpage.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from WithSmartUrl

#url, #url=

Constructor Details

#initialize(url, body) ⇒ Webpage

Returns a new instance of Webpage.



10
11
12
13
# File 'lib/thumbnail_scraper/webpage.rb', line 10

# Builds a Webpage from its address and its content.
#
# Both arguments are stored through writer methods rather than by direct
# instance-variable assignment: +url=+ is listed on this page among the
# methods included from WithSmartUrl, so any conversion it applies happens
# here (NOTE(review): its behaviour is not visible in this file — confirm).
#
# @param url  the page address, handed to +url=+
# @param body the page content, handed to +body=+
def initialize(url, body)
  self.url = url
  self.body = body
end

Instance Attribute Details

#body ⇒ Object

Returns the value of attribute body.



6
7
8
# File 'lib/thumbnail_scraper/webpage.rb', line 6

# Reader for the page content held in @body (the value that #document
# hands to the HTML parser).
#
# @return the current value of @body
def body
  @body
end

Instance Method Details

#attached_images_urls ⇒ Object



60
61
62
63
64
65
66
67
68
69
# File 'lib/thumbnail_scraper/webpage.rb', line 60

# Collects the URL of every +src+ attribute carried by an <img> element of
# the page.
#
# Each raw +src+ value is handed to #image_url unchanged. Building an
# absolute URL by string concatenation first (as this method used to do)
# turned protocol-relative sources such as "//cdn.example/x.png" into
# "scheme://host//cdn.example/x.png", dropped the page's port, and treated
# document-relative sources as root-relative, unlike the linked and Open
# Graph image lookups, which already delegate to #image_url.
#
# @return [Array] one entry per +src+ attribute, each as returned by #image_url
def attached_images_urls
  document.xpath("//img/@src").map { |src| image_url(src.value) }
end

#document ⇒ Object



15
16
17
# File 'lib/thumbnail_scraper/webpage.rb', line 15

# Parses the page body as an HTML document.
#
# Nothing is cached here: every call reads #body and runs the parser again.
#
# @return the result of Nokogiri::HTML::Document.parse for the current body
def document
  html = body
  Nokogiri::HTML::Document.parse(html)
end

#has_linked_image? ⇒ Boolean

Returns:

  • (Boolean)


56
57
58
# File 'lib/thumbnail_scraper/webpage.rb', line 56

# Tells whether #linked_image_url found an image for this page.
#
# @return [Boolean] false when #linked_image_url is nil, true otherwise
def has_linked_image?
  candidate = linked_image_url
  !candidate.nil?
end

#has_open_graph_image? ⇒ Boolean

Returns:

  • (Boolean)


44
45
46
# File 'lib/thumbnail_scraper/webpage.rb', line 44

# Tells whether #open_graph_image_url found an image for this page.
#
# @return [Boolean] false when #open_graph_image_url is nil, true otherwise
def has_open_graph_image?
  candidate = open_graph_image_url
  !candidate.nil?
end

#image_url(image_path) ⇒ Object



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/thumbnail_scraper/webpage.rb', line 19

# Resolves an image reference found in the page into a URI.
#
# Three forms are recognised:
#
# * references starting with "http://" or "https://" are parsed as they are;
# * protocol-relative references ("//host/path") are parsed and given the
#   page's scheme;
# * everything else is resolved against the page URL with URI.join, i.e. by
#   the URI reference-resolution rules rather than by filesystem path
#   helpers. A page URL with an empty path or a trailing slash therefore
#   resolves correctly, the page's own query string is not carried over to
#   the image, and a reference may carry its own query string.
#
# @param image_path [String] the reference as written in the page
# @return [URI::Generic] the resolved image address
# @raise [URI::InvalidURIError] if the reference cannot be parsed as a URI
def image_url(image_path)
  if image_path.start_with?("http://", "https://")
    URI(image_path)
  elsif image_path.start_with?("//")
    resolved = URI(image_path)
    resolved.scheme = url.scheme
    resolved
  else
    URI.join(url.to_s, image_path)
  end
end

#linked_image_url ⇒ Object



48
49
50
51
52
53
54
# File 'lib/thumbnail_scraper/webpage.rb', line 48

# Returns the image advertised by the page's <link rel="img_src"> element.
#
# The outcome of the first lookup is cached in @linked_image_url, and that
# includes a nil outcome: the +defined?+ guard only helps if the instance
# variable is assigned on a miss too, otherwise every call on a page without
# such a link (for instance through #has_linked_image?) parses the document
# again.
#
# @return the +href+ of the first matching element as resolved by
#   #image_url, or nil when the page has no such element
def linked_image_url
  return @linked_image_url if defined?(@linked_image_url)

  href = document.xpath("//link[@rel='img_src']/@href").first
  @linked_image_url = href && image_url(href.value)
end

#open_graph_image_url ⇒ Object



36
37
38
39
40
41
42
# File 'lib/thumbnail_scraper/webpage.rb', line 36

# Returns the image advertised by the page's <meta property="og:image">
# element.
#
# The outcome of the first lookup is cached in @open_graph_image_url, and
# that includes a nil outcome: the +defined?+ guard only helps if the
# instance variable is assigned on a miss too, otherwise every call on a
# page without such a tag (for instance through #has_open_graph_image?)
# parses the document again.
#
# @return the +content+ of the first matching element as resolved by
#   #image_url, or nil when the page has no such element
def open_graph_image_url
  return @open_graph_image_url if defined?(@open_graph_image_url)

  content = document.xpath("//meta[@property='og:image']/@content").first
  @open_graph_image_url = content && image_url(content.value)
end