Class: Snapcrawl::Page
- Inherits:
- Object
- Inheritance hierarchy:
- Object
- Snapcrawl::Page
- Defined in:
- lib/snapcrawl/page.rb
Constant Summary collapse
- EXTENSION_BLACKLIST =
'png|gif|jpg|pdf|zip'
- PROTOCOL_BLACKLIST =
'mailto|tel'
Instance Attribute Summary collapse
-
#depth ⇒ Object
readonly
Returns the value of attribute depth.
-
#url ⇒ Object
readonly
Returns the value of attribute url.
Instance Method Summary collapse
-
#initialize(url, depth: 0) ⇒ Page
constructor
Returns a new instance of Page.
- #links ⇒ Object
- #pages ⇒ Object
- #path ⇒ Object
- #save_screenshot(outfile) ⇒ Object
- #site ⇒ Object
- #valid? ⇒ Boolean
Constructor Details
#initialize(url, depth: 0) ⇒ Page
Returns a new instance of Page.
16 17 18 19 |
# File 'lib/snapcrawl/page.rb', line 16
#
# Builds a page for +url+ at the given +depth+.
#
# @param url [#protocolize] address of the page; the value stored in +@url+
#   is whatever +url.protocolize+ returns
#   (NOTE(review): +protocolize+ is not defined in this listing -- presumably
#   a project String extension that adds a missing scheme; confirm)
# @param depth [Integer] stored unchanged in +@depth+; defaults to 0
def initialize(url, depth: 0)
  @url = url.protocolize
  @depth = depth
end
Instance Attribute Details
#depth ⇒ Object (readonly)
Returns the value of attribute depth.
11 12 13 |
# File 'lib/snapcrawl/page.rb', line 11
#
# Read-only accessor for the +depth+ attribute.
#
# @return [Object] the current value of +@depth+
def depth
  @depth
end
#url ⇒ Object (readonly)
Returns the value of attribute url.
11 12 13 |
# File 'lib/snapcrawl/page.rb', line 11
#
# Read-only accessor for the +url+ attribute.
#
# @return [Object] the current value of +@url+
def url
  @url
end
Instance Method Details
#links ⇒ Object
33 34 35 36 37 38 |
# File 'lib/snapcrawl/page.rb', line 33
#
# Extracts the links found on this page.
#
# The body of +http_response+ is parsed with +Nokogiri::HTML+ and every
# element matching the CSS selector +a+ is handed to +normalize_links+;
# whatever that helper returns is returned here.
#
# @return [Object, nil] the result of +normalize_links+, or +nil+ when the
#   page is not +valid?+
def links
  return nil unless valid?

  doc = Nokogiri::HTML(http_response.body)
  normalize_links(doc.css('a'))
end
#pages ⇒ Object
40 41 42 43 44 |
# File 'lib/snapcrawl/page.rb', line 40
#
# Wraps each entry of +links+ in a new +Page+ one level deeper than this one.
#
# @return [Array<Page>, nil] one +Page+ per link, each created with
#   +depth + 1+; +nil+ when the page is not +valid?+
def pages
  return nil unless valid?

  links.map { |link| Page.new(link, depth: depth + 1) }
end
#path ⇒ Object
29 30 31 |
# File 'lib/snapcrawl/page.rb', line 29
#
# The +request_uri+ of this page's URL, as reported by +Addressable::URI+.
#
# The value is memoized in +@path+, so the URL is parsed at most once as
# long as the result is truthy.
#
# @return [Object] whatever +Addressable::URI#request_uri+ returns for +url+
def path
  @path ||= Addressable::URI.parse(url).request_uri
end
#save_screenshot(outfile) ⇒ Object
46 47 48 49 50 |
# File 'lib/snapcrawl/page.rb', line 46
#
# Captures a screenshot of this page into +outfile+.
#
# The work is delegated to +Screenshot.new(url).save+; nothing is attempted
# when the page is not +valid?+.
#
# @param outfile [Object] destination passed straight through to
#   +Screenshot#save+
# @return [Object, false] the result of +Screenshot#save+, or +false+ when
#   the page is not +valid?+
def save_screenshot(outfile)
  return false unless valid?

  Screenshot.new(url).save(outfile)
end
#site ⇒ Object
25 26 27 |
# File 'lib/snapcrawl/page.rb', line 25
#
# The +site+ component of this page's URL, as reported by +Addressable::URI+.
#
# The value is memoized in +@site+, so the URL is parsed at most once as
# long as the result is truthy.
#
# @return [Object] whatever +Addressable::URI#site+ returns for +url+
def site
  @site ||= Addressable::URI.parse(url).site
end
#valid? ⇒ Boolean
21 22 23 |
# File 'lib/snapcrawl/page.rb', line 21
#
# Reports whether the page was fetched successfully.
#
# @return [Boolean] the value of +http_response.success?+ when a response
#   exists; +false+ when there is no response at all (or +success?+ itself
#   is falsy), so callers always receive a real Boolean instead of +nil+
def valid?
  # `&.` yields nil when http_response is nil; `|| false` turns that into
  # the Boolean the method signature promises.
  http_response&.success? || false
end