Class: SiteMapper::CrawlUrl
- Inherits:
-
Object
- Object
- SiteMapper::CrawlUrl
- Defined in:
- lib/site_mapper/crawl_url.rb
Overview
Crawl URL formatter.
Constant Summary collapse
- TOO_MANY_REQUEST_MSG =
Error message for a "too many requests" response
"You're being challenged with a 'too many requests' captcha"
Instance Attribute Summary collapse
-
#resolved_base_url ⇒ Object
readonly
Returns the value of attribute resolved_base_url.
Instance Method Summary collapse
-
#absolute_url_from(page_url, current_url) ⇒ String
Given a link, constructs its absolute URL, provided the link is a valid URL and has the same hostname as @resolved_base_url; otherwise returns nil.
-
#initialize(base_url) ⇒ CrawlUrl
constructor
Initialize CrawlUrl.
Constructor Details
#initialize(base_url) ⇒ CrawlUrl
Initializes CrawlUrl: resolves base_url and stores the hostname of the resolved URL.
13 14 15 16 |
# File 'lib/site_mapper/crawl_url.rb', line 13
def initialize(base_url)
  @resolved_base_url = Request.resolve_url(base_url)
  @base_hostname = URI.parse(@resolved_base_url).hostname
end
Instance Attribute Details
#resolved_base_url ⇒ Object (readonly)
Returns the value of attribute resolved_base_url.
4 5 6 |
# File 'lib/site_mapper/crawl_url.rb', line 4
def resolved_base_url
  @resolved_base_url
end
Instance Method Details
#absolute_url_from(page_url, current_url) ⇒ String
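Given a link (page_url) found on the page at current_url, constructs its absolute URL. Returns nil when the link is not eligible, cannot be joined with current_url into a valid URL, or has a hostname that differs from that of @resolved_base_url.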
27 28 29 30 31 32 |
# File 'lib/site_mapper/crawl_url.rb', line 27
def absolute_url_from(page_url, current_url)
  return unless eligible_url?(page_url)
  parsed_uri = URI.join(current_url, page_url) rescue return
  return unless parsed_uri.hostname == @base_hostname
  parsed_uri.to_s
end