Module: RDig::UrlFilters
- Defined in:
- lib/rdig/url_filters.rb
Defined Under Namespace
Classes: DepthFilter, FilterChain, PathExclusionFilter, PathInclusionFilter, PatternFilter, UrlExclusionFilter, UrlInclusionFilter, VisitedUrlFilter
Class Method Summary collapse
-
.fix_relative_uri(document) ⇒ Object
expands href=“/path/xyz.html”, href=“affe.html” and href=“../lala.html” to full urls.
-
.hostname_filter(document, include_hosts) ⇒ Object
filter uris by hostname list.
-
.maximum_redirect_filter(document, max_redirects) ⇒ Object
Checks the redirect count of the given document and takes it out of the chain if the number of redirections exceeds the max_redirects setting.
- .normalize_uri(document, cfg) ⇒ Object
- .scheme_filter_file(document) ⇒ Object
- .scheme_filter_http(document) ⇒ Object
Class Method Details
.fix_relative_uri(document) ⇒ Object
expands href=“/path/xyz.html”, href=“affe.html” and href=“../lala.html” to full urls
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/rdig/url_filters.rb', line 187

# Expands a relative link such as href="/path/xyz.html", href="affe.html" or
# href="../lala.html" into a full URL, using the URI of the referring
# document as the base. The document's uri object is modified in place.
#
# document - object responding to #uri and #referring_uri. The code below
#            uses scheme/host/port/path accessors on both values, so they
#            are presumably URI::Generic instances -- TODO confirm against
#            the crawler's document class.
#
# Returns the document (untouched when it has no referring URI), or nil when
# expanding the URI raised an error.
def UrlFilters.fix_relative_uri(document)
  #return nil unless document.uri.scheme.nil? || document.uri.scheme =~ /^https?/i
  ref = document.referring_uri
  return document unless ref

  uri = document.uri
  uri.scheme = ref.scheme unless uri.scheme
  uri.host = ref.host unless uri.host
  # A referrer served on its scheme's default port needs no explicit port.
  uri.port = ref.port unless uri.port || ref.port == ref.default_port
  uri.path = ref.path unless uri.path

  link_path = uri.path
  unless link_path =~ /^\//
    # Work on a substitute value instead of appending to ref.path, so the
    # referring URI's own path string is never modified.
    base_path = ref.path
    base_path = '/' if base_path.nil? || base_path.empty?
    # Keep everything up to and including the last slash of the referrer
    # path (its "directory") and append the relative path to it.
    # NOTE(review): an empty link path (e.g. a bare "#fragment" or "?query"
    # link) therefore resolves to the referrer's directory; RFC 3986 would
    # resolve it to the referring document itself. Left unchanged here.
    uri.path = base_path[0..base_path.rindex('/')] + link_path
  end

  if link_path =~ /^\.\./
    # Collapse every "segment/.." pair, not only the first one, so that
    # links like "../../x" are fully resolved. The lookahead makes sure ".."
    # is a complete path segment and not the start of a name like "..foo".
    resolved = uri.path
    while (collapsed = resolved.sub(%r{/[^/]*/\.\.(?=/|\z)}, '')) != resolved
      resolved = collapsed
    end
    uri.path = resolved
  end

  document
rescue
  # Best-effort diagnostics for URIs that could not be expanded.
  p document
  p document.uri
  # Reject the document explicitly instead of returning whatever `p`
  # returns (its argument on current Rubies, i.e. a URI, not a document).
  nil
end
.hostname_filter(document, include_hosts) ⇒ Object
filter uris by hostname list. With a nil or empty list all documents may pass this filter.
212 213 214 215 |
# File 'lib/rdig/url_filters.rb', line 212

# Filters documents by the host name of their URI.
#
# document      - object whose uri responds to #host
# include_hosts - collection of allowed host names; nil or an empty
#                 collection lets every document pass
#
# Returns the document when it may pass, nil otherwise.
def UrlFilters.hostname_filter(document, include_hosts)
  #RDig.logger.debug "hostname_filter: #{include_hosts}"
  unrestricted = include_hosts.nil? || include_hosts.empty?
  return document if unrestricted

  include_hosts.include?(document.uri.host) ? document : nil
end
.maximum_redirect_filter(document, max_redirects) ⇒ Object
Checks the redirect count of the given document and takes it out of the chain if the number of redirections exceeds the max_redirects setting
180 181 182 183 |
# File 'lib/rdig/url_filters.rb', line 180

# Takes a document out of the chain once it has been redirected too often.
#
# document      - any object; only inspected when it responds to
#                 #redirections
# max_redirects - highest redirection count that is still accepted
#
# Returns nil when document.redirections exceeds max_redirects, otherwise
# the document itself.
def UrlFilters.maximum_redirect_filter(document, max_redirects)
  if document.respond_to?(:redirections)
    return nil if document.redirections > max_redirects
  end
  document
end
.normalize_uri(document, cfg) ⇒ Object
217 218 219 220 221 222 223 224 225 226 227 228 229 230 |
# File 'lib/rdig/url_filters.rb', line 217

# Normalizes the URI of the given document in place: the fragment is
# dropped and a trailing slash on the path is handled according to cfg.
#
# document - object whose uri responds to #fragment= and #path
# cfg      - configuration responding to #index_document and
#            #remove_trailing_slash
#
# Returns the document.
def UrlFilters.normalize_uri(document, cfg)
  document.uri.fragment = nil
  # query stripping is disabled:
  # document.uri.query = nil

  path = document.uri.path
  # Only paths ending in a slash need any further treatment.
  return document unless path =~ /\/$/

  if cfg.index_document
    # append index document if configured
    path << cfg.index_document
  elsif cfg.remove_trailing_slash
    path.gsub!(/\/$/, '')
  end
  document
end
.scheme_filter_file(document) ⇒ Object
232 233 234 235 |
# File 'lib/rdig/url_filters.rb', line 232

# Lets only documents pass whose URI has no scheme or the file scheme
# (matched case-insensitively).
#
# Returns the document when it passes, nil otherwise.
def UrlFilters.scheme_filter_file(document)
  scheme = document.uri.scheme
  return nil unless scheme.nil? || scheme =~ /^file$/i

  document
end
.scheme_filter_http(document) ⇒ Object
236 237 238 239 |
# File 'lib/rdig/url_filters.rb', line 236

# Lets only documents pass whose URI has no scheme or an http/https scheme
# (matched case-insensitively).
#
# Returns the document when it passes, nil otherwise.
def UrlFilters.scheme_filter_http(document)
  scheme = document.uri.scheme
  return nil unless scheme.nil? || scheme =~ /^https?$/i

  document
end