Module: RDig::UrlFilters
- Defined in:
- lib/rdig/url_filters.rb
Defined Under Namespace
Classes: DepthFilter, FilterChain, PathExclusionFilter, PathInclusionFilter, PatternFilter, UrlExclusionFilter, UrlInclusionFilter, VisitedUrlFilter
Class Method Summary collapse
-
.fix_relative_uri(document) ⇒ Object
expands href=“/path/xyz.html”, href=“affe.html” and href=“../lala.html” to full urls.
-
.hostname_filter(document, include_hosts) ⇒ Object
filter uris by hostname list.
-
.maximum_redirect_filter(document, max_redirects) ⇒ Object
Checks the redirect count of the given document and takes it out of the chain if the number of redirections exceeds the max_redirects setting.
- .normalize_uri(document, cfg) ⇒ Object
- .scheme_filter_file(document) ⇒ Object
- .scheme_filter_http(document) ⇒ Object
Class Method Details
.fix_relative_uri(document) ⇒ Object
expands href=“/path/xyz.html”, href=“affe.html” and href=“../lala.html” to full urls
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# File 'lib/rdig/url_filters.rb', line 168

# Expands relative references such as href="/path/xyz.html",
# href="affe.html" and href="../lala.html" to full urls.
#
# A missing scheme, host or (non-default) port is copied from
# document.referring_uri, and a path that does not start with a slash is
# resolved against the directory part of the referring path. document.uri is
# modified in place; the referring uri is only read, never modified.
#
# Returns the document (unchanged when it has no referring uri), or nil when
# the uri could not be expanded. nil is the value the filters in this module
# use to take a document out of the chain.
def UrlFilters.fix_relative_uri(document)
  ref = document.referring_uri
  return document unless ref

  uri = document.uri
  uri.scheme = ref.scheme unless uri.scheme
  uri.host = ref.host unless uri.host
  # only carry the referrer's port over when it is not the scheme default
  uri.port = ref.port unless uri.port || ref.port == ref.default_port
  uri.path = ref.path unless uri.path

  relative_path = uri.path
  unless relative_path.start_with?('/')
    # Build the base directory from a fresh string: appending to ref.path
    # itself would silently change the referring uri.
    base_path = ref.path.to_s
    last_slash = base_path.rindex('/')
    base_dir = last_slash ? base_path[0..last_slash] : '/'
    uri.path = base_dir + relative_path
  end

  # Only a real parent reference ("..", "../x") is collapsed; a file name
  # that merely starts with two dots ("..foo") is left alone.
  if relative_path =~ %r{\A\.\.(?:/|\z)}
    resolved = uri.path
    # collapse every "segment/.." pair, not just the first one
    loop do
      collapsed = resolved.sub(%r{/[^/]*/\.\.(?=/|\z)}, '')
      break if collapsed == resolved
      resolved = collapsed
    end
    # ".." segments that would climb above the root are dropped (RFC 3986, 5.2.4)
    uri.path = resolved.sub(%r{\A(?:/\.\.)+(?=/|\z)}, '')
  end

  document
rescue StandardError => e
  # Report the failure and return nil explicitly, so the caller never
  # receives the return value of a debug print in place of a document.
  warn "fix_relative_uri: could not expand uri of #{document.inspect} (#{e.class}: #{e.message})"
  nil
end
.hostname_filter(document, include_hosts) ⇒ Object
filter uris by hostname list. With a nil or empty list all documents may pass this filter.
193 194 195 196 |
# File 'lib/rdig/url_filters.rb', line 193

# Filters documents by the host of their uri.
#
# A nil or empty include_hosts list lets every document pass. Otherwise the
# document is returned only if include_hosts includes document.uri.host;
# in all other cases the result is nil.
def UrlFilters.hostname_filter(document, include_hosts)
  #RDig.logger.debug "hostname_filter: #{include_hosts}"
  unrestricted = include_hosts.nil? || include_hosts.empty?
  document if unrestricted || include_hosts.include?(document.uri.host)
end
.maximum_redirect_filter(document, max_redirects) ⇒ Object
Checks the redirect count of the given document and takes it out of the chain if the number of redirections exceeds the max_redirects setting.
161 162 163 164 |
# File 'lib/rdig/url_filters.rb', line 161

# Takes a document out of the chain (returns nil) when it reports more
# redirections than max_redirects allows.
#
# Documents that do not respond to :redirections always pass and are
# returned as they are.
def UrlFilters.maximum_redirect_filter(document, max_redirects)
  too_many_redirects =
    document.respond_to?(:redirections) && document.redirections > max_redirects
  too_many_redirects ? nil : document
end
.normalize_uri(document, cfg) ⇒ Object
198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
# File 'lib/rdig/url_filters.rb', line 198

# Normalizes the uri of the given document in place.
#
# The fragment is always removed; the query string is kept. When the path
# ends with a slash, cfg.index_document is appended if it is set; otherwise
# the trailing slash is removed if cfg.remove_trailing_slash is set.
#
# cfg is expected to respond to index_document and remove_trailing_slash.
# Always returns the document.
def UrlFilters.normalize_uri(document, cfg)
  uri = document.uri
  uri.fragment = nil
  # uri.query = nil

  # trailing slash handling
  path = uri.path
  if path =~ /\/$/
    if cfg.index_document
      # Append the value from the cfg that was just checked (not from the
      # global RDig.config), so the tested and the appended setting can
      # never disagree. Assign through path= rather than mutating the
      # uri's internal string.
      uri.path = path + cfg.index_document
    elsif cfg.remove_trailing_slash
      uri.path = path.gsub(/\/$/, '')
    end
  end

  document
end
.scheme_filter_file(document) ⇒ Object
213 214 215 216 |
# File 'lib/rdig/url_filters.rb', line 213

# Lets a document pass only if its uri has no scheme or the scheme is
# "file" (compared case-insensitively). Every other document yields nil.
def UrlFilters.scheme_filter_file(document)
  scheme = document.uri.scheme
  return nil unless scheme.nil? || scheme =~ /^file$/i

  document
end
.scheme_filter_http(document) ⇒ Object
217 218 219 220 |
# File 'lib/rdig/url_filters.rb', line 217

# Lets a document pass only if its uri has no scheme or the scheme is
# "http" or "https" (compared case-insensitively). Every other document
# yields nil.
def UrlFilters.scheme_filter_http(document)
  scheme = document.uri.scheme
  return nil unless scheme.nil? || scheme =~ /^https?$/i

  document
end