Method: ContentLinkParser#all_links
- Defined in:
- lib/content_link_parser.rb
#all_links(options = {}) ⇒ Object
Returns an array of all absolutized links. Specify :valid_schemes in options to limit the result to certain schemes (when omitted, only :http and :https links are returned). Links with repeating folders are also filtered out (i.e. when the crawler has got into a link loop).
44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/content_link_parser.rb', line 44

# Returns every link reported by +link_data+, joined against the page URLs,
# with looping paths and unwanted schemes filtered out.
#
# @param options [Hash] optional settings; the hash is only read, never modified
# @option options [Array<Symbol>] :valid_schemes schemes to keep
#   (defaults to +[:http, :https]+ when the key is absent)
# @return [Array<String>] the remaining links as strings
def all_links(options = {})
  # fetch (rather than assigning a default into options) keeps the caller's
  # hash untouched and also works when a frozen hash is passed in.
  valid_schemes = options.fetch(:valid_schemes, [:http, :https])

  # Collect the links of every category into one flat, de-duplicated list.
  raw_links = link_data.values.flatten.uniq

  # The inner join combines @base_url with the link, the outer join combines
  # @url with that result. NOTE(review): presumably join_no_fragment resolves
  # a relative link and strips the #fragment, returning nil for links it
  # cannot handle -- confirm against UriHelper.
  links = raw_links.map do |link|
    UriHelper.join_no_fragment(@url, UriHelper.join_no_fragment(@base_url, link))
  end
  links = links.compact.map(&:to_s)

  # Drop links where a folder is immediately repeated, e.g. ".../foo/foo/...".
  links = links.reject { |link| link =~ %r{/([^/]+?)/\1/} }
  # Drop links where an "a/b/" pair shows up again later in the link,
  # e.g. ".../a/b/c/a/b" (typical of a crawler caught in a link loop).
  links = links.reject { |link| link =~ %r{([^/]+?)/([^/]+?)/.*?\1/\2} }

  # Keep only links whose scheme (the text before the first ':') is allowed.
  # An empty link has no scheme at all and is dropped instead of raising.
  links.select do |link|
    scheme = link.split(':', 2).first
    scheme && valid_schemes.include?(scheme.to_sym)
  end
end