Class: MetaInspector::Parsers::LinksParser

Inherits:
Base
  • Object
show all
Defined in:
lib/meta_inspector/parsers/links.rb

Instance Method Summary collapse

Methods inherited from Base

#initialize

Constructor Details

This class inherits a constructor from MetaInspector::Parsers::Base

Instance Method Details

#allObject

Returns all links found, unrelativized and absolutified



16
17
18
19
# File 'lib/meta_inspector/parsers/links.rb', line 16

# Returns every link from +raw+ after running it through
# URL.unrelativize (with +scheme+) and then URL.absolutify (against
# +base_url+). Nil results are dropped and duplicates removed.
# The result is memoized in @all.
def all
  @all ||= begin
    resolved = raw.map do |href|
      URL.absolutify(URL.unrelativize(href, scheme), base_url)
    end
    resolved.compact.uniq
  end
end

#base_urlObject

Returns the base url to absolutify relative links. This can be the one set on a <base> tag, or the url of the document if no <base> tag was found.



55
56
57
# File 'lib/meta_inspector/parsers/links.rb', line 55

# Returns the URL used to absolutify relative links: +base_href+ when it
# is truthy, otherwise +url+.
def base_url
  from_base_tag = base_href
  return from_base_tag if from_base_tag

  url
end

#externalObject

Returns all external HTTP links found



37
38
39
# File 'lib/meta_inspector/parsers/links.rb', line 37

# Returns the links from +http+ whose host (as reported by
# URL.new(link).host) differs from +host+. Memoized in @external.
def external
  @external ||= http.reject do |link|
    URL.new(link).host == host
  end
end

#feedObject

Returns the parsed document's RSS feed link, or its Atom feed link when no RSS link is found



48
49
50
# File 'lib/meta_inspector/parsers/links.rb', line 48

# Returns the result of parsed_feed('rss'), or of parsed_feed('atom')
# when the former is nil/false. A truthy result is memoized in @feed.
def feed
  @feed ||= begin
    rss_link = parsed_feed('rss')
    rss_link ? rss_link : parsed_feed('atom')
  end
end

#httpObject

Returns all HTTP links found



22
23
24
# File 'lib/meta_inspector/parsers/links.rb', line 22

# Returns the links from +all+ that start with http:// or https://
# (case-insensitive). Memoized in @http.
def http
  # \A anchors at the very start of the string. The previous ^ anchor
  # matches at the start of any line, so a value with an embedded newline
  # such as "javascript:x\nhttp://..." was wrongly treated as HTTP.
  @http ||= all.select { |link| link =~ %r{\Ahttps?://}i }
end

#internalObject

Returns all internal HTTP links found



32
33
34
# File 'lib/meta_inspector/parsers/links.rb', line 32

# Returns the links from +http+ whose host (as reported by
# URL.new(link).host) equals +host+. Memoized in @internal.
def internal
  @internal ||= http.select do |link|
    link_host = URL.new(link).host
    link_host == host
  end
end


6
7
8
# File 'lib/meta_inspector/parsers/links.rb', line 6

# Returns the receiver itself.
def links
  itself
end

#non_httpObject

Returns all non-HTTP links found



27
28
29
# File 'lib/meta_inspector/parsers/links.rb', line 27

# Returns the links from +all+ that do not start with http:// or
# https:// (case-insensitive). Memoized in @non_http.
def non_http
  # \A anchors at the very start of the string. The previous ^ anchor
  # matches at the start of any line, so a non-HTTP value containing
  # "\nhttp://" was wrongly excluded from this list.
  @non_http ||= all.select { |link| link !~ %r{\Ahttps?://}i }
end

#rawObject

Returns all links found, unprocessed



11
12
13
# File 'lib/meta_inspector/parsers/links.rb', line 11

# Returns the href attributes of all <a> elements in +parsed+ (XPath
# //a/@href), passed through +cleanup+, with nils and duplicates removed.
# Memoized in @raw.
def raw
  @raw ||= begin
    href_nodes = parsed.search('//a/@href')
    cleanup(href_nodes).compact.uniq
  end
end

#to_hashObject



41
42
43
44
45
# File 'lib/meta_inspector/parsers/links.rb', line 41

# Returns a Hash with the string keys 'internal', 'external' and
# 'non_http', each mapped to the result of the method of the same name.
def to_hash
  grouped = {}
  grouped['internal'] = internal
  grouped['external'] = external
  grouped['non_http'] = non_http
  grouped
end