Module: Metapage

Defined in:
lib/metapage.rb,
lib/metapage/version.rb

Defined Under Namespace

Classes: ContentTypeError, HTTPResponseError, IgnoredTitleError, Metadata, ResolveError

Constant Summary

# Exception classes that the non-raising `fetch` rescues (it splats this list
# into its `rescue` clause) and converts into a nil result.
ERROR_CLASSES = [
  ResolveError,
  HTTPResponseError,
  ContentTypeError,
  IgnoredTitleError
]
# Lower-case keywords for sign-up / sign-in style pages (English and German).
# NOTE(review): presumably matched against page titles to raise
# IgnoredTitleError -- the consumer is not visible here, confirm.
IGNORE_LIST = %w[signup signin login anmeldung anmelden registration]
# Version string of the library.
VERSION = '0.4.0'

Class Method Summary

Class Method Details

.extract(text) ⇒ Object



30
31
32
# File 'lib/metapage.rb', line 30

# Fetches metadata for every URL found in +text+ using the non-raising
# `fetch`, so URLs that fail with one of ERROR_CLASSES are left out.
#
# @param text [String] free-form text that may mention URLs
# @return [Array] one `fetch` result per URL, with nil results removed
def extract(text)
  candidates = extract_urls(text)
  results = candidates.map do |candidate|
    # Dots and commas at the very end are treated as sentence punctuation,
    # not as part of the URL.
    cleaned = candidate.gsub(/[\.\,]+\Z/, '')
    fetch(cleaned)
  end
  results.compact
end

.extract!(text) ⇒ Object



34
35
36
# File 'lib/metapage.rb', line 34

# Raising counterpart of `extract`: every URL found in +text+ is passed to
# `fetch!`, so any error raised there propagates to the caller.
#
# @param text [String] free-form text that may mention URLs
# @return [Array] one `fetch!` result per URL, with nil results removed
def extract!(text)
  # Dots and commas at the very end of a match are treated as sentence
  # punctuation and removed before fetching.
  extract_urls(text)
    .map { |found| found.gsub(/[\.\,]+\Z/, '') }
    .map { |target| fetch!(target) }
    .compact
end

.extract_urls(text) ⇒ Object



38
39
40
41
42
43
# File 'lib/metapage.rb', line 38

# Collects the http/https URLs mentioned in +text+, also recognising bare
# "www." hosts that were written without a scheme.
#
# @param text [String] free-form text to scan
# @return [Array<String>] the URLs as returned by URI.extract
def extract_urls(text)
  # Give scheme-less "www." hosts an explicit scheme so URI.extract sees them.
  # The negative lookbehind skips any "www." that already belongs to a longer
  # URL or word (preceded by "/", ".", "-", "@" or a word character), such as
  # "http://blog.www.example.com" or "http://user@www.example.com"; prefixing
  # those would corrupt an otherwise valid URL. At the very start of the text
  # nothing precedes "www.", so that case is covered by the same pattern.
  with_schemes = text.gsub(/(?<![\w\/.@-])www\./, 'http://www.')
  URI.extract(with_schemes, %w[http https])
end

.fetch(url) ⇒ Object



20
21
22
23
24
# File 'lib/metapage.rb', line 20

# Best-effort variant of `fetch!`: returns its result, or nil when it raises
# one of the exceptions listed in ERROR_CLASSES. Any other exception still
# propagates to the caller.
#
# @param url [String] the URL to fetch
# @return [Object, nil] whatever `fetch!` returns, or nil on a listed error
def fetch(url)
  fetch!(url)
rescue *ERROR_CLASSES
  # Deliberately swallowed: callers such as `extract` rely on nil to mean
  # "no usable metadata for this URL".
  nil
end

.fetch!(url) ⇒ Object



26
27
28
# File 'lib/metapage.rb', line 26

# Builds a Metadata object for +url+ without rescuing anything.
# NOTE(review): presumably Metadata.new is where the ERROR_CLASSES exceptions
# originate -- its implementation is not visible here, confirm.
#
# @param url [String] the URL to describe
# @return [Metadata] the newly constructed metadata object
def fetch!(url)
  page = Metadata.new(url)
  page
end