Module: Domainatrix

Defined in:
lib/domainatrix.rb,
lib/domainatrix/url.rb,
lib/domainatrix/domain_parser.rb

Defined Under Namespace

Classes: DomainParser, Error, ParseError, Url

Constant Summary collapse

# Version string of this library.
VERSION =
"0.0.11"
# Shared DomainParser instance, built once from the effective_tld_names.dat
# file located in the same directory as this source file.
# NOTE(review): presumably the data file is a public-suffix list — confirm.
DOMAIN_PARSER =
DomainParser.new("#{File.dirname(__FILE__)}/effective_tld_names.dat")

Class Method Summary collapse

Class Method Details

.parse(url) ⇒ Object



18
19
20
# File 'lib/domainatrix.rb', line 18

# Parses a single URL.
#
# The URL is handed to the shared DOMAIN_PARSER, and whatever it returns is
# wrapped in a new Url.
#
# @param url [String] the URL to parse
# @return [Url] a Url built from the parser's output
def self.parse(url)
  parsed_attributes = DOMAIN_PARSER.parse(url)
  Url.new(parsed_attributes)
end

.scan(text, &block) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/domainatrix.rb', line 22

# Finds every http/https URL in +text+ and parses each one with .parse.
#
# Punctuation that trails a URL in running prose (".", ",", ":", ";", ")") is
# stripped before parsing. When the URL itself contains an opening paren, a
# trailing ")" is kept so that balanced URLs such as
# ".../wiki/Ruby_(language)" survive intact.
#
# URLs for which .parse raises Addressable::URI::InvalidURIError are skipped.
#
# @param text [String, nil] text to search; nil yields an empty Array
# @yield [url] optional block applied to every successfully parsed URL
# @return [Array] the parsed URLs, or the block's results when a block is given
def self.scan(text, &block)
  return [] unless text

  @schemes ||= %w(http https)
  all_trailing_clutter = /[.,:);]+$/
  # Deliberately omits ")" so a closing paren that balances one inside the
  # URL is not treated as clutter.
  clutter_without_parens = /[.,:;]+$/

  candidate_urls = ::URI.extract(text, @schemes).map do |url|
    # If the URL has an open paren, allow closing parens.
    clutter = url.include?("(") ? clutter_without_parens : all_trailing_clutter
    # The pattern is anchored at the end, so a single substitution suffices.
    url.sub(clutter, '')
  end

  urls = candidate_urls.map do |url|
    begin
      parse(url)
    rescue Addressable::URI::InvalidURIError
      nil # best-effort scan: drop candidates that are not valid URIs
    end
  end.compact

  urls.map!(&block) if block
  urls
end