Class: NFODomainParser

Inherits:
Domainatrix::DomainParser
  • Object
show all
Defined in:
lib/nfcollector/domain_parser.rb

Instance Method Summary collapse

Instance Method Details

#parse(url) ⇒ Object



12
13
14
15
16
17
18
19
20
21
# File 'lib/nfcollector/domain_parser.rb', line 12

# Parses +url+ with URI.parse and wraps the result in a Domainatrix::Url.
#
# The attributes handed to Domainatrix::Url.new are the hash returned by
# #parse_domains_from_host for the URI's host, merged with the URI's scheme,
# host, path and query and the untouched input string under :url.
#
# @param url [String] the URL to parse
# @return [Domainatrix::Url]
def parse(url)
  uri = URI.parse(url)
  host = uri.host

  domain_parts = parse_domains_from_host(host)
  url_parts = {
    :scheme => uri.scheme,
    :host   => host,
    :path   => uri.path,
    :query  => uri.query,
    :url    => url
  }

  Domainatrix::Url.new(domain_parts.merge(url_parts))
end

#parse_domains_from_host(host) ⇒ Object

TODO: This is a big monkey patch of Domainatrix; we should fork the gem and fix this there instead.



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/nfcollector/domain_parser.rb', line 24

# Splits +host+ into public suffix, domain and subdomain.
#
# The host's dot-separated labels are walked right-to-left through
# @public_suffixes, which is indexed by label at each level. The walk stops
# at the first level that is empty or has no entry for the next label; the
# labels consumed so far form the public suffix, the next label is the
# domain, and whatever remains is the subdomain.
#
# Any StandardError raised along the way (for example a label that is not
# present in @public_suffixes, a host made up only of suffix labels, or a
# nil host) yields the fallback hash with the raw host as :domain.
#
# @param host [String] host name, e.g. "www.example.co.uk"
# @return [Hash] with keys :public_suffix, :domain and :subdomain
def parse_domains_from_host(host)
  labels = host.split(".").reverse
  suffix_labels = []
  domain = ""
  subdomain_labels = []
  node = @public_suffixes

  labels.each_with_index do |label, index|
    node = node[label]
    following = labels[index + 1]
    suffix_labels << label

    # Keep descending while the suffix tree has an entry for the next label.
    next if !node.empty? && node.has_key?(following)

    domain = following
    subdomain_labels = labels.slice(index + 2, labels.size)
    break
  end

  {
    :public_suffix => suffix_labels.reverse.join("."),
    :domain        => domain,
    :subdomain     => subdomain_labels.reverse.join(".")
  }
rescue
  # Applies to IP Addresses here too
  {:public_suffix => nil, :domain => host, :subdomain => nil}
end