Class: Domainatrix::DomainParser

Inherits:
Object
  • Object
show all
Includes:
Addressable
Defined in:
lib/domainatrix/domain_parser.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file_name) ⇒ DomainParser

Returns a new instance of DomainParser.



7
8
9
10
# File 'lib/domainatrix/domain_parser.rb', line 7

# Creates a parser and populates its suffix tree from the given file.
#
# @param file_name [String] path passed straight through to #read_dat_file
def initialize(file_name)
  # Must be assigned before read_dat_file runs: that method fills this hash in place.
  @public_suffixes = {}
  read_dat_file(file_name)
end

Instance Attribute Details

#public_suffixes ⇒ Object (readonly)

Returns the value of attribute public_suffixes.



5
6
7
# File 'lib/domainatrix/domain_parser.rb', line 5

# Reader for the suffix tree built by #read_dat_file.
#
# @return [Hash] nested hash keyed by domain label, top-level label first
#   (e.g. "co.uk" is stored as {"uk" => {"co" => {}}}); the same object
#   held by the parser is returned, not a copy
def public_suffixes
  @public_suffixes
end

Instance Method Details

#parse(url) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/domainatrix/domain_parser.rb', line 33

# Splits a URL into its scheme, host, path and domain components.
#
# The whole URL is lowercased before parsing, so every returned string
# (path and query included) is lowercase. The caller's string is left
# untouched, which also means frozen strings are accepted.
#
# @param url [String] the URL to parse
# @return [Hash] the keys produced by #parse_domains_from_host merged with
#   :scheme, :host, :path (with "?query" appended when a query is present)
#   and :url (the lowercased URL)
def parse(url)
  # downcase rather than downcase!: never mutate the argument, and do not
  # blow up when it is frozen.
  url = url.downcase
  uri = URI.parse(url)
  path = uri.query ? "#{uri.path}?#{uri.query}" : uri.path
  parse_domains_from_host(uri.host).merge({
    :scheme => uri.scheme,
    :host   => uri.host,
    :path   => path,
    :url    => url
  })
end

#parse_domains_from_host(host) ⇒ Object



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/domainatrix/domain_parser.rb', line 49

# Splits a host name into public suffix, registrable domain and subdomain
# by walking the suffix tree in @public_suffixes from the rightmost label
# inwards.
#
# Robustness notes:
# * the host is lowercased on a copy, so the argument is never mutated and
#   frozen strings are accepted;
# * a nil host is treated like an empty one;
# * a top-level label missing from the tree is treated as a one-label
#   public suffix instead of raising;
# * hosts that end at (or inside) the public suffix yield "" for the
#   missing :domain / :subdomain rather than raising.
#
# @param host [String, nil] host name such as "www.example.co.uk"
# @return [Hash] with String values under :public_suffix, :domain and
#   :subdomain; each is "" when that component is absent
def parse_domains_from_host(host)
  parts = host.to_s.downcase.split(".").reverse
  public_suffix = []
  domain = ""
  subdomains = []
  sub_hash = @public_suffixes
  parts.each_with_index do |part, i|
    # Unknown label: fall back to an empty subtree so the label itself is
    # taken as the suffix (this can only happen for the top-level label,
    # deeper labels are checked with key? before descending).
    sub_hash = sub_hash[part] || {}
    public_suffix << part
    following = parts[i + 1]

    if sub_hash.key?("*")
      # Wildcard rule: whatever label comes next also belongs to the suffix.
      public_suffix << following if following
      domain = parts[i + 2] || ""
      # A range past the end of the array yields nil, hence the fallback.
      subdomains = parts[(i + 3)..-1] || []
      break
    elsif sub_hash.empty? || !sub_hash.key?(following)
      # The suffix ends here; the next label (if any) is the domain.
      domain = following || ""
      subdomains = parts[(i + 2)..-1] || []
      break
    end
  end
  {:public_suffix => public_suffix.reverse.join("."), :domain => domain, :subdomain => subdomains.reverse.join(".")}
end

#read_dat_file(file_name) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/domainatrix/domain_parser.rb', line 12

# Loads suffix rules from a file into @public_suffixes.
#
# Each rule is split on "." and stored as a chain of nested hashes, from
# the rightmost label inwards: "co.uk" becomes {"uk" => {"co" => {}}} and
# "*.ck" becomes {"ck" => {"*" => {}}}. Blank lines and any line containing
# "//" are skipped.
#
# The file is opened with the block form of File.open so the handle is
# closed even if reading raises.
#
# @param file_name [String] path of the rules file
# @return [File] the file handle, already closed
def read_dat_file(file_name)
  # On 1.9+ force UTF-8; older rubies have no encoding-aware open modes.
  mode = RUBY_VERSION >= '1.9' ? "r:UTF-8" : "r"

  File.open(file_name, mode) do |dat_file|
    dat_file.each_line do |line|
      line = line.strip
      next if line.empty? || line =~ /\/\//

      # Walk/extend the tree one label at a time, creating levels on demand.
      sub_hash = @public_suffixes
      line.split(".").reverse.each do |part|
        sub_hash = (sub_hash[part] ||= {})
      end
    end
  end
end