Class: Wmap::HostTracker::PrimaryHost

Inherits:
Wmap::HostTracker show all
Includes:
Singleton, Utils
Defined in:
lib/wmap/host_tracker/primary_host.rb

Overview

Class to differentiate the primary host-name from its potential aliases. This is needed to minimize confusion in the final site inventory list, which otherwise contains a large number of duplicates (aliases). More specifically, this class can be used to build a filter that tracks the primary URL of a website.

Constant Summary

Constants included from Utils::DomainRoot

Utils::DomainRoot::File_ccsld, Utils::DomainRoot::File_cctld, Utils::DomainRoot::File_gtld

Instance Attribute Summary collapse

Attributes inherited from Wmap::HostTracker

#alias, #max_parallel

Instance Method Summary collapse

Methods included from Utils

#cidr_2_ips, #file_2_hash, #file_2_list, #get_nameserver, #get_nameservers, #host_2_ip, #host_2_ips, #is_cidr?, #is_fqdn?, #is_ip?, #list_2_file, #reverse_dns_lookup, #sort_ips, #valid_dns_record?, #zone_transferable?

Methods included from Utils::Logger

#wlog

Methods included from Utils::UrlMagic

#create_absolute_url_from_base, #create_absolute_url_from_context, #host_2_url, #is_site?, #is_ssl?, #is_url?, #make_absolute, #normalize_url, #url_2_host, #url_2_path, #url_2_port, #url_2_site, #urls_on_same_domain?

Methods included from Utils::DomainRoot

#get_domain_root, #get_sub_domain, #is_domain_root?, #print_ccsld, #print_cctld, #print_gtld

Methods inherited from Wmap::HostTracker

#add, #bulk_add, #bulk_delete, #count, #delete, #dump_sub_domains, #file_add, #file_delete, #get_a_records, #get_root_domains, #host_aliases, #host_known?, #ip_known?, #local_host_2_ip, #local_ip_2_host, #print_host, #print_known_hosts, #refresh, #refresh_all, #save_known_hosts_to_file!, #search, #sub_domain_known?, #top_hostname

Constructor Details

#initialize(params = {}) ⇒ PrimaryHost

Initialize the instance variables



23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/wmap/host_tracker/primary_host.rb', line 23

# Initialize the instance variables and load the primary host repository.
#
# @param params [Hash] optional settings
# @option params [Boolean] :verbose (false) print diagnostic messages
# @option params [String] :data_dir directory holding the data files; it is
#   concatenated with the file name as-is, so it should end with a '/'
# @option params [String] :hosts_file (data_dir + 'prime_hosts') repository
#   file to load the known primary hosts from
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../../data/')
  # Default repository location; still assigned for any code reading @file_hosts
  @file_hosts = @data_dir + 'prime_hosts'
  # Remember the file actually in use so the hosts_file reader no longer
  # returns nil.
  # NOTE(review): inherited methods presumably read @hosts_file as well - confirm.
  @hosts_file = params.fetch(:hosts_file, @file_hosts)
  # Bootstrap an empty repository for the file that is about to be loaded
  # (previously only the default file was created, even when a custom
  # :hosts_file was requested).
  File.write(@hosts_file, "") unless File.exist?(@hosts_file)
  @known_hosts = load_known_hosts_from_file(@hosts_file)
  @known_ips = Hash.new
  de_duplicate
end

Instance Attribute Details

#data_dirObject

Returns the value of attribute data_dir.



19
20
21
# File 'lib/wmap/host_tracker/primary_host.rb', line 19

# Reader for @data_dir, the directory path the constructor takes from the
# :data_dir parameter (or its built-in default).
#
# @return [Object] the current value of @data_dir
def data_dir
  @data_dir
end

#hosts_fileObject

Returns the value of attribute hosts_file.



19
20
21
# File 'lib/wmap/host_tracker/primary_host.rb', line 19

# Reader for @hosts_file.
#
# @return [Object, nil] the current value of @hosts_file; nil when nothing
#   has assigned it
def hosts_file
  @hosts_file
end

#known_hostsObject (readonly)

Returns the value of attribute known_hosts.



20
21
22
# File 'lib/wmap/host_tracker/primary_host.rb', line 20

# Reader for @known_hosts, the table the constructor obtains from
# load_known_hosts_from_file.
#
# @return [Object] the current value of @known_hosts
def known_hosts
  @known_hosts
end

#known_ipsObject (readonly)

Returns the value of attribute known_ips.



20
21
22
# File 'lib/wmap/host_tracker/primary_host.rb', line 20

# Reader for @known_ips, the Hash that de_duplicate fills with the
# @known_hosts values it has already seen (each mapped to true).
#
# @return [Hash] the current value of @known_ips
def known_ips
  @known_ips
end

#verboseObject

Returns the value of attribute verbose.



19
20
21
# File 'lib/wmap/host_tracker/primary_host.rb', line 19

# Reader for @verbose, the flag that gates the diagnostic "Exception on
# method ..." messages printed by this class.
#
# @return [Object] the current value of @verbose
def verbose
  @verbose
end

Instance Method Details

#de_duplicateObject Also known as: deduplicate

Procedures to remove the redundant entries in the primary hosts data repository



99
100
101
102
103
104
105
106
107
108
# File 'lib/wmap/host_tracker/primary_host.rb', line 99

# Remove redundant entries from the primary hosts table.
#
# Walks a snapshot of the @known_hosts keys. The first key seen for a given
# value registers that value in @known_ips; every later key carrying an
# already registered value is deleted from @known_hosts.
#
# @return [Array] one element per visited key: true when the value was newly
#   registered, otherwise the value removed from @known_hosts
def de_duplicate
  @known_hosts.keys.map do |hostname|
    address = @known_hosts[hostname]
    # Value already registered: this key is a duplicate, drop it
    next @known_hosts.delete(hostname) if @known_ips.key?(address)

    @known_ips[address] = true
  end
end

#prime(host) ⇒ Object

Method to replace hostname with known primary hostname



112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/wmap/host_tracker/primary_host.rb', line 112

# Replace a hostname with the known primary hostname registered for its IP.
#
# The IP is looked up with local_host_2_ip first and host_2_ip as fallback.
# When that IP is present in @known_ips, the @known_hosts entry stored under
# the IP is returned. In every other case - unknown IP, malformed hostname,
# or a lookup error - the input is handed back unchanged.
#
# @param host [String] hostname to normalize
# @return [Object] the registered primary hostname, or +host+ itself
def prime(host)
  raise "Unknown hostname format: #{host}" unless is_fqdn?(host)
  ip = local_host_2_ip(host)
  ip = host_2_ip(host) if ip.nil?
  # Fall back to the input when the IP is registered but has no table entry,
  # so callers never receive nil in place of a hostname
  return @known_hosts[ip] || host if @known_ips.key?(ip)
  host
rescue StandardError => ee
  # StandardError only: Interrupt / SystemExit must not be swallowed here
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  host
end

#update_from_site_redirections!Object

Procedures to identify the primary host-name from the redirection URLs recorded in the site store. The assumption is that a site redirection always points to the well-known primary site.



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/wmap/host_tracker/primary_host.rb', line 75

# Update the primary host table from the redirection URLs in the site store.
#
# Every redirection URL reported by Wmap::SiteTracker#get_redirection_urls
# that is a valid URL with a fully qualified host is passed to #add, unless
# the IP resolved for that host is already a key of @known_hosts. The table
# is then persisted with #save!.
#
# @return [Object, nil] the result of #save!, or nil when a StandardError
#   was raised along the way
def update_from_site_redirections!
  puts "Invoke internal procedures to update the primary host-name table from the site store."
  urls = Wmap::SiteTracker.new(:data_dir=>@data_dir).get_redirection_urls
  urls.each do |url|
    next unless is_url?(url)
    host = url_2_host(url)
    next unless is_fqdn?(host)
    # Duplication check: skip hosts whose IP is already tracked
    add(host) unless @known_hosts.key?(host_2_ip(host))
  end
  save!
rescue StandardError => ee
  # StandardError only: Interrupt / SystemExit must stay able to stop the run
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end

#update_from_site_store!Object Also known as: update!

Procedures to identify the primary host-name from the SSL certificates of the sites in the site store. The assumption is that the CN given in the certificate request is the primary hostname, i.e. the one actually used by the users.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/wmap/host_tracker/primary_host.rb', line 37

# Update the primary host table from the SSL certificates of the site store.
#
# For every SSL site reported by Wmap::SiteTracker#get_ssl_sites whose host
# is not yet a key of @known_hosts, the certificate CN is fetched through
# Wmap::UrlChecker#get_cert_cn. Each distinct CN that is a fully qualified
# name and not yet a key of @known_hosts is passed to #add, and the table is
# then persisted with #save!.
#
# @return [nil] always nil, on success as well as after a StandardError
def update_from_site_store!
  puts "Invoke internal procedures to update the primary host-name table from the site store."
  # Step 1 - update the prime host table based on the SSL cert CN fields
  checker = Wmap::UrlChecker.new(:data_dir=>@data_dir)
  my_tracker = Wmap::SiteTracker.new(:data_dir=>@data_dir)
  cns = {}
  my_tracker.get_ssl_sites.each do |site|
    puts "Exam SSL enabled site entry #{site} ..."
    # Skip the certificate pull for hosts that are already tracked
    next if @known_hosts.key?(url_2_host(site))
    puts "Pull SSL cert details on site: #{site}"
    cn = checker.get_cert_cn(site)
    cns[cn] = true unless cn.nil?
  end
  cns.each_key do |cn|
    next unless is_fqdn?(cn)
    next if @known_hosts.key?(cn)
    add(cn)
    puts "New entry added: #{cn}\t#{@known_hosts[cn]}"
  end
  # Step 2 - Save the cache into the file
  save!
  # Keep the historical return value: this method has always returned nil
  nil
rescue StandardError => ee
  # StandardError only: Interrupt / SystemExit must stay able to stop the run
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end