Class: Wmap::HostTracker::PrimaryHost

Inherits:
Wmap::HostTracker show all
Includes:
Singleton, Utils
Defined in:
lib/wmap/host_tracker/primary_host.rb

Overview

Class that distinguishes a site's primary host name from its potential aliases. This is needed to reduce confusion in the final site inventory list, which otherwise contains a large number of duplicates (aliases). More specifically, this class can be used to build a filter that tracks the primary URL of a website.

Constant Summary

Constants included from Utils::DomainRoot

Utils::DomainRoot::File_ccsld, Utils::DomainRoot::File_cctld, Utils::DomainRoot::File_gtld, Utils::DomainRoot::File_tld

Instance Attribute Summary collapse

Attributes inherited from Wmap::HostTracker

#alias, #max_parallel

Instance Method Summary collapse

Methods included from Utils

#cidr_2_ips, #file_2_hash, #file_2_list, #get_nameserver, #get_nameservers, #host_2_ip, #host_2_ips, #is_cidr?, #is_fqdn?, #is_ip?, #list_2_file, #reverse_dns_lookup, #sort_ips, #valid_dns_record?, #zone_transferable?

Methods included from Utils::Logger

#wlog

Methods included from Utils::UrlMagic

#create_absolute_url_from_base, #create_absolute_url_from_context, #host_2_url, #is_site?, #is_ssl?, #is_url?, #make_absolute, #normalize_url, #url_2_host, #url_2_path, #url_2_port, #url_2_site, #urls_on_same_domain?

Methods included from Utils::DomainRoot

#get_domain_root, #get_sub_domain, #is_domain_root?, #print_ccsld, #print_cctld, #print_gtld

Methods inherited from Wmap::HostTracker

#add, #bulk_add, #bulk_delete, #count, #delete, #dump_sub_domains, #file_add, #file_delete, #get_a_records, #get_root_domains, #host_aliases, #host_known?, #ip_known?, #local_host_2_ip, #local_ip_2_host, #print_host, #print_known_hosts, #refresh, #refresh_all, #save_known_hosts_to_file!, #search, #sub_domain_known?, #top_hostname

Constructor Details

#initialize(params = {}) ⇒ PrimaryHost

Initialize the instance variables



23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/wmap/host_tracker/primary_host.rb', line 23

# Initialize the instance variables and load the primary hosts table.
#
# @param params [Hash] optional settings
# @option params [Boolean] :verbose print diagnostic messages (default: false)
# @option params [String] :data_dir directory holding the data files; it is
#   concatenated directly with the file name, so it is expected to end with
#   a path separator
# @option params [String] :hosts_file path of the primary hosts file
#   (default: 'prime_hosts' inside the data directory)
def initialize(params = {})
  @verbose = params.fetch(:verbose, false)
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../../data/')
  # Default location of the primary hosts file
  @file_hosts = @data_dir + 'prime_hosts'
  # Record the file actually in use so that the hosts_file reader reflects it
  @hosts_file = params.fetch(:hosts_file, @file_hosts)
  # Make sure the file that is about to be loaded exists (not just the default one)
  File.write(@hosts_file, "") unless File.exist?(@hosts_file)
  @known_hosts = load_known_hosts_from_file(@hosts_file)
  @known_ips = Hash.new
  de_duplicate
end

Instance Attribute Details

#data_dirObject

Returns the value of attribute data_dir.



19
20
21
# File 'lib/wmap/host_tracker/primary_host.rb', line 19

# Reader for the data directory setting.
#
# @return [Object] the current value of @data_dir (taken from the :data_dir
#   parameter, or a default path, when the instance is initialized)
def data_dir
  @data_dir
end

#hosts_fileObject

Returns the value of attribute hosts_file.



19
20
21
# File 'lib/wmap/host_tracker/primary_host.rb', line 19

# Reader for the hosts file setting.
#
# @return [Object] the current value of @hosts_file
def hosts_file
  @hosts_file
end

#known_hostsObject (readonly)

Returns the value of attribute known_hosts.



20
21
22
# File 'lib/wmap/host_tracker/primary_host.rb', line 20

# Reader for the in-memory primary hosts table.
#
# @return [Object] the current value of @known_hosts (assigned from
#   load_known_hosts_from_file when the instance is initialized)
def known_hosts
  @known_hosts
end

#known_ipsObject (readonly)

Returns the value of attribute known_ips.



20
21
22
# File 'lib/wmap/host_tracker/primary_host.rb', line 20

# Reader for the lookup table of values already claimed by a known host.
#
# @return [Hash] the current value of @known_ips; #de_duplicate fills it with
#   the values stored in @known_hosts (presumably IP addresses - confirm
#   against load_known_hosts_from_file)
def known_ips
  @known_ips
end

#verboseObject

Returns the value of attribute verbose.



19
20
21
# File 'lib/wmap/host_tracker/primary_host.rb', line 19

# Reader for the verbosity flag that gates the diagnostic `puts` calls.
#
# @return [Object] the current value of @verbose (false unless a :verbose
#   parameter was supplied when the instance was initialized)
def verbose
  @verbose
end

Instance Method Details

#de_duplicateObject Also known as: deduplicate

Procedures to remove the redundant entries in the primary hosts data repository



103
104
105
106
107
108
109
110
111
112
# File 'lib/wmap/host_tracker/primary_host.rb', line 103

# Remove the redundant entries from the primary hosts table: for every value
# stored in @known_hosts only the first key (in insertion order) is kept.
#
# @known_ips is rebuilt from scratch on every call. Reusing the table left
# over from a previous run would flag every surviving entry as a duplicate
# and wipe the whole table on the second invocation.
#
# @return [Array] the keys that were removed from @known_hosts
def de_duplicate
  @known_ips = {} if @known_ips.nil?
  @known_ips.clear
  removed = []
  # Walk a snapshot of the keys so that entries can be deleted while iterating
  @known_hosts.keys.each do |key|
    ip = @known_hosts[key]
    if @known_ips.key?(ip)
      @known_hosts.delete(key)
      removed << key
    else
      @known_ips[ip] = true
    end
  end
  removed
end

#prime(host) ⇒ Object

Method to replace a host name with its known primary host name



116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/wmap/host_tracker/primary_host.rb', line 116

# Replace a host name with its known primary host name.
#
# The host is resolved to an IP (local repository first, then host_2_ip);
# when that IP is tracked in @known_ips the entry stored in @known_hosts
# under the IP is returned. In every other case - unknown IP, malformed
# host name, missing table entry, or an error during the lookup - the
# original host is handed back unchanged, so the result is never nil for a
# non-nil input.
#
# @param host [String] host name to be normalized
# @return [String] the primary host name, or +host+ itself as a fallback
def prime(host)
  raise "Unknown hostname format: #{host}" unless is_fqdn?(host)
  ip = local_host_2_ip(host)
  ip = host_2_ip(host) if ip.nil?
  return host unless @known_ips.key?(ip)
  # Fall back to the input when the table holds no entry for this IP
  @known_hosts[ip] || host
rescue StandardError => ee
  # StandardError only: Interrupt / SystemExit must not be swallowed here
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  host
end

#update_from_site_redirections!Object

Procedure to identify primary host names from the redirection URLs held in the site store. The assumption is that a site redirection always points to the well-known primary site.



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/wmap/host_tracker/primary_host.rb', line 76

# Identify primary host names from the redirection URLs held in the site
# store and add the ones that are not tracked yet, then save the table.
#
# Every redirection URL is reduced to its host; hosts that are valid FQDNs
# and whose resolved IP is not already a key of @known_hosts are added.
#
# Only StandardError is rescued, so Interrupt (Ctrl-C) and SystemExit can
# still stop a long-running update.
#
# @return [Object, nil] the result of #save!, or nil when an error was rescued
def update_from_site_redirections!
  puts "Invoke internal procedures to update the primary host-name table from the site store."
  begin
    site_tracker = Wmap::SiteTracker.instance
    # Point the shared tracker at the same data directory as this instance
    site_tracker.data_dir = @data_dir
    site_tracker.get_redirection_urls.each do |url|
      next unless is_url?(url)
      host = url_2_host(url)
      next unless is_fqdn?(host)
      ip = host_2_ip(host)
      # Duplication check
      add(host) unless @known_hosts.key?(ip)
    end
    save!
  rescue StandardError => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    nil
  end
end

#update_from_site_store!Object Also known as: update!

Procedure to identify primary host names from the SSL certificates of the sites in the site store. The assumption is that the CN (common name) used in the certificate application is the primary host name, i.e. the one actually used by the users.



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/wmap/host_tracker/primary_host.rb', line 37

# Refresh the primary hosts table from the SSL certificates of the sites in
# the site store, then save the table.
#
# Pass 1 collects the distinct certificate CNs of every SSL site whose host
# is not already a key of @known_hosts. Pass 2 adds each collected CN that is
# a valid FQDN and not yet known. No error is rescued here, so any failure
# propagates to the caller.
#
# @return [nil]
def update_from_site_store!
  puts "Invoke internal procedures to update the primary host-name table from the site store."
  common_names = {}
  cert_checker = Wmap::UrlChecker.new(data_dir: @data_dir)
  site_store = Wmap::SiteTracker.instance
  site_store.data_dir = @data_dir

  # Step 1 - gather the CN fields of the SSL certs
  site_store.get_ssl_sites.each do |site|
    puts "Exam SSL enabled site entry #{site} ..."
    # Skip hosts that are already tracked to avoid a needless cert lookup
    next if @known_hosts.key?(url_2_host(site))
    puts "Pull SSL cert details on site: #{site}"
    name = cert_checker.get_cert_cn(site)
    common_names[name] = true unless name.nil?
  end

  common_names.each_key do |name|
    next unless is_fqdn?(name)
    next if @known_hosts.key?(name)
    self.add(name)
    puts "New entry added: #{name}\t#{@known_hosts[name]}"
  end

  # Step 2 - save the cache into the file
  self.save!
  nil
end