Class: Wmap::SiteTracker

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/wmap/site_tracker.rb,
lib/wmap/site_tracker/deactivated_site.rb

Overview

Main class to automatically track the site inventory

Direct Known Subclasses

DeactivatedSite

Defined Under Namespace

Classes: DeactivatedSite

Constant Summary

Constants included from Utils::DomainRoot

Utils::DomainRoot::File_ccsld, Utils::DomainRoot::File_cctld, Utils::DomainRoot::File_gtld, Utils::DomainRoot::File_tld

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#cidr_2_ips, #file_2_hash, #file_2_list, #get_nameserver, #get_nameservers, #host_2_ip, #host_2_ips, #is_cidr?, #is_fqdn?, #is_ip?, #list_2_file, #reverse_dns_lookup, #sort_ips, #valid_dns_record?, #zone_transferable?

Methods included from Utils::Logger

#wlog

Methods included from Utils::UrlMagic

#create_absolute_url_from_base, #create_absolute_url_from_context, #host_2_url, #is_site?, #is_ssl?, #is_url?, #make_absolute, #normalize_url, #url_2_host, #url_2_path, #url_2_port, #url_2_site, #urls_on_same_domain?

Methods included from Utils::DomainRoot

#get_domain_root, #get_sub_domain, #is_domain_root?, #print_ccsld, #print_cctld, #print_gtld

Constructor Details

#initialize(params = {}) ⇒ SiteTracker

Set default instance variables



22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/wmap/site_tracker.rb', line 22

# Set up the tracker's instance variables and load the site store from disk.
#
# @param params [Hash] optional settings:
#   :data_dir     - directory holding the data files (default: '../../data/' relative to this file);
#                   it is created when it does not exist yet
#   :sites_file   - path of the site store file (default: the 'sites' file inside the data directory)
#   :verbose      - when true, progress messages are printed (default: false)
#   :max_parallel - default process count used by the bulk operations (default: 30)
def initialize(params = {})
  # Initialize the instance variables
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../data/')
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
  # File.join copes with a caller-supplied :data_dir that has no trailing
  # separator; plain string concatenation would yield e.g. "/tmp/datasites".
  @file_sites = File.join(@data_dir, 'sites')
  @file_stores = params.fetch(:sites_file, @file_sites)
  @verbose = params.fetch(:verbose, false)
  @max_parallel = params.fetch(:max_parallel, 30)
  # Make sure the store file exists before it is parsed into the hash table
  File.write(@file_stores, "") unless File.exist?(@file_stores)
  @known_sites = load_site_stores_from_file(@file_stores)
end

Instance Attribute Details

#data_dirObject

Returns the value of attribute data_dir.



18
19
20
# File 'lib/wmap/site_tracker.rb', line 18

# Reader for the data directory attribute.
#
# @return [Object] the current value of @data_dir
def data_dir
  return @data_dir
end

#known_sitesObject (readonly)

Returns the value of attribute known_sites.



19
20
21
# File 'lib/wmap/site_tracker.rb', line 19

# Reader for the in-memory site store.
#
# @return [Object] the current value of @known_sites
def known_sites
  return @known_sites
end

#max_parallelObject

Returns the value of attribute max_parallel.



18
19
20
# File 'lib/wmap/site_tracker.rb', line 18

# Reader for the default process count used by the bulk operations.
#
# @return [Object] the current value of @max_parallel
def max_parallel
  return @max_parallel
end

#sites_fileObject

Returns the value of attribute sites_file.



18
19
20
# File 'lib/wmap/site_tracker.rb', line 18

# Reader for the path of the site store file.
#
# The constructor keeps the configured store path in @file_stores and never
# assigns @sites_file, so a plain read of @sites_file is always nil unless it
# was assigned separately. Fall back to @file_stores so the reader reports the
# file that is actually in use.
#
# @return [String, nil] @sites_file when it has been assigned, otherwise @file_stores
def sites_file
  @sites_file || @file_stores
end

#verboseObject

Returns the value of attribute verbose.



18
19
20
# File 'lib/wmap/site_tracker.rb', line 18

# Reader for the verbosity flag.
#
# @return [Object] the current value of @verbose
def verbose
  return @verbose
end

Instance Method Details

#add(site) ⇒ Object

Setter to add site entry to the cache one at a time



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/wmap/site_tracker.rb', line 105

# Add a single site to the in-memory site store (@known_sites).
#
# Flow, as implemented below:
#   1. Strip/downcase the input and reject it when site_known? already reports it.
#   2. Normalize a URL into site format (normalize_url, url_2_site) and require is_site?.
#   3. Decide trust: for an IP host via Wmap::CidrTracker#ip_trusted?, otherwise via
#      Wmap::DomainTracker#domain_known? on the host's domain root.
#   4. For a trusted site, probe it with Wmap::UrlChecker#check, store the result in
#      @known_sites, remove the site from the DeactivatedSite list when present, and
#      add/refresh the host in Wmap::HostTracker.
#
# Every `raise` inside the method is caught by the rescue at the bottom, which
# prints the message and returns nil.
#
# @param site [String] site URL to add
# @return [Object, nil] the value returned by UrlChecker#check on success;
#   nil when the site is untrusted, rejected, or an exception was rescued
def add(site)
  puts "Add entry to the site store: #{site}"
  begin
    # Preliminary sanity check
    site=site.strip.downcase unless site.nil?
    raise "Site is already exist. Skip #{site}" if site_known?(site)
    site=normalize_url(site) if is_url?(site)
    site=url_2_site(site) if is_url?(site)
    puts "Site in standard format: #{site}" if @verbose
    raise "Exception on method #{__method__}: invalid site format of #{site}. Expected format is: http://your_website_name/" unless is_site?(site)
    trusted=false
    host=url_2_host(site)
    ip=host_2_ip(host)
    # Additional logic to refresh deactivated site, 02/12/2014
    deact=Wmap::SiteTracker::DeactivatedSite.new(:data_dir=>@data_dir)
    # only trust either the domain or IP we know
    if is_ip?(host)
      trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
    else
      root=get_domain_root(host)
      if root.nil?
        raise "Invalid web site format. Please check your record again."
      else
        trusted=Wmap::DomainTracker.new(:data_dir=>@data_dir).domain_known?(root)
      end
    end
    # add record only if trusted
    if trusted
      # Add logic to check site status before adding it
      checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
      raise "Site is currently down. Skip #{site}" if checker.nil?
      # Skip the http site if it's un-responsive; for the https we'll keep it because we're interested in analysing the SSL layer later
      # NOTE(review): is_https? is not among the Utils::UrlMagic methods listed for this class (is_ssl? is) - confirm where it is defined
      if is_https?(site)
        # do nothing
      else
        raise "Site is currently down. Skip #{site}" if checker['code']==10000
      end
      raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
      my_tracker = Wmap::HostTracker.new(:data_dir=>@data_dir)
      # Update the local host table when necessary
      if is_ip?(host)
        # Case #1: Trusted site contains IP
        if my_tracker.ip_known?(host)
          # Try local reverse DNS lookup first
          puts "Local hosts table lookup for IP: #{ip}" if @verbose
          host=my_tracker.local_ip_2_host(host)
          puts "Host found from the local hosts table for #{ip}: #{host}" if @verbose
          # Swap the dotted-quad in the site URL for the host name (mutates site in place)
          site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
        else
          # Try reverse DNS lookup over Internet as secondary precaution
          puts "Reverse DNS lookup for IP: #{ip}" if @verbose
          # NOTE(review): ip_2_host is not among the Utils methods listed for this class (reverse_dns_lookup is) - confirm where it is defined
          host1=ip_2_host(host)
          puts "host1: #{host1}" if @verbose
          if is_fqdn?(host1)
            if Wmap::HostTracker.new(:data_dir=>@data_dir).domain_known?(host1)
              # replace IP with host-name only if domain root is known
              puts "Host found from the Internet reverse DNS lookup for #{ip}: #{host1}" if @verbose
              host=host1
              site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
            end
          end
        end
        # Adding site for Case #1
        raise "Site already exist! Skip #{site}" if @known_sites.key?(site)
        puts "Adding site: #{site}" if @verbose
        # The empty hash assigned here is replaced by the checker result on the next line
        @known_sites[site]=Hash.new
        @known_sites[site]=checker
        # A site that is being (re-)added must no longer be listed as deactivated
        if deact.site_known?(site)
          deact.delete(site)
          deact.save!
        end
        puts "Site entry loaded: #{checker}"
        if is_fqdn?(host)
        # Add logic to update the hosts table for case #1 variance
        # -  case that reverse DNS lookup successful
          puts "Update local hosts table for host: #{host}"
          if my_tracker.host_known?(host)
            old_ip=my_tracker.local_host_2_ip(host)
            if old_ip != ip
              my_tracker.refresh(host)
              my_tracker.save!
            else
              puts "Host resolve to the same IP #{ip} - no need to update the local host table." if @verbose
            end
          else
            my_tracker.add(host)
            my_tracker.save!
          end
        end
      else
        # Case #2: Trusted site contains valid FQDN
        puts "Ading site: #{site}" if @verbose
        # The empty hash assigned here is replaced by the checker result on the next line
        @known_sites[site]=Hash.new
        @known_sites[site]=checker
        if deact.site_known?(site)
          deact.delete(site)
          deact.save!
        end
        puts "Site entry loaded: #{checker}"
        # Add logic to update the hosts table for case #2
        puts "Update local hosts table for host: #{host}"
        if my_tracker.host_known?(host)
          old_ip=my_tracker.local_host_2_ip(host)
          if old_ip != ip
            my_tracker.refresh(host)
            my_tracker.save!
          else
            # Skip - no need to update the local hosts table
          end
        else
          my_tracker.add(host)
          my_tracker.save!
        end
      end
      deact=nil
      my_tracker=nil
      return checker
    else
      puts "Problem found: untrusted Internet domain or IP. Skip #{site}"
      deact=nil
      my_tracker=nil
      return nil
    end
  rescue => ee
    # All failures, including the deliberate raises above, end up here
    puts "Exception on method #{__method__}: #{ee}"
    deact=nil
    return nil
  end
end

#bulk_add(list, num = @max_parallel) ⇒ Object Also known as: adds

Setter to add site entry to the cache in batch (from a list)



251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/wmap/site_tracker.rb', line 251

# Add a batch of sites to the in-memory site store, calling +add+ for each
# entry through Parallel.map.
#
# @param list [Array] sites to add
# @param num [Integer] process count passed to Parallel as :in_processes (defaults to @max_parallel)
# @return [Hash] the entries that were added, keyed by each result's 'url' value
def bulk_add(list,num=@max_parallel)
  puts "Add entries to the local site store from list:\n #{list}"
  added = {}
  if list.size > 0
    puts "Start parallel adding on the sites:\n #{list}"
    outcomes = Parallel.map(list, :in_processes => num) { |target| add(target) }
    outcomes.each do |outcome|
      # nil or empty results carry nothing worth recording
      next if outcome.nil? || outcome.empty?
      added[outcome['url']] = outcome
    end
    # Fold the collected results back into the site store of this process
    @known_sites.merge!(added)
  else
    puts "Error: no entry is added. Please check your list and try again."
  end
  puts "Done adding site entries."
  if added.empty?
    puts "No new entry added. "
  else
    puts "New entries added: #{added}"
  end
  return added
end

#bulk_delete(list) ⇒ Object Also known as: dels

Setter to delete site entries from the cache in batch (from a list)



329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/wmap/site_tracker.rb', line 329

# Remove a batch of sites from the site store, one +delete+ call per entry.
#
# @param list [Array] site URLs to remove; each is passed through url_2_site first
# @return [Array, nil] the non-nil values returned by +delete+; nil when the list
#   is empty or an exception was rescued
def bulk_delete(list)
  puts "Delete entries to the local site store from list:\n #{list}" if @verbose
  begin
    removed = []
    unless list.size > 0
      puts "Error: no entry is loaded. Please check your list and try again."
      return nil
    end
    list.each do |entry|
      gone = delete(url_2_site(entry))
      removed.push(gone) unless gone.nil?
    end
    puts "Done deleting sites from the list:\n #{list}"
    return removed
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#bulk_refresh(list, num = @max_parallel) ⇒ Object Also known as: refreshs

Refresh sites in the site store in batch (from a list)



403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
# File 'lib/wmap/site_tracker.rb', line 403

# Refresh a batch of sites, calling +refresh+ for each entry through Parallel.map,
# then replace the listed entries in @known_sites with the fresh results.
#
# @param list [Array] site URLs to refresh
# @param num [Integer] process count passed to Parallel as :in_processes (defaults to @max_parallel)
# @return [Hash, nil] refreshed entries keyed by each result's 'url' value (empty when
#   the list is empty); nil when an exception was rescued
def bulk_refresh(list,num=@max_parallel)
  puts "Refresh entries in the site store from list:\n #{list}" if @verbose
  begin
    refreshed = {}
    if list.size > 0
      puts "Start parallel refreshing on the sites:\n #{list}"
      outcomes = Parallel.map(list, :in_processes => num) { |target| refresh(target) }
      outcomes.each do |outcome|
        # nil or empty results carry nothing worth recording
        next if outcome.nil? || outcome.empty?
        refreshed[outcome['url']] = outcome
      end
      # Drop every listed entry first, then put back only the ones that refreshed successfully
      list.each { |stale| @known_sites.delete(stale) }
      @known_sites.merge!(refreshed)
      puts "Done refresh sites."
    else
      puts "Error: no entry is loaded. Please check your list and try again."
    end
    return refreshed
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#countObject

Count numbers of entries in the site store table



95
96
97
98
99
100
101
102
# File 'lib/wmap/site_tracker.rb', line 95

# Report how many entries the site store currently holds.
#
# @return [Integer, nil] size of @known_sites; nil when an exception was rescued
def count
  puts "Counting number of entries in the site store table ..."
  begin
    total = @known_sites.size
    return total
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
  end
end

#delete(site) ⇒ Object Also known as: del

Setter to remove entry from the site store one at a time



287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/wmap/site_tracker.rb', line 287

# Remove one site from the site store and record it in the DeactivatedSite list.
#
# @param site [String] site URL; it is stripped, downcased and passed through url_2_site
# @return [Object, nil] the removed entry; nil when the site is unknown or an
#   exception was rescued
def delete(site)
  puts "Remove entry from the site store: #{site} " if @verbose
  begin
    # Additional logic to deactivate the site properly, by moving it to the DeactivatedSite list, 02/07/2014
    deactivated = Wmap::SiteTracker::DeactivatedSite.new(:data_dir=>@data_dir)
    site = url_2_site(site.strip.downcase)
    unless @known_sites.key?(site)
      puts "Entry not fund. Skip #{site}"
      return nil
    end
    # Record the entry as deactivated before dropping it from the live store
    deactivated.add(site, @known_sites[site])
    deactivated.save!
    removed = @known_sites.delete(site)
    puts "Entry cleared: #{site}"
    return removed
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#file_add(file) ⇒ Object

Setter to add site entry to the cache table in batch (from a file)



236
237
238
239
240
241
242
243
244
245
246
247
248
# File 'lib/wmap/site_tracker.rb', line 236

# Add every site listed in a file to the site store via +bulk_add+.
#
# @param file [String] path of the file to read with file_2_list
# @return [Hash, nil] the value returned by +bulk_add+, or an empty Hash when the
#   file yields no sites; nil when the file is missing or an exception was rescued
def file_add(file)
  puts "Add entries to the local site store from file: #{file}"
  begin
    raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
    sites = file_2_list(file)
    nothing_to_add = sites.nil? || sites.empty?
    changes = nothing_to_add ? Hash.new : bulk_add(sites)
    puts "Done loading file #{file}. "
    return changes
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
  end
end

#file_delete(file) ⇒ Object Also known as: file_del

Setter to delete site entries from the cache in batch (from a file)



315
316
317
318
319
320
321
322
323
324
325
# File 'lib/wmap/site_tracker.rb', line 315

# Remove every site listed in a file from the site store via +bulk_delete+.
#
# The result is returned explicitly: previously the method's value was that of
# the trailing modifier-`unless` statement, which is nil (not the empty Array
# that was initialised) whenever the file yields no sites.
#
# @param file [String] path of the file to read with file_2_list
# @return [Array, nil] the value returned by +bulk_delete+, or an empty Array when the
#   file yields no sites; nil when the file is missing or an exception was rescued
def file_delete(file)
  begin
    puts "Delete entries to the local site store from file: #{file}" if @verbose
    raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
    sites = file_2_list(file)
    changes = []
    changes = bulk_delete(sites) unless sites.nil? || sites.empty?
    return changes
  rescue => ee
    puts "Exception on method file_delete: #{ee} for file: #{file}" if @verbose
    return nil
  end
end

#file_refresh(file) ⇒ Object

Refresh sites in the site store in batch (from a file)



373
374
375
376
377
378
379
380
381
382
383
# File 'lib/wmap/site_tracker.rb', line 373

# Refresh every site listed in a file via +bulk_refresh+.
#
# @param file [String] path of the file to read with file_2_list
# @return [Hash, nil] the value returned by +bulk_refresh+, or an empty Hash when the
#   file yields no sites; nil when an exception was rescued
def file_refresh(file)
  puts "Refresh entries in the site store from file: #{file}" if @verbose
  begin
    sites = file_2_list(file)
    return Hash.new if sites.nil? || sites.empty?
    return bulk_refresh(sites)
  rescue => ee
    puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
    return nil
  end
end

#get_ext_sitesObject Also known as: get_ext

Retrieve external hosted sites into a list



517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
# File 'lib/wmap/site_tracker.rb', line 517

# Collect the sites whose 'status' field equals "ext_hosted".
#
# @return [Array, nil] sorted list of matching site keys; nil when an exception was rescued
def get_ext_sites
  puts "getter to retrieve all the external hosted sites. " if @verbose
  begin
    external = @known_sites.keys.select { |url| @known_sites[url]['status']=="ext_hosted" }
    return external.sort
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#get_int_sitesObject Also known as: get_int

Retrieve a list of internal hosted site URLs



536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
# File 'lib/wmap/site_tracker.rb', line 536

# Collect the sites whose 'status' field equals "int_hosted".
#
# @return [Array, nil] sorted list of matching site keys; nil when an exception was rescued
def get_int_sites
  puts "getter to retrieve all the internal hosted sites." if @verbose
  begin
    internal = @known_sites.keys.select { |url| @known_sites[url]['status']=="int_hosted" }
    return internal.sort
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#get_ip_sitesObject

Retrieve a list of sites that contain an IP in the site URL



555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
# File 'lib/wmap/site_tracker.rb', line 555

# Collect the sites whose host part (per url_2_host) satisfies is_ip?.
#
# @return [Array, nil] sorted list of matching site keys; nil when an exception was rescued
def get_ip_sites
  puts "Getter to retrieve sites contain an IP instead of a host-name ." if @verbose
  begin
    ip_sites = @known_sites.keys.select { |url| is_ip?(url_2_host(url)) }
    return ip_sites.sort
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#get_prim_uniq_sitesObject Also known as: get_prime

Retrieve the unique sites from the local site store in the primary host format



837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
# File 'lib/wmap/site_tracker.rb', line 837

# Build the list of unique sites, rewriting a site to its primary host name
# where the local trackers indicate that several hosts share its IP.
#
# Starts from get_uniq_sites and, per site, applies the cases commented below:
# IP-only sites, sites whose IP has an alias count of 1, and sites whose IP has
# no alias count are passed through unchanged; for the rest the host is mapped
# through PrimaryHost#prime and the site is swapped for the primed one when that
# entry exists in @known_sites with the same 'ip'.
#
# The surrounding begin/rescue is commented out, so exceptions raised here
# (including the explicit raise for a primed site missing from the store)
# propagate to the caller.
#
# NOTE(review): get_uniq_sites returns nil when it rescues an exception; in that
# case `sites.map` below would raise NoMethodError - confirm callers expect that.
#
# @return [Array] site URLs in primary host format
def get_prim_uniq_sites
  puts "Retrieve and prime unique sites in the site store. " if @verbose
  #begin
    host_tracker=Wmap::HostTracker.new(:data_dir=>@data_dir)
    primary_host_tracker=Wmap::HostTracker::PrimaryHost.new(:data_dir=>@data_dir)
    # Step 1. Retrieve the unique site list first
    sites=get_uniq_sites
    prim_uniq_sites=Array.new
    # Step 2. Iterate on the unique site list, spit out the site in the primary host format one at a time
    sites.map do |site|
      puts "Work on priming unique site: #{site}" if @verbose
      host=url_2_host(site)
      # case#1, for the IP only site, do nothing (presuming 'refresh_ip_sites' or 'refresh_all' method already take care of the potential discrepancy here).
      if is_ip?(host)
        prim_uniq_sites.push(site)
        next
      end
      ip=@known_sites[site]['ip']
      # case#2, for site with an unique IP, do nothing
      puts "Local hosts table entry count for #{ip}: #{host_tracker.alias[ip]}" if @verbose
      if host_tracker.alias[ip] == 1
        prim_uniq_sites.push(site)
        next
      end
      # case#3, case of multiple IPs for A DNS record, where the site IP may have 0 alias count, do nothing
      if host_tracker.alias[ip] == nil
        prim_uniq_sites.push(site)
        next
      end
      # case#4, for the site has a duplicate IP with others, we try to determine which one is the primary site
      # raise "Error: inconsistency detected on record: #{site}. Please run the following shell command to refresh it first: \n\srefresh #{site}" if tracker1.alias[ip].nil?
      if ( primary_host_tracker.known_hosts.key?(ip) and (host_tracker.alias[ip] > 1) )
        new_host=primary_host_tracker.prime(host)
        puts "Host: #{host}, New host:#{new_host}" if @verbose
        unless host==new_host
          new_site=site.sub(host,new_host)
          raise "Site not found in the site tracking data repository: #{new_site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{new_site}\n" unless @known_sites.key?(new_site)
          new_ip=@known_sites[new_site]['ip']
          if new_ip==ip    # consistency check
            site=new_site
          else
            # TBD - case of multiple IPs for A DNS record
            #raise "Inconsistency found on prime host entrance: #{new_ip}, #{ip}; #{new_site}, #{site}. Please refresh your entries by running the following shell command: \n\s refresh #{new_site}"
          end
        end
      end
      # Reached for case#4 only: push either the primed site or the original one
      prim_uniq_sites.push(site)
    end
    primary_host_tracker=nil
    host_tracker=nil
    return prim_uniq_sites
  #rescue => ee
  #  puts "Exception on method #{__method__}: #{ee}"
  #end
end

#get_redirection_url(site) ⇒ Object

Retrieve redirection URL if available



658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
# File 'lib/wmap/site_tracker.rb', line 658

# Look up the stored 'redirection' value of one site.
#
# @param site [String] site URL; it is stripped and downcased before the lookup
# @return [Object, nil] the site's 'redirection' value; nil when the site is unknown
#   or an exception was rescued
def get_redirection_url(site)
  puts "getter to retrieve the redirection URL from the site store." if @verbose
  begin
    site = site.strip.downcase
    unless @known_sites.key?(site)
      puts "Unknown site: #{site}" if @verbose
      return nil
    end
    return @known_sites[site]['redirection']
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#get_redirection_urlsObject

Retrieve a list of redirection URLs from the site store



640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
# File 'lib/wmap/site_tracker.rb', line 640

# Collect every non-nil 'redirection' value held in the site store.
#
# @return [Array, nil] sorted list of redirection values; nil when an exception was rescued
def get_redirection_urls
  puts "getter to retrieve all the redirection URLs from the site store." if @verbose
  begin
    targets = @known_sites.keys.map { |url| @known_sites[url]['redirection'] }
    return targets.compact.sort
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#get_ssl_sitesObject

Retrieve a list of HTTPS (SSL) sites from the site store



623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
# File 'lib/wmap/site_tracker.rb', line 623

# Collect the sites in the site store whose URL uses the https scheme.
#
# The previous version evaluated `key =~ /https/i` but ignored the result, so
# every site was returned. The match is now used as the filter and anchored to
# the start of the URL, so a plain-http host whose name merely contains "https"
# is not reported.
#
# @return [Array, nil] sorted list of https site keys; nil when a StandardError was rescued
def get_ssl_sites
  puts "getter to retrieve https sites from the site store." if @verbose
  begin
    ssl_sites = @known_sites.keys.select { |key| key =~ /\Ahttps:/i }
    return ssl_sites.sort
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#get_uniq_sitesObject Also known as: uniq_sites

Retrieve a list of unique sites within the known site store



574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
# File 'lib/wmap/site_tracker.rb', line 574

# Reduce the site store to one site per "IP:PORT" pair.
#
# A site is skipped when its 'code' is 10000 or 20000, when its 'md5' is nil or
# empty, or when no IP can be found for its host (local host table first, then
# host_2_ip). For each remaining IP:PORT pair the first site encountered wins.
#
# @return [Array, nil] the selected site keys; nil when an exception was rescued
def get_uniq_sites
  puts "Getter to retrieve unique sites containing unique IP:PORT key identifier." if @verbose
  begin
    by_endpoint = {}
    tracker = Wmap::HostTracker.new(:data_dir=>@data_dir)
    @known_sites.keys.each do |url|
      port = url_2_port(url).to_s
      host = url_2_host(url)
      md5 = @known_sites[url]['md5']
      code = @known_sites[url]['code']
      # Prefer the local host table; fall back to a lookup through host_2_ip
      ip = tracker.local_host_2_ip(host)
      ip = host_2_ip(host) if ip.nil?
      # filtering out 'un-reachable' sites
      next if code == 10000 || code == 20000
      # filtering out 'empty' sites
      next if md5.nil? || md5.empty?
      next if ip.nil?
      endpoint = ip+":"+port
      # filtering out duplicates by 'IP:PORT' key pair - first site seen is kept
      by_endpoint[endpoint] = url unless by_endpoint.key?(endpoint)
    end
    return by_endpoint.values
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#load_site_stores_from_file(file) ⇒ Object

Load the known sites from the site store file into a hash table



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/wmap/site_tracker.rb', line 36

# Parse the site store file into a hash table keyed by the lower-cased site URL.
#
# Each data line is split on tabs or commas into, in order: site, ip, port,
# status, server, response code, md5, redirection, timestamp. Blank lines and
# lines starting with '#' are ignored.
#
# Changes from the previous version:
#   * the file is opened in block form, so the handle is closed even when
#     parsing raises part-way through;
#   * a line made only of delimiters (e.g. ",,,") splits into no fields; it is
#     now skipped instead of raising on the missing site field, which used to
#     abort the whole load and return nil.
#
# @param file [String] path of the site store file
# @return [Hash, nil] site => { 'ip', 'port', 'status', 'server', 'code' (Integer),
#   'md5', 'redirection', 'timestamp' }; nil when an exception was rescued
def load_site_stores_from_file(file)
  puts "Loading the site store data repository from file: #{file} " if @verbose
  begin
    known_sites = {}
    File.open(file, 'r') do |f|
      f.each do |line|
        line = line.chomp.strip
        next if line.empty?
        next if line =~ /^\s*#/
        entry = line.split(%r{\t+|\,})
        # No site field at all: nothing to record for this line
        next if entry[0].nil?
        site = entry[0].downcase
        puts "Loading entry: #{site} - #{entry[1]} - #{entry[3]}" if @verbose
        # A repeated site keeps one record whose fields are overwritten by the later line
        record = (known_sites[site] ||= {})
        record['ip'] = entry[1]
        record['port'] = entry[2]
        record['status'] = entry[3]
        record['server'] = entry[4]
        record['code'] = entry[5].to_i
        record['md5'] = entry[6]
        record['redirection'] = entry[7]
        record['timestamp'] = entry[8]
      end
    end
    puts "Successfully loading file: #{file}" if @verbose
    return known_sites
  rescue => ee
    puts "Exception on method #{__method__} for file #{file}: #{ee}"
    return nil
  end
end

Print summary report of all sites URL in the site store



751
752
753
754
755
756
757
758
759
760
# File 'lib/wmap/site_tracker.rb', line 751

# Print every site key in the site store, sorted, one per line, framed by a
# header and a footer line.
#
# @return [nil]
def print_all_sites
  puts "\nSummary Report of the site store:"
  @known_sites.keys.sort.each { |url| puts url }

  puts "End of the summary"
end

Print summary report of external hosted sites URL in the site store



895
896
897
898
899
900
901
902
# File 'lib/wmap/site_tracker.rb', line 895

# Print the sites returned by get_ext_sites, one per line, under a header.
#
# @return [nil]
def print_ext_sites
  puts "\nSummary Report of the External Hosted Site"
  get_ext_sites.each { |url| puts url }
  return nil
end

Print summary report of internal hosted site URLs



906
907
908
909
910
911
912
913
# File 'lib/wmap/site_tracker.rb', line 906

# Print the sites returned by get_int_sites, one per line, under a header.
#
# @return [nil]
def print_int_sites
  puts "\nSummary Report of the Internal Hosted Site"
  get_int_sites.each { |url| puts url }
  return nil
end

Print summary report on all sites that contain an IP in the site URL



721
722
723
724
725
726
# File 'lib/wmap/site_tracker.rb', line 721

# Print the sites returned by get_ip_sites, one per line, between a header and
# a footer line.
#
# @return [nil]
def print_ip_sites
  puts "Print sites contain an IP instead of a host-name."
  get_ip_sites.each do |url|
    puts url
  end
  puts "End of report. "
end

Retrieve and print specific information of a site in the site store



729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
# File 'lib/wmap/site_tracker.rb', line 729

# Print one site's record as a single comma-separated line in the order:
# site, ip, port, status, server, code, md5, redirection, timestamp.
#
# An unknown site (or any other failure) is reported through the rescue
# message instead of raising to the caller.
#
# @param site [String] site key; surrounding whitespace is stripped before the lookup
# @return [nil]
def print_site(site)
  puts "Site Information Report for: #{site}" if @verbose
  begin
    site = site.strip unless site.nil?
    raise "Unknown site: #{site}" unless @known_sites.key?(site)
    record = @known_sites[site]
    columns = %w[ip port status server code md5 redirection timestamp].map { |field| record[field] }
    puts ([site] + columns).map(&:to_s).join(',')
  rescue => ee
    puts "Exception on method #{__method__} for #{site}: #{ee}"
  end
end

Print summary report of HTTPS sites in the site store



917
918
919
920
921
922
923
924
# File 'lib/wmap/site_tracker.rb', line 917

# Print the sites returned by get_ssl_sites, one per line, under a header.
#
# @return [nil]
def print_ssl_sites
  puts "\nSummary Report of the HTTPS Sites from the Site Store"
  get_ssl_sites.each { |url| puts url }
  return nil
end

Print summary report of unique sites in the site store



927
928
929
930
931
932
933
934
# File 'lib/wmap/site_tracker.rb', line 927

# Print a CSV-style report: a title line, a column header line, then one
# print_site line for every site returned by get_uniq_sites.
#
# @return [Object] the value of iterating the list from get_uniq_sites
def print_uniq_sites
  puts "Summary Report for the Unique sites:"
  puts "Website,Primary IP,Port,Hosting Status,Server,Response Code,Site MD5 Finger-print,Site Redirection,Timestamp"
  get_uniq_sites.each { |url| print_site(url) }
end

#refresh(site) ⇒ Object

Setter to refresh the entry in the site store one at a time



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
# File 'lib/wmap/site_tracker.rb', line 352

# Refresh one known site by deleting it and adding it again.
#
# @param site [String] site URL; it is stripped and downcased before the lookup
# @return [Object, nil] the value returned by +add+; nil when the site is not in
#   the store, the input is nil/empty, or an exception was rescued
def refresh(site)
  puts "Refresh the local site store for site: #{site} "
  begin
    raise "Invalid site: #{site}" if site.nil? || site.empty?
    site = site.strip.downcase
    unless @known_sites.key?(site)
      puts "Error entry non exist: #{site}"
      return nil
    end
    delete(site)
    fresh_info = add(site)
    puts "Done refresh entry: #{site}"
    return fresh_info
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#refresh_allObject

Refresh all site entries in the stores in one shot



438
439
440
441
442
443
444
445
446
447
448
449
# File 'lib/wmap/site_tracker.rb', line 438

# Refresh every entry of the site store through +bulk_refresh+ and merge the
# results back into @known_sites.
#
# @return [Hash, nil] the value returned by +bulk_refresh+; nil when an exception was rescued
def refresh_all
  puts "Refresh all the entries within the local site store ... "
  begin
    refreshed = bulk_refresh(@known_sites.keys)
    @known_sites.merge!(refreshed)
    puts "Done refresh all entries."
    return refreshed
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#refresh_ip_sitesObject

Refresh all site entries in the stores that contains an IP instead of a hostname



452
453
454
455
456
457
458
459
460
461
462
463
464
465
# File 'lib/wmap/site_tracker.rb', line 452

# Refresh the IP-based sites (per get_ip_sites) whose stored 'code' is neither
# 10000 nor 20000, and merge the results back into @known_sites.
#
# @return [Hash, nil] the value returned by +bulk_refresh+; nil when an exception was rescued
def refresh_ip_sites
  puts "Refresh all entries that contain an IP address instead of a FQDN ... "
  begin
    live_sites = get_ip_sites.reject do |url|
      @known_sites[url]['code'] == 10000 || @known_sites[url]['code'] == 20000
    end
    refreshed = bulk_refresh(live_sites)
    @known_sites.merge!(refreshed)
    puts "Done refresh IP sites."
    return refreshed
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#refresh_uniq_sitesObject

Refresh unique sites in the site store only



386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/wmap/site_tracker.rb', line 386

# Refresh only the sites returned by get_uniq_sites.
#
# @return [Hash, nil] the value returned by +bulk_refresh+, or an empty Hash when
#   there are no unique sites; nil when an exception was rescued
def refresh_uniq_sites
  puts "Refresh unique site entries in the site store. " if @verbose
  begin
    sites = get_uniq_sites
    return bulk_refresh(sites) if sites.size > 0
    puts "Error: no entry is refreshed. Please check your site store and try again."
    return Hash.new
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#resolve_ip_sitesObject

Perform local host table reverse lookup for the IP sites, in hope that the hostname could now be resolved since the site was discovered



675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
# File 'lib/wmap/site_tracker.rb', line 675

# For every IP-based site (per get_ip_sites), look the IP up in the local host
# table and call +refresh+ on the site when a host name is found.
#
# @return [Array, nil] sorted list of the sites that were refreshed; nil when an
#   exception was rescued
def resolve_ip_sites
  puts "Resolve sites that contain an IP address. Update the site cache table once a hostname is found in the local host table." if @verbose
  begin
    resolved = []
    ip_sites = get_ip_sites
    tracker = Wmap::HostTracker.new(:data_dir=>@data_dir)
    ip_sites.each do |site|
      puts "Work on resolve the IP site: #{site}" if @verbose
      ip = url_2_host(site)
      hostname = tracker.local_ip_2_host(ip)
      if hostname.nil?
        puts "Can't resolve #{ip} from the local host store. Skip #{site}" if @verbose
        next
      end
      puts "Host-name found for IP #{ip}: #{hostname}" if @verbose
      resolved.push(site)
      refresh(site)
    end
    resolved.sort!
    puts "The following sites are now refreshed: #{resolved}" if @verbose
    return resolved
  rescue Exception => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#save_sites_to_file!(file_sites = @file_stores) ⇒ Object Also known as: save!

Save the current site store hash table into a file



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/wmap/site_tracker.rb', line 76

# Write the in-memory site store to a file: two '#' comment header lines, then
# one comma-separated line per site, sorted by site key.
#
# The file is opened in block form so the handle is closed even when a write
# raises; the previous version only closed it on the success path.
#
# @param file_sites [String] destination path (defaults to @file_stores)
# @return [nil] failures are reported through the rescue message, not raised
def save_sites_to_file!(file_sites=@file_stores)
  puts "Saving the current site store table from memory to file: #{file_sites}"
  begin
    timestamp = Time.now
    File.open(file_sites, 'w') do |f|
      f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
      f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
      @known_sites.keys.sort.each do |key|
        record = @known_sites[key]
        f.write "#{key},#{record['ip']},#{record['port']},#{record['status']},#{record['server']},#{record['code']},#{record['md5']},#{record['redirection']},#{record['timestamp']}\n"
      end
    end
    puts "site store table is successfully saved: #{file_sites}"
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
  end
end

#save_uniq_sites(file) ⇒ Object Also known as: dump

Retrieve and save unique sites information for the quarterly scan into a plain local file



764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
# File 'lib/wmap/site_tracker.rb', line 764

# Write a flat-file report of the unique primary sites, as returned by
# get_prim_uniq_sites, using the records held in @known_sites.
#
# @param file [String] path of the report file to (over)write
# @return [Boolean] true on success; false when anything fails, including a
#   primary site that is missing from the site store
def save_uniq_sites(file)
  puts "Save unique sites information into a flat file: #{file}\nThis may take a long while as it go through a lengthy self correction check process, please be patient ..."
  begin
    prime_sites=get_prim_uniq_sites
    puts "Primary Sites: #{prime_sites}" if @verbose
    # Block form of File.open: the "Unknown site" raise below happens while the
    # file is open, and the block guarantees the handle is closed on that path.
    File.open(file,"w") do |f|
      f.write "Unique Sites Information Report\n"
      f.write "Site, IP, Port, Server, Hosting, Response Code, MD5, Redirect, Timestamps\n"
      prime_sites.each do |key|
        next if key.nil?
        site=key.strip
        raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\wadd #{site}\n" unless @known_sites.key?(site)
        record=@known_sites[site]
        # Column order follows the header above: server comes before hosting status.
        f.write "#{site},#{record['ip']},#{record['port']},#{record['server']},#{record['status']},#{record['code']},#{record['md5']},#{record['redirection']},#{record['timestamp']}\n"
      end
    end
    puts "Done!"
    return true  # success
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
    return false # fail
  end
end

#save_uniq_sites_xml(file) ⇒ Object Also known as: dump_xml

Retrieve and save unique site information for the quarterly scan into an XML file



797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
# File 'lib/wmap/site_tracker.rb', line 797

# Write an XML report of the unique primary sites, as returned by
# get_prim_uniq_sites, using the records held in @known_sites. The document
# is built completely in memory before the output file is touched.
#
# @param file [String] path of the XML file to (over)write
# @return [Boolean] true on success; false when anything fails, including a
#   primary site that is missing from the site store
def save_uniq_sites_xml(file)
  puts "Save unique sites information into XML file: #{file}\nThis may take a long while as it go through lengthy self correctness check, please be patient ..."
  begin
    prime_sites=get_prim_uniq_sites
    builder = Nokogiri::XML::Builder.new do |xml|
      xml.root {
        xml.websites {
          prime_sites.each do |key|
            next if key.nil?
            site=key.strip
            raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twmap #{site}\n" unless @known_sites.key?(site)
            record=@known_sites[site]
            xml.site {
              xml.name site
              # Element-building calls keep the trailing-underscore spelling used throughout this method.
              xml.ip_ record['ip']
              xml.port_ record['port']
              xml.status_ record['status']
              xml.server_ record['server']
              xml.fingerprint_ record['md5']
              xml.redirection_ record['redirection']
              xml.responsecode_ record['code']
              xml.timestamp_ record['timestamp']
            }
          end
        }
      }
    end
    # Serialise once and reuse the string for both the verbose dump and the file.
    xml_doc=builder.to_xml
    puts xml_doc if @verbose
    # Block form of File.open closes the handle even if the write itself raises.
    File.open(file,'w') { |f| f.write(xml_doc) }
    puts "Done!"
    return true
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
    return false
  end
end

#search(pattern) ⇒ Object

Search the site store for sites matching a simple regular expression. Note that any upper-case characters in the search string are automatically converted to lower case, and the match itself is case-insensitive.



703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
# File 'lib/wmap/site_tracker.rb', line 703

# Search the site store keys with a simple, case-insensitive regular expression.
#
# @param pattern [String] regular expression source; surrounding whitespace is
#   stripped and the text is lower-cased before it is compiled
# @return [Array, nil] matching site keys in store order (empty when nothing
#   matches), or nil when the pattern is nil or not a valid regular expression,
#   or when the store cannot be read
def search(pattern)
  puts "Search site store based on the regular expression: #{pattern}" if @verbose
  begin
    # Compile once up front instead of re-interpolating the regexp for every key.
    matcher=Regexp.new(pattern.strip.downcase, Regexp::IGNORECASE)
    return @known_sites.keys.grep(matcher)
  rescue => ee
    # Only StandardError is rescued (RegexpError, NoMethodError, ...), so
    # interrupts and exit requests are no longer swallowed here.
    puts "Exception on method #{__method__}: #{ee}" if @verbose
    return nil
  end
end

#site_check(site) ⇒ Object Also known as: check

Quick check of the stored information of a site within the store



503
504
505
506
507
508
509
510
511
512
513
# File 'lib/wmap/site_tracker.rb', line 503

# Look up the stored record of a site in the site store.
#
# @param site [String, nil] site URL; it is stripped and lower-cased, then
#   passed through url_2_site before the lookup
# @return [Object, nil] whatever @known_sites holds for the normalized site;
#   nil when the site is nil, not in the store, or an error occurred (errors
#   are reported on STDOUT)
def site_check(site)
  raise "Web site store not loaded properly! " if @known_sites.nil?
  cleaned = site.nil? ? nil : site.strip.downcase
  lookup_key = url_2_site(cleaned)
  lookup_key.nil? ? nil : @known_sites[lookup_key]
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  nil
end

#site_ip_known?(ip) ⇒ Boolean Also known as: siteip_known?

Quick check on whether an IP is already part of the site store

Returns:

  • (Boolean)


482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
# File 'lib/wmap/site_tracker.rb', line 482

# Check whether any record in the site store has the given IP address.
#
# @param ip [String] IP address; trailing newline and surrounding whitespace are removed
# @return [Boolean] true when at least one stored record's 'ip' equals the
#   cleaned input; false when there is no match, the input does not pass
#   is_ip?, or an error occurred (errors are reported on STDOUT)
def site_ip_known?(ip)
  begin
    ip=ip.chomp.strip
    return false unless is_ip?(ip)
    # any? stops at the first matching record.
    return @known_sites.each_value.any? { |record| record['ip']==ip }
  rescue => ee
    puts "Exception on method #{__method__}: #{ee}"
    return false
  end
end

#site_known?(site) ⇒ Boolean Also known as: is_known?

Quick check on whether a site is already covered by the site store

Returns:

  • (Boolean)


468
469
470
471
472
473
474
475
476
477
478
# File 'lib/wmap/site_tracker.rb', line 468

# Tell whether a site is already present in the site store.
#
# @param site [String, nil] site URL; it is stripped and lower-cased, then
#   passed through url_2_site before the key lookup
# @return [Boolean] true when the normalized site is a key of @known_sites;
#   false when it is not, when the site is nil, or when an error occurred
#   (errors are reported on STDOUT)
def site_known?(site)
  raise "Web site store not loaded properly! " if @known_sites.nil?
  # `site` is reassigned in place on purpose: the rescue message below reports
  # whatever form the value had reached when the failure happened.
  site = site.nil? ? nil : site.strip.downcase
  site = url_2_site(site)
  site.nil? ? false : @known_sites.key?(site)
rescue => ee
  puts "Error checking web site #{site} against the site store: #{ee}"
  false
end