Class: Wmap::SiteTracker

Inherits:
Object
  • Object
show all
Includes:
Singleton, Utils
Defined in:
lib/wmap/site_tracker.rb,
lib/wmap/site_tracker/deactivated_site.rb

Overview

Main class to automatically track the site inventory

Direct Known Subclasses

DeactivatedSite

Defined Under Namespace

Classes: DeactivatedSite

Constant Summary

Constants included from Utils::UrlMagic

Utils::UrlMagic::Max_http_timeout

Constants included from Utils::DomainRoot

Utils::DomainRoot::File_ccsld, Utils::DomainRoot::File_cctld, Utils::DomainRoot::File_gtld, Utils::DomainRoot::File_tld

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#cidr_2_ips, #file_2_hash, #file_2_list, #get_nameserver, #get_nameservers, #host_2_ip, #host_2_ips, #is_cidr?, #is_fqdn?, #is_ip?, #list_2_file, #reverse_dns_lookup, #sort_ips, #valid_dns_record?, #zone_transferable?

Methods included from Utils::Logger

#wlog

Methods included from Utils::UrlMagic

#create_absolute_url_from_base, #create_absolute_url_from_context, #host_2_url, #is_site?, #is_ssl?, #is_url?, #landing_location, #make_absolute, #normalize_url, #open_page, #redirect_location, #response_code, #url_2_host, #url_2_path, #url_2_port, #url_2_site, #urls_on_same_domain?

Methods included from Utils::DomainRoot

#get_domain_root, #get_domain_root_by_ccsld, #get_domain_root_by_cctld, #get_domain_root_by_tlds, #get_sub_domain, #is_domain_root?, #print_ccsld, #print_cctld, #print_gtld

Constructor Details

#initialize(params = {}) ⇒ SiteTracker

Set default instance variables



21
22
23
24
25
26
27
28
29
30
31
# File 'lib/wmap/site_tracker.rb', line 21

# Set the default instance variables and load the site store from disk.
#
# @param params [Hash] optional settings:
#   :data_dir     - directory holding the data files (default: <this file's dir>/../../data/)
#   :sites_file   - path of the site store file (default: "sites" inside :data_dir)
#   :verbose      - print progress messages when true (default: false)
#   :max_parallel - default worker count for the bulk operations (default: 30)
def initialize(params = {})
  @data_dir = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../data/')
  Dir.mkdir(@data_dir) unless Dir.exist?(@data_dir)
  # File.join yields a valid path whether or not :data_dir ends with a slash
  @sites_file = params.fetch(:sites_file, File.join(@data_dir, 'sites'))
  @verbose = params.fetch(:verbose, false)
  @max_parallel = params.fetch(:max_parallel, 30)
  # Guarantee the store file exists before it is read below
  File.write(@sites_file, "") unless File.exist?(@sites_file)
  # Populate the @known_sites hash table from the store file
  load_site_stores_from_file(@sites_file)
end

Instance Attribute Details

#data_dirObject

Returns the value of attribute data_dir.



18
19
20
# File 'lib/wmap/site_tracker.rb', line 18

# Reader for the @data_dir instance variable.
def data_dir; @data_dir; end

#known_sitesObject

Returns the value of attribute known_sites.



18
19
20
# File 'lib/wmap/site_tracker.rb', line 18

# Reader for the @known_sites instance variable.
def known_sites; @known_sites; end

#max_parallelObject

Returns the value of attribute max_parallel.



18
19
20
# File 'lib/wmap/site_tracker.rb', line 18

# Reader for the @max_parallel instance variable.
def max_parallel; @max_parallel; end

#sites_fileObject

Returns the value of attribute sites_file.



18
19
20
# File 'lib/wmap/site_tracker.rb', line 18

# Reader for the @sites_file instance variable.
def sites_file; @sites_file; end

#verboseObject

Returns the value of attribute verbose.



18
19
20
# File 'lib/wmap/site_tracker.rb', line 18

# Reader for the @verbose instance variable.
def verbose; @verbose; end

Instance Method Details

#add(site) ⇒ Object

Setter to add site entry to the cache one at a time



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/wmap/site_tracker.rb', line 98

# Add a single site entry to the in-memory site store (@known_sites).
#
# Flow: normalize the input, reject already-known or malformed sites, decide whether
# the host is trusted (known CIDR for an IP host, known domain root otherwise), probe
# the site with Wmap::UrlChecker, then store the result and update the local hosts
# table. A site that was on the deactivated list is removed from that list.
#
# @param site [String] site URL; the expected format is http://your_website_name/
# @return [Object, nil] the value returned by Wmap::UrlChecker#check (indexed like a
#   hash in this method), or nil when the site is already known, is untrusted, or any
#   StandardError is raised - every `raise` below is caught by the rescue at the bottom
#   and only printed.
def add(site)
  puts "Add entry to the site store: #{site}"
  # Preliminary sanity check
  site=site.strip.downcase unless site.nil?
  if site_known?(site)
    puts  "Site already exists. Skip it: #{site}"
    return nil
  end
  # Reduce a full URL to its normalized site form
  site=normalize_url(site) if is_url?(site)
  site=url_2_site(site) if is_url?(site)
  puts "Site in standard format: #{site}" if @verbose
  raise "Exception on method #{__method__}: invalid site format of #{site}. Expected format is: http://your_website_name/" unless is_site?(site)
  trusted=false
  host=url_2_host(site)
  ip=host_2_ip(host)
  # Additional logic to refresh deactivated site, 02/12/2014
  # (the deactivated-site store is a singleton; point it at this data dir and reload it)
  deact=Wmap::SiteTracker::DeactivatedSite.instance
  deact.sites_file=@data_dir + "/" + "deactivated_sites"
  File.write(deact.sites_file, "") unless File.exist?(deact.sites_file)
  deact.load_site_stores_from_file
  # only trust either the domain or IP we know
  if is_ip?(host)
    trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
  else
    root=get_domain_root(host)
    if root.nil?
      raise "Invalid web site format. Please check your record again."
    else
      domain_tracker=Wmap::DomainTracker.instance
      domain_tracker.domains_file=@data_dir + "/" + "domains"
      File.write(domain_tracker.domains_file, "") unless File.exist?(domain_tracker.domains_file)
      domain_tracker.load_domains_from_file(domain_tracker.domains_file)
      trusted=domain_tracker.domain_known?(root)
      domain_tracker=nil
    end
  end
  # add record only if trusted
  if trusted
    # Add logic to check site status before adding it
    checker=Wmap::UrlChecker.new(:data_dir=>@data_dir).check(site)
    raise "Site is currently down. Skip #{site}" if checker.nil?
    # Skip the http site if it's un-responsive; for the https we'll keep it because we're interested in analysing the SSL layer later
    # NOTE(review): confirm is_https? is defined - the Utils::UrlMagic mixin documents is_ssl?
    if is_https?(site)
      # do nothing
    else
      # code 10000 is treated as "site down" here
      raise "Site is currently down. Skip #{site}" if checker['code']==10000
    end
    raise "Exception on add method - Fail to resolve the host-name: Host - #{host}, IP - #{ip}. Skip #{site}" unless is_ip?(ip)
    host_tracker = Wmap::HostTracker.instance
    host_tracker.data_dir= @data_dir
    host_tracker.hosts_file = host_tracker.data_dir + "/" + "hosts"
    host_tracker.load_known_hosts_from_file(host_tracker.hosts_file)
    # Update the local host table when necessary
    if is_ip?(host)
      # Case #1: Trusted site contains IP
      if host_tracker.ip_known?(host)
        # Try local reverse DNS lookup first
        puts "Local hosts table lookup for IP: #{ip}" if @verbose
        host=host_tracker.local_ip_2_host(host)
        puts "Host found from the local hosts table for #{ip}: #{host}" if @verbose
        # Swap the dotted-quad in the site URL for the host-name (mutates site in place)
        site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
      else
        # Try reverse DNS lookup over Internet as secondary precaution
        # NOTE(review): confirm ip_2_host is defined - the Utils mixin documents reverse_dns_lookup
        puts "Reverse DNS lookup for IP: #{ip}" if @verbose
        host1=ip_2_host(host)
        puts "host1: #{host1}" if @verbose
        if is_fqdn?(host1)
          if host_tracker.domain_known?(host1)
            # replace IP with host-name only if domain root is known
            puts "Host found from the Internet reverse DNS lookup for #{ip}: #{host1}" if @verbose
            host=host1
            site.sub!(/\d+\.\d+\.\d+\.\d+/,host)
          end
        end
      end
      # Adding site for Case #1
      # (site may have been rewritten above, so re-check for a duplicate key)
      raise "Site already exist! Skip #{site}" if @known_sites.key?(site)
      puts "Adding site: #{site}" if @verbose
      @known_sites[site]=Hash.new
      @known_sites[site]=checker
      # A re-added site no longer belongs on the deactivated list
      if deact.site_known?(site)
        deact.delete(site)
        deact.save!
      end
      puts "Site entry loaded: #{checker}"
      if is_fqdn?(host)
      # Add logic to update the hosts table for case #1 variance
      # -  case that reverse DNS lookup successful
        puts "Update local hosts table for host: #{host}"
        if host_tracker.host_known?(host)
          old_ip=host_tracker.local_host_2_ip(host)
          if old_ip != ip
            host_tracker.refresh(host)
            host_tracker.save!
          else
            puts "Host resolve to the same IP #{ip} - no need to update the local host table." if @verbose
          end
        else
          host_tracker.add(host)
          host_tracker.save!
        end
      end
    else
      # Case #2: Trusted site contains valid FQDN
      puts "Ading site: #{site}" if @verbose
      @known_sites[site]=Hash.new
      @known_sites[site]=checker
      # A re-added site no longer belongs on the deactivated list
      if deact.site_known?(site)
        deact.delete(site)
        deact.save!
      end
      puts "Site entry loaded: #{checker}"
      # Add logic to update the hosts table for case #2
      puts "Update local hosts table for host: #{host}"
      if host_tracker.host_known?(host)
        old_ip=host_tracker.local_host_2_ip(host)
        if old_ip != ip
          host_tracker.refresh(host)
          host_tracker.save!
        else
          # Skip - no need to update the local hosts table
        end
      else
        host_tracker.add(host)
        host_tracker.save!
      end
    end
    deact=nil
    host_tracker=nil
    return checker
  else
    puts "Problem found: untrusted Internet domain or IP. Skip #{site}"
    deact=nil
    host_tracker=nil
    return nil
  end
rescue => ee
  # Every failure above (including the explicit raises) ends here: report it and return nil
  puts "Exception on method #{__method__}: #{ee}"
  checker=nil
  deact=nil
  host_tracker=nil
  return nil
end

#bulk_add(list, num = @max_parallel) ⇒ Object Also known as: adds

Setter to add site entry to the cache in batch (from a list)



256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
# File 'lib/wmap/site_tracker.rb', line 256

# Add a list of sites to the site store, running #add for each one through
# Parallel.map with `num` processes.
#
# @param list [Array<String, nil>] candidate sites; nil and "" entries are dropped
# @param num [Integer] number of processes handed to Parallel (default: @max_parallel)
# @return [Hash] the newly added entries, keyed by each result's 'url' value
def bulk_add(list, num = @max_parallel)
  puts "Add entries to the local site store from list:\n #{list}"
  added = {}
  list = list - [nil, ""]
  if list.empty?
    puts "Error: no entry is added. Please check your list and try again."
  else
    puts "Start parallel adding on the sites:\n #{list}"
    outcomes = Parallel.map(list, :in_processes => num) { |target| add(target) }
    outcomes.each do |info|
      # nil / empty results mean the site was skipped by #add
      next if info.nil? || info.empty?
      added[info['url']] = info
    end
    @known_sites.merge!(added)
  end
  puts "Done adding site entries."
  if added.empty?
    puts "No new entry added. "
  else
    puts "New entries added: #{added}"
  end
  added
end

#bulk_delete(list) ⇒ Object Also known as: dels

Setter to delete site entries from the cache in batch (from a list)



331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
# File 'lib/wmap/site_tracker.rb', line 331

# Remove a list of sites from the site store by calling #delete on each entry.
#
# @param list [Array<String>] sites (or URLs) to remove
# @return [Array, nil] the values returned by #delete for the entries that were removed;
#   nil when the list is empty or an exception was rescued
def bulk_delete(list)
  puts "Delete entries to the local site store from list:\n #{list}" if @verbose
  removed = []
  if list.size.zero?
    puts "Error: no entry is loaded. Please check your list and try again."
  else
    list.each do |entry|
      gone = delete(url_2_site(entry))
      removed.push(gone) unless gone.nil?
    end
    puts "Done deleting sites from the list:\n #{list}"
    return removed
  end
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end

#bulk_refresh(list, num = @max_parallel) ⇒ Object Also known as: refreshs

Refresh sites in the site store in batch (from a list)



397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
# File 'lib/wmap/site_tracker.rb', line 397

# Refresh a list of sites in the site store, running #refresh for each one through
# Parallel.map with `num` processes.
#
# Every listed key is removed from @known_sites afterwards and only the entries
# that refreshed successfully are merged back in.
#
# @param list [Array<String>] site keys to refresh
# @param num [Integer] number of processes handed to Parallel (default: @max_parallel)
# @return [Hash, nil] refreshed entries keyed by 'url'; nil when an exception was rescued
def bulk_refresh(list, num = @max_parallel)
  puts "Refresh entries in the site store from list:\n #{list}" if @verbose
  refreshed = {}
  if list.size.zero?
    puts "Error: no entry is loaded. Please check your list and try again."
  else
    puts "Start parallel refreshing on the sites:\n #{list}"
    outcomes = Parallel.map(list, :in_processes => num) { |target| refresh(target) }
    outcomes.each do |info|
      next if info.nil? || info.empty?
      refreshed[info['url']] = info
    end
    # Clean up old entries, then add back the fresh ones
    list.each { |stale| @known_sites.delete(stale) }
    @known_sites.merge!(refreshed)
    puts "Done refresh sites."
  end
  return refreshed
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end

#countObject

Count numbers of entries in the site store table



90
91
92
93
94
95
# File 'lib/wmap/site_tracker.rb', line 90

# Report how many entries the site store currently holds.
#
# @return [Integer, nil] size of @known_sites; nil when an exception was rescued
def count
  puts "Counting number of entries in the site store table ..."
  total = @known_sites.size
  total
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end

#delete(site) ⇒ Object Also known as: del

Setter to remove entry from the site store one at a time



291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/wmap/site_tracker.rb', line 291

# Remove one site from the site store and move it to the deactivated-site list.
#
# @param site [String] site (or URL) to remove; it is stripped, down-cased and
#   reduced with url_2_site before the lookup
# @return [Object, nil] the stored record that was removed; nil when the site is
#   unknown or an exception was rescued
def delete(site)
  puts "Remove entry from the site store: #{site} " if @verbose
  # Deactivate the site properly by moving it to the DeactivatedSite list
  deact = Wmap::SiteTracker::DeactivatedSite.instance
  # File.join yields a valid path whether or not @data_dir ends with a slash
  deact.sites_file = File.join(@data_dir, 'deactivated_sites')
  File.write(deact.sites_file, "") unless File.exist?(deact.sites_file)
  site = url_2_site(site.strip.downcase)
  unless @known_sites.key?(site)
    puts "Entry not found. Skip #{site}"
    return nil
  end
  deact.add(site, @known_sites[site])
  deact.save!
  removed = @known_sites.delete(site)
  puts "Entry cleared: #{site}"
  removed
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end

#file_add(file) ⇒ Object

Setter to add site entry to the cache table in batch (from a file)



243
244
245
246
247
248
249
250
251
252
253
# File 'lib/wmap/site_tracker.rb', line 243

# Add every site listed in a file to the site store (via file_2_list and #bulk_add).
#
# @param file [String] path of the file listing the sites
# @return [Hash, nil] result of #bulk_add, an empty Hash when the file lists
#   nothing, or nil when an exception (e.g. missing file) was rescued
def file_add(file)
  puts "Add entries to the local site store from file: #{file}"
  unless File.exist?(file)
    raise "File non-exist. Please check your file path and name again: #{file}"
  end
  entries = file_2_list(file)
  outcome = (entries.nil? || entries.empty?) ? Hash.new : bulk_add(entries)
  puts "Done loading file #{file}. "
  outcome
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end

#file_delete(file) ⇒ Object Also known as: file_del

Setter to delete site entries from the cache in batch (from a file)



319
320
321
322
323
324
325
326
327
# File 'lib/wmap/site_tracker.rb', line 319

# Delete every site listed in a file from the site store (via file_2_list and #bulk_delete).
#
# @param file [String] path of the file listing the sites
# @return [Array, nil] result of #bulk_delete, an empty Array when the file lists
#   nothing, or nil when an exception (e.g. missing file) was rescued
def file_delete(file)
  puts "Delete entries to the local site store from file: #{file}" if @verbose
  raise "File non-exist. Please check your file path and name again: #{file}" unless File.exist?(file)
  sites = file_2_list(file)
  changes = []
  changes = bulk_delete(sites) unless sites.nil? || sites.empty?
  # Return the change list explicitly, as #file_add and #file_refresh do
  return changes
rescue => ee
  puts "Exception on method file_delete: #{ee} for file: #{file}" if @verbose
end

#file_refresh(file) ⇒ Object

Refresh sites in the site store in batch (from a file)



371
372
373
374
375
376
377
378
379
# File 'lib/wmap/site_tracker.rb', line 371

# Refresh every site listed in a file (via file_2_list and #bulk_refresh).
#
# @param file [String] path of the file listing the sites
# @return [Hash, nil] result of #bulk_refresh, an empty Hash when the file lists
#   nothing, or nil when an exception was rescued
def file_refresh(file)
  puts "Refresh entries in the site store from file: #{file}" if @verbose
  entries = file_2_list(file)
  return Hash.new if entries.nil? || entries.empty?
  return bulk_refresh(entries)
rescue => ee
  puts "Exception on method #{__method__}: #{ee} for file: #{file}" if @verbose
end

#get_ext_sitesObject Also known as: get_ext

Retrieve external hosted sites into a list



499
500
501
502
503
504
505
506
507
508
509
510
511
512
# File 'lib/wmap/site_tracker.rb', line 499

# Retrieve the externally hosted sites, i.e. the entries whose 'status' is "ext_hosted".
#
# @return [Array<String>, nil] sorted site keys; nil when a StandardError was rescued
def get_ext_sites
  puts "getter to retrieve all the external hosted sites. " if @verbose
  @known_sites.keys.select { |key| @known_sites[key]['status'] == "ext_hosted" }.sort
rescue => ee
  # Only StandardError is rescued, so Interrupt / SystemExit still propagate
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end

#get_int_sitesObject Also known as: get_int

Retrieve a list of internal hosted site URLs



516
517
518
519
520
521
522
523
524
525
526
527
528
529
# File 'lib/wmap/site_tracker.rb', line 516

# Retrieve the internally hosted sites, i.e. the entries whose 'status' is "int_hosted".
#
# @return [Array<String>, nil] sorted site keys; nil when a StandardError was rescued
def get_int_sites
  puts "getter to retrieve all the internal hosted sites." if @verbose
  @known_sites.keys.select { |key| @known_sites[key]['status'] == "int_hosted" }.sort
rescue => ee
  # Only StandardError is rescued, so Interrupt / SystemExit still propagate
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end

#get_ip_sitesObject

Retrieve a list of sites that contain an IP in the site URL



533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
# File 'lib/wmap/site_tracker.rb', line 533

# Retrieve the sites whose URL host part is an IP address rather than a host-name.
#
# @return [Array<String>, nil] sorted site keys; nil when a StandardError was rescued
def get_ip_sites
  puts "Getter to retrieve sites contain an IP instead of a host-name ." if @verbose
  @known_sites.keys.select { |key| is_ip?(url_2_host(key)) }.sort
rescue => ee
  # Only StandardError is rescued, so Interrupt / SystemExit still propagate
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end

#get_prim_uniq_sitesObject Also known as: get_prime

Retrieve the unique sites from the local site store in the primary host format



800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
# File 'lib/wmap/site_tracker.rb', line 800

# Retrieve the unique sites (see #get_uniq_sites) rewritten, where applicable, to use
# the primary host-name of their IP.
#
# For each unique site one of four cases applies: IP-only sites, sites whose IP has
# exactly one alias, and sites whose IP has no alias entry are passed through
# unchanged; a site whose IP is shared by several hosts is swapped for the site of the
# primary host, provided that site exists in the store with the same IP.
#
# @return [Array<String>] site keys in primary-host form
# @raise [RuntimeError] when the primary-host variant of a site is missing from
#   @known_sites (the rescue clause at the bottom is commented out, so exceptions
#   propagate to the caller)
def get_prim_uniq_sites
  puts "Retrieve and prime unique sites in the site store. " if @verbose
  host_tracker=Wmap::HostTracker.instance
  host_tracker.data_dir=@data_dir
  primary_host_tracker=Wmap::HostTracker::PrimaryHost.instance
  primary_host_tracker.data_dir=@data_dir
  # Step 1. Retrieve the unique site list first
  # NOTE(review): get_uniq_sites returns nil on error, which would make sites.map below raise - confirm intended
  sites=get_uniq_sites
  prim_uniq_sites=Array.new
  # Step 2. Iterate on the unique site list, spit out the site in the primary host format one at a time
  sites.map do |site|
    puts "Work on priming unique site: #{site}" if @verbose
    host=url_2_host(site)
    # case#1, for the IP only site, do nothing (presuming 'refresh_ip_sites' or 'refresh_all' method already take care of the potential discrepancy here).
    if is_ip?(host)
      prim_uniq_sites.push(site)
      next
    end
    ip=@known_sites[site]['ip']
    # case#2, for site with an unique IP, do nothing
    # (host_tracker.alias[ip] is compared against 1 / nil / > 1 below; presumably an alias count per IP - confirm)
    puts "Local hosts table entry count for #{ip}: #{host_tracker.alias[ip]}" if @verbose
    if host_tracker.alias[ip] == 1
      prim_uniq_sites.push(site)
      next
    end
    # case#3, case of multiple IPs for A DNS record, where the site IP may have 0 alias count, do nothing
    if host_tracker.alias[ip] == nil
      prim_uniq_sites.push(site)
      next
    end
    # case#4, for the site has a duplicate IP with others, we try to determine which one is the primary site
    # raise "Error: inconsistency detected on record: #{site}. Please run the following shell command to refresh it first: \n\srefresh #{site}" if tracker1.alias[ip].nil?
    if ( primary_host_tracker.known_hosts.key?(ip) and (host_tracker.alias[ip] > 1) )
      new_host=primary_host_tracker.prime(host)
      puts "Host: #{host}, New host:#{new_host}" if @verbose
      unless host==new_host
        # Build the candidate site URL by swapping in the primary host-name
        new_site=site.sub(host,new_host)
        raise "Site not found in the site tracking data repository: #{new_site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twadd #{new_site}\n" unless @known_sites.key?(new_site)
        new_ip=@known_sites[new_site]['ip']
        if new_ip==ip   # consistency check
          site=new_site
        else
          # TBD - case of multiple IPs for A DNS record
          # (the original site is kept when the IPs disagree)
          #raise "Inconsistency found on prime host entrance: #{new_ip}, #{ip}; #{new_site}, #{site}. Please refresh your entries by running the following shell command: \n\s refresh #{new_site}"
        end
      end
    end
    prim_uniq_sites.push(site)
  end
  primary_host_tracker=nil
  host_tracker=nil
  return prim_uniq_sites
#rescue => ee
# puts "Exception on method #{__method__}: #{ee}"
end

#get_redirection_url(site) ⇒ Object

Retrieve redirection URL if available



630
631
632
633
634
635
636
637
638
639
640
641
642
# File 'lib/wmap/site_tracker.rb', line 630

# Retrieve the redirection URL recorded for a site, if any.
#
# @param site [String] site key; it is stripped and down-cased before the lookup
# @return [Object, nil] the entry's 'redirection' value; nil when the site is
#   unknown or a StandardError was rescued
def get_redirection_url(site)
  puts "getter to retrieve the redirection URL from the site store." if @verbose
  site = site.strip.downcase
  return @known_sites[site]['redirection'] if @known_sites.key?(site)
  puts "Unknown site: #{site}" if @verbose
  return nil
rescue => ee
  # Only StandardError is rescued, so Interrupt / SystemExit still propagate
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end

#get_redirection_urlsObject

Retrieve a list of redirection URLs from the site store



614
615
616
617
618
619
620
621
622
623
624
625
626
627
# File 'lib/wmap/site_tracker.rb', line 614

# Retrieve every non-nil redirection URL recorded in the site store.
#
# @return [Array, nil] sorted 'redirection' values; nil when a StandardError was rescued
def get_redirection_urls
  puts "getter to retrieve all the redirection URLs from the site store." if @verbose
  @known_sites.keys.map { |key| @known_sites[key]['redirection'] }.compact.sort
rescue => ee
  # Only StandardError is rescued, so Interrupt / SystemExit still propagate
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end

#get_ssl_sitesObject

Retrieve a list of HTTPS sites from the site store



599
600
601
602
603
604
605
606
607
608
609
610
611
# File 'lib/wmap/site_tracker.rb', line 599

# Retrieve the HTTPS sites from the site store.
#
# Only keys that start with the "https://" scheme are selected, so a plain
# http site with "https" elsewhere in its URL is not included.
#
# @return [Array<String>, nil] sorted https site keys; nil when a StandardError was rescued
def get_ssl_sites
  puts "getter to retrieve https sites from the site store." if @verbose
  @known_sites.keys.select { |key| key =~ %r{\Ahttps://}i }.sort
rescue => ee
  # Only StandardError is rescued, so Interrupt / SystemExit still propagate
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end

#get_uniq_sitesObject Also known as: uniq_sites

Retrieve a list of unique sites within the known site store



550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
# File 'lib/wmap/site_tracker.rb', line 550

# Retrieve the unique sites in the store, where uniqueness is the "IP:PORT" pair.
#
# Sites with response code 10000 or 20000 (un-reachable), with an empty MD5
# finger-print, or whose host cannot be resolved to an IP are left out. When several
# sites share an IP:PORT pair, the first one encountered is kept.
#
# @return [Array<String>, nil] unique site keys; nil when a StandardError was rescued
def get_uniq_sites
  # A stray '=' after @verbose used to overwrite the flag with the next expression
  puts "Getter to retrieve unique sites containing unique IP:PORT key identifier." if @verbose
  sites = {}
  host_tracker = Wmap::HostTracker.instance
  # File.join yields a valid path whether or not @data_dir ends with a slash
  host_tracker.hosts_file = File.join(@data_dir, 'hosts')
  host_tracker.load_known_hosts_from_file
  @known_sites.each do |key, info|
    code = info['code']
    md5 = info['md5']
    # filtering out 'un-reachable' sites
    next if code == 10000 || code == 20000
    # filtering out 'empty' sites
    next if md5.nil? || md5.empty?
    # Resolve the IP only for sites that passed the cheap filters above:
    # local hosts table first, then the host_2_ip fallback
    host = url_2_host(key)
    ip = host_tracker.local_host_2_ip(host)
    ip = host_2_ip(host) if ip.nil?
    next if ip.nil?
    id = "#{ip}:#{url_2_port(key)}"
    # filtering out duplicates by 'IP:PORT' key pair
    sites[id] = key unless sites.key?(id)
  end
  sites.values
rescue => ee
  # Only StandardError is rescued, so Interrupt / SystemExit still propagate
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end

#load_site_stores_from_file(file = @sites_file) ⇒ Object

Setter to load the known sites from the site store file into an instance variable



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/wmap/site_tracker.rb', line 34

# Load the site store file into the @known_sites hash table (replacing its content).
#
# Each non-blank, non-comment line holds one record whose fields are separated by
# tabs or commas, in this order: site, ip, port, status, server, response code,
# MD5 finger-print, redirection, timestamp. Lines without a site field are skipped.
#
# @param file [String] path of the site store file (default: @sites_file); it is
#   created empty when missing
# @return [Hash, nil] the loaded @known_sites table; nil when an exception was rescued
def load_site_stores_from_file(file = @sites_file)
  puts "Loading the site store data repository from file: #{file} " if @verbose
  @known_sites = {}
  File.write(file, "") unless File.exist?(file)
  # File.foreach closes the file even when a line raises
  File.foreach(file) do |raw|
    line = raw.strip
    next if line.empty? || line.start_with?('#')
    entry = line.split(%r{\t+|\,})
    # A line made only of separators yields no fields; skip it instead of aborting the load
    next if entry[0].nil?
    site = entry[0].downcase
    puts "Loading entry: #{site} - #{entry[1]} - #{entry[3]}" if @verbose
    @known_sites[site] = {
      'ip' => entry[1],
      'port' => entry[2],
      'status' => entry[3],
      'server' => entry[4],
      'code' => entry[5].to_i,
      'md5' => entry[6],
      'redirection' => entry[7],
      'timestamp' => entry[8]
    }
  end
  puts "Successfully loading file: #{file}" if @verbose
  return @known_sites
rescue => ee
  puts "Exception on method #{__method__} for file #{file}: #{ee}"
end

Print summary report of all sites URL in the site store



718
719
720
721
722
723
724
725
726
727
# File 'lib/wmap/site_tracker.rb', line 718

# Print every site key in the site store, sorted, one per line.
#
# @return [nil]
def print_all_sites
  puts "\nSummary Report of the site store:"
  @known_sites.keys.sort.each { |entry| puts entry }
  puts "End of the summary"
rescue StandardError
  puts "Exception on method #{__method__} "
end

Print summary report of external hosted site URLs in the site store



858
859
860
861
862
863
864
865
# File 'lib/wmap/site_tracker.rb', line 858

# Print the externally hosted sites (as returned by #get_ext_sites), one per line.
#
# @return [nil]
def print_ext_sites
  puts "\nSummary Report of the External Hosted Site"
  get_ext_sites.each { |entry| puts entry }
  nil
end

Print summary report of internal hosted site URLs



869
870
871
872
873
874
875
876
# File 'lib/wmap/site_tracker.rb', line 869

# Print the internally hosted sites (as returned by #get_int_sites), one per line.
#
# @return [nil]
def print_int_sites
  puts "\nSummary Report of the Internal Hosted Site"
  get_int_sites.each { |entry| puts entry }
  nil
end

Print summary report on all sites that contain an IP in the site URL



688
689
690
691
692
693
694
695
# File 'lib/wmap/site_tracker.rb', line 688

# Print the sites whose URL contains an IP (as returned by #get_ip_sites), one per line.
#
# @return [nil]
def print_ip_sites
  puts "Print sites contain an IP instead of a host-name."
  get_ip_sites.each { |entry| puts entry }
  puts "End of report. "
rescue StandardError
  puts "Exception on method #{__method__} "
end

Retrieve and print specific information of a site in the site store



698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
# File 'lib/wmap/site_tracker.rb', line 698

# Print the stored record of one site as a single comma-separated line:
# site, ip, port, status, server, response code, MD5, redirection, timestamp.
#
# @param site [String] site key (leading/trailing whitespace is stripped)
# @return [nil] an unknown site is reported on stdout through the rescue clause
def print_site(site)
  puts "Site Information Report for: #{site}" if @verbose
  site = site.strip unless site.nil?
  raise "Unknown site: #{site}" unless @known_sites.key?(site)
  record = @known_sites[site]
  columns = %w[ip port status server code md5 redirection timestamp].map { |field| record[field].to_s }
  puts "#{site},#{columns.join(',')}"
rescue => ee
  puts "Exception on method #{__method__} for #{site}: #{ee}"
end

Print summary report of the HTTPS sites in the site store



880
881
882
883
884
885
886
887
# File 'lib/wmap/site_tracker.rb', line 880

# Print the HTTPS sites (as returned by #get_ssl_sites), one per line.
#
# @return [nil]
def print_ssl_sites
  puts "\nSummary Report of the HTTPS Sites from the Site Store"
  get_ssl_sites.each { |entry| puts entry }
  nil
end

Print summary report of unique sites in the site store



890
891
892
893
894
895
896
897
# File 'lib/wmap/site_tracker.rb', line 890

# Print a CSV-style report (header line plus one #print_site line per site) for
# the unique sites returned by #get_uniq_sites.
#
# @return [Array] the list returned by #get_uniq_sites
def print_uniq_sites
  puts "Summary Report for the Unique sites:"
  puts "Website,Primary IP,Port,Hosting Status,Server,Response Code,Site MD5 Finger-print,Site Redirection,Timestamp"
  get_uniq_sites.each { |entry| print_site(entry) }
end

#refresh(site) ⇒ Object

Setter to refresh the entry in the site store one at a time



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# File 'lib/wmap/site_tracker.rb', line 352

# Refresh one site store entry by deleting it and adding it again.
#
# @param site [String] site key; it is stripped and down-cased before the lookup
# @return [Object, nil] the value returned by #add for the re-added site; nil when
#   the site is not in the store or an exception was rescued
def refresh(site)
  puts "Refresh the local site store for site: #{site} "
  raise "Invalid site: #{site}" if site.nil? || site.empty?
  target = site.strip.downcase
  unless @known_sites.key?(target)
    puts "Error entry non exist: #{target}"
    return nil
  end
  delete(target)
  fresh = add(target)
  puts "Done refresh entry: #{target}"
  fresh
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end

#refresh_allObject

Refresh all site entries in the stores in one shot



430
431
432
433
434
435
436
437
438
439
# File 'lib/wmap/site_tracker.rb', line 430

# Refresh every entry in the site store in one shot (via #bulk_refresh).
#
# @return [Hash, nil] the refreshed entries; nil when an exception was rescued
def refresh_all
  puts "Refresh all the entries within the local site store ... "
  updated = bulk_refresh(@known_sites.keys)
  @known_sites.merge!(updated)
  puts "Done refresh all entries."
  updated
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end

#refresh_ip_sitesObject

Refresh all site entries in the stores that contains an IP instead of a hostname



442
443
444
445
446
447
448
449
450
451
452
453
# File 'lib/wmap/site_tracker.rb', line 442

# Refresh the entries whose URL contains an IP instead of a host-name, leaving out
# those recorded with response code 10000 or 20000.
#
# @return [Hash, nil] the refreshed entries; nil when an exception was rescued
def refresh_ip_sites
  puts "Refresh all entries that contain an IP address instead of a FQDN ... "
  dead_codes = [10000, 20000]
  candidates = get_ip_sites.reject { |entry| dead_codes.include?(@known_sites[entry]['code']) }
  updated = bulk_refresh(candidates)
  @known_sites.merge!(updated)
  puts "Done refresh IP sites."
  updated
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end

#refresh_uniq_sitesObject

Refresh unique sites in the site store only



382
383
384
385
386
387
388
389
390
391
392
393
394
# File 'lib/wmap/site_tracker.rb', line 382

# Refresh only the unique sites (as returned by #get_uniq_sites) via #bulk_refresh.
#
# @return [Hash, nil] the refreshed entries, an empty Hash when there are no
#   unique sites, or nil when an exception was rescued
def refresh_uniq_sites
  puts "Refresh unique site entries in the site store. " if @verbose
  uniques = get_uniq_sites
  if uniques.size.zero?
    puts "Error: no entry is refreshed. Please check your site store and try again."
    return Hash.new
  end
  return bulk_refresh(uniques)
rescue => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
end

#resolve_ip_sitesObject

Perform local host table reverse lookup for the IP sites, in hope that the hostname could now be resolved since the site was discovered



645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
# File 'lib/wmap/site_tracker.rb', line 645

# Try to resolve the IP-only sites through the local host table and refresh every
# site whose IP now maps to a host-name.
#
# @return [Array<String>, nil] sorted list of the sites that were refreshed; nil
#   when a StandardError was rescued
def resolve_ip_sites
  puts "Resolve sites that contain an IP address. Update the site cache table once a hostname is found in the local host table." if @verbose
  updates = []
  sites = get_ip_sites
  host_tracker = Wmap::HostTracker.instance
  host_tracker.data_dir = @data_dir
  sites.each do |site|
    puts "Work on resolve the IP site: #{site}" if @verbose
    # For an IP site the host part of the URL is the IP itself
    ip = url_2_host(site)
    hostname = host_tracker.local_ip_2_host(ip)
    if hostname.nil?
      puts "Can't resolve #{ip} from the local host store. Skip #{site}" if @verbose
    else
      puts "Host-name found for IP #{ip}: #{hostname}" if @verbose
      updates.push(site)
      refresh(site)
    end
  end
  updates.sort!
  puts "The following sites are now refreshed: #{updates}" if @verbose
  updates
rescue => ee
  # Only StandardError is rescued, so Interrupt / SystemExit still propagate
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  nil
end

#save_sites_to_file!(file_sites = @sites_file) ⇒ Object Also known as: save!

Save the current site store hash table into a file



73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/wmap/site_tracker.rb', line 73

# Save the current @known_sites table into a comma-separated file: two '#' header
# lines followed by one line per site, sorted by site key.
#
# @param file_sites [String] destination path (default: @sites_file)
# @return [nil] failures are reported on stdout through the rescue clause
def save_sites_to_file!(file_sites = @sites_file)
  puts "Saving the current site store table from memory to file: #{file_sites}"
  timestamp = Time.now
  # Block form closes the file even when a write raises
  File.open(file_sites, 'w') do |f|
    f.write "# Local site store created by class #{self.class} method #{__method__} at: #{timestamp}\n"
    f.write "# Website,Primary IP,Port,Hosting Status,Server,Response Code,MD5 Finger-print,Redirection,Timestamp\n"
    @known_sites.keys.sort.each do |key|
      info = @known_sites[key]
      f.write "#{key},#{info['ip']},#{info['port']},#{info['status']},#{info['server']},#{info['code']},#{info['md5']},#{info['redirection']},#{info['timestamp']}\n"
    end
  end
  puts "site store table is successfully saved: #{file_sites}"
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
end

#save_uniq_sites(file) ⇒ Object Also known as: dump

Retrieve and save unique sites information for the quarterly scan into a plain local file



731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
# File 'lib/wmap/site_tracker.rb', line 731

# Write one comma-separated line per entry returned by get_prim_uniq_sites into a
# flat report file, using the details held in @known_sites.
#
# file - path of the report file to (over)write.
#
# Returns true on success, false when a StandardError occurred (for example when a
# listed site is missing from @known_sites); the error is printed.
def save_uniq_sites(file)
  puts "Save unique sites information into a flat file: #{file}\nThis may take a long while as it go through a lengthy self correction check process, please be patient ..."
  prime_sites=get_prim_uniq_sites
  puts "Primary Sites: #{prime_sites}" if @verbose
  # Block form guarantees the handle is closed even when the 'Unknown site' error is raised mid-way.
  File.open(file,"w") do |f|
    f.write "Unique Sites Information Report\n"
    f.write "Site, IP, Port, Server, Hosting, Response Code, MD5, Redirect, Timestamps\n"
    prime_sites.each do |key|
      next if key.nil?
      site=key.strip
      # NOTE(review): "\w" is not a string escape in Ruby, so this prints "wadd" without
      # indentation; a tab ("\t") may have been intended - confirm before changing.
      raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\wadd #{site}\n" unless @known_sites.key?(site)
      entry=@known_sites[site]
      # Column order follows the header: server comes before the hosting status.
      f.write "#{site},#{entry['ip']},#{entry['port']},#{entry['server']},#{entry['status']},#{entry['code']},#{entry['md5']},#{entry['redirection']},#{entry['timestamp']}\n"
    end
  end
  puts "Done!"
  true  # success
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  false # fail
end

#save_uniq_sites_xml(file) ⇒ Object Also known as: dump_xml

Retrieve and save unique sites information for the quarterly scan into an XML file



762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
# File 'lib/wmap/site_tracker.rb', line 762

# Serialize the entries returned by get_prim_uniq_sites into an XML document
# (root > websites > site) using the details held in @known_sites, then write
# that document to the given file.
#
# file - path of the XML file to (over)write.
#
# Returns true on success, false when a StandardError occurred; the error is printed.
def save_uniq_sites_xml(file)
  puts "Save unique sites information into XML file: #{file}\nThis may take a long while as it go through lengthy self correctness check, please be patient ..."
  unique_sites=get_prim_uniq_sites
  document = Nokogiri::XML::Builder.new do |xml|
    xml.root do
      xml.websites do
        unique_sites.each do |entry|
          next if entry.nil?
          site=entry.strip
          unless @known_sites.key?(site)
            raise "Unknown site: #{site}. You may need to add it into the site store first. Execute the following shell command before trying again: \n\twmap #{site}\n"
          end
          record=@known_sites[site]
          xml.site do
            xml.name site
            xml.ip_ record['ip']
            xml.port_ record['port']
            xml.status_ record['status']
            xml.server_ record['server']
            xml.fingerprint_ record['md5']
            xml.redirection_ record['redirection']
            xml.responsecode_ record['code']
            xml.timestamp_ record['timestamp']
          end
        end
      end
    end
  end
  puts document.to_xml if @verbose
  # The file is only opened once the whole document has been built successfully.
  out=File.new(file,'w')
  out.write(document.to_xml)
  out.close
  puts "Done!"
  true
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  false
end

#search(pattern) ⇒ Object

Search potential matching sites from the site store by using simple regular expression. Note that any upper-case char in the search string will be automatically converted into lower case



672
673
674
675
676
677
678
679
680
681
682
683
684
685
# File 'lib/wmap/site_tracker.rb', line 672

# Search the site store keys with a simple, case-insensitive regular expression.
#
# pattern - regular expression source as a String; it is stripped and converted to
#           lower case before being compiled.
#
# Returns the Array of matching site keys (possibly empty), or nil when a
# StandardError occurred (e.g. nil or malformed pattern); the error is only
# printed in verbose mode.
def search(pattern)
  puts "Search site store based on the regular expression: #{pattern}" if @verbose
  # Compile once instead of rebuilding the regular expression for every key.
  matcher=Regexp.new(pattern.strip.downcase, Regexp::IGNORECASE)
  @known_sites.keys.select { |key| key =~ matcher }
# Only StandardError is rescued so that Interrupt, SystemExit and other
# non-recoverable exceptions are no longer silently swallowed.
rescue => ee
  puts "Exception on method search: #{ee}" if @verbose
  nil
end

#site_check(site) ⇒ Object Also known as: check

Quick check of the stored information of a site within the store



487
488
489
490
491
492
493
494
495
# File 'lib/wmap/site_tracker.rb', line 487

# Look up the stored record of a site in the site store.
#
# site - site or URL string; it is stripped, lower-cased and passed through url_2_site
#        before the lookup.
#
# Returns the value stored in @known_sites for the normalized site, or nil when
# there is none, when url_2_site yields nil, or when a StandardError occurred
# (the error is printed).
def site_check(site)
  raise "Web site store not loaded properly! " if @known_sites.nil?
  cleaned = site.nil? ? site : site.strip.downcase
  store_key = url_2_site(cleaned)
  store_key.nil? ? nil : @known_sites[store_key]
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  nil
end

#site_ip_known?(ip) ⇒ Boolean Also known as: siteip_known?

Quick validation check on whether an IP is already part of the site store



468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
# File 'lib/wmap/site_tracker.rb', line 468

# Tell whether any record in the site store carries the given IP in its 'ip' field.
#
# ip - IP address string; trailing newline and surrounding whitespace are removed first.
#
# Returns true when a record matches, false otherwise - including when the input
# does not pass is_ip? or when a StandardError occurred (the error is printed).
def site_ip_known?(ip)
  address=ip.chomp.strip
  return false unless is_ip?(address)
  @known_sites.keys.any? { |site| @known_sites[site]['ip']==address }
rescue => ee
  puts "Exception on method #{__method__}: #{ee}"
  false
end

#site_known?(site) ⇒ Boolean Also known as: is_known?

Quick validation of whether a site is already covered by the site store



456
457
458
459
460
461
462
463
464
# File 'lib/wmap/site_tracker.rb', line 456

# Tell whether a site is already present in the site store.
#
# site - site or URL string; it is stripped, lower-cased and passed through url_2_site
#        before the lookup.
#
# Returns true when the normalized site is a key of @known_sites, false otherwise -
# including when url_2_site yields nil or when a StandardError occurred (the error
# is printed).
def site_known?(site)
  raise "Web site store not loaded properly! " if @known_sites.nil?
  site=site.strip.downcase unless site.nil?
  site=url_2_site(site)
  # A predicate should answer with a real boolean, not nil, when the input cannot be normalized.
  return false if site.nil?
  @known_sites.key?(site)
rescue => ee
  puts "Error checking web site #{site} against the site store: #{ee}"
  false
end