Class: Wmap::UrlChecker

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/wmap/url_checker.rb

Overview

A quick checker class to identify / finger-print a URL / site

Constant Summary

Constants included from Wmap::Utils::UrlMagic

Wmap::Utils::UrlMagic::Max_http_timeout

Constants included from Wmap::Utils::DomainRoot

Wmap::Utils::DomainRoot::File_ccsld, Wmap::Utils::DomainRoot::File_cctld, Wmap::Utils::DomainRoot::File_gtld, Wmap::Utils::DomainRoot::File_tld

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#cidr_2_ips, #file_2_hash, #file_2_list, #get_nameserver, #get_nameservers, #host_2_ip, #host_2_ips, #is_cidr?, #is_fqdn?, #is_ip?, #list_2_file, #reverse_dns_lookup, #sort_ips, #valid_dns_record?, #zone_transferable?

Methods included from Wmap::Utils::Logger

#wlog

Methods included from Wmap::Utils::UrlMagic

#create_absolute_url_from_base, #create_absolute_url_from_context, #host_2_url, #is_site?, #is_ssl?, #is_url?, #landing_location, #make_absolute, #normalize_url, #open_page, #redirect_location, #response_code, #url_2_host, #url_2_path, #url_2_port, #url_2_site, #urls_on_same_domain?

Methods included from Wmap::Utils::DomainRoot

#get_domain_root, #get_domain_root_by_ccsld, #get_domain_root_by_cctld, #get_domain_root_by_tlds, #get_sub_domain, #is_domain_root?, #print_ccsld, #print_cctld, #print_gtld

Constructor Details

#initialize(params = {}) ⇒ UrlChecker

Returns a new instance of UrlChecker.



20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/wmap/url_checker.rb', line 20

# Build a new checker.
#
# params is an options hash; recognised keys (all optional):
#   :verbose      - print progress / error messages when true (default false)
#   :data_dir     - data directory (default: '../../data/' relative to this file)
#   :http_timeout - HTTP timeout; the request methods divide it by 1000.0
#                   before handing it to Net::HTTP (default 5000)
#   :max_parallel - default number of worker processes for #url_workers (default 40)
def initialize(params = {})
  default_data_dir = File.dirname(__FILE__) + '/../../data/'

  # Caller-tunable settings, each falling back to its default when absent
  @verbose      = params.fetch(:verbose, false)
  @data_dir     = params.fetch(:data_dir, default_data_dir)
  @http_timeout = params.fetch(:http_timeout, 5000)
  @max_parallel = params.fetch(:max_parallel, 40)

  # nil is passed straight through to Net::HTTP#ssl_version by the request methods
  @ssl_version = nil

  # Per-instance lookup tables keyed by URL
  @url_code, @url_redirection, @url_finger_print, @url_server = {}, {}, {}, {}
end

Instance Attribute Details

#data_dir ⇒ Object

Returns the value of attribute data_dir.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for @data_dir, which the constructor fills from the :data_dir option.
def data_dir
  return @data_dir
end

#http_timeout ⇒ Object

Returns the value of attribute http_timeout.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for @http_timeout, which the constructor fills from the :http_timeout option.
def http_timeout
  return @http_timeout
end

#max_parallel ⇒ Object

Returns the value of attribute max_parallel.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for @max_parallel, which the constructor fills from the :max_parallel option.
def max_parallel
  return @max_parallel
end

#verbose ⇒ Object

Returns the value of attribute verbose.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for @verbose, which the constructor fills from the :verbose option.
def verbose
  return @verbose
end

Instance Method Details

#get_cert_cn(url) ⇒ Object Also known as: get_cn

Retrieve the X.509 certificate in clear text from the remote web server, then extract and return the common name (CN) field of the certificate



300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
# File 'lib/wmap/url_checker.rb', line 300

# Fetch the certificate text for +url+ via #get_certificate and return the
# common name (CN) found on its "Subject:" line.
#
# url - the URL whose certificate should be inspected.
#
# Returns the CN as a String, or nil when the certificate could not be
# retrieved, has no Subject line, or the Subject carries no CN. Any
# StandardError is reported (when verbose) and turned into nil.
def get_cert_cn(url)
  # Log the url: the certificate text has not been fetched yet at this point
  puts "Extract the common name field from a X509 cert: #{url}" if @verbose
  cert_text = get_certificate(url).to_s
  # The subject is whatever follows "Subject:" up to the end of that line
  subject = cert_text[/\n.+Subject:(.+)\n/i, 1]
  return nil if subject.nil?
  # Accept both "CN=value" and "CN = value", and stop at the next RDN
  # separator so trailing attributes are not swallowed into the name
  cn = subject[/\bCN\s*=\s*([^,\/]+)/i, 1]
  return cn.nil? ? nil : cn.strip
rescue StandardError => ee
  puts "Error on method #{__method__} from #{url}: #{ee}" if @verbose
  return nil
end

#get_certificate(url) ⇒ Object Also known as: get_cert

Retrieve the remote web server certificate, open it and return the certificate content as a string



283
284
285
286
287
288
289
290
291
292
293
294
295
296
# File 'lib/wmap/url_checker.rb', line 283

# Download the peer certificate presented at +url+ and return its
# human-readable text form (OpenSSL::X509::Certificate#to_text).
#
# url - the URL to connect to; it is stripped and must satisfy #is_ssl?.
#
# Peer verification is switched off (VERIFY_NONE) for the request.
# Returns the certificate text, or nil after printing the error when anything
# goes wrong (including a URL rejected by #is_ssl?).
def get_certificate(url)
  puts "Retrieve the remote web server SSL certificate in clear text: #{url}" if @verbose
  url = url.strip
  if !is_ssl?(url)
    raise "Invalid URL string: #{url}"
  end
  http_client = HTTPClient.new
  # Do not validate the remote certificate chain; we only want to read it
  http_client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
  peer = http_client.get(url).peer_cert
  return OpenSSL::X509::Certificate.new(peer).to_text
rescue Exception => ee
  puts "Exception on method #{__method__} from #{url}: #{ee}"
  return nil
end

#get_server_header(url) ⇒ Object

Test the URL / site and return the web server type from the HTTP header “server” field



226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/wmap/url_checker.rb', line 226

# Issue a GET request to +url+ and return the value of the "server" response
# header, with commas replaced by spaces.
#
# url - the URL to probe; it must satisfy #is_url? and is stripped and
#       down-cased before use.
#
# The result is stored in @url_server under the normalised URL so that
# #url_worker can reuse it. Returns nil when the response carries no server
# header. On any StandardError the current value of the local (an empty
# String unless the header had already been read) is cached and returned.
def get_server_header(url)
  puts "Retrieve the server header field from the url: #{url}" if @verbose
  server = String.new
  raise "Invalid url: #{url}" unless is_url?(url)
  url = url.strip.downcase
  # @http_timeout is divided by 1000.0 to obtain the value given to Net::HTTP
  timeo = @http_timeout / 1000.0
  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.open_timeout = timeo
  http.read_timeout = timeo
  # Decide on the parsed scheme: a substring match would also fire on an
  # "https:" that merely appears inside the path or query of an http URL
  if uri.scheme == 'https'
    http.use_ssl = true
    # Bypass the remote web server cert validation test
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    http.ssl_version = @ssl_version
  end
  response = http.request(Net::HTTP::Get.new(uri.request_uri))
  server = response["server"]
  # A missing header stays nil instead of tripping a NoMethodError
  server = server.tr(',', ' ') unless server.nil?
  # Remember the answer on success too, so the cache lookup can actually hit
  @url_server[url] = server
  return server
rescue StandardError => ee
  puts "Exception on method get_server_header for URL #{url}: #{ee}" if @verbose
  @url_server[url] = server
  return server
end

#response_body_md5(url) ⇒ Object Also known as: md5

Use MD5 algorithm to fingerprint the URL / site response payload (web page content)



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/wmap/url_checker.rb', line 255

# Issue a GET request to +url+ and return the MD5 hex digest of the response
# body (a nil body is digested as the empty string).
#
# url - the URL to fingerprint; it must satisfy #is_url? and is stripped and
#       down-cased before use.
#
# The digest is stored in @url_finger_print under the normalised URL.
# Returns the digest String, or nil when any StandardError occurs (the error
# is printed when verbose).
def response_body_md5(url)
  puts "MD5 finger print page body content: #{url}" if @verbose
  raise "Invalid url: #{url}" unless is_url?(url)
  url = url.strip.downcase
  # @http_timeout is divided by 1000.0 to obtain the value given to Net::HTTP
  timeo = @http_timeout / 1000.0
  uri = URI.parse(url)
  http = Net::HTTP.new(uri.host, uri.port)
  http.open_timeout = timeo
  http.read_timeout = timeo
  # Decide on the parsed scheme: a substring match would also fire on an
  # "https:" that merely appears inside the path or query of an http URL
  if uri.scheme == 'https'
    http.use_ssl = true
    # Bypass the remote web server cert validation test
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    http.ssl_version = @ssl_version
  end
  response = http.request(Net::HTTP::Get.new(uri.request_uri))
  fp = Digest::MD5.hexdigest(response.body.to_s)
  @url_finger_print[url] = fp
  return fp
rescue StandardError => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end

#url_worker(url) ⇒ Object Also known as: check

Main worker method to perform various checks on the URL / site



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/wmap/url_checker.rb', line 34

# Run the individual checks against one URL and collect the findings.
#
# url - the URL to check; it is stripped and down-cased, then must satisfy
#       #is_url?.
#
# Response code, body fingerprint and server header are taken from the
# @url_code / @url_finger_print / @url_server tables when the URL is already
# present there, otherwise they are looked up live.
#
# Returns a Hash with the string keys 'ip', 'port', 'url', 'code',
# 'redirection', 'md5', 'server', 'timestamp' and 'status'
# ("int_hosted" when Wmap::CidrTracker#ip_trusted? accepts the IP,
# "ext_hosted" otherwise).
# When an OpenSSL::SSL::SSLError is raised, a reduced Hash is returned instead:
# code 20000, the error text under 'server', nil 'md5' / 'redirection' and no
# 'status' key. Any other StandardError is printed and nil is returned.
def url_worker(url)
  puts "Checking out an unknown URL: #{url}" if @verbose
  url = url.strip.downcase
  raise "Invalid URL format: #{url}" unless is_url?(url)
  timestamp = Time.now
  host = url_2_host(url)
  ip = host_2_ip(host)
  port = url_2_port(url)
  # Prefer values already recorded for this URL; only hit the network otherwise
  code = @url_code.fetch(url) { response_code(url) }
  # Only 3xx responses are followed. The 4 is passed through to
  # landing_location unchanged - presumably a redirect depth limit; confirm
  # against that method.
  loc = (code >= 300 && code < 400) ? landing_location(4, url) : nil
  fp = @url_finger_print.fetch(url) { response_body_md5(url) }
  server = @url_server.fetch(url) { get_server_header(url) }
  trusted = Wmap::CidrTracker.new(:data_dir => @data_dir).ip_trusted?(ip)
  # save the data
  return {
    'ip' => ip,
    'port' => port,
    'url' => url,
    'code' => code,
    'redirection' => loc,
    'md5' => fp,
    'server' => server,
    'timestamp' => timestamp,
    'status' => trusted ? "int_hosted" : "ext_hosted"
  }
rescue OpenSSL::SSL::SSLError => es
  # Temporary handler keeping the OpenSSL bug at bay:
  #   SSL_set_session: unable to find ssl method
  # Uses the same 'md5' key as the normal result so consumers see one schema.
  return {
    'ip' => ip,
    'port' => port,
    'url' => url,
    'code' => 20000,
    'server' => "Unknown SSL error: #{es}",
    'md5' => nil,
    'redirection' => nil,
    'timestamp' => timestamp
  }
rescue StandardError => ee
  puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
  return nil
end

#url_workers(targets, num = @max_parallel) ⇒ Object Also known as: checks

Parallel scanner - uses the ‘parallel’ fork manager to spawn a number of child processes that check multiple URLs simultaneously



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/wmap/url_checker.rb', line 97

# Check many URLs at once by handing each one to #url_worker through
# Parallel.map.
#
# targets - list of URLs; empty strings and nils are dropped first.
# num     - value passed to Parallel.map as :in_processes
#           (defaults to @max_parallel).
#
# Returns an Array holding every non-nil, non-empty #url_worker result
# (an empty Array when there is nothing to check), or nil when an exception
# is raised along the way.
def url_workers(targets, num = @max_parallel)
  collected = []
  targets = targets - ["", nil]
  return collected unless targets.size > 0

  puts "Start the url checker on the targets:\n #{targets}"
  outcomes = Parallel.map(targets, :in_processes => num) { |target| url_worker(target) }
  # Keep only the workers that produced something
  outcomes.each do |outcome|
    collected << outcome unless outcome.nil? || outcome.empty?
  end
  return collected
rescue Exception => ee
  puts "Exception on method #{__method__}: #{ee}" if @verbose
  return nil
end