Class: Wmap::UrlChecker

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/wmap/url_checker.rb

Overview

A quick checker class to identify / finger-print a URL / site

Constant Summary

Constants included from Wmap::Utils::UrlMagic

Wmap::Utils::UrlMagic::Max_http_timeout

Constants included from Wmap::Utils::DomainRoot

Wmap::Utils::DomainRoot::File_ccsld, Wmap::Utils::DomainRoot::File_cctld, Wmap::Utils::DomainRoot::File_gtld, Wmap::Utils::DomainRoot::File_tld

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#cidr_2_ips, #file_2_hash, #file_2_list, #get_nameserver, #get_nameservers, #host_2_ip, #host_2_ips, #is_cidr?, #is_fqdn?, #is_ip?, #list_2_file, #reverse_dns_lookup, #sort_ips, #valid_dns_record?, #zone_transferable?

Methods included from Wmap::Utils::Logger

#wlog

Methods included from Wmap::Utils::UrlMagic

#create_absolute_url_from_base, #create_absolute_url_from_context, #host_2_url, #is_site?, #is_ssl?, #is_url?, #landing_location, #make_absolute, #normalize_url, #open_page, #redirect_location, #response_code, #url_2_host, #url_2_path, #url_2_port, #url_2_site, #urls_on_same_domain?

Methods included from Wmap::Utils::DomainRoot

#get_domain_root, #get_domain_root_by_ccsld, #get_domain_root_by_cctld, #get_domain_root_by_tlds, #get_sub_domain, #is_domain_root?, #print_ccsld, #print_cctld, #print_gtld

Constructor Details

#initialize(params = {}) ⇒ UrlChecker

Returns a new instance of UrlChecker.



20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/wmap/url_checker.rb', line 20

# Build a new checker: read the tunables out of +params+ and start with
# empty per-URL lookup tables.
#
# params - optional Hash of settings. Recognized keys:
#          :verbose      - print progress / error messages when true (default: false)
#          :data_dir     - data directory (default: '/../../data/' appended to this file's directory)
#          :http_timeout - timeout value; the HTTP methods divide it by 1000.0 before
#                          handing it to Net::HTTP (default: 5000)
#          :max_parallel - default worker count used by url_workers (default: 40)
def initialize(params = {})
	# Caller-tunable settings
	@verbose      = params.fetch(:verbose, false)
	@data_dir     = params.fetch(:data_dir, File.dirname(__FILE__) + '/../../data/')
	@http_timeout = params.fetch(:http_timeout, 5000)
	@max_parallel = params.fetch(:max_parallel, 40)
	# Not configurable through params; left unset here
	@ssl_version = nil
	# Per-URL lookup tables, keyed by URL string
	@url_code         = {}
	@url_redirection  = {}
	@url_finger_print = {}
	@url_server       = {}
end

Instance Attribute Details

#data_dir ⇒ Object

Returns the value of attribute data_dir.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the data_dir attribute.
#
# Returns whatever is currently stored in @data_dir.
def data_dir
  return @data_dir
end

#http_timeout ⇒ Object

Returns the value of attribute http_timeout.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the http_timeout attribute.
#
# Returns whatever is currently stored in @http_timeout.
def http_timeout
  return @http_timeout
end

#max_parallel ⇒ Object

Returns the value of attribute max_parallel.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the max_parallel attribute.
#
# Returns whatever is currently stored in @max_parallel.
def max_parallel
  return @max_parallel
end

#verbose ⇒ Object

Returns the value of attribute verbose.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the verbose attribute.
#
# Returns whatever is currently stored in @verbose.
def verbose
  return @verbose
end

Instance Method Details

#get_cert_cn(url) ⇒ Object Also known as: get_cn

Retrieve the X509 certificate in clear text from the remote web server, then extract and return the common name (CN) field from it



300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
# File 'lib/wmap/url_checker.rb', line 300

# Fetch the remote server's X509 certificate text through get_certificate
# and pull the common name (CN) out of its "Subject:" line.
#
# url - URL string, handed unchanged to get_certificate
#
# Returns the text that follows "CN=" on the subject line (up to the end of
# that line), or nil when the certificate could not be retrieved, when no
# subject line / CN entry is found, or when a StandardError is raised.
def get_cert_cn(url)
	# Log the URL here: the certificate has not been fetched yet, and
	# interpolating `cert` at this point raised NameError in verbose mode,
	# which made the method always return nil when @verbose was set.
	puts "Extract the common name field from a X509 cert: #{url}" if @verbose
	cert = get_certificate(url)
	return nil if cert.nil?
	# Second capture group = everything after "Subject:" on that line
	subject = cert[/\n(.+)Subject\:(.+)\n/i, 2]
	return nil if subject.nil?
	# nil when the subject line carries no CN entry
	return subject[/CN\=(.+)/i, 1]
rescue StandardError => ee
	# StandardError (not Exception) so Interrupt / SystemExit still propagate
	puts "Error on method #{__method__} from #{url}: #{ee}" if @verbose
	return nil
end

#get_certificate(url) ⇒ Object Also known as: get_cert

Retrieve the remote web server's certificate, open it, and return the certificate content as a string



283
284
285
286
287
288
289
290
291
292
293
294
295
296
# File 'lib/wmap/url_checker.rb', line 283

# Connect to an SSL URL with HTTPClient (peer verification switched off)
# and return the server certificate rendered as text.
#
# url - URL string; surrounding whitespace is stripped, and the URL must
#       pass is_ssl? or the lookup is abandoned
#
# Returns the certificate as a String (OpenSSL::X509::Certificate#to_text),
# or nil when the URL is rejected or a StandardError is raised.
def get_certificate(url)
	puts "Retrieve the remote web server SSL certificate in clear text: #{url}" if @verbose
	url = url.strip
	raise "Invalid URL string: #{url}" unless is_ssl?(url)
	client = HTTPClient.new
	# The goal is to read the certificate, not to trust it
	client.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
	response = client.get(url)
	cert = response.peer_cert
	cer = OpenSSL::X509::Certificate.new(cert)
	return cer.to_text
rescue StandardError => ee
	# Honour @verbose like the other methods of this class; previously this
	# message was printed unconditionally. StandardError (not Exception) so
	# Interrupt / SystemExit still propagate.
	puts "Exception on method #{__method__} from #{url}: #{ee}" if @verbose
	return nil
end

#get_server_header(url) ⇒ Object

Test the URL / site and return the web server type from the HTTP header “server” field



226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/wmap/url_checker.rb', line 226

# Issue a GET against the URL and report the HTTP "server" response header,
# with commas replaced by spaces.
#
# url - URL string; must pass is_url?, then is stripped and downcased
#
# The result is stored in @url_server under the normalized URL.
#
# Returns the header value as a String, nil when the response carries no
# "server" header, or - when a StandardError is raised - whatever had been
# collected so far (an empty String if the failure happened before the
# response was read).
def get_server_header(url)
	puts "Retrieve the server header field from the url: #{url}" if @verbose
	server = String.new
	raise "Invalid url: #{url}" unless is_url?(url)
	url = url.strip.downcase
	timeo = @http_timeout / 1000.0
	uri = URI.parse(url)
	# NOTE(review): the returned status code is not used in this method. The
	# call is kept because response_code is defined outside this view and may
	# have side effects - confirm before removing it.
	response_code(url)
	http = Net::HTTP.new(uri.host, uri.port)
	http.open_timeout = timeo
	http.read_timeout = timeo
	if url =~ /https\:/i
		http.use_ssl = true
		# Bypass the remote web server cert validation test
		http.verify_mode = OpenSSL::SSL::VERIFY_NONE
		http.ssl_version = @ssl_version
	end
	request = Net::HTTP::Get.new(uri.request_uri)
	response = http.request(request)
	server = response["server"]
	# A missing header is a normal outcome, not an error: skip the clean-up
	# instead of letting nil.gsub raise.
	server = server.gsub(/\,/, ' ') unless server.nil?
	# Remember successful lookups too; previously only the failure path
	# wrote to @url_server.
	@url_server[url] = server
	return server
rescue StandardError => ee
	# StandardError (not Exception) so Interrupt / SystemExit still propagate
	puts "Exception on method get_server_header for URL #{url}: #{ee}" if @verbose
	@url_server[url] = server
	return server
end

#response_body_md5(url) ⇒ Object Also known as: md5

Use MD5 algorithm to fingerprint the URL / site response payload (web page content)



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/wmap/url_checker.rb', line 255

# Fetch the page behind the URL with a GET request and fingerprint the
# response body with MD5.
#
# url - URL string; must pass is_url?, then is stripped and downcased
#
# On success the digest is stored in @url_finger_print under the normalized
# URL and returned as a hex String. Any exception is swallowed (reported
# only when @verbose is set) and the method then returns nil.
def response_body_md5(url)
	puts "MD5 finger print page body content: #{url}" if @verbose
	raise "Invalid url: #{url}" unless is_url?(url)
	url = url.strip.downcase
	seconds = @http_timeout / 1000.0
	target = URI.parse(url)
	conn = Net::HTTP.new(target.host, target.port)
	conn.open_timeout = seconds
	conn.read_timeout = seconds
	if url =~ /https\:/i
		conn.use_ssl = true
		# Bypass the remote web server cert validation test
		conn.verify_mode = OpenSSL::SSL::VERIFY_NONE
		conn.ssl_version = @ssl_version
	end
	reply = conn.request(Net::HTTP::Get.new(target.request_uri))
	# to_s turns a missing body into "", so there is always something to hash
	digest = Digest::MD5.hexdigest(reply.body.to_s)
	@url_finger_print[url] = digest
	return digest
rescue Exception => ee
	puts "Exception on method #{__method__}: #{ee}" if @verbose
end

#url_worker(url) ⇒ Object Also known as: check

Main worker method to perform various checks on the URL / site



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/wmap/url_checker.rb', line 34

# Main worker: run the individual checks against one URL and collect the
# findings in a Hash.
#
# url - URL string; stripped and downcased, then validated with is_url?
#
# The response code, body fingerprint and server header are taken from
# @url_code / @url_finger_print / @url_server when the URL is already a key
# there, otherwise they are looked up via response_code, response_body_md5
# and get_server_header. A redirect target is resolved through
# landing_location only for 3xx codes.
#
# Returns a Hash with the String keys 'ip', 'port', 'url', 'code',
# 'redirection', 'md5', 'server', 'timestamp' and 'status'
# ("int_hosted" when Wmap::CidrTracker#ip_trusted? accepts the IP,
# "ext_hosted" otherwise). On OpenSSL::SSL::SSLError a reduced Hash is
# returned instead (code 20000, the error text in 'server', no 'status').
# Any other StandardError is reported and nil is returned.
def url_worker(url)
	puts "Checking out an unknown URL: #{url}" if @verbose
	url = url.strip.downcase
	raise "Invalid URL format: #{url}" unless is_url?(url)
	timestamp = Time.now
	host = url_2_host(url)
	ip = host_2_ip(host)
	port = url_2_port(url)
	code = if @url_code.key?(url)
		@url_code[url]
	else
		response_code(url)
	end
	# Only redirects have a landing location worth resolving
	loc = if code >= 300 && code < 400
		landing_location(4, url)
	else
		nil
	end
	fp = if @url_finger_print.key?(url)
		@url_finger_print[url]
	else
		response_body_md5(url)
	end
	server = if @url_server.key?(url)
		@url_server[url]
	else
		get_server_header(url)
	end
	# save the data
	checker = {
		'ip' => ip,
		'port' => port,
		'url' => url,
		'code' => code,
		'redirection' => loc,
		'md5' => fp,
		'server' => server,
		'timestamp' => timestamp
	}
	if Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
		checker['status'] = "int_hosted"
	else
		checker['status'] = "ext_hosted"
	end
	return checker
rescue OpenSSL::SSL::SSLError => es  # handler to temporarily hold the openssl bug at bay:  SSL_set_session: unable to find ssl method
	# ip / port / timestamp are nil here if the error struck before they were set
	return {
		'ip' => ip,
		'port' => port,
		'url' => url,
		'code' => 20000,
		'server' => "Unknown SSL error: #{es}",
		# Same key as the regular result; this branch used to write 'md'
		'md5' => nil,
		'redirection' => nil,
		'timestamp' => timestamp
	}
rescue StandardError => ee
	# StandardError (not Exception) so Interrupt / SystemExit still propagate
	puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
	return nil
end

#url_workers(targets, num = @max_parallel) ⇒ Object Also known as: checks

Parallel scanner: uses the ‘parallel’ fork manager to spawn a number of child processes that check multiple URLs simultaneously



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/wmap/url_checker.rb', line 97

# Run url_worker over a list of URLs through Parallel.map, using +num+
# worker processes, and gather the usable results.
#
# targets - Array of URL strings; "" and nil entries are dropped first
# num     - value passed to Parallel as :in_processes (default: @max_parallel)
#
# Returns an Array holding every url_worker result that is neither nil nor
# empty (an empty Array when no targets remain). Returns nil if anything
# raises along the way.
def url_workers(targets, num=@max_parallel)
	targets -= ["", nil]
	return Array.new unless targets.size > 0
	puts "Start the url checker on the targets:\n #{targets}"
	outcomes = Parallel.map(targets, :in_processes => num) do |target|
		url_worker(target)
	end
	# Failed checks come back as nil; empty results carry nothing to keep
	return outcomes.reject { |outcome| outcome.nil? || outcome.empty? }
rescue Exception => ee
	puts "Exception on method #{__method__}: #{ee}" if @verbose
	return nil
end