Class: Wmap::UrlChecker

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/wmap/url_checker.rb

Overview

A quick checker class to identify / finger-print a URL / site

Constant Summary

Constants included from Wmap::Utils::DomainRoot

Wmap::Utils::DomainRoot::File_ccsld, Wmap::Utils::DomainRoot::File_cctld, Wmap::Utils::DomainRoot::File_gtld, Wmap::Utils::DomainRoot::File_tld

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#cidr_2_ips, #file_2_hash, #file_2_list, #get_nameserver, #get_nameservers, #host_2_ip, #host_2_ips, #is_cidr?, #is_fqdn?, #is_ip?, #list_2_file, #reverse_dns_lookup, #sort_ips, #valid_dns_record?, #zone_transferable?

Methods included from Wmap::Utils::Logger

#wlog

Methods included from Wmap::Utils::UrlMagic

#create_absolute_url_from_base, #create_absolute_url_from_context, #host_2_url, #is_site?, #is_ssl?, #is_url?, #make_absolute, #normalize_url, #url_2_host, #url_2_path, #url_2_port, #url_2_site, #urls_on_same_domain?

Methods included from Wmap::Utils::DomainRoot

#get_domain_root, #get_sub_domain, #is_domain_root?, #print_ccsld, #print_cctld, #print_gtld

Constructor Details

#initialize(params = {}) ⇒ UrlChecker

Returns a new instance of UrlChecker.



20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/wmap/url_checker.rb', line 20

# Build a new checker.
#
# Recognised keys in +params+ (each falls back to the default shown):
#   :verbose      - print progress / error messages (false)
#   :data_dir     - data directory (the 'data/' folder two levels above this file)
#   :http_timeout - HTTP timeout value (5000; other methods divide it by 1000.0 to get seconds)
#   :max_parallel - default worker count used by url_workers (40)
#
# The per-URL caches and the SSL version hint always start out empty / nil.
def initialize(params = {})
	# Caller-tunable settings
	@verbose = params.fetch(:verbose) { false }
	@data_dir = params.fetch(:data_dir) { "#{File.dirname(__FILE__)}/../../data/" }
	@http_timeout = params.fetch(:http_timeout) { 5000 }
	@max_parallel = params.fetch(:max_parallel) { 40 }
	# SSL protocol hint; nil until something assigns it
	@ssl_version = nil
	# Per-URL result caches, keyed by URL
	@url_code = {}
	@url_redirection = {}
	@url_finger_print = {}
	@url_server = {}
end

Instance Attribute Details

#data_dir ⇒ Object

Returns the value of attribute data_dir.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the data directory setting.
#
# @return [Object] the current value of @data_dir (the constructor takes it
#   from params[:data_dir], defaulting to a path relative to this file)
def data_dir
  @data_dir
end

#http_timeout ⇒ Object

Returns the value of attribute http_timeout.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the HTTP timeout setting.
#
# @return [Object] the current value of @http_timeout (constructor default is
#   5000; the request methods divide it by 1000.0 before handing it to Net::HTTP)
def http_timeout
  @http_timeout
end

#max_parallel ⇒ Object

Returns the value of attribute max_parallel.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the parallelism setting.
#
# @return [Object] the current value of @max_parallel (constructor default is
#   40; url_workers uses it as the default number of processes)
def max_parallel
  @max_parallel
end

#verbose ⇒ Object

Returns the value of attribute verbose.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the verbosity flag.
#
# @return [Object] the current value of @verbose (constructor default is
#   false; when truthy the methods of this class print progress messages)
def verbose
  @verbose
end

Instance Method Details

#get_cert_cn(url) ⇒ Object Also known as: get_cn

Retrieve the remote web server's X509 certificate as clear text, then extract and return the common name (CN) field from it



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/wmap/url_checker.rb', line 296

# Fetch the certificate text for +url+ (via get_certificate) and pull the
# common name (CN) out of its "Subject:" line.
#
# @param url [String] the https URL whose certificate should be inspected
# @return [String, nil] the CN value with surrounding whitespace removed, or
#   nil when no certificate text, Subject line or CN field is found, or when
#   an error occurs
def get_cert_cn (url)
	# Log the URL here: the certificate has not been fetched yet, and naming the
	# not-yet-assigned `cert` local at this point raised NameError in verbose mode.
	puts "Extract the common name field from a X509 cert: #{url}" if @verbose
	begin
		cert=get_certificate(url)
		return nil if cert.nil?
		# Only the line that starts with "Subject:" is wanted; anchoring at the line
		# start keeps "Issuer:" and the "X509v3 Subject ..." extension lines out.
		subject=cert[/^[ \t]*Subject[ \t]*:(.+)$/i, 1]
		return nil if subject.nil?
		# Accept both "CN=value" and "CN = value" spellings and stop at the next
		# field separator so trailing fields are not returned as part of the name.
		cn=subject[/\bCN\s*=\s*([^,\/]+)/i, 1]
		return cn.nil? ? nil : cn.strip
	rescue StandardError => ee
		puts "Error on method #{__method__} from #{cert}: #{ee}" if @verbose
	end
	return nil
end

#get_certificate(url) ⇒ Object Also known as: get_cert

Retrieve the remote web server's certificate and return its content as a text string



277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
# File 'lib/wmap/url_checker.rb', line 277

# Download the certificate presented by the server behind +url+ and render it
# as human-readable text.
#
# The URL is stripped and must satisfy is_ssl?. The request is a GET issued
# through HTTPClient with peer verification switched off (VERIFY_NONE), and
# the certificate is read from the response's peer_cert.
#
# @param url [String] the URL to connect to
# @return [String, nil] the certificate text, or nil after any failure (the
#   failure message is printed regardless of the verbose flag)
def get_certificate(url)
	puts "Retrieve the remote web server SSL certificate in clear text: #{url}" if @verbose
	begin
		url = url.strip
		if !is_ssl?(url)
			raise "Invalid URL string: #{url}"
		end
		fetcher = HTTPClient.new
		# Verification is off on purpose: the certificate is wanted even when it would not validate
		fetcher.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
		presented = fetcher.get(url).peer_cert
		OpenSSL::X509::Certificate.new(presented).to_text
	rescue Exception => ee
		puts "Exception on method #{__method__} from #{url}: #{ee}"
		nil
	end
end

#get_server_header(url) ⇒ Object

Test the URL / site and return the web server type from the HTTP header “server” field



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
# File 'lib/wmap/url_checker.rb', line 216

# Request +url+ and return the value of the HTTP "Server" response header with
# commas replaced by spaces.
#
# The result is stored in @url_server under the normalised (stripped,
# lower-cased) URL on success as well as on failure, so the cache lookup in
# url_worker can find it afterwards.
#
# @param url [String] the URL to probe
# @return [String, nil] the header value; nil when the response carries no
#   Server header; on failure, whatever had been determined so far (an empty
#   string when the URL is rejected as invalid)
def get_server_header (url)
	begin
		puts "Retrieve the server header field from the url: #{url}" if @verbose
		server=String.new
		raise "Invalid url: #{url}" unless is_url?(url)
		url=url.strip.downcase
		timeo = @http_timeout/1000.0
		uri = URI.parse(url)
		# Called for its side effects only: response_code records the status in
		# @url_code and may set @ssl_version after its SSLv3 fallback.
		response_code(url)
		http = Net::HTTP.new(uri.host, uri.port)
		http.open_timeout = timeo
		http.read_timeout = timeo
		if (url =~ /https\:/i)
			http.use_ssl = true
			# Bypass the remote web server cert validation test
			http.verify_mode = OpenSSL::SSL::VERIFY_NONE
			http.ssl_version = @ssl_version
		end
		request = Net::HTTP::Get.new(uri.request_uri)
		response = http.request(request)
		server=response["server"]
		# The header is optional; leave nil alone instead of failing on nil.gsub
		server=server.tr(',', ' ') unless server.nil?
		# Cache the successful lookup too (it used to be cached only on failure)
		@url_server[url]=server
		return server
	rescue StandardError => ee
		puts "Exception on method get_server_header for URL #{url}: #{ee}" if @verbose
		@url_server[url]=server
		return server
	end
end

#redirect_location(url) ⇒ Object Also known as: location

Test the URL / site and return the redirection location (3xx response code only)



178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/wmap/url_checker.rb', line 178

# Report where +url+ redirects to.
#
# The status is first obtained through response_code; only when it falls in
# the 3xx range is a second GET issued to read the "Location" header. The
# outcome is recorded in @url_redirection whether or not the lookup succeeds.
#
# @param url [String] the URL to probe
# @return [String, nil] the Location header of a redirect response, or the
#   empty string when the URL is invalid, is not a redirect, or an error occurs
def redirect_location(url)
	puts "Test the redirection location for the url: #{url}" if @verbose
	location = ""
	raise "Invalid url: #{url}" unless is_url?(url)
	url = url.strip.downcase
	wait_secs = @http_timeout/1000.0
	target = URI.parse(url)
	status = response_code(url)
	if status.between?(300, 399)
		conn = Net::HTTP.new(target.host, target.port)
		conn.open_timeout = wait_secs
		conn.read_timeout = wait_secs
		if url =~ /https\:/i
			conn.use_ssl = true
			# Certificate validation is skipped on purpose
			conn.verify_mode = OpenSSL::SSL::VERIFY_NONE
			conn.ssl_version = @ssl_version
		end
		get = Net::HTTP::Get.new(target.request_uri)
		reply = conn.request(get)
		puts "Response: #{reply}" if @verbose
		location = reply['location'] if reply.is_a?(Net::HTTPRedirection)
	end
	@url_redirection[url] = location
	location
rescue Exception => ee
	puts "Exception on method redirect_location for URL #{url}: #{ee}" if @verbose
	@url_redirection[url] = location
	location
end

#response_body_md5(url) ⇒ Object Also known as: md5

Use MD5 algorithm to fingerprint the URL / site response payload (web page content)



247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
# File 'lib/wmap/url_checker.rb', line 247

# Fingerprint the page served at +url+ by hashing the response body with MD5.
#
# On success the digest is also stored in @url_finger_print under the
# normalised (stripped, lower-cased) URL.
#
# @param url [String] the URL to fetch
# @return [String, nil] the hex MD5 digest of the body, or nil when the URL is
#   invalid or the request fails (nothing is cached in that case)
def response_body_md5(url)
	puts "MD5 finger print page body content: #{url}" if @verbose
	begin
		raise "Invalid url: #{url}" unless is_url?(url)
		url = url.strip.downcase
		wait_secs = @http_timeout/1000.0
		target = URI.parse(url)
		conn = Net::HTTP.new(target.host, target.port)
		conn.open_timeout = wait_secs
		conn.read_timeout = wait_secs
		if url =~ /https\:/i
			conn.use_ssl = true
			# Certificate validation is skipped on purpose
			conn.verify_mode = OpenSSL::SSL::VERIFY_NONE
			conn.ssl_version = @ssl_version
		end
		get = Net::HTTP::Get.new(target.request_uri)
		payload = conn.request(get).body.to_s
		digest = payload.nil? ? "" : Digest::MD5.hexdigest(payload)
		@url_finger_print[url] = digest
		digest
	rescue Exception => ee
		puts "Exception on method #{__method__}: #{ee}" if @verbose
	end
end

#response_code(url) ⇒ Object Also known as: query

Test the URL and return the response code



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/wmap/url_checker.rb', line 126

# Issue a GET against +url+ and report the HTTP status code.
#
# The result is stored in @url_code under the normalised (stripped,
# lower-cased) URL.
#
# Special values:
#   10000 - the URL is invalid or the request failed in an unclassified way
#   104   - the connection was reset by the peer (Errno::ECONNRESET)
#
# When an https request times out, one retry is made with the SSL version
# forced to :SSLv3; if that retry succeeds the version is remembered in
# @ssl_version for the other request methods.
#
# @param url [String] the URL to probe
# @return [Integer] the HTTP status code or one of the special values above
# @raise [OpenSSL::SSL::SSLError] only from the SSLv3 retry; url_worker
#   rescues this class explicitly, so it is deliberately left to propagate
def response_code(url)
	puts "Check the http response code on the url: #{url}" if @verbose
	code = 10000	# All unknown url connection exceptions go here
	begin
		raise "Invalid url: #{url}" unless is_url?(url)
		url=url.strip.downcase
		timeo = @http_timeout/1000.0
		uri = URI.parse(url)
		http = Net::HTTP.new(uri.host, uri.port)
		http.open_timeout = timeo
		http.read_timeout = timeo
		if (url =~ /https\:/i)
			http.use_ssl = true
			# Bypass the remote web server cert validation test
			http.verify_mode = OpenSSL::SSL::VERIFY_NONE
		end
		request = Net::HTTP::Get.new(uri.request_uri)
		response = http.request(request)
		puts "Server response the following: #{response}" if @verbose
		code = response.code.to_i
		@url_code[url]=code
		puts "Response code on #{url}: #{code}" if @verbose
		return code
	rescue StandardError => ee
		puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
		case ee
		when Errno::ECONNRESET
			# "Connection reset by peer"
			code=104
		when Timeout::Error
			# Quick fix: retry a timed-out ssl session once with SSLv3. The http/request
			# guard covers a timeout raised before the connection objects existed.
			if http && request && url =~ /https\:/i
				begin
					http.ssl_version = :SSLv3
					response = http.request(request)
					code = response.code.to_i
					@ssl_version = http.ssl_version
				rescue OpenSSL::SSL::SSLError
					# Keep the established behavior: url_worker handles this class itself
					raise
				rescue StandardError => retry_error
					# A second timeout (or any other failure) used to escape from this
					# handler; it now maps to the generic 10000 code like every other
					# unknown connection problem.
					puts "Exception on method #{__method__} for #{url} (SSLv3 retry): #{retry_error}" if @verbose
				end
			end
		end
		# Errno::ECONNABORTED, Errno::ETIMEDOUT and everything else keep 10000
		@url_code[url]=code
		return code
	end
end

#url_worker(url) ⇒ Object Also known as: check

Main worker method to perform various checks on the URL / site



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/wmap/url_checker.rb', line 34

# Run every check on a single URL and collect the findings in one hash.
#
# Each of the four probes (status code, redirect location, body MD5, server
# header) is skipped when its per-URL cache already holds an entry for the
# normalised (stripped, lower-cased) URL.
#
# @param url [String] the URL / site to check
# @return [Hash, nil] on success a hash with the string keys 'ip', 'port',
#   'url', 'code', 'redirection', 'md5', 'server', 'timestamp' and 'status'
#   ("int_hosted" when Wmap::CidrTracker#ip_trusted? accepts the IP, otherwise
#   "ext_hosted"); when an OpenSSL::SSL::SSLError surfaces, a reduced hash with
#   'code' 20000, the error text in 'server' and no 'status'; nil on any other
#   error (the message is always printed)
def url_worker (url)
	puts "Checking out an unknown URL: #{url}" if @verbose
	begin
		url=url.strip.downcase
		raise "Invalid URL format: #{url}" unless is_url?(url)
		timestamp=Time.now
		host=url_2_host(url)
		ip=host_2_ip(host)
		port=url_2_port(url)
		# Prefer cached answers; the blocks run only on a cache miss
		code=@url_code.fetch(url) { response_code(url) }
		loc=@url_redirection.fetch(url) { redirect_location(url) }
		fp=@url_finger_print.fetch(url) { response_body_md5(url) }
		server=@url_server.fetch(url) { get_server_header(url) }
		# save the data
		checker={
			'ip'=>ip,
			'port'=>port,
			'url'=>url,
			'code'=>code,
			'redirection'=>loc,
			'md5'=>fp,
			'server'=>server,
			'timestamp'=>timestamp
		}
		trusted=Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
		checker['status']=trusted ? "int_hosted" : "ext_hosted"
		return checker
	rescue OpenSSL::SSL::SSLError => es  # handler to temporarily hold the openssl bug at bay:  SSL_set_session: unable to find ssl method
		return {
			'ip'=>ip,
			'port'=>port,
			'url'=>url,
			'code'=>20000,
			'server'=>"Unknown SSL error: #{es}",
			# Same key as the success record (this used to be the stray key 'md')
			'md5'=>nil,
			'redirection'=>nil,
			'timestamp'=>timestamp
		}
	rescue StandardError => ee
		puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
		return nil
	end
end

#url_workers(targets, num = @max_parallel) ⇒ Object Also known as: checks

Parallel scanner: uses the ‘parallel’ fork manager to spawn a number of child processes that check multiple URLs simultaneously



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/wmap/url_checker.rb', line 99

# Check many URLs at once by handing each one to url_worker through
# Parallel.map.
#
# Blank strings and nils are removed from +targets+ first; worker results that
# are nil or empty are left out of the returned list.
#
# @param targets [Array<String>] the URLs to check
# @param num [Integer] value passed to Parallel as :in_processes (defaults to @max_parallel)
# @return [Array<Hash>, nil] the collected url_worker results, or nil when an
#   exception is raised along the way
def url_workers(targets, num=@max_parallel)
	collected = []
	targets -= ["", nil]
	unless targets.size == 0
		puts "Start the url checker on the targets:\n #{targets}"
		outcomes = Parallel.map(targets, :in_processes => num) { |target| url_worker(target) }
		outcomes.each do |outcome|
			collected << outcome unless outcome.nil? || outcome.empty?
		end
	end
	collected
rescue Exception => ee
	puts "Exception on method #{__method__}: #{ee}" if @verbose
	nil
end