Class: Wmap::UrlChecker

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/wmap/url_checker.rb

Overview

A quick checker class to identify / finger-print a URL / site

Constant Summary

Constants included from Wmap::Utils::DomainRoot

Wmap::Utils::DomainRoot::File_ccsld, Wmap::Utils::DomainRoot::File_cctld, Wmap::Utils::DomainRoot::File_gtld, Wmap::Utils::DomainRoot::File_tld

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

#cidr_2_ips, #file_2_hash, #file_2_list, #get_nameserver, #get_nameservers, #host_2_ip, #host_2_ips, #is_cidr?, #is_fqdn?, #is_ip?, #list_2_file, #reverse_dns_lookup, #sort_ips, #valid_dns_record?, #zone_transferable?

Methods included from Wmap::Utils::Logger

#wlog

Methods included from Wmap::Utils::UrlMagic

#create_absolute_url_from_base, #create_absolute_url_from_context, #host_2_url, #is_site?, #is_ssl?, #is_url?, #make_absolute, #normalize_url, #url_2_host, #url_2_path, #url_2_port, #url_2_site, #urls_on_same_domain?

Methods included from Wmap::Utils::DomainRoot

#get_domain_root, #get_sub_domain, #is_domain_root?, #print_ccsld, #print_cctld, #print_gtld

Constructor Details

#initialize(params = {}) ⇒ UrlChecker



20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/wmap/url_checker.rb', line 20

# Build a checker with its tunables and empty per-URL result caches.
#
# @param params [Hash] optional settings, looked up by symbol key:
#   :verbose (default false), :data_dir (default "<this file's dir>/../../data/"),
#   :http_timeout (default 5000) and :max_parallel (default 40)
def initialize(params = {})
  fallback_data_dir = "#{File.dirname(__FILE__)}/../../data/"

  # Caller-tunable settings
  @verbose      = params.fetch(:verbose) { false }
  @data_dir     = params.fetch(:data_dir) { fallback_data_dir }
  @http_timeout = params.fetch(:http_timeout) { 5000 }
  @max_parallel = params.fetch(:max_parallel) { 40 }

  # No SSL version is forced until one is recorded
  @ssl_version = nil

  # Per-URL result caches, each starting out as its own empty hash
  @url_code         = Hash.new
  @url_redirection  = Hash.new
  @url_finger_print = Hash.new
  @url_server       = Hash.new
end

Instance Attribute Details

#data_dir ⇒ Object

Returns the value of attribute data_dir.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the data directory setting.
#
# @return [Object] the current value of @data_dir
def data_dir
  @data_dir
end

#http_timeout ⇒ Object

Returns the value of attribute http_timeout.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the HTTP timeout setting.
#
# @return [Object] the current value of @http_timeout
def http_timeout
  @http_timeout
end

#max_parallel ⇒ Object

Returns the value of attribute max_parallel.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the parallelism setting.
#
# @return [Object] the current value of @max_parallel
def max_parallel
  @max_parallel
end

#verbose ⇒ Object

Returns the value of attribute verbose.



18
19
20
# File 'lib/wmap/url_checker.rb', line 18

# Reader for the verbosity flag.
#
# @return [Object] the current value of @verbose
def verbose
  @verbose
end

Instance Method Details

#get_cert_cn(url) ⇒ Object Also known as: get_cn

Retrieve the X509 certificate from the remote web server in clear text, then extract and return the common name (CN) field of the certificate



316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
# File 'lib/wmap/url_checker.rb', line 316

# Fetch the certificate text for +url+ via get_certificate and return the
# common name (CN) found on its "Subject:" line.
#
# Both "CN=value" and "CN = value" spellings are accepted, and the value ends
# at the next "," or "/" so that trailing subject attributes are not returned
# as part of the name. NOTE(review): a CN that itself contains a comma would
# be cut short - assumed not to occur for web-server host names.
#
# @param url [String] the URL handed to get_certificate
# @return [String, nil] the common name, or nil when the certificate text is
#   unavailable, has no Subject line / CN attribute, or an error occurs
def get_cert_cn(url)
  # Log the URL: the certificate text has not been fetched yet at this point.
  puts "Extract the common name field from a X509 cert: #{url}" if @verbose
  begin
    cert = get_certificate(url)
    return nil if cert.nil?
    # "Subject:" must be the first token on its line; this skips lines such as
    # "Subject Public Key Info:" which have no colon right after "Subject".
    subject = cert[/^[ \t]*Subject:(.*)$/i, 1]
    return nil if subject.nil?
    cn = subject[/\bCN\s*=\s*([^,\/]+)/i, 1]
    return cn.nil? ? nil : cn.strip
  rescue StandardError => ee
    puts "Error on method #{__method__} from #{url}: #{ee}" if @verbose
  end
  return nil
end

#get_certificate(url) ⇒ Object Also known as: get_cert

Retrieve the remote web server's certificate, open it, and return the certificate content as a string



297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/wmap/url_checker.rb', line 297

# Fetch +url+ with HTTPClient (peer verification switched off) and return the
# peer certificate rendered as text.
#
# @param url [String] the URL to fetch; it must satisfy is_ssl?
# @return [String, nil] the certificate text, or nil after any failure
#   (the failure is always printed)
def get_certificate(url)
  if @verbose
    puts "Retrieve the remote web server SSL certificate in clear text: #{url}"
  end
  begin
    url = url.strip
    unless is_ssl?(url)
      raise "Invalid URL string: #{url}"
    end
    fetcher = HTTPClient.new
    # Certificate validation is disabled: the goal is to read the cert, not to trust it
    fetcher.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
    peer_cert = fetcher.get(url).peer_cert
    return OpenSSL::X509::Certificate.new(peer_cert).to_text
  rescue Exception => failure
    puts "Exception on method #{__method__} from #{url}: #{failure}"
  end
  nil
end

#get_server_header(url) ⇒ Object

Test the URL / site and return the web server type from the HTTP header “server” field



236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/wmap/url_checker.rb', line 236

# Issue a GET against +url+ and return the value of the "server" response
# header, with commas replaced by spaces.
#
# The result is stored in @url_server under the normalized (stripped,
# down-cased) URL on both the success and the failure path.
#
# @param url [String] the URL to query; it must satisfy is_url?
# @return [String, nil] the header value; nil when the response carries no
#   "server" header; an empty string when the URL is rejected or the request
#   fails before a response is read
def get_server_header(url)
  begin
    puts "Retrieve the server header field from the url: #{url}" if @verbose
    server = String.new
    raise "Invalid url: #{url}" unless is_url?(url)
    url = url.strip.downcase
    timeo = @http_timeout / 1000.0
    uri = URI.parse(url)
    # Kept from the original flow: probing the status first lets response_code
    # record its findings (@url_code, and @ssl_version after an SSL retry).
    response_code(url)
    http = Net::HTTP.new(uri.host, uri.port)
    http.open_timeout = timeo
    http.read_timeout = timeo
    # Decide on TLS from the parsed scheme; matching "https:" anywhere in the
    # string would also hit plain-http URLs that carry one in their query.
    if uri.scheme == 'https'
      http.use_ssl = true
      # Bypass the remote web server cert validation test
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      http.ssl_version = @ssl_version
    end
    response = http.request(Net::HTTP::Get.new(uri.request_uri))
    server = response["server"]
    server = server.tr(',', ' ') unless server.nil?
    @url_server[url] = server
    return server
  rescue StandardError => ee
    puts "Exception on method get_server_header for URL #{url}: #{ee}" if @verbose
    @url_server[url] = server
    return server
  end
end

#landing_location(depth = 4, url) ⇒ Object

Test the URL / site and return the landing URL location, following redirects up to the given depth (default 4)



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/wmap/url_checker.rb', line 216

# Follow redirects starting at +url+ and return the URL reached.
#
# Each hop first decrements the remaining depth and stops when it drops below
# 1; otherwise the URL is probed with response_code and, on a 3xx answer,
# replaced by whatever redirect_location reports.
#
# @param depth [Integer] hop budget (default 4)
# @param url [String] the starting URL
# @return [Object, nil] the last URL value reached, or nil when an exception
#   is raised along the way
def landing_location(depth = 4, url)
  begin
    budget = depth
    current = url
    while true
      budget -= 1
      return current if budget < 1
      # Results unused; both expressions are evaluated on every hop so that a
      # bad timeout setting or an unparsable URL still lands in the rescue below.
      _timeout = @http_timeout / 1000.0
      _parsed = URI.parse(current)
      status = response_code(current)
      return current if status < 300 || status >= 400
      current = redirect_location(current)
    end
  rescue Exception => failure
    puts "Exception on method #{__method__} on URL #{current}: #{failure}" if @verbose
  end
end

#redirect_location(url) ⇒ Object Also known as: location

Test the URL / site and return the redirection location (3xx response code only)



178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/wmap/url_checker.rb', line 178

# Return the "location" header of a redirecting URL.
#
# The URL is first probed with response_code; only when that reports a 3xx
# code is a second GET issued to read the header. The outcome is stored in
# @url_redirection under the normalized (stripped, down-cased) URL.
#
# @param url [String] the URL to test; it must satisfy is_url?
# @return [String, nil] the "location" header value of a redirect response
#   (nil if that header is absent); an empty string when the URL does not
#   redirect, is rejected, or the request fails
def redirect_location(url)
  begin
    puts "Test the redirection location for the url: #{url}" if @verbose
    location = ""
    raise "Invalid url: #{url}" unless is_url?(url)
    url = url.strip.downcase
    timeo = @http_timeout / 1000.0
    uri = URI.parse(url)
    code = response_code(url)
    if code >= 300 && code < 400
      http = Net::HTTP.new(uri.host, uri.port)
      http.open_timeout = timeo
      http.read_timeout = timeo
      # Decide on TLS from the parsed scheme; matching "https:" anywhere in the
      # string would also hit plain-http URLs that carry one in their query.
      if uri.scheme == 'https'
        http.use_ssl = true
        # Bypass the remote web server cert validation test
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
        http.ssl_version = @ssl_version
      end
      response = http.request(Net::HTTP::Get.new(uri.request_uri))
      puts "Response: #{response}" if @verbose
      location = response['location'] if response.is_a?(Net::HTTPRedirection)
    end
    @url_redirection[url] = location
    return location
  rescue StandardError => ee
    puts "Exception on method redirect_location for URL #{url}: #{ee}" if @verbose
    @url_redirection[url] = location
    return location
  end
end

#response_body_md5(url) ⇒ Object Also known as: md5

Use MD5 algorithm to fingerprint the URL / site response payload (web page content)



267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/wmap/url_checker.rb', line 267

# GET +url+ and return the MD5 hex digest of the response body.
#
# A body that is nil is digested as the empty string. On success the digest
# is stored in @url_finger_print under the normalized (stripped, down-cased)
# URL; nothing is stored on failure.
#
# @param url [String] the URL to fingerprint; it must satisfy is_url?
# @return [String, nil] the 32-character hex digest, or nil when the URL is
#   rejected or the request fails
def response_body_md5(url)
  puts "MD5 finger print page body content: #{url}" if @verbose
  begin
    raise "Invalid url: #{url}" unless is_url?(url)
    url = url.strip.downcase
    timeo = @http_timeout / 1000.0
    uri = URI.parse(url)
    http = Net::HTTP.new(uri.host, uri.port)
    http.open_timeout = timeo
    http.read_timeout = timeo
    # Decide on TLS from the parsed scheme; matching "https:" anywhere in the
    # string would also hit plain-http URLs that carry one in their query.
    if uri.scheme == 'https'
      http.use_ssl = true
      # Bypass the remote web server cert validation test
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      http.ssl_version = @ssl_version
    end
    response = http.request(Net::HTTP::Get.new(uri.request_uri))
    fp = Digest::MD5.hexdigest(response.body.to_s)
    @url_finger_print[url] = fp
    return fp
  rescue StandardError => ee
    puts "Exception on method #{__method__}: #{ee}" if @verbose
  end
  nil
end

#response_code(url) ⇒ Object Also known as: query

Test the URL and return the response code



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/wmap/url_checker.rb', line 126

# GET +url+ and return the numeric HTTP status code.
#
# Failures are mapped to pseudo codes instead of being raised: 104 for
# "connection reset by peer" and 10000 for everything else. After a timeout
# on an https URL the request is retried once with ssl_version :SSLv3; when
# that retry succeeds the SSL version is remembered in @ssl_version.
# The outcome is always stored in @url_code under the URL.
#
# @param url [String] the URL to test; it must satisfy is_url?
# @return [Integer] the HTTP status code, 104, or 10000
# @raise [StandardError] only from the SSLv3 retry, and only for errors that
#   are not connection-level (e.g. OpenSSL::SSL::SSLError, which url_worker
#   rescues explicitly)
def response_code(url)
  puts "Check the http response code on the url: #{url}" if @verbose
  code = 10000  # All unknown url connection exceptions go here
  begin
    raise "Invalid url: #{url}" unless is_url?(url)
    url = url.strip.downcase
    timeo = @http_timeout / 1000.0
    uri = URI.parse(url)
    http = Net::HTTP.new(uri.host, uri.port)
    http.open_timeout = timeo
    http.read_timeout = timeo
    # Decide on TLS from the parsed scheme; matching "https:" anywhere in the
    # string would also hit plain-http URLs that carry one in their query.
    if uri.scheme == 'https'
      http.use_ssl = true
      # Bypass the remote web server cert validation test
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    request = Net::HTTP::Get.new(uri.request_uri)
    response = http.request(request)
    puts "Server response the following: #{response}" if @verbose
    code = response.code.to_i
    puts "Response code on #{url}: #{code}" if @verbose
  rescue StandardError => ee
    puts "Exception on method #{__method__} for #{url}: #{ee}" if @verbose
    case ee
    when Errno::ECONNRESET
      # "Connection reset by peer"
      code = 104
    when Timeout::Error
      # Quick fix: try again for an ssl timeout session, in case the default
      # protocol negotiation is what failed.
      if http && http.use_ssl?
        begin
          http.ssl_version = :SSLv3
          code = http.request(request).code.to_i
          @ssl_version = http.ssl_version
        rescue Timeout::Error, SystemCallError, SocketError, IOError => retry_error
          # A second connection-level failure keeps the default code instead
          # of escaping from the rescue handler.
          puts "Exception on method #{__method__} for #{url} (SSLv3 retry): #{retry_error}" if @verbose
        end
      end
    end
  end
  @url_code[url] = code
  return code
end

#url_worker(url) ⇒ Object Also known as: check

Main worker method to perform various checks on the URL / site



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/wmap/url_checker.rb', line 34

# Run every check on a single URL and collect the findings in one record.
#
# Response code, body MD5 and server header are taken from the instance
# caches (@url_code, @url_finger_print, @url_server) when the normalized URL
# is already present there; otherwise they are obtained through
# response_code, response_body_md5 and get_server_header. The landing
# location is resolved only for 3xx codes.
#
# @param url [String] the URL to check; it is stripped and down-cased first
# @return [Hash, nil] a record with the string keys 'ip', 'port', 'url',
#   'code', 'redirection', 'md5', 'server', 'timestamp' and 'status'
#   ("int_hosted" when Wmap::CidrTracker#ip_trusted? accepts the IP, else
#   "ext_hosted"). When an OpenSSL::SSL::SSLError surfaces, a reduced record
#   with code 20000 and no 'status' key is returned instead. nil is returned
#   on any other error, which is always printed.
def url_worker(url)
  begin
    puts "Checking out an unknown URL: #{url}" if @verbose
    url = url.strip.downcase
    raise "Invalid URL format: #{url}" unless is_url?(url)
    timestamp = Time.now
    host = url_2_host(url)
    ip = host_2_ip(host)
    port = url_2_port(url)
    code = @url_code.key?(url) ? @url_code[url] : response_code(url)
    loc = (code >= 300 && code < 400) ? landing_location(4, url) : nil
    fp = @url_finger_print.key?(url) ? @url_finger_print[url] : response_body_md5(url)
    server = @url_server.key?(url) ? @url_server[url] : get_server_header(url)
    # save the data
    checker = {
      'ip' => ip,
      'port' => port,
      'url' => url,
      'code' => code,
      'redirection' => loc,
      'md5' => fp,
      'server' => server,
      'timestamp' => timestamp
    }
    trusted = Wmap::CidrTracker.new(:data_dir=>@data_dir).ip_trusted?(ip)
    checker['status'] = trusted ? "int_hosted" : "ext_hosted"
    return checker
  rescue OpenSSL::SSL::SSLError => es
    # Temporary handler to keep the openssl bug at bay:
    # "SSL_set_session: unable to find ssl method".
    # Values not gathered before the error stay nil.
    return {
      'ip' => ip,
      'port' => port,
      'url' => url,
      'code' => 20000,
      'server' => "Unknown SSL error: #{es}",
      'md5' => nil,  # same key as the regular record (was misspelled 'md')
      'redirection' => nil,
      'timestamp' => timestamp
    }
  rescue StandardError => ee
    puts "Exception on method #{__method__} for #{url}: #{ee}" # if @verbose
    return nil
  end
end

#url_workers(targets, num = @max_parallel) ⇒ Object Also known as: checks

Parallel scanner - uses the fork manager 'parallel' to spawn a number of child processes that check multiple URLs simultaneously



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/wmap/url_checker.rb', line 99

# Check many URLs at once by handing each one to url_worker through
# Parallel.map.
#
# @param targets [Array] URLs to check; empty strings and nils are dropped
# @param num [Integer] passed to Parallel.map as :in_processes
#   (defaults to @max_parallel)
# @return [Array, nil] the worker results that are neither nil nor empty, or
#   nil when an exception is raised
def url_workers(targets, num = @max_parallel)
  begin
    collected = []
    targets -= ["", nil]
    unless targets.size <= 0
      puts "Start the url checker on the targets:\n #{targets}"
      outcomes = Parallel.map(targets, :in_processes => num) do |target|
        url_worker(target)
      end
      outcomes.each do |outcome|
        # Failed (nil) and empty worker results are left out
        collected << outcome unless outcome.nil? || outcome.empty?
      end
    end
    return collected
  rescue Exception => failure
    puts "Exception on method #{__method__}: #{failure}" if @verbose
    return nil
  end
end