Class: Arachni::BrowserCluster
- Includes:
- UI::Output, Utilities
- Defined in:
- lib/arachni/browser_cluster.rb,
lib/arachni/browser_cluster/job.rb,
lib/arachni/browser_cluster/worker.rb,
lib/arachni/browser_cluster/job/result.rb,
lib/arachni/browser_cluster/jobs/taint_trace.rb,
lib/arachni/browser_cluster/jobs/browser_provider.rb,
lib/arachni/browser_cluster/jobs/taint_trace/result.rb,
lib/arachni/browser_cluster/jobs/resource_exploration.rb,
lib/arachni/browser_cluster/jobs/taint_trace/event_trigger.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/result.rb,
lib/arachni/browser_cluster/jobs/taint_trace/event_trigger/result.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/event_trigger.rb,
lib/arachni/browser_cluster/jobs/resource_exploration/event_trigger/result.rb
Overview
Real browser driver providing DOM/JS/AJAX support.
Defined Under Namespace
Modules: Jobs. Classes: Error, Job, Worker.
Instance Attribute Summary collapse
-
#consumed_pids ⇒ Object
readonly
Returns the value of attribute consumed_pids.
-
#javascript_token ⇒ String
readonly
Javascript token used to namespace the custom JS environment.
-
#pending_job_counter ⇒ Integer
readonly
Number of pending jobs.
-
#pool_size ⇒ Integer
readonly
Amount of browser instances in the pool.
-
#sitemap ⇒ Hash<String, Integer>
readonly
List of crawled URLs with their HTTP codes.
-
#workers ⇒ Array<Worker>
readonly
Worker pool.
Instance Method Summary collapse
- #callback_for(job) ⇒ Object
- #decrease_pending_job(job) ⇒ Object
-
#done? ⇒ Bool
`true` if there are no resources to analyze and no running workers.
- #explore(resource, options = {}, &block) ⇒ Object
- #handle_job_result(result) ⇒ Object
-
#initialize(options = {}) ⇒ BrowserCluster
constructor
A new instance of BrowserCluster.
- #job_done(job) ⇒ Object
-
#job_done?(job, fail_if_not_found = true) ⇒ Bool
`true` if the `job` has been marked as finished, `false` otherwise.
-
#pop ⇒ Job
Pops a job from the queue.
- #push_to_sitemap(url, code) ⇒ Object
- #queue(job, &block) ⇒ Object
-
#shutdown(wait = true) ⇒ Object
Shuts the cluster down.
-
#skip_state(job_id, state) ⇒ Object
Used to sync operations between browser workers.
-
#skip_state?(job_id, state) ⇒ Boolean
Used to sync operations between browser workers.
- #skip_states(id) ⇒ Object
- #trace_taint(resource, options = {}, &block) ⇒ Object
- #update_skip_states(id, lookups) ⇒ Object
-
#wait ⇒ Object
Blocks until all resources have been analyzed.
- #with_browser(&block) ⇒ Object
Methods included from Utilities
#available_port, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #to_absolute, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite
Methods included from UI::Output
#debug?, #debug_off, #debug_on, #disable_only_positives, #included, #mute, #muted?, #only_positives, #only_positives?, #print_bad, #print_debug, #print_debug_backtrace, #print_debug_level_1, #print_debug_level_2, #print_debug_level_3, #print_error, #print_error_backtrace, #print_exception, #print_info, #print_line, #print_ok, #print_status, #print_verbose, #reroute_to_file, #reroute_to_file?, reset_output_options, #unmute, #verbose?, #verbose_on
Constructor Details
#initialize(options = {}) ⇒ BrowserCluster
Returns a new instance of BrowserCluster.
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/arachni/browser_cluster.rb', line 89

# Sets up the cluster's bookkeeping state and then calls `initialize_workers`.
#
# Every option is first handed to its `<key>=` writer, with the value run
# through `try_dup`. When that call raises `NoMethodError`, the raw value is
# stored in the `@<key>` instance variable instead.
#
# @param [Hash] options
#   Cluster settings, merged over a default `:pool_size` read from
#   `Options.browser_cluster.pool_size`.
def initialize( options = {} )
  { pool_size: Options.browser_cluster.pool_size }.merge( options ).each do |k, v|
    begin
      send( "#{k}=", try_dup( v ) )
    rescue NoMethodError
      instance_variable_set( "@#{k}".to_sym, v )
    end
  end

  # Used to sync operations between workers per Job#id.
  @skip_states_per_job = {}

  # Callbacks for each job per Job#id. We need to keep track of this
  # here because jobs are serialized and off-loaded to disk and thus can't
  # contain Block or Proc objects.
  @job_callbacks = {}

  # Keeps track of the amount of pending jobs distributed across the
  # cluster, by Job#id. Once a job's count reaches 0, it's passed to
  # #job_done.
  @pending_jobs = Hash.new(0)
  @pending_job_counter = 0

  # Jobs are off-loaded to disk.
  @jobs = Support::Database::Queue.new

  # Worker pool holding BrowserCluster::Worker instances.
  @workers = []

  # Stores visited resources from all workers.
  @sitemap = {}

  @mutex = Monitor.new
  @done_signal = Queue.new

  # Javascript token to share across all workers.
  @javascript_token = Utilities.generate_token

  @consumed_pids = []

  initialize_workers
end
Instance Attribute Details
#consumed_pids ⇒ Object (readonly)
Returns the value of attribute consumed_pids.
78 79 80 |
# File 'lib/arachni/browser_cluster.rb', line 78

# @return [Object] the value of the `consumed_pids` attribute.
def consumed_pids
  @consumed_pids
end
#javascript_token ⇒ String (readonly)
Returns the JavaScript token used to namespace the custom JS environment.
68 69 70 |
# File 'lib/arachni/browser_cluster.rb', line 68

# @return [String]
#   Javascript token used to namespace the custom JS environment.
def javascript_token
  @javascript_token
end
#pending_job_counter ⇒ Integer (readonly)
Returns the number of pending jobs.
76 77 78 |
# File 'lib/arachni/browser_cluster.rb', line 76

# @return [Integer] number of pending jobs.
def pending_job_counter
  @pending_job_counter
end
#pool_size ⇒ Integer (readonly)
Returns the number of browser instances in the pool.
60 61 62 |
# File 'lib/arachni/browser_cluster.rb', line 60

# @return [Integer] amount of browser instances in the pool.
def pool_size
  @pool_size
end
Instance Method Details
#callback_for(job) ⇒ Object
360 361 362 |
# File 'lib/arachni/browser_cluster.rb', line 360

# Looks up the callback registered under the job's ID.
#
# @param [#id] job
# @return [Object, nil] whatever `@job_callbacks` holds for `job.id`.
def callback_for(job)
  job_id = job.id
  @job_callbacks[job_id]
end
#decrease_pending_job(job) ⇒ Object
351 352 353 354 355 356 357 |
# File 'lib/arachni/browser_cluster.rb', line 351

# Decrements both the cluster-wide pending counter and the per-job pending
# count, then hands the job to {#job_done} once its own count is at or below
# zero.
#
# @param [#id] job
def decrease_pending_job(job)
  synchronize do
    @pending_job_counter -= 1

    remaining = (@pending_jobs[job.id] -= 1)
    job_done(job) if remaining <= 0
  end
end
#done? ⇒ Bool
Returns `true` if there are no resources to analyze and no running workers.
260 261 262 263 |
# File 'lib/arachni/browser_cluster.rb', line 260

# Calls `fail_if_shutdown` first, then reports whether the pending-job counter
# is zero.
#
# @return [Bool]
def done?
  fail_if_shutdown

  outstanding = @pending_job_counter
  outstanding == 0
end
#explore(resource, options = {}, &block) ⇒ Object
179 180 181 182 183 184 |
# File 'lib/arachni/browser_cluster.rb', line 179

# Queues a `Jobs::ResourceExploration` job for the given resource.
#
# @param [Object] resource
#   Added to the job options under the `:resource` key.
# @param [Hash] options
#   Extra options passed to `Jobs::ResourceExploration.new`.
# @param [Block] block
#   Callback forwarded to {#queue}.
#
# @return [Object] the return value of {#queue}.
def explore( resource, options = {}, &block )
  queue(
    Jobs::ResourceExploration.new( options.merge( resource: resource ) ),
    &block
  )
end
#handle_job_result(result) ⇒ Object
243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
# File 'lib/arachni/browser_cluster.rb', line 243

# Passes a job result to the callback registered for its job.
#
# Does nothing once the cluster has been shut down or when the result's job
# is already marked as done. The callback runs inside `exception_jail( false )`.
#
# @param [#job] result
# @return [nil]
def handle_job_result(result)
  return if @shutdown || job_done?(result.job)

  synchronize do
    print_debug "Got job result: #{result}"

    exception_jail(false) do
      callback = @job_callbacks[result.job.id]
      callback.call result
    end
  end

  nil
end
#job_done(job) ⇒ Object
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/arachni/browser_cluster.rb', line 203

# Marks a job as finished.
#
# Unless the job is never-ending, its skip-states and callback are discarded.
# The job's remaining pending count is subtracted from the cluster-wide
# counter and reset to zero. When the cluster-wide counter reaches zero (or
# below, in which case it is clamped to zero) a `nil` is pushed onto
# `@done_signal`.
#
# @param [#id, #never_ending?] job
# @return [true]
def job_done(job)
  synchronize do
    print_debug "Job done: #{job}"

    unless job.never_ending?
      [@skip_states_per_job, @job_callbacks].each { |store| store.delete job.id }
    end

    outstanding = @pending_jobs[job.id]
    @pending_job_counter -= outstanding
    @pending_jobs[job.id] = 0

    unless @pending_job_counter > 0
      @pending_job_counter = 0
      @done_signal << nil
    end
  end

  true
end
#job_done?(job, fail_if_not_found = true) ⇒ Bool
Returns ‘true` if the `job` has been marked as finished, `false` otherwise.
230 231 232 233 234 235 236 237 238 |
# File 'lib/arachni/browser_cluster.rb', line 230

# Tells whether a job has been marked as finished.
#
# Never-ending jobs are never reported as done. A job with no entry in
# `@pending_jobs` is reported as not done.
#
# @param [#id, #never_ending?] job
# @param [Bool] fail_if_not_found
#   When true, `fail_if_job_not_found` is called for the job first.
#
# @return [Bool]
#   `true` if the job's pending count is zero, `false` otherwise.
def job_done?(job, fail_if_not_found = true)
  return false if job.never_ending?

  synchronize do
    fail_if_job_not_found(job) if fail_if_not_found

    @pending_jobs.key?(job.id) && @pending_jobs[job.id] == 0
  end
end
#pop ⇒ Job
Pops a job from the queue and returns it.
298 299 300 301 |
# File 'lib/arachni/browser_cluster.rb', line 298

# Pops jobs off `@jobs` until one turns up that is not already done.
#
# @return [Job] the first popped job for which {#job_done?} is false.
def pop
  candidate = @jobs.pop
  # Discard jobs that were finished while still sitting in the queue.
  candidate = @jobs.pop while job_done?(candidate)
  candidate
end
#push_to_sitemap(url, code) ⇒ Object
333 334 335 |
# File 'lib/arachni/browser_cluster.rb', line 333

# Records the HTTP code for a URL in the shared sitemap.
#
# @param [String] url
# @param [Integer] code
# @return [Integer] the stored `code`.
def push_to_sitemap(url, code)
  synchronize do
    @sitemap[url] = code
  end
end
#queue(job, &block) ⇒ Object
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
# File 'lib/arachni/browser_cluster.rb', line 146

# Adds a job to the queue and registers its callback.
#
# A callback is mandatory: it must be given as `block`, or one must already be
# registered under the job's ID.
#
# @param [#id] job
# @param [Block] block
#   Callback stored under `job.id`, replacing any previous one.
#
# @return [nil]
#
# @raise [ArgumentError]
#   If no block is given and no callback is registered for the job's ID.
def queue( job, &block )
  fail_if_shutdown
  fail_if_job_done job

  @done_signal.clear

  synchronize do
    print_debug "Queueing: #{job}"

    # Validate before touching any counters. Otherwise a rejected job would
    # stay counted as pending without ever being queued, and the pending
    # counter could never return to zero.
    if !block && !@job_callbacks[job.id]
      fail ArgumentError, "No callback set for job ID #{job.id}."
    end

    @job_callbacks[job.id] = block if block

    @pending_job_counter  += 1
    @pending_jobs[job.id] += 1

    @jobs << job
  end

  nil
end
#shutdown(wait = true) ⇒ Object
Shuts the cluster down.
273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 |
# File 'lib/arachni/browser_cluster.rb', line 273

# Shuts the cluster down: flags it as shut down, empties the job queue, shuts
# down and drops every worker and finally clears the per-job bookkeeping.
#
# @param [Bool] wait
#   Passed through to each worker's `shutdown`.
#
# @return [true]
def shutdown(wait = true)
  @shutdown = true

  # Clear the jobs -- don't forget this, it also removes the disk files for
  # the contained items.
  @jobs.clear

  # Kill the browsers.
  @workers.each do |worker|
    exception_jail(false) { worker.shutdown wait }
  end
  @workers.clear

  # Very important to leave these for last, they may contain data
  # necessary to cleanly handle interrupted jobs.
  [@job_callbacks, @skip_states_per_job, @pending_jobs].each(&:clear)

  true
end
#skip_state(job_id, state) ⇒ Object
Used to sync operations between browser workers.
328 329 330 |
# File 'lib/arachni/browser_cluster.rb', line 328

# Used to sync operations between browser workers: adds `state` to the
# skip-states kept for the given job ID.
#
# @param [Object] job_id
# @param [Object] state
def skip_state(job_id, state)
  synchronize do
    skip_states(job_id) << state
  end
end
#skip_state?(job_id, state) ⇒ Boolean
Used to sync operations between browser workers.
314 315 316 317 318 |
# File 'lib/arachni/browser_cluster.rb', line 314

# Used to sync operations between browser workers: checks whether `state` is
# among the skip-states kept for the given job ID.
#
# @param [Object] job_id
# @param [Object] state
# @return [Boolean]
def skip_state?(job_id, state)
  synchronize { skip_states(job_id).include?(state) }
end
#skip_states(id) ⇒ Object
343 344 345 346 347 348 |
# File 'lib/arachni/browser_cluster.rb', line 343

# Returns the skip-state set for a job ID, creating a new
# `Support::LookUp::HashSet` on first access.
#
# @param [Object] id  Job ID.
# @return [Support::LookUp::HashSet]
def skip_states(id)
  synchronize do
    @skip_states_per_job[id] ||=
      Support::LookUp::HashSet.new(hasher: :persistent_hash)
  end
end
#trace_taint(resource, options = {}, &block) ⇒ Object
196 197 198 |
# File 'lib/arachni/browser_cluster.rb', line 196

# Queues a `Jobs::TaintTrace` job for the given resource.
#
# @param [Object] resource
#   Added to the job options under the `:resource` key.
# @param [Hash] options
#   Extra options passed to `Jobs::TaintTrace.new`.
# @param [Block] block
#   Callback forwarded to {#queue}.
#
# @return [Object] the return value of {#queue}.
def trace_taint( resource, options = {}, &block )
  queue( Jobs::TaintTrace.new( options.merge( resource: resource ) ), &block )
end
#update_skip_states(id, lookups) ⇒ Object
338 339 340 |
# File 'lib/arachni/browser_cluster.rb', line 338

# Merges `lookups` into the skip-states kept for the given job ID.
#
# @param [Object] id  Job ID.
# @param [Object] lookups
#   Passed as-is to the `merge` method of the job's skip-state set.
def update_skip_states(id, lookups)
  synchronize do
    skip_states(id).merge lookups
  end
end
#wait ⇒ Object
Blocks until all resources have been analyzed.
266 267 268 269 270 |
# File 'lib/arachni/browser_cluster.rb', line 266

# Blocks until all resources have been analyzed.
#
# Calls `fail_if_shutdown` first. When the cluster is not yet done, pops from
# `@done_signal`.
#
# @return [BrowserCluster] `self`
def wait
  fail_if_shutdown

  @done_signal.pop unless done?

  self
end
#with_browser(&block) ⇒ Object
Operates in non-blocking mode.
136 137 138 |
# File 'lib/arachni/browser_cluster.rb', line 136

# Queues a `Jobs::BrowserProvider` job with the given block as its callback.
#
# @param [Block] block  Callback forwarded to {#queue}.
# @return [Object] the return value of {#queue}.
def with_browser(&block)
  provider_job = Jobs::BrowserProvider.new
  queue(provider_job, &block)
end