Class: Arachni::Page

Inherits:
Object show all
Includes:
Utilities
Defined in:
lib/arachni/page.rb,
lib/arachni/page/dom.rb,
lib/arachni/page/scope.rb,
lib/arachni/page/dom/transition.rb

Overview

It holds page data like elements, cookies, headers, etc…

Author:

Defined Under Namespace

Classes: DOM, Error, Scope

Constant Summary collapse

# Element types a page can hold. Each symbol is also sent to the page as a
# method name (see #elements), so it doubles as the accessor name.
# Frozen so that the shared list cannot be modified at runtime.
ELEMENTS = [
    :links, :forms, :cookies, :headers, :link_templates, :jsons, :xmls
].freeze

# Default metadata kinds handled by #import_metadata.
METADATA = [ :nonce_name, :skip_dom ].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Utilities

#available_port, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #full_and_absolute_url?, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #regexp_array_match, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #to_absolute, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite

Constructor Details

#initialize(options) ⇒ Page

Needs either a `:parser`, a `:response`, or user-provided data.

Parameters:

  • options (Hash)

    Hash from which to set instance attributes.

Options Hash (options):



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# File 'lib/arachni/page.rb', line 144

# Builds a page from an options Hash.
#
# `:parser` and `:do_not_audit_elements` are consumed directly; every other
# key is assigned through the matching `key=` writer.
#
# @param  [Hash]    options
#   Hash from which to set instance attributes.
#
# @raise  [ArgumentError]
#   If `options` is empty, or if no URL can be determined afterwards.
def initialize( options )
    fail ArgumentError, 'Options cannot be empty.' if options.empty?
    # Shallow copy, so that the deletes below leave the caller's Hash intact.
    options = options.dup

    # Must exist before the writers below run (#body= calls #clear_cache).
    @cache = {}

    @do_not_audit_elements = options.delete(:do_not_audit_elements)

    # A supplied parser also provides the response.
    @cache[:parser] = options.delete(:parser)
    @response = @cache[:parser].response if @cache[:parser]

    # We need to know whether or not the page has been dynamically updated
    # with elements, in order to optimize #dup and #hash operations.
    @has_custom_elements = Set.new

    @metadata ||= {}

    # Every remaining option goes through its writer, with a copy of the value.
    options.each do |k, v|
        send( "#{k}=", try_dup( v ) )
    end

    # The DOM is always (re)built from the `:dom` options, with a reference
    # back to this page.
    @dom = DOM.new( (options[:dom] || {}).merge( page: self ) )

    fail ArgumentError, 'No URL given!' if !url

    Platform::Manager.fingerprint( self )

    # Normalize the whitelist (which may have been assigned by a writer
    # above) into a Set.
    @element_audit_whitelist ||= []
    @element_audit_whitelist   = Set.new( @element_audit_whitelist )
end

Instance Attribute Details

#cacheHash (readonly)

Returns:



120
121
122
# File 'lib/arachni/page.rb', line 120

# @return   [Hash]
#   The `@cache` Hash itself (not a copy).
def cache
  @cache
end

#domDOM

Returns DOM snapshot.

Returns:

  • (DOM)

    DOM snapshot.



111
112
113
# File 'lib/arachni/page.rb', line 111

# @return   [DOM]
#   The object held in `@dom` (DOM snapshot).
def dom
  @dom
end

#element_audit_whitelistSet<Integer> (readonly)

Returns Audit whitelist based on Element::Capabilities::Auditable#coverage_hash.



133
134
135
# File 'lib/arachni/page.rb', line 133

# @return   [Set<Integer>]
#   The `@element_audit_whitelist` Set itself (not a copy).
def element_audit_whitelist
  @element_audit_whitelist
end

#metadataHash (readonly)

Returns Holds page data that will need to persist between #clear_cache calls and other utility data.

Returns:

  • (Hash)

    Holds page data that will need to persist between #clear_cache calls and other utility data.



125
126
127
# File 'lib/arachni/page.rb', line 125

# @return   [Hash]
#   The `@metadata` Hash itself (not a copy); holds page data that needs to
#   persist between #clear_cache calls and other utility data.
def metadata
    @metadata
end

#responseHTTP::Response (readonly)

Returns HTTP response.

Returns:



115
116
117
# File 'lib/arachni/page.rb', line 115

# @return   [HTTP::Response]
#   The object held in `@response` (`nil` when none has been set).
def response
  @response
end

Class Method Details

._load(data) ⇒ Object



575
576
577
# File 'lib/arachni/page.rb', line 575

# Marshal hook, counterpart of #_dump: unmarshals `data` and passes the
# result to `new`.
#
# NOTE(review): `Marshal.load` can instantiate arbitrary objects -- confirm
# that only trusted data ever reaches this method.
#
# @param    [String]    data
#   Marshalled initialization options.
def self._load( data )
    options = Marshal.load( data )
    new( options )
end

.from_data(data) ⇒ Object

Parameters:

  • data (Hash)

    a customizable set of options



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/arachni/page.rb', line 82

# Builds a page from user-provided data, filling in defaults for everything
# that is missing: response code `200`, empty body, empty element lists and
# an empty cookie-jar.
#
# The caller's Hash is left untouched, including its nested `:response` and
# `:response[:request]` Hashes.
#
# @param    [Hash]  data
#   Page data; `:url` and `:body` are moved into `:response` when the
#   response does not already specify them.
#
# @return   The result of `new`.
def self.from_data( data )
    data = data.dup

    # `data.dup` is shallow, so copy the nested Hashes as well before
    # filling in defaults and swapping them for Request/Response objects.
    data[:response]          = (data[:response] || {}).dup
    data[:response][:code] ||= 200
    data[:response][:url]  ||= data.delete( :url )
    data[:response][:body] ||= data.delete( :body ) || ''

    data[:response][:request]         = (data[:response][:request] || {}).dup
    data[:response][:request][:url] ||= data[:response][:url]

    ELEMENTS.each { |e| data[e] ||= [] }

    data[:cookie_jar] ||= []

    data[:response][:request] = Arachni::HTTP::Request.new( data[:response][:request] )
    data[:response]           = Arachni::HTTP::Response.new( data[:response] )

    new data
end

.from_response(response) ⇒ Page

Parameters:

Returns:



59
60
61
# File 'lib/arachni/page.rb', line 59

# Builds a page by handing `response` to a new Parser and returning that
# parser's page.
#
# @param    response
#   Passed as-is to `Parser.new`.
#
# @return   [Page]
def self.from_response( response )
    parser = Parser.new( response )
    parser.page
end

.from_rpc_data(data) ⇒ Page

Parameters:

Returns:



543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
# File 'lib/arachni/page.rb', line 543

# Restores a page from RPC data (String keys).
#
# The `'response'` value and every element list are converted back through
# their class' `from_rpc_data`, all other values are kept as they are and
# the DOM is restored last, with a reference to the new page.
#
# The caller's Hash is not modified.
#
# @param    [Hash]  data
#
# @return   [Page]
def self.from_rpc_data( data )
    # Shallow copy: removing 'dom' must not alter the Hash we were given.
    data = data.dup
    dom  = data.delete('dom')

    element_names   = ELEMENTS.map(&:to_s)
    normalized_data = {}

    data.each do |name, value|

        value = case name
                    when 'response'
                        HTTP::Response.from_rpc_data( value )

                    when *element_names
                        # The class name is the list name without its last
                        # character, capitalized ('links' => :Link).
                        # NOTE(review): 'link_templates' yields
                        # :Link_template and 'jsons' yields :Json -- confirm
                        # these match the constants under Element.
                        value.map do |e|
                            Element.const_get(name[0...-1].capitalize.to_sym).from_rpc_data( e )
                        end.to_a

                    else
                        value
                end

        normalized_data[name.to_sym] = value
    end

    instance = new( normalized_data )
    # The DOM needs the page it belongs to, so it can only be restored once
    # the page exists.
    instance.instance_variable_set(
        '@dom', DOM.from_rpc_data( dom.merge( page: instance ) )
    )
    instance
end

.from_url(url, opts = {}, &block) ⇒ Page

Parameters:

  • url (String)

    URL to fetch.

  • opts (Hash) (defaults to: {})
  • block (Block)

    Block to which to pass the page object. If given, the request will be performed asynchronously. If no block is given, the page will be fetched synchronously and be returned by this method.

Options Hash (opts):

  • :precision (Integer) — default: 2

    How many times to request the page and examine changes between requests. Used to identify nonce tokens etc.

  • :http (Hash)

    HTTP request options.

Returns:



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/arachni/page.rb', line 37

# Requests `url` `:precision` times and builds a page from the collected
# responses.
#
# @param    [String]    url
#   URL to fetch.
# @param    [Hash]  opts
#   Left unmodified.
# @option   opts    [Integer]   :precision  (2)
#   How many times to request the page.
# @option   opts    [Hash]  :http
#   HTTP request options.
# @param    [Block] block
#   Called with the page once all responses have arrived. When no block is
#   given, `HTTP::Client.run` is called and the page is returned instead.
#
# @return   [Page, nil]
#   The page when no block has been given, `nil` otherwise.
def self.from_url( url, opts = {}, &block )
    responses = []

    # Keep the default in a local instead of writing it back into `opts`,
    # so that the caller's Hash is left as it was.
    precision = opts[:precision] || 2

    precision.times do
        HTTP::Client.get( url, opts[:http] || {} ) do |res|
            responses << res
            # Wait until every response has been collected.
            next if responses.size != precision
            block.call( from_response( responses ) ) if block_given?
        end
    end

    if !block_given?
        HTTP::Client.run
        from_response( responses )
    end
end

Instance Method Details

#==(other) ⇒ Object



449
450
451
# File 'lib/arachni/page.rb', line 449

# Pages are considered equal when their #hash values are equal.
#
# @param    [#hash] other
#
# @return   [Boolean]
def ==( other )
    hash == other.hash
end

#_dump(_) ⇒ Object



571
572
573
# File 'lib/arachni/page.rb', line 571

def _dump( _ )
    Marshal.dump( to_initialization_options )
end

#audit_element?(element) ⇒ Bool

Returns `true` if the element should be audited, `false` otherwise.

Parameters:

Returns:

  • (Bool)

    `true` if the element should be audited, `false` otherwise.

See Also:



226
227
228
229
230
231
232
# File 'lib/arachni/page.rb', line 226

# @param    [Integer, #coverage_hash]   element
#   Element, or the Integer to look up directly.
#
# @return   [Bool]
#   `false` if auditing has been disabled via #do_not_audit_elements, `true`
#   if the whitelist is empty; otherwise whether or not the whitelist
#   includes the element.
def audit_element?( element )
    # Explicit `false` rather than a bare `return` (nil), to honor the
    # documented boolean return value.
    return false if @do_not_audit_elements
    return true if @element_audit_whitelist.empty?

    coverage = element.is_a?( Integer ) ? element : element.coverage_hash
    @element_audit_whitelist.include?( coverage )
end

#bodyString

Returns HTTP response body.

Returns:

  • (String)

    HTTP response body.



266
267
268
269
# File 'lib/arachni/page.rb', line 266

# @return   [String]
#   The assigned body if there is one, otherwise the response body (which
#   then gets stored in `@body`); an empty String when there is neither a
#   body nor a response.
def body
    return @body if @body
    return '' unless @response

    @body = response.body
end

#body=(string) ⇒ Object

Parameters:

  • string (String)

    Page body.



273
274
275
276
277
278
# File 'lib/arachni/page.rb', line 273

# Assigns a new body, stored as a frozen copy of `string.to_s`.
#
# Resets the memoized #has_script? result and calls #clear_cache first.
#
# @param    [String]    string
#   Page body.
def body=( string )
    @has_javascript = nil
    clear_cache

    frozen_copy = string.to_s.dup
    frozen_copy.freeze
    @body = frozen_copy
end

#clear_cachePage

Note:

Will preserve caches for elements which have been externally modified.

Returns `self` with caches cleared.

Returns:

  • (Page)

    `self` with caches cleared.



347
348
349
350
351
352
353
354
355
356
357
# File 'lib/arachni/page.rb', line 347

# Drops every cache entry whose key is not listed in `@has_custom_elements`.
#
# @note Will preserve caches for elements which have been externally modified.
#
# @return   [Page]
#   `self` with caches cleared.
def clear_cache
    disposable = ELEMENTS.reject { |type| @has_custom_elements.include?( type ) }

    disposable.each do |type|
        cached = @cache[type]
        next unless cached

        # Unlink the elements from this page before letting go of them,
        # to make it easier on the GC.
        cached.each { |element| element.page = nil }
    end

    @cache.keep_if { |key, _| @has_custom_elements.include?( key ) }
    self
end

#codeString

Returns HTTP status code of the response (`0` when there is no response).

Returns:

  • (String)

    HTTP status code of the response (`0` when there is no response).



253
254
255
256
# File 'lib/arachni/page.rb', line 253

# @return
#   The assigned code if there is one, otherwise the code of the response
#   (which then gets stored in `@code`); `0` when there is neither.
def code
    return @code if @code
    return 0 unless response

    @code = response.code
end

Returns Cookies extracted from the supplied cookie-jar.

Returns:



297
298
299
# File 'lib/arachni/page.rb', line 297

# @return
#   The assigned cookie-jar if there is one, otherwise the parser's
#   cookie-jar, or an empty Array when there is no parser. The result is
#   stored in `@cookie_jar`.
def cookie_jar
    return @cookie_jar if @cookie_jar

    @cookie_jar =
        if parser
            parser.cookie_jar
        else
            []
        end
end

#do_not_audit_elementsObject

Forces #audit_element? to never approve an element for audit.



235
236
237
# File 'lib/arachni/page.rb', line 235

# Sets the `@do_not_audit_elements` flag, which #audit_element? checks before
# anything else.
#
# @return   [true]
def do_not_audit_elements
    @do_not_audit_elements = true
end

#documentNokogiri::HTML

Returns Parsed HTML document.

Returns:

  • (Nokogiri::HTML)

    Parsed HTML document.



339
340
341
# File 'lib/arachni/page.rb', line 339

# @return
#   The cached document if there is one; otherwise the parser's document or,
#   when there is no parser, the result of `Nokogiri::HTML( body )`. The
#   result is cached under `:document`.
def document
    @cache[:document] ||=
        if parser.nil?
            Nokogiri::HTML( body )
        else
            parser.document
        end
end

#dupObject



457
458
459
# File 'lib/arachni/page.rb', line 457

# @return
#   A new instance of the same class, built from this page's
#   #to_initialization_options.
def dup
    self.class.new to_initialization_options
end

#elementsArray<Element::Base>

Returns All page elements.

Returns:



317
318
319
# File 'lib/arachni/page.rb', line 317

# @return   [Array<Element::Base>]
#   The results of all ELEMENTS accessors, flattened into a single Array.
def elements
    per_type = ELEMENTS.each_with_object( [] ) do |type, lists|
        lists << send( type )
    end

    per_type.flatten
end

#elements_within_scopeArray<Element::Base>

Returns All page elements that are within the scope of the scan.

Returns:



323
324
325
326
327
328
# File 'lib/arachni/page.rb', line 323

# @return   [Array<Element::Base>]
#   Elements whose `scope.in?` holds, limited to the element types for which
#   `Options.audit.element?` holds.
def elements_within_scope
    selected = ELEMENTS.each_with_object( [] ) do |type, lists|
        # Types which are not to be audited are not even retrieved.
        next unless Options.audit.element?( type )

        lists << send( type ).select { |element| element.scope.in? }
    end

    selected.flatten.compact
end

#eql?(other) ⇒ Boolean

Returns:

  • (Boolean)


453
454
455
# File 'lib/arachni/page.rb', line 453

# Same as #==.
#
# @return   [Boolean]
def eql?( other )
    self == other
end

#has_script?Boolean

Returns `true` if the page contains client-side code, `false` otherwise.

Returns:

  • (Boolean)

    `true` if the page contains client-side code, `false` otherwise.



379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
# File 'lib/arachni/page.rb', line 379

# Checks, in order, for `script` elements, event attributes and
# `javascript:` values in `action`/`href` attributes. The result is memoized
# in `@has_javascript` (which #body= resets).
#
# @return   [Boolean]
#   `true` if the page contains client-side code, `false` otherwise.
def has_script?
    return @has_javascript if !@has_javascript.nil?

    # #text? goes first because it copes with a missing response; reading
    # the response headers first would raise on a response-less page.
    if !text? ||
        !response.headers.content_type.to_s.start_with?( 'text/html' ) ||
        !document
        return @has_javascript = false
    end

    # First check, quick and simple.
    return @has_javascript = true if document.css( 'script' ).any?

    # Check for event attributes, if there are any then there's JS to be
    # executed.
    Browser::Javascript.events.flatten.each do |event|
        return @has_javascript = true if document.xpath( "//*[@#{event}]" ).any?
    end

    # If there's 'javascript:' in 'href' and 'action' attributes then
    # there's JS to be executed.
    [:action, :href].each do |candidate|
        document.xpath( "//*[@#{candidate}]" ).each do |attribute|
            if attribute.attributes[candidate.to_s].to_s.start_with?( 'javascript:' )
                return @has_javascript = true
            end
        end
    end

    @has_javascript = false
end

#hashObject



445
446
447
# File 'lib/arachni/page.rb', line 445

# @return
#   `digest.hash`; this is what #== compares.
def hash
    digest.hash
end

#import_metadata(other, metas = METADATA) ⇒ Object



477
478
479
480
481
482
483
484
485
486
487
488
489
# File 'lib/arachni/page.rb', line 477

# Merges the given kinds of metadata of another page into this page's
# `@metadata`, per element type.
#
# @param    [#metadata] other
#   Page to import from.
# @param    [Symbol, Array<Symbol>]    metas
#   Metadata kinds to import.
#
# @return   [Page]
#   `self`
def import_metadata( other, metas = METADATA )
    [metas].flatten.each do |meta|
        other.metadata.each do |element_type, data|
            @metadata[element_type] ||= {}
            @metadata[element_type][meta.to_s] ||= {}
            # The other page may not hold this kind of metadata for the
            # element type, in which case there's nothing to merge.
            @metadata[element_type][meta.to_s].merge!( data[meta.to_s] || {} )
        end
    end

    # NOTE(review): this copy of the source has an empty, indented line
    # here where a bare call seems to have been lost (likely
    # #reload_metadata or #update_metadata) -- confirm against
    # lib/arachni/page.rb.

    self
end

#method(*args) ⇒ String

Returns The request method that returned the page.

Returns:

  • (String)

    The request method that returned the page



332
333
334
335
# File 'lib/arachni/page.rb', line 332

# Without arguments, returns the method of the request which returned the
# page. With arguments, behaves like the regular `Object#method`.
#
# @return   [String]
#   The request method that returned the page.
def method( *args )
    # `any?` would ignore nil/false arguments and answer with the request
    # method instead of forwarding the call.
    return super( *args ) if !args.empty?
    response.request.method
end

#parsed_urlArachni::URI

Returns:



187
188
189
# File 'lib/arachni/page.rb', line 187

# @return   [Arachni::URI]
#   Result of `Arachni::URI( url )`; not memoized.
def parsed_url
    Arachni::URI( url )
end

#parserParser

Returns:



192
193
194
195
196
197
198
199
200
201
# File 'lib/arachni/page.rb', line 192

# @return   [Parser, nil]
#   `nil` when there is no response; otherwise the cached parser, created
#   for the response on first use.
def parser
    return unless @response

    cached = @cache[:parser]
    return cached if cached

    fresh = Parser.new( @response )
    @cache[:parser] = fresh

    # The page may have a browser-assigned body, set it as the one to parse.
    fresh.body = body

    @cache[:parser]
end

#pathsArray<String>

Returns Paths contained in this page.

Returns:

See Also:



305
306
307
# File 'lib/arachni/page.rb', line 305

# @return   [Array<String>]
#   The parser's paths, or an empty Array when there is no parser; cached
#   under `:paths`.
def paths
    @cache[:paths] ||=
        if parser
            parser.paths
        else
            []
        end
end

#performerObject

Returns Object which performed the #request which led to this page.

Returns:

  • (Object)

    Object which performed the #request which led to this page.



182
183
184
# File 'lib/arachni/page.rb', line 182

# @return   [Object]
#   The `performer` of the #request.
def performer
    request.performer
end

#persistent_hashObject



441
442
443
# File 'lib/arachni/page.rb', line 441

# @return
#   `digest.persistent_hash`
def persistent_hash
    digest.persistent_hash
end

#platformsPlatform

Returns Applicable platforms for the page.

Returns:

  • (Platform)

    Applicable platforms for the page.



311
312
313
# File 'lib/arachni/page.rb', line 311

# @return   [Platform]
#   The `Platform::Manager` entry for the page #url.
def platforms
    Platform::Manager[url]
end

#prepare_for_reportObject



359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# File 'lib/arachni/page.rb', line 359

# Strips the page of data prior to it being reported: empties the whole
# cache and the cookie-jar, removes non-text bodies and resets the DOM's
# `digest` and `skip_states`.
#
# @return   [Page]
#   `self`
def prepare_for_report
    # We want a hard clear, that's why we don't call #clear_cache.
    @cache.clear

    # If we're dealing with binary data remove it before storing.
    if !text?
        # #text? is also false for pages without a response, in which
        # case there is no response body to remove.
        response.body = nil if response
        self.body     = nil
    end

    @cookie_jar.clear if @cookie_jar

    @dom.digest      = nil
    @dom.skip_states = nil

    self
end

#query_varsHash

Returns URL query parameters.

Returns:



260
261
262
# File 'lib/arachni/page.rb', line 260

# @return   [Hash]
#   Result of `uri_parse_query` for the page #url; cached under
#   `:query_vars`.
def query_vars
    @cache[:query_vars] ||= uri_parse_query( url )
end

#reload_metadataObject



469
470
471
472
473
474
475
# File 'lib/arachni/page.rb', line 469

# Goes through the cached elements of every type in ELEMENTS; types without
# a cache entry are skipped.
def reload_metadata
    ELEMENTS.each do |type|
        next if !@cache[type]

        # NOTE(review): the method applied to each element seems to have
        # been lost from this copy of the source, leaving a block that
        # does nothing -- restore the call from lib/arachni/page.rb.
        @cache[type].each { |e|  e }
    end
end

#requestHTTP::Request

Returns HTTP request.

Returns:



241
242
243
# File 'lib/arachni/page.rb', line 241

# @return   [HTTP::Request]
#   The `request` of the #response.
def request
    response.request
end

#scopeScope

Returns:



176
177
178
# File 'lib/arachni/page.rb', line 176

# @return   [Scope]
#   Scope for this page, created on first use and then reused instead of
#   being rebuilt on each call.
def scope
    @scope ||= Scope.new( self )
end

#text?Boolean

Returns `true` if the body of the page is text-based, `false` otherwise.

Returns:

  • (Boolean)

    `true` if the body of the page is text-based, `false` otherwise.



411
412
413
414
# File 'lib/arachni/page.rb', line 411

# @return   [Boolean]
#   `false` when there is no response, otherwise the response's `text?`.
def text?
    return response.text? if response

    false
end

#titleString

Returns Title of the page.

Returns:

  • (String)

    Title of the page.



418
419
420
# File 'lib/arachni/page.rb', line 418

# @return   [String, nil]
#   Text of the first `title` element of the #document; `nil` if it could
#   not be retrieved for any reason.
def title
    document.css( 'title' ).first.text
rescue StandardError
    # Best effort: no document, no title element, parsing problems etc.
    nil
end

#to_hHash Also known as: to_hash

Returns Converts the page data to a hash.

Returns:

  • (Hash)

    Converts the page data to a hash.



424
425
426
427
428
429
430
431
432
433
# File 'lib/arachni/page.rb', line 424

# @return   [Hash]
#   Copies of the instance variables (keyed by name, without the `@`),
#   merged with the cache entries; the cached parser and a few internal
#   variables are left out.
def to_h
    ignored = [:@document, :@do_not_audit_elements, :@has_custom_elements, :@scope]

    data = (instance_variables - ignored).each_with_object( {} ) do |ivar, acc|
        acc[ivar.to_s.delete( '@' ).to_sym] = try_dup( instance_variable_get( ivar ) )
    end

    data = data.merge( @cache )
    data.delete( :parser )
    data
end

#to_initialization_optionsObject



491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
# File 'lib/arachni/page.rb', line 491

# @return   [Hash]
#   Options from which an equivalent page can be initialized: copies of the
#   body, cookie-jar, audit whitelist and metadata (when set), the custom
#   elements (detached from this page), the response, the
#   `do_not_audit_elements` flag and the DOM data.
def to_initialization_options
    options = {}

    [:body, :cookie_jar, :element_audit_whitelist, :metadata].each do |name|
        copy = try_dup( instance_variable_get( "@#{name}".to_sym ) )
        options[name] = copy if copy
    end

    # Only elements which have been flagged as custom are included.
    ELEMENTS.each do |type|
        next unless @has_custom_elements.include?( type )

        cached = @cache[type]
        next if !cached || cached.empty?

        options[type] = cached.map do |element|
            detached      = element.dup
            detached.page = nil
            detached
        end
    end

    options[:response]              = response
    options[:do_not_audit_elements] = @do_not_audit_elements

    options[:dom] = dom.to_h.keys.each_with_object( {} ) do |key, dom_options|
        dom_options[key] = try_dup( dom.send( key ) )
    end

    options
end

#to_rpc_dataHash

Returns Data representing this instance that are suitable for RPC transmission.

Returns:

  • (Hash)

    Data representing this instance that are suitable for RPC transmission.



523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
# File 'lib/arachni/page.rb', line 523

# @return   [Hash]
#   The initialization options with String keys, where the DOM, the response
#   and the elements (headers excluded) have been replaced by their RPC
#   data, the audit whitelist is an Array and the cookie-jar is left out.
def to_rpc_data
    rpc = to_initialization_options.my_stringify_keys(false)

    rpc['dom']                     = dom.to_rpc_data
    rpc['element_audit_whitelist'] = element_audit_whitelist.to_a
    rpc['response']                = rpc['response'].to_rpc_data

    ELEMENTS.each do |type|
        next if type == :headers

        key = type.to_s
        next unless rpc[key]

        # Taken from the accessor rather than from the options.
        rpc[key] = send( key ).map { |element| element.to_rpc_data }
    end

    rpc.delete 'cookie_jar'

    rpc
end

#to_sObject Also known as: inspect



436
437
438
# File 'lib/arachni/page.rb', line 436

# @return   [String]
#   Short description with the class, object ID, URL and DOM.
def to_s
    "#<#{self.class}:#{object_id} @url=#{@url.inspect} @dom=#{@dom}>"
end

#update_element_audit_whitelist(list) ⇒ Set



211
212
213
214
215
216
# File 'lib/arachni/page.rb', line 211

# Adds elements to the audit whitelist.
#
# @param    [Integer, #coverage_hash, Array]   list
#   Elements, or the Integers to store directly; may be a single item or a
#   (nested) Array.
#
# @return   [Set]
#   The updated whitelist.
def update_element_audit_whitelist( list )
    [list].flatten.each do |e|
        @element_audit_whitelist <<
            (e.is_a?( Integer ) ? e : e.coverage_hash )
    end

    # Return the whitelist, as documented, not the list we iterated over.
    @element_audit_whitelist
end

#update_metadataObject



461
462
463
464
465
466
467
# File 'lib/arachni/page.rb', line 461

# Goes through the cached elements of every type in ELEMENTS; types without
# a cache entry are skipped.
def update_metadata
    ELEMENTS.each do |type|
        next if !@cache[type]

        # NOTE(review): the method applied to each element seems to have
        # been lost from this copy of the source, leaving a block that
        # does nothing -- restore the call from lib/arachni/page.rb.
        @cache[type].each { |e|  e }
    end
end

#urlString

Returns URL of the page.

Returns:

  • (String)

    URL of the page.



247
248
249
# File 'lib/arachni/page.rb', line 247

# @return   [String, nil]
#   The assigned URL if there is one, otherwise the URL of the response
#   (which then gets stored in `@url`); `nil` when there is neither, so that
#   callers can check for a missing URL instead of hitting a NoMethodError.
def url
    @url ||= (@response.url if @response)
end