Class: Arachni::Page

Inherits:
Object show all
Includes:
Utilities
Defined in:
lib/arachni/page.rb,
lib/arachni/page/dom.rb,
lib/arachni/page/scope.rb,
lib/arachni/page/dom/transition.rb

Overview

It holds page data like elements, cookies, headers, etc…

Author:

Defined Under Namespace

Classes: DOM, Error, Scope

Constant Summary collapse

# Element types a page can hold; each name is also the reader that gets
# called to retrieve the elements of that type.
ELEMENTS = %i(links forms cookies headers link_templates)

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Utilities

#available_port, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #to_absolute, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite

Constructor Details

#initialize(options) ⇒ Page

Needs either a `:parser`, a `:response`, or user-provided data.

Parameters:

  • options (Hash)

    Hash from which to set instance attributes.

Options Hash (options):



145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/arachni/page.rb', line 145

# Builds a page out of the given options.
#
# `:do_not_audit_elements` and `:parser` are consumed first, every remaining
# option is then assigned through its `<name>=` writer.
#
# @param    [Hash]  options
#   Hash from which to set instance attributes, must not be empty.
#
# @raise    [ArgumentError]
#   If `options` is empty or if no URL could be determined.
def initialize( options )
    raise ArgumentError, 'Options cannot be empty.' if options.empty?

    # Work on a copy, keys get removed from it further down.
    options = options.dup

    @cache                 = {}
    @do_not_audit_elements = options.delete( :do_not_audit_elements )

    parser          = options.delete( :parser )
    @cache[:parser] = parser
    @response       = parser.response if parser

    # We need to know whether or not the page has been dynamically updated
    # with elements, in order to optimize #dup and #hash operations.
    @has_custom_elements = Set.new

    @metadata ||= {}

    options.each_pair do |name, value|
        send( "#{name}=", try_dup( value ) )
    end

    dom_options = options[:dom] || {}
    @dom        = DOM.new( dom_options.merge( page: self ) )

    raise ArgumentError, 'No URL given!' unless url

    Platform::Manager.fingerprint( self )

    # Whatever list was provided (if any) ends up as a Set.
    @element_audit_whitelist = Set.new( @element_audit_whitelist || [] )
end

Instance Attribute Details

#cacheHash (readonly)

Returns:



121
122
123
# File 'lib/arachni/page.rb', line 121

# @return   [Hash]
#   Store holding the lazily computed page data.
def cache
    @cache
end

#domDOM

Returns DOM snapshot.

Returns:

  • (DOM)

    DOM snapshot.



112
113
114
# File 'lib/arachni/page.rb', line 112

# @return   [DOM]
#   DOM snapshot.
def dom
    @dom
end

#element_audit_whitelistSet<Integer> (readonly)

Returns Audit whitelist based on Element::Capabilities::Auditable#coverage_hash.



134
135
136
# File 'lib/arachni/page.rb', line 134

# @return   [Set<Integer>]
#   Audit whitelist based on `Element::Capabilities::Auditable#coverage_hash`.
def element_audit_whitelist
    @element_audit_whitelist
end

#metadataHash (readonly)

Returns Holds page data that will need to persist between #clear_cache calls and other utility data.

Returns:

  • (Hash)

    Holds page data that will need to persist between #clear_cache calls and other utility data.



126
127
128
# File 'lib/arachni/page.rb', line 126

# @return   [Hash]
#   Holds page data that will need to persist between #clear_cache calls
#   and other utility data.
def metadata
    @metadata
end

#responseHTTP::Response (readonly)

Returns HTTP response.

Returns:



116
117
118
# File 'lib/arachni/page.rb', line 116

# @return   [HTTP::Response]
#   HTTP response.
def response
    @response
end

Class Method Details

._load(data) ⇒ Object



546
547
548
# File 'lib/arachni/page.rb', line 546

# Marshal hook, rebuilds a page out of the String generated by #_dump.
#
# @param    [String]    data
#   Marshalled initialization options.
#
# @return   [Page]
def self._load( data )
    # NOTE(review): Marshal.load must only ever be given trusted data.
    initialization_options = Marshal.load( data )
    new( initialization_options )
end

.from_data(data) ⇒ Object

Parameters:

  • data (Hash)

    a customizable set of options



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/arachni/page.rb', line 82

# Builds a page out of user-provided data, filling in defaults for
# everything that is missing.
#
# The given Hash and its nested `:response` and `:request` Hashes are
# copied before being updated, so the same data can safely be reused.
#
# @param    [Hash]  data
#   Page data; `:url` and `:body` are used as fallbacks for the response URL
#   and body, `:response` may hold response options which in turn may hold
#   `:request` options.
#
# @return   [Page]
def self.from_data( data )
    data = data.dup

    # #dup is shallow, copy the nested hashes as well before touching them.
    response = (data[:response] || {}).dup
    response[:code] ||= 200
    response[:url]  ||= data.delete( :url )
    response[:body] ||= data.delete( :body ) || ''

    request = (response[:request] || {}).dup
    request[:url] ||= response[:url]

    data[:links]   ||= []
    data[:forms]   ||= []
    data[:cookies] ||= []
    data[:headers] ||= []

    data[:cookie_jar] ||= []

    response[:request] = Arachni::HTTP::Request.new( request )
    data[:response]    = Arachni::HTTP::Response.new( response )

    new data
end

.from_response(response) ⇒ Page

Parameters:

Returns:



59
60
61
# File 'lib/arachni/page.rb', line 59

# @param    [HTTP::Response] response
#   HTTP response to parse.
#
# @return   [Page]
#   Page as generated by a Parser for the given response.
def self.from_response( response )
    response_parser = Parser.new( response )
    response_parser.page
end

.from_rpc_data(data) ⇒ Page

Parameters:

Returns:



504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
# File 'lib/arachni/page.rb', line 504

# Restores a page from RPC data, the given Hash is left untouched.
#
# @param    [Hash]  data
#   String-keyed page data, like the one generated by #to_rpc_data.
#
# @return   [Page]
def self.from_rpc_data( data )
    dom             = nil
    normalized_data = {}

    data.each do |name, value|
        # The DOM needs a reference to the finished page, so it is put aside
        # (rather than deleted from the caller's Hash) and restored last.
        if name == 'dom'
            dom = value
            next
        end

        normalized_data[name.to_sym] =
            case name
                when 'response'
                    HTTP::Response.from_rpc_data( value )

                when 'metadata'
                    # Only the nonces of known element types are kept.
                    %w(link form cookie header).each_with_object({}) do |type, sanitized|
                        next if !value[type] || !value[type]['nonces']

                        sanitized[type.to_sym] = { nonces: value[type]['nonces'] }
                    end

                when 'links', 'forms', 'cookies'
                    # 'links' => Element::Link, 'forms' => Element::Form, etc.
                    value.map do |e|
                        Element.const_get( name[0...-1].capitalize.to_sym ).from_rpc_data( e )
                    end

                else
                    value
            end
    end

    instance = new( normalized_data )
    instance.instance_variable_set(
        '@dom', DOM.from_rpc_data( dom.merge( page: instance ) )
    )
    instance
end

.from_url(url, opts = {}, &block) ⇒ Page

Parameters:

  • url (String)

    URL to fetch.

  • opts (Hash) (defaults to: {})
  • block (Block)

    Block to which to pass the page object. If given, the request will be performed asynchronously. If no block is given, the page will be fetched synchronously and be returned by this method.

Options Hash (opts):

  • :precision (Integer) — default: 2

    How many times to request the page and examine changes between requests. Used to identify nonce tokens etc.

  • :http (Hash)

    HTTP request options.

Returns:



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/arachni/page.rb', line 37

# Requests the given URL `:precision` times and builds a page out of the
# collected responses.
#
# @param    [String]    url
#   URL to fetch.
# @param    [Hash]  opts
#   Options, left unmodified.
# @option opts  [Integer]   :precision  (2)
#   How many times to request the page.
# @option opts  [Hash]  :http
#   HTTP request options.
# @param    [Block] block
#   Block to which to pass the page object. If given, the request will be
#   performed asynchronously. If no block is given, the page will be fetched
#   synchronously and be returned by this method.
#
# @return   [Page, nil]
#   The page when no block has been given, `nil` otherwise.
def self.from_url( url, opts = {}, &block )
    responses = []

    # Don't write the default back into the caller's Hash.
    precision = opts[:precision] || 2

    precision.times do
        HTTP::Client.get( url, opts[:http] || {} ) do |res|
            responses << res

            # Only act once all the responses are in.
            next if responses.size != precision
            block.call( from_response( responses ) ) if block_given?
        end
    end

    return if block_given?

    HTTP::Client.run
    from_response( responses )
end

Instance Method Details

#==(other) ⇒ Object



440
441
442
# File 'lib/arachni/page.rb', line 440

def ==( other )
    hash == other.hash
end

#_dump(_) ⇒ Object



542
543
544
# File 'lib/arachni/page.rb', line 542

def _dump( _ )
    Marshal.dump( to_initialization_options )
end

#audit_element?(element) ⇒ Bool

Returns `true` if the element should be audited, `false` otherwise.

Parameters:

Returns:

  • (Bool)

    `true` if the element should be audited, `false` otherwise.

See Also:



227
228
229
230
231
232
233
# File 'lib/arachni/page.rb', line 227

# @param    [Integer, #coverage_hash]   element
#   Element, or the coverage hash of one.
#
# @return   [Bool]
#   `true` if the element should be audited, `false` otherwise.
def audit_element?( element )
    # Always a real boolean, even when auditing has been disabled.
    return false if @do_not_audit_elements

    # An empty whitelist allows everything.
    return true if @element_audit_whitelist.empty?

    coverage = element.is_a?( Integer ) ? element : element.coverage_hash
    @element_audit_whitelist.include?( coverage )
end

#bodyString

Returns HTTP response body.

Returns:

  • (String)

    HTTP response body.



267
268
269
270
# File 'lib/arachni/page.rb', line 267

# @return   [String]
#   Page body; the HTTP response body unless one has been explicitly set,
#   an empty String when there is neither.
def body
    return '' unless @body || @response
    @body ||= response.body
end

#body=(string) ⇒ Object

Parameters:

  • string (String)

    Page body.



274
275
276
277
278
279
# File 'lib/arachni/page.rb', line 274

# Sets the page body, stored as a frozen copy.
#
# @param    [String]    string
#   Page body.
def body=( string )
    # Forget the #has_script? verdict and the cached data of the old body.
    @has_javascript = nil
    clear_cache

    copy  = string.to_s.dup
    @body = copy.freeze
end

#clear_cachePage

Note:

Will preserve caches for elements which have been externally modified.

Returns `self` with caches cleared.

Returns:

  • (Page)

    `self` with caches cleared.



339
340
341
342
343
344
345
346
347
348
349
# File 'lib/arachni/page.rb', line 339

# @note Will preserve caches for elements which have been externally modified.
#
# @return   [Page]
#   `self` with caches cleared.
def clear_cache
    ELEMENTS.each do |type|
        next if @has_custom_elements.include?( type )

        # Remove the association to this page before clearing the elements
        # from cache to make it easier on the GC.
        cached_elements = @cache[type] || []
        cached_elements.each { |element| element.page = nil }
    end

    # Only the entries of custom elements survive.
    @cache.keep_if { |key, _| @has_custom_elements.include?( key ) }
    self
end

#codeString

Returns HTTP response status code.

Returns:

  • (Integer)

    HTTP response status code, `0` when there is no response.



254
255
256
257
# File 'lib/arachni/page.rb', line 254

# @return   [Integer]
#   HTTP response code, `0` if no code has been set and there is no response.
def code
    return 0 unless @code || response
    @code ||= response.code
end

#cookie_jarArray

Returns Cookies extracted from the supplied cookie-jar.

Returns:



298
299
300
# File 'lib/arachni/page.rb', line 298

# @return   [Array]
#   Cookies extracted from the supplied cookie-jar; taken from the parser
#   when none have been set, empty when there is no parser either.
def cookie_jar
    return @cookie_jar if @cookie_jar

    @cookie_jar =
        if parser
            parser.cookie_jar
        else
            []
        end
end

#do_not_audit_elementsObject

Forces #audit_element? to always return false.



236
237
238
# File 'lib/arachni/page.rb', line 236

# Disables the audit of all elements of the page, #audit_element? stops
# approving elements once this has been called.
#
# @return   [true]
def do_not_audit_elements
    @do_not_audit_elements = true
end

#documentNokogiri::HTML

Returns Parsed HTML document.

Returns:

  • (Nokogiri::HTML)

    Parsed HTML document.



331
332
333
# File 'lib/arachni/page.rb', line 331

# @return   [Nokogiri::HTML]
#   Parsed HTML document; the parser's one when a parser is available,
#   otherwise the #body gets parsed. The result is cached.
def document
    return @cache[:document] if @cache[:document]

    @cache[:document] =
        if parser.nil?
            Nokogiri::HTML( body )
        else
            parser.document
        end
end

#dupObject



448
449
450
# File 'lib/arachni/page.rb', line 448

# @return   [Page]
#   New instance of the same class, built from #to_initialization_options.
def dup
    initialization_options = to_initialization_options
    self.class.new( initialization_options )
end

#elementsArray

Returns All page elements.

Returns:

  • (Array)

    All page elements.



318
319
320
# File 'lib/arachni/page.rb', line 318

# @return   [Array]
#   All page elements, i.e. the results of every ELEMENTS reader flattened
#   into a single list.
def elements
    per_type = ELEMENTS.map { |type| send( type ) }
    per_type.flatten
end

#eql?(other) ⇒ Boolean

Returns:

  • (Boolean)


444
445
446
# File 'lib/arachni/page.rb', line 444

# Same as #==, allows pages to be used as Hash keys.
#
# @param    [Object]    other
#
# @return   [Boolean]
def eql?( other )
    self == other
end

#has_script?Boolean

Returns `true` if the page contains client-side code, `false` otherwise.

Returns:

  • (Boolean)

    `true` if the page contains client-side code, `false` otherwise.



371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
# File 'lib/arachni/page.rb', line 371

# Checks for `script` elements, event attributes and `javascript:` URLs in
# `action` and `href` attributes. The verdict is memoized.
#
# @return   [Boolean]
#   `true` if the page contains client-side code, `false` otherwise.
def has_script?
    return @has_javascript unless @has_javascript.nil?

    # A page without a response has no headers to inspect, treat it like any
    # other non-HTML page instead of blowing up.
    if !response ||
        !response.headers.content_type.to_s.start_with?( 'text/html' ) ||
        !text? || !document
        return @has_javascript = false
    end

    # First check, quick and simple.
    return @has_javascript = true if document.css( 'script' ).any?

    # Check for event attributes, if there are any then there's JS to be
    # executed.
    Browser::Javascript.events.flatten.each do |event|
        return @has_javascript = true if document.xpath( "//*[@#{event}]" ).any?
    end

    # If there's 'javascript:' in 'href' and 'action' attributes then
    # there's JS to be executed.
    [:action, :href].each do |candidate|
        document.xpath( "//*[@#{candidate}]" ).each do |attribute|
            if attribute.attributes[candidate.to_s].to_s.start_with?( 'javascript:' )
                return @has_javascript = true
            end
        end
    end

    @has_javascript = false
end

#hashObject



436
437
438
# File 'lib/arachni/page.rb', line 436

# @return   [Integer]
#   Hash of the page #digest.
def hash
    page_digest = digest
    page_digest.hash
end

#method(*args) ⇒ String

Returns The request method that returned the page.

Returns:

  • (String)

    The request method that returned the page



324
325
326
327
# File 'lib/arachni/page.rb', line 324

# Without arguments it returns the request method that returned the page,
# otherwise it behaves like the inherited `#method`.
#
# @return   [String]
#   The request method that returned the page.
def method( *args )
    if args.any?
        super( *args )
    else
        response.request.method
    end
end

#parsed_urlArachni::URI

Returns:



188
189
190
# File 'lib/arachni/page.rb', line 188

# @return   [Arachni::URI]
#   The #url of the page, passed through `Arachni::URI`.
def parsed_url
    page_url = url
    Arachni::URI( page_url )
end

#parserParser

Returns:



193
194
195
196
197
198
199
200
201
202
# File 'lib/arachni/page.rb', line 193

# @return   [Parser, nil]
#   Cached parser for the response, `nil` if there is no response.
def parser
    return unless @response

    cached = @cache[:parser]
    return cached if cached

    fresh = Parser.new( @response )
    @cache[:parser] = fresh

    # The page may have a browser-assigned body, set it as the one to parse.
    fresh.body = body
    fresh
end

#pathsArray<String>

Returns Paths contained in this page.

Returns:

See Also:



306
307
308
# File 'lib/arachni/page.rb', line 306

# @return   [Array<String>]
#   Paths contained in this page, as extracted by the parser; empty when
#   there is no parser. The result is cached.
def paths
    return @cache[:paths] if @cache[:paths]

    @cache[:paths] =
        if parser
            parser.paths
        else
            []
        end
end

#performerObject

Returns Object which performed the #request which led to this page.

Returns:

  • (Object)

    Object which performed the #request which led to this page.



183
184
185
# File 'lib/arachni/page.rb', line 183

# @return   [Object]
#   Object which performed the #request which led to this page.
def performer
    originating_request = request
    originating_request.performer
end

#persistent_hashObject



432
433
434
# File 'lib/arachni/page.rb', line 432

# @return   [Object]
#   `#persistent_hash` of the page #digest.
def persistent_hash
    page_digest = digest
    page_digest.persistent_hash
end

#platformsPlatform

Returns Applicable platforms for the page.

Returns:

  • (Platform)

    Applicable platforms for the page.



312
313
314
# File 'lib/arachni/page.rb', line 312

# @return   [Platform]
#   Applicable platforms for the page, looked up by its #url.
def platforms
    page_url = url
    Platform::Manager[page_url]
end

#prepare_for_reportObject



351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
# File 'lib/arachni/page.rb', line 351

# Strips the page from data which should not end up in a report: all caches,
# binary bodies, the cookie-jar and the DOM digest and skip-states.
#
# @return   [Page]
#   `self`
def prepare_for_report
    # We want a hard clear, that's why we don't call #clear_cache.
    @cache.clear

    # If we're dealing with binary data remove it before storing.
    #
    # Without a response there's nothing to tell us that the body is binary
    # (and no response body to remove), so such pages are left alone.
    if response && !text?
        response.body = nil
        self.body     = nil
    end

    @cookie_jar.clear if @cookie_jar

    @dom.digest      = nil
    @dom.skip_states = nil

    self
end

#query_varsHash

Returns URL query parameters.

Returns:



261
262
263
# File 'lib/arachni/page.rb', line 261

# @return   [Hash]
#   URL query parameters, parsed once out of the #url and then cached.
def query_vars
    return @cache[:query_vars] if @cache[:query_vars]
    @cache[:query_vars] = uri_parse_query( url )
end

#requestHTTP::Request

Returns HTTP request.

Returns:



242
243
244
# File 'lib/arachni/page.rb', line 242

# @return   [HTTP::Request]
#   HTTP request, as held by the #response.
def request
    page_response = response
    page_response.request
end

#scopeScope

Returns:



177
178
179
# File 'lib/arachni/page.rb', line 177

# @return   [Scope]
#   Scope for this page, created on first use and then reused.
def scope
    @scope ||= Scope.new( self )
end

#text?Boolean

Returns `true` if the body of the page is text-based, `false` otherwise.

Returns:

  • (Boolean)

    `true` if the body of the page is text-based, `false` otherwise.



403
404
405
406
# File 'lib/arachni/page.rb', line 403

# @return   [Boolean]
#   Whatever the response's `#text?` returns, `false` when there is no
#   response.
def text?
    response ? response.text? : false
end

#titleString

Returns Title of the page.

Returns:

  • (String)

    Title of the page.



410
411
412
# File 'lib/arachni/page.rb', line 410

# @return   [String, nil]
#   Text of the first `title` element of the #document, `nil` if it could
#   not be retrieved.
def title
    document.css( 'title' ).first.text
rescue StandardError
    # Best effort, any error just means that there is no title to return.
    nil
end

#to_hHash Also known as: to_hash

Returns Converts the page data to a hash.

Returns:

  • (Hash)

    Converts the page data to a hash.



416
417
418
419
420
421
422
423
424
425
# File 'lib/arachni/page.rb', line 416

# @return   [Hash]
#   The instance variables of the page (duplicated, named without the `@`)
#   merged with the cache contents, minus the parser.
def to_h
    excluded = [:@document, :@do_not_audit_elements, :@has_custom_elements, :@scope]

    data = {}
    instance_variables.each do |ivar|
        next if excluded.include?( ivar )

        key       = ivar.to_s.gsub( '@', '' ).to_sym
        data[key] = try_dup( instance_variable_get( ivar ) )
    end

    data = data.merge( @cache )
    data.delete( :parser )
    data
end

#to_initialization_optionsObject



452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
# File 'lib/arachni/page.rb', line 452

# @return   [Hash]
#   Options which can be passed to `new` in order to recreate this page.
def to_initialization_options
    options = {}

    # Only include the attributes which have actually been set.
    [:body, :cookie_jar, :element_audit_whitelist, :metadata].each do |name|
        value = try_dup( instance_variable_get( "@#{name}".to_sym ) )
        options[name] = value if value
    end

    # Elements are only included if they have been externally set.
    ELEMENTS.each do |type|
        next unless @has_custom_elements.include?( type )

        cached = @cache[type]
        next if !cached || cached.empty?

        # Pass along copies which aren't associated with this page.
        options[type] = cached.map do |element|
            copy      = element.dup
            copy.page = nil
            copy
        end
    end

    options[:response]              = response
    options[:do_not_audit_elements] = @do_not_audit_elements

    options[:dom] = dom.to_h.keys.each_with_object({}) do |key, dom_options|
        dom_options[key] = try_dup( dom.send( key ) )
    end

    options
end

#to_rpc_dataHash

Returns Data representing this instance that are suitable for RPC transmission.

Returns:

  • (Hash)

    Data representing this instance that are suitable for RPC transmission.



484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
# File 'lib/arachni/page.rb', line 484

# @return   [Hash]
#   Data representing this instance that are suitable for RPC transmission:
#   the String-keyed #to_initialization_options with the DOM, response and
#   elements converted to their RPC data and without the cookie-jar.
def to_rpc_data
    rpc_data = to_initialization_options.my_stringify_keys( false )

    rpc_data['dom']                     = dom.to_rpc_data
    rpc_data['element_audit_whitelist'] = element_audit_whitelist.to_a
    rpc_data['response']                = rpc_data['response'].to_rpc_data

    %w(links forms cookies).each do |type|
        next unless rpc_data[type]
        rpc_data[type] = send( type ).map { |element| element.to_rpc_data }
    end

    rpc_data.delete( 'cookie_jar' )
    rpc_data
end

#to_sObject



428
429
430
# File 'lib/arachni/page.rb', line 428

# @return   [String]
#   Class, object ID, URL and DOM of the page.
def to_s
    format( '#<%s:%s @url=%s @dom=%s>', self.class, object_id, @url.inspect, @dom )
end

#update_element_audit_whitelist(list) ⇒ Set



212
213
214
215
216
217
# File 'lib/arachni/page.rb', line 212

# Adds elements to the audit whitelist.
#
# @param    [Integer, #coverage_hash, Array]    list
#   Element(s), or coverage hash(es) of elements, to whitelist.
#
# @return   [Array]
#   The given entries as a flat list.
def update_element_audit_whitelist( list )
    [list].flatten.each do |entry|
        coverage = entry.is_a?( Integer ) ? entry : entry.coverage_hash
        @element_audit_whitelist << coverage
    end
end

#urlString

Returns URL of the page.

Returns:

  • (String)

    URL of the page.



248
249
250
# File 'lib/arachni/page.rb', line 248

# @return   [String, nil]
#   URL of the page; falls back to the URL of the response when none has been
#   set, `nil` when there is no response to fall back to.
def url
    # No response means no fallback; return nil so that callers can check for
    # a missing URL instead of getting a NoMethodError.
    @url ||= (@response.url if @response)
end