Class: Arachni::Parser

response (HTTP::Response, Array<HTTP::Response>) —

Response(s) to analyze and parse. By providing multiple responses the parser will be able to perform some preliminary differential analysis and identify nonce tokens in inputs.
options (Options) (defaults to: Options)

# File 'lib/arachni/parser.rb', line 61

# Sets up the parser for the given response(s).
#
# @param response [HTTP::Response, Array<HTTP::Response>]
#   Response to parse. When given an Array, its first element is the one
#   that gets parsed and the remaining non-nil elements are kept as
#   secondary responses.
# @param options [Options]
#   Stored as-is in @options.
def initialize( response, options = Options )
    @options = options

    if response.is_a? Array
        # Destructure instead of calling #shift, so that the Array the caller
        # handed over is left untouched.
        primary, *others     = response
        @secondary_responses = others.compact
        response             = primary
    end

    @response = response
    self.url  = response.url
end

Instance Attribute Details

#response ⇒ `HTTP::Response` (readonly)

Returns:

(HTTP::Response)



53
54
55

# File 'lib/arachni/parser.rb', line 53

# Reader for the response stored in @response by #initialize.
#
# @return [HTTP::Response]
def response
  @response
end

#url ⇒ `String`

Returns:

(String)



50
51
52

# File 'lib/arachni/parser.rb', line 50

# Reader for @url.
#
# @return [String]
def url
  @url
end

Instance Method Details

#base ⇒ `String`

Returns the base `href`, if there is one.

Returns:

(String) —

Base `href`, if there is one.



289
290
291

# File 'lib/arachni/parser.rb', line 289

# Looks up the `href` of the first `<base href="...">` element of #document.
#
# A found value is memoized in @base; a miss is not, so it is looked up again
# on the next call.
#
# @return [String, nil]
#   Base `href`, or `nil` when there is no document or no such element.
def base
    return @base if @base

    # #document is nil when there is nothing to search in.
    doc = document
    return if !doc

    # Handle the "no <base> element" case explicitly instead of rescuing the
    # NoMethodError it would otherwise cause.
    node  = doc.search( '//base[@href]' ).first
    @base = node['href'] if node
end

#body ⇒ `Object`



115
116
117

# File 'lib/arachni/parser.rb', line 115

# Body used for parsing: the override set through #body=, if there is one,
# otherwise the body of the response.
#
# @return [Object]
def body
    return @body if @body

    @response.body
end

#body=(string) ⇒ `String`

Returns Override the #response body for the parsing process.

Returns:

(String) —

Override the #response body for the parsing process.

# File 'lib/arachni/parser.rb', line 110

# Overrides the response body for the parsing process.
#
# @param string [String]
#   Body to parse instead of the one of the response.
# @return [String]
def body=( string )
    # Drop every value that was memoized from the previous body, either
    # directly or by way of #document / #cookies, so that it gets recomputed.
    @links = @forms = @cookies = @document = nil
    @link_templates = @cookies_to_be_audited = @cookie_jar = nil
    @paths = @base = nil

    @body = string
end

#cookie_jar ⇒ `Array<Element::Cookie>`

Returns Cookies with which to update the HTTP cookie-jar.

Returns:

(Array<Element::Cookie>) —

Cookies with which to update the HTTP cookie-jar.

# File 'lib/arachni/parser.rb', line 264

# Combines the cookies of the page with the ones that the HTTP cookie-jar
# holds for @url. Jar cookies whose name matches a page cookie are left out,
# so the page's version wins.
#
# The result is memoized; calls after the first return it frozen.
#
# @return [Array<Element::Cookie>]
#   Cookies with which to update the HTTP cookie-jar.
def cookie_jar
    return @cookie_jar.freeze if @cookie_jar

    page_cookie_names = Set.new( cookies.map( &:name ) )

    jar_only = HTTP::Client.cookie_jar.for_url( @url ).reject do |jar_cookie|
        page_cookie_names.include?( jar_cookie.name )
    end

    @cookie_jar = cookies | jar_only
end

#cookies ⇒ `Array<Element::Cookie>`

Returns Cookies from HTTP headers and response body.

Returns:

(Array<Element::Cookie>) —

Cookies from HTTP headers and response body.

# File 'lib/arachni/parser.rb', line 227

# Collects the cookies set by the response headers and, for text-based
# responses, also the ones found in #document.
#
# The result is memoized; calls after the first return it frozen.
#
# @return [Array<Element::Cookie>]
#   Cookies from HTTP headers and response body.
def cookies
    return @cookies.freeze if @cookies

    @cookies = Cookie.from_headers( @url, @response.headers )

    # Only text-based responses have a document worth inspecting.
    @cookies |= Cookie.from_document( @url, document ) if text?
    @cookies
end

#cookies_to_be_audited ⇒ `Array<Element::Cookie>`

Returns Cookies to be audited.

Returns:

(Array<Element::Cookie>) —

Cookies to be audited.

# File 'lib/arachni/parser.rb', line 238

# Builds the list of cookies to audit: the cookies of the page plus every
# cookie in the HTTP cookie-jar whose name the page does not already use.
#
# Each returned cookie is a copy whose action has been set to @url. For
# non-text responses an empty Array is returned and nothing is memoized.
#
# @return [Array<Element::Cookie>]
#   Cookies to be audited.
def cookies_to_be_audited
    return @cookies_to_be_audited.freeze if @cookies_to_be_audited
    return [] unless text?

    page_cookie_names = Set.new( cookies.map( &:name ) )

    # The whole jar is used, not just the cookies for this URL, which means
    # that cookies unrelated to the current page end up in the list too.
    # Cookies set by the page take precedence over same-named jar cookies.
    jar_cookies = HTTP::Client.cookie_jar.cookies.reject do |jar_cookie|
        page_cookie_names.include?( jar_cookie.name )
    end

    @cookies_to_be_audited = (cookies | jar_cookies).map do |cookie|
        # Work on a copy, the original cookie keeps its action.
        cookie.dup.tap { |copy| copy.action = @url }
    end
end

#document ⇒ `Nokogiri::HTML`?

Returns a parsed HTML document from the body of the HTTP response or `nil` if the response data wasn’t text-based or the response couldn’t be parsed.

Returns:

(Nokogiri::HTML, nil) —

Returns a parsed HTML document from the body of the HTTP response or `nil` if the response data wasn’t text-based or the response couldn’t be parsed.

# File 'lib/arachni/parser.rb', line 123

# Parses #body with Nokogiri, provided that the response is text-based.
#
# The parsed document is memoized; calls after the first return it frozen.
# Any StandardError raised while checking or parsing is swallowed and results
# in `nil`.
#
# @return [Nokogiri::HTML, nil]
#   Parsed document, or `nil` for non-text or unparsable responses.
def document
    return @document.freeze if @document

    begin
        @document = Nokogiri::HTML( body ) if text?
    rescue StandardError
        nil
    end
end

#forms ⇒ `Array<Element::Form>`

Returns Forms from #document.

Returns:

(Array<Element::Form>) —

Forms from #document.

# File 'lib/arachni/parser.rb', line 149

# Extracts the forms of #document and, when secondary responses were given to
# the parser, uses them to spot nonce inputs.
#
# A form of the primary document is matched against the forms of each
# secondary response by coverage ID and name/ID. Every hidden input whose
# value differs between the two is set as the form's nonce name.
#
# The result is memoized whether or not there are secondary responses; calls
# after the first return it frozen. For non-text responses an empty Array is
# returned and nothing is memoized.
#
# @return [Array<Element::Form>]
#   Forms from #document.
def forms
    return @forms.freeze if @forms
    return [] if !text?

    f = Form.from_document( @url, document )

    # @secondary_responses is nil when the parser was given a single response.
    (@secondary_responses || []).each do |response|
        next if response.body.to_s.empty?

        Form.from_document( @url, response.body ).each do |form2|
            f.each do |form|
                # Only compare what is meant to be the same form.
                next if "#{form.coverage_id}:#{form.name_or_id}" !=
                    "#{form2.coverage_id}:#{form2.name_or_id}"

                form.inputs.each do |k, v|
                    # A hidden input whose value changed between responses is
                    # treated as a nonce.
                    if v != form2.inputs[k] && form.field_type_for( k ) == :hidden
                        form.nonce_name = k
                    end
                end
            end
        end
    end

    @forms = f
end

#headers ⇒ `Hash`

Note:

It’s more of a placeholder method; it doesn’t actually analyze anything. It’s a long shot that any of these will be vulnerable, but better safe than sorry.

Returns List of valid auditable HTTP header fields.

Returns:

(Array<Header>) —

List of valid auditable HTTP header fields.

# File 'lib/arachni/parser.rb', line 134

# Builds one auditable Header element per request header field, using fixed
# values plus the `authorized_by` and `http.user_agent` options and @url.
#
# The fields are defined as a Hash, but the value returned (and memoized) is
# a frozen Array holding one Header per field.
#
# @return [Array<Header>]
#   List of valid auditable HTTP header fields.
def headers
    return @headers if @headers

    fields = {
        'Accept'          => 'text/html,application/xhtml+xml,application' +
            '/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset'  => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
        'From'            => @options.authorized_by  || '',
        'User-Agent'      => @options.http.user_agent || '',
        'Referer'         => @url,
        'Pragma'          => 'no-cache'
    }

    elements = fields.map do |name, value|
        Header.new( url: @url, inputs: { name => value } )
    end

    @headers = elements.freeze
end

#link ⇒ `Element::Link`

Returns Link to the page.

Returns:

(Element::Link) —

Link to the page.

# File 'lib/arachni/parser.rb', line 179

# Builds a Link for the page itself, with #link_vars as its inputs.
#
# @return [Element::Link, nil]
#   Link to the page, or `nil` when the URL has no parameters and the
#   response is not a redirection.
def link
    has_parameters = !link_vars.empty?
    return unless has_parameters || @response.redirection?

    Link.new( url: @url, inputs: link_vars )
end

#link_template ⇒ `Element::LinkTemplate`

Returns LinkTemplate for the current page.

Returns:

(Element::LinkTemplate) —

LinkTemplate for the current page.

# File 'lib/arachni/parser.rb', line 186

# Builds a LinkTemplate for the page itself from whatever
# LinkTemplate.extract_inputs returns for @url.
#
# Note that @url gets frozen when a template is found.
#
# @return [Element::LinkTemplate, nil]
#   LinkTemplate for the current page, or `nil` when no template was
#   extracted.
def link_template
    pattern, params = LinkTemplate.extract_inputs( @url )
    return unless pattern

    frozen_url = @url.freeze

    LinkTemplate.new(
        url:      frozen_url,
        action:   frozen_url,
        inputs:   params,
        template: pattern
    )
end

#link_templates ⇒ `Array<Element::LinkTemplate>`

Returns Links matching OptionsGroups::Audit#link_templates in #document.

Returns:

(Array<Element::LinkTemplate>) —

Links matching OptionsGroups::Audit#link_templates in #document.

# File 'lib/arachni/parser.rb', line 209

# Collects the page's own #link_template (if any) and, for text-based
# responses, the link templates found in #document.
#
# The result is memoized; calls after the first return it frozen.
#
# @return [Array<Element::LinkTemplate>]
#   Links matching OptionsGroups::Audit#link_templates in #document.
def link_templates
    return @link_templates.freeze if @link_templates

    @link_templates =
        if text?
            [link_template].compact | LinkTemplate.from_document( @url, document )
        else
            [link_template].compact
        end
end

#link_vars ⇒ `Hash`

Returns Parameters found in #url.

Returns:

(Hash) —

Parameters found in #url.

# File 'lib/arachni/parser.rb', line 219

# Extracts the query parameters of @url.
#
# A successfully extracted (frozen) Hash is memoized in @link_vars and
# returned right away on later calls, without parsing the URL again. When the
# URL cannot be parsed, a new empty Hash is returned and nothing is memoized.
#
# @return [Hash]
#   Parameters found in #url.
def link_vars
    # Check the memo first, so the URL is not re-parsed on every call.
    return @link_vars if @link_vars

    parsed = uri_parse( @url )
    return {} if !parsed

    @link_vars = parsed.rewrite.query_parameters.freeze
end

#links ⇒ `Array<Element::Link>`

Returns Links in #document.

Returns:

(Array<Element::Link>) —

Links in #document.

# File 'lib/arachni/parser.rb', line 200

# Collects the page's own #link (if any) and, for text-based responses, the
# links found in #document.
#
# The result is memoized; calls after the first return it frozen.
#
# @return [Array<Element::Link>]
#   Links in #document.
def links
    return @links.freeze if @links

    @links =
        if text?
            [link].compact | Link.from_document( @url, document )
        else
            [link].compact
        end
end

#page ⇒ `Page`

Returns:

(Page)



98
99
100

# File 'lib/arachni/parser.rb', line 98

# Page built from this parser; created on first use and then reused.
#
# @return [Page]
def page
    return @page if @page

    @page = Page.new( parser: self )
end

#paths ⇒ `Array<String>`

Returns Distinct links to follow.

Returns:

(Array<String>) —

Distinct links to follow.

# File 'lib/arachni/parser.rb', line 279

# Runs the path extractors, provided that there is a #document to run them
# on, and memoizes the frozen result.
#
# @paths is set to an empty Array before the document is checked and the
# extractors are run; without a document that (frozen) Array is the result.
#
# @return [Array<String>]
#   Distinct links to follow.
def paths
    return @paths if @paths

    @paths = []
    @paths = document ? run_extractors.freeze : @paths.freeze
end

#text? ⇒ `Boolean`

Returns `true` if the given HTTP response data are text-based, `false` otherwise.

Returns:

(Boolean) —

`true` if the given HTTP response data are text-based, `false` otherwise.



104
105
106

# File 'lib/arachni/parser.rb', line 104

# Tells whether there is text to parse: a non-empty body override always
# counts as text, otherwise the response decides.
#
# @return [Boolean]
#   `true` when a non-empty body override is set, otherwise whatever
#   `@response.text?` returns.
def text?
    return true unless @body.to_s.empty?

    @response.text?
end

#to_absolute(relative_url) ⇒ `String`

Converts a relative URL to an absolute one.

Parameters:

relative_url (String) —

URL to convert to absolute.

Returns:

(String) —

Absolute URL.

# File 'lib/arachni/parser.rb', line 87

# Converts a relative URL to an absolute one by delegating to the inherited
# two-argument implementation. The reference URL is the document's #base
# `href` when there is one, otherwise @url.
#
# @param relative_url [String]
#   URL to convert to absolute.
# @return [String]
#   Absolute URL.
def to_absolute( relative_url )
    super( relative_url, base || @url )
end

Class: Arachni::Parser

Overview

Defined Under Namespace

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utilities

Methods included from UI::Output

Constructor Details

#initialize(response, options = Options) ⇒ Parser

Instance Attribute Details

#response ⇒ HTTP::Response (readonly)

#url ⇒ String

Instance Method Details

#base ⇒ String

#body ⇒ Object

#body=(string) ⇒ String

#cookie_jar ⇒ Array<Element::Cookie>

#cookies ⇒ Array<Element::Cookie>

#cookies_to_be_audited ⇒ Array<Element::Cookie>

#document ⇒ Nokogiri::HTML?

#forms ⇒ Array<Element::Form>

#headers ⇒ Hash

#link ⇒ Element::Link

#link_template ⇒ Element::LinkTemplate

#link_templates ⇒ Array<Element::LinkTemplate>

#link_vars ⇒ Hash

#links ⇒ Array<Element::Link>

#page ⇒ Page

#paths ⇒ Array<String>

#text? ⇒ Boolean

#to_absolute(relative_url) ⇒ String

#initialize(response, options = Options) ⇒ `Parser`

#response ⇒ `HTTP::Response` (readonly)

#url ⇒ `String`

#base ⇒ `String`

#body ⇒ `Object`

#body=(string) ⇒ `String`

#cookie_jar ⇒ `Array<Element::Cookie>`

#cookies ⇒ `Array<Element::Cookie>`

#cookies_to_be_audited ⇒ `Array<Element::Cookie>`

#document ⇒ `Nokogiri::HTML`?

#forms ⇒ `Array<Element::Form>`

#headers ⇒ `Hash`

#link ⇒ `Element::Link`

#link_template ⇒ `Element::LinkTemplate`

#link_templates ⇒ `Array<Element::LinkTemplate>`

#link_vars ⇒ `Hash`

#links ⇒ `Array<Element::Link>`

#page ⇒ `Page`

#paths ⇒ `Array<String>`

#text? ⇒ `Boolean`

#to_absolute(relative_url) ⇒ `String`