Class: Arachni::Parser

Inherits:
Object show all
Includes:
UI::Output, Utilities
Defined in:
lib/arachni/parser.rb

Overview

Analyzes HTML code, extracting input vectors and supporting information.

Author:

Defined Under Namespace

Modules: Extractors

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utilities

#available_port, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite

Methods included from UI::Output

#debug?, #debug_off, #debug_on, #disable_only_positives, #included, #mute, #muted?, #only_positives, #only_positives?, #print_bad, #print_debug, #print_debug_backtrace, #print_debug_level_1, #print_debug_level_2, #print_debug_level_3, #print_error, #print_error_backtrace, #print_exception, #print_info, #print_line, #print_ok, #print_status, #print_verbose, #reroute_to_file, #reroute_to_file?, reset_output_options, #unmute, #verbose?, #verbose_on

Constructor Details

#initialize(response, options = Options) ⇒ Parser

Returns a new instance of Parser.

Parameters:

  • response (HTTP::Response, Array<HTTP::Response>)

    Response(s) to analyze and parse. By providing multiple responses the parser will be able to perform some preliminary differential analysis and identify nonce tokens in inputs.

  • options (Options) (defaults to: Options)


61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/arachni/parser.rb', line 61

# Sets up the parser for the given response(s).
#
# @param [HTTP::Response, Array<HTTP::Response>] response
#   Response(s) to analyze and parse. When an Array is given, its first
#   element becomes the primary response and the remaining non-nil elements
#   are kept as secondary responses.
# @param [Options] options
def initialize( response, options = Options )
    @options = options

    if response.is_a? Array
        # Read the elements instead of using `shift`, so that the Array passed
        # in by the caller is left untouched.
        @secondary_responses = response.drop( 1 ).compact
        response             = response.first
    end

    @response = response
    self.url  = response.url
end

Instance Attribute Details

#response ⇒ HTTP::Response (readonly)

Returns:



53
54
55
# File 'lib/arachni/parser.rb', line 53

# Reader for the response held in `@response`.
#
# @return [HTTP::Response]
def response
  @response
end

#url ⇒ String

Returns:



50
51
52
# File 'lib/arachni/parser.rb', line 50

# Reader for the URL held in `@url`.
#
# @return [String]
def url
  @url
end

Instance Method Details

#base ⇒ String

Returns the base `href`, if there is one.

Returns:

  • (String)

    Base `href`, if there is one.



289
290
291
# File 'lib/arachni/parser.rb', line 289

# Looks up the `href` of the first `<base href="...">` element of the
# document. A found value is memoized; a missing one is looked up again on
# the next call.
#
# @return [String, nil]
#   Base `href`, or `nil` when there is none or the lookup raised an error.
def base
    return @base if @base

    @base = begin
        document.search( '//base[@href]' ).first['href']
    rescue
        # Covers a missing document or a missing element as well as any other
        # StandardError raised by the lookup.
        nil
    end
end

#body ⇒ Object



115
116
117
# File 'lib/arachni/parser.rb', line 115

# @return [String]
#   The body override set via {#body=} when there is one, otherwise the body
#   of the response.
def body
    return @body if @body

    @response.body
end

#body=(string) ⇒ String

Overrides the #response body for the parsing process.

Returns:



110
111
112
113
# File 'lib/arachni/parser.rb', line 110

# Overrides the {#response} body for the parsing process.
#
# Every memoized value that is computed from the body (directly, through
# the document, or through the cookies) is dropped so that it gets rebuilt
# from the new body on next access.
#
# @param [String] string
#   Body to parse instead of the one of the response.
# @return [String]
def body=( string )
    @links = @forms = @cookies = @document = nil

    # These are derived from the document or the cookies as well and would
    # otherwise keep serving data of the previous body.
    @link_templates = @paths = @base = nil
    @cookie_jar = @cookies_to_be_audited = nil

    @body = string
end

Returns Cookies with which to update the HTTP cookie-jar.

Returns:



264
265
266
267
268
269
270
271
272
273
274
275
# File 'lib/arachni/parser.rb', line 264

# Combines the cookies of the page with the cookies that the HTTP cookie-jar
# holds for the page URL; a jar cookie is skipped when the page sets a cookie
# of the same name.
#
# @return [Array]
#   Cookies with which to update the HTTP cookie-jar (memoized).
def cookie_jar
    return @cookie_jar.freeze if @cookie_jar

    # Names of the cookies found in the response.
    response_cookie_names = Set.new( cookies.map { |cookie| cookie.name } )

    jar_only = HTTP::Client.cookie_jar.for_url( @url ).reject do |cookie|
        response_cookie_names.include?( cookie.name )
    end

    @cookie_jar = cookies | jar_only
end

#cookies ⇒ Array<Element::Cookie>

Returns Cookies from HTTP headers and response body.

Returns:



227
228
229
230
231
232
233
234
# File 'lib/arachni/parser.rb', line 227

# Collects the cookies of the response headers and, for text-based
# responses, also the ones found in the document.
#
# @return [Array<Element::Cookie>]
#   Cookies from HTTP headers and response body (memoized).
def cookies
    return @cookies.freeze if @cookies

    @cookies = Cookie.from_headers( @url, @response.headers )

    if text?
        @cookies |= Cookie.from_document( @url, document )
    else
        @cookies
    end
end

#cookies_to_be_audited ⇒ Array<Element::Cookie>

Returns Cookies to be audited.

Returns:



238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/arachni/parser.rb', line 238

# Builds the list of cookies to audit: the cookies of the page plus every
# cookie of the HTTP cookie-jar whose name the page does not set itself.
# Each entry is a copy whose action is set to the URL of the page.
#
# @return [Array<Element::Cookie>]
#   Cookies to be audited (memoized); empty for non text-based responses.
def cookies_to_be_audited
    return @cookies_to_be_audited.freeze if @cookies_to_be_audited
    return [] unless text?

    # Names of the cookies found in the response.
    page_cookie_names = Set.new( cookies.map { |cookie| cookie.name } )

    # Take everything from the cookie-jar, giving preference to the cookies
    # specified by the current page, if there are any.
    jar_cookies = HTTP::Client.cookie_jar.cookies.reject do |cookie|
        page_cookie_names.include?( cookie.name )
    end

    # These cookies are meant to be audited, so the list deliberately
    # includes cookies that are irrelevant to the current page, i.e. all
    # cookies that have been observed since the beginning of the scan.
    @cookies_to_be_audited = (cookies | jar_cookies).map do |cookie|
        cookie.dup.tap { |copy| copy.action = @url }
    end
end

#document ⇒ Nokogiri::HTML?

Returns a parsed HTML document from the body of the HTTP response or `nil` if the response data wasn't text-based or the response couldn't be parsed.

Returns:

  • (Nokogiri::HTML, nil)

    Returns a parsed HTML document from the body of the HTTP response or `nil` if the response data wasn't text-based or the response couldn't be parsed.



123
124
125
126
# File 'lib/arachni/parser.rb', line 123

# Parses {#body} with Nokogiri when the data is text-based. A successfully
# parsed document is memoized and handed out frozen on subsequent calls.
#
# @return [Nokogiri::HTML, nil]
#   Parsed document, or `nil` if the data isn't text-based or an error was
#   raised while parsing.
def document
    return @document.freeze if @document

    begin
        @document = Nokogiri::HTML( body ) if text?
    rescue
        # Any StandardError raised above results in no document.
        nil
    end
end

#forms ⇒ Array<Element::Form>

Returns Forms from #document.

Returns:



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/arachni/parser.rb', line 149

# Extracts the forms of the document. When secondary responses are
# available, each form is compared with its counterpart (same coverage ID
# and name/ID) in those responses, and a hidden input whose value differs
# between them is marked as the nonce of the form.
#
# @return [Array<Element::Form>]
#   Forms from {#document} (memoized); empty for non text-based responses.
def forms
    return @forms.freeze if @forms
    return [] if !text?

    page_forms = Form.from_document( @url, document )

    # Memoize on this path too, otherwise the document gets parsed for forms
    # again on every call when there are no secondary responses.
    return @forms = page_forms if !@secondary_responses

    @secondary_responses.each do |response|
        next if response.body.to_s.empty?

        Form.from_document( @url, response.body ).each do |other|
            # Does not depend on the inner loop, so build it only once.
            other_key = "#{other.coverage_id}:#{other.name_or_id}"

            page_forms.each do |form|
                next if "#{form.coverage_id}:#{form.name_or_id}" != other_key

                form.inputs.each do |name, value|
                    # Only hidden inputs whose value changed between the
                    # responses are treated as nonces.
                    next if !(value != other.inputs[name] &&
                        form.field_type_for( name ) == :hidden)

                    form.nonce_name = name
                end
            end
        end
    end

    @forms = page_forms
end

#headersHash

Note:

This is more of a placeholder method; it doesn’t actually analyze anything. It’s a long shot that any of these headers will be vulnerable, but better safe than sorry.

Returns List of valid auditable HTTP header fields.

Returns:

  • (Hash)

    List of valid auditable HTTP header fields.



134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/arachni/parser.rb', line 134

# Builds one Header element per auditable HTTP header field, each holding a
# single name/value input and the URL of the page.
#
# @note Nothing is analyzed here, the list of fields is fixed.
#
# @return [Array]
#   Frozen list of Header elements (memoized).
def headers
    return @headers if @headers

    fields = {
        'Accept'          => 'text/html,application/xhtml+xml,application' +
            '/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset'  => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3',
        'From'            => @options.authorized_by  || '',
        'User-Agent'      => @options.http.user_agent || '',
        'Referer'         => @url,
        'Pragma'          => 'no-cache'
    }

    elements = fields.map do |name, value|
        Header.new( url: @url, inputs: { name => value } )
    end

    @headers = elements.freeze
end

Returns Link to the page.

Returns:



179
180
181
182
# File 'lib/arachni/parser.rb', line 179

# Builds the link to the page from its URL and the parameters of that URL.
#
# @return [Link, nil]
#   Link to the page; `nil` when the URL has no parameters and the response
#   is not a redirection.
def link
    has_inputs = !link_vars.empty?
    return unless has_inputs || @response.redirection?

    Link.new( url: @url, inputs: link_vars )
end

Returns LinkTemplate for the current page.

Returns:



186
187
188
189
190
191
192
193
194
195
196
# File 'lib/arachni/parser.rb', line 186

# Builds the LinkTemplate of the page URL from the template and inputs that
# `LinkTemplate.extract_inputs` returns for it. The URL string gets frozen
# in the process.
#
# @return [LinkTemplate, nil]
#   LinkTemplate for the current page; `nil` when no template was extracted.
def link_template
    template, inputs = LinkTemplate.extract_inputs( @url )
    return unless template

    page_url = @url.freeze

    LinkTemplate.new(
        url:      page_url,
        action:   page_url,
        inputs:   inputs,
        template: template
    )
end

Returns Links matching OptionsGroups::Audit#link_templates in #document.

Returns:



209
210
211
212
213
214
215
# File 'lib/arachni/parser.rb', line 209

# Collects the LinkTemplate of the page itself (if any) and, for text-based
# responses, the ones found in the document.
#
# @return [Array]
#   LinkTemplates of the page and its document (memoized).
def link_templates
    return @link_templates.freeze if @link_templates

    @link_templates =
        if text?
            [link_template].compact | LinkTemplate.from_document( @url, document )
        else
            [link_template].compact
        end
end

Returns Parameters found in #url.

Returns:



219
220
221
222
223
# File 'lib/arachni/parser.rb', line 219

# Extracts the query parameters of the page URL. The URL is parsed on each
# call; the parameters are memoized once they have been extracted.
#
# @return [Hash]
#   Parameters found in {#url}; an empty Hash when the URL can't be parsed.
def link_vars
    parsed = uri_parse( @url )
    return {} unless parsed

    @link_vars ||= parsed.rewrite.query_parameters.freeze
end

Returns Links in #document.

Returns:



200
201
202
203
204
205
# File 'lib/arachni/parser.rb', line 200

# Collects the link to the page itself (if any) and, for text-based
# responses, the links found in the document.
#
# @return [Array]
#   Links of the page and its document (memoized).
def links
    return @links.freeze if @links

    @links =
        if text?
            [link].compact | Link.from_document( @url, document )
        else
            [link].compact
        end
end

#page ⇒ Page

Returns:



98
99
100
# File 'lib/arachni/parser.rb', line 98

# @return [Page]
#   Page built with this parser, created on first access and then reused.
def page
    return @page if @page

    @page = Page.new( parser: self )
end

#paths ⇒ Array<String>

Returns Distinct links to follow.

Returns:



279
280
281
282
283
284
285
# File 'lib/arachni/parser.rb', line 279

# @return [Array<String>]
#   Frozen, memoized result of `run_extractors`; a frozen empty Array when
#   there is no document.
def paths
  return @paths if @paths

  # Start out with an empty list, it is kept when there is no document.
  @paths = []
  @paths = document ? run_extractors.freeze : @paths.freeze
end

#text? ⇒ Boolean

Returns `true` if the given HTTP response data are text based, `false` otherwise.

Returns:

  • (Boolean)

    `true` if the given HTTP response data are text based, `false` otherwise.



104
105
106
# File 'lib/arachni/parser.rb', line 104

# @return [Boolean]
#   `true` when a non-empty body override has been set; otherwise whatever
#   the response reports via its own `text?`.
def text?
    return true unless @body.to_s.empty?

    @response.text?
end

#to_absolute(relative_url) ⇒ String

Converts a relative URL to an absolute one.

Parameters:

  • relative_url (String)

    URL to convert to absolute.

Returns:



87
88
89
90
91
92
93
94
95
# File 'lib/arachni/parser.rb', line 87

# Converts a relative URL to an absolute one by delegating to the inherited
# `to_absolute`, resolving against the base `href` of the document when
# there is one and against the URL of the page otherwise.
#
# @param [String] relative_url
#   URL to convert to absolute.
# @return [String]
def to_absolute( relative_url )
    super( relative_url, base || @url )
end