Class: Arachni::Parser
- Includes:
- UI::Output, Utilities
- Defined in:
- lib/arachni/parser.rb
Overview
Analyzes HTML code, extracting input vectors and supporting information.
Defined Under Namespace
Modules: Extractors
Instance Attribute Summary collapse
- #response ⇒ HTTP::Response readonly
- #url ⇒ String
Instance Method Summary collapse
-
#base ⇒ String
Base `href`, if there is one.
- #body ⇒ Object
-
#body=(string) ⇒ String
Override the #response body for the parsing process.
-
#cookie_jar ⇒ Array<Element::Cookie>
Cookies with which to update the HTTP cookie-jar.
-
#cookies ⇒ Array<Element::Cookie>
Cookies from HTTP headers and response body.
-
#cookies_to_be_audited ⇒ Array<Element::Cookie>
Cookies to be audited.
-
#document ⇒ Nokogiri::HTML?
Returns a parsed HTML document from the body of the HTTP response, or `nil` if the response data wasn’t text-based or the response couldn’t be parsed.
-
#forms ⇒ Array<Element::Form>
Forms from #document.
-
#headers ⇒ Hash
List of valid auditable HTTP header fields.
-
#initialize(response, options = Options) ⇒ Parser
constructor
A new instance of Parser.
-
#link ⇒ Element::Link
Link to the page.
-
#link_template ⇒ Element::LinkTemplate
LinkTemplate for the current page.
-
#link_templates ⇒ Array<Element::LinkTemplate>
Links matching OptionsGroups::Audit#link_templates in #document.
-
#link_vars ⇒ Hash
Parameters found in #url.
-
#links ⇒ Array<Element::Link>
Links in #document.
- #page ⇒ Page
-
#paths ⇒ Array<String>
Distinct links to follow.
-
#text? ⇒ Boolean
`true` if the given HTTP response data are text-based, `false` otherwise.
-
#to_absolute(relative_url) ⇒ String
Converts a relative URL to an absolute one.
Methods included from Utilities
#available_port, #caller_name, #caller_path, #cookie_decode, #cookie_encode, #cookies_from_document, #cookies_from_file, #cookies_from_response, #exception_jail, #exclude_path?, #follow_protocol?, #form_decode, #form_encode, #forms_from_document, #forms_from_response, #generate_token, #get_path, #hms_to_seconds, #html_decode, #html_encode, #include_path?, #links_from_document, #links_from_response, #normalize_url, #page_from_response, #page_from_url, #parse_set_cookie, #path_in_domain?, #path_too_deep?, #port_available?, #rand_port, #random_seed, #redundant_path?, #remove_constants, #request_parse_body, #seconds_to_hms, #skip_page?, #skip_path?, #skip_resource?, #skip_response?, #uri_decode, #uri_encode, #uri_parse, #uri_parse_query, #uri_parser, #uri_rewrite
Methods included from UI::Output
#debug?, #debug_off, #debug_on, #disable_only_positives, #included, #mute, #muted?, #only_positives, #only_positives?, #print_bad, #print_debug, #print_debug_backtrace, #print_debug_level_1, #print_debug_level_2, #print_debug_level_3, #print_error, #print_error_backtrace, #print_exception, #print_info, #print_line, #print_ok, #print_status, #print_verbose, #reroute_to_file, #reroute_to_file?, reset_output_options, #unmute, #verbose?, #verbose_on
Constructor Details
#initialize(response, options = Options) ⇒ Parser
Returns a new instance of Parser.
61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/arachni/parser.rb', line 61 def initialize( response, = Options ) @options = if response.is_a? Array @secondary_responses = response[1..-1] @secondary_responses.compact! if @secondary_responses response = response.shift end @response = response self.url = response.url end |
Instance Attribute Details
#response ⇒ HTTP::Response (readonly)
53 54 55 |
# File 'lib/arachni/parser.rb', line 53 def response @response end |
Instance Method Details
#base ⇒ String
Returns the base `href`, if there is one.
289 290 291 |
# File 'lib/arachni/parser.rb', line 289 def base @base ||= document.search( '//base[@href]' ).first['href'] rescue nil end |
#body ⇒ Object
115 116 117 |
# File 'lib/arachni/parser.rb', line 115 def body @body || @response.body end |
#body=(string) ⇒ String
Overrides the #response body for the parsing process and returns the new body.
110 111 112 113 |
# File 'lib/arachni/parser.rb', line 110 def body=( string ) @links = @forms = @cookies = @document = nil @body = string end |
#cookie_jar ⇒ Array<Element::Cookie>
Returns Cookies with which to update the HTTP cookie-jar.
264 265 266 267 268 269 270 271 272 273 274 275 |
# File 'lib/arachni/parser.rb', line 264 def return @cookie_jar.freeze if @cookie_jar from_jar = [] # Make a list of the response cookie names. = Set.new( .map( &:name ) ) from_jar |= HTTP::Client..for_url( @url ). reject { || .include?( .name ) } @cookie_jar = ( | from_jar) end |
#cookies ⇒ Array<Element::Cookie>
Returns Cookies from HTTP headers and response body.
227 228 229 230 231 232 233 234 |
# File 'lib/arachni/parser.rb', line 227 def return @cookies.freeze if @cookies @cookies = Cookie.from_headers( @url, @response.headers ) return @cookies if !text? @cookies |= Cookie.from_document( @url, document ) end |
#cookies_to_be_audited ⇒ Array<Element::Cookie>
Returns Cookies to be audited.
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
# File 'lib/arachni/parser.rb', line 238 def return @cookies_to_be_audited.freeze if @cookies_to_be_audited return [] if !text? # Make a list of the response cookie names. = Set.new( .map(&:name) ) # Grab all cookies from the cookiejar giving preferrence to the ones # specified by the current page, if there are any. from_http_jar = HTTP::Client...reject do |c| .include?( c.name ) end # These cookies are to be audited and thus are dirty and anarchistic, # so they have to contain even cookies completely irrelevant to the # current page. I.e. it contains all cookies that have been observed # since the beginning of the scan @cookies_to_be_audited = ( | from_http_jar).map do |c| dc = c.dup dc.action = @url dc end end |
#document ⇒ Nokogiri::HTML?
Returns a parsed HTML document from the body of the HTTP response, or `nil` if the response data wasn’t text-based or the response couldn’t be parsed.
123 124 125 126 |
# File 'lib/arachni/parser.rb', line 123 def document return @document.freeze if @document @document = Nokogiri::HTML( body ) if text? rescue nil end |
#forms ⇒ Array<Element::Form>
Returns Forms from #document.
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
# File 'lib/arachni/parser.rb', line 149 def forms return @forms.freeze if @forms return [] if !text? f = Form.from_document( @url, document ) return f if !@secondary_responses @secondary_responses.each do |response| next if response.body.to_s.empty? Form.from_document( @url, response.body ).each do |form2| f.each do |form| next if "#{form.coverage_id}:#{form.name_or_id}" != "#{form2.coverage_id}:#{form2.name_or_id}" form.inputs.each do |k, v| next if !(v != form2.inputs[k] && form.field_type_for( k ) == :hidden) form.nonce_name = k end end end end @forms = f end |
#headers ⇒ Hash
This is more of a placeholder method; it doesn’t actually analyze anything. It’s a long shot that any of these will be vulnerable, but better safe than sorry.
Returns List of valid auditable HTTP header fields.
134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/arachni/parser.rb', line 134 def headers @headers ||= { 'Accept' => 'text/html,application/xhtml+xml,application' + '/xml;q=0.9,*/*;q=0.8', 'Accept-Charset' => 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', 'From' => @options. || '', 'User-Agent' => @options.http.user_agent || '', 'Referer' => @url, 'Pragma' => 'no-cache' }.map { |k, v| Header.new( url: @url, inputs: { k => v } ) }.freeze end |
#link ⇒ Element::Link
Returns Link to the page.
179 180 181 182 |
# File 'lib/arachni/parser.rb', line 179 def link return if link_vars.empty? && !@response.redirection? Link.new( url: @url, inputs: link_vars ) end |
#link_template ⇒ Element::LinkTemplate
Returns LinkTemplate for the current page.
186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/arachni/parser.rb', line 186 def link_template template, inputs = LinkTemplate.extract_inputs( @url ) return if !template LinkTemplate.new( url: @url.freeze, action: @url.freeze, inputs: inputs, template: template ) end |
#link_templates ⇒ Array<Element::LinkTemplate>
Returns Links matching OptionsGroups::Audit#link_templates in #document.
209 210 211 212 213 214 215 |
# File 'lib/arachni/parser.rb', line 209 def link_templates return @link_templates.freeze if @link_templates return @link_templates = [link_template].compact if !text? @link_templates = [link_template].compact | LinkTemplate.from_document( @url, document ) end |
#link_vars ⇒ Hash
Returns Parameters found in #url.
219 220 221 222 223 |
# File 'lib/arachni/parser.rb', line 219 def link_vars return {} if (!parsed = uri_parse( @url )) @link_vars ||= parsed.rewrite.query_parameters.freeze end |
#links ⇒ Array<Element::Link>
Returns Links in #document.
200 201 202 203 204 205 |
# File 'lib/arachni/parser.rb', line 200 def links return @links.freeze if @links return @links = [link].compact if !text? @links = [link].compact | Link.from_document( @url, document ) end |
#page ⇒ Page
98 99 100 |
# File 'lib/arachni/parser.rb', line 98 def page @page ||= Page.new( parser: self ) end |
#paths ⇒ Array<String>
Returns Distinct links to follow.
279 280 281 282 283 284 285 |
# File 'lib/arachni/parser.rb', line 279 def paths return @paths if @paths @paths = [] return @paths.freeze if !document @paths = run_extractors.freeze end |
#text? ⇒ Boolean
Returns `true` if the given HTTP response data are text-based, `false` otherwise.
104 105 106 |
# File 'lib/arachni/parser.rb', line 104 def text? !@body.to_s.empty? || @response.text? end |
#to_absolute(relative_url) ⇒ String
Converts a relative URL to an absolute one.
87 88 89 90 91 92 93 94 95 |
# File 'lib/arachni/parser.rb', line 87 def to_absolute( relative_url ) if (url = base) base_url = url else base_url = @url end super( relative_url, base_url ) end |