Class: WWMD::Page
- Inherits:
-
Object
- Object
- WWMD::Page
- Includes:
- WWMDUtils
- Defined in:
- lib/wwmd/page/page.rb,
lib/wwmd/page/auth.rb,
lib/wwmd/page/headers.rb,
lib/wwmd/page/helpers.rb,
lib/wwmd/page/irb_helpers.rb,
lib/wwmd/page/html2text_hpricot.rb,
lib/wwmd/page/reporting_helpers.rb,
lib/wwmd/page/html2text_nokogiri.rb,
lib/wwmd/page/parsing_convenience.rb
Overview
NamedCharRegex = Regexp.new("(&("+Hpricot::NamedCharacters.keys.join("|")+");)")
Instance Attribute Summary collapse
-
#base_url ⇒ Object
needed to properly munge relative urls into fq urls.
-
#body_data ⇒ Object
Returns the value of attribute body_data.
-
#comments ⇒ Object
readonly
Returns the value of attribute comments.
-
#curl_object ⇒ Object
Returns the value of attribute curl_object.
-
#forms ⇒ Object
readonly
Returns the value of attribute forms.
-
#header_data ⇒ Object
Returns the value of attribute header_data.
-
#inputs ⇒ Object
Returns the value of attribute inputs.
-
#jlinks ⇒ Object
readonly
array of included javascript files.
-
#last_error ⇒ Object
readonly
Returns the value of attribute last_error.
-
#links ⇒ Object
readonly
array of links (urls).
-
#logged_in ⇒ Object
are we logged in?.
-
#opts ⇒ Object
Returns the value of attribute opts.
-
#post_data ⇒ Object
Returns the value of attribute post_data.
-
#scrape ⇒ Object
readonly
scrape object.
-
#spider ⇒ Object
readonly
spider object.
-
#urlparse ⇒ Object
readonly
urlparse object.
-
#use_referer ⇒ Object
Returns the value of attribute use_referer.
Instance Method Summary collapse
-
#_body_cb(data) ⇒ Object
callback for self.on_body.
-
#_header_cb(data) ⇒ Object
callback for self.on_header.
-
#action(id = nil) ⇒ Object
return the complete url to the form action on this page.
-
#all_forms ⇒ Object
IRB: display a human readable report of all forms contained in page.body_data.
-
#all_tags ⇒ Object
:nodoc:.
-
#auth? ⇒ Boolean
does the response have a WWW-Authenticate header (only checked when the response code is 401)?
-
#clear_data ⇒ Object
clear self.body_data and self.header_data.
-
#clear_header(key) ⇒ Object
(also: #delete_header)
clear header at <key>.
-
#clear_headers ⇒ Object
clear all headers.
-
#code ⇒ Object
the last http response code.
-
#current_url ⇒ Object
(also: #current, #cur, #now)
alias_method for last_effective_url.
-
#default_headers(arg = nil) ⇒ Object
(also: #set_default)
set headers back to default headers.
-
#dump_body ⇒ Object
(also: #dump)
display self.body_data.
-
#dump_scripts ⇒ Object
(also: #scripts)
return an array of inner_html for each <script> tag encountered.
- #element_to_text(n) ⇒ Object
-
#fingerprint ⇒ Object
(also: #fp)
return an MD5 fingerprint of the DOM: take all tag names in the page, convert the list to a string and hash it (all_tags.to_s.md5).
-
#from_paste ⇒ Object
copy and paste from a burp request window (read with pbpaste): the page object gets set with the pasted headers and url (the url is not necessarily correct). returns [headers,form]. usage: form = page.from_paste.
- #furl(url) ⇒ Object
-
#get(url = nil, parse = true) ⇒ Object
override for Curl::Easy.perform.
-
#get_cookie ⇒ Object
get the current Cookie header.
-
#get_form(id = nil) ⇒ Object
return this page’s form (at index id) as a FormArray.
-
#get_link(index) ⇒ Object
(also: #link, #l)
return link at index from @links array.
-
#grep(reg) ⇒ Object
grep for regexp and remove leading whitespace.
- #has_comments? ⇒ Boolean
- #has_form? ⇒ Boolean
- #has_jlinks? ⇒ Boolean
- #has_links? ⇒ Boolean
-
#head(i = 1) ⇒ Object
:section: IRB helper methods.
-
#headers_from_array(arr) ⇒ Object
set headers from text.
-
#headers_from_file(fn) ⇒ Object
set headers from file.
-
#headers_from_paste ⇒ Object
set headers from paste.
-
#hexdump ⇒ Object
hexdump self.body_data.
-
#html2text ⇒ Object
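convert the parsed page to plain text and clean up whitespace. NOTE: the comment attached to this method in the source is a disabled helper rather than a description: def lookup_named_char(s) c = Hpricot::NamedCharacters[s[1...-1]] c.chr if c end.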
-
#initialize(opts = {}, &block) ⇒ Page
constructor
A new instance of Page.
-
#logged_in? ⇒ Boolean
return value of @logged_in.
-
#md5 ⇒ Object
return md5sum for self.body_data.
-
#method_missing(methodname, *args) ⇒ Object
send methods not defined here to @curl_object.
- #node_to_text(n) ⇒ Object
- #onclicks ⇒ Object
-
#open ⇒ Object
this only works on a mac so get a mac.
-
#page_status ⇒ Object
(also: #status)
return text representation of page code.
-
#perform ⇒ Object
override Curl::Easy.perform to perform page actions, call self.set_data.
-
#post(url = nil) ⇒ Object
take a GET-style url with parameters and submit the parameters as a POST form.
-
#raw ⇒ Object
alias_method for body_data.
-
#read(filename) ⇒ Object
read self.body_data from file.
-
#report(short = nil) ⇒ Object
(also: #show)
IRB: text report what has been parsed from this page.
-
#report_flags ⇒ Object
return a string of flags, uppercase when present and lowercase when absent: L/l links, J/j javascript includes, F/f forms, C/c comments.
-
#request_headers ⇒ Object
(also: #show_headers, #req_headers)
IRB: display current headers.
- #resp_paste ⇒ Object
-
#response_headers ⇒ Object
(also: #resp_headers)
IRB: display response headers.
-
#search(xpath) ⇒ Object
return an array of Element objects for an xpath search.
-
#set_ajax_headers ⇒ Object
set headers to ajax.
-
#set_cookie(cookie = nil) ⇒ Object
set the Cookie header.
-
#set_cookies? ⇒ Boolean
does this response have SET-COOKIE headers?.
-
#set_data ⇒ Object
set reporting data for the page.
-
#set_headers(arg = nil, clear = false) ⇒ Object
set headers from passed argument. Nil: set headers from WWMD::DEFAULT_HEADERS; Symbol: entry in WWMD::HEADERS to set from; Hash: hash to set headers from; String: filename (NOT IMPLEMENTED).
-
#set_link(index) ⇒ Object
set self.url to the link at the given integer index, as listed by self.report. NOTE: I always use page.get(page.l(1)) anyway.
-
#set_soap_headers ⇒ Object
set headers to SOAP request headers.
-
#set_utf7_headers ⇒ Object
set headers to utf7 encoding post.
-
#setbase(url = nil) ⇒ Object
set self.opts[:base_url] and self.base_url to url (returns nil and changes nothing if url is nil).
-
#size ⇒ Object
return page size in bytes.
-
#submit(iform = nil, reg = WWMD::ESCAPE[:default]) ⇒ Object
replacement for Curl::Easy.http_post.
-
#submit_string(post_string) ⇒ Object
submit a form using POST string.
-
#summary ⇒ Object
IRB: display summary of what has been parsed from this page.
- #text ⇒ Object
- #time ⇒ Object
-
#to_text ⇒ Object
IRB: puts the page filtered through html2text.
-
#verb(verb, url = nil) ⇒ Object
send arbitrary verb (only works with patch to taf2-curb).
-
#write(filename) ⇒ Object
write self.body_data to file.
Methods included from WWMDUtils
header_array_from_file, rannum, ranstr
Constructor Details
#initialize(opts = {}, &block) ⇒ Page
Returns a new instance of Page.
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# File 'lib/wwmd/page/page.rb', line 28 def initialize(opts={}, &block) @opts = opts.clone DEFAULTS.each { |k,v| @opts[k] = v unless opts[k] } @spider = Spider.new(opts) @scrape = Scrape.new @base_url ||= opts[:base_url] @scrape.warn = opts[:scrape_warn] if !opts[:scrape_warn].nil? # yeah yeah... bool false @urlparse = URLParse.new() @inputs = Inputs.new(self) @logged_in = false @body_data = "" @post_data = "" @comments = [] @header_data = FormArray.new @curl_object = Curl::Easy.new @opts.each do |k,v| next if k == :proxy_url self.instance_variable_set("@#{k.to_s}".intern,v) if (@curl_object.methods.include?("#{k}=")) @curl_object.send("#{k}=",v) end end @curl_object.on_body { |data| self._body_cb(data) } @curl_object.on_header { |data| self._header_cb(data) } # cookies? @curl_object.enable_cookies = @opts[:enable_cookies] if @curl_object.enable_cookies? @curl_object.cookiejar = @opts[:cookiejar] || "./__cookiejar" end #proxy? @curl_object.proxy_url = @opts[:proxy_url] if @opts[:use_proxy] instance_eval(&block) if block_given? if opts.empty? && @scrape.warn putw "Page initialized without opts" @scrape.warn = false end end |
Dynamic Method Handling
This class handles dynamic methods through the method_missing method
#method_missing(methodname, *args) ⇒ Object
send methods not defined here to @curl_object
232 233 234 235 236 237 238 |
# File 'lib/wwmd/page/page.rb', line 232 def method_missing(methodname, *args) if self.respond_to?(methodname) self.send(methodname, *args) else @curl_object.send(methodname, *args) end end |
Instance Attribute Details
#base_url ⇒ Object
needed to properly munge relative urls into fq urls
20 21 22 |
# File 'lib/wwmd/page/page.rb', line 20 def base_url @base_url end |
#body_data ⇒ Object
Returns the value of attribute body_data.
7 8 9 |
# File 'lib/wwmd/page/page.rb', line 7 def body_data @body_data end |
#comments ⇒ Object (readonly)
Returns the value of attribute comments.
18 19 20 |
# File 'lib/wwmd/page/page.rb', line 18 def comments @comments end |
#curl_object ⇒ Object
Returns the value of attribute curl_object.
6 7 8 |
# File 'lib/wwmd/page/page.rb', line 6 def curl_object @curl_object end |
#forms ⇒ Object (readonly)
Returns the value of attribute forms.
11 12 13 |
# File 'lib/wwmd/page/page.rb', line 11 def forms @forms end |
#header_data ⇒ Object
Returns the value of attribute header_data.
9 10 11 |
# File 'lib/wwmd/page/page.rb', line 9 def header_data @header_data end |
#inputs ⇒ Object
Returns the value of attribute inputs.
24 25 26 |
# File 'lib/wwmd/page/page.rb', line 24 def inputs @inputs end |
#jlinks ⇒ Object (readonly)
array of included javascript files
14 15 16 |
# File 'lib/wwmd/page/page.rb', line 14 def jlinks @jlinks end |
#last_error ⇒ Object (readonly)
Returns the value of attribute last_error.
12 13 14 |
# File 'lib/wwmd/page/page.rb', line 12 def last_error @last_error end |
#links ⇒ Object (readonly)
array of links (urls)
13 14 15 |
# File 'lib/wwmd/page/page.rb', line 13 def links @links end |
#logged_in ⇒ Object
are we logged in?
21 22 23 |
# File 'lib/wwmd/page/page.rb', line 21 def logged_in @logged_in end |
#opts ⇒ Object
Returns the value of attribute opts.
23 24 25 |
# File 'lib/wwmd/page/page.rb', line 23 def opts @opts end |
#post_data ⇒ Object
Returns the value of attribute post_data.
8 9 10 |
# File 'lib/wwmd/page/page.rb', line 8 def post_data @post_data end |
#scrape ⇒ Object (readonly)
scrape object
16 17 18 |
# File 'lib/wwmd/page/page.rb', line 16 def scrape @scrape end |
#spider ⇒ Object (readonly)
spider object
15 16 17 |
# File 'lib/wwmd/page/page.rb', line 15 def spider @spider end |
#urlparse ⇒ Object (readonly)
urlparse object
17 18 19 |
# File 'lib/wwmd/page/page.rb', line 17 def urlparse @urlparse end |
#use_referer ⇒ Object
Returns the value of attribute use_referer.
10 11 12 |
# File 'lib/wwmd/page/page.rb', line 10 def use_referer @use_referer end |
Instance Method Details
#_body_cb(data) ⇒ Object
callback for self.on_body
219 220 221 222 |
# File 'lib/wwmd/page/page.rb', line 219 def _body_cb(data) @body_data << data if data return data.length.to_i end |
#_header_cb(data) ⇒ Object
callback for self.on_header
225 226 227 228 229 |
# File 'lib/wwmd/page/page.rb', line 225 def _header_cb(data) myArr = Array.new(data.split(":",2)) @header_data.extend! myArr[0].to_s.strip,myArr[1].to_s.strip return data.length.to_i end |
#action(id = nil) ⇒ Object
return the complete url to the form action on this page
27 28 29 30 31 32 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 27 def action(id=nil) id = 0 if not id act = self.forms[id].action return self.last_effective_url if (act.nil? || act.empty?) return @urlparse.parse(self.last_effective_url,act).to_s end |
#all_forms ⇒ Object
IRB: display a human readable report of all forms contained in page.body_data
68 69 70 71 |
# File 'lib/wwmd/page/irb_helpers.rb', line 68 def all_forms self.forms.each_index { |x| puts "[#{x.to_s}]-------"; self.forms[x].report } nil end |
#all_tags ⇒ Object
:nodoc:
62 63 64 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 62 def all_tags #:nodoc: return self.search("*").map { |x| x.name } end |
#auth? ⇒ Boolean
does the response have a WWW-Authenticate header (only checked when the response code is 401)?
5 6 7 8 9 10 11 12 13 14 |
# File 'lib/wwmd/page/auth.rb', line 5 def auth? return false if self.code != 401 count = 0 self.header_data.each do |i| if i[0] =~ /www-authenticate/i count += 1 end end return (count > 0) end |
#clear_data ⇒ Object
clear self.body_data and self.header_data
106 107 108 109 110 111 112 |
# File 'lib/wwmd/page/page.rb', line 106 def clear_data return false if self.opts[:parse] = false @body_data = "" @header_data.clear @post_data = "" @last_error = nil end |
#clear_header(key) ⇒ Object Also known as: delete_header
clear header at <key>
7 8 9 10 |
# File 'lib/wwmd/page/headers.rb', line 7 def clear_header(key) self.headers.delete_if { |k,v| k.upcase == key.upcase } return nil end |
#clear_headers ⇒ Object
clear all headers
15 16 17 18 |
# File 'lib/wwmd/page/headers.rb', line 15 def clear_headers self.headers.delete_if { |k,v| true } "headers cleared" end |
#code ⇒ Object
the last http response code
82 83 84 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 82 def code self.response_code # .to_s end |
#current_url ⇒ Object Also known as: current, cur, now
alias_method for last_effective_url
73 74 75 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 73 def current_url self.last_effective_url end |
#default_headers(arg = nil) ⇒ Object Also known as: set_default
set headers back to default headers
52 53 54 |
# File 'lib/wwmd/page/headers.rb', line 52 def default_headers(arg=nil) set_headers end |
#dump_body ⇒ Object Also known as: dump
display self.body_data
57 58 59 |
# File 'lib/wwmd/page/irb_helpers.rb', line 57 def dump_body puts self.body_data end |
#dump_scripts ⇒ Object Also known as: scripts
return an array of inner_html for each <script> tag encountered
40 41 42 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 40 def dump_scripts self.search("//script").map { |s| s.inner_html if s.inner_html.strip != '' } end |
#element_to_text(n) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/wwmd/page/html2text_hpricot.rb', line 16 def element_to_text(n) tag = n.etag || n.stag name = tag.name.downcase s = "" is_block = BlockTags.include?(name) is_list = ListTags.include?(name) is_item = ItemTags.include?(name) is_inline = InlineTags.include?(name) if is_block or is_list or is_item or is_inline n.each_child do |c| s += node_to_text(c) end if is_block or is_list s += "\n" elsif is_item s = "* " + s + "\n" end end s end |
#fingerprint ⇒ Object Also known as: fp
return MD5 for DOM fingerprint take all tag names in page.to_s.md5
67 68 69 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 67 def fingerprint self.all_tags.to_s.md5 end |
#from_paste ⇒ Object
copy and paste from a burp request window (read with pbpaste): the page object gets set with the pasted headers and url (the url is not necessarily correct). returns [headers,form]
form = page.from_paste
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/wwmd/page/helpers.rb', line 7 def from_paste self. = false req = %x[pbpaste] return false if not req h,b = req.chomp.split("\r\n\r\n",2) oh = h h = h.split("\r\n") m,u,p = h.shift.split(" ") return nil unless m =~ (/^(POST|GET)/) self.url = self.base_url + u self.headers_from_array(h) self.body_data = b self.set_data form = b.to_form form.action = @urlparse.parse(self.base_url, u).to_s [oh,form] end |
#furl(url) ⇒ Object
66 67 68 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 66 def furl(url) self.url = @urlparse.parse(self.base_url,url).to_s end |
#get(url = nil, parse = true) ⇒ Object
override for Curl::Easy.perform
if the passed url string doesn't contain a fully qualified url (scheme://...), we'll guess and resolve it against opts[:base_url]
returns: array [ code, body_data.size ]
185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/wwmd/page/page.rb', line 185 def get(url=nil,parse=true) self.clear_data self.headers["Referer"] = self.cur if self.use_referer if !(url =~ /[a-z]+:\/\//) && parse self.url = @urlparse.parse(self.opts[:base_url],url).to_s if url elsif url self.url = url end self.http_get putw "WARN: authentication headers in response" if self.auth? self.set_data end |
#get_cookie ⇒ Object
get the current Cookie header
101 102 103 |
# File 'lib/wwmd/page/headers.rb', line 101 def get_cookie self.headers["Cookie"] end |
#get_form(id = nil) ⇒ Object
return this page’s form (at index id) as a FormArray
13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 13 def get_form(id=nil) id = 0 if not id return nil if forms.empty? || !forms[id] f = @forms[id] url_action = @urlparse.parse(self.cur,f.action).to_s type = f.type FormArray.new do |x| x.set_fields(f.fields) x.action = url_action x.type = type end end |
#get_link(index) ⇒ Object Also known as: link, l
return link at index from @links array
55 56 57 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 55 def get_link(index) @links[index] end |
#grep(reg) ⇒ Object
grep for regexp and remove leading whitespace
8 9 10 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 8 def grep(reg) self.body_data.grep(reg).map { |i| i.gsub(/^\s+/, "") } end |
#has_comments? ⇒ Boolean
38 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 38 def has_comments?; return !@comments.empty?; end |
#has_form? ⇒ Boolean
37 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 37 def has_form?; return !(@forms.size < 1); end |
#has_jlinks? ⇒ Boolean
36 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 36 def has_jlinks?; return !@jlinks.empty?; end |
#has_links? ⇒ Boolean
35 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 35 def has_links?; return !@links.empty?; end |
#head(i = 1) ⇒ Object
:section: IRB helper methods
9 10 11 12 13 14 15 |
# File 'lib/wwmd/page/irb_helpers.rb', line 9 def head(i=1) if i.kind_of?(Range) puts self.body_data.split("\n")[i].join("\n") return nil end puts self.body_data.head(i) end |
#headers_from_array(arr) ⇒ Object
set headers from text
59 60 61 62 63 64 65 66 67 |
# File 'lib/wwmd/page/headers.rb', line 59 def headers_from_array(arr) clear_headers arr.each do |line| next if (line.empty? || line =~ /^(GET|POST)/) k,v = line.split(":",2) self.headers[k.strip] = v.strip end nil end |
#headers_from_file(fn) ⇒ Object
set headers from file
75 76 77 78 |
# File 'lib/wwmd/page/headers.rb', line 75 def headers_from_file(fn) headers_from_array(File.read(fn).split("\n")) return "headers set from #{fn}" end |
#headers_from_paste ⇒ Object
set headers from paste
70 71 72 |
# File 'lib/wwmd/page/headers.rb', line 70 def headers_from_paste headers_from_array(%x[pbpaste]) end |
#hexdump ⇒ Object
hexdump self.body_data
79 80 81 |
# File 'lib/wwmd/page/irb_helpers.rb', line 79 def hexdump puts self.body_data.hexdump end |
#html2text ⇒ Object
def lookup_named_char(s)
c = Hpricot::NamedCharacters[s[1...-1]]
c.chr if c
end
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/wwmd/page/html2text_hpricot.rb', line 58 def html2text doc = self.scrape.hdoc text = node_to_text(doc) # text.gsub!(NamedCharRegex){|s| "#{lookup_named_char(s)}"} # clean up white space text.gsub!("\r"," ") text.squeeze!(" ") text.strip! ret = '' text.split(/\n/).each do |l| l.strip! next if l == '' next if l =~ /^\?+$/ ret += "#{l}\n" end return ret end |
#logged_in? ⇒ Boolean
return value of @logged_in
18 19 20 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 18 def logged_in? return @logged_in end |
#md5 ⇒ Object
return md5sum for self.body_data
46 47 48 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 46 def md5 return self.body_data.md5 end |
#node_to_text(n) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/wwmd/page/html2text_hpricot.rb', line 37 def node_to_text(n) return "" if n.comment? return element_to_text(n) if n.elem? return n.inner_text if n.text? s = "" begin n.each_child do |c| s += node_to_text(c) end rescue => e putw "WARN: #{e.inspect}" end return s end |
#onclicks ⇒ Object
73 74 75 76 |
# File 'lib/wwmd/page/irb_helpers.rb', line 73 def onclicks self.search("//*[@onclick]").each { |x| puts x[:onclick] } nil end |
#open ⇒ Object
this only works on a mac so get a mac
84 85 86 87 88 |
# File 'lib/wwmd/page/irb_helpers.rb', line 84 def open #:nodoc: fn = "wwmdtmp_#{Guid.new}.html" self.write(fn) %x[open #{fn}] end |
#page_status ⇒ Object Also known as: status
return text representation of page code
override in a helper with more specific statuses (depending on page text etc.) if you need anything beyond 200 = OK and everything else = ERR
10 11 12 13 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 10 def page_status return "ERR" if self.response_code != 200 return "OK" end |
#perform ⇒ Object
override Curl::Easy.perform to perform page actions, then call self.set_data
returns: array [ code, body_data.size ]
don't call this directly if we are in console mode; use get and submit for GET and POST respectively
121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/wwmd/page/page.rb', line 121 def perform self.clear_data self.headers["Referer"] = self.cur if self.use_referer begin @curl_object.perform rescue => e @last_error = e putw "WARN: #{e.class}" if e.class =~ /Curl::Err/ end self.set_data end |
#post(url = nil) ⇒ Object
take a GET-style url with parameters and submit the parameters as a POST form
199 200 201 202 203 204 |
# File 'lib/wwmd/page/page.rb', line 199 def post(url=nil) ep = url.clip self.url = @urlparse.parse(self.opts[:base_url],ep).to_s if ep form = url.clop.to_form self.submit(form) end |
#raw ⇒ Object
alias_method for body_data
90 91 92 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 90 def raw self.body_data end |
#read(filename) ⇒ Object
read self.body_data from file
84 85 86 87 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 84 def read(filename) self.body_data = File.read(filename) self.set_data end |
#report(short = nil) ⇒ Object Also known as: show
IRB: text report what has been parsed from this page
18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/wwmd/page/irb_helpers.rb', line 18 def report(short=nil) puts "-------------------------------------------------" self.summary puts "---- links found [#{self.has_links?.to_s} | #{self.links.size}]" self.links.each_index { |i| puts "#{i.to_s} :: #{@links[i]}" } if short.nil? puts "---- javascript found [#{self.has_jlinks?.to_s} | #{self.jlinks.size}]" self.jlinks.each { |url| puts url } if short.nil? puts "---- forms found [#{self.has_form?.to_s} | #{self.forms.size}]" puts "---- comments found [#{self.has_comments?.to_s}]" return nil end |
#report_flags ⇒ Object
return a string of flags, uppercase when present and lowercase when absent: L/l links, J/j javascript includes, F/f forms, C/c comments
27 28 29 30 31 32 33 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 27 def report_flags self.has_links? ? ret = "L" : ret = "l" self.has_jlinks? ? ret += "J" : ret += "j" self.has_form? ? ret += "F" : ret += "f" self.has_comments? ? ret += "C" : ret += "c" return ret end |
#request_headers ⇒ Object Also known as: show_headers, req_headers
IRB: display current headers
40 41 42 43 |
# File 'lib/wwmd/page/irb_helpers.rb', line 40 def request_headers self.headers.each_pair { |k,v| puts "#{k}: #{v}" } return nil end |
#resp_paste ⇒ Object
25 26 27 28 |
# File 'lib/wwmd/page/helpers.rb', line 25 def resp_paste self.body_data = %x[pbpaste].split("\r\n\r\n",2)[1] self.set_data end |
#response_headers ⇒ Object Also known as: resp_headers
IRB: display response headers
49 50 51 52 |
# File 'lib/wwmd/page/irb_helpers.rb', line 49 def response_headers self.header_data.each { |x| puts "#{x[0]} :: #{x[1]}" } return nil end |
#search(xpath) ⇒ Object
return an array of Element objects for an xpath search
35 36 37 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 35 def search(xpath) self.scrape.hdoc.search(xpath) end |
#set_ajax_headers ⇒ Object
set headers to ajax
87 88 89 90 91 |
# File 'lib/wwmd/page/headers.rb', line 87 def set_ajax_headers self.headers["X-Requested-With"] = "XMLHttpRequest" self.headers["X-Prototype-Version"] = "1.5.0" return "headers set to ajax" end |
#set_cookie(cookie = nil) ⇒ Object
set the Cookie header
106 107 108 |
# File 'lib/wwmd/page/headers.rb', line 106 def set_cookie(cookie=nil) self.headers["Cookie"] = cookie end |
#set_cookies? ⇒ Boolean
does this response have SET-COOKIE headers?
51 52 53 54 55 56 57 58 59 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 51 def set_cookies? ret = [] self.header_data.each do |x| if x[0].upcase == "SET-COOKIE" ret << x[1] end end return ret end |
#set_data ⇒ Object
set reporting data for the page
Scan for comments, anchors, links and javascript includes and set page flags. The heavy lifting for parsing is done in the scrape class.
returns: array [ code, body_data.size ]
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/wwmd/page/page.rb', line 78 def set_data # reset scrape and inputs object # transparently gunzip begin io = StringIO.new(self.body_data) gz = Zlib::GzipReader.new(io) self.body_data.replace(gz.read) rescue => e end @scrape.reset(self.body_data) @inputs.set # remove comments that are css selectors for IE silliness @comments = @scrape.for_comments.reject do |c| c =~ /\[if IE\]/ || c =~ /\[if IE \d/ || c =~ /\[if lt IE \d/ end @links = @scrape.for_links.map do |url| @urlparse.parse(self.last_effective_url,url).to_s end @jlinks = @scrape.for_javascript_links @forms = @scrape.for_forms @spider.add(self.last_effective_url,@links) return [self.code,self.body_data.size] end |
#set_headers(arg = nil, clear = false) ⇒ Object
set headers from passed argument
Nil: set headers from WWMD::DEFAULT_HEADERS
Symbol: entry in WWMD::HEADERS to set from
Hash: hash to set headers from
String: filename (NOT IMPLEMENTED)
if clear == true then headers will be cleared before setting
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/wwmd/page/headers.rb', line 27 def set_headers(arg=nil,clear=false) clear_headers if clear if arg.nil? begin clear_headers WWMD::DEFAULT_HEADERS.each { |k,v| self.headers[k] = v } return "headers set from default" rescue => e putw "WARN: " + e return false end elsif arg.class == Symbol set_headers(WWMD::HEADERS[arg]) putw "headers set from #{arg}" return true elsif arg.class == Hash arg.each { |k,v| self.headers[k] = v } putw "headers set from hash" return true end putw "error setting headers" return false end |
#set_link(index) ⇒ Object
set self.url to the link at the given integer index, as listed by self.report. NOTE: I always use page.get(page.l(1)) anyway.
50 51 52 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 50 def set_link(index) self.url = @links[index] end |
#set_soap_headers ⇒ Object
set headers to SOAP request headers
94 95 96 97 98 |
# File 'lib/wwmd/page/headers.rb', line 94 def set_soap_headers self.headers['Content-Type'] = "text/xml;charset=utf-8" self.headers['SOAPAction'] = "\"\"" return "headers set to soap" end |
#set_utf7_headers ⇒ Object
set headers to utf7 encoding post
81 82 83 84 |
# File 'lib/wwmd/page/headers.rb', line 81 def set_utf7_headers self.headers["Content-Type"] = "application/x-www-form-urlencoded;charset=UTF-7" return "headers set to utf7" end |
#setbase(url = nil) ⇒ Object
set self.opts[:base_url] and self.base_url to url (returns nil and changes nothing if url is nil)
71 72 73 74 75 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 71 def setbase(url=nil) return nil if not url self.opts[:base_url] = url self.base_url = url end |
#size ⇒ Object
return page size in bytes
41 42 43 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 41 def size return self.body_data.size end |
#submit(iform = nil, reg = WWMD::ESCAPE[:default]) ⇒ Object
replacement for Curl::Easy.http_post
post the form, attempting to remove curl-supplied headers (Expect, X-Forwarded-For, Content-length), then call self.set_data
if passed a regexp, escape values in the form using that regexp before submitting; if passed nil for the regexp arg, the form will not be escaped. default: WWMD::ESCAPE[:default]
returns: array [ code, body_data.size ]
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
# File 'lib/wwmd/page/page.rb', line 143 def submit(iform=nil,reg=WWMD::ESCAPE[:default]) ## this is just getting worse and worse if iform.class == "Symbol" reg = iform iform = nil end self.clear_data ["Expect","X-Forwarded-For","Content-length"].each { |s| self.clear_header(s) } self.headers["Referer"] = self.cur if self.use_referer unless iform unless self.form.empty? sform = self.form.clone else return "no form provided" end else sform = iform.clone # clone the form so that we don't change the original end sform.escape_all!(reg) self.url = sform.action if sform.action if sform.empty? self.http_post('') else self.http_post(self.post_data = sform.to_post) end self.set_data end |
#submit_string(post_string) ⇒ Object
submit a form using POST string
172 173 174 175 176 177 |
# File 'lib/wwmd/page/page.rb', line 172 def submit_string(post_string) self.clear_data self.http_post(post_string) putw "WARN: authentication headers in response" if self.auth? self.set_data end |
#summary ⇒ Object
IRB: display summary of what has been parsed from this page
33 34 35 36 37 |
# File 'lib/wwmd/page/irb_helpers.rb', line 33 def summary status = self.page_status puts "XXXX[#{self.report_flags}] | #{self.response_code.to_s} | #{status} | #{self.url} | #{self.size}" return nil end |
#text ⇒ Object
65 |
# File 'lib/wwmd/page/irb_helpers.rb', line 65 def text; self.html2text; end |
#time ⇒ Object
61 62 63 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 61 def time self.total_time end |
#to_text ⇒ Object
IRB: puts the page filtered through html2text
64 |
# File 'lib/wwmd/page/irb_helpers.rb', line 64 def to_text; puts self.html2text; end |
#verb(verb, url = nil) ⇒ Object
send arbitrary verb (only works with patch to taf2-curb)
207 208 209 210 211 212 213 214 |
# File 'lib/wwmd/page/page.rb', line 207 def verb(verb,url=nil) return false if !@curl_object.respond_to?(:http_verb) self.url = url if url self.clear_data self.headers["Referer"] = self.cur if self.use_referer self.http_verb(verb) self.set_data end |