Class: WWMD::Page
- Inherits:
-
Object
- Object
- WWMD::Page
- Includes:
- WWMDUtils
- Defined in:
- lib/wwmd/page/page.rb,
lib/wwmd/page/auth.rb,
lib/wwmd/page/headers.rb,
lib/wwmd/page/helpers.rb,
lib/wwmd/page/irb_helpers.rb,
lib/wwmd/page/html2text_hpricot.rb,
lib/wwmd/page/reporting_helpers.rb,
lib/wwmd/page/html2text_nokogiri.rb,
lib/wwmd/page/parsing_convenience.rb
Overview
NamedCharRegex = Regexp.new("(&("+Hpricot::NamedCharacters.keys.join("|")+");)")
Instance Attribute Summary collapse
-
#base_url ⇒ Object
needed to properly munge relative urls into fq urls.
-
#body_data ⇒ Object
Returns the value of attribute body_data.
-
#comments ⇒ Object
readonly
Returns the value of attribute comments.
-
#curl_object ⇒ Object
Returns the value of attribute curl_object.
-
#forms ⇒ Object
readonly
Returns the value of attribute forms.
-
#header_data ⇒ Object
Returns the value of attribute header_data.
-
#header_file ⇒ Object
readonly
Returns the value of attribute header_file.
-
#inputs ⇒ Object
Returns the value of attribute inputs.
-
#jlinks ⇒ Object
readonly
array of included javascript files.
-
#last_error ⇒ Object
readonly
Returns the value of attribute last_error.
-
#links ⇒ Object
readonly
array of links (urls).
-
#logged_in ⇒ Object
are we logged in?.
-
#opts ⇒ Object
Returns the value of attribute opts.
-
#post_data ⇒ Object
Returns the value of attribute post_data.
-
#scrape ⇒ Object
readonly
scrape object.
-
#spider ⇒ Object
readonly
spider object.
-
#status ⇒ Object
Returns the value of attribute status.
-
#urlparse ⇒ Object
readonly
urlparse object.
-
#use_referer ⇒ Object
Returns the value of attribute use_referer.
Instance Method Summary collapse
-
#_body_cb(data) ⇒ Object
callback for
self.on_body
. -
#_header_cb(data) ⇒ Object
callback for
self.on_header
. -
#action(id = nil) ⇒ Object
return the complete url to the form action on this page.
-
#all_forms ⇒ Object
IRB: display a human readable report of all forms contained in page.body_data.
-
#all_tags ⇒ Object
:nodoc:.
-
#auth? ⇒ Boolean
does this response (status 401) have a WWW-Authenticate header?.
-
#clear_data ⇒ Object
clear self.body_data and self.header_data.
-
#clear_header(key) ⇒ Object
(also: #delete_header)
clear header at <key>.
-
#clear_headers ⇒ Object
clear all headers.
-
#code ⇒ Object
the last http response code.
-
#current_url ⇒ Object
(also: #current, #cur, #now)
alias_method for last_effective_url.
-
#default_headers(arg = nil) ⇒ Object
(also: #set_default)
set headers back to default headers.
-
#dump_body ⇒ Object
(also: #dump)
display self.body_data.
-
#dump_scripts ⇒ Object
(also: #scripts)
return an array of inner_html for each <script> tag encountered.
- #element_to_text(n) ⇒ Object
-
#fingerprint ⇒ Object
(also: #fp)
return an MD5 DOM fingerprint: take all tag names in the page (all_tags), then .to_s.md5.
- #from_file(fn) ⇒ Object
-
#from_input(req) ⇒ Object
copy and paste from burp request windows: the page object gets set with headers and url (not correct); returns [headers,form], e.g. form = page.from_paste.
- #from_paste ⇒ Object
- #furl(url) ⇒ Object
-
#get(url = nil, parse = true) ⇒ Object
override for Curl::Easy.perform.
-
#get_cookie ⇒ Object
get the current Cookie header.
-
#get_form(id = nil) ⇒ Object
return this page’s form (at index id) as a FormArray.
-
#get_link(index) ⇒ Object
(also: #link, #l)
return link at index from @links array.
-
#grep(reg) ⇒ Object
grep for regexp and remove leading whitespace.
- #has_comments? ⇒ Boolean
- #has_form? ⇒ Boolean
- #has_jlinks? ⇒ Boolean
- #has_links? ⇒ Boolean
-
#head(i = 1) ⇒ Object
:section: IRB helper methods.
-
#headers_from_array(arr) ⇒ Object
set headers from text.
-
#headers_from_file(fn) ⇒ Object
set headers from file.
-
#headers_from_paste ⇒ Object
set headers from paste.
-
#hexdump ⇒ Object
hexdump self.body_data.
-
#html2text ⇒ Object
def lookup_named_char(s) c = Hpricot::NamedCharacters[s] c.chr if c end.
-
#initialize(opts = {}, &block) ⇒ Page
constructor
A new instance of Page.
- #inspect ⇒ Object
-
#logged_in? ⇒ Boolean
return value of @logged_in.
-
#md5 ⇒ Object
return md5sum for self.body_data.
-
#method_missing(methodname, *args) ⇒ Object
send methods not defined here to
@curl_object
. - #node_to_text(n) ⇒ Object
- #onclicks ⇒ Object
-
#open ⇒ Object
this only works on a mac so get a mac.
-
#page_status ⇒ Object
return text representation of page code.
-
#perform ⇒ Object
override Curl::Easy.perform to perform page actions, call
self.set_data
. -
#post(url = nil) ⇒ Object
GET with params and POST it as a form.
-
#raw ⇒ Object
alias_method for body_data.
-
#read(filename) ⇒ Object
read self.body_data from file.
-
#report(short = nil) ⇒ Object
(also: #show)
IRB: text report what has been parsed from this page.
-
#report_flags ⇒ Object
return a string of flags: Ll links Jj javascript includes Ff forms Cc comments.
-
#request_headers ⇒ Object
(also: #show_headers, #req_headers)
IRB: display current headers.
- #resp_paste ⇒ Object
-
#response_headers ⇒ Object
(also: #resp_headers)
IRB: display response headers.
-
#search(xpath) ⇒ Object
return an array of Element objects for an xpath search.
-
#set_ajax_headers ⇒ Object
set headers to ajax.
-
#set_cookie(cookie = nil) ⇒ Object
set the Cookie header.
-
#set_cookies? ⇒ Boolean
(also: #set_cookies)
does this response have SET-COOKIE headers?.
-
#set_data ⇒ Object
set reporting data for the page.
-
#set_headers(arg = nil, clear = false) ⇒ Object
set headers from passed argument Nil: set headers from WWMD::DEFAULT_HEADERS Symbol: entry in WWMD::HEADERS to set from Hash: hash to set headers from String: filename (NOT IMPLEMENTED).
-
#set_link(index) ⇒ Object
set link using an integer link from self.report – NOTE: I always use page.get(page.l(1)) anyway.
-
#set_soap_headers ⇒ Object
set headers to SOAP request headers.
-
#set_utf7_headers ⇒ Object
set headers to utf7 encoding post.
-
#setbase(url = nil) ⇒ Object
set self.opts[:base_url] and self.base_url.
-
#size ⇒ Object
return page size in bytes.
-
#submit(iform = nil, reg = WWMD::ESCAPE[:default]) ⇒ Object
replacement for Curl::Easy.http_post.
-
#submit_string(post_string) ⇒ Object
submit a form using POST string.
-
#summary ⇒ Object
IRB: display summary of what has been parsed from this page.
- #text ⇒ Object
- #time ⇒ Object
-
#to_text ⇒ Object
IRB: puts the page filtered through html2text.
-
#user_agent=(ua) ⇒ Object
:section: Header helper methods.
-
#verb(verb, url = nil) ⇒ Object
send arbitrary verb (only works with patch to taf2-curb).
-
#write(filename) ⇒ Object
write self.body_data to file.
Methods included from WWMDUtils
header_array_from_file, rannum, ranstr
Constructor Details
#initialize(opts = {}, &block) ⇒ Page
Returns a new instance of Page.
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/wwmd/page/page.rb', line 35

# Build a Page: merge DEFAULTS into a copy of +opts+, create the helper
# objects (Spider, Scrape, URLParse, Inputs) and configure a fresh
# Curl::Easy handle from the merged options.
#
# opts::  option hash. Every key (except :proxy_url) that the curl handle has
#         a "<key>=" writer for is sent to the handle; any other key is stored
#         as an instance variable of the same name.
# block:: optional; instance_eval'd against the new Page after setup.
def initialize(opts={}, &block)
  @opts = opts.clone
  DEFAULTS.each { |k,v| @opts[k] = v unless @opts.has_key?(k) }
  @spider = Spider.new(@opts)
  @scrape = Scrape.new
  @base_url ||= opts[:base_url]
  @scrape.warn = opts[:scrape_warn] if !opts[:scrape_warn].nil? # yeah yeah... bool false
  @urlparse = URLParse.new()
  @inputs = Inputs.new(self)
  @logged_in = false
  @body_data = ""
  @post_data = ""
  @comments = []
  @header_data = FormArray.new
  @header_file = nil

  @curl_object = Curl::Easy.new
  @opts.each do |k,v|
    next if k == :proxy_url # applied below, and only when :use_proxy is set
    if (@curl_object.respond_to?("#{k}=".intern))
      @curl_object.send("#{k}=".intern,v)
    else
      # may overwrite the defaults assigned above (e.g. @header_file)
      self.instance_variable_set("@#{k.to_s}".intern,v)
    end
  end
  @curl_object.on_body   { |data| self._body_cb(data) }
  @curl_object.on_header { |data| self._header_cb(data) }

  # cookies?
  # NOTE(review): the three curl method names in this section were missing
  # from the rendered listing; restored from the option keys they are fed
  # from (:enable_cookies / :cookiejar) -- confirm against page.rb.
  @curl_object.enable_cookies = @opts[:enable_cookies]
  if @curl_object.enable_cookies?
    @curl_object.cookiejar = @opts[:cookiejar] || "./__cookiejar"
  end

  #proxy?
  @curl_object.proxy_url = @opts[:proxy_url] if @opts[:use_proxy]

  instance_eval(&block) if block_given?

  if opts.empty? && @scrape.warn
    putw "Page initialized without opts"
    @scrape.warn = false
  end

  if @header_file
    begin
      headers_from_file(@header_file)
      # NOTE(review): attribute name missing from the rendered listing;
      # presumably the cookie engine is switched off once headers come from
      # a file -- confirm against page.rb.
      @curl_object.enable_cookies = false
    rescue => e
      puts "ERROR: #{e}"
    end
  end
end
Dynamic Method Handling
This class handles dynamic methods through the method_missing method
#method_missing(methodname, *args) ⇒ Object
send methods not defined here to @curl_object
252 253 254 |
# File 'lib/wwmd/page/page.rb', line 252

# Forward any method not defined on Page to @curl_object, including a block
# if one was given (the block used to be silently dropped).
def method_missing(methodname, *args, &block)
  @curl_object.send(methodname, *args, &block)
end

# Keep respond_to? in agreement with the delegation done by method_missing.
def respond_to_missing?(methodname, include_private = false)
  @curl_object.respond_to?(methodname, include_private) || super
end
Instance Attribute Details
#base_url ⇒ Object
needed to properly munge relative urls into fq urls
21 22 23 |
# File 'lib/wwmd/page/page.rb', line 21 def base_url @base_url end |
#body_data ⇒ Object
Returns the value of attribute body_data.
6 7 8 |
# File 'lib/wwmd/page/page.rb', line 6 def body_data @body_data end |
#comments ⇒ Object (readonly)
Returns the value of attribute comments.
17 18 19 |
# File 'lib/wwmd/page/page.rb', line 17 def comments @comments end |
#curl_object ⇒ Object
Returns the value of attribute curl_object.
5 6 7 |
# File 'lib/wwmd/page/page.rb', line 5 def curl_object @curl_object end |
#forms ⇒ Object (readonly)
Returns the value of attribute forms.
10 11 12 |
# File 'lib/wwmd/page/page.rb', line 10 def forms @forms end |
#header_data ⇒ Object
Returns the value of attribute header_data.
8 9 10 |
# File 'lib/wwmd/page/page.rb', line 8 def header_data @header_data end |
#header_file ⇒ Object (readonly)
Returns the value of attribute header_file.
19 20 21 |
# File 'lib/wwmd/page/page.rb', line 19 def header_file @header_file end |
#inputs ⇒ Object
Returns the value of attribute inputs.
25 26 27 |
# File 'lib/wwmd/page/page.rb', line 25 def inputs @inputs end |
#jlinks ⇒ Object (readonly)
array of included javascript files
13 14 15 |
# File 'lib/wwmd/page/page.rb', line 13 def jlinks @jlinks end |
#last_error ⇒ Object (readonly)
Returns the value of attribute last_error.
11 12 13 |
# File 'lib/wwmd/page/page.rb', line 11 def last_error @last_error end |
#links ⇒ Object (readonly)
array of links (urls)
12 13 14 |
# File 'lib/wwmd/page/page.rb', line 12 def links @links end |
#logged_in ⇒ Object
are we logged in?
22 23 24 |
# File 'lib/wwmd/page/page.rb', line 22 def logged_in @logged_in end |
#opts ⇒ Object
Returns the value of attribute opts.
24 25 26 |
# File 'lib/wwmd/page/page.rb', line 24 def opts @opts end |
#post_data ⇒ Object
Returns the value of attribute post_data.
7 8 9 |
# File 'lib/wwmd/page/page.rb', line 7 def post_data @post_data end |
#scrape ⇒ Object (readonly)
scrape object
15 16 17 |
# File 'lib/wwmd/page/page.rb', line 15 def scrape @scrape end |
#spider ⇒ Object (readonly)
spider object
14 15 16 |
# File 'lib/wwmd/page/page.rb', line 14 def spider @spider end |
#status ⇒ Object
Returns the value of attribute status.
3 4 5 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 3 def status @status end |
#urlparse ⇒ Object (readonly)
urlparse object
16 17 18 |
# File 'lib/wwmd/page/page.rb', line 16 def urlparse @urlparse end |
#use_referer ⇒ Object
Returns the value of attribute use_referer.
9 10 11 |
# File 'lib/wwmd/page/page.rb', line 9 def use_referer @use_referer end |
Instance Method Details
#_body_cb(data) ⇒ Object
callback for self.on_body
238 239 240 241 |
# File 'lib/wwmd/page/page.rb', line 238 def _body_cb(data) @body_data << data if data return data.length.to_i end |
#_header_cb(data) ⇒ Object
callback for self.on_header
244 245 246 247 248 249 |
# File 'lib/wwmd/page/page.rb', line 244 def _header_cb(data) myArr = Array.new(data.split(":",2)) @header_data.add(myArr[0].to_s.strip,myArr[1].to_s.strip) # @header_data[myArr[0].to_s.strip] = myArr[1].to_s.strip return data.length.to_i end |
#action(id = nil) ⇒ Object
return the complete url to the form action on this page
31 32 33 34 35 36 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 31 def action(id=nil) id = 0 if not id act = self.forms[id].action return self.last_effective_url if (act.nil? || act.empty?) return @urlparse.parse(self.last_effective_url,act).to_s end |
#all_forms ⇒ Object
IRB: display a human readable report of all forms contained in page.body_data
68 69 70 71 |
# File 'lib/wwmd/page/irb_helpers.rb', line 68 def all_forms self.forms.each_index { |x| puts "[#{x.to_s}]-------"; self.forms[x].report } nil end |
#all_tags ⇒ Object
:nodoc:
66 67 68 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 66

# Return the name of every element matched by a "*" search of the page.
# (The method name was missing from the rendered listing; restored from the
# method summary.)
def all_tags #:nodoc:
  return search("*").map { |x| x.name }
end
#auth? ⇒ Boolean
does this response (status 401) have a WWW-Authenticate header?
5 6 7 8 9 10 11 12 13 14 |
# File 'lib/wwmd/page/auth.rb', line 5 def auth? return false if self.code != 401 count = 0 self.header_data.each do |i| if i[0] =~ /www-authenticate/i count += 1 end end return (count > 0) end |
#clear_data ⇒ Object
clear self.body_data and self.header_data
124 125 126 127 128 129 130 |
# File 'lib/wwmd/page/page.rb', line 124

# Reset per-request state: body, post data, response headers, last error.
# Returns false without touching anything when opts[:parse] is false.
def clear_data
  # was `opts[:parse] = false`: an assignment, which never returned early
  # and overwrote the caller's :parse option on every request
  return false if opts[:parse] == false
  @body_data = ""
  @post_data = nil
  @header_data.clear
  @last_error = nil
end
#clear_header(key) ⇒ Object Also known as: delete_header
clear header at <key>
11 12 13 14 |
# File 'lib/wwmd/page/headers.rb', line 11 def clear_header(key) self.headers.delete_if { |k,v| k.upcase == key.upcase } return nil end |
#clear_headers ⇒ Object
clear all headers
19 20 21 22 |
# File 'lib/wwmd/page/headers.rb', line 19 def clear_headers self.headers.delete_if { |k,v| true } "headers cleared" end |
#code ⇒ Object
the last http response code
84 85 86 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 84 def code self.response_code # .to_s end |
#current_url ⇒ Object Also known as: current, cur, now
alias_method for last_effective_url
75 76 77 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 75 def current_url self.last_effective_url end |
#default_headers(arg = nil) ⇒ Object Also known as: set_default
set headers back to default headers
56 57 58 |
# File 'lib/wwmd/page/headers.rb', line 56 def default_headers(arg=nil) set_headers end |
#dump_body ⇒ Object Also known as: dump
display self.body_data
57 58 59 |
# File 'lib/wwmd/page/irb_helpers.rb', line 57 def dump_body puts self.body_data end |
#dump_scripts ⇒ Object Also known as: scripts
return an array of inner_html for each <script> tag encountered
44 45 46 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 44

# Return one entry per <script> element: its inner_html, or nil when the
# script body is blank (e.g. an external include).
# (The `search` call was missing from the rendered listing; restored.)
def dump_scripts
  search("//script").map { |s| s.inner_html if s.inner_html.strip != '' }
end
#element_to_text(n) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/wwmd/page/html2text_hpricot.rb', line 16 def element_to_text(n) tag = n.etag || n.stag name = tag.name.downcase s = "" is_block = BlockTags.include?(name) is_list = ListTags.include?(name) is_item = ItemTags.include?(name) is_inline = InlineTags.include?(name) if is_block or is_list or is_item or is_inline n.each_child do |c| s += node_to_text(c) end if is_block or is_list s += "\n" elsif is_item s = "* " + s + "\n" end end s end |
#fingerprint ⇒ Object Also known as: fp
return an MD5 DOM fingerprint: take all tag names in the page (all_tags), then .to_s.md5
69 70 71 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 69

# DOM fingerprint: md5 of the stringified list of tag names on the page.
# (The `all_tags` call was missing from the rendered listing; restored.)
def fingerprint
  all_tags.to_s.md5
end
#from_file(fn) ⇒ Object
25 26 27 28 29 30 |
# File 'lib/wwmd/page/helpers.rb', line 25 def from_file(fn) h = headers.clone ret = from_input(File.read(fn)) headers.replace(h) ret end |
#from_input(req) ⇒ Object
copy and paste from burp request windows: the page object gets set with headers and url (not correct); returns [headers,form]
form = page.from_paste
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/wwmd/page/helpers.rb', line 8

# Load a raw HTTP request (request line, headers, optional body separated by
# a blank CRLF line) into this page.
#
# req:: the raw request text.
# Returns [header_text, form]; false when +req+ is nil/false; nil when the
# request is empty or is not a GET/POST.
def from_input(req)
  # NOTE(review): the attribute name was missing from the rendered listing;
  # presumably the cookie engine is disabled because the pasted headers
  # carry their own Cookie header -- confirm against helpers.rb.
  self.enable_cookies = false
  return false if not req
  h,b = req.chomp.split("\r\n\r\n",2)
  return nil if h.nil?          # empty input: nothing to parse
  b ||= ""                      # request without a body (typical GET)
  oh = h
  h = h.split("\r\n")
  m,u = h.shift.to_s.split(" ") # request line: method, path (version ignored)
  return nil unless m =~ (/^(POST|GET)/)
  self.url = base_url + u
  headers_from_array(h)
  self.body_data = b
  set_data
  form = b.to_form
  form.action = @urlparse.parse(base_url, u).to_s
  [oh,form]
end
#from_paste ⇒ Object
32 33 34 |
# File 'lib/wwmd/page/helpers.rb', line 32 def from_paste from_input(%x[pbpaste]) end |
#furl(url) ⇒ Object
70 71 72 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 70 def furl(url) self.url = @urlparse.parse(self.base_url,url).to_s end |
#get(url = nil, parse = true) ⇒ Object
override for Curl::Easy.perform
if the passed url string doesn’t contain a scheme (e.g. http://), it is resolved against base_url (unless parse is false)
returns: array [ code, body_data.size ]
204 205 206 207 208 209 210 211 212 213 214 215 |
# File 'lib/wwmd/page/page.rb', line 204 def get(url=nil,parse=true) self.clear_data self.headers["Referer"] = self.cur if self.use_referer if !(url =~ /[a-z]+:\/\//) && parse self.url = @urlparse.parse(self.base_url,url).to_s if url elsif url self.url = url end self.http_get putw "WARN: authentication headers in response" if self.auth? self.set_data end |
#get_cookie ⇒ Object
get the current Cookie header
108 109 110 |
# File 'lib/wwmd/page/headers.rb', line 108

# Return the current value of the request "Cookie" header.
# (The method name was missing from the rendered listing; restored from the
# method summary.)
def get_cookie
  headers["Cookie"]
end
#get_form(id = nil) ⇒ Object
return this page’s form (at index id) as a FormArray
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 13

# Return the form at index +id+ (default 0) as a new FormArray whose action
# is resolved against the current url. Returns nil when there is no such form.
def get_form(id=nil)
  id ||= 0
  return nil if forms.empty? || !forms[id]
  f = @forms[id]
  # fall back to the current url, then to a marker string
  # (the original also had a no-op `action ||= action` step)
  action = f.action || cur || "PARSE_ERROR"
  url_action = @urlparse.parse(cur,action).to_s
  type = f.type
  FormArray.new do |x|
    x.set_fields(f.fields)
    x.action = url_action
    x.type = type
  end
end
#get_link(index) ⇒ Object Also known as: link, l
return link at index from @links array
59 60 61 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 59 def get_link(index) @links[index] end |
#grep(reg) ⇒ Object
grep for regexp and remove leading whitespace
8 9 10 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 8

# Return the lines of body_data matching +reg+, with leading whitespace
# removed from each line. Iterates with each_line because String is no
# longer Enumerable (String#grep only existed on Ruby 1.8).
def grep(reg)
  body_data.each_line.grep(reg).map { |i| i.gsub(/^\s+/, "") }
end
#has_comments? ⇒ Boolean
39 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 39 def has_comments?; return !@comments.empty?; end |
#has_form? ⇒ Boolean
38 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 38 def has_form?; return !(@forms.size < 1); end |
#has_jlinks? ⇒ Boolean
37 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 37 def has_jlinks?; return !@jlinks.empty?; end |
#has_links? ⇒ Boolean
36 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 36 def has_links?; return !@links.empty?; end |
#head(i = 1) ⇒ Object
:section: IRB helper methods
9 10 11 12 13 14 15 |
# File 'lib/wwmd/page/irb_helpers.rb', line 9 def head(i=1) if i.kind_of?(Range) puts self.body_data.split("\n")[i].join("\n") return nil end puts self.body_data.head(i) end |
#headers_from_array(arr) ⇒ Object
set headers from text
63 64 65 66 67 68 69 70 71 72 |
# File 'lib/wwmd/page/headers.rb', line 63

# Replace the request headers with those parsed from +arr+.
#
# arr:: an Array of "Name: value" lines, or a String holding raw request
#       text (only the part before the first blank line is used).
# Empty lines, GET/POST request lines and lines without a colon are skipped.
# Always returns nil.
def headers_from_array(arr)
  clear_headers
  if arr.kind_of?(String)
    # String#each is gone since Ruby 1.9, so split into lines explicitly
    arr = arr.split(/\r?\n\r?\n/).first.to_s.split(/\r?\n/)
  end
  arr.each do |line|
    next if (line.empty? || line =~ /^(GET|POST)/)
    k,v = line.split(":",2)
    next if v.nil? # not a "Name: value" line
    headers[k.strip] = v.strip
  end
  nil
end
#headers_from_file(fn) ⇒ Object
set headers from file
81 82 83 84 85 |
# File 'lib/wwmd/page/headers.rb', line 81 def headers_from_file(fn) clear_headers headers_from_array(File.read(fn).split("\n")) return "headers set from #{fn}" end |
#headers_from_paste ⇒ Object
set headers from paste
76 77 78 |
# File 'lib/wwmd/page/headers.rb', line 76 def headers_from_paste headers_from_array(%x[pbpaste]) end |
#hexdump ⇒ Object
hexdump self.body_data
79 80 81 |
# File 'lib/wwmd/page/irb_helpers.rb', line 79 def hexdump puts self.body_data.hexdump end |
#html2text ⇒ Object
def lookup_named_char(s)
c = Hpricot::NamedCharacters[s[1...-1]]
c.chr if c
end
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/wwmd/page/html2text_hpricot.rb', line 58 def html2text doc = self.scrape.hdoc text = node_to_text(doc) # text.gsub!(NamedCharRegex){|s| "#{lookup_named_char(s)}"} # clean up white space text.gsub!("\r"," ") text.squeeze!(" ") text.strip! ret = '' text.split(/\n/).each do |l| l.strip! next if l == '' next if l =~ /^\?+$/ ret += "#{l}\n" end return ret end |
#inspect ⇒ Object
30 31 32 33 |
# File 'lib/wwmd/page/page.rb', line 30 def inspect # hack return "Page" end |
#logged_in? ⇒ Boolean
return value of @logged_in
19 20 21 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 19 def logged_in? return @logged_in end |
#md5 ⇒ Object
return md5sum for self.body_data
47 48 49 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 47 def md5 return self.body_data.md5 end |
#node_to_text(n) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/wwmd/page/html2text_hpricot.rb', line 37 def node_to_text(n) return "" if n.comment? return element_to_text(n) if n.elem? return n.inner_text if n.text? s = "" begin n.each_child do |c| s += node_to_text(c) end rescue => e putw "WARN: #{e.inspect}" end return s end |
#onclicks ⇒ Object
73 74 75 76 |
# File 'lib/wwmd/page/irb_helpers.rb', line 73 def onclicks self.search("//*[@onclick]").each { |x| puts x[:onclick] } nil end |
#open ⇒ Object
this only works on a mac so get a mac
84 85 86 87 88 |
# File 'lib/wwmd/page/irb_helpers.rb', line 84 def open #:nodoc: fn = "wwmdtmp_#{Guid.new}.html" self.write(fn) %x[open #{fn}] end |
#page_status ⇒ Object
return text representation of page code
override with specific statuses in helper depending on page text etc to include statuses outside 200 = OK and other = ERR
11 12 13 14 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 11

# Set and return @status: "ERR" when response_code is above 399, else "OK".
# (Previously the trailing modifier-if made the method return nil for every
# non-error response.)
def page_status
  @status = response_code > 399 ? "ERR" : "OK"
end
#perform ⇒ Object
override Curl::Easy.perform to perform page actions,
call <tt>self.set_data</tt>
returns: array [ code, body_data.size ] (the return value of self.set_data)
don’t call this directly if we are in console mode use get and submit respectively for GET and POST
139 140 141 142 143 144 145 146 147 148 149 |
# File 'lib/wwmd/page/page.rb', line 139

# Clear the previous response, run the request on @curl_object, then parse
# the result with set_data (whose value is returned).
# Any StandardError from curl is stored in @last_error instead of raised;
# Curl::Err errors additionally print a warning.
def perform
  clear_data
  headers["Referer"] = cur if use_referer
  begin
    @curl_object.perform
  rescue => e
    @last_error = e
    # match on the class *name*; `e.class =~ /.../` never matched
    putw "WARN: #{e.class}" if e.class.to_s =~ /Curl::Err/
  end
  set_data
end
#post(url = nil) ⇒ Object
GET with params and POST it as a form
218 219 220 221 222 223 |
# File 'lib/wwmd/page/page.rb', line 218 def post(url=nil) ep = url.clip self.url = @urlparse.parse(self.opts[:base_url],ep).to_s if ep form = url.clop.to_form self.submit(form) end |
#raw ⇒ Object
alias_method for body_data
94 95 96 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 94 def raw self.body_data end |
#read(filename) ⇒ Object
read self.body_data from file
88 89 90 91 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 88 def read(filename) self.body_data = File.read(filename) self.set_data end |
#report(short = nil) ⇒ Object Also known as: show
IRB: text report what has been parsed from this page
18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/wwmd/page/irb_helpers.rb', line 18 def report(short=nil) puts "-------------------------------------------------" self.summary puts "---- links found [#{self.has_links?.to_s} | #{self.links.size}]" self.links.each_index { |i| puts "#{i.to_s} :: #{@links[i]}" } if short.nil? puts "---- javascript found [#{self.has_jlinks?.to_s} | #{self.jlinks.size}]" self.jlinks.each { |url| puts url } if short.nil? puts "---- forms found [#{self.has_form?.to_s} | #{self.forms.size}]" puts "---- comments found [#{self.has_comments?.to_s}]" return nil end |
#report_flags ⇒ Object
return a string of flags: Ll links Jj javascript includes Ff forms Cc comments
28 29 30 31 32 33 34 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 28 def report_flags self.has_links? ? ret = "L" : ret = "l" self.has_jlinks? ? ret += "J" : ret += "j" self.has_form? ? ret += "F" : ret += "f" self.has_comments? ? ret += "C" : ret += "c" return ret end |
#request_headers ⇒ Object Also known as: show_headers, req_headers
IRB: display current headers
40 41 42 43 |
# File 'lib/wwmd/page/irb_helpers.rb', line 40 def request_headers self.headers.each_pair { |k,v| puts "#{k}: #{v}" } return nil end |
#resp_paste ⇒ Object
36 37 38 39 |
# File 'lib/wwmd/page/helpers.rb', line 36 def resp_paste self.body_data = %x[pbpaste].split("\r\n\r\n",2)[1] self.set_data end |
#response_headers ⇒ Object Also known as: resp_headers
IRB: display response headers
49 50 51 52 |
# File 'lib/wwmd/page/irb_helpers.rb', line 49 def response_headers self.header_data.each { |x| puts "#{x[0]} :: #{x[1]}" } return nil end |
#search(xpath) ⇒ Object
return an array of Element objects for an xpath search
39 40 41 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 39 def search(xpath) self.scrape.hdoc.search(xpath) end |
#set_ajax_headers ⇒ Object
set headers to ajax
94 95 96 97 98 |
# File 'lib/wwmd/page/headers.rb', line 94 def set_ajax_headers self.headers["X-Requested-With"] = "XMLHttpRequest" self.headers["X-Prototype-Version"] = "1.5.0" return "headers set to ajax" end |
#set_cookie(cookie = nil) ⇒ Object
set the Cookie header
113 114 115 |
# File 'lib/wwmd/page/headers.rb', line 113

# Set the request "Cookie" header to +cookie+ (nil by default).
# (Method name, parameter and value were missing from the rendered listing;
# restored from the method summary.)
def set_cookie(cookie=nil)
  headers["Cookie"] = cookie
end
#set_cookies? ⇒ Boolean Also known as: set_cookies
does this response have SET-COOKIE headers?
52 53 54 55 56 57 58 59 60 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 52

# Collect the cookies set by the response: for every SET-COOKIE response
# header, the [name, value] pair taken from the text before the first ";".
# Returns a FormArray (empty when no cookies were set), not true/false.
# (The method name was missing from the rendered listing; restored from the
# method summary.)
def set_cookies?
  ret = FormArray.new()
  header_data.each do |x|
    if x[0].upcase == "SET-COOKIE"
      ret << x[1].split(";").first.split("=",2)
    end
  end
  ret
end
#set_data ⇒ Object
set reporting data for the page
Scan for comments, anchors, links and javascript includes and set page flags. The heavy lifting for parsing is done in the scrape class.
returns: array [ code, page_status, body_data.size ]
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/wwmd/page/page.rb', line 96 def set_data # reset scrape and inputs object # transparently gunzip begin io = StringIO.new(self.body_data) gz = Zlib::GzipReader.new(io) self.body_data.replace(gz.read) rescue => e end @scrape.reset(self.body_data) @inputs.set # remove comments that are css selectors for IE silliness @comments = @scrape.for_comments.reject do |c| c =~ /\[if IE\]/ || c =~ /\[if IE \d/ || c =~ /\[if lt IE \d/ end @links = @scrape.for_links.map do |url| l = @urlparse.parse(self.last_effective_url,url).to_s end @jlinks = @scrape.for_javascript_links @forms = @scrape.for_forms @spider.add(self.last_effective_url,@links) return [self.code,self.body_data.size] end |
#set_headers(arg = nil, clear = false) ⇒ Object
set headers from passed argument
Nil: set headers from WWMD::DEFAULT_HEADERS
Symbol: entry in WWMD::HEADERS to set from
Hash: hash to set headers from
String: filename (NOT IMPLEMENTED)
if clear == true then headers will be cleared before setting
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/wwmd/page/headers.rb', line 31

# Set request headers from +arg+.
#
# arg::   nil    - clear, then load WWMD::DEFAULT_HEADERS
#         Symbol - load the hash stored at WWMD::HEADERS[arg]
#         Hash   - merge the given pairs into the current headers
# clear:: when true, clear the existing headers first
#
# Returns "headers set from default" (nil arg), true (Symbol/Hash), or false
# when the defaults could not be loaded or +arg+ is of another type.
def set_headers(arg=nil,clear=false)
  clear_headers if clear
  if arg.nil?
    begin
      clear_headers
      WWMD::DEFAULT_HEADERS.each { |k,v| headers[k] = v }
      return "headers set from default"
    rescue => e
      # interpolate: `"WARN: " + e` raised TypeError (String + Exception)
      putw "WARN: #{e}"
      return false
    end
  elsif arg.class == Symbol
    set_headers(WWMD::HEADERS[arg])
    putw "headers set from #{arg}"
    return true
  elsif arg.class == Hash
    arg.each { |k,v| headers[k] = v }
    putw "headers set from hash"
    return true
  end
  putw "error setting headers"
  return false
end
#set_link(index) ⇒ Object
set link using an integer link from self.report – NOTE: I always use page.get(page.l(1)) anyway. ++
54 55 56 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 54 def set_link(index) self.url = @links[index] end |
#set_soap_headers ⇒ Object
set headers to SOAP request headers
101 102 103 104 105 |
# File 'lib/wwmd/page/headers.rb', line 101 def set_soap_headers self.headers['Content-Type'] = "text/xml;charset=utf-8" self.headers['SOAPAction'] = "\"\"" return "headers set to soap" end |
#set_utf7_headers ⇒ Object
set headers to utf7 encoding post
88 89 90 91 |
# File 'lib/wwmd/page/headers.rb', line 88 def set_utf7_headers self.headers["Content-Type"] = "application/x-www-form-urlencoded;charset=UTF-7" return "headers set to utf7" end |
#setbase(url = nil) ⇒ Object
set self.opts[:base_url] and self.base_url
75 76 77 78 79 |
# File 'lib/wwmd/page/parsing_convenience.rb', line 75 def setbase(url=nil) return nil if not url self.opts[:base_url] = url self.base_url = url end |
#size ⇒ Object
return page size in bytes
42 43 44 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 42 def size return self.body_data.size end |
#submit(iform = nil, reg = WWMD::ESCAPE[:default]) ⇒ Object
replacement for Curl::Easy.http_post
post the form, attempting to remove curl supplied headers (Expect, X-Forwarded-For, Content-length), then call self.set_data
if passed a regexp, escape values in the form using regexp before submitting if passed nil for the regexp arg, the form will not be escaped default: WWMD::ESCAPE
returns: array [ code, body_data.size ]
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
# File 'lib/wwmd/page/page.rb', line 161

# POST a form and parse the response.
#
# iform:: the form to submit (cloned, so the caller's copy is not escaped).
#         When nil, the page's own form is used. A Symbol here is treated as
#         the +reg+ argument instead (i.e. submit(:key)).
# reg::   Regexp used to escape the form values, or a Symbol key into
#         WWMD::ESCAPE; defaults to WWMD::ESCAPE[:default].
#
# Returns the value of set_data, or the string "no form provided" when no
# form was passed and the page's own form is empty.
def submit(iform=nil,reg=WWMD::ESCAPE[:default])
  ## this is just getting worse and worse
  # compare against the class, not the string "Symbol" (which never matched)
  if iform.class == Symbol
    reg = iform
    iform = nil
  end
  reg = WWMD::ESCAPE[reg] if reg.class == Symbol
  clear_data
  # drop the headers curl would otherwise supply for a POST
  ["Expect","X-Forwarded-For","Content-length"].each { |s| clear_header(s) }
  headers["Referer"] = cur if use_referer
  unless iform
    unless form.empty?
      sform = form.clone
    else
      return "no form provided"
    end
  else
    sform = iform.clone # clone the form so that we don't change the original
  end
  sform.escape_all!(reg)
  self.url = sform.action if sform.action
  if sform.empty?
    http_post('')
  else
    http_post(self.post_data = sform.to_post)
  end
  set_data
end
#submit_string(post_string) ⇒ Object
submit a form using POST string
191 192 193 194 195 196 |
# File 'lib/wwmd/page/page.rb', line 191 def submit_string(post_string) self.clear_data self.http_post(post_string) putw "WARN: authentication headers in response" if self.auth? self.set_data end |
#summary ⇒ Object
IRB: display summary of what has been parsed from this page
33 34 35 36 37 |
# File 'lib/wwmd/page/irb_helpers.rb', line 33 def summary status = self.page_status puts "XXXX[#{self.report_flags}] | #{self.response_code.to_s} | #{status} | #{self.url} | #{self.size}" return nil end |
#text ⇒ Object
65 |
# File 'lib/wwmd/page/irb_helpers.rb', line 65 def text; self.html2text; end |
#time ⇒ Object
63 64 65 |
# File 'lib/wwmd/page/reporting_helpers.rb', line 63 def time self.total_time end |
#to_text ⇒ Object
IRB: puts the page filtered through html2text
64 |
# File 'lib/wwmd/page/irb_helpers.rb', line 64 def to_text; puts self.html2text; end |
#user_agent=(ua) ⇒ Object
:section: Header helper methods
6 7 8 |
# File 'lib/wwmd/page/headers.rb', line 6 def user_agent=(ua) self.headers["User-Agent"] = ua end |
#verb(verb, url = nil) ⇒ Object
send arbitrary verb (only works with patch to taf2-curb)
226 227 228 229 230 231 232 233 |
# File 'lib/wwmd/page/page.rb', line 226 def verb(verb,url=nil) return false if !@curl_object.respond_to?(:http_verb) self.url = url if url self.clear_data self.headers["Referer"] = self.cur if self.use_referer self.http_verb(verb) self.set_data end |