Class: WWMD::Page

Inherits:
Object
  • Object
show all
Includes:
WWMDUtils
Defined in:
lib/wwmd/page/page.rb,
lib/wwmd/page/auth.rb,
lib/wwmd/page/headers.rb,
lib/wwmd/page/helpers.rb,
lib/wwmd/page/irb_helpers.rb,
lib/wwmd/page/html2text_hpricot.rb,
lib/wwmd/page/reporting_helpers.rb,
lib/wwmd/page/html2text_nokogiri.rb,
lib/wwmd/page/parsing_convenience.rb

Overview

NamedCharRegex = Regexp.new(“(&(”+Hpricot::NamedCharacters.keys.join(“|”)+“);)”)

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from WWMDUtils

header_array_from_file, rannum, ranstr

Constructor Details

#initialize(opts = {}, &block) ⇒ Page

Returns a new instance of Page.



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/wwmd/page/page.rb', line 35

# Build a Page: merge option defaults, create the helper objects and
# configure the wrapped Curl::Easy handle.
#
# opts  - Hash of options. Keys missing from it are filled in from DEFAULTS.
#         Each merged option is either pushed to the curl handle (when the
#         handle has a matching "key=" writer) or stored as an instance
#         variable named after the key.
# block - optional block, instance_eval'd on the new Page after the curl
#         handle has been configured.
def initialize(opts={}, &block)
  # work on a copy so the caller's hash is not modified by the defaults merge
  @opts = opts.clone
  DEFAULTS.each { |k,v| @opts[k] = v unless @opts.has_key?(k) }
  @spider = Spider.new(@opts)
  @scrape = Scrape.new
  @base_url ||= opts[:base_url]
  # explicit nil check so that a caller-supplied false is still applied
  @scrape.warn = opts[:scrape_warn] if !opts[:scrape_warn].nil? # yeah yeah... bool false
  @urlparse = URLParse.new()
  @inputs = Inputs.new(self)
  @logged_in = false
  @body_data = ""
  @post_data = ""
  @comments = []
  @header_data = FormArray.new
  @header_file = nil

  @curl_object = Curl::Easy.new
  # route every option: curl setter when one exists, instance variable otherwise
  # (this can overwrite instance variables assigned above, e.g. @header_file)
  @opts.each do |k,v|
    # proxy_url is only applied further down, and only when :use_proxy is set
    next if k == :proxy_url
    if (@curl_object.respond_to?("#{k}=".intern))
      @curl_object.send("#{k}=".intern,v)
    else
      self.instance_variable_set("@#{k.to_s}".intern,v)
    end
  end
  # feed response body and header chunks into this page's callbacks
  @curl_object.on_body   { |data| self._body_cb(data) }
  @curl_object.on_header { |data| self._header_cb(data) }

  # cookies?
  @curl_object.enable_cookies = @opts[:enable_cookies]
  if @curl_object.enable_cookies?
    # fall back to a cookie jar file in the current directory
    @curl_object.cookiejar = @opts[:cookiejar] || "./__cookiejar"
  end

  #proxy?
  @curl_object.proxy_url = @opts[:proxy_url] if @opts[:use_proxy]
  instance_eval(&block) if block_given?
  # warn once when the caller passed no options at all, then silence the flag
  if opts.empty? && @scrape.warn
    putw "Page initialized without opts"
    @scrape.warn = false
  end

  # when a header file is configured, load headers from it and turn curl's
  # cookie handling off; a failure is printed and otherwise ignored
  if @header_file
    begin
      headers_from_file(@header_file)
      @curl_object.enable_cookies = false
    rescue => e
      puts "ERROR: #{e}"
    end
  end
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(methodname, *args) ⇒ Object

send methods not defined here to @curl_object



252
253
254
# File 'lib/wwmd/page/page.rb', line 252

# Forward any method that Page does not define to the wrapped curl handle.
#
# methodname - Symbol name of the missing method.
# args       - positional arguments, passed through unchanged.
# block      - optional block; it is forwarded too (it used to be dropped,
#              so block-taking curl methods never saw the caller's block).
#
# Returns whatever @curl_object returns for the call.
def method_missing(methodname, *args, &block)
  @curl_object.send(methodname, *args, &block)
end

# Keep respond_to? in line with method_missing: a Page responds to whatever
# its curl handle responds to. Falls back to the default answer when no
# handle has been assigned yet.
def respond_to_missing?(methodname, include_private = false)
  (!@curl_object.nil? && @curl_object.respond_to?(methodname, include_private)) || super
end

Instance Attribute Details

#base_urlObject

needed to properly munge relative urls into fq urls



21
22
23
# File 'lib/wwmd/page/page.rb', line 21

# Base URL used to turn relative URLs into fully qualified ones.
def base_url; @base_url; end

#body_dataObject

Returns the value of attribute body_data.



6
7
8
# File 'lib/wwmd/page/page.rb', line 6

# Reader for @body_data (the buffer the body callback appends to).
def body_data; @body_data; end

#commentsObject (readonly)

Returns the value of attribute comments.



17
18
19
# File 'lib/wwmd/page/page.rb', line 17

# Reader for @comments.
def comments; @comments; end

#curl_objectObject

Returns the value of attribute curl_object.



5
6
7
# File 'lib/wwmd/page/page.rb', line 5

# Reader for @curl_object, the wrapped curl handle.
def curl_object; @curl_object; end

#formsObject (readonly)

Returns the value of attribute forms.



10
11
12
# File 'lib/wwmd/page/page.rb', line 10

# Reader for @forms.
def forms; @forms; end

#header_dataObject

Returns the value of attribute header_data.



8
9
10
# File 'lib/wwmd/page/page.rb', line 8

# Reader for @header_data (filled by the header callback).
def header_data; @header_data; end

#header_fileObject (readonly)

Returns the value of attribute header_file.



19
20
21
# File 'lib/wwmd/page/page.rb', line 19

# Reader for @header_file.
def header_file; @header_file; end

#inputsObject

Returns the value of attribute inputs.



25
26
27
# File 'lib/wwmd/page/page.rb', line 25

# Reader for @inputs.
def inputs; @inputs; end

array of included javascript files



13
14
15
# File 'lib/wwmd/page/page.rb', line 13

# Array of included javascript files.
def jlinks; @jlinks; end

#last_errorObject (readonly)

Returns the value of attribute last_error.



11
12
13
# File 'lib/wwmd/page/page.rb', line 11

# Reader for @last_error.
def last_error; @last_error; end

array of links (urls)



12
13
14
# File 'lib/wwmd/page/page.rb', line 12

# Array of links (urls).
def links; @links; end

#logged_inObject

are we logged in?



22
23
24
# File 'lib/wwmd/page/page.rb', line 22

# Are we logged in? Reader for @logged_in.
def logged_in; @logged_in; end

#optsObject

Returns the value of attribute opts.



24
25
26
# File 'lib/wwmd/page/page.rb', line 24

# Reader for @opts, the options hash.
def opts; @opts; end

#post_dataObject

Returns the value of attribute post_data.



7
8
9
# File 'lib/wwmd/page/page.rb', line 7

# Reader for @post_data.
def post_data; @post_data; end

#scrapeObject (readonly)

scrape object



15
16
17
# File 'lib/wwmd/page/page.rb', line 15

# Scrape object.
def scrape; @scrape; end

#spiderObject (readonly)

spider object



14
15
16
# File 'lib/wwmd/page/page.rb', line 14

# Spider object.
def spider; @spider; end

#statusObject

Returns the value of attribute status.



3
4
5
# File 'lib/wwmd/page/reporting_helpers.rb', line 3

# Reader for @status.
def status; @status; end

#urlparseObject (readonly)

urlparse object



16
17
18
# File 'lib/wwmd/page/page.rb', line 16

# Urlparse object.
def urlparse; @urlparse; end

#use_refererObject

Returns the value of attribute use_referer.



9
10
11
# File 'lib/wwmd/page/page.rb', line 9

# Reader for @use_referer.
def use_referer; @use_referer; end

Instance Method Details

#_body_cb(data) ⇒ Object

callback for self.on_body



238
239
240
241
# File 'lib/wwmd/page/page.rb', line 238

# Callback for self.on_body: append a chunk of response body to @body_data.
#
# data - String chunk handed over by curl (may be nil).
#
# Returns the number of bytes consumed (0 for a nil chunk). The original
# guarded the append against nil but then called data.length unguarded,
# which raised NoMethodError for a nil chunk.
def _body_cb(data)
  return 0 unless data
  @body_data << data
  # report bytes, not characters, so multi-byte data is not under-counted
  data.bytesize
end

#_header_cb(data) ⇒ Object

callback for self.on_header



244
245
246
247
248
249
# File 'lib/wwmd/page/page.rb', line 244

# Callback for self.on_header: record one raw response header line.
#
# data - String header line handed over by curl, e.g. "Name: value\r\n".
#
# The line is split on the first ":"; both halves are stripped and added to
# @header_data (a line without ":" is stored with an empty value).
#
# Returns the number of bytes consumed.
def _header_cb(data)
  name, value = data.split(":",2)
  @header_data.add(name.to_s.strip, value.to_s.strip)
  # report bytes, not characters, so multi-byte data is not under-counted
  data.bytesize
end

#action(id = nil) ⇒ Object

return the complete url to the form action on this page



31
32
33
34
35
36
# File 'lib/wwmd/page/parsing_convenience.rb', line 31

# Resolve the action of the form at index +id+ (default 0) to a complete URL.
# When the form has no action (nil or empty) the last effective URL is
# returned instead.
def action(id=nil)
  form_action = self.forms[id || 0].action
  if form_action.nil? || form_action.empty?
    self.last_effective_url
  else
    @urlparse.parse(self.last_effective_url,form_action).to_s
  end
end

#all_formsObject

IRB: display a human readable report of all forms contained in page.body_data



68
69
70
71
# File 'lib/wwmd/page/irb_helpers.rb', line 68

# IRB: print a report for every form on the page, each preceded by an
# "[index]-------" banner. Always returns nil.
def all_forms
  self.forms.each_index do |idx|
    puts "[#{idx.to_s}]-------"
    self.forms[idx].report
  end
  nil
end

#all_tagsObject

:nodoc:



66
67
68
# File 'lib/wwmd/page/parsing_convenience.rb', line 66

# Names of every element matched by the "*" search. :nodoc:
def all_tags #:nodoc:
  elements = self.search("*")
  elements.map { |node| node.name }
end

#auth?Boolean

does this request have an authenticate header?

Returns:

  • (Boolean)


5
6
7
8
9
10
11
12
13
14
# File 'lib/wwmd/page/auth.rb', line 5

# Does this response ask for authentication? True only when the response
# code is 401 and at least one response header name matches
# /www-authenticate/i.
def auth?
  return false unless self.code == 401
  self.header_data.each do |header|
    return true if header[0] =~ /www-authenticate/i
  end
  false
end

#clear_dataObject

clear self.body_data and self.header_data



124
125
126
127
128
129
130
# File 'lib/wwmd/page/page.rb', line 124

# Reset the per-request state: body buffer, post data, response headers and
# the last error.
#
# Returns false without touching anything when opts[:parse] is false,
# nil otherwise.
#
# The original guard was written with "=" instead of "==": it overwrote
# opts[:parse] with false on every call and could never return early.
def clear_data
  return false if self.opts[:parse] == false
  @body_data = ""
  @post_data = nil
  @header_data.clear
  @last_error = nil
end

#clear_header(key) ⇒ Object Also known as: delete_header

clear header at <key>



11
12
13
14
# File 'lib/wwmd/page/headers.rb', line 11

# Remove every request header whose name equals +key+, ignoring case.
# Always returns nil.
def clear_header(key)
  self.headers.delete_if do |name, _value|
    name.upcase == key.upcase
  end
  nil
end

#clear_headersObject

clear all headers



19
20
21
22
# File 'lib/wwmd/page/headers.rb', line 19

# Remove all request headers. Returns the string "headers cleared".
def clear_headers
  self.headers.delete_if { |_name, _value| true }
  return "headers cleared"
end

#codeObject

the last http response code



84
85
86
# File 'lib/wwmd/page/reporting_helpers.rb', line 84

# The last http response code (as returned by response_code).
def code; self.response_code; end

#current_urlObject Also known as: current, cur, now

alias_method for last_effective_url



75
76
77
# File 'lib/wwmd/page/reporting_helpers.rb', line 75

# Shorthand for last_effective_url.
def current_url; self.last_effective_url; end

#default_headers(arg = nil) ⇒ Object Also known as: set_default

set headers back to default headers



56
57
58
# File 'lib/wwmd/page/headers.rb', line 56

# Set headers back to the defaults by calling set_headers with no arguments.
# +arg+ is accepted but not used.
def default_headers(arg=nil)
  return set_headers
end

#dump_bodyObject Also known as: dump

display self.body_data



57
58
59
# File 'lib/wwmd/page/irb_helpers.rb', line 57

# Print self.body_data to standard output.
def dump_body
  puts(self.body_data)
end

#dump_scriptsObject Also known as: scripts

return an array of inner_html for each <script> tag encountered



44
45
46
# File 'lib/wwmd/page/parsing_convenience.rb', line 44

# Collect the inner_html of each tag returned by get_tags("//script").
# A script whose inner_html is blank contributes nil to the result array.
def dump_scripts
  self.get_tags("//script").map do |script|
    source = script.inner_html
    source.strip != '' ? source : nil
  end
end

#element_to_text(n) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/wwmd/page/html2text_hpricot.rb', line 16

# Render element +n+ as plain text.
#
# The element's tag name (end tag if present, else start tag) is lower-cased
# and looked up in BlockTags, ListTags, ItemTags and InlineTags:
#   * not in any list -> ""
#   * block or list   -> children's text followed by a newline
#   * item            -> "* " + children's text + newline
#   * inline          -> children's text only
def element_to_text(n)
  tag_name = (n.etag || n.stag).name.downcase
  block_like = BlockTags.include?(tag_name) || ListTags.include?(tag_name)
  item_like  = ItemTags.include?(tag_name)
  return "" unless block_like || item_like || InlineTags.include?(tag_name)

  out = ""
  n.each_child { |child| out += node_to_text(child) }
  if block_like
    out += "\n"
  elsif item_like
    out = "* " + out + "\n"
  end
  out
end

#fingerprintObject Also known as: fp

Return an MD5 fingerprint of the DOM: the MD5 of the list of all tag names in the page (all_tags.to_s.md5).



69
70
71
# File 'lib/wwmd/page/reporting_helpers.rb', line 69

# DOM fingerprint: md5 of the stringified all_tags list.
def fingerprint
  tag_list = self.all_tags.to_s
  tag_list.md5
end

#from_file(fn) ⇒ Object



25
26
27
28
29
30
# File 'lib/wwmd/page/helpers.rb', line 25

# Run from_input on the contents of file +fn+, then put the request headers
# back to what they were before the call.
#
# Returns whatever from_input returned.
def from_file(fn)
  saved_headers = headers.clone
  result = from_input(File.read(fn))
  headers.replace(saved_headers)
  result
end

#from_input(req) ⇒ Object

Parse a raw request copied and pasted from a Burp request window. The page object gets set with the request's headers and url (not correct). Returns [headers, form].

form = page.from_paste


8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/wwmd/page/helpers.rb', line 8

# Load a raw HTTP request (header block, blank line, body) into this page.
#
# req - String holding the raw request, parts separated by "\r\n".
#
# Cookie handling is switched off first. Returns false when +req+ is nil or
# false, and nil when the request line does not start with POST or GET.
# Otherwise sets url, headers and body_data, calls set_data, and returns
# [raw_header_block, form], where form is built from the body and has its
# action resolved against base_url.
def from_input(req)
  self.enable_cookies = false
  return false if not req
  head_part, body_part = req.chomp.split("\r\n\r\n",2)
  header_lines = head_part.split("\r\n")
  # request line: method, path, protocol (protocol is not used)
  http_method, path, _proto = header_lines.shift.split(" ")
  return nil unless http_method =~ (/^(POST|GET)/)
  self.url = self.base_url + path
  self.headers_from_array(header_lines)
  self.body_data = body_part
  self.set_data
  form = body_part.to_form
  form.action = @urlparse.parse(self.base_url, path).to_s
  [head_part,form]
end

#from_pasteObject



32
33
34
# File 'lib/wwmd/page/helpers.rb', line 32

# Run from_input on the output of the pbpaste command.
def from_paste
  pasted = `pbpaste`
  from_input(pasted)
end

#furl(url) ⇒ Object



70
71
72
# File 'lib/wwmd/page/parsing_convenience.rb', line 70

# Resolve +url+ against base_url and make the result the current url.
def furl(url)
  resolved = @urlparse.parse(self.base_url,url)
  self.url = resolved.to_s
end

#get(url = nil, parse = true) ⇒ Object

override for Curl::Easy.perform

If the passed url string doesn’t contain a fully qualified URL (scheme://…), it is resolved against base_url, unless parse is false.

returns: array [ code, body_data.size ]



204
205
206
207
208
209
210
211
212
213
214
215
# File 'lib/wwmd/page/page.rb', line 204

# Perform an HTTP GET.
#
# url   - optional target. Without a "scheme://" part it is resolved against
#         base_url, unless +parse+ is false, in which case it is used as given.
#         When nil the current url is left alone.
# parse - whether relative urls are resolved (default true).
#
# Clears the previous response data, optionally sets the Referer header,
# warns when the response carries authentication headers, and returns the
# value of set_data.
def get(url=nil,parse=true)
  self.clear_data
  self.headers["Referer"] = self.cur if self.use_referer
  if url
    relative = parse && !(url =~ /[a-z]+:\/\//)
    self.url = relative ? @urlparse.parse(self.base_url,url).to_s : url
  end
  self.http_get
  if self.auth?
    putw "WARN: authentication headers in response"
  end
  self.set_data
end

get the current Cookie header



108
109
110
# File 'lib/wwmd/page/headers.rb', line 108

# Current value of the Cookie request header.
def get_cookie; self.headers["Cookie"]; end

#get_form(id = nil) ⇒ Object

return this page’s form (at index id) as a FormArray



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/wwmd/page/parsing_convenience.rb', line 13

# Return this page's form at index +id+ (default 0) as a new FormArray.
#
# Returns nil when the page has no forms or there is no form at +id+.
# The new FormArray gets the form's fields and type, and an action resolved
# against the current url. When the form declares no action, the current
# url itself is used ("PARSE_ERROR" if that is missing as well).
#
# The no-op self-assignment "action ||= action" from the original fallback
# chain has been removed.
def get_form(id=nil)
  id = 0 if not id
  return nil if forms.empty? || !forms[id]
  f = @forms[id]
  action = f.action || cur || "PARSE_ERROR"
  url_action = @urlparse.parse(self.cur,action).to_s
  type = f.type
  FormArray.new do |x|
    x.set_fields(f.fields)
    x.action = url_action
    x.type   = type
  end
end

return link at index from @links array



59
60
61
# File 'lib/wwmd/page/parsing_convenience.rb', line 59

# Link at position +index+ in the @links array.
def get_link(index); @links[index]; end

#grep(reg) ⇒ Object

grep for regexp and remove leading whitespace



8
9
10
# File 'lib/wwmd/page/parsing_convenience.rb', line 8

# Grep self.body_data line by line.
#
# reg - pattern (anything usable with Enumerable#grep, e.g. a Regexp).
#
# Returns an Array of the matching lines with leading whitespace removed
# (line endings are kept).
#
# Lines are enumerated explicitly with each_line: String is not Enumerable
# from Ruby 1.9 on, so calling grep directly on the body String only works
# on 1.8.
def grep(reg)
  self.body_data.each_line.grep(reg).map { |i| i.gsub(/^\s+/, "") }
end

#has_comments?Boolean

Returns:

  • (Boolean)


39
# File 'lib/wwmd/page/reporting_helpers.rb', line 39

# True when @comments is not empty.
def has_comments?
  !@comments.empty?
end

#has_form?Boolean

Returns:

  • (Boolean)


38
# File 'lib/wwmd/page/reporting_helpers.rb', line 38

# True when @forms holds at least one form.
def has_form?
  @forms.size >= 1
end

#has_jlinks?Boolean

Returns:

  • (Boolean)


37
# File 'lib/wwmd/page/reporting_helpers.rb', line 37

# True when @jlinks is not empty.
def has_jlinks?
  !@jlinks.empty?
end

#has_links?Boolean

Returns:

  • (Boolean)


36
# File 'lib/wwmd/page/reporting_helpers.rb', line 36

# True when @links is not empty.
def has_links?
  !@links.empty?
end

#head(i = 1) ⇒ Object

:section: IRB helper methods



9
10
11
12
13
14
15
# File 'lib/wwmd/page/irb_helpers.rb', line 9

# IRB helper: print the beginning of the body.
#
# i - a Range selects those lines of body_data (split on "\n");
#     anything else is handed to body_data.head (default 1).
#
# Always returns nil.
def head(i=1)
  shown = if i.kind_of?(Range)
    self.body_data.split("\n")[i].join("\n")
  else
    self.body_data.head(i)
  end
  puts shown
  nil
end

#headers_from_array(arr) ⇒ Object

set headers from text



63
64
65
66
67
68
69
70
71
72
# File 'lib/wwmd/page/headers.rb', line 63

# Replace the request headers with the ones described by +arr+.
#
# arr - an Array of "Name: value" lines, or a String holding a raw request,
#       of which only the part before the first blank line is used.
#
# Empty lines, lines starting with GET or POST, and lines without a ":"
# separator are skipped. Always returns nil.
#
# Fixes: a String argument is now split into lines (the original called
# each on the String itself), and a line without ":" no longer crashes on
# nil.strip.
def headers_from_array(arr)
  clear_headers
  if arr.class == String
    arr = arr.split("\r\n\r\n").first.to_s.split(/\r?\n/)
  end
  arr.each do |line|
    next if (line.empty? || line =~ /^(GET|POST)/)
    k,v = line.split(":",2)
    next if v.nil? # not a "Name: value" line
    self.headers[k.strip] = v.strip
  end
  nil
end

#headers_from_file(fn) ⇒ Object

set headers from file



81
82
83
84
85
# File 'lib/wwmd/page/headers.rb', line 81

# Replace the request headers with the lines of file +fn+ (split on "\n").
# Returns a confirmation string naming the file.
def headers_from_file(fn)
  clear_headers
  header_lines = File.read(fn).split("\n")
  headers_from_array(header_lines)
  "headers set from #{fn}"
end

#headers_from_pasteObject

set headers from paste



76
77
78
# File 'lib/wwmd/page/headers.rb', line 76

# Set headers from the output of the pbpaste command.
def headers_from_paste
  pasted = `pbpaste`
  headers_from_array(pasted)
end

#hexdumpObject

hexdump self.body_data



79
80
81
# File 'lib/wwmd/page/irb_helpers.rb', line 79

# Print the hexdump of self.body_data.
def hexdump
  puts(self.body_data.hexdump)
end

#html2textObject

def lookup_named_char(s)

  c = Hpricot::NamedCharacters[s[1...-1]]
  c.chr if c
end


58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/wwmd/page/html2text_hpricot.rb', line 58

# Convert the parsed document (self.scrape.hdoc) to plain text.
#
# The text from node_to_text is cleaned up in place: carriage returns become
# spaces, runs of spaces are squeezed and the whole text is stripped. Each
# line is then stripped; empty lines and lines made only of "?" are dropped.
#
# Returns the remaining lines, each terminated by "\n", as one String.
def html2text
  text = node_to_text(self.scrape.hdoc)
#      text.gsub!(NamedCharRegex){|s| "#{lookup_named_char(s)}"}
  # clean up white space
  text.gsub!("\r"," ")
  text.squeeze!(" ")
  text.strip!
  kept = text.split(/\n/).map { |l| l.strip }.reject do |l|
    l == '' || l =~ /^\?+$/
  end
  return kept.map { |l| "#{l}\n" }.join
end

#inspectObject



30
31
32
33
# File 'lib/wwmd/page/page.rb', line 30

def inspect
  # hack
  return "Page"
end

#logged_in?Boolean

return value of @logged_in

Returns:

  • (Boolean)


19
20
21
# File 'lib/wwmd/page/reporting_helpers.rb', line 19

# Value of @logged_in.
def logged_in?; @logged_in; end

#md5Object

return md5sum for self.body_data



47
48
49
# File 'lib/wwmd/page/reporting_helpers.rb', line 47

# md5 of self.body_data.
def md5; self.body_data.md5; end

#node_to_text(n) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/wwmd/page/html2text_hpricot.rb', line 37

# Render node +n+ as plain text.
#
# Comments yield "", elements are delegated to element_to_text, text nodes
# yield their inner_text. Any other node yields the concatenated text of its
# children. An error raised while walking the children is reported through
# putw, and whatever was collected so far is returned.
def node_to_text(n)
  if n.comment?
    ""
  elsif n.elem?
    element_to_text(n)
  elsif n.text?
    n.inner_text
  else
    collected = ""
    begin
      n.each_child { |child| collected += node_to_text(child) }
    rescue => e
      putw "WARN: #{e.inspect}"
    end
    collected
  end
end

#onclicksObject



73
74
75
76
# File 'lib/wwmd/page/irb_helpers.rb', line 73

# Print the onclick attribute of every element that has one.
# Always returns nil.
def onclicks
  self.search("//*[@onclick]").each do |element|
    puts element[:onclick]
  end
  nil
end

#openObject

this only works on a mac so get a mac



84
85
86
87
88
# File 'lib/wwmd/page/irb_helpers.rb', line 84

# Write the body to a uniquely named temporary html file in the current
# directory and hand it to the "open" shell command. :nodoc:
def open #:nodoc:
  tmp_name = "wwmdtmp_#{Guid.new}.html"
  self.write(tmp_name)
  `open #{tmp_name}`
end

#page_statusObject

return text representation of page code

override with specific statuses in helper depending on page text etc to include statuses outside 200 = OK and other = ERR



11
12
13
14
# File 'lib/wwmd/page/reporting_helpers.rb', line 11

# Text representation of the page's response code.
#
# Sets and returns @status: "ERR" when response_code is above 399,
# "OK" otherwise.
#
# The original ended on a modifier-if, so it returned nil for every
# non-error page although @status was set to "OK".
def page_status
  @status = self.response_code > 399 ? "ERR" : "OK"
end

#performObject

override Curl::Easy.perform to perform page actions,

call <tt>self.set_data</tt>

returns: array [ code, body_data.size ] (the return value of set_data)

don’t call this directly if we are in console mode use get and submit respectively for GET and POST



139
140
141
142
143
144
145
146
147
148
149
# File 'lib/wwmd/page/page.rb', line 139

# Run the request currently configured on the curl handle.
#
# Clears the previous response data, optionally sets the Referer header and
# performs the request. An error raised by the handle is stored in
# @last_error instead of propagating; Curl::Err errors are also reported
# through putw.
#
# Returns the value of set_data.
#
# The warning test now matches against the class *name*: matching the Class
# object itself against a Regexp never matched (and raises NoMethodError on
# Ruby 3.2+, where Object#=~ was removed).
def perform
  self.clear_data
  self.headers["Referer"] = self.cur if self.use_referer
  begin
    @curl_object.perform
  rescue => e
    @last_error = e
    putw "WARN: #{e.class}" if e.class.to_s =~ /Curl::Err/
  end
  self.set_data
end

#post(url = nil) ⇒ Object

GET with params and POST it as a form



218
219
220
221
222
223
# File 'lib/wwmd/page/page.rb', line 218

# Take a GET-style url with parameters and POST it as a form.
#
# url.clip supplies the endpoint, which (when present) is resolved against
# opts[:base_url] and becomes the current url. url.clop.to_form supplies the
# form, which is handed to submit.
#
# Returns the value of submit.
def post(url=nil)
  endpoint = url.clip
  if endpoint
    self.url = @urlparse.parse(self.opts[:base_url],endpoint).to_s
  end
  self.submit(url.clop.to_form)
end

#rawObject

alias_method for body_data



94
95
96
# File 'lib/wwmd/page/parsing_convenience.rb', line 94

# Shorthand for body_data.
def raw; self.body_data; end

#read(filename) ⇒ Object

read self.body_data from file



88
89
90
91
# File 'lib/wwmd/page/parsing_convenience.rb', line 88

# Load self.body_data from +filename+, then re-run set_data.
# Returns the value of set_data.
def read(filename)
  contents = File.read(filename)
  self.body_data = contents
  self.set_data
end

#report(short = nil) ⇒ Object Also known as: show

IRB: text report what has been parsed from this page



18
19
20
21
22
23
24
25
26
27
28
# File 'lib/wwmd/page/irb_helpers.rb', line 18

# IRB: print a text report of what has been parsed from this page.
#
# short - when nil (the default) every link and javascript include is
#         listed; any other value prints only the section headers.
#
# Always returns nil.
def report(short=nil)
  verbose = short.nil?
  puts "-------------------------------------------------"
  self.summary
  puts "---- links found [#{self.has_links?.to_s} | #{self.links.size}]"
  if verbose
    self.links.each_index { |i| puts "#{i.to_s} :: #{@links[i]}" }
  end
  puts "---- javascript found [#{self.has_jlinks?.to_s} | #{self.jlinks.size}]"
  if verbose
    self.jlinks.each { |url| puts url }
  end
  puts "---- forms found [#{self.has_form?.to_s} | #{self.forms.size}]"
  puts "---- comments found [#{self.has_comments?.to_s}]"
  nil
end

#report_flagsObject

return a string of flags: Ll links Jj javascript includes Ff forms Cc comments



28
29
30
31
32
33
34
# File 'lib/wwmd/page/reporting_helpers.rb', line 28

# Four-character flag string: L/l links, J/j javascript includes, F/f forms,
# C/c comments. A letter is upper case when the page has that item.
def report_flags
  flags  = self.has_links?    ? "L" : "l"
  flags += self.has_jlinks?   ? "J" : "j"
  flags += self.has_form?     ? "F" : "f"
  flags += self.has_comments? ? "C" : "c"
  flags
end

#request_headersObject Also known as: show_headers, req_headers

IRB: display current headers



40
41
42
43
# File 'lib/wwmd/page/irb_helpers.rb', line 40

# IRB: print the current request headers as "Name: value" lines.
# Always returns nil.
def request_headers
  self.headers.each_pair do |name, value|
    puts "#{name}: #{value}"
  end
  nil
end

#resp_pasteObject



36
37
38
39
# File 'lib/wwmd/page/helpers.rb', line 36

# Take a raw response from the pbpaste command, keep everything after the
# first blank line as body_data and re-run set_data.
def resp_paste
  pasted = `pbpaste`
  self.body_data = pasted.split("\r\n\r\n",2)[1]
  self.set_data
end

#response_headersObject Also known as: resp_headers

IRB: display response headers



49
50
51
52
# File 'lib/wwmd/page/irb_helpers.rb', line 49

# IRB: print the response headers as "name :: value" lines.
# Always returns nil.
def response_headers
  self.header_data.each do |header|
    puts "#{header[0]} :: #{header[1]}"
  end
  nil
end

#search(xpath) ⇒ Object

return an array of Element objects for an xpath search



39
40
41
# File 'lib/wwmd/page/parsing_convenience.rb', line 39

# Run an xpath search against the parsed document (scrape.hdoc).
def search(xpath)
  document = self.scrape.hdoc
  document.search(xpath)
end

#set_ajax_headersObject

set headers to ajax



94
95
96
97
98
# File 'lib/wwmd/page/headers.rb', line 94

# Add the two ajax request headers (X-Requested-With, X-Prototype-Version).
# Returns a confirmation string.
def set_ajax_headers
  [["X-Requested-With", "XMLHttpRequest"],
   ["X-Prototype-Version", "1.5.0"]].each do |name, value|
    self.headers[name] = value
  end
  "headers set to ajax"
end

set the Cookie header



113
114
115
# File 'lib/wwmd/page/headers.rb', line 113

# Set the Cookie request header to +cookie+ (nil by default).
def set_cookie(cookie=nil); self.headers["Cookie"] = cookie; end

#set_cookies?Boolean Also known as: set_cookies

does this response have SET-COOKIE headers?

Returns:

  • (Boolean)


52
53
54
55
56
57
58
59
60
# File 'lib/wwmd/page/reporting_helpers.rb', line 52

# Collect the cookies set by this response.
#
# Returns a FormArray with one [name, value] pair per SET-COOKIE response
# header (name matched case-insensitively). Only the part before the first
# ";" is used, split on the first "=".
def set_cookies?
  cookies = FormArray.new()
  self.header_data.each do |header|
    next unless header[0].upcase == "SET-COOKIE"
    cookies << header[1].split(";").first.split("=",2)
  end
  cookies
end

#set_dataObject

set reporting data for the page

Scan for comments, anchors, links and javascript includes and set page flags. The heavy lifting for parsing is done in the scrape class.

returns: array [ code, body_data.size ]



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/wwmd/page/page.rb', line 96

# Parse the current body and refresh this page's reporting data
# (@comments, @links, @jlinks, @forms) from the scrape object.
#
# Returns a two-element Array: [self.code, self.body_data.size].
def set_data
  # reset scrape and inputs object
  # transparently gunzip
  begin
    io = StringIO.new(self.body_data)
    gz = Zlib::GzipReader.new(io)
    # replace the body in place with the decompressed data
    self.body_data.replace(gz.read)
  rescue => e
    # best effort: any StandardError is ignored and body_data is left as is
    # (presumably the body simply was not gzip data — TODO confirm)
  end
  @scrape.reset(self.body_data)
  @inputs.set

  # remove comments that are css selectors for IE silliness
  @comments = @scrape.for_comments.reject do |c|
    c =~ /\[if IE\]/ ||
    c =~ /\[if IE \d/ ||
    c =~ /\[if lt IE \d/
  end
  # resolve every scraped link against the last effective url
  @links = @scrape.for_links.map do |url|
    l = @urlparse.parse(self.last_effective_url,url).to_s
  end
  @jlinks = @scrape.for_javascript_links
  @forms = @scrape.for_forms
  # hand this page's url and its links to the spider
  @spider.add(self.last_effective_url,@links)
  return [self.code,self.body_data.size]
end

#set_headers(arg = nil, clear = false) ⇒ Object

set headers from passed argument

Nil:    set headers from WWMD::DEFAULT_HEADERS
Symbol: entry in WWMD::HEADERS to set from
Hash:   hash to set headers from
String: filename (NOT IMPLEMENTED)

if clear == true then headers will be cleared before setting


31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/wwmd/page/headers.rb', line 31

# Set request headers from +arg+.
#
# arg   - nil:    clear the headers and load WWMD::DEFAULT_HEADERS
#         Symbol: load the WWMD::HEADERS entry stored under that key
#         Hash:   merge the given name => value pairs into the headers
# clear - when true the headers are cleared before anything is set.
#
# Returns "headers set from default" for nil, true for a Symbol or Hash,
# and false when the defaults could not be loaded or +arg+ is of any other
# type.
#
# The rescue branch now interpolates the exception: the original
# concatenated a String with the exception object, which raised TypeError
# inside the rescue instead of printing the warning.
def set_headers(arg=nil,clear=false)
  clear_headers if clear
  if arg.nil?
    begin
      clear_headers
      WWMD::DEFAULT_HEADERS.each { |k,v| self.headers[k] = v }
      return "headers set from default"
    rescue => e
      putw "WARN: #{e}"
      return false
    end
  elsif arg.class == Symbol
    set_headers(WWMD::HEADERS[arg])
    putw "headers set from #{arg}"
    return true
  elsif arg.class == Hash
    arg.each { |k,v| self.headers[k] = v }
    putw "headers set from hash"
    return true
  end
  putw "error setting headers"
  return false
end

set link using an integer link from self.report – NOTE: I always use page.get(page.l(1)) anyway. ++



54
55
56
# File 'lib/wwmd/page/parsing_convenience.rb', line 54

# Make the link at position +index+ of @links the current url.
def set_link(index); self.url = @links[index]; end

#set_soap_headersObject

set headers to SOAP request headers



101
102
103
104
105
# File 'lib/wwmd/page/headers.rb', line 101

# Add the SOAP request headers (Content-Type and an empty quoted SOAPAction).
# Returns a confirmation string.
def set_soap_headers
  [['Content-Type', "text/xml;charset=utf-8"],
   ['SOAPAction', "\"\""]].each do |name, value|
    self.headers[name] = value
  end
  "headers set to soap"
end

#set_utf7_headersObject

set headers to utf7 encoding post



88
89
90
91
# File 'lib/wwmd/page/headers.rb', line 88

# Set the Content-Type header for a UTF-7 encoded form post.
# Returns a confirmation string.
def set_utf7_headers
  content_type = "application/x-www-form-urlencoded;charset=UTF-7"
  self.headers["Content-Type"] = content_type
  "headers set to utf7"
end

#setbase(url = nil) ⇒ Object

set self.opts



75
76
77
78
79
# File 'lib/wwmd/page/parsing_convenience.rb', line 75

# Set the base url, both in opts[:base_url] and on base_url.
# Does nothing and returns nil when +url+ is nil or false.
def setbase(url=nil)
  return nil unless url
  self.opts[:base_url] = url
  self.base_url = url
end

#sizeObject

return page size in bytes



42
43
44
# File 'lib/wwmd/page/reporting_helpers.rb', line 42

# Size of self.body_data.
def size; self.body_data.size; end

#submit(iform = nil, reg = ) ⇒ Object

replacement for Curl::Easy.http_post

post the form, attempting to remove curl-supplied headers (Expect, X-Forwarded-For, Content-length), then call self.set_data

If passed a regexp, values in the form are escaped using that regexp before submitting. If passed nil for the regexp arg, the form will not be escaped. Default: WWMD::ESCAPE[:default]

returns: array [ code, body_data.size ]



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/wwmd/page/page.rb', line 161

# POST a form (replacement for Curl::Easy.http_post).
#
# iform - the form to submit; it is cloned so the original is not changed.
#         When nil, a clone of self.form is used. A Symbol given here is
#         taken as the +reg+ selector instead, and self.form is submitted.
# reg   - escape pattern passed to the form's escape_all!; a Symbol is
#         looked up in WWMD::ESCAPE. Defaults to WWMD::ESCAPE[:default].
#
# Before posting, the previous response data and the Expect,
# X-Forwarded-For and Content-length headers are cleared, and the Referer
# header is set when use_referer is on.
#
# Returns "no form provided" when there is nothing to submit, otherwise the
# value of set_data.
#
# The Symbol shortcut compared the argument's class to the String "Symbol",
# which is never equal, so submit(:some_escape) never worked; it now
# compares against the Symbol class.
def submit(iform=nil,reg=WWMD::ESCAPE[:default])
  if iform.class == Symbol
    reg = iform
    iform = nil
  end
  reg = WWMD::ESCAPE[reg] if reg.class == Symbol
  self.clear_data
  ["Expect","X-Forwarded-For","Content-length"].each { |s| self.clear_header(s) }
  self.headers["Referer"] = self.cur if self.use_referer
  if iform
    sform = iform.clone             # clone the form so that we don't change the original
  else
    return "no form provided" if self.form.empty?
    sform = self.form.clone
  end
  sform.escape_all!(reg)
  self.url = sform.action if sform.action
  if sform.empty?
    self.http_post('')
  else
    self.http_post(self.post_data = sform.to_post)
  end
  self.set_data
end

#submit_string(post_string) ⇒ Object

submit a form using POST string



191
192
193
194
195
196
# File 'lib/wwmd/page/page.rb', line 191

# POST the given string as the request body.
#
# Clears the previous response data first, warns when the response carries
# authentication headers, and returns the value of set_data.
def submit_string(post_string)
  self.clear_data
  self.http_post(post_string)
  if self.auth?
    putw "WARN: authentication headers in response"
  end
  self.set_data
end

#summaryObject

IRB: display summary of what has been parsed from this page



33
34
35
36
37
# File 'lib/wwmd/page/irb_helpers.rb', line 33

# IRB: print a one-line summary of the page: report flags, response code,
# page status, url and size, separated by " | ". Always returns nil.
def summary
  status = self.page_status
  flags = self.report_flags
  http_code = self.response_code.to_s
  puts "XXXX[#{flags}] | #{http_code} | #{status} | #{self.url} | #{self.size}"
  nil
end

#textObject



65
# File 'lib/wwmd/page/irb_helpers.rb', line 65

# The page filtered through html2text.
def text
  self.html2text
end

#timeObject



63
64
65
# File 'lib/wwmd/page/reporting_helpers.rb', line 63

# Shorthand for total_time.
def time; self.total_time; end

#to_textObject

IRB: puts the page filtered through html2text



64
# File 'lib/wwmd/page/irb_helpers.rb', line 64

# IRB: print the page filtered through html2text.
def to_text
  puts(self.html2text)
end

#user_agent=(ua) ⇒ Object

:section: Header helper methods



6
7
8
# File 'lib/wwmd/page/headers.rb', line 6

# Set the User-Agent request header.
def user_agent=(ua); self.headers["User-Agent"] = ua; end

#verb(verb, url = nil) ⇒ Object

send arbitrary verb (only works with patch to taf2-curb)



226
227
228
229
230
231
232
233
# File 'lib/wwmd/page/page.rb', line 226

# Send a request with an arbitrary HTTP verb.
#
# verb - the verb handed to http_verb.
# url  - optional url to set before sending.
#
# Returns false, without doing anything, when the curl handle does not
# respond to http_verb; otherwise the value of set_data.
def verb(verb,url=nil)
  return false unless @curl_object.respond_to?(:http_verb)
  self.url = url if url
  self.clear_data
  if self.use_referer
    self.headers["Referer"] = self.cur
  end
  self.http_verb(verb)
  self.set_data
end

#write(filename) ⇒ Object

write self.body_data to file



82
83
84
85
# File 'lib/wwmd/page/parsing_convenience.rb', line 82

# Write self.body_data to +filename+.
# Returns a confirmation string naming the file.
def write(filename)
  contents = self.body_data
  File.write(filename,contents)
  "wrote to " + filename
end