Class: RHACK::Page
- Inherits:
-
Object
- Object
- RHACK::Page
- Defined in:
- lib/rhack/page.rb
Overview
Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, … ) ) => Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, …
Constant Summary collapse
- @@ignore =
for johnson
/google|_gat|tracker|adver/i
Instance Attribute Summary collapse
-
#curl_res ⇒ Object
readonly
Returns the value of attribute curl_res.
-
#doc ⇒ Object
readonly
Returns the value of attribute doc.
-
#failed ⇒ Object
readonly
Returns the value of attribute failed.
-
#hash ⇒ Object
readonly
Returns the value of attribute hash.
-
#html ⇒ Object
readonly
Returns the value of attribute html.
-
#js ⇒ Object
readonly
Returns the value of attribute js.
-
#loc ⇒ Object
readonly
Returns the value of attribute loc.
-
#res ⇒ Object
Result of the page processing performed in the frame context.
- #title(full = true) ⇒ Object
Instance Method Summary collapse
- #at(xp) ⇒ Object
- #empty? ⇒ Boolean
- #eval_js(frame = nil) ⇒ Object
- #eval_string(str) ⇒ Object
- #expand_link(link) ⇒ Object
- #find(xp) ⇒ Object
- #form(form = 'form', hash = {}, opts = {}) ⇒ Object
- #get_link(link = 'a') ⇒ Object (also: #get_href, #link)
- #get_links(links = 'a') ⇒ Object (also: #get_hrefs, #links)
- #get_src(link = 'img') ⇒ Object (also: #src)
- #get_srcs(links = 'img') ⇒ Object (also: #srcs)
- #html!(encoding = 'UTF-8') ⇒ Object
-
#initialize(obj = '', loc = Hash.new(''), js = Johnson::Runtime.browser||Johnson::Runtime.new) ⇒ Page
constructor
A new instance of Page.
- #inspect ⇒ Object
- #load_scripts(frame) ⇒ Object
-
#process(c, opts = {}) ⇒ Object
#process can be overridden in Page subclasses; Frame does not care about the value returned by #process.
- #submit(form, frame, hash = {}, opts = {}, &callback) ⇒ Object
- #to_doc ⇒ Object
- #url ⇒ Object (also: #href)
Constructor Details
#initialize(obj = '', loc = Hash.new(''), js = Johnson::Runtime.browser||Johnson::Runtime.new) ⇒ Page
Returns a new instance of Page.
17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/rhack/page.rb', line 17
# Builds a page either from a finished request or from ready-made markup.
#
# obj - a Curl::Easy handle, a Scout (its #http handle is used), or anything
#       else, which is stored as the page's html as-is.
# loc - a Hash, or a value that answers #parse(:uri). When obj is a request,
#       a non-blank loc is handed to #process as its options.
# js  - the JavaScript runtime kept in @js.
def initialize(obj='', loc=Hash.new(''), js=Johnson::Runtime.browser||Johnson::Runtime.new)
  loc = loc.parse(:uri) unless loc.is(Hash)
  @js = js

  # Plain content: just remember it together with its location.
  unless obj.is(Curl::Easy) || obj.kinda(Scout)
    @html = obj
    @loc = loc
    return
  end

  curl = obj.kinda(Scout) ? obj.http : obj
  @html = ''
  # just (curl, loc) would pass to #process opts variable that returns '' on any key
  process(curl, loc.b || {})
end
Instance Attribute Details
#curl_res ⇒ Object (readonly)
Returns the value of attribute curl_res.
11 12 13 |
# File 'lib/rhack/page.rb', line 11 def curl_res @curl_res end |
#doc ⇒ Object (readonly)
Returns the value of attribute doc.
11 12 13 |
# File 'lib/rhack/page.rb', line 11 def doc @doc end |
#failed ⇒ Object (readonly)
Returns the value of attribute failed.
11 12 13 |
# File 'lib/rhack/page.rb', line 11 def failed @failed end |
#hash ⇒ Object (readonly)
Returns the value of attribute hash.
11 12 13 |
# File 'lib/rhack/page.rb', line 11 def hash @hash end |
#html ⇒ Object (readonly)
Returns the value of attribute html.
11 12 13 |
# File 'lib/rhack/page.rb', line 11 def html @html end |
#js ⇒ Object (readonly)
Returns the value of attribute js.
11 12 13 |
# File 'lib/rhack/page.rb', line 11 def js @js end |
#loc ⇒ Object (readonly)
Returns the value of attribute loc.
11 12 13 |
# File 'lib/rhack/page.rb', line 11 def loc @loc end |
#res ⇒ Object
Result of the page processing performed in the frame context.
13 14 15 |
# File 'lib/rhack/page.rb', line 13 def res @res end |
#title(full = true) ⇒ Object
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# File 'lib/rhack/page.rb', line 128
# Returns the page title.
#
# full - when true, the complete title; when false, a title shortened to
#        roughly 40 characters and terminated with an ellipsis.
#
# Pages that carry decoded data (@hash), failed, or have blank html have no
# document to read a title from, so their URL is returned instead. A document
# without a title gets the URL as its title (and as a <title> node when it
# has a <head>).
def title(full=true)
  return @loc.href unless @hash.nil? && !@failed && @html.b

  if full
    to_doc unless defined? @doc
    return @title = @doc.title if @doc.title.b

    @title = @loc.href
    head = @doc.at('head')
    head.prepend XML::Node('title', @title) if head
    return @title
  end

  # The short form is derived from the full one, so make sure that exists.
  title true unless defined? @title
  @short_title =
    if RUBY_VERSION < '1.9' and @title.cyr? and UTF2ANSI[@title].size > 40
      # Ruby 1.8 counts bytes: measure and cut in the single-byte encoding.
      ANSI2UTF[UTF2ANSI[@title][/.{1,30}\S*/][0..38]]+'…'
    elsif @title.size > 40
      @title[/.{1,30}\S*/][0..38]+'…'
    else
      @title
    end
end
Instance Method Details
#at(xp) ⇒ Object
156 |
# File 'lib/rhack/page.rb', line 156
# Delegates `at` to the page document, building the document first when
# it does not exist yet.
def at(xp)
  document = @doc || to_doc
  document.at(xp)
end
#empty? ⇒ Boolean
31 32 33 |
# File 'lib/rhack/page.rb', line 31
# True when the page payload is blank. The payload is @hash when data
# decoding was attempted (@hash is not nil), otherwise @html.
def empty?
  payload = @hash.nil? ? @html : @hash
  !payload.b
end
#eval_js(frame = nil) ⇒ Object
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/rhack/page.rb', line 91
# Runs the page's <script> elements in the JavaScript runtime.
#
# First the location-related globals are set from @loc. Then every script
# node is visited: inline code is evaluated, whatever it put into the
# runtime's :write_output is inserted after the node, and the node is
# removed. Scripts that only have a src are fetched through
# frame.get_cached and evaluated, which requires a frame.
def eval_js(frame=nil)
  eval_string "document.location = window.location = #{@loc.to_json}; document.URL = document.baseURI = document.documentURI = location.href; document.domain = location.host;"
  find("script").each do |script|
    L.debug script.text.strip
    inline_code = script.text.strip.b
    if inline_code
      js[:write_output] = ''
      eval_string inline_code
      written = js[:write_output].b
      script.after written if written
      script.remove!
    elsif frame and script.src
      eval_string frame.get_cached(script.src)
    end
  end
end
#eval_string(str) ⇒ Object
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/rhack/page.rb', line 108
# Evaluates a string of JavaScript in the page runtime (created on first use).
#
# str - JavaScript source.
#
# Returns the value of the evaluation. A Johnson::Error is not propagated:
# its message is logged as a warning and the source is logged at debug level,
# with the offending identifier highlighted when the message names one.
# Note that the highlighting is applied to str in place.
def eval_string(str)
  @js ||= Johnson::Runtime.new
  L.debug "#{@js} evaluating in #{Thread.current}\nmain: #{Thread.main}; carier: #{Curl.carier_thread}"
  begin
    @js.evaluate(str)
  rescue Johnson::Error => e
    L.warn e.message
    L.debug {
      if m = e.message.match(/(\w+) is undefined|([\w.]+) is not a function/)
        # The second capture may contain dots; escape it so that `foo.bar`
        # highlights only the literal text, not e.g. `fooXbar`.
        L.clr.hl! str, /\b#{Regexp.escape(m[1] || m[2])}\b/
      end
      "\n\t#{str}"
    }
  end
end
#expand_link(link) ⇒ Object
201 202 203 204 205 206 207 208 |
# File 'lib/rhack/page.rb', line 201
# Expands a link found on the page relative to the page location.
#
# link - a String: absolute URL, scheme-relative (`//host/...`),
#        root-relative (`/path`) or relative path.
#
# Absolute URLs are returned unchanged; scheme-relative links are prefixed
# with @loc.protocol and root-relative ones with @loc.root. Any other link is
# joined to the directory of the current path, or to @loc.root when the
# current path is blank.
# NOTE(review): assumes @loc.protocol ends with ':' so that protocol + '//host'
# forms a valid URL - confirm against the URI helper.
def expand_link(link)
  case link
  # \A rather than ^: the scheme must start the string, not just some line of it
  when /\A\w+:\/\// then link
  when /\A\/\// then @loc.protocol+link
  when /\A\// then @loc.root+link
  else File.join((@loc.path.b ? File.dirname(@loc.path) : @loc.root), link)
  end
end
#find(xp) ⇒ Object
154 |
# File 'lib/rhack/page.rb', line 154
# Delegates `find` to the page document, building the document first when
# it does not exist yet.
def find(xp)
  document = @doc || to_doc
  document.find(xp)
end
#form(form = 'form', hash = {}, opts = {}) ⇒ Object
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 |
# File 'lib/rhack/page.rb', line 210
# Builds the request arguments for submitting a form found on this page.
#
# form - a selector String, :self (the form whose action equals the current
#        path), or an already located form node.
# hash - field values that override the form's own inputs.
# opts - request options, passed through untouched as the last element.
#
# Returns [fields, multipart, action, opts] for a POST form, or
# [action_with_query, opts] otherwise.
# Raises XML::Error when a selector does not resolve to a <form> node.
def form(form='form', hash={}, opts={})
  form = "[action=#{@loc.path.inspect}]" if form == :self
  if form.is String
    form_node = at form
    raise XML::Error, "Can't find form by xpath `#{form}` on page #{inspect}" if !form_node or form_node.name != 'form'
  else
    form_node = form
  end
  hash = form_node.inputs_all.merge!(hash)
  action = (form_node.action || @loc.path)
  # A form without a method attribute is submitted with GET; to_s keeps a
  # missing attribute from raising NoMethodError on nil.
  if form_node['method'].to_s.downcase == 'post'
    [hash, form_node.enctype =~ /multipart/, action, opts]
  else
    action = "#{action}#{action['?'] ? '&' : '?'}#{hash.urlencode}" if hash.b
    [action, opts]
  end
end
#get_link(link = 'a') ⇒ Object Also known as: get_href, link
187 188 189 190 191 192 193 |
# File 'lib/rhack/page.rb', line 187
# Returns the expanded href of the first node matching a selector.
#
# link - a selector String, or any non-String value, which is used as the
#        link itself.
#
# When the matched node has no href, the first <a> below it is tried.
# Returns nil when nothing is found. When the lookup raises XML::Error the
# argument is kept and expanded as-is (presumably so that a plain URL can be
# passed instead of a selector - TODO confirm).
# NOTE(review): the published listing ended with `link if link`; the
# `expand_link` call is restored here on the assumption that it was lost in
# extraction, as the method name in `def expand_link` was.
def get_link(link='a')
  begin
    if link.is String
      node = at(link)
      href = node && node.href
      if node && !href
        # fall back to an anchor nested in the matched node, if there is one
        nested = at(link+'//a')
        href = nested && nested.href
      end
      link = href
    end
  rescue XML::Error
    nil
  end
  expand_link link if link
end
#get_links(links = 'a') ⇒ Object Also known as: get_hrefs, links
178 179 180 181 182 183 184 185 |
# File 'lib/rhack/page.rb', line 178
# Returns the unique expanded hrefs of all nodes matching a selector.
#
# links - a selector String, or a collection of links used as given.
#
# When no matched node yields a value, the <a> elements below the matches
# are used instead. When the lookup raises XML::Error the argument itself is
# treated as the only link.
# NOTE(review): the published listing mapped each link to itself; the
# `expand_link` call is restored here on the assumption that it was lost in
# extraction, as the method name in `def expand_link` was.
def get_links(links='a')
  begin
    links = find(links).map {|e| e.href}.b || find(links+'//a').map {|e| e.href} if links.is String
  rescue XML::Error
    links = [links]
  end
  links.map {|link| expand_link link}.uniq
end
#get_src(link = 'img') ⇒ Object Also known as: src
170 171 172 173 174 175 176 |
# File 'lib/rhack/page.rb', line 170
# Returns the expanded src of the first node matching a selector.
#
# link - a selector String, or any non-String value, which is used as the
#        link itself.
#
# Returns nil when nothing is found. When the lookup raises XML::Error the
# argument is kept and expanded as-is.
# NOTE(review): the published listing ended with `link if link`; the
# `expand_link` call is restored here on the assumption that it was lost in
# extraction, as the method name in `def expand_link` was.
def get_src(link='img')
  begin
    if link.is String
      node = at(link)
      link = node && node.src
    end
  rescue XML::Error
    nil
  end
  expand_link link if link
end
#get_srcs(links = 'img') ⇒ Object Also known as: srcs
161 162 163 164 165 166 167 168 |
# File 'lib/rhack/page.rb', line 161
# Returns the unique expanded src values of all nodes matching a selector.
#
# links - a selector String, or a collection of links used as given.
#
# When the lookup raises XML::Error the argument itself is treated as the
# only link.
# NOTE(review): the published listing mapped each link to itself; the
# `expand_link` call is restored here on the assumption that it was lost in
# extraction, as the method name in `def expand_link` was.
def get_srcs(links='img')
  begin
    links = find(links).map {|e| e.src} if links.is String
  rescue XML::Error
    links = [links]
  end
  links.map {|link| expand_link link}.uniq
end
#html!(encoding = 'UTF-8') ⇒ Object
43 44 45 |
# File 'lib/rhack/page.rb', line 43
# Re-tags the stored html with the given encoding, in place (the bytes are
# not converted), and returns that same string.
def html!(encoding='UTF-8')
  markup = @html
  markup.force_encoding(encoding)
end
#inspect ⇒ Object
35 36 37 38 39 40 41 |
# File 'lib/rhack/page.rb', line 35
# Short description of the page.
#
# Data pages (@hash is not nil) show the size of the inspected data, or
# 'failed to parse', and whether the source was json or a params hash.
# Other pages show the response header (failed pages) or the short title,
# the html size, and whether JavaScript is available for the document.
def inspect
  unless @hash.nil?
    size = @hash ? @hash.inspect.size.bytes : 'failed to parse'
    kind = @json ? 'json' : 'params hash'
    return "<#FramePage (#{size}) #{kind}>"
  end

  summary =
    if @html.b
      label = @failed ? @curl_res.header : '«'+title(false)+'»'
      "#{label} (#{@html.size.bytes}"
    else
      '(empty'
    end
  js_note = ' js enabled' if @js and @doc and @hash.nil?
  "<#FramePage #{summary})#{js_note}>"
end
#load_scripts(frame) ⇒ Object
240 241 242 |
# File 'lib/rhack/page.rb', line 240
# Fetches every script referenced by a src attribute through
# frame.get_cached and evaluates each result in the page runtime.
# Does nothing, and returns the falsy frame, when no frame is given.
def load_scripts(frame)
  return frame unless frame

  sources = get_srcs("script[src]")
  frame.get_cached(*sources).each do |script|
    eval_string script
  end
end
#process(c, opts = {}) ⇒ Object
#process can be overridden in Page subclasses; Frame does not care about the value returned by #process.
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/rhack/page.rb', line 49
# Fills the page from a finished request.
#
# c    - the request handle; its last effective URL and `res` are read.
# opts - :json         decode the body as JSON into @hash
#        :hash         decode the body as a params string into @hash
#        :eval         after parsing html, load external scripts and run
#                      the page scripts
#        :load_scripts the frame handed to #load_scripts when :eval is set
#
# When decoding fails, @hash becomes false and the body is parsed as a
# document instead so it can be examined.
# NOTE(review): for a non-200 response with :json or :hash requested nothing
# is recorded at all (neither @html nor @failed) - confirm this is intended.
#
# Returns self.
def process(c, opts={})
  @loc = c.last_effective_url.parse(:uri)
  @curl_res = c.res
  L.debug "#{@loc.fullpath} -> #{@curl_res}"

  unless @curl_res.code == 200
    unless opts[:json] || opts[:hash]
      @html = @curl_res.body
      @failed = @curl_res.code
    end
    return self
  end

  body = @curl_res.body
  if opts[:json]
    @json = true
    @hash =
      begin
        body.from_json
      rescue StandardError
        false
      end
    # a bare string is not a usable data structure either
    if !@hash or @hash.is String
      L.debug "failed to get json from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
      @html = body
      to_doc
      @hash = false
    end
  elsif opts[:hash]
    if body.inline
      @hash = body.to_params
    else
      @hash = false
      L.debug "failed to get params hash from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
      @html = body
      to_doc
    end
  else
    @html = body.xml_to_utf
    to_doc
    if opts[:eval]
      load_scripts opts[:load_scripts]
      eval_js
    end
  end
  self
end
#submit(form, frame, hash = {}, opts = {}, &callback) ⇒ Object
227 228 229 230 231 232 233 234 235 236 237 238 |
# File 'lib/rhack/page.rb', line 227
# Submits a form of this page through a frame.
#
# form     - selector, :self or form node, as accepted by #form.
# frame    - the frame that executes the request.
# hash     - field values overriding the form's inputs.
# opts     - request options; a Referer header pointing at this page is
#            added (into opts itself) unless one is already present.
# callback - passed on to frame.exec.
#
# A static frame aimed at a different URL is retargeted to the form action
# for the duration of the request and then forced back. The restore runs in
# an ensure clause so the frame does not stay retargeted when exec raises.
#
# Returns whatever frame.exec returns.
def submit(form, frame, hash={}, opts={}, &callback)
  (opts[:headers] ||= {}).Referer ||= @loc.href if @loc
  query = form(form, hash, opts)
  # POST queries carry the action at index 2, GET queries at index 0
  curr_target, new_target = frame.loc.href, (query[2] || query[0])
  need_retargeting = frame.static && curr_target != new_target
  frame.retarget new_target if need_retargeting
  begin
    frame.exec(*query, &callback)
  ensure
    frame.retarget curr_target, :forced if need_retargeting
  end
end
#to_doc ⇒ Object
124 125 126 |
# File 'lib/rhack/page.rb', line 124
# Builds the document from the stored html (with the :forceutf flag),
# remembers it in @doc and returns it.
def to_doc
  parsed = @html.to_doc(:forceutf)
  @doc = parsed
end
#url ⇒ Object Also known as: href
158 |
# File 'lib/rhack/page.rb', line 158
# The href of the page location.
def url
  @loc.href
end