Class: RHACK::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/rhack/page.rb

Overview

Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, … ) ) => Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, …

Direct Known Subclasses

CodeIndiffirentPage

Constant Summary collapse

@@ignore =

for johnson

/google|_gat|tracker|adver/i

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(obj = '', loc = Hash.new(''), js = Johnson::Runtime.browser||Johnson::Runtime.new) ⇒ Page

Returns a new instance of Page.



17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/rhack/page.rb', line 17

# Builds a Page either from a finished Curl request or from raw content.
#
# obj - a Curl::Easy or a Scout (its #http handle is used) to be processed,
#       or any other value, which is stored as-is in @html.
# loc - options Hash handed to #process in the Curl/Scout case, otherwise
#       the value stored in @loc; a non-Hash is converted with parse(:uri).
# js  - JavaScript runtime kept in @js.
def initialize(obj='', loc=Hash.new(''), js=Johnson::Runtime.browser||Johnson::Runtime.new)
  loc = loc.parse(:uri) unless loc.is(Hash)
  @js = js
  if obj.is(Curl::Easy) || obj.kinda(Scout)
    curl = obj.kinda(Scout) ? obj.http : obj
    @html = ''
    # The default loc is Hash.new(''), which answers '' for every key; handing
    # it to #process directly would switch on every option, so an empty loc
    # is replaced with a plain {}.
    process(curl, loc.b || {})
  else
    @html = obj
    @loc = loc
  end
end

Instance Attribute Details

#curl_res ⇒ Object (readonly)

Returns the value of attribute curl_res.



11
12
13
# File 'lib/rhack/page.rb', line 11

# Reader for @curl_res.
def curl_res() @curl_res end

#doc ⇒ Object (readonly)

Returns the value of attribute doc.



11
12
13
# File 'lib/rhack/page.rb', line 11

# Reader for @doc.
def doc() @doc end

#failed ⇒ Object (readonly)

Returns the value of attribute failed.



11
12
13
# File 'lib/rhack/page.rb', line 11

# Reader for @failed.
def failed() @failed end

#hash ⇒ Object (readonly)

Returns the value of attribute hash.



11
12
13
# File 'lib/rhack/page.rb', line 11

# Reader for @hash.
def hash() @hash end

#html ⇒ Object (readonly)

Returns the value of attribute html.



11
12
13
# File 'lib/rhack/page.rb', line 11

# Reader for @html.
def html() @html end

#js ⇒ Object (readonly)

Returns the value of attribute js.



11
12
13
# File 'lib/rhack/page.rb', line 11

# Reader for @js.
def js() @js end

#loc ⇒ Object (readonly)

Returns the value of attribute loc.



11
12
13
# File 'lib/rhack/page.rb', line 11

# Reader for @loc.
def loc() @loc end

#res ⇒ Object

Result of the page processing performed in the frame context.



13
14
15
# File 'lib/rhack/page.rb', line 13

# Reader for @res.
def res() @res end

#title(full = true) ⇒ Object



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/rhack/page.rb', line 128

# Returns a title for the page.
#
# When the page holds a parsed hash (@hash is not nil), has failed, or has no
# HTML, the location href is returned instead.
#
# full - true (default): use the parsed document's title, falling back to the
#        location href, which is then also prepended as a <title> node to
#        <head> if the document has one. The result is cached in @title.
#        false: return a shortened title (cut when longer than 40
#        characters), stored in @short_title.
def title(full=true)
  return @loc.href unless @hash.nil? && !@failed && @html.b

  unless full
    # Make sure @title has been computed before shortening it.
    title true unless defined? @title
    @short_title =
      if RUBY_VERSION < '1.9' and @title.cyr? and UTF2ANSI[@title].size > 40
        ANSI2UTF[UTF2ANSI[@title][/.{1,30}\S*/][0..38]]+''
      elsif @title.size > 40
        @title[/.{1,30}\S*/][0..38]+''
      else
        @title
      end
    return @short_title
  end

  to_doc unless defined? @doc
  return @title = @doc.title if @doc.title.b

  @title = @loc.href
  head = @doc.at('head')
  head.prepend XML::Node('title', @title) if head
  @title
end

Instance Method Details

#at(xp) ⇒ Object



156
# File 'lib/rhack/page.rb', line 156

# Delegates #at to the parsed document, building it with #to_doc when @doc is
# not set yet.
def at(xp)
  document = @doc || to_doc
  document.at(xp)
end

#empty? ⇒ Boolean

Returns:

  • (Boolean)


31
32
33
# File 'lib/rhack/page.rb', line 31

# True when the page content is blank: @hash is checked if it is not nil,
# otherwise @html.
def empty?
  content = @hash.nil? ? @html : @hash
  !content.b
end

#eval_js(frame = nil) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rhack/page.rb', line 91

# Evaluates the page's <script> elements in the JavaScript runtime.
#
# The document/window location is first seeded from @loc. Inline scripts are
# then evaluated one by one: whatever ends up in js[:write_output] is inserted
# after the script node, and the node is removed. Scripts without inline text
# are fetched through frame.get_cached and evaluated, but only when a frame
# is given and the node has a src.
def eval_js(frame=nil)
  eval_string "document.location = window.location = #{@loc.to_json};
  document.URL = document.baseURI = document.documentURI = location.href;
  document.domain = location.host;"
  find("script").each do |node|
    inline = node.text.strip
    L.debug inline
    if code = inline.b
      js[:write_output] = ''
      eval_string code
      written = js[:write_output].b
      node.after written if written
      node.remove!
    elsif frame and node.src
      eval_string(frame.get_cached(expand_link(node.src)))
    end
  end
end

#eval_string(str) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/rhack/page.rb', line 108

# Evaluates +str+ in the page's JavaScript runtime, creating a
# Johnson::Runtime when @js is not set.
#
# A Johnson::Error is not propagated: its message is logged as a warning and
# the source is written to the debug log, with the offending identifier
# highlighted when the message names one.
#
# str - JavaScript source; note that the highlighter (hl!) is handed this
#       same string object.
def eval_string(str)
  @js ||= Johnson::Runtime.new
  L.debug "#{@js} evaluating in #{Thread.current}\nmain: #{Thread.main}; carier: #{Curl.carier_thread}"
  begin
    @js.evaluate(str)
  rescue Johnson::Error => e
    L.warn e.message
    L.debug {
      if m = e.message.match(/(\w+) is undefined|([\w.]+) is not a function/)
        # The captured name may contain dots (e.g. "foo.bar"); escape it so
        # they match literally instead of acting as regex wildcards.
        L.clr.hl! str, /\b#{Regexp.escape(m[1] || m[2])}\b/
      end
      "\n\t#{str}"
    }
  end
end


201
202
203
204
205
206
207
208
# File 'lib/rhack/page.rb', line 201

# Turns +link+ into a full address relative to the page location (@loc).
#
# - "scheme://..." links are returned unchanged;
# - "//host/..." links get @loc.protocol prepended;
# - "/path" links get @loc.root prepended;
# - anything else is joined onto the directory of @loc.path, or onto
#   @loc.root when the path is blank.
def expand_link(link)
  case link
  when /^\w+:\/\//
    link
  when /^\/\//
    @loc.protocol + link
  when /^\//
    @loc.root + link
  else
    base = @loc.path.b ? File.dirname(@loc.path) : @loc.root
    File.join(base, link)
  end
end

#find(xp) ⇒ Object



154
# File 'lib/rhack/page.rb', line 154

# Delegates #find to the parsed document, building it with #to_doc when @doc
# is not set yet.
def find(xp)
  document = @doc || to_doc
  document.find(xp)
end

#form(form = 'form', hash = {}, opts = {}) ⇒ Object



210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/rhack/page.rb', line 210

# Builds the request arguments needed to submit a form found on this page.
#
# form - selector String used with #at, :self (the form whose action equals
#        the current @loc.path), or an already located form node.
# hash - field values merged over the form's own inputs (inputs_all).
# opts - passed through unchanged as the last element of the result.
#
# Returns [hash, multipart_match, action, opts] for a POST form and
# [action_with_query, opts] otherwise.
# Raises XML::Error when a selector String does not resolve to a <form> node.
def form(form='form', hash={}, opts={})
  form = "[action=#{@loc.path.inspect}]" if form == :self
  if form.is String
    form_node = at form
    raise XML::Error, "Can't find form by xpath `#{form}` on page #{inspect}" if !form_node or form_node.name != 'form'
  else
    form_node = form
  end
  hash = form_node.inputs_all.merge!(hash)
  action = expand_link(form_node.action || @loc.path)
  # A form without a method attribute is submitted with GET, so a missing
  # attribute must not blow up on nil.downcase.
  if (form_node['method'] || 'get').downcase == 'post'
    [hash, form_node.enctype =~ /multipart/, action, opts]
  else
    action = "#{action}#{action['?'] ? '&' : '?'}#{hash.urlencode}" if hash.b
    [action, opts]
  end
end


187
188
189
190
191
192
193
# File 'lib/rhack/page.rb', line 187

# Resolves a single link and expands it with #expand_link.
#
# link - selector String (default 'a') or an already extracted link. For a
#        selector, the href of the matched node is used; if that node has no
#        href, the href of the first nested <a> is used instead. If the
#        lookup raises XML::Error, the string itself is treated as the link.
#
# Returns the expanded link, or nil when nothing was found.
def get_link(link='a')
  begin
    if link.is String
      node = at(link)
      # A node without an href and without a nested <a> yields nil rather
      # than raising NoMethodError on nil.href.
      link = node && (node.href || ((nested = at(link+'//a')) && nested.href))
    end
  rescue XML::Error; nil
  end
  expand_link link if link
end


178
179
180
181
182
183
184
185
# File 'lib/rhack/page.rb', line 178

# Resolves several links and expands each with #expand_link.
#
# links - selector String (default 'a') or a collection of already extracted
#         links. For a selector, the hrefs of the matched nodes are used; if
#         none of them has an href, the hrefs of nested <a> elements are
#         used instead. If the lookup raises XML::Error, the string itself is
#         treated as the only link.
#
# Returns the expanded links without duplicates.
def get_links(links='a')
  begin
    if links.is String
      # Drop nodes without an href: otherwise [nil, ...] counts as non-empty,
      # the nested-anchor fallback never runs, and nil reaches #expand_link.
      hrefs = find(links).map {|e| e.href}.compact
      links = hrefs.b || find(links+'//a').map {|e| e.href}.compact
    end
  rescue XML::Error
    links = [links]
  end
  links.map {|link| expand_link link}.uniq
end

#get_src(link = 'img') ⇒ Object Also known as: src



170
171
172
173
174
175
176
# File 'lib/rhack/page.rb', line 170

# Resolves a single src and expands it with #expand_link.
#
# link - selector String (default 'img') or an already extracted link. For a
#        selector, the src of the matched node is used. If the lookup raises
#        XML::Error, the string itself is treated as the link.
#
# Returns the expanded link, or nil when nothing was found.
def get_src(link='img')
  begin
    if link.is String
      node = at(link)
      link = node && node.src
    end
  rescue XML::Error; nil
  end
  expand_link link if link
end

#get_srcs(links = 'img') ⇒ Object Also known as: srcs



161
162
163
164
165
166
167
168
# File 'lib/rhack/page.rb', line 161

# Resolves several srcs and expands each with #expand_link.
#
# links - selector String (default 'img') or a collection of already
#         extracted links. For a selector, the src of every matched node is
#         used. If the lookup raises XML::Error, the string itself is treated
#         as the only link.
#
# Returns the expanded links without duplicates.
def get_srcs(links='img')
  begin
    # Nodes without a src attribute are skipped so that nil never reaches
    # #expand_link.
    links = find(links).map {|e| e.src}.compact if links.is String
  rescue XML::Error
    links = [links]
  end
  links.map {|link| expand_link link}.uniq
end

#html!(encoding = 'UTF-8') ⇒ Object



43
44
45
# File 'lib/rhack/page.rb', line 43

# Relabels @html in place with +encoding+ (default 'UTF-8') via
# String#force_encoding and returns it.
def html!(encoding='UTF-8')
  @html.force_encoding encoding
end

#inspect ⇒ Object



35
36
37
38
39
40
41
# File 'lib/rhack/page.rb', line 35

# Short human-readable summary of the page.
#
# For a page processed in json/hash mode (@hash is not nil) it shows the size
# of the parsed data or 'failed to parse', plus the mode. Otherwise it shows
# the response header (failed page) or the short title, the HTML size, and
# whether a JS runtime and a parsed document are both present.
def inspect
  unless @hash.nil?
    size = @hash ? @hash.inspect.size.bytes : 'failed to parse'
    mode = @json ? 'json' : 'params hash'
    return "<#FramePage (#{size}) #{mode}>"
  end

  summary =
    if @html.b
      heading = @failed ? @curl_res.header : '«'+title(false)+'»'
      "#{heading} (#{@html.size.bytes}"
    else
      '(empty'
    end
  "<#FramePage #{summary})#{' js enabled' if @js and @doc and @hash.nil?}>"
end

#load_scripts(frame) ⇒ Object



240
241
242
# File 'lib/rhack/page.rb', line 240

# Fetches every external script of the page ("script[src]") through
# frame.get_cached and evaluates each result with #eval_string.
# Does nothing, and returns +frame+ itself, when +frame+ is nil or false.
def load_scripts(frame)
  return frame unless frame

  sources = get_srcs("script[src]")
  frame.get_cached(*sources).each { |script| eval_string script }
end

#process(c, opts = {}) ⇒ Object

#process can be overridden in Page subclasses; Frame does not care about the value returned by #process.



49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/rhack/page.rb', line 49

# Fills this page from a finished Curl request and returns self.
#
# c    - request handle; this method reads c.last_effective_url and c.res.
# opts - options Hash; the keys read here are :json, :hash, :eval and
#        :load_scripts.
#
# @loc (parsed last effective URL) and @curl_res are always set. What else
# is set depends on the response code and on opts, see the inline comments.
def process(c, opts={})
  @loc = c.last_effective_url.parse:uri
  @curl_res = c.res
  L.debug "#{@loc.fullpath} -> #{@curl_res}"
  if @curl_res.code == 200
    body = @curl_res.body
    if opts[:json]
      # JSON mode: @hash becomes the decoded body, or false if decoding raised.
      @json = true
      @hash = begin; body.from_json
      rescue StandardError
        false 
      end
      # A falsy or String result counts as a failure: the raw body is kept in
      # @html and parsed into @doc for inspection, and @hash is set to false.
      if !@hash or @hash.is String
        L.debug "failed to get json from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
        @html = body; to_doc
        @hash = false
      end
      
    elsif opts[:hash]
      # Params mode: the body is converted with to_params only when
      # body.inline is truthy; otherwise it is handled like a failed JSON body.
      if body.inline
        @hash = body.to_params
      else
        @hash = false
        L.debug "failed to get params hash from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
        @html = body; to_doc
      end
      
    else
      # Plain page: keep the body (after xml_to_utf) and parse it; scripts
      # are loaded and evaluated only when opts[:eval] is set.
      @html = body.xml_to_utf
      to_doc
      if opts[:eval]
        # opts[:load_scripts] is passed as the frame argument of #load_scripts.
        load_scripts opts[:load_scripts]
        eval_js
      end
    end
  elsif !(opts[:json] or opts[:hash])
    # Non-200 response outside json/hash mode: keep the raw body and remember
    # the status code in @failed. In json/hash mode nothing more is recorded.
    @html = @curl_res.body
    @failed = @curl_res.code
  end
  self
end

#submit(form, frame, hash = {}, opts = {}, &callback) ⇒ Object



227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/rhack/page.rb', line 227

# Submits a form of this page through +frame+ and returns what frame.exec
# returns.
#
# form     - form selector or node, as accepted by #form.
# frame    - object that performs the request (uses loc, static, retarget
#            and exec).
# hash     - field values passed on to #form.
# opts     - request options; opts[:headers] is created if missing and its
#            Referer is set to the current location unless already present.
# callback - block forwarded to frame.exec.
#
# If the frame is static and the form target differs from the frame's current
# location, the frame is retargeted for the request and restored afterwards.
def submit(form, frame, hash={}, opts={}, &callback)
  if @loc
    headers = (opts[:headers] ||= {})
    headers.Referer ||= @loc.href
  end
  query = form(form, hash, opts)

  # The action is the third element of a POST query and the first of a GET one.
  original_target = frame.loc.href
  target = query[2] || query[0]
  retarget = frame.static && original_target != target
  frame.retarget target if retarget
  page = frame.exec(*query, &callback)
  frame.retarget original_target, :forced if retarget
  page
end

#to_doc ⇒ Object



124
125
126
# File 'lib/rhack/page.rb', line 124

# Parses @html with to_doc(:forceutf), stores the result in @doc and returns it.
def to_doc
  parsed = @html.to_doc(:forceutf)
  @doc = parsed
end

#url ⇒ Object Also known as: href



158
# File 'lib/rhack/page.rb', line 158

# Returns the href of the page location (@loc).
def url
  @loc.href
end