Class: RHACK::Page

Inherits:
Object show all
Defined in:
lib/rhack/page.rb

Overview

Frame( ScoutSquad( Curl::Multi <- Scout( Curl API ), Scout, … ) ) => Curl -> Johnson::Runtime -> XML::Document => Page( XML::Document ), Page, …

Direct Known Subclasses

CodeIndiffirentPage

Constant Summary collapse

@@ignore =

for johnson

/google|_gat|tracker|adver/i

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(obj = '', loc = Hash.new(''), js = Johnson::Runtime.browser||Johnson::Runtime.new) ⇒ Page

Returns a new instance of Page.



42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/rhack/page.rb', line 42

# Builds a page either from a finished curl request or from raw content.
#
# obj - a Curl::Easy, a Scout (its #http handle is used), or anything else,
#       which is stored as-is in @html.
# loc - a Hash, or a value that is converted with #parse(:uri).
# js  - the JavaScript runtime stored in @js.
def initialize(obj='', loc=Hash.new(''), js=Johnson::Runtime.browser||Johnson::Runtime.new)
  loc = loc.parse(:uri) unless loc.is(Hash)
  @js = js
  if obj.is(Curl::Easy) || obj.kinda(Scout)
    @html = ''
    curl = obj.kinda(Scout) ? obj.http : obj
    # just (curl, loc) would pass to #process opts variable that returns '' on any key
    process(curl, loc.b || {})
  else
    @loc = loc
    @html = obj
  end
end

Instance Attribute Details

#curl_resObject (readonly)

Returns the value of attribute curl_res.



36
37
38
# File 'lib/rhack/page.rb', line 36

# Reader for @curl_res (assigned from the curl handle's #res in #process).
def curl_res() @curl_res end

#docObject (readonly)

Returns the value of attribute doc.



36
37
38
# File 'lib/rhack/page.rb', line 36

# Reader for @doc, the document assigned by #to_doc.
def doc() @doc end

#failedObject (readonly)

Returns the value of attribute failed.



36
37
38
# File 'lib/rhack/page.rb', line 36

# Reader for @failed (set by #process to the response code of a non-200 reply).
def failed() @failed end

#hashObject (readonly)

Returns the value of attribute hash.



36
37
38
# File 'lib/rhack/page.rb', line 36

# Reader for @hash (the parsed json/params result set by #process).
# Note that this replaces Object#hash for the page.
def hash() @hash end

#htmlObject (readonly)

Returns the value of attribute html.



36
37
38
# File 'lib/rhack/page.rb', line 36

# Reader for @html, the raw page body.
def html() @html end

#jsObject (readonly)

Returns the value of attribute js.



36
37
38
# File 'lib/rhack/page.rb', line 36

# Reader for @js, the JavaScript runtime given to the constructor.
def js() @js end

#locObject (readonly)

Returns the value of attribute loc.



36
37
38
# File 'lib/rhack/page.rb', line 36

# Reader for @loc, the location this page was loaded from.
def loc() @loc end

#resObject

The result of processing this page in the frame context.



38
39
40
# File 'lib/rhack/page.rb', line 38

# Reader for @res, the result of processing this page in the frame context.
def res() @res end

#title(full = true) ⇒ Object



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/rhack/page.rb', line 156

# Returns the page title.
#
# full - when true, the document title (falling back to the page URL, which
#        is then also written into <head> if one exists); when false, a
#        version shortened to about 40 characters with a trailing ellipsis.
#
# Pages that hold a parsed hash, failed pages and empty pages have no
# document title, so the page URL is returned for them.
def title(full=true)
  return @loc.href unless (@hash.nil? and !@failed and @html.b)

  unless full
    # make sure the full title has been computed at least once
    title true unless defined? @title
    @short_title =
      if RUBY_VERSION < '1.9' and @title.cyr? and UTF2ANSI[@title].size > 40
        ANSI2UTF[UTF2ANSI[@title][/.{1,30}\S*/][0..38]]+'…'
      elsif @title.size > 40
        @title[/.{1,30}\S*/][0..38]+'…'
      else
        @title
      end
    return @short_title
  end

  to_doc unless defined? @doc
  return @title = @doc.title if @doc.title.b

  @title = @loc.href
  @doc.at('head').prepend XML::Node('title', @title) if @doc.at('head')
  @title
end

Instance Method Details

#at(selector_or_node, options = {}) ⇒ Object Also known as: first



262
263
264
265
266
267
268
269
270
271
272
# File 'lib/rhack/page.rb', line 262

# Finds a single node.
#
# selector_or_node - an XML::Node (used as-is) or a selector passed to #__at.
# options          - :preprocess is handed to #preprocess_search_result; the
#                    whole hash is handed to #node_is_missing! on a miss.
#
# Yields the preprocessed result when a block is given and returns the
# block's value; otherwise returns the preprocessed result. On a miss,
# #node_is_missing! is called and the falsy lookup result is returned.
def at(selector_or_node, options={})
  found = if selector_or_node
    selector_or_node.is_a?(XML::Node) ? selector_or_node : __at(selector_or_node)
  end

  unless found
    node_is_missing!(selector_or_node, options)
    return found
  end

  processed = preprocess_search_result(found, options[:preprocess])
  block_given? ? yield(processed) : processed
end

#dict(hash) ⇒ Object

Hook for writing evenly aligned hash definitions (aligned in the author's Verdana 10px font), e.g.

    dict key1: value1, …
         key2: value2, ...


188
189
190
# File 'lib/rhack/page.rb', line 188

# Returns the argument unchanged when it already is a Hash; anything else
# (e.g. an array of pairs) is converted with Hash[].
def dict(hash)
  return hash if hash.is_a?(Hash)
  Hash[hash]
end

#empty?Boolean

Returns:

  • (Boolean)


56
57
58
# File 'lib/rhack/page.rb', line 56

# True when the page content is blank: @hash is checked if it has been set
# (non-nil), otherwise @html.
def empty?
  content = @hash.nil? ? @html : @hash
  !content.b
end

#eval_js(frame = nil) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/rhack/page.rb', line 119

# Runs the page's scripts in the JavaScript runtime.
#
# First points document/window location at @loc, then walks every <script>
# node: inline code is evaluated, whatever was collected in
# js[:write_output] is inserted after the node, and the node is removed.
# Scripts without inline code are fetched through frame.get_cached and
# evaluated, but only when a frame is given and the node has a src.
def eval_js(frame=nil)
  eval_string "document.location = window.location = #{@loc.to_json};
  document.URL = document.baseURI = document.documentURI = location.href;
  document.domain = location.host;"
  find("script").each do |script|
    L.debug script.text.strip
    inline_code = script.text.strip.b
    if inline_code
      js[:write_output] = ''
      eval_string inline_code
      written = js[:write_output].b
      script.after written if written
      script.remove!
    elsif frame and script.src
      eval_string frame.get_cached(expand_link(script.src))
    end
  end
end

#eval_string(str) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/rhack/page.rb', line 136

# Evaluates a string of JavaScript in this page's runtime.
#
# str - the JavaScript source to evaluate.
#
# A Johnson::Runtime is created on first use if the page has none.
# Johnson::Error is not propagated: the message is logged as a warning and
# the offending source is written to the debug log, with the undefined
# name / missing function highlighted in it when the message names one.
#
# Returns the value of @js.evaluate, or the logger's return value on error.
def eval_string(str)
  @js ||= Johnson::Runtime.new
  L.debug "#{@js} evaluating in #{Thread.current}\nmain: #{Thread.main}; carier: #{Curl.carier_thread}"
  begin
    @js.evaluate(str)
  rescue Johnson::Error => e
    L.warn e.message
    L.debug {
      if m = e.message.match(/(\w+) is undefined|([\w.]+) is not a function/)
        # the captured name may contain dots (e.g. "a.b"); escape it so it is
        # matched literally instead of as a regex wildcard
        L.clr.hl! str, /\b#{Regexp.escape(m[1] || m[2])}\b/
      end
      "\n\t#{str}"
    }
  end
end

Converts a relative path found on this page into an absolute URL.



208
209
210
211
212
213
214
215
216
217
# File 'lib/rhack/page.rb', line 208

# Turns a link found on this page into an absolute URL, based on @loc.
#
# link - the link text; the leading characters decide how it is resolved.
def expand_link(link)
  case link
  when /^\w+:\/\//
    # already absolute
    link
  when /^\/\//
    # protocol-relative
    @loc.protocol + ':' + link
  when /^\//
    # relative to the site root
    @loc.root + link
  when /^\?/
    # query string for the current path
    current_page = File.join(@loc.root, @loc.path)
    current_page + link
  when /^#/
    # fragment of the current full path
    current_full = File.join(@loc.root, @loc.fullpath)
    current_full + link
  else
    # relative to the current directory
    File.join(@loc.root, File.dirname(@loc.path), link)
  end
end

#find(selector_or_nodes, options = {}, &foreach) ⇒ Object Also known as: all



275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/rhack/page.rb', line 275

# Finds all matching nodes.
#
# selector_or_nodes - an XML::XPath::Object or Array (used as-is), or a
#                     selector passed to #__find.
# options           - :preprocess is handed to #preprocess_search_results;
#                     the whole hash is handed to #node_is_missing! when
#                     nothing is found.
# foreach           - optional block called with every preprocessed result.
#
# Returns the preprocessed results (or the value of #each when a block is
# given); on an empty result, calls #node_is_missing! and returns the empty
# lookup result.
def find(selector_or_nodes, options={}, &foreach)
  # is_a? takes a single class, so the two accepted container types are
  # checked one by one
  ready_made = selector_or_nodes.is_a?(XML::XPath::Object) || selector_or_nodes.is_a?(Array)
  preresult = ready_made ? selector_or_nodes : __find(selector_or_nodes)

  if preresult.size > 0
    preresult = preprocess_search_results(preresult, options[:preprocess])
    foreach ? preresult.each(&foreach) : preresult
  else
    node_is_missing!(selector_or_nodes, options)
    preresult
  end
end

#flatten_dict(hash) ⇒ Object

Maps {'firstname lastname' => tuple} into {:firstname => tuple[0], :lastname => tuple[1]}.



193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/rhack/page.rb', line 193

# Expands space-separated compound keys into one symbol key per word.
#
# A String key containing a space is split on spaces and the n-th word is
# mapped to the n-th element of the value; every other key is simply
# converted with #to_sym.
#
# Returns a new Hash with Symbol keys.
def flatten_dict(hash)
  flat = {}
  hash.each do |key, value|
    if key.is(String) && key[' ']
      key.split(' ').each_with_index do |name, position|
        flat[name.to_sym] = value[position]
      end
    else
      flat[key.to_sym] = value
    end
  end
  flat
end

#form(form = 'form', hash = {}, opts = {}) ⇒ Object

FORMS #



344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
# File 'lib/rhack/page.rb', line 344

# Builds the request arguments needed to submit a form on this page.
#
# form - a selector String, :self (the form whose action is the current
#        path), or an already found form node.
# hash - field values merged over the form's own inputs.
# opts - passed through untouched as the last element of the result.
#
# Returns [fields, multipart_match, action, opts] for POST forms and
# [action_with_query, opts] for everything else.
#
# Raises XML::Error when a selector does not resolve to a <form> node.
def form(form='form', hash={}, opts={})
  form = "[action=#{@loc.path.inspect}]" if form == :self
  if form.is String
    form_node = at form
    raise XML::Error, "Can't find form by xpath `#{form}` on page #{inspect}" if !form_node or form_node.name != 'form'
  else
    form_node = form
  end
  hash = form_node.inputs_all.merge!(hash)
  action = expand_link(form_node.action || @loc.path)
  # a form without a method attribute is submitted with GET, so a missing
  # attribute must not blow up on nil.downcase
  http_method = (form_node['method'] || 'get').downcase
  if http_method == 'post'
    [hash, form_node.enctype =~ /multipart/, action, opts]
  else
    action = "#{action}#{action['?'] ? '&' : '?'}#{hash.urlencode}" if hash.b
    [action, opts]
  end
end


314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/rhack/page.rb', line 314

# Returns the absolute URL of a link on the page.
#
# selector_or_node - selector or node handed to #at.
# options          - passed to #at; :preprocess is replaced by the href
#                    resolver below.
# onfound          - optional block called with the URL when one was found.
#
# The node's own href is used when present; otherwise the href of the
# result of node.find('a').
# NOTE(review): this presumes node.find('a') returns a single node that
# responds to #href — confirm against the XML::Node extensions.
def get_link(selector_or_node='a', options={}, &onfound)
  href_resolver = lambda do |node|
    href = node.href
    unless href
      nested = node.find('a')
      href = nested.href if nested
    end
    href ? expand_link(href) : nil
  end

  at(selector_or_node, options.merge(:preprocess => href_resolver)) do |href|
    onfound && href ? onfound.call(href) : href
  end
end

def get_src(link=‘img’)

begin
  link = at(link) && at(link).src if link.is String
rescue XML::Error; nil
end
expand_link link if link

end



396
397
398
399
400
401
402
403
# File 'lib/rhack/page.rb', line 396

# Returns the unique absolute URLs of the links matched on the page.
#
# links - a selector String, or an enumerable of link strings.
#
# For a selector, the hrefs of the matched nodes are used; when none of the
# matched nodes carries an href, the anchors nested inside them
# (selector + '//a') are used instead. Nodes without an href are skipped,
# the same way #get_link ignores a missing href.
# If the lookup raises XML::Error, the string itself is treated as a link.
def get_links(links='a')
  begin
    if links.is String
      # drop nil hrefs first, otherwise a list of nils counts as "found"
      # and the nested-anchor fallback can never trigger
      own_hrefs = find(links).map {|e| e.href}.reject {|href| href.nil?}
      links = own_hrefs.b || find(links+'//a').map {|e| e.href}
    end
  rescue XML::Error
    links = [links]
  end
  links.reject {|link| link.nil?}.map {|link| expand_link link}.uniq
end

#get_src(selector_or_node = 'img', options = {}, &onfound) ⇒ Object Also known as: src



305
306
307
308
309
310
311
# File 'lib/rhack/page.rb', line 305

# Returns the absolute URL of a node's src attribute.
#
# selector_or_node - selector or node handed to #at.
# options          - passed to #at; :preprocess is replaced by the src
#                    resolver below.
# onfound          - optional block called with the URL when one was found.
def get_src(selector_or_node='img', options={}, &onfound)
  src_resolver = lambda do |node|
    src = node.src
    src ? expand_link(src) : nil
  end

  at(selector_or_node, options.merge(:preprocess => src_resolver)) do |src|
    onfound && src ? onfound.call(src) : src
  end
end

#get_srcs(links = 'img') ⇒ Object Also known as: srcs

TODO: make into same form as #get_src and #map



378
379
380
381
382
383
384
385
# File 'lib/rhack/page.rb', line 378

# Returns the unique absolute URLs of the src attributes matched on the page.
#
# links - a selector String, or an enumerable of link strings.
#
# Nodes without a src are skipped, the same way #get_src ignores a missing
# src, instead of being passed to #expand_link as nil.
# If the lookup raises XML::Error, the string itself is treated as a link.
def get_srcs(links='img')
  begin
    links = find(links).map {|e| e.src} if links.is String
  rescue XML::Error
    links = [links]
  end
  links.reject {|link| link.nil?}.map {|link| expand_link link}.uniq
end

#html!(encoding = 'UTF-8') ⇒ Object



68
69
70
# File 'lib/rhack/page.rb', line 68

# Relabels the page body in place with the given encoding (UTF-8 by default)
# and returns it.
def html!(encoding='UTF-8')
  body = @html
  body.force_encoding(encoding)
end

#inspectObject



60
61
62
63
64
65
66
# File 'lib/rhack/page.rb', line 60

def inspect
  if !@hash.nil?
    "<#FramePage (#{@hash ? @hash.inspect.size.bytes : 'failed to parse'}) #{@json ? 'json' : 'params hash'}>"
  else
    "<#FramePage #{@html.b ? "#{@failed ? @curl_res.header : '«'+title(false)+'»'} (#{@html.size.bytes}" : '(empty'})#{' js enabled' if @js and @doc and @hash.nil?}>"
  end
end

#load_scripts(frame) ⇒ Object

def get_link(link=‘a’)

begin
  link = at(link) && (at(link).href || at(link+'//a').href) if link.is String
rescue XML::Error; nil
end
expand_link link if link

end



415
416
417
# File 'lib/rhack/page.rb', line 415

# Fetches every external script of the page through the frame's cache and
# evaluates each one. Does nothing (and returns the falsy argument) when no
# frame is given.
def load_scripts(frame)
  return frame unless frame
  sources = frame.get_cached(*get_srcs("script[src]"))
  sources.each {|source| eval_string source}
end

#map(selector_or_nodes, options = {}, &mapper) ⇒ Object



329
330
331
332
333
334
335
# File 'lib/rhack/page.rb', line 329

# Runs #find with the given block as the :preprocess step.
#
# options - passed to #find; unless :compact is explicitly false, the result
#           is converted to an Array and its nils are removed.
def map(selector_or_nodes, options={}, &mapper)
  found = find(selector_or_nodes, options.merge(:preprocess => mapper))
  return found if options[:compact] == false
  found.to_a.compact
end

#map_json(selector_or_nodes, options = {}, &mapper) ⇒ Object



337
338
339
# File 'lib/rhack/page.rb', line 337

# Same as #map, with the result wrapped by JsonString.
def map_json(selector_or_nodes, options={}, &mapper)
  mapped = map(selector_or_nodes, options, &mapper)
  JsonString(mapped)
end

#process(c, opts = {}) ⇒ Object

We can then override #process in Page subclasses. Frame does not care about the value returned by #process.



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/rhack/page.rb', line 77

# Fills this page from a finished curl request and returns self.
#
# c    - the curl handle; #last_effective_url and #res are read from it.
# opts - options hash; the keys read here are :json, :hash, :eval and
#        :load_scripts.
#
# Depending on the mode, the response body ends up in @hash (json / params
# modes) or in @html + @doc (plain mode). @failed is only set for a non-200
# response in plain mode.
def process(c, opts={})
  @loc = c.last_effective_url.parse:uri
  @curl_res = c.res
  L.debug "#{@loc.fullpath} -> #{@curl_res}"
  if @curl_res.code == 200
    body = @curl_res.body
    if opts[:json]
      # json mode: any StandardError while parsing counts as "no result"
      @json = true
      @hash = begin; body.from_json
      rescue StandardError
        false 
      end
      # a falsy or plain String result is treated as a parse failure:
      # keep the body as html (with a parsed @doc) for inspection
      if !@hash or @hash.is String
        L.debug "failed to get json from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
        @html = body; to_doc
        @hash = false
      end
      
    elsif opts[:hash]
      # params mode: the body is only converted when body.inline is truthy
      # NOTE(review): presumably "inline" means a single-line body — confirm
      if body.inline
        @hash = body.to_params
      else
        @hash = false
        L.debug "failed to get params hash from #{c.last_effective_url}, take a look at my @doc for info; my object_id is #{object_id}"
        @html = body; to_doc
      end
      
    else
      # plain mode: convert the body, parse it and optionally run its scripts
      @html = body.xml_to_utf
      to_doc
      if opts[:eval]
        # opts[:load_scripts] is passed as the frame argument of #load_scripts
        load_scripts opts[:load_scripts]
        eval_js
      end
    end
  elsif !(opts[:json] or opts[:hash])
    # non-200 response in plain mode: keep the raw body and remember the code
    @html = @curl_res.body
    @failed = @curl_res.code
  end
  self
end

#submit(form, frame, hash = {}, opts = {}, &callback) ⇒ Object



361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/rhack/page.rb', line 361

# Submits a form of this page through the given frame.
#
# form     - anything accepted by #form.
# frame    - the frame that executes the request.
# hash     - field values merged over the form's own inputs.
# opts     - request options; a Referer header pointing at this page is
#            added when the page has a location and none is set yet.
# callback - passed on to frame.exec.
#
# A static frame whose location differs from the form target is retargeted
# for the request and forced back to its previous location afterwards.
#
# Returns whatever frame.exec returns.
def submit(form, frame, hash={}, opts={}, &callback)
  if @loc
    headers = (opts[:headers] ||= {})
    headers.Referer ||= @loc.href
  end
  query = form(form, hash, opts)

  original_target = frame.loc.href
  submit_target = query[2] || query[0]
  need_retargeting = frame.static && original_target != submit_target

  frame.retarget submit_target if need_retargeting
  page = frame.exec(*query, &callback)
  frame.retarget original_target, :forced if need_retargeting
  page
end

#text(selector_or_node, options = {}) ⇒ Object

FINDERS PREPROCESSORS #



292
293
294
295
296
297
# File 'lib/rhack/page.rb', line 292

# Returns the stripped text of the node found by #at, or nil when there is
# no such node. With a block, yields the text and returns the block's value.
def text(selector_or_node, options={})
  node = at(selector_or_node, options)
  return unless node
  stripped = node.text.strip
  block_given? ? yield(stripped) : stripped
end

#texts(hash, options = {}) ⇒ Object



299
300
301
302
303
# File 'lib/rhack/page.rb', line 299

# Replaces every value of the given hash (a selector or node) with the
# result of #text for it, using the same options for each lookup.
def texts(hash, options={})
  hash.map_values {|target| text(target, options)}
end

#to_docObject



152
153
154
# File 'lib/rhack/page.rb', line 152

# Parses @html with the :forceutf flag, stores the result in @doc and
# returns it.
def to_doc
  @doc = @html.to_doc(:forceutf)
end

#urlObject Also known as: href



72
# File 'lib/rhack/page.rb', line 72

# The page address, taken from @loc.
def url
  @loc.href
end