Class: RHACK::Scout

Inherits:
Object show all
Defined in:
lib/rhack.rb,
lib/rhack/scout.rb

Direct Known Subclasses

Proxy::Interceptor

Defined Under Namespace

Classes: ProxyError

Constant Summary collapse

# Base request headers; #mkHeader starts from a dup of this hash for every request.
DefaultHeader =
{
    "Expect"                => "",
    "Keep-Alive"          => "300",
    "Accept-Charset"  => "windows-1251,utf-8;q=0.7,*;q=0.7",
    "Accept-Language" => "ru,en-us;q=0.7,en;q=0.3",
    "Connection"          => "keep-alive"
}
# Class-wide retry table consulted by #retry? (indexed by site, holding error names).
# NOTE(review): `scout` is presumably a project configuration object - confirm.
@@retry =
scout.retry.b || {}
# Class-wide default timeout; #initialize falls back to it when no :timeout option is given.
@@timeout =
scout.timeout.b || 60
# CA bundle path assigned to every Curl handle in #setup_curl; defaults to the
# cacert.pem resolved relative to this file when the config gives none.
@@cacert =
scout.cacert.b ? File.expand_path(scout.cacert) : File.expand_path('../../config/cacert.pem', __FILE__)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(*argv) ⇒ Scout

Returns a new instance of Scout.

Raises:

  • (ProxyError)



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/rhack/scout.rb', line 27

# Builds a scout bound to a URI, optionally going through a proxy.
#
# Positional arguments are unpacked by Array#get_opts against the defaults
# ['http://', nil, :rand, 1] into: uri, proxy, user agent (@ua) and the
# referer-forging flag (@refforge); the trailing value is the options hash.
# Options read here: :cp / :ck (cookie processing), :raise, :engine,
# :timeout and :retry.
#
# Raises ProxyError when a proxy is given in an unexpected form.
def initialize(*argv)
  uri, proxy, @ua, @refforge, opts = argv.get_opts ['http://', nil, :rand, 1]
  # NOTE(review): `webproxy` reads @webproxy, which this method only assigns
  # further down; unless something set it earlier, only the Array check applies.
  raise ProxyError, proxy if proxy and (!webproxy && !proxy.is(Array) or webproxy && !proxy.is(String))
  # Add a scheme when the URI has none (presumably String#>> prepends in place - confirm).
  'http://' >> uri if uri !~ /^\w+:\/\//
  if proxy
    # A second element whose to_i is 0 (i.e. not a port number) names a WebProxy constant.
    if proxy[1] and proxy[1].to_i == 0
      # NOTE(review): eval on caller-supplied text; WebProxy.const_get would avoid
      # executing arbitrary code if proxy[1] can come from untrusted input.
      @webproxy = eval("WebProxy::#{proxy[1]}")
      @proxy      = proxy[0].parse(:uri).root
    else 
      # Plain proxy given as [host, port]; an Integer host is converted with #to_ip.
      proxy[0]    = proxy[0].to_ip if proxy[0].is Integer
      @proxy    = proxy
    end
  end
  @cookies      = {}
  @body         = {}
  @num          = []
  @cookieProc = opts[:cp] || opts[:ck]
  @raise_err    = opts[:raise] # named @raise_err because a @raise attribute would break plain 'raise' calls here
  @engine       = opts[:engine]
  @timeout      = opts[:timeout] || @@timeout || 60
  # Every per-verb callback starts out as the no-op Proc::NULL.
  @post_proc  = @get_proc = @head_proc = @put_proc = @delete_proc = Proc::NULL
  # Parses the URI and prepares the Curl handle (see #update).
  update uri
  
  # An Array of error names is treated as the retry list for the current host.
  @retry = opts[:retry] || {}
  @retry = {@uri.host => @retry} if @retry.is Array
end

Instance Attribute Details

#body ⇒ Object (readonly)

Returns the value of attribute body.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Reader for @body: {} after construction, later replaced by #mkBody / #loadPut.
def body() @body end

#cookieProc ⇒ Object (readonly)

Returns the value of attribute cookieProc.



10
11
12
# File 'lib/rhack/scout.rb', line 10

# Reader for @cookieProc, the cookie-processing switch (see #cp_on / #cp_off).
def cookieProc() @cookieProc end

#cookies ⇒ Object (readonly)

Returns the value of attribute cookies.



10
11
12
# File 'lib/rhack/scout.rb', line 10

# Reader for @cookies, the cookie table indexed by host (see #main_cks).
def cookies() @cookies end

#cookieStore ⇒ Object (readonly)

Returns the value of attribute cookieStore.



10
11
12
# File 'lib/rhack/scout.rb', line 10

# Reader for @cookieStore.
def cookieStore() @cookieStore end

#error ⇒ Object (readonly)

Returns the value of attribute error.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Reader for @error: set by the failure callback in #load, cleared on completion.
def error() @error end

#headers ⇒ Object (readonly)

Returns the value of attribute headers.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Reader for @headers, the raw header text collected by the emulated HEAD in #loadHead.
def headers() @headers end

#http ⇒ Object (readonly)

Returns the value of attribute http.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Reader for @http, the Curl handle created in #setup_curl.
def http() @http end

#last_method ⇒ Object (readonly)

Returns the value of attribute last_method.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Reader for @last_method, the verb symbol recorded by the most recent load* call.
def last_method() @last_method end

#path ⇒ Object

Returns the value of attribute path.



7
8
9
# File 'lib/rhack/scout.rb', line 7

# Reader for @path, taken from the parsed URI's fullpath in #update.
def path() @path end

#proxy ⇒ Object

Returns the value of attribute proxy.



7
8
9
# File 'lib/rhack/scout.rb', line 7

# Reader for @proxy as stored by the constructor.
def proxy() @proxy end

#proxystr ⇒ Object (readonly)

Returns the value of attribute proxystr.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Reader for @proxystr, the proxy label computed in #update ('localhost' without a proxy).
def proxystr() @proxystr end

#raise_err ⇒ Object

Returns the value of attribute raise_err.



6
7
8
# File 'lib/rhack/scout.rb', line 6

# Reader for @raise_err; when truthy the failure callback in #load re-raises the error.
def raise_err() @raise_err end

#refforge ⇒ Object (readonly)

Returns the value of attribute refforge.



10
11
12
# File 'lib/rhack/scout.rb', line 10

# Reader for @refforge; #mkHeader adds a Referer header only when it is truthy.
def refforge() @refforge end

#retry ⇒ Object

Returns the value of attribute retry.



6
7
8
# File 'lib/rhack/scout.rb', line 6

# Reader for @retry, the per-instance retry table consulted by #retry?.
def retry() @retry end

#root ⇒ Object

Returns the value of attribute root.



7
8
9
# File 'lib/rhack/scout.rb', line 7

# Reader for @root, the root of the current URI as stored by #update.
def root() @root end

#sld ⇒ Object

Returns the value of attribute sld.



7
8
9
# File 'lib/rhack/scout.rb', line 7

# Reader for @sld, the trailing "name.tld" part that #update extracts from @root.
def sld() @sld end

#timeout ⇒ Object

Returns the value of attribute timeout.



6
7
8
# File 'lib/rhack/scout.rb', line 6

# Reader for @timeout, the value #load copies onto the Curl handle before each request.
def timeout() @timeout end

#ua ⇒ Object (readonly)

Returns the value of attribute ua.



10
11
12
# File 'lib/rhack/scout.rb', line 10

# Reader for @ua: a User-Agent value, or :rand to pick one per request in #mkHeader.
def ua() @ua end

#uri ⇒ Object (readonly)

Returns the value of attribute uri.



8
9
10
# File 'lib/rhack/scout.rb', line 8

# Reader for @uri, the parsed URI stored by the latest #update.
def uri() @uri end

#webproxy ⇒ Object (readonly)

Returns the value of attribute webproxy.



9
10
11
# File 'lib/rhack/scout.rb', line 9

# Reader for @webproxy, the WebProxy constant selected in the constructor (nil otherwise).
def webproxy() @webproxy end

Instance Method Details

#available? ⇒ Boolean

Returns:

  • (Boolean)


234
235
236
# File 'lib/rhack/scout.rb', line 234

# Inverse of #loaded?: true when the handle is not currently registered with the carrier.
def available?
  loaded? ? false : true
end

#cp_off ⇒ Object



202
# File 'lib/rhack/scout.rb', line 202

# Switches cookie processing off; returns false.
def cp_off
  @cookieProc = false
end

#cp_on ⇒ Object



201
# File 'lib/rhack/scout.rb', line 201

# Switches cookie processing on; returns true.
def cp_on
  @cookieProc = true
end

#dump ⇒ Object



110
111
112
113
114
115
# File 'lib/rhack/scout.rb', line 110

# Builds a multi-part debug string: proxy label, request (action URL merged with
# the handle's headers, then the body), the response, and a "Ready" marker when
# @ready is set.
def dump
  report = "IP: #{@proxystr}\nRequest: "
  request = {"Action"=>@root+@path} + @http.headers
  report << (request.dump + @body.dump + "Response: #{res}")
  report << "\nReady" if @ready
  report
end

#expand(uri) ⇒ Object



128
129
130
131
132
133
134
# File 'lib/rhack/scout.rb', line 128

# Resolves a dot-relative reference ("./x", "../x", ".x") against the directory
# of the last effective URL, or of @uri when nothing has been fetched yet.
# With a web proxy and no effective URL the reference is returned untouched.
#
# uri - relative reference as a String.
# Returns the expanded path String.
def expand(uri)
  effective = @http.last_effective_url
  return uri if @webproxy && !effective

  base_path = (effective ? effective.parse(:uri) : @uri).path
  dir = File.split(base_path)[0]
  # "../" climbs one more level above the current directory.
  dir = File.split(dir)[0] if uri =~ /^\.\./
  # File.split drops the trailing separator for everything but the root, so it
  # has to be restored or "/a/b" + "x" would collapse into "/a/bx".
  dir += '/' unless dir.end_with?('/')
  # Block form keeps backslashes in dir from being read as back-references.
  uri.sub(/^(\.\.?\/)?/) { dir }
end

#fix(path) ⇒ Object



117
118
119
120
121
122
123
124
125
126
# File 'lib/rhack/scout.rb', line 117

# Normalizes a request path: spaces become '+', dot-relative paths go through
# #expand, the scout state is refreshed via #update, and with a web proxy the
# result is encoded (prefixed with @root when neither #update nor @uri.root
# yields a truthy value).
def fix(path)
  fixed = path.tr(' ', '+')
  fixed = expand(fixed) if fixed =~ /^\./
  # #update must run on every call: it re-parses @uri before @uri.root is read.
  rooted = update(fixed) || @uri.root
  return fixed unless @webproxy
  @webproxy.encode(rooted ? fixed : @root + fixed)
end

#load(path = @path, headers = {}, not_redir = 1, relvl = 10, &callback) ⇒ Object



255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/rhack/scout.rb', line 255

# Prepares the Curl handle for one request and queues it through #load!.
#
# path      - request path; normalized through #fix.
# headers   - extra headers merged over the ones built by #mkHeader.
# not_redir - when falsy (after .b), 3xx responses are followed via #loadGet.
# relvl     - remaining redirect budget, decremented on every followed redirect.
# callback  - block that receives the Curl object once the request completes.
def load(path=@path, headers={}, not_redir=1, relvl=10, &callback)
  # Remember the arguments of this call so that #retry! can replay it.
  @__path = path
  @__headers = headers
  @__not_redir = not_redir
  @__relvl = relvl
  @__callback = callback
  
  @http.path = path = fix(path)
  @http.headers = mkHeader(path).merge!(headers)
  @http.timeout = @timeout

  @http.on_complete {|c|
    # > Carier.requests--
    @error = nil
    # Until it is outdated, the Curl::Response here may hold pointers to freed
    # memory and therefore raise on #to_s and #inspect.
    c.outdate!
    ProcCookies c.res if @cookieProc
    # We cannot simply cancel on_complete in an on_redirect block,
    # because loadGet would immediately set on_complete back.
    # Follow the redirect only while allowed, within budget, and when a location is present.
    if c.res.code.in(300..399) and !not_redir.b and (relvl -= 1) > -1 and loc = c.res.hash.location
      loadGet(loc, headers: headers, relvl: relvl, redir: true, &callback)
    elsif block_given?
      yield c
    end
  }
  # The failure handler is installed only when the handle does not have one yet.
  @http.on_failure {|c, e|
    eclass = e[0]
    @error = e
    c.outdate!
    # on_complete must be cleared here, otherwise it would run
    # right after this handler with broken data.
    @http.on_complete &Proc::NULL
    if retry? eclass
      L.debug "#{eclass} -> reloading scout"
      retry!
    else
      L.debug "#{eclass} -> not reloading scout"
      raise *e if @raise_err
    end
  } if !@http.on_failure
  
  load!
end

#load! ⇒ Object



238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
# File 'lib/rhack/scout.rb', line 238

def load!
  unless Curl.carier.add @http
    Curl.carier.remove @http
    Curl.

#loadDelete(*argv, &callback) ⇒ Object



314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/rhack/scout.rb', line 314

# Issues a DELETE request. A given block becomes the stored DELETE callback;
# without a block the previously stored one is reused.
# Options: :headers (default {}), :redir (default false), :relvl (default 2).
def loadDelete(*argv, &callback)
  uri, opts = argv.get_opts([@path], :headers => {}, :redir => false, :relvl => 2)
  @http.delete = true
  @last_method = :delete
  handler = callback ? (@delete_proc = callback) : @delete_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &handler)
end

#loaded? ⇒ Boolean

Returns:

  • (Boolean)


230
231
232
# File 'lib/rhack/scout.rb', line 230

# True when this scout's handle is among the carrier's current requests.
def loaded?
  queued = Curl.carier.reqs
  queued.include?(@http)
end

#loadGet(*argv, &callback) ⇒ Object



301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/rhack/scout.rb', line 301

# Issues a GET request. A given block becomes the stored GET callback;
# without a block the previously stored one is reused.
# Options: :headers (default {}), :redir (default false), :relvl (default 2).
def loadGet(*argv, &callback)
  uri, opts = argv.get_opts([@path], :headers => {}, :redir => false, :relvl => 2)
  @http.get = true
  @last_method = :get
  handler = callback ? (@get_proc = callback) : @get_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &handler)
end

#loadHead(*argv, &callback) ⇒ Object



357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
# File 'lib/rhack/scout.rb', line 357

# Issues a HEAD request, optionally emulated through a GET that is aborted as
# soon as the header section ends.
#
# Positional arguments (via get_opts): uri (default @path) and emulate
# (default :if_retry). emulate == :always skips the real HEAD; :if_retry falls
# back to the emulation when the HEAD completes without error but not with 200.
# A given block becomes the stored HEAD callback; otherwise the stored one is reused.
def loadHead(*argv, &callback)
  uri, emulate, headers = argv.get_opts [@path, :if_retry]
  real_head = emulate != :always
  @http.head = true if real_head
  @last_method = :head
  if callback
    @head_proc = callback
  else
    callback = @head_proc
  end
  # GET-based emulation: collect header lines into @headers and return 0 on the
  # blank line that ends the header section, otherwise the chunk size.
  emulated = lambda do
    @headers = ''
    @http.on_header do |chunk|
      @headers << chunk
      chunk == "\r\n" ? 0 : chunk.size
    end
    @http.get = true
    load(uri, headers) do |c|
      c.on_header
      callback[c]
    end
  end
  return emulated.call unless real_head
  load(uri, headers) do |c|
    fall_back = !@error && c.res.code != 200 && emulate == :if_retry
    fall_back ? emulated.call : callback[c]
  end
end

#loadPost(*argv, &callback) ⇒ Object



327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
# File 'lib/rhack/scout.rb', line 327

# Issues a POST request. Positional arguments (via get_opts): payload (default
# @body), multipart flag (default: the handle's current multipart state) and
# uri (default @path). A non-Hash payload gets a default Content-Type of
# application/octet-stream unless the caller supplied one.
# A given block becomes the stored POST callback; otherwise the stored one is reused.
def loadPost(*argv, &callback)
  hash, multipart, uri, opts = argv.get_opts([@body, @http.multipart_form_post?, @path], :headers => {}, :redir => false, :relvl => 2)
  @http.delete = false
  if !hash.is(Hash)
    # raw (non-parameterized) payload
    opts[:headers] = opts[:headers].reverse_merge('Content-Type' => 'application/octet-stream')
  end
  mkBody(hash, multipart.b)
  @last_method = :post
  handler = callback ? (@post_proc = callback) : @post_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &handler)
end

#loadPut(*argv, &callback) ⇒ Object



343
344
345
346
347
348
349
350
351
352
353
354
355
# File 'lib/rhack/scout.rb', line 343

# Issues a PUT request. Positional arguments (via get_opts): payload (default
# @body) and uri (default @path). The payload is stored in @body and handed to
# the handle as put_data. A given block becomes the stored PUT callback;
# otherwise the stored one is reused.
def loadPut(*argv, &callback)
  body_or_file, uri, opts = argv.get_opts([@body, @path], :headers => {}, :redir => false, :relvl => 2)
  @http.delete = false
  @body = body_or_file
  @http.put_data = body_or_file
  @last_method = :put
  handler = callback ? (@put_proc = callback) : @put_proc
  load(uri, opts[:headers], !opts[:redir], opts[:relvl], &handler)
end

#main_cks ⇒ Object



204
# File 'lib/rhack/scout.rb', line 204

# Cookie table for the current host, created empty on first access.
def main_cks
  @cookies[@uri.host] ||= {}
end

#main_cks=(cks) ⇒ Object



205
206
207
208
209
# File 'lib/rhack/scout.rb', line 205

# Replaces the cookie table of the current host. With a web proxy the cookies
# are encoded by the proxy for @root; otherwise every pair is wrapped via Cookie().
def main_cks=(cks)
  jar = if @webproxy
    @webproxy.ck_encode(@root, cks)
  else
    cks.map2 {|k, v| Cookie(k, v)}
  end
  @cookies[@uri.host] = jar
end

#mkBody(params, multipart = false) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/rhack/scout.rb', line 136

# Builds the request body on the Curl handle and mirrors it in @body.
#
# params    - multipart: pairs of field name and value, where a value may be
#             a Proc (called first), a "file://path" String or a Hash with
#             :path and optional :name / :content_type (both uploaded as
#             files), or anything else (sent as its to_s).
#             non-multipart: an IO (read, then closed), a String (a
#             "file://path" String is replaced by that file's content) or an
#             object responding to #urlencode.
# multipart - truthy to build a multipart form body.
#
# Returns the value assigned to the handle.
def mkBody(params, multipart=false)
  if multipart
    @http.multipart_post_body = @body = params.map {|k, v|
      v = v.call if v.is Proc
      spec = v.is(Hash) ? v : nil
      # Take the capture from this very match rather than from $1: a Hash spec
      # never matches, so $1 could still hold the path of an earlier field.
      # Only text-like values are probed, so plain numbers go to the content branch.
      file_url = v[%r(^file://(.+)), 1] if v.is(String) or v.is(Symbol)
      if spec or file_url
        path = file_url || spec[:path]
        name = spec && spec[:name] || File.basename(path)
        # Stringify only an explicitly given type; nil.to_s would be "" and
        # would silently disable the MIME lookup below.
        explicit = spec && spec[:content_type]
        content_type = if explicit
          explicit.to_s
        else
          (Mime::Types.of(path)[0] || {}).content_type || "application/octet-stream"
        end
        Curl::PostField.file(k, content_type, name, read(path))
      else
        Curl::PostField.content(k.to_s, v.to_s)
      end
    }
  else
    @http.post_body = @body = case params
    when IO
      # Close the stream even when reading fails.
      begin
        params.read
      ensure
        params.close
      end
    when String
      file = params[%r(^file://(.+)), 1]
      file ? read(file) : params
    else
      params.urlencode
    end
  end
end

#mkHeader(uri) ⇒ Object



170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/rhack/scout.rb', line 170

# Assembles the header hash for one request, starting from a copy of DefaultHeader.
#
# - Cookie: with cookie processing on, every cookie of the current host writes
#   itself into a buffer through #use; cookies reporting :expired are dropped
#   from the jar, and the buffer's last two characters are cut off.
# - Referer: only when @refforge is set.
# - User-Agent: @ua, or a random one from RHACK.useragents when @ua is :rand.
#
# uri - request path/URI used to derive the Referer.
# Returns the new header Hash.
def mkHeader(uri)
  result = DefaultHeader.dup
  if @cookieProc
    buffer = ''
    main_cks.each do |name, cookie|
      main_cks.delete(name) if cookie.use(buffer, @uri) == :expired
    end
    result['Cookie'] = buffer[0..-3]
  end
  if @refforge
    referer = if @uri.root
      uri
    else
      (@webproxy ? @http.host : @root) + uri
    end
    # NOTE(review): the greedy (.+) already consumes the whole line, so the
    # capture equals the full referer - confirm whether trimming was intended.
    result['Referer'] = referer.match(/(.+)[^\/]*$/)[1]
  end
  if @ua
    result['User-Agent'] = @ua == :rand ? RHACK.useragents.rand : @ua
  end
  result
end

#ProcCookies(res) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/rhack/scout.rb', line 185

# Extracts cookies from a response and registers each one through Cookie().
#
# res - raw header text (String, one header per line) or a Curl::Response,
#       whose 'cookies' entry is used as is. Any other object yields nothing.
#
# Returns nil when no cookies were found, otherwise the processed list.
def ProcCookies(res)
  ck = case res
  when String
    res.split(/\r?\n/).each_with_object([]) {|line, found|
      # Split on the first ": " only, so values that contain ": " stay intact.
      name, value = line.split(': ', 2)
      # Non-bang downcase: downcase! returns nil for an already-lowercase
      # name, which used to drop "set-cookie" headers.
      found << value if name and value and name.downcase == 'set-cookie'
    }
  when Curl::Response
    res['cookies']
  else
    []
  end
  return if !ck.b
  ck.each {|c| Cookie(c, self)}
  #    StoreCookies if @cookieStore
end

#req ⇒ Object



106
107
108
# File 'lib/rhack/scout.rb', line 106

# Request object attached to the current response (delegates to #res).
def req() res.req end

#res ⇒ Object



102
103
104
# File 'lib/rhack/scout.rb', line 102

# Response currently held by the Curl handle.
def res() @http.res end

#retry!(path = @__path, headers = @__headers, not_redir = @__not_redir, relvl = @__relvl, callback = @__callback) ⇒ Object



223
224
225
226
227
228
# File 'lib/rhack/scout.rb', line 223

# Replays the last request: the arguments default to the values that #load
# cached on its previous call.
def retry!(path=@__path, headers=@__headers, not_redir=@__not_redir, relvl=@__relvl, callback=@__callback)
  # Rebuild the Curl handle first; externally set parameters (post body
  # included) are still in place at this point.
  setup_curl()
  # #load installs the on_complete callback again on the fresh handle.
  load path, headers, not_redir, relvl, &callback
end

#retry?(eclass) ⇒ Boolean

Returns:

  • (Boolean)


211
212
213
214
215
216
217
218
219
220
221
# File 'lib/rhack/scout.rb', line 211

# Decides whether a failed request should be replayed.
#
# Both the class-wide (@@retry) and the per-instance (@retry) tables map a
# site to a list of error names. The sites picked by select_in for the current
# @root are checked, and the answer is true when any of them lists the name
# of the given error class.
#
# eclass - error class reported by the failure callback.
def retry?(eclass)
  hosts = (@@retry.keys + @retry.keys).select_in(@root)
  if hosts.empty?
    false
  else
    name = eclass.self_name
    hosts.any? do |host|
      (@@retry[host] || []).include?(name) || (@retry[host] || []).include?(name)
    end
  end
end

#setup_curl ⇒ Object



54
55
56
57
58
59
60
61
# File 'lib/rhack/scout.rb', line 54

# (Re)creates the Curl handle: removes the current one from the carrier when it
# is still queued, opens a new handle on the web proxy (if any) or on @root,
# links it back to this scout and sets the CA bundle.
def setup_curl
  Curl.carier.remove(@http) if loaded?
  target = @webproxy ? @proxy : @root
  @http = Curl::Easy(target)
  @http.base = self
  @http.cacert = @@cacert
end

#to_s ⇒ Object Also known as: inspect



91
92
93
94
95
96
97
98
99
# File 'lib/rhack/scout.rb', line 91

def to_s
  str = "<##{self.class.self_name} @ "
  if @webproxy
    str << "#{@proxy} ~ "
  elsif @proxy
    str << @proxy*':'+" ~ " 
  end
  str << @root+'>'
end

#update(uri) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/rhack/scout.rb', line 63

# Points the scout at a new URI.
#
# A URI without a scheme is treated as a path on the current root: only @uri
# is re-parsed and nil is returned. A full URI whose root equals the current
# one also returns nil after re-parsing @uri. Otherwise root, sld, path, the
# Curl handle, proxy settings and (when cookies were given as a Hash) the host
# cookies are refreshed, and self is returned.
#
# Callers such as #fix rely on the nil / self distinction.
def update(uri)
  if !uri[/^\w+:\/\//]
    # Relative reference: make sure it starts with '/' before parsing.
    uri = '/' + uri if uri[0,1] != '/'
    @uri = uri.parse:uri
    return                       
  end
  @uri = uri.parse:uri
  # Same root as before: nothing else to refresh.
  return if @uri.root == @root
  @root = @uri.root
  @sld    = @root[/[\w-]+\.[a-z]+$/]
  @path = @uri.fullpath
  # Reuse the existing handle when there is one, otherwise create it.
  if @http
    @http.url = @webproxy ? @proxy : @root
  else
    setup_curl
  end
  if @proxy
    # A plain proxy is stored as parts that are joined with ':' for Curl.
    @http.proxy_url = @proxy*':' if !@webproxy
    # NOTE(review): with a web proxy the constructor stores @proxy from a parsed
    # URI's root; if that is a String, @proxy[0] is only its first character - confirm.
    @proxystr = @webproxy ? @proxy[0] : @http.proxy_url
  else @proxystr = 'localhost' 
  end
  # Cookies passed as a Hash are installed for the current host once, after
  # which the setting becomes a plain "on" flag.
  if @cookieProc.is Hash
    self.main_cks = @cookieProc
    @cookieProc = true    
  end
  self
end