Class: Server

Inherits:
Object
  • Object
show all
Defined in:
lib/w3m-autopagerize-server.rb

Overview

Entry Points #

Constant Summary collapse

HTML_OUTPUT_FILE =
"/tmp/w3m-autopagerize-tmp.html"

Instance Method Summary collapse

Instance Method Details

#crop_this_page(url, srcfile, charset, client) ⇒ Object


342
343
344
345
346
347
348
349
350
351
# File 'lib/w3m-autopagerize-server.rb', line 342

# Entry point: hands the page currently being viewed to +crop_html+, then
# asks +prefetch_next_location+ to deal with the page that follows it.
#
# +prepare+ is invoked first with this method's name; the URL it returns
# replaces the +url+ argument for the rest of the method. The source text
# +prepare+ also returns is not used here.
#
# @param url [String] URL of the current page
# @param srcfile [String, nil] forwarded to +prepare+
# @param charset [String, nil] forwarded to +prepare+
# @param client [Object] forwarded to +prepare+
# @return [Object] whatever +crop_html+ returns
def crop_this_page(url, srcfile, charset, client)
  _src, page_url = prepare(url, srcfile, charset, client, __method__)
  site = sitedata(page_url)

  begin
    crop_html(page_url, nil, site)
  ensure
    # Runs whether or not crop_html raised.
    prefetch_next_location(site.next_url(page_url), site)
  end
end

#load_config_file(config_file) ⇒ Object


422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
# File 'lib/w3m-autopagerize-server.rb', line 422

# Loads a Ruby configuration file into the running process with +load+.
#
# A relative +config_file+ is resolved against the directory of this source
# file. Progress and failures are reported on +$stderr+; problems with the
# file itself never propagate to the caller.
#
# Only StandardError and ScriptError (which covers the SyntaxError and
# LoadError that +load+ can raise) are rescued. Process-control exceptions
# such as SystemExit and Interrupt are deliberately allowed to propagate.
#
# @param config_file [String, Symbol] path of the file to load, or +:ignore+
#   to skip loading altogether
# @return [nil]
def load_config_file(config_file)
  if config_file == :ignore
    $stderr.puts "load_config_file: config file is ignored!"
    return
  end

  path = File.expand_path(config_file, File.dirname(__FILE__))
  if File.file?(path)
    load(path)
    $stderr.puts "load_config_file: loaded #{path}"
  else
    $stderr.puts "load_config_file: config file #{path} not found!"
  end
rescue StandardError, ScriptError => e
  # `path` is still nil when expand_path itself failed (e.g. a nil argument).
  $stderr.puts "load_config_file: error loading #{path || config_file}!"
  $stderr.puts "load_config_file: #{e.class}: #{e.message}"
end

#load_siteinfo ⇒ Object

(executable-interpret “ruby19 -r w3m-autopagerize-server -e '$logger=Logger.new(); load_siteinfo'”) (executable-interpret “ruby18 -r w3m-autopagerize-server -e '$logger=Logger.new(); load_siteinfo'”)


403
404
405
406
407
408
409
410
411
412
413
414
415
416
# File 'lib/w3m-autopagerize-server.rb', line 403

# Downloads every SITEINFO list named in +$SITEINFO_IMPORT_URLS+ and appends
# one <tt>[Regexp, SiteData]</tt> pair to +$SITEINFO+ per usable entry.
#
# Each list is fetched with +get_content+, converted with +toutf8+ and parsed
# as JSON. An entry is skipped when
# * it has no "data" object or no "url" inside it,
# * its "url" is listed in +$EXCLUDE_URLS+, or
# * its "url" cannot be compiled by Regexp.new. The lists come from remote
#   sources, so a single bad pattern is logged and skipped rather than
#   aborting the import of every remaining entry.
#
# @return [Object] the return value of the final +$logger.info+ call
def load_siteinfo
  $SITEINFO_IMPORT_URLS.each do |siteinfo_url|
    JSON.parse(get_content(siteinfo_url).toutf8).each do |entry|
      data = entry["data"]
      next unless data

      url = data["url"]
      next unless url
      next if $EXCLUDE_URLS.include?(url)

      begin
        pattern = Regexp.new(url)
      rescue RegexpError, TypeError => e
        $logger.error "#{__method__}: skipped invalid url pattern #{url.inspect}: #{e.message}"
        next
      end

      sd = SiteData.new(data["nextLink"], data["insertBefore"],
                        data["exampleUrl"], data["pageElement"])
      $SITEINFO << [pattern, sd]
    end
  end
  $logger.info "#{__method__}: loaded"
end

#nextpage(url, srcfile, charset, client) ⇒ Object


354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
# File 'lib/w3m-autopagerize-server.rb', line 354

# Entry point: finds the page that follows +url+ and hands it to +crop_html+.
#
# After +prepare+ has run, the site data for the page is asked for the next
# URL. When it has none, each entry of +SiteData.fallbacks+ is tried in order
# and the first one that yields a next URL is used. Whichever site data
# produced the URL is also passed to +prefetch_next_location+, which runs
# even if +crop_html+ raises.
#
# Any StandardError raised along the way (including the "no location!" error
# raised when neither the site data nor a fallback yields a URL) is logged
# and turned into an HTML error report instead of propagating.
#
# @param url [String] URL of the current page
# @param srcfile [String, nil] forwarded to +prepare+
# @param charset [String, nil] forwarded to +prepare+
# @param client [Object] forwarded to +prepare+
# @return [Object] whatever +crop_html+ returns on success, or
#   <tt>{:html => report}</tt> describing the failure
def nextpage(url, srcfile, charset, client)
  src, url = prepare(url, srcfile, charset, client, __method__)
  sitedata = sitedata(url)
  location = sitedata.next_url(url)

  if location
    if sitedata.pageElement
      $logger.debug "#{__method__}: location and pageElement found."
    else
      $logger.debug "#{__method__}: location found."
    end
    begin
      crop_html location, url, sitedata
    ensure
      prefetch_next_location location, sitedata
    end
  else
    # Explicit search: a `for` loop evaluates to the collection itself when
    # nothing breaks out of it, which would look like a (truthy) next URL.
    fallback = nil
    fallback_nexturl = nil
    SiteData.fallbacks.each do |candidate|
      fallback_nexturl = candidate.next_url(url)
      next unless fallback_nexturl
      fallback = candidate
      break
    end

    if fallback
      $logger.info "#{__method__}: fallback"
      begin
        crop_html fallback_nexturl, url, fallback
      ensure
        prefetch_next_location fallback_nexturl, fallback
      end
    else
      $logger.debug "#{__method__}: no location."
      raise "no location!"
    end
  end
rescue => e
  # The failure may have happened before `sitedata` or the cached source were
  # available, so every lookup is nil-safe; the report must not raise itself.
  xpath = (sitedata && sitedata.nextLink) || 'nextLink not found'
  cached = $content_cache[url]
  body = src || (cached && cached.first)
  encoding = body ? Kconv.guess(body) : 'unknown'
  html = %{<pre>Error!
xpath = #{xpath}
#{e}
#{e.backtrace.pretty_inspect}
src_encoding=#{encoding}
</pre>
  }
  $logger.error "#{__method__}: error!"
  $logger.error "#{__method__}: #{e}"
  $logger.error "#{__method__}: #{e.backtrace.pretty_inspect}"
  {:html => html}
end

#prepare(url, srcfile, charset, client, method) ⇒ Object


324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/w3m-autopagerize-server.rb', line 324

# Shared preamble for the entry points: logs the request, records the client
# in +$client+, reads the page source and works out which URL to operate on.
#
# * When +url+ matches <tt>file:…/cgi-bin/</tt> (a request coming from Local
#   CGI), the URL is replaced by the +href+ of the first <tt><base></tt> tag
#   found in the source.
# * Otherwise the source, if any, is stored in +$content_cache+ under +url+
#   together with the normalized charset.
#
# @param url [String] URL reported for the page
# @param srcfile [String, nil] path read with +File.zread+ when given
# @param charset [String, nil] passed through +normalize_charset+ when caching
# @param client [Object] stored in +$client+
# @param method [Symbol, String] caller name used as the log-line prefix
# @return [Array] two elements: the source text (nil without +srcfile+) and
#   the URL to use
def prepare(url, srcfile, charset, client, method)
  banner = "=================================================="
  $logger.info(banner)
  $logger.info("#{method}: entered url=#{url} charset=#{charset}")
  $logger.debug("#{method}: W3M_SOURCEFILE = #{srcfile}") if srcfile
  $client = client

  src = srcfile ? File.zread(srcfile) : nil

  if url =~ %r{^file:.*/cgi-bin/}
    # Request came from Local CGI: the real page URL is in the <base> tag.
    base_url = src[%r{<base href=['"](.+?)['"]}, 1]
    $logger.info("#{method}: base url=#{base_url}")
    return [src, base_url]
  end

  # Source came from W3M_SOURCEFILE: cache it under the page URL.
  $logger.debug("#{method}: set $content_cache[#{url.inspect}] from W3M_SOURCEFILE")
  looks_like_html = (src =~ /<body/i) ? true : nil
  $logger.debug("#{method}: source is html? = #{looks_like_html}")
  $content_cache[url] = [src, normalize_charset(charset)] if src
  [src, url]
end

#restart ⇒ Object


418
419
420
# File 'lib/w3m-autopagerize-server.rb', line 418

# Replaces the running process with a fresh run of the current program.
# $PROGRAM_NAME is Ruby's built-in alias of $0; no arguments are passed on.
# On success this call does not return.
def restart
  exec($PROGRAM_NAME)
end