Class: Server

Inherits:
Object
  • Object
show all
Defined in:
lib/w3m-autopagerize-server.rb

Overview

Entry Points #

Constant Summary collapse

HTML_OUTPUT_FILE =
"/tmp/w3m-autopagerize-tmp.html"

Instance Method Summary collapse

Instance Method Details

#crop_this_page(url, srcfile, charset, client) ⇒ Object



349
350
351
352
353
354
355
356
357
358
# File 'lib/w3m-autopagerize-server.rb', line 349

# Entry point: crop the page identified by +url+ itself.
#
# The arguments are forwarded to +prepare+, which returns the URL actually
# used from here on. The site data looked up for that URL is passed to
# +crop_html+ (with +nil+ as its second argument). Whether +crop_html+
# returns or raises, +prefetch_next_location+ is then called with the URL
# reported by the site data's +next_url+.
#
# Returns whatever +crop_html+ returns; an exception raised by +crop_html+
# propagates to the caller after the prefetch call.
def crop_this_page(url, srcfile, charset, client)
  # The page source returned by prepare is not needed here.
  _source, page_url = prepare(url, srcfile, charset, client, __method__)
  site = sitedata(page_url)

  begin
    crop_html(page_url, nil, site)
  ensure
    prefetch_next_location(site.next_url(page_url), site)
  end
end

#load_config_file(config_file) ⇒ Object



429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
# File 'lib/w3m-autopagerize-server.rb', line 429

# Load a Ruby configuration file, reporting the outcome on $stderr.
#
# config_file - either the symbol :ignore (nothing is loaded) or a path.
#               A relative path is resolved against the directory that
#               contains this source file, not the current directory.
#
# A missing file and any StandardError or ScriptError (e.g. SyntaxError,
# LoadError) raised while loading are reported on $stderr and otherwise
# ignored, so a broken configuration does not abort the caller.
# Process-level exceptions such as SystemExit and Interrupt are deliberately
# NOT rescued and propagate to the caller.
#
# Returns nil.
def load_config_file(config_file)
  if config_file == :ignore
    $stderr.puts "load_config_file: config file is ignored!"
    return
  end

  # Reassigned on purpose: the rescue clause below reports the expanded path
  # whenever expansion itself succeeded.
  config_file = File.expand_path(config_file, File.dirname(__FILE__))
  if File.file?(config_file)
    load(config_file)
    $stderr.puts "load_config_file: loaded #{config_file}"
  else
    $stderr.puts "load_config_file: config file #{config_file} not found!"
  end
rescue StandardError, ScriptError => e
  $stderr.puts "load_config_file: error loading #{config_file}!"
  # Say why, so a broken config file can actually be diagnosed.
  $stderr.puts "load_config_file: #{e.class}: #{e.message}"
end

#load_siteinfo ⇒ Object

(executable-interpret “ruby19 -r w3m-autopagerize-server -e ‘$logger=Logger.new(); load_siteinfo’”) (executable-interpret “ruby18 -r w3m-autopagerize-server -e ‘$logger=Logger.new(); load_siteinfo’”)



410
411
412
413
414
415
416
417
418
419
420
421
422
423
# File 'lib/w3m-autopagerize-server.rb', line 410

# Import SITEINFO entries from every URL in $SITEINFO_IMPORT_URLS.
#
# Each URL's content is fetched with +get_content+, converted with +toutf8+
# and parsed as JSON. For every entry whose "data" hash has a "url" that is
# not listed in $EXCLUDE_URLS, a pair [Regexp built from that url, SiteData]
# is appended to $SITEINFO.
#
# Entries that cannot be used are skipped instead of aborting the whole
# import: entries without a "data" hash, entries without a "url", and
# entries whose "url" is not a valid regular expression (these are logged
# as warnings).
#
# Returns the result of the final $logger.info call.
def load_siteinfo
  $SITEINFO_IMPORT_URLS.each do |siteinfo_url|
    JSON.parse(get_content(siteinfo_url).toutf8).each do |entry|
      data = entry["data"]
      next unless data.is_a?(Hash)

      url = data["url"]
      next if !url || $EXCLUDE_URLS.include?(url)

      sd = SiteData.new data["nextLink"], data["insertBefore"],
                        data["exampleUrl"], data["pageElement"]
      begin
        pattern = Regexp.new(url)
      rescue RegexpError, TypeError => e
        # One malformed pattern in the imported data must not discard the
        # remaining entries.
        $logger.warn "#{__method__}: skipped invalid url pattern #{url.inspect} (#{e.message})"
        next
      end
      $SITEINFO << [pattern, sd]
    end
  end
  $logger.info "#{__method__}: loaded"
end

#nextpage(url, srcfile, charset, client) ⇒ Object



361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/w3m-autopagerize-server.rb', line 361

# Entry point: crop the page that follows +url+.
#
# The arguments are forwarded to +prepare+, which returns the page source and
# the URL used from here on. The next location is taken from the site data
# for that URL; when it has none, each entry of SiteData.fallbacks is asked
# in order and the first one that yields a URL is used together with that
# URL. The chosen location is passed to +crop_html+, and
# +prefetch_next_location+ is called afterwards even if +crop_html+ raises.
#
# Returns whatever +crop_html+ returns. Any StandardError - including the
# "no location!" error raised when neither the site data nor a fallback
# yields a URL - is logged and converted into a hash {:html => report}
# holding an HTML error report.
def nextpage(url, srcfile, charset, client)
  src, url = prepare(url, srcfile, charset, client, __method__)
  site = sitedata(url)
  location = site.next_url(url)

  if location
    if site.pageElement
      $logger.debug "#{__method__}: location and pageElement found."
    else
      $logger.debug "#{__method__}: location found."
    end
    begin
      crop_html location, url, site
    ensure
      prefetch_next_location location, site
    end
  else
    # Explicit search: a `for ... end` expression evaluates to the iterated
    # collection when no `break` fires, which is truthy and would make the
    # "no location" branch below unreachable.
    fallback = nil
    fallback_nexturl = nil
    SiteData.fallbacks.each do |candidate|
      candidate_url = candidate.next_url(url)
      next unless candidate_url
      fallback = candidate
      fallback_nexturl = candidate_url
      break
    end

    if fallback_nexturl
      $logger.info "#{__method__}: fallback"
      begin
        crop_html fallback_nexturl, url, fallback
      ensure
        prefetch_next_location fallback_nexturl, fallback
      end
    else
      $logger.debug "#{__method__}: no location."
      raise "no location!"
    end
  end
rescue => e
  # Everything below must tolerate a failure that happened before the site
  # data or the page source became available; otherwise the report itself
  # raises and hides the original error.
  xpath = (site && site.nextLink) || 'nextLink not found'
  cached = $content_cache && $content_cache[url]
  body = src || (cached && cached.first)
  src_encoding = body ? Kconv.guess(body) : nil
  trace = e.backtrace.pretty_inspect
  html = %{<pre>Error!
xpath = #{xpath}
#{e}
#{trace}
src_encoding=#{src_encoding}
</pre>
  }
  $logger.error "#{__method__}: error!"
  $logger.error "#{__method__}: #{e}"
  $logger.error "#{__method__}: #{trace}"
  {:html => html}
end

#prepare(url, srcfile, charset, client, method) ⇒ Object



331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
# File 'lib/w3m-autopagerize-server.rb', line 331

# Shared setup for the entry points: logs the request, stores +client+ in
# $client, reads the page source and works out the URL to use.
#
# url     - URL of the request.
# srcfile - path read through File.zread, or nil when no source is given.
# charset - charset name; handed to +normalize_charset+ when the source is
#           cached.
# client  - stored in the global $client.
# method  - name of the calling entry point, used as the log prefix.
#
# When +url+ matches the local-CGI pattern (file:...​/cgi-bin/), the URL is
# replaced by the first <base href="..."> value found in the source (nil if
# there is none). Otherwise, when a source was read, $content_cache[url] is
# set to [source, normalized charset].
#
# Returns [src, url]: the source as a binary (ASCII-8BIT) string or nil, and
# the URL to use.
def prepare(url, srcfile, charset, client, method)
  $logger.info "=================================================="
  $logger.info "#{method}: entered url=#{url} charset=#{charset}"
  $logger.debug "#{method}: W3M_SOURCEFILE = #{srcfile}" if srcfile
  $client = client

  src = srcfile ? File.zread(srcfile).force_encoding("ASCII-8BIT") : nil

  unless url =~ /^file:.*\/cgi-bin\//
    # Ordinary page: remember the source handed over via W3M_SOURCEFILE.
    $logger.debug "#{method}: set $content_cache[#{url.inspect}] from W3M_SOURCEFILE"
    $logger.debug "#{method}: source is html? = #{(src =~ /<body/i) && true}"
    if src
      $content_cache[url] = [src.force_encoding("ASCII-8BIT"), normalize_charset(charset)]
    end
    return [src, url]
  end

  # Local CGI: the real page URL is carried in the <base href> of the source.
  url = src.force_encoding("ASCII-8BIT")[%r!<base href=['"](.+?)['"]!, 1]
  $logger.info "#{method}: base url=#{url}"
  [src, url]
end

#restart ⇒ Object



425
426
427
# File 'lib/w3m-autopagerize-server.rb', line 425

# Restart by replacing the current process with a fresh run of the program
# named by $0 ($PROGRAM_NAME is the built-in alias of $0). No arguments are
# passed to the new run. Does not return when the exec succeeds.
def restart
  exec($PROGRAM_NAME)
end